/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002 Free Software Foundation, Inc.

This file is part of GNU CC.

GNU CC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU CC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */

#include "config.h"
#include "system.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "except.h"
#include "function.h"
#include "recog.h"
#include "expr.h"
#include "optabs.h"
#include "toplev.h"
#include "basic-block.h"
#include "ggc.h"
#include "target.h"
#include "target-def.h"
#include "langhooks.h"

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif

/* Processor costs (relative to an add) */
static const
struct processor_costs size_cost = {  /* costs for tuning for size */
  2,                    /* cost of an add instruction */
  3,                    /* cost of a lea instruction */
  2,                    /* variable shift costs */
  3,                    /* constant shift costs */
  3,                    /* cost of starting a multiply */
  0,                    /* cost of multiply per each bit set */
  3,                    /* cost of a divide/mod */
  3,                    /* cost of movsx */
  3,                    /* cost of movzx */
  0,                    /* "large" insn */
  2,                    /* MOVE_RATIO */
  2,                    /* cost for loading QImode using movzbl */
  {2, 2, 2},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {2, 2, 2},            /* cost of storing integer registers */
  2,                    /* cost of reg,reg fld/fst */
  {2, 2, 2},            /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {2, 2, 2},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  3,                    /* cost of moving MMX register */
  {3, 3},               /* cost of loading MMX registers
                           in SImode and DImode */
  {3, 3},               /* cost of storing MMX registers
                           in SImode and DImode */
  3,                    /* cost of moving SSE register */
  {3, 3, 3},            /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {3, 3, 3},            /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  3,                    /* MMX or SSE register to integer */
  0,                    /* size of prefetch block */
  0,                    /* number of parallel prefetches */
};
/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = {  /* 386 specific costs */
  1,                    /* cost of an add instruction */
  1,                    /* cost of a lea instruction */
  3,                    /* variable shift costs */
  2,                    /* constant shift costs */
  6,                    /* cost of starting a multiply */
  1,                    /* cost of multiply per each bit set */
  23,                   /* cost of a divide/mod */
  3,                    /* cost of movsx */
  2,                    /* cost of movzx */
  15,                   /* "large" insn */
  3,                    /* MOVE_RATIO */
  4,                    /* cost for loading QImode using movzbl */
  {2, 4, 2},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {2, 4, 2},            /* cost of storing integer registers */
  2,                    /* cost of reg,reg fld/fst */
  {8, 8, 8},            /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {8, 8, 8},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  2,                    /* cost of moving MMX register */
  {4, 8},               /* cost of loading MMX registers
                           in SImode and DImode */
  {4, 8},               /* cost of storing MMX registers
                           in SImode and DImode */
  2,                    /* cost of moving SSE register */
  {4, 8, 16},           /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {4, 8, 16},           /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  3,                    /* MMX or SSE register to integer */
  0,                    /* size of prefetch block */
  0,                    /* number of parallel prefetches */
};

static const
struct processor_costs i486_cost = {  /* 486 specific costs */
  1,                    /* cost of an add instruction */
  1,                    /* cost of a lea instruction */
  3,                    /* variable shift costs */
  2,                    /* constant shift costs */
  12,                   /* cost of starting a multiply */
  1,                    /* cost of multiply per each bit set */
  40,                   /* cost of a divide/mod */
  3,                    /* cost of movsx */
  2,                    /* cost of movzx */
  15,                   /* "large" insn */
  3,                    /* MOVE_RATIO */
  4,                    /* cost for loading QImode using movzbl */
  {2, 4, 2},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {2, 4, 2},            /* cost of storing integer registers */
  2,                    /* cost of reg,reg fld/fst */
  {8, 8, 8},            /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {8, 8, 8},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  2,                    /* cost of moving MMX register */
  {4, 8},               /* cost of loading MMX registers
                           in SImode and DImode */
  {4, 8},               /* cost of storing MMX registers
                           in SImode and DImode */
  2,                    /* cost of moving SSE register */
  {4, 8, 16},           /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {4, 8, 16},           /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  3,                    /* MMX or SSE register to integer */
  0,                    /* size of prefetch block */
  0,                    /* number of parallel prefetches */
};

static const
struct processor_costs pentium_cost = {
  1,                    /* cost of an add instruction */
  1,                    /* cost of a lea instruction */
  4,                    /* variable shift costs */
  1,                    /* constant shift costs */
  11,                   /* cost of starting a multiply */
  0,                    /* cost of multiply per each bit set */
  25,                   /* cost of a divide/mod */
  3,                    /* cost of movsx */
  2,                    /* cost of movzx */
  8,                    /* "large" insn */
  6,                    /* MOVE_RATIO */
  6,                    /* cost for loading QImode using movzbl */
  {2, 4, 2},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {2, 4, 2},            /* cost of storing integer registers */
  2,                    /* cost of reg,reg fld/fst */
  {2, 2, 6},            /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {4, 4, 6},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  8,                    /* cost of moving MMX register */
  {8, 8},               /* cost of loading MMX registers
                           in SImode and DImode */
  {8, 8},               /* cost of storing MMX registers
                           in SImode and DImode */
  2,                    /* cost of moving SSE register */
  {4, 8, 16},           /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {4, 8, 16},           /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  3,                    /* MMX or SSE register to integer */
  0,                    /* size of prefetch block */
  0,                    /* number of parallel prefetches */
};

static const
struct processor_costs pentiumpro_cost = {
  1,                    /* cost of an add instruction */
  1,                    /* cost of a lea instruction */
  1,                    /* variable shift costs */
  1,                    /* constant shift costs */
  4,                    /* cost of starting a multiply */
  0,                    /* cost of multiply per each bit set */
  17,                   /* cost of a divide/mod */
  1,                    /* cost of movsx */
  1,                    /* cost of movzx */
  8,                    /* "large" insn */
  6,                    /* MOVE_RATIO */
  2,                    /* cost for loading QImode using movzbl */
  {4, 4, 4},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {2, 2, 2},            /* cost of storing integer registers */
  2,                    /* cost of reg,reg fld/fst */
  {2, 2, 6},            /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {4, 4, 6},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  2,                    /* cost of moving MMX register */
  {2, 2},               /* cost of loading MMX registers
                           in SImode and DImode */
  {2, 2},               /* cost of storing MMX registers
                           in SImode and DImode */
  2,                    /* cost of moving SSE register */
  {2, 2, 8},            /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {2, 2, 8},            /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  3,                    /* MMX or SSE register to integer */
  32,                   /* size of prefetch block */
  6,                    /* number of parallel prefetches */
};

static const
struct processor_costs k6_cost = {
  1,                    /* cost of an add instruction */
  2,                    /* cost of a lea instruction */
  1,                    /* variable shift costs */
  1,                    /* constant shift costs */
  3,                    /* cost of starting a multiply */
  0,                    /* cost of multiply per each bit set */
  18,                   /* cost of a divide/mod */
  2,                    /* cost of movsx */
  2,                    /* cost of movzx */
  8,                    /* "large" insn */
  4,                    /* MOVE_RATIO */
  3,                    /* cost for loading QImode using movzbl */
  {4, 5, 4},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {2, 3, 2},            /* cost of storing integer registers */
  4,                    /* cost of reg,reg fld/fst */
  {6, 6, 6},            /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {4, 4, 4},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  2,                    /* cost of moving MMX register */
  {2, 2},               /* cost of loading MMX registers
                           in SImode and DImode */
  {2, 2},               /* cost of storing MMX registers
                           in SImode and DImode */
  2,                    /* cost of moving SSE register */
  {2, 2, 8},            /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {2, 2, 8},            /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  6,                    /* MMX or SSE register to integer */
  32,                   /* size of prefetch block */
  1,                    /* number of parallel prefetches */
};

static const
struct processor_costs athlon_cost = {
  1,                    /* cost of an add instruction */
  2,                    /* cost of a lea instruction */
  1,                    /* variable shift costs */
  1,                    /* constant shift costs */
  5,                    /* cost of starting a multiply */
  0,                    /* cost of multiply per each bit set */
  42,                   /* cost of a divide/mod */
  1,                    /* cost of movsx */
  1,                    /* cost of movzx */
  8,                    /* "large" insn */
  9,                    /* MOVE_RATIO */
  4,                    /* cost for loading QImode using movzbl */
  {4, 5, 4},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {2, 3, 2},            /* cost of storing integer registers */
  4,                    /* cost of reg,reg fld/fst */
  {6, 6, 20},           /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {4, 4, 16},           /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  2,                    /* cost of moving MMX register */
  {2, 2},               /* cost of loading MMX registers
                           in SImode and DImode */
  {2, 2},               /* cost of storing MMX registers
                           in SImode and DImode */
  2,                    /* cost of moving SSE register */
  {2, 2, 8},            /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {2, 2, 8},            /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  6,                    /* MMX or SSE register to integer */
  64,                   /* size of prefetch block */
  6,                    /* number of parallel prefetches */
};

static const
struct processor_costs pentium4_cost = {
  1,                    /* cost of an add instruction */
  1,                    /* cost of a lea instruction */
  8,                    /* variable shift costs */
  8,                    /* constant shift costs */
  30,                   /* cost of starting a multiply */
  0,                    /* cost of multiply per each bit set */
  112,                  /* cost of a divide/mod */
  1,                    /* cost of movsx */
  1,                    /* cost of movzx */
  16,                   /* "large" insn */
  6,                    /* MOVE_RATIO */
  2,                    /* cost for loading QImode using movzbl */
  {4, 5, 4},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {2, 3, 2},            /* cost of storing integer registers */
  2,                    /* cost of reg,reg fld/fst */
  {2, 2, 6},            /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {4, 4, 6},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  2,                    /* cost of moving MMX register */
  {2, 2},               /* cost of loading MMX registers
                           in SImode and DImode */
  {2, 2},               /* cost of storing MMX registers
                           in SImode and DImode */
  12,                   /* cost of moving SSE register */
  {12, 12, 12},         /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {2, 2, 8},            /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  10,                   /* MMX or SSE register to integer */
  64,                   /* size of prefetch block */
  6,                    /* number of parallel prefetches */
};

const struct processor_costs *ix86_cost = &pentium_cost;

/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_K6 (1<<PROCESSOR_K6)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)

const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
const int x86_push_memory = m_386 | m_K6 | m_ATHLON | m_PENT4;
const int x86_zero_extend_with_and = m_486 | m_PENT;
const int x86_movx = m_ATHLON | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
const int x86_double_with_add = ~m_386;
const int x86_use_bit_test = m_386;
const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
const int x86_cmove = m_PPRO | m_ATHLON | m_PENT4;
const int x86_3dnow_a = m_ATHLON;
const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON | m_PENT4;
const int x86_branch_hints = m_PENT4;
const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
const int x86_partial_reg_stall = m_PPRO;
const int x86_use_loop = m_K6;
const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
const int x86_use_mov0 = m_K6;
const int x86_use_cltd = ~(m_PENT | m_K6);
const int x86_read_modify_write = ~m_PENT;
const int x86_read_modify = ~(m_PENT | m_PPRO);
const int x86_split_long_moves = m_PPRO;
const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON;
const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
const int x86_single_stringop = m_386 | m_PENT4;
const int x86_qimode_math = ~(0);
const int x86_promote_qi_regs = 0;
const int x86_himode_math = ~(m_PPRO);
const int x86_promote_hi_regs = m_PPRO;
const int x86_sub_esp_4 = m_ATHLON | m_PPRO | m_PENT4;
const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486 | m_PENT4;
const int x86_add_esp_4 = m_ATHLON | m_K6 | m_PENT4;
const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
const int x86_integer_DFmode_moves = ~(m_ATHLON | m_PENT4 | m_PPRO);
const int x86_partial_reg_dependency = m_ATHLON | m_PENT4;
const int x86_memory_mismatch_stall = m_ATHLON | m_PENT4;
const int x86_accumulate_outgoing_args = m_ATHLON | m_PENT4 | m_PPRO;
const int x86_prologue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
const int x86_epilogue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
const int x86_decompose_lea = m_PENT4;
const int x86_shift1 = ~m_486;
const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON | m_PENT4;

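/* Illustrative sketch (not part of the original file): these tuning
   masks are meant to be tested against the bit for the active CPU.
   i386.h wraps them in TARGET_* macros, roughly along these lines
   (CPUMASK here is assumed to be (1 << ix86_cpu), as used further
   down in override_options).  */
#if 0
#define CPUMASK (1 << ix86_cpu)
#define TARGET_USE_LEAVE (x86_use_leave & CPUMASK)
#define TARGET_PUSH_MEMORY (x86_push_memory & CPUMASK)
#endif
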
/* In case the average insn count for a single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 30

/* Set by prologue expander and used by epilogue expander to determine
   the style used.  */
static int use_fast_prologue_epilogue;

/* Names for 8 (low), 8 (high), and 16-bit registers, respectively.  */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
static const char *const hi_reg_name[] = HI_REGISTER_NAMES;

/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */

enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  /* ax, dx, cx, bx */
  AREG, DREG, CREG, BREG,
  /* si, di, bp, sp */
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  /* FP registers */
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* arg pointer */
  NON_Q_REGS,
  /* flags, fpsr, dirflag, frame */
  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  MMX_REGS, MMX_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
};

/* The "default" register map used in 32-bit mode.  */

int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 4, 5,           /* general regs */
  12, 13, 14, 15, 16, 17, 18, 19,   /* fp regs */
  -1, -1, -1, -1, -1,               /* arg, flags, fpsr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,   /* SSE */
  29, 30, 31, 32, 33, 34, 35, 36,   /* MMX */
  -1, -1, -1, -1, -1, -1, -1, -1,   /* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,   /* extended SSE registers */
};

static int const x86_64_int_parameter_registers[6] =
{
  5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
  FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
};

static int const x86_64_int_return_registers[4] =
{
  0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
};

/* The "default" register map used in 64-bit mode.  */
int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 1, 2, 3, 4, 5, 6, 7,           /* general regs */
  33, 34, 35, 36, 37, 38, 39, 40,   /* fp regs */
  -1, -1, -1, -1, -1,               /* arg, flags, fpsr, dir, frame */
  17, 18, 19, 20, 21, 22, 23, 24,   /* SSE */
  41, 42, 43, 44, 45, 46, 47, 48,   /* MMX */
  8, 9, 10, 11, 12, 13, 14, 15,     /* extended integer registers */
  25, 26, 27, 28, 29, 30, 31, 32,   /* extended SSE registers */
};

/* Define the register numbers to be used in Dwarf debugging information.
   The SVR4 reference port C compiler uses the following register numbers
   in its Dwarf output code:
	0 for %eax (gcc regno = 0)
	1 for %ecx (gcc regno = 2)
	2 for %edx (gcc regno = 1)
	3 for %ebx (gcc regno = 3)
	4 for %esp (gcc regno = 7)
	5 for %ebp (gcc regno = 6)
	6 for %esi (gcc regno = 4)
	7 for %edi (gcc regno = 5)
   The following three DWARF register numbers are never generated by
   the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
   believes these numbers have these meanings.
	8 for %eip (no gcc equivalent)
	9 for %eflags (gcc regno = 17)
	10 for %trapno (no gcc equivalent)
   It is not at all clear how we should number the FP stack registers
   for the x86 architecture.  If the version of SDB on x86/svr4 were
   a bit less brain dead with respect to floating-point then we would
   have a precedent to follow with respect to DWARF register numbers
   for x86 FP registers, but the SDB on x86/svr4 is so completely
   broken with respect to FP registers that it is hardly worth thinking
   of it as something to strive for compatibility with.
   The version of x86/svr4 SDB I have at the moment does (partially)
   seem to believe that DWARF register number 11 is associated with
   the x86 register %st(0), but that's about all.  Higher DWARF
   register numbers don't seem to be associated with anything in
   particular, and even for DWARF regno 11, SDB only seems to
   understand that it should say that a variable lives in %st(0) (when
   asked via an `=' command) if we said it was in DWARF regno 11,
   but SDB still prints garbage when asked for the value of the
   variable in question (via a `/' command).
   (Also note that the labels SDB prints for various FP stack regs
   when doing an `x' command are all wrong.)
   Note that these problems generally don't affect the native SVR4
   C compiler because it doesn't allow the use of -O with -g and
   because when it is *not* optimizing, it allocates a memory
   location for each floating-point variable, and the memory
   location is what gets described in the DWARF AT_location
   attribute for the variable in question.
   Regardless of the severe mental illness of the x86/svr4 SDB, we
   do something sensible here and we use the following DWARF
   register numbers.  Note that these are all stack-top-relative
   numbers.
	11 for %st(0) (gcc regno = 8)
	12 for %st(1) (gcc regno = 9)
	13 for %st(2) (gcc regno = 10)
	14 for %st(3) (gcc regno = 11)
	15 for %st(4) (gcc regno = 12)
	16 for %st(5) (gcc regno = 13)
	17 for %st(6) (gcc regno = 14)
	18 for %st(7) (gcc regno = 15)
*/
int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 5, 4,           /* general regs */
  11, 12, 13, 14, 15, 16, 17, 18,   /* fp regs */
  -1, 9, -1, -1, -1,                /* arg, flags, fpsr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,   /* SSE registers */
  29, 30, 31, 32, 33, 34, 35, 36,   /* MMX registers */
  -1, -1, -1, -1, -1, -1, -1, -1,   /* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,   /* extended SSE registers */
};
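/* Illustrative sketch (not part of the original file): the debug
   back ends select one of these maps per register through the
   DBX_REGISTER_NUMBER macro in i386.h, roughly:  */
#if 0
#define DBX_REGISTER_NUMBER(n) \
  (TARGET_64BIT ? dbx64_register_map[n] : dbx_register_map[n])
#endif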

/* Test and compare insns in i386.md store the information needed to
   generate branch and scc insns here.  */

rtx ix86_compare_op0 = NULL_RTX;
rtx ix86_compare_op1 = NULL_RTX;

/* The encoding characters for the four TLS models present in ELF.  */

static char const tls_model_chars[] = " GLil";
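/* Illustrative note (not part of the original file): the string above
   appears to be indexed by the tls_model enumeration, so global-dynamic
   maps to 'G', local-dynamic to 'L', initial-exec to 'i' and local-exec
   to 'l', with the leading space standing for "no TLS model".  */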

#define MAX_386_STACK_LOCALS 3
/* Size of the register save area.  */
#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
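/* Worked example (not part of the original file): assuming the 64-bit
   values REGPARM_MAX == 6, UNITS_PER_WORD == 8 and SSE_REGPARM_MAX == 8,
   the save area is 6 * 8 + 8 * 16 = 176 bytes: one 8-byte slot per
   integer argument register plus one 16-byte slot per SSE register.  */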

/* Define the structure for the machine field in struct function.  */
struct machine_function GTY(())
{
  rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
  const char *some_ld_name;
  int save_varrargs_registers;
  int accesses_prev_frame;
};

#define ix86_stack_locals (cfun->machine->stack_locals)
#define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)

/* Structure describing stack frame layout.
   Stack grows downward:

   [arguments]
					<- ARG_POINTER
   saved pc

   saved frame pointer if frame_pointer_needed
					<- HARD_FRAME_POINTER
   [saved regs]

   [padding1]          \
			)
   [va_arg registers]  (
			> to_allocate	<- FRAME_POINTER
   [frame]	       (
			)
   [padding2]	       /
  */
struct ix86_frame
{
  int nregs;
  int padding1;
  int va_arg_size;
  HOST_WIDE_INT frame;
  int padding2;
  int outgoing_arguments_size;
  int red_zone_size;

  HOST_WIDE_INT to_allocate;
  /* The offsets relative to ARG_POINTER.  */
  HOST_WIDE_INT frame_pointer_offset;
  HOST_WIDE_INT hard_frame_pointer_offset;
  HOST_WIDE_INT stack_pointer_offset;
};

/* Used to enable/disable debugging features.  */
const char *ix86_debug_arg_string, *ix86_debug_addr_string;
/* Code model option as passed by user.  */
const char *ix86_cmodel_string;
/* Parsed value.  */
enum cmodel ix86_cmodel;
/* Asm dialect.  */
const char *ix86_asm_string;
enum asm_dialect ix86_asm_dialect = ASM_ATT;
/* TLS dialect.  */
const char *ix86_tls_dialect_string;
enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;

/* Which unit we are generating floating point math for.  */
enum fpmath_unit ix86_fpmath;

/* Which cpu are we scheduling for.  */
enum processor_type ix86_cpu;
/* Which instruction set architecture to use.  */
enum processor_type ix86_arch;

/* Strings to hold which cpu and instruction set architecture to use.  */
const char *ix86_cpu_string;            /* for -mcpu=<xxx> */
const char *ix86_arch_string;           /* for -march=<xxx> */
const char *ix86_fpmath_string;         /* for -mfpmath=<xxx> */

/* # of registers to use to pass arguments.  */
const char *ix86_regparm_string;

/* True if the SSE prefetch instruction is not a NOP.  */
int x86_prefetch_sse;

/* ix86_regparm_string as a number.  */
int ix86_regparm;

/* Alignment to use for loops and jumps:  */

/* Power of two alignment for loops.  */
const char *ix86_align_loops_string;

/* Power of two alignment for non-loop jumps.  */
const char *ix86_align_jumps_string;

/* Power of two alignment for stack boundary in bytes.  */
const char *ix86_preferred_stack_boundary_string;

/* Preferred alignment for stack boundary in bits.  */
int ix86_preferred_stack_boundary;

/* Values 1-5: see jump.c.  */
int ix86_branch_cost;
const char *ix86_branch_cost_string;

/* Power of two alignment for functions.  */
const char *ix86_align_funcs_string;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
static char internal_label_prefix[16];
static int internal_label_prefix_len;
\f
static int local_symbolic_operand PARAMS ((rtx, enum machine_mode));
static int tls_symbolic_operand_1 PARAMS ((rtx, enum tls_model));
static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
					int, int, FILE *));
static const char *get_some_local_dynamic_name PARAMS ((void));
static int get_some_local_dynamic_name_1 PARAMS ((rtx *, void *));
static rtx maybe_get_pool_constant PARAMS ((rtx));
static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
							   rtx *, rtx *));
static rtx get_thread_pointer PARAMS ((void));
static void get_pc_thunk_name PARAMS ((char [32], unsigned int));
static rtx gen_push PARAMS ((rtx));
static int memory_address_length PARAMS ((rtx addr));
static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
static void ix86_dump_ppro_packet PARAMS ((FILE *));
static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
static struct machine_function * ix86_init_machine_status PARAMS ((void));
static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
static int ix86_nsaved_regs PARAMS ((void));
static void ix86_emit_save_regs PARAMS ((void));
static void ix86_emit_save_regs_using_mov PARAMS ((rtx, HOST_WIDE_INT));
static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int, int));
static void ix86_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
static void ix86_sched_reorder_ppro PARAMS ((rtx *, rtx *));
static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
static void ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT));
static rtx ix86_expand_aligntest PARAMS ((rtx, int));
static void ix86_expand_strlensi_unroll_1 PARAMS ((rtx, rtx));
static int ix86_issue_rate PARAMS ((void));
static int ix86_adjust_cost PARAMS ((rtx, rtx, rtx, int));
static void ix86_sched_init PARAMS ((FILE *, int, int));
static int ix86_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
static int ix86_variable_issue PARAMS ((FILE *, int, rtx, int));
static int ia32_use_dfa_pipeline_interface PARAMS ((void));
static int ia32_multipass_dfa_lookahead PARAMS ((void));
static void ix86_init_mmx_sse_builtins PARAMS ((void));

struct ix86_address
{
  rtx base, index, disp;
  HOST_WIDE_INT scale;
};

static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
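/* Worked example (not part of the original file): a memory operand such
   as 12(%ebx,%esi,4), i.e. the RTL address
     (plus (reg ebx) (plus (mult (reg esi) (const_int 4)) (const_int 12)))
   would be decomposed into base = ebx, index = esi, scale = 4 and
   disp = (const_int 12).  */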

static void ix86_encode_section_info PARAMS ((tree, int)) ATTRIBUTE_UNUSED;
static const char *ix86_strip_name_encoding PARAMS ((const char *))
     ATTRIBUTE_UNUSED;

struct builtin_description;
static rtx ix86_expand_sse_comi PARAMS ((const struct builtin_description *,
					 tree, rtx));
static rtx ix86_expand_sse_compare PARAMS ((const struct builtin_description *,
					    tree, rtx));
static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
static rtx ix86_expand_timode_binop_builtin PARAMS ((enum insn_code,
						     tree, rtx));
static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree));
static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
					      enum rtx_code *,
					      enum rtx_code *,
					      enum rtx_code *));
static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
					   rtx *, rtx *));
static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
static unsigned int ix86_select_alt_pic_regnum PARAMS ((void));
static int ix86_save_reg PARAMS ((unsigned int, int));
static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));
static int ix86_comp_type_attributes PARAMS ((tree, tree));
const struct attribute_spec ix86_attribute_table[];
static tree ix86_handle_cdecl_attribute PARAMS ((tree *, tree, tree, int, bool *));
static tree ix86_handle_regparm_attribute PARAMS ((tree *, tree, tree, int, bool *));
static int ix86_value_regno PARAMS ((enum machine_mode));

#if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int));
#endif

/* Register class used for passing the given 64-bit part of the argument.
   These represent classes as documented by the PS ABI, with the exception
   of the SSESF and SSEDF classes, which are basically the SSE class, only
   gcc will use SFmode or DFmode moves instead of DImode to avoid
   reformatting penalties.

   Similarly we play games with the INTEGERSI_CLASS to use cheaper SImode
   moves whenever possible (the upper half does contain padding).  */
enum x86_64_reg_class
  {
    X86_64_NO_CLASS,
    X86_64_INTEGER_CLASS,
    X86_64_INTEGERSI_CLASS,
    X86_64_SSE_CLASS,
    X86_64_SSESF_CLASS,
    X86_64_SSEDF_CLASS,
    X86_64_SSEUP_CLASS,
    X86_64_X87_CLASS,
    X86_64_X87UP_CLASS,
    X86_64_MEMORY_CLASS
  };
static const char * const x86_64_reg_class_name[] =
   {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};

#define MAX_CLASSES 4
static int classify_argument PARAMS ((enum machine_mode, tree,
				      enum x86_64_reg_class [MAX_CLASSES],
				      int));
static int examine_argument PARAMS ((enum machine_mode, tree, int, int *,
				     int *));
static rtx construct_container PARAMS ((enum machine_mode, tree, int, int, int,
					const int *, int));
static enum x86_64_reg_class merge_classes PARAMS ((enum x86_64_reg_class,
						    enum x86_64_reg_class));
\f
/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
#  undef TARGET_MERGE_DECL_ATTRIBUTES
#  define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue

#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
#ifdef ASM_QUAD
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
#endif

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT ix86_sched_init
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER ix86_sched_reorder
#undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
#define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
  ia32_use_dfa_pipeline_interface
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  ia32_multipass_dfa_lookahead

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif

struct gcc_target targetm = TARGET_INITIALIZER;
\f
/* Sometimes certain combinations of command options do not make
   sense on a particular target machine.  You can define a macro
   `OVERRIDE_OPTIONS' to take account of this.  This macro, if
   defined, is executed once just after all the command options have
   been parsed.

   Don't use this macro to turn on various extra optimizations for
   `-O'.  That is what `OPTIMIZATION_OPTIONS' is for.  */

void
override_options ()
{
  int i;
  /* Comes from final.c -- no real reason to change it.  */
#define MAX_CODE_ALIGN 16

  static struct ptt
    {
      const struct processor_costs *cost;  /* Processor costs */
      const int target_enable;             /* Target flags to enable.  */
      const int target_disable;            /* Target flags to disable.  */
      const int align_loop;                /* Default alignments.  */
      const int align_loop_max_skip;
      const int align_jump;
      const int align_jump_max_skip;
      const int align_func;
      const int branch_cost;
    }
  const processor_target_table[PROCESSOR_max] =
    {
      {&i386_cost, 0, 0, 4, 3, 4, 3, 4, 1},
      {&i486_cost, 0, 0, 16, 15, 16, 15, 16, 1},
      {&pentium_cost, 0, 0, 16, 7, 16, 7, 16, 1},
      {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16, 1},
      {&k6_cost, 0, 0, 32, 7, 32, 7, 32, 1},
      {&athlon_cost, 0, 0, 16, 7, 64, 7, 16, 1},
      {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0, 1}
    };

  static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
  static struct pta
    {
      const char *const name;		/* processor name or nickname.  */
      const enum processor_type processor;
      const enum pta_flags
	{
	  PTA_SSE = 1,
	  PTA_SSE2 = 2,
	  PTA_MMX = 4,
	  PTA_PREFETCH_SSE = 8,
	  PTA_3DNOW = 16,
	  PTA_3DNOW_A = 64
	} flags;
    }
  const processor_alias_table[] =
    {
      {"i386", PROCESSOR_I386, 0},
      {"i486", PROCESSOR_I486, 0},
      {"i586", PROCESSOR_PENTIUM, 0},
      {"pentium", PROCESSOR_PENTIUM, 0},
      {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
      {"i686", PROCESSOR_PENTIUMPRO, 0},
      {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
      {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
      {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
      {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
				       | PTA_MMX | PTA_PREFETCH_SSE},
      {"k6", PROCESSOR_K6, PTA_MMX},
      {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
      {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
      {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				   | PTA_3DNOW_A},
      {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
					 | PTA_3DNOW | PTA_3DNOW_A},
      {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				     | PTA_3DNOW_A | PTA_SSE},
      {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				      | PTA_3DNOW_A | PTA_SSE},
      {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				      | PTA_3DNOW_A | PTA_SSE},
    };

  int const pta_size = ARRAY_SIZE (processor_alias_table);

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

  if (!ix86_cpu_string && ix86_arch_string)
    ix86_cpu_string = ix86_arch_string;
  if (!ix86_cpu_string)
    ix86_cpu_string = cpu_names [TARGET_CPU_DEFAULT];
  if (!ix86_arch_string)
    ix86_arch_string = TARGET_64BIT ? "athlon-4" : "i386";

  if (ix86_cmodel_string != 0)
    {
      if (!strcmp (ix86_cmodel_string, "small"))
	ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
      else if (flag_pic)
	sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
      else if (!strcmp (ix86_cmodel_string, "32"))
	ix86_cmodel = CM_32;
      else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
	ix86_cmodel = CM_KERNEL;
      else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
	ix86_cmodel = CM_MEDIUM;
      else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
	ix86_cmodel = CM_LARGE;
      else
	error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
    }
  else
    {
      ix86_cmodel = CM_32;
      if (TARGET_64BIT)
	ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
    }
  if (ix86_asm_string != 0)
    {
      if (!strcmp (ix86_asm_string, "intel"))
	ix86_asm_dialect = ASM_INTEL;
      else if (!strcmp (ix86_asm_string, "att"))
	ix86_asm_dialect = ASM_ATT;
      else
	error ("bad value (%s) for -masm= switch", ix86_asm_string);
    }
  if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
    error ("code model `%s' not supported in the %s bit mode",
	   ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
  if (ix86_cmodel == CM_LARGE)
    sorry ("code model `large' not supported yet");
  if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
    sorry ("%i-bit mode not compiled in",
	   (target_flags & MASK_64BIT) ? 64 : 32);

  for (i = 0; i < pta_size; i++)
    if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
      {
	ix86_arch = processor_alias_table[i].processor;
	/* Default cpu tuning to the architecture.  */
	ix86_cpu = ix86_arch;
	if (processor_alias_table[i].flags & PTA_MMX
	    && !(target_flags & MASK_MMX_SET))
	  target_flags |= MASK_MMX;
	if (processor_alias_table[i].flags & PTA_3DNOW
	    && !(target_flags & MASK_3DNOW_SET))
	  target_flags |= MASK_3DNOW;
	if (processor_alias_table[i].flags & PTA_3DNOW_A
	    && !(target_flags & MASK_3DNOW_A_SET))
	  target_flags |= MASK_3DNOW_A;
	if (processor_alias_table[i].flags & PTA_SSE
	    && !(target_flags & MASK_SSE_SET))
	  target_flags |= MASK_SSE;
	if (processor_alias_table[i].flags & PTA_SSE2
	    && !(target_flags & MASK_SSE2_SET))
	  target_flags |= MASK_SSE2;
	if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
	  x86_prefetch_sse = true;
	break;
      }

  if (i == pta_size)
    error ("bad value (%s) for -march= switch", ix86_arch_string);

  for (i = 0; i < pta_size; i++)
    if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
      {
	ix86_cpu = processor_alias_table[i].processor;
	/* Test the prefetch flag inside the loop so that we never read
	   past the end of the table when the -mcpu name is not found.  */
	if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
	  x86_prefetch_sse = true;
	break;
      }
  if (i == pta_size)
    error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);

  if (optimize_size)
    ix86_cost = &size_cost;
  else
    ix86_cost = processor_target_table[ix86_cpu].cost;
  target_flags |= processor_target_table[ix86_cpu].target_enable;
  target_flags &= ~processor_target_table[ix86_cpu].target_disable;

  /* Arrange to set up i386_stack_locals for all functions.  */
  init_machine_status = ix86_init_machine_status;

  /* Validate -mregparm= value.  */
  if (ix86_regparm_string)
    {
      i = atoi (ix86_regparm_string);
      if (i < 0 || i > REGPARM_MAX)
	error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
      else
	ix86_regparm = i;
    }
  else if (TARGET_64BIT)
    ix86_regparm = REGPARM_MAX;

  /* If the user has provided any of the -malign-* options,
     warn and use that value only if -falign-* is not set.
     Remove this code in GCC 3.2 or later.  */
  if (ix86_align_loops_string)
    {
      warning ("-malign-loops is obsolete, use -falign-loops");
      if (align_loops == 0)
	{
	  i = atoi (ix86_align_loops_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_loops = 1 << i;
	}
    }

  if (ix86_align_jumps_string)
    {
      warning ("-malign-jumps is obsolete, use -falign-jumps");
      if (align_jumps == 0)
	{
	  i = atoi (ix86_align_jumps_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_jumps = 1 << i;
	}
    }

  if (ix86_align_funcs_string)
    {
      warning ("-malign-functions is obsolete, use -falign-functions");
      if (align_functions == 0)
	{
	  i = atoi (ix86_align_funcs_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_functions = 1 << i;
	}
    }

  /* Default align_* from the processor table.  */
  if (align_loops == 0)
    {
      align_loops = processor_target_table[ix86_cpu].align_loop;
      align_loops_max_skip = processor_target_table[ix86_cpu].align_loop_max_skip;
    }
  if (align_jumps == 0)
    {
      align_jumps = processor_target_table[ix86_cpu].align_jump;
      align_jumps_max_skip = processor_target_table[ix86_cpu].align_jump_max_skip;
    }
  if (align_functions == 0)
    {
      align_functions = processor_target_table[ix86_cpu].align_func;
    }

  /* Validate -mpreferred-stack-boundary= value, or provide default.
     The default of 128 bits is for Pentium III's SSE __m128, but we
     don't want additional code to keep the stack aligned when
     optimizing for code size.  */
  ix86_preferred_stack_boundary = (optimize_size
				   ? TARGET_64BIT ? 64 : 32
				   : 128);
  if (ix86_preferred_stack_boundary_string)
    {
      i = atoi (ix86_preferred_stack_boundary_string);
      if (i < (TARGET_64BIT ? 3 : 2) || i > 12)
	error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
	       TARGET_64BIT ? 3 : 2);
      else
	ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
    }

  /* Validate -mbranch-cost= value, or provide default.  */
  ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost;
  if (ix86_branch_cost_string)
    {
      i = atoi (ix86_branch_cost_string);
      if (i < 0 || i > 5)
	error ("-mbranch-cost=%d is not between 0 and 5", i);
      else
	ix86_branch_cost = i;
    }

  if (ix86_tls_dialect_string)
    {
      if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
	ix86_tls_dialect = TLS_DIALECT_GNU;
      else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
	ix86_tls_dialect = TLS_DIALECT_SUN;
      else
	error ("bad value (%s) for -mtls-dialect= switch",
	       ix86_tls_dialect_string);
    }

  if (profile_flag)
    target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;

  /* Keep nonleaf frame pointers.  */
  if (TARGET_OMIT_LEAF_FRAME_POINTER)
    flag_omit_frame_pointer = 1;

  /* If we're doing fast math, we don't care about comparison order
     wrt NaNs.  This lets us use a shorter comparison sequence.  */
  if (flag_unsafe_math_optimizations)
    target_flags &= ~MASK_IEEE_FP;

  /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
     since the insns won't need emulation.  */
  if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
    target_flags &= ~MASK_NO_FANCY_MATH_387;

  if (TARGET_64BIT)
    {
      if (TARGET_ALIGN_DOUBLE)
	error ("-malign-double makes no sense in 64-bit mode");
      if (TARGET_RTD)
	error ("-mrtd calling convention not supported in 64-bit mode");
      /* Enable the SSE and MMX builtins by default.  */
      target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
      ix86_fpmath = FPMATH_SSE;
    }
  else
    ix86_fpmath = FPMATH_387;

  if (ix86_fpmath_string != 0)
    {
      if (! strcmp (ix86_fpmath_string, "387"))
	ix86_fpmath = FPMATH_387;
      else if (! strcmp (ix86_fpmath_string, "sse"))
	{
	  if (!TARGET_SSE)
	    {
	      warning ("SSE instruction set disabled, using 387 arithmetic");
	      ix86_fpmath = FPMATH_387;
	    }
	  else
	    ix86_fpmath = FPMATH_SSE;
	}
      else if (! strcmp (ix86_fpmath_string, "387,sse")
	       || ! strcmp (ix86_fpmath_string, "sse,387"))
	{
	  if (!TARGET_SSE)
	    {
	      warning ("SSE instruction set disabled, using 387 arithmetic");
	      ix86_fpmath = FPMATH_387;
	    }
	  else if (!TARGET_80387)
	    {
	      warning ("387 instruction set disabled, using SSE arithmetic");
	      ix86_fpmath = FPMATH_SSE;
	    }
	  else
	    ix86_fpmath = FPMATH_SSE | FPMATH_387;
	}
      else
	error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
    }

  /* It makes no sense to ask for just SSE builtins, so MMX is also turned
     on by -msse.  */
  if (TARGET_SSE)
    {
      target_flags |= MASK_MMX;
      x86_prefetch_sse = true;
    }

  /* If it has 3DNow! it also has MMX, so MMX is also turned on by -m3dnow.  */
  if (TARGET_3DNOW)
    {
      target_flags |= MASK_MMX;
      /* If we are targeting the Athlon architecture, enable the 3DNow!/MMX
	 extensions it adds.  */
      if (x86_3dnow_a & (1 << ix86_arch))
	target_flags |= MASK_3DNOW_A;
    }
  if ((x86_accumulate_outgoing_args & CPUMASK)
      && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS_SET)
      && !optimize_size)
    target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;

  /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix.  */
  {
    char *p;
    ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
    p = strchr (internal_label_prefix, 'X');
    internal_label_prefix_len = p - internal_label_prefix;
    *p = '\0';
  }
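  /* Illustrative note (not part of the original file): if, for example,
     ASM_GENERATE_INTERNAL_LABEL expands to the ELF-style "*.LX0", the
     code above leaves "*.L" in internal_label_prefix and sets
     internal_label_prefix_len to 3.  */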
}
\f
void
optimization_options (level, size)
     int level;
     int size ATTRIBUTE_UNUSED;
{
  /* For -O2 and beyond, turn off -fschedule-insns by default.  It tends to
     make the problem with not enough registers even worse.  */
#ifdef INSN_SCHEDULING
  if (level > 1)
    flag_schedule_insns = 0;
#endif
  if (TARGET_64BIT && optimize >= 1)
    flag_omit_frame_pointer = 1;
  if (TARGET_64BIT)
    {
      flag_pcc_struct_return = 0;
      flag_asynchronous_unwind_tables = 1;
    }
  if (profile_flag)
    flag_omit_frame_pointer = 0;
}
\f
/* Table of valid machine attributes.  */
const struct attribute_spec ix86_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  /* Stdcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
  /* Cdecl attribute says the callee is a normal C declaration.  */
  { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
  /* Regparm attribute specifies how many integer arguments are to be
     passed in registers.  */
  { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
  { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
  { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
#endif
  { NULL, 0, 0, false, false, false, NULL }
};

/* Handle a "cdecl" or "stdcall" attribute;
   arguments as in struct attribute_spec.handler.  */
static tree
ix86_handle_cdecl_attribute (node, name, args, flags, no_add_attrs)
     tree *node;
     tree name;
     tree args ATTRIBUTE_UNUSED;
     int flags ATTRIBUTE_UNUSED;
     bool *no_add_attrs;
{
  if (TREE_CODE (*node) != FUNCTION_TYPE
      && TREE_CODE (*node) != METHOD_TYPE
      && TREE_CODE (*node) != FIELD_DECL
      && TREE_CODE (*node) != TYPE_DECL)
    {
      warning ("`%s' attribute only applies to functions",
	       IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }

  if (TARGET_64BIT)
    {
      warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }

  return NULL_TREE;
}

/* Handle a "regparm" attribute;
   arguments as in struct attribute_spec.handler.  */
static tree
ix86_handle_regparm_attribute (node, name, args, flags, no_add_attrs)
     tree *node;
     tree name;
     tree args;
     int flags ATTRIBUTE_UNUSED;
     bool *no_add_attrs;
{
  if (TREE_CODE (*node) != FUNCTION_TYPE
      && TREE_CODE (*node) != METHOD_TYPE
      && TREE_CODE (*node) != FIELD_DECL
      && TREE_CODE (*node) != TYPE_DECL)
    {
      warning ("`%s' attribute only applies to functions",
	       IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }
  else
    {
      tree cst;

      cst = TREE_VALUE (args);
      if (TREE_CODE (cst) != INTEGER_CST)
	{
	  warning ("`%s' attribute requires an integer constant argument",
		   IDENTIFIER_POINTER (name));
	  *no_add_attrs = true;
	}
      else if (compare_tree_int (cst, REGPARM_MAX) > 0)
	{
	  warning ("argument to `%s' attribute larger than %d",
		   IDENTIFIER_POINTER (name), REGPARM_MAX);
	  *no_add_attrs = true;
	}
    }

  return NULL_TREE;
}

/* Return 0 if the attributes for two types are incompatible, 1 if they
   are compatible, and 2 if they are nearly compatible (which causes a
   warning to be generated).  */

static int
ix86_comp_type_attributes (type1, type2)
     tree type1;
     tree type2;
{
  /* Check for mismatch of non-default calling convention.  */
  const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";

  if (TREE_CODE (type1) != FUNCTION_TYPE)
    return 1;

  /* Check for mismatched return types (cdecl vs stdcall).  */
  if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
      != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
    return 0;
  return 1;
}
b08de47e
MM
1378\f
1379/* Value is the number of bytes of arguments automatically
1380 popped when returning from a subroutine call.
1381 FUNDECL is the declaration node of the function (as a tree),
1382 FUNTYPE is the data type of the function (as a tree),
1383 or for a library call it is an identifier node for the subroutine name.
1384 SIZE is the number of bytes of arguments passed on the stack.
1385
1386 On the 80386, the RTD insn may be used to pop them if the number
1387 of args is fixed, but if the number is variable then the caller
1388 must pop them all. RTD can't be used for library calls now
1389 because the library is compiled with the Unix compiler.
1390 Use of RTD is a selectable option, since it is incompatible with
1391 standard Unix calling sequences. If the option is not selected,
1392 the caller must always pop the args.
1393
1394 The attribute stdcall is equivalent to RTD on a per module basis. */
1395
1396int
e075ae69 1397ix86_return_pops_args (fundecl, funtype, size)
1398 tree fundecl;
1399 tree funtype;
1400 int size;
79325812 1401{
3345ee7d 1402 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
b08de47e 1403
0f290768 1404 /* Cdecl functions override -mrtd, and never pop the stack. */
e9a25f70 1405 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
79325812 1406
0f290768 1407 /* Stdcall functions will pop the stack if not variable args. */
1408 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)))
1409 rtd = 1;
79325812 1410
1411 if (rtd
1412 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1413 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1414 == void_type_node)))
1415 return size;
1416 }
79325812 1417
232b8f52 1418 /* Lose any fake structure return argument if it is passed on the stack. */
1419 if (aggregate_value_p (TREE_TYPE (funtype))
1420 && !TARGET_64BIT)
1421 {
1422 int nregs = ix86_regparm;
79325812 1423
1424 if (funtype)
1425 {
1426 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (funtype));
1427
1428 if (attr)
1429 nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1430 }
1431
1432 if (!nregs)
1433 return GET_MODE_SIZE (Pmode);
1434 }
1435
1436 return 0;
b08de47e 1437}
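/* For instance (a sketch): given

       void __attribute__ ((stdcall)) f (int a, int b);

   f returns with `ret $8' (ix86_return_pops_args returns SIZE, here 8),
   whereas the plain cdecl version returns with `ret' and leaves the 8
   bytes for the caller to pop.  A stdcall function with an ellipsis
   falls back to caller-pops, since the callee cannot know SIZE.  */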
1438\f
1439/* Argument support functions. */
1440
1441/* Return true when register may be used to pass function parameters. */
1442bool
1443ix86_function_arg_regno_p (regno)
1444 int regno;
1445{
1446 int i;
1447 if (!TARGET_64BIT)
1448 return (regno < REGPARM_MAX
1449 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1450 if (SSE_REGNO_P (regno) && TARGET_SSE)
1451 return true;
1452 /* RAX is used as a hidden argument to va_arg functions. */
1453 if (!regno)
1454 return true;
1455 for (i = 0; i < REGPARM_MAX; i++)
1456 if (regno == x86_64_int_parameter_registers[i])
1457 return true;
1458 return false;
1459}
1460
1461/* Initialize a variable CUM of type CUMULATIVE_ARGS
1462 for a call to a function whose data type is FNTYPE.
1463 For a library call, FNTYPE is 0. */
1464
1465void
1466init_cumulative_args (cum, fntype, libname)
e9a25f70 1467 CUMULATIVE_ARGS *cum; /* Argument info to initialize */
1468 tree fntype; /* tree ptr for function decl */
1469 rtx libname; /* SYMBOL_REF of library name or 0 */
1470{
1471 static CUMULATIVE_ARGS zero_cum;
1472 tree param, next_param;
1473
1474 if (TARGET_DEBUG_ARG)
1475 {
1476 fprintf (stderr, "\ninit_cumulative_args (");
1477 if (fntype)
1478 fprintf (stderr, "fntype code = %s, ret code = %s",
1479 tree_code_name[(int) TREE_CODE (fntype)],
1480 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1481 else
1482 fprintf (stderr, "no fntype");
1483
1484 if (libname)
1485 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1486 }
1487
1488 *cum = zero_cum;
1489
1490 /* Set up the number of registers to use for passing arguments. */
e075ae69 1491 cum->nregs = ix86_regparm;
1492 cum->sse_nregs = SSE_REGPARM_MAX;
1493 if (fntype && !TARGET_64BIT)
1494 {
1495 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));
e9a25f70 1496
1497 if (attr)
1498 cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1499 }
53c17031 1500 cum->maybe_vaarg = false;
1501
1502 /* Determine if this function has variable arguments. This is
1503 indicated by the last argument being 'void_type_node' if there
1504 are no variable arguments. If there are variable arguments, then
1505 we won't pass anything in registers. */
1506
1507 if (cum->nregs)
1508 {
1509 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
e9a25f70 1510 param != 0; param = next_param)
1511 {
1512 next_param = TREE_CHAIN (param);
e9a25f70 1513 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1514 {
1515 if (!TARGET_64BIT)
1516 cum->nregs = 0;
1517 cum->maybe_vaarg = true;
1518 }
1519 }
1520 }
1521 if ((!fntype && !libname)
1522 || (fntype && !TYPE_ARG_TYPES (fntype)))
1523 cum->maybe_vaarg = 1;
1524
1525 if (TARGET_DEBUG_ARG)
1526 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1527
1528 return;
1529}
1530
53c17031 1531/* x86-64 register passing implementation. See the x86-64 ABI for details. The goal
f710504c 1532 of this code is to classify each eightbyte of an incoming argument by the register
1533 class and assign registers accordingly. */
1534
1535/* Return the union class of CLASS1 and CLASS2.
1536 See the x86-64 PS ABI for details. */
1537
1538static enum x86_64_reg_class
1539merge_classes (class1, class2)
1540 enum x86_64_reg_class class1, class2;
1541{
1542 /* Rule #1: If both classes are equal, this is the resulting class. */
1543 if (class1 == class2)
1544 return class1;
1545
1546 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1547 the other class. */
1548 if (class1 == X86_64_NO_CLASS)
1549 return class2;
1550 if (class2 == X86_64_NO_CLASS)
1551 return class1;
1552
1553 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1554 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1555 return X86_64_MEMORY_CLASS;
1556
1557 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1558 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1559 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1560 return X86_64_INTEGERSI_CLASS;
1561 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1562 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1563 return X86_64_INTEGER_CLASS;
1564
1565 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1566 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1567 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1568 return X86_64_MEMORY_CLASS;
1569
1570 /* Rule #6: Otherwise class SSE is used. */
1571 return X86_64_SSE_CLASS;
1572}
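/* A worked example of the rules above (illustrative):

       union { int i; float f; }      INTEGERSI merged with SSESF,
                                      giving INTEGERSI by rule #4;
       union { long l; double d; }    INTEGER merged with SSEDF,
                                      giving INTEGER, again by rule #4;

   so both unions travel in a single general purpose register.  */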
1573
1574/* Classify the argument of type TYPE and mode MODE.
1575 CLASSES will be filled by the register class used to pass each word
1576 of the operand. The number of words is returned. In case the parameter
1577 should be passed in memory, 0 is returned. As a special case for zero
1578 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1579
1580 BIT_OFFSET is used internally for handling records and specifies the
1581 offset of the current field in bits modulo 256, to avoid overflow cases.
1582
1583 See the x86-64 PS ABI for details.
1584*/
1585
1586static int
1587classify_argument (mode, type, classes, bit_offset)
1588 enum machine_mode mode;
1589 tree type;
1590 enum x86_64_reg_class classes[MAX_CLASSES];
1591 int bit_offset;
1592{
1593 int bytes =
1594 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
23327dae 1595 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
53c17031 1596
1597 /* Variable sized entities are always passed/returned in memory. */
1598 if (bytes < 0)
1599 return 0;
1600
1601 if (type && AGGREGATE_TYPE_P (type))
1602 {
1603 int i;
1604 tree field;
1605 enum x86_64_reg_class subclasses[MAX_CLASSES];
1606
1607 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
1608 if (bytes > 16)
1609 return 0;
1610
1611 for (i = 0; i < words; i++)
1612 classes[i] = X86_64_NO_CLASS;
1613
1614 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
1615 signal the memory class, so handle this as a special case. */
1616 if (!words)
1617 {
1618 classes[0] = X86_64_NO_CLASS;
1619 return 1;
1620 }
1621
1622 /* Classify each field of record and merge classes. */
1623 if (TREE_CODE (type) == RECORD_TYPE)
1624 {
1625 /* For C++ classes, first merge in the fields of the base classes. */
1626 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1627 {
1628 tree bases = TYPE_BINFO_BASETYPES (type);
1629 int n_bases = TREE_VEC_LENGTH (bases);
1630 int i;
1631
1632 for (i = 0; i < n_bases; ++i)
1633 {
1634 tree binfo = TREE_VEC_ELT (bases, i);
1635 int num;
1636 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1637 tree type = BINFO_TYPE (binfo);
1638
1639 num = classify_argument (TYPE_MODE (type),
1640 type, subclasses,
1641 (offset + bit_offset) % 256);
1642 if (!num)
1643 return 0;
1644 for (i = 0; i < num; i++)
1645 {
db01f480 1646 int pos = (offset + (bit_offset % 64)) / 8 / 8;
1647 classes[i + pos] =
1648 merge_classes (subclasses[i], classes[i + pos]);
1649 }
1650 }
1651 }
1652 /* And now merge the fields of the structure. */
1653 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1654 {
1655 if (TREE_CODE (field) == FIELD_DECL)
1656 {
1657 int num;
1658
1659 /* Bitfields are always classified as integer. Handle them
1660 early, since later code would consider them to be
1661 misaligned integers. */
1662 if (DECL_BIT_FIELD (field))
1663 {
1664 for (i = int_bit_position (field) / 8 / 8;
1665 i < (int_bit_position (field)
1666 + tree_low_cst (DECL_SIZE (field), 0)
1667 + 63) / 8 / 8; i++)
1668 classes[i] =
1669 merge_classes (X86_64_INTEGER_CLASS,
1670 classes[i]);
1671 }
1672 else
1673 {
1674 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1675 TREE_TYPE (field), subclasses,
1676 (int_bit_position (field)
1677 + bit_offset) % 256);
1678 if (!num)
1679 return 0;
1680 for (i = 0; i < num; i++)
1681 {
1682 int pos =
db01f480 1683 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
1684 classes[i + pos] =
1685 merge_classes (subclasses[i], classes[i + pos]);
1686 }
1687 }
1688 }
1689 }
1690 }
1691 /* Arrays are handled as small records. */
1692 else if (TREE_CODE (type) == ARRAY_TYPE)
1693 {
1694 int num;
1695 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
1696 TREE_TYPE (type), subclasses, bit_offset);
1697 if (!num)
1698 return 0;
1699
1700 /* The partial classes are now full classes. */
1701 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
1702 subclasses[0] = X86_64_SSE_CLASS;
1703 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
1704 subclasses[0] = X86_64_INTEGER_CLASS;
1705
1706 for (i = 0; i < words; i++)
1707 classes[i] = subclasses[i % num];
1708 }
1709 /* Unions are similar to RECORD_TYPE but offset is always 0. */
1710 else if (TREE_CODE (type) == UNION_TYPE
1711 || TREE_CODE (type) == QUAL_UNION_TYPE)
53c17031 1712 {
1713 /* For C++ classes, first merge in the fields of the base classes. */
1714 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1715 {
1716 tree bases = TYPE_BINFO_BASETYPES (type);
1717 int n_bases = TREE_VEC_LENGTH (bases);
1718 int i;
1719
1720 for (i = 0; i < n_bases; ++i)
1721 {
1722 tree binfo = TREE_VEC_ELT (bases, i);
1723 int num;
1724 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1725 tree type = BINFO_TYPE (binfo);
1726
1727 num = classify_argument (TYPE_MODE (type),
1728 type, subclasses,
db01f480 1729 (offset + (bit_offset % 64)) % 256);
1730 if (!num)
1731 return 0;
1732 for (i = 0; i < num; i++)
1733 {
c16576e6 1734 int pos = (offset + (bit_offset % 64)) / 8 / 8;
1735 classes[i + pos] =
1736 merge_classes (subclasses[i], classes[i + pos]);
1737 }
1738 }
1739 }
1740 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1741 {
1742 if (TREE_CODE (field) == FIELD_DECL)
1743 {
1744 int num;
1745 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1746 TREE_TYPE (field), subclasses,
1747 bit_offset);
1748 if (!num)
1749 return 0;
1750 for (i = 0; i < num; i++)
1751 classes[i] = merge_classes (subclasses[i], classes[i]);
1752 }
1753 }
1754 }
1755 else
1756 abort ();
1757
1758 /* Final merger cleanup. */
1759 for (i = 0; i < words; i++)
1760 {
1761 /* If one class is MEMORY, everything should be passed in
1762 memory. */
1763 if (classes[i] == X86_64_MEMORY_CLASS)
1764 return 0;
1765
d6a7951f 1766 /* The X86_64_SSEUP_CLASS should always be preceded by
1767 X86_64_SSE_CLASS. */
1768 if (classes[i] == X86_64_SSEUP_CLASS
1769 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
1770 classes[i] = X86_64_SSE_CLASS;
1771
d6a7951f 1772 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
1773 if (classes[i] == X86_64_X87UP_CLASS
1774 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
1775 classes[i] = X86_64_SSE_CLASS;
1776 }
1777 return words;
1778 }
1779
1780 /* Compute the alignment needed. We align all types to their natural boundaries,
1781 with the exception of XFmode, which is aligned to 64 bits. */
1782 if (mode != VOIDmode && mode != BLKmode)
1783 {
1784 int mode_alignment = GET_MODE_BITSIZE (mode);
1785
1786 if (mode == XFmode)
1787 mode_alignment = 128;
1788 else if (mode == XCmode)
1789 mode_alignment = 256;
f5143c46 1790 /* Misaligned fields are always returned in memory. */
1791 if (bit_offset % mode_alignment)
1792 return 0;
1793 }
1794
1795 /* Classification of atomic types. */
1796 switch (mode)
1797 {
1798 case DImode:
1799 case SImode:
1800 case HImode:
1801 case QImode:
1802 case CSImode:
1803 case CHImode:
1804 case CQImode:
1805 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
1806 classes[0] = X86_64_INTEGERSI_CLASS;
1807 else
1808 classes[0] = X86_64_INTEGER_CLASS;
1809 return 1;
1810 case CDImode:
1811 case TImode:
1812 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1813 return 2;
1814 case CTImode:
1815 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1816 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
1817 return 4;
1818 case SFmode:
1819 if (!(bit_offset % 64))
1820 classes[0] = X86_64_SSESF_CLASS;
1821 else
1822 classes[0] = X86_64_SSE_CLASS;
1823 return 1;
1824 case DFmode:
1825 classes[0] = X86_64_SSEDF_CLASS;
1826 return 1;
1827 case TFmode:
1828 classes[0] = X86_64_X87_CLASS;
1829 classes[1] = X86_64_X87UP_CLASS;
1830 return 2;
1831 case TCmode:
1832 classes[0] = X86_64_X87_CLASS;
1833 classes[1] = X86_64_X87UP_CLASS;
1834 classes[2] = X86_64_X87_CLASS;
1835 classes[3] = X86_64_X87UP_CLASS;
1836 return 4;
1837 case DCmode:
1838 classes[0] = X86_64_SSEDF_CLASS;
1839 classes[1] = X86_64_SSEDF_CLASS;
1840 return 2;
1841 case SCmode:
1842 classes[0] = X86_64_SSE_CLASS;
1843 return 1;
1844 case V4SFmode:
1845 case V4SImode:
1846 case V16QImode:
1847 case V8HImode:
1848 case V2DFmode:
1849 case V2DImode:
1850 classes[0] = X86_64_SSE_CLASS;
1851 classes[1] = X86_64_SSEUP_CLASS;
1852 return 2;
1853 case V2SFmode:
1854 case V2SImode:
1855 case V4HImode:
1856 case V8QImode:
1857 classes[0] = X86_64_SSE_CLASS;
1858 return 1;
53c17031 1859 case BLKmode:
e95d6b23 1860 case VOIDmode:
1861 return 0;
1862 default:
1863 abort ();
1864 }
1865}
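/* Some illustrative classifications (a sketch, assuming x86-64):

       double                        SSEDF                  (1 word)
       struct { int a; double b; }   INTEGERSI, SSEDF       (2 words)
       __int128 (TImode)             INTEGER, INTEGER       (2 words)
       struct { char c[24]; }        memory; returns 0      (> 16 bytes)

   A zero return value always means the argument lives in memory.  */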
1866
1867/* Examine the argument and set the number of registers required in each
f5143c46 1868 class. Return 0 iff the parameter should be passed in memory. */
1869static int
1870examine_argument (mode, type, in_return, int_nregs, sse_nregs)
1871 enum machine_mode mode;
1872 tree type;
1873 int *int_nregs, *sse_nregs;
1874 int in_return;
1875{
1876 enum x86_64_reg_class class[MAX_CLASSES];
1877 int n = classify_argument (mode, type, class, 0);
1878
1879 *int_nregs = 0;
1880 *sse_nregs = 0;
1881 if (!n)
1882 return 0;
1883 for (n--; n >= 0; n--)
1884 switch (class[n])
1885 {
1886 case X86_64_INTEGER_CLASS:
1887 case X86_64_INTEGERSI_CLASS:
1888 (*int_nregs)++;
1889 break;
1890 case X86_64_SSE_CLASS:
1891 case X86_64_SSESF_CLASS:
1892 case X86_64_SSEDF_CLASS:
1893 (*sse_nregs)++;
1894 break;
1895 case X86_64_NO_CLASS:
1896 case X86_64_SSEUP_CLASS:
1897 break;
1898 case X86_64_X87_CLASS:
1899 case X86_64_X87UP_CLASS:
1900 if (!in_return)
1901 return 0;
1902 break;
1903 case X86_64_MEMORY_CLASS:
1904 abort ();
1905 }
1906 return 1;
1907}
1908/* Construct a container for the argument as used by the GCC interface. See
1909 FUNCTION_ARG for the detailed description. */
1910static rtx
1911construct_container (mode, type, in_return, nintregs, nsseregs, intreg, sse_regno)
1912 enum machine_mode mode;
1913 tree type;
1914 int in_return;
1915 int nintregs, nsseregs;
1916 const int * intreg;
1917 int sse_regno;
1918{
1919 enum machine_mode tmpmode;
1920 int bytes =
1921 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1922 enum x86_64_reg_class class[MAX_CLASSES];
1923 int n;
1924 int i;
1925 int nexps = 0;
1926 int needed_sseregs, needed_intregs;
1927 rtx exp[MAX_CLASSES];
1928 rtx ret;
1929
1930 n = classify_argument (mode, type, class, 0);
1931 if (TARGET_DEBUG_ARG)
1932 {
1933 if (!n)
1934 fprintf (stderr, "Memory class\n");
1935 else
1936 {
1937 fprintf (stderr, "Classes:");
1938 for (i = 0; i < n; i++)
1939 {
1940 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
1941 }
1942 fprintf (stderr, "\n");
1943 }
1944 }
1945 if (!n)
1946 return NULL;
1947 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
1948 return NULL;
1949 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
1950 return NULL;
1951
1952 /* First construct the simple cases. Avoid SCmode, since we want to use
1953 a single register to pass this type. */
1954 if (n == 1 && mode != SCmode)
1955 switch (class[0])
1956 {
1957 case X86_64_INTEGER_CLASS:
1958 case X86_64_INTEGERSI_CLASS:
1959 return gen_rtx_REG (mode, intreg[0]);
1960 case X86_64_SSE_CLASS:
1961 case X86_64_SSESF_CLASS:
1962 case X86_64_SSEDF_CLASS:
1963 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
1964 case X86_64_X87_CLASS:
1965 return gen_rtx_REG (mode, FIRST_STACK_REG);
1966 case X86_64_NO_CLASS:
1967 /* Zero sized array, struct or class. */
1968 return NULL;
1969 default:
1970 abort ();
1971 }
1972 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
e95d6b23 1973 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
1974 if (n == 2
1975 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
1976 return gen_rtx_REG (TFmode, FIRST_STACK_REG);
1977 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
1978 && class[1] == X86_64_INTEGER_CLASS
1979 && (mode == CDImode || mode == TImode)
1980 && intreg[0] + 1 == intreg[1])
1981 return gen_rtx_REG (mode, intreg[0]);
1982 if (n == 4
1983 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
1984 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
1985 return gen_rtx_REG (TCmode, FIRST_STACK_REG);
1986
1987 /* Otherwise figure out the entries of the PARALLEL. */
1988 for (i = 0; i < n; i++)
1989 {
1990 switch (class[i])
1991 {
1992 case X86_64_NO_CLASS:
1993 break;
1994 case X86_64_INTEGER_CLASS:
1995 case X86_64_INTEGERSI_CLASS:
1996 /* Merge TImodes on aligned occasions here too. */
1997 if (i * 8 + 8 > bytes)
1998 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
1999 else if (class[i] == X86_64_INTEGERSI_CLASS)
2000 tmpmode = SImode;
2001 else
2002 tmpmode = DImode;
2003 /* We've requested a size (e.g. 24 bits) for which no integer mode exists. Use DImode. */
2004 if (tmpmode == BLKmode)
2005 tmpmode = DImode;
2006 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2007 gen_rtx_REG (tmpmode, *intreg),
2008 GEN_INT (i*8));
2009 intreg++;
2010 break;
2011 case X86_64_SSESF_CLASS:
2012 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2013 gen_rtx_REG (SFmode,
2014 SSE_REGNO (sse_regno)),
2015 GEN_INT (i*8));
2016 sse_regno++;
2017 break;
2018 case X86_64_SSEDF_CLASS:
2019 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2020 gen_rtx_REG (DFmode,
2021 SSE_REGNO (sse_regno)),
2022 GEN_INT (i*8));
2023 sse_regno++;
2024 break;
2025 case X86_64_SSE_CLASS:
2026 if (i + 1 < n && class[i + 1] == X86_64_SSEUP_CLASS)
2027 tmpmode = TImode, i++;
2028 else
2029 tmpmode = DImode;
2030 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2031 gen_rtx_REG (tmpmode,
2032 SSE_REGNO (sse_regno)),
2033 GEN_INT (i*8));
2034 sse_regno++;
2035 break;
2036 default:
2037 abort ();
2038 }
2039 }
2040 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2041 for (i = 0; i < nexps; i++)
2042 XVECEXP (ret, 0, i) = exp [i];
2043 return ret;
2044}
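/* Continuing the example, struct { int a; double b; } as the first
   argument yields roughly this PARALLEL (a sketch of the RTL):

       (parallel:BLK [(expr_list (reg:SI di) (const_int 0))
                      (expr_list (reg:DF xmm0) (const_int 8))])

   i.e. the first eightbyte travels in %edi, the second in %xmm0.  */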
2045
2046/* Update the data in CUM to advance over an argument
2047 of mode MODE and data type TYPE.
2048 (TYPE is null for libcalls where that information may not be available.) */
2049
2050void
2051function_arg_advance (cum, mode, type, named)
2052 CUMULATIVE_ARGS *cum; /* current arg information */
2053 enum machine_mode mode; /* current arg mode */
2054 tree type; /* type of the argument or 0 if lib support */
2055 int named; /* whether or not the argument was named */
2056{
2057 int bytes =
2058 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2059 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2060
2061 if (TARGET_DEBUG_ARG)
2062 fprintf (stderr,
e9a25f70 2063 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
b08de47e 2064 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
53c17031 2065 if (TARGET_64BIT)
b08de47e 2066 {
2067 int int_nregs, sse_nregs;
2068 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2069 cum->words += words;
2070 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
82a127a9 2071 {
2072 cum->nregs -= int_nregs;
2073 cum->sse_nregs -= sse_nregs;
2074 cum->regno += int_nregs;
2075 cum->sse_regno += sse_nregs;
82a127a9 2076 }
2077 else
2078 cum->words += words;
b08de47e 2079 }
a4f31c00 2080 else
82a127a9 2081 {
2082 if (TARGET_SSE && mode == TImode)
2083 {
2084 cum->sse_words += words;
2085 cum->sse_nregs -= 1;
2086 cum->sse_regno += 1;
2087 if (cum->sse_nregs <= 0)
2088 {
2089 cum->sse_nregs = 0;
2090 cum->sse_regno = 0;
2091 }
2092 }
2093 else
82a127a9 2094 {
2095 cum->words += words;
2096 cum->nregs -= words;
2097 cum->regno += words;
2098
2099 if (cum->nregs <= 0)
2100 {
2101 cum->nregs = 0;
2102 cum->regno = 0;
2103 }
2104 }
2105 }
2106 return;
2107}
2108
2109/* Define where to put the arguments to a function.
2110 Value is zero to push the argument on the stack,
2111 or a hard register in which to store the argument.
2112
2113 MODE is the argument's machine mode.
2114 TYPE is the data type of the argument (as a tree).
2115 This is null for libcalls where that information may
2116 not be available.
2117 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2118 the preceding args and about the function being called.
2119 NAMED is nonzero if this argument is a named parameter
2120 (otherwise it is an extra parameter matching an ellipsis). */
2121
07933f72 2122rtx
2123function_arg (cum, mode, type, named)
2124 CUMULATIVE_ARGS *cum; /* current arg information */
2125 enum machine_mode mode; /* current arg mode */
2126 tree type; /* type of the argument or 0 if lib support */
2127 int named; /* != 0 for normal args, == 0 for ... args */
2128{
2129 rtx ret = NULL_RTX;
2130 int bytes =
2131 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2132 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2133
2134 /* Handle a hidden AL argument containing the number of registers for varargs
2135 x86-64 functions. For the i386 ABI just return constm1_rtx to avoid
2136 any AL settings. */
32ee7d1d 2137 if (mode == VOIDmode)
b08de47e 2138 {
2139 if (TARGET_64BIT)
2140 return GEN_INT (cum->maybe_vaarg
2141 ? (cum->sse_nregs < 0
2142 ? SSE_REGPARM_MAX
2143 : cum->sse_regno)
2144 : -1);
2145 else
2146 return constm1_rtx;
b08de47e 2147 }
2148 if (TARGET_64BIT)
2149 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2150 &x86_64_int_parameter_registers [cum->regno],
2151 cum->sse_regno);
2152 else
2153 switch (mode)
2154 {
2155 /* For now, pass fp/complex values on the stack. */
2156 default:
2157 break;
2158
2159 case BLKmode:
2160 case DImode:
2161 case SImode:
2162 case HImode:
2163 case QImode:
2164 if (words <= cum->nregs)
2165 ret = gen_rtx_REG (mode, cum->regno);
2166 break;
2167 case TImode:
2168 if (cum->sse_nregs)
2169 ret = gen_rtx_REG (mode, cum->sse_regno);
2170 break;
2171 }
2172
2173 if (TARGET_DEBUG_ARG)
2174 {
2175 fprintf (stderr,
91ea38f9 2176 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2177 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2178
2179 if (ret)
91ea38f9 2180 print_simple_rtl (stderr, ret);
2181 else
2182 fprintf (stderr, ", stack");
2183
2184 fprintf (stderr, " )\n");
2185 }
2186
2187 return ret;
2188}
2189
2190/* Gives the alignment boundary, in bits, of an argument with the specified mode
2191 and type. */
2192
2193int
2194ix86_function_arg_boundary (mode, type)
2195 enum machine_mode mode;
2196 tree type;
2197{
2198 int align;
2199 if (!TARGET_64BIT)
2200 return PARM_BOUNDARY;
2201 if (type)
2202 align = TYPE_ALIGN (type);
2203 else
2204 align = GET_MODE_ALIGNMENT (mode);
2205 if (align < PARM_BOUNDARY)
2206 align = PARM_BOUNDARY;
2207 if (align > 128)
2208 align = 128;
2209 return align;
2210}
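/* For example, a double keeps its natural 64 bit boundary, a 16 byte
   vector such as __m128 gets the 128 bit maximum, and anything smaller
   than PARM_BOUNDARY is rounded up to PARM_BOUNDARY.  */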
2211
2212/* Return true if N is a possible register number of function value. */
2213bool
2214ix86_function_value_regno_p (regno)
2215 int regno;
2216{
2217 if (!TARGET_64BIT)
2218 {
2219 return ((regno) == 0
2220 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2221 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2222 }
2223 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2224 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2225 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2226}
2227
2228/* Define how to find the value returned by a function.
2229 VALTYPE is the data type of the value (as a tree).
2230 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2231 otherwise, FUNC is 0. */
2232rtx
2233ix86_function_value (valtype)
2234 tree valtype;
2235{
2236 if (TARGET_64BIT)
2237 {
2238 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2239 REGPARM_MAX, SSE_REGPARM_MAX,
2240 x86_64_int_return_registers, 0);
2241 /* For zero sized structures, construct_container returns NULL, but we need
2242 to keep the rest of the compiler happy by returning a meaningful value. */
2243 if (!ret)
2244 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2245 return ret;
2246 }
2247 else
2248 return gen_rtx_REG (TYPE_MODE (valtype),
2249 ix86_value_regno (TYPE_MODE (valtype)));
2250}
2251
f5143c46 2252/* Return nonzero iff TYPE is returned in memory. */
2253int
2254ix86_return_in_memory (type)
2255 tree type;
2256{
2257 int needed_intregs, needed_sseregs;
2258 if (TARGET_64BIT)
2259 {
2260 return !examine_argument (TYPE_MODE (type), type, 1,
2261 &needed_intregs, &needed_sseregs);
2262 }
2263 else
2264 {
2265 if (TYPE_MODE (type) == BLKmode
2266 || (VECTOR_MODE_P (TYPE_MODE (type))
2267 && int_size_in_bytes (type) == 8)
2268 || (int_size_in_bytes (type) > 12 && TYPE_MODE (type) != TImode
2269 && TYPE_MODE (type) != TFmode
2270 && !VECTOR_MODE_P (TYPE_MODE (type))))
2271 return 1;
2272 return 0;
2273 }
2274}
2275
2276/* Define how to find the value returned by a library function
2277 assuming the value has mode MODE. */
2278rtx
2279ix86_libcall_value (mode)
2280 enum machine_mode mode;
2281{
2282 if (TARGET_64BIT)
2283 {
2284 switch (mode)
2285 {
2286 case SFmode:
2287 case SCmode:
2288 case DFmode:
2289 case DCmode:
2290 return gen_rtx_REG (mode, FIRST_SSE_REG);
2291 case TFmode:
2292 case TCmode:
2293 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2294 default:
2295 return gen_rtx_REG (mode, 0);
2296 }
2297 }
2298 else
2299 return gen_rtx_REG (mode, ix86_value_regno (mode));
2300}
2301
2302/* Given a mode, return the register to use for a return value. */
2303
2304static int
2305ix86_value_regno (mode)
2306 enum machine_mode mode;
2307{
2308 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
2309 return FIRST_FLOAT_REG;
2310 if (mode == TImode || VECTOR_MODE_P (mode))
2311 return FIRST_SSE_REG;
2312 return 0;
53c17031 2313}
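/* So, illustratively: scalar floats return in %st(0) when the 80387
   return convention is in force, TImode and vector values return in
   %xmm0, and everything else comes back in %eax.  */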
2314\f
2315/* Create the va_list data type. */
53c17031 2316
2317tree
2318ix86_build_va_list ()
2319{
2320 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
53c17031 2321
2322 /* For i386 we use plain pointer to argument area. */
2323 if (!TARGET_64BIT)
2324 return build_pointer_type (char_type_node);
2325
f1e639b1 2326 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
2327 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2328
fce5a9f2 2329 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
ad919812 2330 unsigned_type_node);
fce5a9f2 2331 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
ad919812
JH
2332 unsigned_type_node);
2333 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2334 ptr_type_node);
2335 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2336 ptr_type_node);
2337
2338 DECL_FIELD_CONTEXT (f_gpr) = record;
2339 DECL_FIELD_CONTEXT (f_fpr) = record;
2340 DECL_FIELD_CONTEXT (f_ovf) = record;
2341 DECL_FIELD_CONTEXT (f_sav) = record;
2342
2343 TREE_CHAIN (record) = type_decl;
2344 TYPE_NAME (record) = type_decl;
2345 TYPE_FIELDS (record) = f_gpr;
2346 TREE_CHAIN (f_gpr) = f_fpr;
2347 TREE_CHAIN (f_fpr) = f_ovf;
2348 TREE_CHAIN (f_ovf) = f_sav;
2349
2350 layout_type (record);
2351
2352 /* The correct type is an array type of one element. */
2353 return build_array_type (record, build_index_type (size_zero_node));
2354}
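/* The record built above matches the declaration given in the x86-64
   ABI (the i386 case degenerates to a plain character pointer):

       typedef struct {
         unsigned int gp_offset;
         unsigned int fp_offset;
         void *overflow_arg_area;
         void *reg_save_area;
       } va_list[1];

   gp_offset and fp_offset index into reg_save_area, while
   overflow_arg_area walks the stack-passed portion of the arguments.  */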
2355
2356/* Perform any actions needed for a function that is receiving a
fce5a9f2 2357 variable number of arguments.
2358
2359 CUM is as above.
2360
2361 MODE and TYPE are the mode and type of the current parameter.
2362
2363 PRETEND_SIZE is a variable that should be set to the amount of stack
2364 that must be pushed by the prolog to pretend that our caller pushed
2365 it.
2366
2367 Normally, this macro will push all remaining incoming registers on the
2368 stack and set PRETEND_SIZE to the length of the registers pushed. */
2369
2370void
2371ix86_setup_incoming_varargs (cum, mode, type, pretend_size, no_rtl)
2372 CUMULATIVE_ARGS *cum;
2373 enum machine_mode mode;
2374 tree type;
2375 int *pretend_size ATTRIBUTE_UNUSED;
2376 int no_rtl;
2377
2378{
2379 CUMULATIVE_ARGS next_cum;
2380 rtx save_area = NULL_RTX, mem;
2381 rtx label;
2382 rtx label_ref;
2383 rtx tmp_reg;
2384 rtx nsse_reg;
2385 int set;
2386 tree fntype;
2387 int stdarg_p;
2388 int i;
2389
2390 if (!TARGET_64BIT)
2391 return;
2392
2393 /* Indicate to allocate space on the stack for varargs save area. */
2394 ix86_save_varrargs_registers = 1;
2395
2396 fntype = TREE_TYPE (current_function_decl);
2397 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2398 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2399 != void_type_node));
2400
2401 /* For varargs, we do not want to skip the dummy va_dcl argument.
2402 For stdargs, we do want to skip the last named argument. */
2403 next_cum = *cum;
2404 if (stdarg_p)
2405 function_arg_advance (&next_cum, mode, type, 1);
2406
2407 if (!no_rtl)
2408 save_area = frame_pointer_rtx;
2409
2410 set = get_varargs_alias_set ();
2411
2412 for (i = next_cum.regno; i < ix86_regparm; i++)
2413 {
2414 mem = gen_rtx_MEM (Pmode,
2415 plus_constant (save_area, i * UNITS_PER_WORD));
0692acba 2416 set_mem_alias_set (mem, set);
2417 emit_move_insn (mem, gen_rtx_REG (Pmode,
2418 x86_64_int_parameter_registers[i]));
2419 }
2420
2421 if (next_cum.sse_nregs)
2422 {
2423 /* Now emit code to save SSE registers. The AX parameter contains the number
2424 of SSE parameter registers used to call this function. We use the
2425 sse_prologue_save insn template, which produces a computed jump across
2426 the SSE saves. We need some preparation work to get this working. */
2427
2428 label = gen_label_rtx ();
2429 label_ref = gen_rtx_LABEL_REF (Pmode, label);
2430
2431 /* Compute address to jump to :
2432 label - 5*eax + nnamed_sse_arguments*5 */
2433 tmp_reg = gen_reg_rtx (Pmode);
2434 nsse_reg = gen_reg_rtx (Pmode);
2435 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
2436 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
44cf5b6a 2437 gen_rtx_MULT (Pmode, nsse_reg,
2438 GEN_INT (4))));
2439 if (next_cum.sse_regno)
2440 emit_move_insn
2441 (nsse_reg,
2442 gen_rtx_CONST (DImode,
2443 gen_rtx_PLUS (DImode,
2444 label_ref,
2445 GEN_INT (next_cum.sse_regno * 4))));
2446 else
2447 emit_move_insn (nsse_reg, label_ref);
2448 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
2449
2450 /* Compute the address of the memory block we save into. We always use a
2451 pointer pointing 127 bytes past the first byte to store; this keeps each
2452 save instruction within 4 bytes, since the offset fits in a signed 8-bit displacement. */
2453 tmp_reg = gen_reg_rtx (Pmode);
2454 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2455 plus_constant (save_area,
2456 8 * REGPARM_MAX + 127)));
ad919812 2457 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
14f73b5a 2458 set_mem_alias_set (mem, set);
8ac61af7 2459 set_mem_align (mem, BITS_PER_WORD);
2460
2461 /* And finally do the dirty job! */
2462 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
2463 GEN_INT (next_cum.sse_regno), label));
2464 }
2465
2466}
2467
2468/* Implement va_start. */
2469
2470void
e5faf155 2471ix86_va_start (valist, nextarg)
2472 tree valist;
2473 rtx nextarg;
2474{
2475 HOST_WIDE_INT words, n_gpr, n_fpr;
2476 tree f_gpr, f_fpr, f_ovf, f_sav;
2477 tree gpr, fpr, ovf, sav, t;
2478
2479 /* Only 64bit target needs something special. */
2480 if (!TARGET_64BIT)
2481 {
e5faf155 2482 std_expand_builtin_va_start (valist, nextarg);
2483 return;
2484 }
2485
2486 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2487 f_fpr = TREE_CHAIN (f_gpr);
2488 f_ovf = TREE_CHAIN (f_fpr);
2489 f_sav = TREE_CHAIN (f_ovf);
2490
2491 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2492 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2493 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2494 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2495 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2496
2497 /* Count number of gp and fp argument registers used. */
2498 words = current_function_args_info.words;
2499 n_gpr = current_function_args_info.regno;
2500 n_fpr = current_function_args_info.sse_regno;
2501
2502 if (TARGET_DEBUG_ARG)
2503 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
b531087a 2504 (int) words, (int) n_gpr, (int) n_fpr);
2505
2506 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
2507 build_int_2 (n_gpr * 8, 0));
2508 TREE_SIDE_EFFECTS (t) = 1;
2509 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2510
2511 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
2512 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
2513 TREE_SIDE_EFFECTS (t) = 1;
2514 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2515
2516 /* Find the overflow area. */
2517 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
2518 if (words != 0)
2519 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
2520 build_int_2 (words * UNITS_PER_WORD, 0));
2521 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2522 TREE_SIDE_EFFECTS (t) = 1;
2523 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2524
2525 /* Find the register save area.
2526 The function prologue saves it right above the stack frame. */
2527 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
2528 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
2529 TREE_SIDE_EFFECTS (t) = 1;
2530 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2531}
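/* A sketch of the resulting fields for `int f (int a, ...)' right
   after va_start: one GPR is consumed by A and no SSE register is
   consumed, so

       gp_offset = 8
       fp_offset = 8 * REGPARM_MAX
       overflow_arg_area = the first stack-passed word, if any
       reg_save_area = the block saved by the prologue above

   and the first integer va_arg fetch reads the second GPR slot.  */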
2532
2533/* Implement va_arg. */
2534rtx
2535ix86_va_arg (valist, type)
2536 tree valist, type;
2537{
0139adca 2538 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
2539 tree f_gpr, f_fpr, f_ovf, f_sav;
2540 tree gpr, fpr, ovf, sav, t;
b932f770 2541 int size, rsize;
2542 rtx lab_false, lab_over = NULL_RTX;
2543 rtx addr_rtx, r;
2544 rtx container;
2545
2546 /* Only 64bit target needs something special. */
2547 if (!TARGET_64BIT)
2548 {
2549 return std_expand_builtin_va_arg (valist, type);
2550 }
2551
2552 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2553 f_fpr = TREE_CHAIN (f_gpr);
2554 f_ovf = TREE_CHAIN (f_fpr);
2555 f_sav = TREE_CHAIN (f_ovf);
2556
2557 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2558 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2559 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2560 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2561 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2562
2563 size = int_size_in_bytes (type);
2564 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2565
2566 container = construct_container (TYPE_MODE (type), type, 0,
2567 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
2568 /* Pull the value out of the saved registers ... */
2571
2572 addr_rtx = gen_reg_rtx (Pmode);
2573
2574 if (container)
2575 {
2576 rtx int_addr_rtx, sse_addr_rtx;
2577 int needed_intregs, needed_sseregs;
2578 int need_temp;
2579
2580 lab_over = gen_label_rtx ();
2581 lab_false = gen_label_rtx ();
8bad7136 2582
2583 examine_argument (TYPE_MODE (type), type, 0,
2584 &needed_intregs, &needed_sseregs);
2585
2586
2587 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
2588 || TYPE_ALIGN (type) > 128);
2589
2590 /* In case we are passing a structure, verify that it is a consecutive block
2591 in the register save area. If not, we need to do moves. */
2592 if (!need_temp && !REG_P (container))
2593 {
2594 /* Verify that all registers are strictly consecutive. */
2595 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
2596 {
2597 int i;
2598
2599 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2600 {
2601 rtx slot = XVECEXP (container, 0, i);
b531087a 2602 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
2603 || INTVAL (XEXP (slot, 1)) != i * 16)
2604 need_temp = 1;
2605 }
2606 }
2607 else
2608 {
2609 int i;
2610
2611 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2612 {
2613 rtx slot = XVECEXP (container, 0, i);
b531087a 2614 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
2615 || INTVAL (XEXP (slot, 1)) != i * 8)
2616 need_temp = 1;
2617 }
2618 }
2619 }
2620 if (!need_temp)
2621 {
2622 int_addr_rtx = addr_rtx;
2623 sse_addr_rtx = addr_rtx;
2624 }
2625 else
2626 {
2627 int_addr_rtx = gen_reg_rtx (Pmode);
2628 sse_addr_rtx = gen_reg_rtx (Pmode);
2629 }
2630 /* First ensure that we fit completely in registers. */
2631 if (needed_intregs)
2632 {
2633 emit_cmp_and_jump_insns (expand_expr
2634 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
2635 GEN_INT ((REGPARM_MAX - needed_intregs +
2636 1) * 8), GE, const1_rtx, SImode,
d43e0b7d 2637 1, lab_false);
ad919812
JH
2638 }
2639 if (needed_sseregs)
2640 {
2641 emit_cmp_and_jump_insns (expand_expr
2642 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
2643 GEN_INT ((SSE_REGPARM_MAX -
2644 needed_sseregs + 1) * 16 +
2645 REGPARM_MAX * 8), GE, const1_rtx,
d43e0b7d 2646 SImode, 1, lab_false);
ad919812
JH
2647 }
2648
2649 /* Compute index to start of area used for integer regs. */
2650 if (needed_intregs)
2651 {
2652 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
2653 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
2654 if (r != int_addr_rtx)
2655 emit_move_insn (int_addr_rtx, r);
2656 }
2657 if (needed_sseregs)
2658 {
2659 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
2660 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
2661 if (r != sse_addr_rtx)
2662 emit_move_insn (sse_addr_rtx, r);
2663 }
2664 if (need_temp)
2665 {
2666 int i;
2667 rtx mem;
2668
2669 /* Never use the memory itself, as it has the alias set. */
2670 addr_rtx = XEXP (assign_temp (type, 0, 1, 0), 0);
2671 mem = gen_rtx_MEM (BLKmode, addr_rtx);
0692acba 2672 set_mem_alias_set (mem, get_varargs_alias_set ());
8ac61af7 2673 set_mem_align (mem, BITS_PER_UNIT);
b932f770 2674
2675 for (i = 0; i < XVECLEN (container, 0); i++)
2676 {
2677 rtx slot = XVECEXP (container, 0, i);
2678 rtx reg = XEXP (slot, 0);
2679 enum machine_mode mode = GET_MODE (reg);
2680 rtx src_addr;
2681 rtx src_mem;
2682 int src_offset;
2683 rtx dest_mem;
2684
2685 if (SSE_REGNO_P (REGNO (reg)))
2686 {
2687 src_addr = sse_addr_rtx;
2688 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
2689 }
2690 else
2691 {
2692 src_addr = int_addr_rtx;
2693 src_offset = REGNO (reg) * 8;
2694 }
2695 src_mem = gen_rtx_MEM (mode, src_addr);
0692acba 2696 set_mem_alias_set (src_mem, get_varargs_alias_set ());
2697 src_mem = adjust_address (src_mem, mode, src_offset);
2698 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
2699 emit_move_insn (dest_mem, src_mem);
2700 }
2701 }
2702
2703 if (needed_intregs)
2704 {
2705 t =
2706 build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
2707 build_int_2 (needed_intregs * 8, 0));
2708 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
2709 TREE_SIDE_EFFECTS (t) = 1;
2710 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2711 }
2712 if (needed_sseregs)
2713 {
2714 t =
2715 build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
2716 build_int_2 (needed_sseregs * 16, 0));
2717 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
2718 TREE_SIDE_EFFECTS (t) = 1;
2719 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2720 }
2721
2722 emit_jump_insn (gen_jump (lab_over));
2723 emit_barrier ();
2724 emit_label (lab_false);
2725 }
2726
2727 /* ... otherwise out of the overflow area. */
2728
2729 /* Care for on-stack alignment if needed. */
2730 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
2731 t = ovf;
2732 else
2733 {
2734 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
2735 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
2736 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
2737 }
2738 t = save_expr (t);
2739
2740 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
2741 if (r != addr_rtx)
2742 emit_move_insn (addr_rtx, r);
2743
2744 t =
2745 build (PLUS_EXPR, TREE_TYPE (t), t,
2746 build_int_2 (rsize * UNITS_PER_WORD, 0));
2747 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2748 TREE_SIDE_EFFECTS (t) = 1;
2749 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2750
2751 if (container)
2752 emit_label (lab_over);
2753
2754 return addr_rtx;
2755}
2756\f
2757/* Return nonzero if OP is general operand representable on x86_64. */
2758
2759int
2760x86_64_general_operand (op, mode)
2761 rtx op;
2762 enum machine_mode mode;
2763{
2764 if (!TARGET_64BIT)
2765 return general_operand (op, mode);
2766 if (nonimmediate_operand (op, mode))
2767 return 1;
2768 return x86_64_sign_extended_value (op);
2769}
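/* For instance, (const_int -1) and (const_int 0x7fffffff) are accepted
   because they are sign-extended 32-bit values, while 0x100000000 is
   not; such a constant must be loaded with movabs instead (compare
   x86_64_movabs_operand below).  */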
2770
2771/* Return nonzero if OP is general operand representable on x86_64
d6a7951f 2772 as either sign extended or zero extended constant. */
2773
2774int
2775x86_64_szext_general_operand (op, mode)
2776 rtx op;
2777 enum machine_mode mode;
2778{
2779 if (!TARGET_64BIT)
2780 return general_operand (op, mode);
2781 if (nonimmediate_operand (op, mode))
2782 return 1;
2783 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
2784}
2785
2786/* Return nonzero if OP is nonmemory operand representable on x86_64. */
2787
2788int
2789x86_64_nonmemory_operand (op, mode)
2790 rtx op;
2791 enum machine_mode mode;
2792{
2793 if (!TARGET_64BIT)
2794 return nonmemory_operand (op, mode);
2795 if (register_operand (op, mode))
2796 return 1;
2797 return x86_64_sign_extended_value (op);
2798}
2799
2800/* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
2801
2802int
2803x86_64_movabs_operand (op, mode)
2804 rtx op;
2805 enum machine_mode mode;
2806{
2807 if (!TARGET_64BIT || !flag_pic)
2808 return nonmemory_operand (op, mode);
2809 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
2810 return 1;
2811 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
2812 return 1;
2813 return 0;
2814}
2815
2816/* Return nonzero if OP is nonmemory operand representable on x86_64. */
2817
2818int
2819x86_64_szext_nonmemory_operand (op, mode)
2820 rtx op;
2821 enum machine_mode mode;
2822{
2823 if (!TARGET_64BIT)
2824 return nonmemory_operand (op, mode);
2825 if (register_operand (op, mode))
2826 return 1;
2827 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
2828}
2829
2830/* Return nonzero if OP is immediate operand representable on x86_64. */
2831
2832int
2833x86_64_immediate_operand (op, mode)
2834 rtx op;
2835 enum machine_mode mode;
2836{
2837 if (!TARGET_64BIT)
2838 return immediate_operand (op, mode);
2839 return x86_64_sign_extended_value (op);
2840}
2841
2842/* Return nonzero if OP is immediate operand representable on x86_64. */
2843
2844int
2845x86_64_zext_immediate_operand (op, mode)
2846 rtx op;
2847 enum machine_mode mode ATTRIBUTE_UNUSED;
2848{
2849 return x86_64_zero_extended_value (op);
2850}
2851
2852/* Return nonzero if OP is (const_int 1), else return zero. */
2853
2854int
2855const_int_1_operand (op, mode)
2856 rtx op;
2857 enum machine_mode mode ATTRIBUTE_UNUSED;
2858{
2859 return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
2860}
2861
2862/* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
2863 for shift & compare patterns, as shifting by 0 does not change flags),
2864 else return zero. */
2865
2866int
2867const_int_1_31_operand (op, mode)
2868 rtx op;
2869 enum machine_mode mode ATTRIBUTE_UNUSED;
2870{
2871 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31);
2872}
2873
2874/* Returns 1 if OP is either a symbol reference or a sum of a symbol
2875 reference and a constant. */
2876
2877int
2878symbolic_operand (op, mode)
2879 register rtx op;
2880 enum machine_mode mode ATTRIBUTE_UNUSED;
2a2ab3f9 2881{
e075ae69 2882 switch (GET_CODE (op))
2a2ab3f9 2883 {
2884 case SYMBOL_REF:
2885 case LABEL_REF:
2886 return 1;
2887
2888 case CONST:
2889 op = XEXP (op, 0);
2890 if (GET_CODE (op) == SYMBOL_REF
2891 || GET_CODE (op) == LABEL_REF
2892 || (GET_CODE (op) == UNSPEC
2893 && (XINT (op, 1) == UNSPEC_GOT
2894 || XINT (op, 1) == UNSPEC_GOTOFF
2895 || XINT (op, 1) == UNSPEC_GOTPCREL)))
2896 return 1;
2897 if (GET_CODE (op) != PLUS
2898 || GET_CODE (XEXP (op, 1)) != CONST_INT)
2899 return 0;
2900
2901 op = XEXP (op, 0);
2902 if (GET_CODE (op) == SYMBOL_REF
2903 || GET_CODE (op) == LABEL_REF)
2904 return 1;
2905 /* Only @GOTOFF gets offsets. */
2906 if (GET_CODE (op) != UNSPEC
8ee41eaf 2907 || XINT (op, 1) != UNSPEC_GOTOFF)
2908 return 0;
2909
2910 op = XVECEXP (op, 0, 0);
2911 if (GET_CODE (op) == SYMBOL_REF
2912 || GET_CODE (op) == LABEL_REF)
2913 return 1;
2914 return 0;
2915
2916 default:
2917 return 0;
2a2ab3f9
JVA
2918 }
2919}
2a2ab3f9 2920
e075ae69 2921/* Return true if the operand contains a @GOT or @GOTOFF reference. */
3b3c6a3f 2922
2923int
2924pic_symbolic_operand (op, mode)
2925 register rtx op;
2926 enum machine_mode mode ATTRIBUTE_UNUSED;
2927{
2928 if (GET_CODE (op) != CONST)
2929 return 0;
2930 op = XEXP (op, 0);
2931 if (TARGET_64BIT)
2932 {
2933 if (GET_CODE (XEXP (op, 0)) == UNSPEC)
2934 return 1;
2935 }
fce5a9f2 2936 else
2a2ab3f9 2937 {
2938 if (GET_CODE (op) == UNSPEC)
2939 return 1;
2940 if (GET_CODE (op) != PLUS
2941 || GET_CODE (XEXP (op, 1)) != CONST_INT)
2942 return 0;
2943 op = XEXP (op, 0);
2944 if (GET_CODE (op) == UNSPEC)
2945 return 1;
2a2ab3f9 2946 }
e075ae69 2947 return 0;
2a2ab3f9 2948}
2a2ab3f9 2949
2950/* Return true if OP is a symbolic operand that resolves locally. */
2951
2952static int
2953local_symbolic_operand (op, mode)
2954 rtx op;
2955 enum machine_mode mode ATTRIBUTE_UNUSED;
2956{
2957 if (GET_CODE (op) == LABEL_REF)
2958 return 1;
2959
2960 if (GET_CODE (op) == CONST
2961 && GET_CODE (XEXP (op, 0)) == PLUS
2962 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
2963 op = XEXP (XEXP (op, 0), 0);
2964
2965 if (GET_CODE (op) != SYMBOL_REF)
2966 return 0;
2967
2968 /* These we've been told are local by varasm and encode_section_info
2969 respectively. */
2970 if (CONSTANT_POOL_ADDRESS_P (op) || SYMBOL_REF_FLAG (op))
2971 return 1;
2972
2973 /* There is, however, a not insubstantial body of code in the rest of
fce5a9f2 2974 the compiler that assumes it can just stick the results of
623fe810
RH
2975 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
2976 /* ??? This is a hack. Should update the body of the compiler to
fb49053f 2977 always create a DECL and invoke targetm.encode_section_info. */
623fe810
RH
2978 if (strncmp (XSTR (op, 0), internal_label_prefix,
2979 internal_label_prefix_len) == 0)
2980 return 1;
2981
2982 return 0;
2983}
2984
2985/* Test for various thread-local symbols. See ix86_encode_section_info. */
2986
2987int
2988tls_symbolic_operand (op, mode)
2989 register rtx op;
2990 enum machine_mode mode ATTRIBUTE_UNUSED;
2991{
2992 const char *symbol_str;
2993
2994 if (GET_CODE (op) != SYMBOL_REF)
2995 return 0;
2996 symbol_str = XSTR (op, 0);
2997
2998 if (symbol_str[0] != '%')
2999 return 0;
755ac5d4 3000 return strchr (tls_model_chars, symbol_str[1]) - tls_model_chars;
3001}
3002
3003static int
3004tls_symbolic_operand_1 (op, kind)
3005 rtx op;
3006 enum tls_model kind;
3007{
3008 const char *symbol_str;
3009
3010 if (GET_CODE (op) != SYMBOL_REF)
3011 return 0;
3012 symbol_str = XSTR (op, 0);
3013
3014 return symbol_str[0] == '%' && symbol_str[1] == tls_model_chars[kind];
3015}
3016
3017int
3018global_dynamic_symbolic_operand (op, mode)
3019 register rtx op;
3020 enum machine_mode mode ATTRIBUTE_UNUSED;
3021{
3022 return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
3023}
3024
3025int
3026local_dynamic_symbolic_operand (op, mode)
3027 register rtx op;
3028 enum machine_mode mode ATTRIBUTE_UNUSED;
3029{
3030 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
3031}
3032
3033int
3034initial_exec_symbolic_operand (op, mode)
3035 register rtx op;
3036 enum machine_mode mode ATTRIBUTE_UNUSED;
3037{
3038 return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
3039}
3040
3041int
3042local_exec_symbolic_operand (op, mode)
3043 register rtx op;
3044 enum machine_mode mode ATTRIBUTE_UNUSED;
3045{
3046 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
3047}
3048
3049/* Test for a valid operand for a call instruction. Don't allow the
3050 arg pointer register or virtual regs since they may decay into
3051 reg + const, which the patterns can't handle. */
2a2ab3f9 3052
3053int
3054call_insn_operand (op, mode)
3055 rtx op;
3056 enum machine_mode mode ATTRIBUTE_UNUSED;
3057{
3058 /* Disallow indirect through a virtual register. This leads to
3059 compiler aborts when trying to eliminate them. */
3060 if (GET_CODE (op) == REG
3061 && (op == arg_pointer_rtx
564d80f4 3062 || op == frame_pointer_rtx
3063 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3064 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3065 return 0;
2a2ab3f9 3066
3067 /* Disallow `call 1234'. Due to varying assembler lameness this
3068 gets either rejected or translated to `call .+1234'. */
3069 if (GET_CODE (op) == CONST_INT)
3070 return 0;
3071
3072 /* Explicitly allow SYMBOL_REF even if pic. */
3073 if (GET_CODE (op) == SYMBOL_REF)
e075ae69 3074 return 1;
2a2ab3f9 3075
3076 /* Otherwise we can allow any general_operand in the address. */
3077 return general_operand (op, Pmode);
e075ae69 3078}
79325812 3079
3080int
3081constant_call_address_operand (op, mode)
3082 rtx op;
3083 enum machine_mode mode ATTRIBUTE_UNUSED;
3084{
3085 if (GET_CODE (op) == CONST
3086 && GET_CODE (XEXP (op, 0)) == PLUS
3087 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3088 op = XEXP (XEXP (op, 0), 0);
e1ff012c 3089 return GET_CODE (op) == SYMBOL_REF;
e075ae69 3090}
2a2ab3f9 3091
e075ae69 3092/* Match exactly zero and one. */
e9a25f70 3093
0f290768 3094int
3095const0_operand (op, mode)
3096 register rtx op;
3097 enum machine_mode mode;
3098{
3099 return op == CONST0_RTX (mode);
3100}
e9a25f70 3101
0f290768 3102int
3103const1_operand (op, mode)
3104 register rtx op;
3105 enum machine_mode mode ATTRIBUTE_UNUSED;
3106{
3107 return op == const1_rtx;
3108}
2a2ab3f9 3109
e075ae69 3110/* Match 2, 4, or 8. Used for leal multiplicands. */
e9a25f70 3111
3112int
3113const248_operand (op, mode)
3114 register rtx op;
3115 enum machine_mode mode ATTRIBUTE_UNUSED;
3116{
3117 return (GET_CODE (op) == CONST_INT
3118 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
3119}
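/* These are exactly the scale factors the addressing unit accepts, e.g.

       leal (%eax,%ebx,4), %ecx        # ecx = eax + ebx * 4

   A scale of 3 is not encodable; it has to be expressed with the same
   register as base and index, as in leal (%ebx,%ebx,2), %ecx.  */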
e9a25f70 3120
e075ae69 3121/* True if this is a constant appropriate for an increment or decrement. */
81fd0956 3122
3123int
3124incdec_operand (op, mode)
3125 register rtx op;
0631e0bf 3126 enum machine_mode mode ATTRIBUTE_UNUSED;
e075ae69 3127{
f5143c46 3128 /* On Pentium4, the inc and dec operations cause an extra dependency on the
3129 flag registers, since the carry flag is not set. */
3130 if (TARGET_PENTIUM4 && !optimize_size)
3131 return 0;
2b1c08f5 3132 return op == const1_rtx || op == constm1_rtx;
e075ae69 3133}
2a2ab3f9 3134
3135/* Return nonzero if OP is acceptable as operand of DImode shift
3136 expander. */
3137
3138int
3139shiftdi_operand (op, mode)
3140 rtx op;
3141 enum machine_mode mode ATTRIBUTE_UNUSED;
3142{
3143 if (TARGET_64BIT)
3144 return nonimmediate_operand (op, mode);
3145 else
3146 return register_operand (op, mode);
3147}
3148
0f290768 3149/* Return false if this is the stack pointer, or any other fake
3150 register eliminable to the stack pointer. Otherwise, this is
3151 a register operand.
2a2ab3f9 3152
e075ae69
RH
3153 This is used to prevent esp from being used as an index reg,
3154 which would only happen in pathological cases.
5f1ec3e6 3155
3156int
3157reg_no_sp_operand (op, mode)
3158 register rtx op;
3159 enum machine_mode mode;
3160{
3161 rtx t = op;
3162 if (GET_CODE (t) == SUBREG)
3163 t = SUBREG_REG (t);
564d80f4 3164 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
e075ae69 3165 return 0;
2a2ab3f9 3166
e075ae69 3167 return register_operand (op, mode);
2a2ab3f9 3168}
b840bfb0 3169
3170int
3171mmx_reg_operand (op, mode)
3172 register rtx op;
bd793c65 3173 enum machine_mode mode ATTRIBUTE_UNUSED;
3174{
3175 return MMX_REG_P (op);
3176}
3177
3178/* Return false if this is any eliminable register. Otherwise
3179 general_operand. */
3180
3181int
3182general_no_elim_operand (op, mode)
3183 register rtx op;
3184 enum machine_mode mode;
3185{
3186 rtx t = op;
3187 if (GET_CODE (t) == SUBREG)
3188 t = SUBREG_REG (t);
3189 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3190 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3191 || t == virtual_stack_dynamic_rtx)
3192 return 0;
3193 if (REG_P (t)
3194 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3195 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3196 return 0;
3197
3198 return general_operand (op, mode);
3199}
3200
3201/* Return false if this is any eliminable register. Otherwise
3202 register_operand or const_int. */
3203
3204int
3205nonmemory_no_elim_operand (op, mode)
3206 register rtx op;
3207 enum machine_mode mode;
3208{
3209 rtx t = op;
3210 if (GET_CODE (t) == SUBREG)
3211 t = SUBREG_REG (t);
3212 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3213 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3214 || t == virtual_stack_dynamic_rtx)
3215 return 0;
3216
3217 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3218}
3219
e075ae69 3220/* Return true if op is a Q_REGS class register. */
b840bfb0 3221
3222int
3223q_regs_operand (op, mode)
3224 register rtx op;
3225 enum machine_mode mode;
b840bfb0 3226{
3227 if (mode != VOIDmode && GET_MODE (op) != mode)
3228 return 0;
3229 if (GET_CODE (op) == SUBREG)
3230 op = SUBREG_REG (op);
7799175f 3231 return ANY_QI_REG_P (op);
0f290768 3232}
b840bfb0 3233
e075ae69 3234/* Return true if op is a NON_Q_REGS class register. */
b840bfb0 3235
e075ae69
RH
3236int
3237non_q_regs_operand (op, mode)
3238 register rtx op;
3239 enum machine_mode mode;
3240{
3241 if (mode != VOIDmode && GET_MODE (op) != mode)
3242 return 0;
3243 if (GET_CODE (op) == SUBREG)
3244 op = SUBREG_REG (op);
3245 return NON_QI_REG_P (op);
0f290768 3246}
b840bfb0 3247
915119a5
BS
3248/* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
3249 insns. */
3250int
3251sse_comparison_operator (op, mode)
3252 rtx op;
3253 enum machine_mode mode ATTRIBUTE_UNUSED;
3254{
3255 enum rtx_code code = GET_CODE (op);
a46d1d38
JH
3256 switch (code)
3257 {
3258 /* Operations supported directly. */
3259 case EQ:
3260 case LT:
3261 case LE:
3262 case UNORDERED:
3263 case NE:
3264 case UNGE:
3265 case UNGT:
3266 case ORDERED:
3267 return 1;
3268 /* These are equivalent to the ones above for non-IEEE comparisons. */
3269 case UNEQ:
3270 case UNLT:
3271 case UNLE:
3272 case LTGT:
3273 case GE:
3274 case GT:
3275 return !TARGET_IEEE_FP;
3276 default:
3277 return 0;
3278 }
915119a5 3279}
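/* For reference, the eight codes accepted unconditionally map one to
   one onto the cmpps/cmpss immediate field: eq, lt, le, unord, neq,
   nlt, nle and ord, where e.g. UNGE is "nlt" (not-less-than is true
   for unordered or greater-or-equal operands). The second group is
   only safe when !TARGET_IEEE_FP (-mno-ieee-fp) lets us ignore how
   NaNs distinguish e.g. GE from UNGE. */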
9076b9c1 3280/* Return 1 if OP is a valid comparison operator in valid mode. */
e075ae69 3281int
9076b9c1
JH
3282ix86_comparison_operator (op, mode)
3283 register rtx op;
3284 enum machine_mode mode;
e075ae69 3285{
9076b9c1 3286 enum machine_mode inmode;
9a915772 3287 enum rtx_code code = GET_CODE (op);
3a3677ff
RH
3288 if (mode != VOIDmode && GET_MODE (op) != mode)
3289 return 0;
9a915772
JH
3290 if (GET_RTX_CLASS (code) != '<')
3291 return 0;
3292 inmode = GET_MODE (XEXP (op, 0));
3293
3294 if (inmode == CCFPmode || inmode == CCFPUmode)
3295 {
3296 enum rtx_code second_code, bypass_code;
3297 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3298 return (bypass_code == NIL && second_code == NIL);
3299 }
3300 switch (code)
3a3677ff
RH
3301 {
3302 case EQ: case NE:
3a3677ff 3303 return 1;
9076b9c1 3304 case LT: case GE:
7e08e190 3305 if (inmode == CCmode || inmode == CCGCmode
9076b9c1
JH
3306 || inmode == CCGOCmode || inmode == CCNOmode)
3307 return 1;
3308 return 0;
7e08e190 3309 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
7e08e190 3310 if (inmode == CCmode)
9076b9c1
JH
3311 return 1;
3312 return 0;
3313 case GT: case LE:
7e08e190 3314 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
9076b9c1
JH
3315 return 1;
3316 return 0;
3a3677ff
RH
3317 default:
3318 return 0;
3319 }
3320}
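/* A rough reminder of what the CC modes above guarantee: CCNOmode
   means the overflow flag is known clear (as after logical insns),
   CCGCmode leaves the carry flag garbage and CCGOCmode both carry and
   overflow. Hence the unsigned tests (LTU/GTU/LEU/GEU) demand full
   CCmode, LT/GE get by in all four modes, and GT/LE, which also need
   a trustworthy overflow flag, exclude CCGOCmode. */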
3321
9076b9c1 3322/* Return 1 if OP is a comparison operator that can be issued by fcmov. */
3a3677ff 3323
9076b9c1
JH
3324int
3325fcmov_comparison_operator (op, mode)
3a3677ff
RH
3326 register rtx op;
3327 enum machine_mode mode;
3328{
b62d22a2 3329 enum machine_mode inmode;
9a915772 3330 enum rtx_code code = GET_CODE (op);
3a3677ff
RH
3331 if (mode != VOIDmode && GET_MODE (op) != mode)
3332 return 0;
9a915772
JH
3333 if (GET_RTX_CLASS (code) != '<')
3334 return 0;
3335 inmode = GET_MODE (XEXP (op, 0));
3336 if (inmode == CCFPmode || inmode == CCFPUmode)
3a3677ff 3337 {
9a915772
JH
3338 enum rtx_code second_code, bypass_code;
3339 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3340 if (bypass_code != NIL || second_code != NIL)
3341 return 0;
3342 code = ix86_fp_compare_code_to_integer (code);
3343 }
3344 /* The i387 supports only a limited set of condition codes. */
3345 switch (code)
3346 {
3347 case LTU: case GTU: case LEU: case GEU:
3348 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
9076b9c1
JH
3349 return 1;
3350 return 0;
9a915772
JH
3351 case ORDERED: case UNORDERED:
3352 case EQ: case NE:
3353 return 1;
3a3677ff
RH
3354 default:
3355 return 0;
3356 }
e075ae69 3357}
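/* The limitation comes from the fcmov encodings themselves: only
   fcmov(n)b, fcmov(n)be, fcmov(n)e and fcmov(n)u exist, i.e. tests of
   CF, ZF and PF. That covers the unsigned codes plus EQ/NE and
   ORDERED/UNORDERED; FP comparisons qualify only after
   ix86_fp_compare_code_to_integer maps them onto this set. */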
b840bfb0 3358
e9e80858
JH
3359/* Return 1 if OP is a binary operator that can be promoted to wider mode. */
3360
3361int
3362promotable_binary_operator (op, mode)
3363 register rtx op;
3364 enum machine_mode mode ATTRIBUTE_UNUSED;
3365{
3366 switch (GET_CODE (op))
3367 {
3368 case MULT:
3369 /* Modern CPUs have the same latency for HImode and SImode multiplies,
3370 but the 386 and 486 do HImode multiplies faster. */
3371 return ix86_cpu > PROCESSOR_I486;
3372 case PLUS:
3373 case AND:
3374 case IOR:
3375 case XOR:
3376 case ASHIFT:
3377 return 1;
3378 default:
3379 return 0;
3380 }
3381}
3382
e075ae69
RH
3383/* Nearly general operand, but accept any const_double, since we wish
3384 to be able to drop them into memory rather than have them get pulled
3385 into registers. */
b840bfb0 3386
2a2ab3f9 3387int
e075ae69
RH
3388cmp_fp_expander_operand (op, mode)
3389 register rtx op;
3390 enum machine_mode mode;
2a2ab3f9 3391{
e075ae69 3392 if (mode != VOIDmode && mode != GET_MODE (op))
0b6b2900 3393 return 0;
e075ae69 3394 if (GET_CODE (op) == CONST_DOUBLE)
2a2ab3f9 3395 return 1;
e075ae69 3396 return general_operand (op, mode);
2a2ab3f9
JVA
3397}
3398
e075ae69 3399/* Match an SI or HImode register for a zero_extract. */
2a2ab3f9
JVA
3400
3401int
e075ae69 3402ext_register_operand (op, mode)
2a2ab3f9 3403 register rtx op;
bb5177ac 3404 enum machine_mode mode ATTRIBUTE_UNUSED;
2a2ab3f9 3405{
3522082b 3406 int regno;
0d7d98ee
JH
3407 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
3408 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
e075ae69 3409 return 0;
3522082b
JH
3410
3411 if (!register_operand (op, VOIDmode))
3412 return 0;
3413
3414 /* Be careful to accept only registers having upper parts. */
3415 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
3416 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
e075ae69
RH
3417}
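/* Only hard registers 0..3 (eax, edx, ecx, ebx) have addressable
   high-byte halves (%ah etc.), hence the "regno < 4" test above;
   pseudos beyond LAST_VIRTUAL_REGISTER pass because reload can still
   assign them to a suitable register class. */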
3418
3419/* Return 1 if this is a valid binary floating-point operation.
0f290768 3420 OP is the expression matched, and MODE is its mode. */
e075ae69
RH
3421
3422int
3423binary_fp_operator (op, mode)
3424 register rtx op;
3425 enum machine_mode mode;
3426{
3427 if (mode != VOIDmode && mode != GET_MODE (op))
3428 return 0;
3429
2a2ab3f9
JVA
3430 switch (GET_CODE (op))
3431 {
e075ae69
RH
3432 case PLUS:
3433 case MINUS:
3434 case MULT:
3435 case DIV:
3436 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
e9a25f70 3437
2a2ab3f9
JVA
3438 default:
3439 return 0;
3440 }
3441}
fee2770d 3442
e075ae69 3443int
b531087a 3444mult_operator (op, mode)
e075ae69
RH
3445 register rtx op;
3446 enum machine_mode mode ATTRIBUTE_UNUSED;
3447{
3448 return GET_CODE (op) == MULT;
3449}
3450
3451int
b531087a 3452div_operator (op, mode)
e075ae69
RH
3453 register rtx op;
3454 enum machine_mode mode ATTRIBUTE_UNUSED;
3455{
3456 return GET_CODE (op) == DIV;
3457}
0a726ef1
JL
3458
3459int
e075ae69
RH
3460arith_or_logical_operator (op, mode)
3461 rtx op;
3462 enum machine_mode mode;
0a726ef1 3463{
e075ae69
RH
3464 return ((mode == VOIDmode || GET_MODE (op) == mode)
3465 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
3466 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
0a726ef1
JL
3467}
3468
e075ae69 3469/* Returns 1 if OP is a memory operand with a displacement. */
fee2770d
RS
3470
3471int
e075ae69
RH
3472memory_displacement_operand (op, mode)
3473 register rtx op;
3474 enum machine_mode mode;
4f2c8ebb 3475{
e075ae69 3476 struct ix86_address parts;
e9a25f70 3477
e075ae69
RH
3478 if (! memory_operand (op, mode))
3479 return 0;
3480
3481 if (! ix86_decompose_address (XEXP (op, 0), &parts))
3482 abort ();
3483
3484 return parts.disp != NULL_RTX;
4f2c8ebb
RS
3485}
3486
16189740 3487/* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
e075ae69
RH
3488 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
3489
3490 ??? It seems likely that this will only work because cmpsi is an
3491 expander, and no actual insns use this. */
4f2c8ebb
RS
3492
3493int
e075ae69
RH
3494cmpsi_operand (op, mode)
3495 rtx op;
3496 enum machine_mode mode;
fee2770d 3497{
b9b2c339 3498 if (nonimmediate_operand (op, mode))
e075ae69
RH
3499 return 1;
3500
3501 if (GET_CODE (op) == AND
3502 && GET_MODE (op) == SImode
3503 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
3504 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
3505 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
3506 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
3507 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
3508 && GET_CODE (XEXP (op, 1)) == CONST_INT)
fee2770d 3509 return 1;
e9a25f70 3510
fee2770d
RS
3511 return 0;
3512}
d784886d 3513
e075ae69
RH
3514/* Returns 1 if OP is a memory operand that cannot be represented by the
3515 modRM array. */
d784886d
RK
3516
3517int
e075ae69 3518long_memory_operand (op, mode)
d784886d
RK
3519 register rtx op;
3520 enum machine_mode mode;
3521{
e075ae69 3522 if (! memory_operand (op, mode))
d784886d
RK
3523 return 0;
3524
e075ae69 3525 return memory_address_length (op) != 0;
d784886d 3526}
2247f6ed
JH
3527
3528/* Return nonzero if the rtx is known to be aligned. */
3529
3530int
3531aligned_operand (op, mode)
3532 rtx op;
3533 enum machine_mode mode;
3534{
3535 struct ix86_address parts;
3536
3537 if (!general_operand (op, mode))
3538 return 0;
3539
0f290768 3540 /* Registers and immediate operands are always "aligned". */
2247f6ed
JH
3541 if (GET_CODE (op) != MEM)
3542 return 1;
3543
0f290768 3544 /* Don't even try to do any aligned optimizations with volatiles. */
2247f6ed
JH
3545 if (MEM_VOLATILE_P (op))
3546 return 0;
3547
3548 op = XEXP (op, 0);
3549
3550 /* Pushes and pops are only valid on the stack pointer. */
3551 if (GET_CODE (op) == PRE_DEC
3552 || GET_CODE (op) == POST_INC)
3553 return 1;
3554
3555 /* Decode the address. */
3556 if (! ix86_decompose_address (op, &parts))
3557 abort ();
3558
1540f9eb
JH
3559 if (parts.base && GET_CODE (parts.base) == SUBREG)
3560 parts.base = SUBREG_REG (parts.base);
3561 if (parts.index && GET_CODE (parts.index) == SUBREG)
3562 parts.index = SUBREG_REG (parts.index);
3563
2247f6ed
JH
3564 /* Look for some component that isn't known to be aligned. */
3565 if (parts.index)
3566 {
3567 if (parts.scale < 4
bdb429a5 3568 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
2247f6ed
JH
3569 return 0;
3570 }
3571 if (parts.base)
3572 {
bdb429a5 3573 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
2247f6ed
JH
3574 return 0;
3575 }
3576 if (parts.disp)
3577 {
3578 if (GET_CODE (parts.disp) != CONST_INT
3579 || (INTVAL (parts.disp) & 3) != 0)
3580 return 0;
3581 }
3582
3583 /* Didn't find one -- this must be an aligned address. */
3584 return 1;
3585}
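/* Worked example: "4(%esp)" decomposes to base == esp with disp == 4;
   the stack pointer carries 32-bit REGNO_POINTER_ALIGN information and
   4 & 3 == 0, so the operand counts as aligned. "3(%esp)", or an
   index with scale < 4 whose alignment is unknown, does not. */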
e075ae69
RH
3586\f
3587/* Return true if the constant is something that can be loaded with
3588 a special instruction. Only handle 0.0 and 1.0; others are less
3589 worthwhile. */
57dbca5e
BS
3590
3591int
e075ae69
RH
3592standard_80387_constant_p (x)
3593 rtx x;
57dbca5e 3594{
2b04e52b 3595 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
e075ae69 3596 return -1;
2b04e52b
JH
3597 /* Note that the 80387 has other constants, such as pi, that we should
3598 support too. On some machines, these are much slower to load as a
3599 standard constant than to load from doubles in memory. */
3600 if (x == CONST0_RTX (GET_MODE (x)))
3601 return 1;
3602 if (x == CONST1_RTX (GET_MODE (x)))
3603 return 2;
e075ae69 3604 return 0;
57dbca5e
BS
3605}
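/* The two nonzero answers correspond to the dedicated 387 load insns:
   1 means the value can be pushed with "fldz" and 2 with "fld1", both
   of which avoid a trip through memory. */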
3606
2b04e52b
JH
3607/* Return 1 if X is an FP constant we can load into an SSE register
3608 without using memory. */
3609int
3610standard_sse_constant_p (x)
3611 rtx x;
3612{
3613 if (GET_CODE (x) != CONST_DOUBLE)
3614 return -1;
3615 return (x == CONST0_RTX (GET_MODE (x)));
3616}
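/* Only +0.0 qualifies: it is materialized by xoring the register with
   itself (xorps/pxor), which needs no memory operand and breaks any
   dependency on the register's previous contents. */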
3617
2a2ab3f9
JVA
3618/* Returns 1 if OP contains a symbol reference */
3619
3620int
3621symbolic_reference_mentioned_p (op)
3622 rtx op;
3623{
6f7d635c 3624 register const char *fmt;
2a2ab3f9
JVA
3625 register int i;
3626
3627 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
3628 return 1;
3629
3630 fmt = GET_RTX_FORMAT (GET_CODE (op));
3631 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3632 {
3633 if (fmt[i] == 'E')
3634 {
3635 register int j;
3636
3637 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3638 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3639 return 1;
3640 }
e9a25f70 3641
2a2ab3f9
JVA
3642 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3643 return 1;
3644 }
3645
3646 return 0;
3647}
e075ae69
RH
3648
3649/* Return 1 if it is appropriate to emit `ret' instructions in the
3650 body of a function. Do this only if the epilogue is simple, needing a
3651 couple of insns. Prior to reloading, we can't tell how many registers
3652 must be saved, so return 0 then. Return 0 if there is no frame
3653 marker to de-allocate.
3654
3655 If NON_SAVING_SETJMP is defined and true, then it is not possible
3656 for the epilogue to be simple, so return 0. This is a special case
3657 since NON_SAVING_SETJMP will not cause regs_ever_live to change
3658 until final, but jump_optimize may need to know sooner if a
3659 `return' is OK. */
32b5b1aa
SC
3660
3661int
e075ae69 3662ix86_can_use_return_insn_p ()
32b5b1aa 3663{
4dd2ac2c 3664 struct ix86_frame frame;
9a7372d6 3665
e075ae69
RH
3666#ifdef NON_SAVING_SETJMP
3667 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
3668 return 0;
3669#endif
9a7372d6
RH
3670
3671 if (! reload_completed || frame_pointer_needed)
3672 return 0;
32b5b1aa 3673
9a7372d6
RH
3674 /* Don't allow more than 32k pop, since that's all we can do
3675 with one instruction. */
3676 if (current_function_pops_args
3677 && current_function_args_size >= 32768)
e075ae69 3678 return 0;
32b5b1aa 3679
4dd2ac2c
JH
3680 ix86_compute_frame_layout (&frame);
3681 return frame.to_allocate == 0 && frame.nregs == 0;
e075ae69 3682}
6189a572
JH
3683\f
3684/* Return 1 if VALUE can be stored in the sign extended immediate field. */
3685int
3686x86_64_sign_extended_value (value)
3687 rtx value;
3688{
3689 switch (GET_CODE (value))
3690 {
3691 /* CONST_DOUBLES never match, since HOST_BITS_PER_WIDE_INT is known
3692 to be at least 32 and thus all acceptable constants are
3693 represented as CONST_INT. */
3694 case CONST_INT:
3695 if (HOST_BITS_PER_WIDE_INT == 32)
3696 return 1;
3697 else
3698 {
3699 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
fa9f36a1 3700 return trunc_int_for_mode (val, SImode) == val;
6189a572
JH
3701 }
3702 break;
3703
3704 /* For certain code models, the symbolic references are known to fit. */
3705 case SYMBOL_REF:
3706 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL;
3707
3708 /* For certain code models, the code is near as well. */
3709 case LABEL_REF:
3710 return ix86_cmodel != CM_LARGE && ix86_cmodel != CM_SMALL_PIC;
3711
3712 /* We may also accept offsetted memory references in certain special
3713 cases. */
3714 case CONST:
3715 if (GET_CODE (XEXP (value, 0)) == UNSPEC
8ee41eaf 3716 && XINT (XEXP (value, 0), 1) == UNSPEC_GOTPCREL)
6189a572
JH
3717 return 1;
3718 else if (GET_CODE (XEXP (value, 0)) == PLUS)
3719 {
3720 rtx op1 = XEXP (XEXP (value, 0), 0);
3721 rtx op2 = XEXP (XEXP (value, 0), 1);
3722 HOST_WIDE_INT offset;
3723
3724 if (ix86_cmodel == CM_LARGE)
3725 return 0;
3726 if (GET_CODE (op2) != CONST_INT)
3727 return 0;
3728 offset = trunc_int_for_mode (INTVAL (op2), DImode);
3729 switch (GET_CODE (op1))
3730 {
3731 case SYMBOL_REF:
3732 /* For CM_SMALL assume that the latest object is 1MB before the
3733 end of the 31-bit boundary. We may also accept pretty
3734 large negative constants knowing that all objects are
3735 in the positive half of the address space. */
3736 if (ix86_cmodel == CM_SMALL
3737 && offset < 1024*1024*1024
3738 && trunc_int_for_mode (offset, SImode) == offset)
3739 return 1;
3740 /* For CM_KERNEL we know that all objects reside in the
3741 negative half of the 32-bit address space. We may not
3742 accept negative offsets, since they may be just off
d6a7951f 3743 and we may accept pretty large positive ones. */
6189a572
JH
3744 if (ix86_cmodel == CM_KERNEL
3745 && offset > 0
3746 && trunc_int_for_mode (offset, SImode) == offset)
3747 return 1;
3748 break;
3749 case LABEL_REF:
3750 /* These conditions are similar to SYMBOL_REF ones, just the
3751 constraints for code models differ. */
3752 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
3753 && offset < 1024*1024*1024
3754 && trunc_int_for_mode (offset, SImode) == offset)
3755 return 1;
3756 if (ix86_cmodel == CM_KERNEL
3757 && offset > 0
3758 && trunc_int_for_mode (offset, SImode) == offset)
3759 return 1;
3760 break;
3761 default:
3762 return 0;
3763 }
3764 }
3765 return 0;
3766 default:
3767 return 0;
3768 }
3769}
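/* Numeric illustration (with 64-bit HOST_WIDE_INT): 0x7fffffff and
   -0x80000000 survive the SImode round trip and are usable as sign
   extended immediates, while positive 0x80000000 is not. */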
3770
3771/* Return 1 if VALUE can be stored in the zero extended immediate field. */
3772int
3773x86_64_zero_extended_value (value)
3774 rtx value;
3775{
3776 switch (GET_CODE (value))
3777 {
3778 case CONST_DOUBLE:
3779 if (HOST_BITS_PER_WIDE_INT == 32)
3780 return (GET_MODE (value) == VOIDmode
3781 && !CONST_DOUBLE_HIGH (value));
3782 else
3783 return 0;
3784 case CONST_INT:
3785 if (HOST_BITS_PER_WIDE_INT == 32)
3786 return INTVAL (value) >= 0;
3787 else
b531087a 3788 return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
6189a572
JH
3789 break;
3790
3791 /* For certain code models, the symbolic references are known to fit. */
3792 case SYMBOL_REF:
3793 return ix86_cmodel == CM_SMALL;
3794
3795 /* For certain code models, the code is near as well. */
3796 case LABEL_REF:
3797 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
3798
3799 /* We may also accept offsetted memory references in certain special
3800 cases. */
3801 case CONST:
3802 if (GET_CODE (XEXP (value, 0)) == PLUS)
3803 {
3804 rtx op1 = XEXP (XEXP (value, 0), 0);
3805 rtx op2 = XEXP (XEXP (value, 0), 1);
3806
3807 if (ix86_cmodel == CM_LARGE)
3808 return 0;
3809 switch (GET_CODE (op1))
3810 {
3811 case SYMBOL_REF:
3812 return 0;
d6a7951f 3813 /* For small code model we may accept pretty large positive
6189a572
JH
3814 offsets, since one bit is available for free. Negative
3815 offsets are limited by the size of NULL pointer area
3816 specified by the ABI. */
3817 if (ix86_cmodel == CM_SMALL
3818 && GET_CODE (op2) == CONST_INT
3819 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
3820 && (trunc_int_for_mode (INTVAL (op2), SImode)
3821 == INTVAL (op2)))
3822 return 1;
3823 /* ??? For the kernel, we may accept adjustment of
3824 -0x10000000, since we know that it will just convert
d6a7951f 3825 negative address space to positive, but perhaps this
6189a572
JH
3826 is not worthwhile. */
3827 break;
3828 case LABEL_REF:
3829 /* These conditions are similar to SYMBOL_REF ones, just the
3830 constraints for code models differ. */
3831 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
3832 && GET_CODE (op2) == CONST_INT
3833 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
3834 && (trunc_int_for_mode (INTVAL (op2), SImode)
3835 == INTVAL (op2)))
3836 return 1;
3837 break;
3838 default:
3839 return 0;
3840 }
3841 }
3842 return 0;
3843 default:
3844 return 0;
3845 }
3846}
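/* Numeric illustration: 0xffffffff is fine because a 32-bit "movl"
   zero extends into the upper half for free, whereas any constant
   with a bit set above bit 31 must be built some other way. */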
6fca22eb
RH
3847
3848/* Value should be nonzero if functions must have frame pointers.
3849 Zero means the frame pointer need not be set up (and parms may
3850 be accessed via the stack pointer) in functions that seem suitable. */
3851
3852int
3853ix86_frame_pointer_required ()
3854{
3855 /* If we accessed previous frames, then the generated code expects
3856 to be able to access the saved ebp value in our frame. */
3857 if (cfun->machine->accesses_prev_frame)
3858 return 1;
a4f31c00 3859
6fca22eb
RH
3860 /* Several x86 OSes need a frame pointer for other reasons,
3861 usually pertaining to setjmp. */
3862 if (SUBTARGET_FRAME_POINTER_REQUIRED)
3863 return 1;
3864
3865 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
3866 the frame pointer by default. Turn it back on now if we've not
3867 got a leaf function. */
a7943381
RH
3868 if (TARGET_OMIT_LEAF_FRAME_POINTER
3869 && (!current_function_is_leaf || current_function_profile))
6fca22eb
RH
3870 return 1;
3871
3872 return 0;
3873}
3874
3875/* Record that the current function accesses previous call frames. */
3876
3877void
3878ix86_setup_frame_addresses ()
3879{
3880 cfun->machine->accesses_prev_frame = 1;
3881}
e075ae69 3882\f
145aacc2
RH
3883#if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
3884# define USE_HIDDEN_LINKONCE 1
3885#else
3886# define USE_HIDDEN_LINKONCE 0
3887#endif
3888
bd09bdeb 3889static int pic_labels_used;
e9a25f70 3890
145aacc2
RH
3891/* Fills in the label name that should be used for a pc thunk for
3892 the given register. */
3893
3894static void
3895get_pc_thunk_name (name, regno)
3896 char name[32];
3897 unsigned int regno;
3898{
3899 if (USE_HIDDEN_LINKONCE)
3900 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
3901 else
3902 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
3903}
3904
3905
e075ae69
RH
3906/* This function generates code for -fpic that loads %ebx with
3907 the return address of the caller and then returns. */
3908
3909void
4cf12e7e 3910ix86_asm_file_end (file)
e075ae69 3911 FILE *file;
e075ae69
RH
3912{
3913 rtx xops[2];
bd09bdeb 3914 int regno;
32b5b1aa 3915
bd09bdeb 3916 for (regno = 0; regno < 8; ++regno)
7c262518 3917 {
145aacc2
RH
3918 char name[32];
3919
bd09bdeb
RH
3920 if (! ((pic_labels_used >> regno) & 1))
3921 continue;
3922
145aacc2 3923 get_pc_thunk_name (name, regno);
bd09bdeb 3924
145aacc2
RH
3925 if (USE_HIDDEN_LINKONCE)
3926 {
3927 tree decl;
3928
3929 decl = build_decl (FUNCTION_DECL, get_identifier (name),
3930 error_mark_node);
3931 TREE_PUBLIC (decl) = 1;
3932 TREE_STATIC (decl) = 1;
3933 DECL_ONE_ONLY (decl) = 1;
3934
3935 (*targetm.asm_out.unique_section) (decl, 0);
3936 named_section (decl, NULL, 0);
3937
5eb99654 3938 (*targetm.asm_out.globalize_label) (file, name);
145aacc2
RH
3939 fputs ("\t.hidden\t", file);
3940 assemble_name (file, name);
3941 fputc ('\n', file);
3942 ASM_DECLARE_FUNCTION_NAME (file, name, decl);
3943 }
3944 else
3945 {
3946 text_section ();
3947 ASM_OUTPUT_LABEL (file, name);
3948 }
bd09bdeb
RH
3949
3950 xops[0] = gen_rtx_REG (SImode, regno);
3951 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
3952 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
3953 output_asm_insn ("ret", xops);
7c262518 3954 }
32b5b1aa 3955}
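/* Each emitted thunk is just two instructions, e.g. for %ebx:

	__i686.get_pc_thunk.bx:
		movl (%esp), %ebx
		ret

   copying the caller's return address (its pc) into the register,
   which keeps the return-address branch predictor happy, unlike the
   classic call/pop trick. */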
32b5b1aa 3956
c8c03509 3957/* Emit code for the SET_GOT patterns. */
32b5b1aa 3958
c8c03509
RH
3959const char *
3960output_set_got (dest)
3961 rtx dest;
3962{
3963 rtx xops[3];
0d7d98ee 3964
c8c03509 3965 xops[0] = dest;
5fc0e5df 3966 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
32b5b1aa 3967
c8c03509 3968 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
32b5b1aa 3969 {
c8c03509
RH
3970 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
3971
3972 if (!flag_pic)
3973 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
3974 else
3975 output_asm_insn ("call\t%a2", xops);
3976
b069de3b
SS
3977#if TARGET_MACHO
3978 /* Output the "canonical" label name ("Lxx$pb") here too. This
3979 is what will be referred to by the Mach-O PIC subsystem. */
3980 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
3981#endif
c8c03509
RH
3982 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
3983 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
3984
3985 if (flag_pic)
3986 output_asm_insn ("pop{l}\t%0", xops);
32b5b1aa 3987 }
e075ae69 3988 else
e5cb57e8 3989 {
145aacc2
RH
3990 char name[32];
3991 get_pc_thunk_name (name, REGNO (dest));
bd09bdeb 3992 pic_labels_used |= 1 << REGNO (dest);
f996902d 3993
145aacc2 3994 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
c8c03509
RH
3995 xops[2] = gen_rtx_MEM (QImode, xops[2]);
3996 output_asm_insn ("call\t%X2", xops);
e5cb57e8 3997 }
e5cb57e8 3998
c8c03509
RH
3999 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4000 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
b069de3b 4001 else if (!TARGET_MACHO)
8e9fadc3 4002 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
79325812 4003
c8c03509 4004 return "";
e9a25f70 4005}
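/* Without deep branch prediction, the classic sequence is emitted:

	call	.L2
   .L2:	popl	%ebx
	addl	$_GLOBAL_OFFSET_TABLE_+[.-.L2], %ebx

   otherwise we call the per-register pc thunk above and add
   $_GLOBAL_OFFSET_TABLE_ to its result. */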
8dfe5673 4006
0d7d98ee 4007/* Generate a "push" pattern for input ARG. */
e9a25f70 4008
e075ae69
RH
4009static rtx
4010gen_push (arg)
4011 rtx arg;
e9a25f70 4012{
c5c76735 4013 return gen_rtx_SET (VOIDmode,
0d7d98ee
JH
4014 gen_rtx_MEM (Pmode,
4015 gen_rtx_PRE_DEC (Pmode,
c5c76735
JL
4016 stack_pointer_rtx)),
4017 arg);
e9a25f70
JL
4018}
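/* The returned pattern for, say, %eax in 32-bit mode is
   (set (mem:SI (pre_dec:SI (reg:SI 7))) (reg:SI 0)), which the push
   patterns in i386.md print as "pushl %eax"; with TARGET_64BIT, Pmode
   is DImode and the same shape becomes "pushq". */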
4019
bd09bdeb
RH
4020/* Return >= 0 if there is an unused call-clobbered register available
4021 for the entire function. */
4022
4023static unsigned int
4024ix86_select_alt_pic_regnum ()
4025{
4026 if (current_function_is_leaf && !current_function_profile)
4027 {
4028 int i;
4029 for (i = 2; i >= 0; --i)
4030 if (!regs_ever_live[i])
4031 return i;
4032 }
4033
4034 return INVALID_REGNUM;
4035}
fce5a9f2 4036
4dd2ac2c
JH
4037/* Return 1 if we need to save REGNO. */
4038static int
1020a5ab 4039ix86_save_reg (regno, maybe_eh_return)
9b690711 4040 unsigned int regno;
37a58036 4041 int maybe_eh_return;
1020a5ab 4042{
bd09bdeb
RH
4043 if (pic_offset_table_rtx
4044 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4045 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
66edd3b4 4046 || current_function_profile
1020a5ab 4047 || current_function_calls_eh_return))
bd09bdeb
RH
4048 {
4049 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4050 return 0;
4051 return 1;
4052 }
1020a5ab
RH
4053
4054 if (current_function_calls_eh_return && maybe_eh_return)
4055 {
4056 unsigned i;
4057 for (i = 0; ; i++)
4058 {
b531087a 4059 unsigned test = EH_RETURN_DATA_REGNO (i);
1020a5ab
RH
4060 if (test == INVALID_REGNUM)
4061 break;
9b690711 4062 if (test == regno)
1020a5ab
RH
4063 return 1;
4064 }
4065 }
4dd2ac2c 4066
1020a5ab
RH
4067 return (regs_ever_live[regno]
4068 && !call_used_regs[regno]
4069 && !fixed_regs[regno]
4070 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4dd2ac2c
JH
4071}
4072
0903fcab
JH
4073/* Return number of registers to be saved on the stack. */
4074
4075static int
4076ix86_nsaved_regs ()
4077{
4078 int nregs = 0;
0903fcab
JH
4079 int regno;
4080
4dd2ac2c 4081 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
1020a5ab 4082 if (ix86_save_reg (regno, true))
4dd2ac2c 4083 nregs++;
0903fcab
JH
4084 return nregs;
4085}
4086
4087/* Return the offset between two registers, one to be eliminated, and the other
4088 its replacement, at the start of a routine. */
4089
4090HOST_WIDE_INT
4091ix86_initial_elimination_offset (from, to)
4092 int from;
4093 int to;
4094{
4dd2ac2c
JH
4095 struct ix86_frame frame;
4096 ix86_compute_frame_layout (&frame);
564d80f4
JH
4097
4098 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4dd2ac2c 4099 return frame.hard_frame_pointer_offset;
564d80f4
JH
4100 else if (from == FRAME_POINTER_REGNUM
4101 && to == HARD_FRAME_POINTER_REGNUM)
4dd2ac2c 4102 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
0903fcab
JH
4103 else
4104 {
564d80f4
JH
4105 if (to != STACK_POINTER_REGNUM)
4106 abort ();
4107 else if (from == ARG_POINTER_REGNUM)
4dd2ac2c 4108 return frame.stack_pointer_offset;
564d80f4
JH
4109 else if (from != FRAME_POINTER_REGNUM)
4110 abort ();
0903fcab 4111 else
4dd2ac2c 4112 return frame.stack_pointer_offset - frame.frame_pointer_offset;
0903fcab
JH
4113 }
4114}
4115
4dd2ac2c 4116/* Fill structure ix86_frame about frame of currently computed function. */
0f290768 4117
4dd2ac2c
JH
4118static void
4119ix86_compute_frame_layout (frame)
4120 struct ix86_frame *frame;
65954bd8 4121{
65954bd8 4122 HOST_WIDE_INT total_size;
564d80f4 4123 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
44affdae
JH
4124 int offset;
4125 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4dd2ac2c 4126 HOST_WIDE_INT size = get_frame_size ();
65954bd8 4127
4dd2ac2c 4128 frame->nregs = ix86_nsaved_regs ();
564d80f4 4129 total_size = size;
65954bd8 4130
9ba81eaa 4131 /* Skip return address and saved base pointer. */
4dd2ac2c
JH
4132 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4133
4134 frame->hard_frame_pointer_offset = offset;
564d80f4 4135
fcbfaa65
RK
4136 /* Do some sanity checking of stack_alignment_needed and
4137 preferred_alignment, since the i386 port is the only one using those features
f710504c 4138 that may break easily. */
564d80f4 4139
44affdae
JH
4140 if (size && !stack_alignment_needed)
4141 abort ();
44affdae
JH
4142 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
4143 abort ();
4144 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4145 abort ();
4146 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4147 abort ();
564d80f4 4148
4dd2ac2c
JH
4149 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
4150 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
564d80f4 4151
4dd2ac2c
JH
4152 /* Register save area */
4153 offset += frame->nregs * UNITS_PER_WORD;
65954bd8 4154
8362f420
JH
4155 /* Va-arg area */
4156 if (ix86_save_varrargs_registers)
4157 {
4158 offset += X86_64_VARARGS_SIZE;
4159 frame->va_arg_size = X86_64_VARARGS_SIZE;
4160 }
4161 else
4162 frame->va_arg_size = 0;
4163
4dd2ac2c
JH
4164 /* Align start of frame for local function. */
4165 frame->padding1 = ((offset + stack_alignment_needed - 1)
4166 & -stack_alignment_needed) - offset;
f73ad30e 4167
4dd2ac2c 4168 offset += frame->padding1;
65954bd8 4169
4dd2ac2c
JH
4170 /* Frame pointer points here. */
4171 frame->frame_pointer_offset = offset;
54ff41b7 4172
4dd2ac2c 4173 offset += size;
65954bd8 4174
0b7ae565
RH
4175 /* Add outgoing arguments area. Can be skipped if we eliminated
4176 all the function calls as dead code. */
4177 if (ACCUMULATE_OUTGOING_ARGS && !current_function_is_leaf)
4dd2ac2c
JH
4178 {
4179 offset += current_function_outgoing_args_size;
4180 frame->outgoing_arguments_size = current_function_outgoing_args_size;
4181 }
4182 else
4183 frame->outgoing_arguments_size = 0;
564d80f4 4184
002ff5bc
RH
4185 /* Align stack boundary. Only needed if we're calling another function
4186 or using alloca. */
4187 if (!current_function_is_leaf || current_function_calls_alloca)
0b7ae565
RH
4188 frame->padding2 = ((offset + preferred_alignment - 1)
4189 & -preferred_alignment) - offset;
4190 else
4191 frame->padding2 = 0;
4dd2ac2c
JH
4192
4193 offset += frame->padding2;
4194
4195 /* We've reached end of stack frame. */
4196 frame->stack_pointer_offset = offset;
4197
4198 /* Size prologue needs to allocate. */
4199 frame->to_allocate =
4200 (size + frame->padding1 + frame->padding2
8362f420 4201 + frame->outgoing_arguments_size + frame->va_arg_size);
4dd2ac2c 4202
8362f420
JH
4203 if (TARGET_64BIT && TARGET_RED_ZONE && current_function_sp_is_unchanging
4204 && current_function_is_leaf)
4205 {
4206 frame->red_zone_size = frame->to_allocate;
4207 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
4208 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
4209 }
4210 else
4211 frame->red_zone_size = 0;
4212 frame->to_allocate -= frame->red_zone_size;
4213 frame->stack_pointer_offset -= frame->red_zone_size;
4dd2ac2c
JH
4214#if 0
4215 fprintf (stderr, "nregs: %i\n", frame->nregs);
4216 fprintf (stderr, "size: %i\n", size);
4217 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
4218 fprintf (stderr, "padding1: %i\n", frame->padding1);
8362f420 4219 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
4dd2ac2c
JH
4220 fprintf (stderr, "padding2: %i\n", frame->padding2);
4221 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
8362f420 4222 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
4dd2ac2c
JH
4223 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
4224 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
4225 frame->hard_frame_pointer_offset);
4226 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
4227#endif
65954bd8
JL
4228}
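/* A sketch of the layout just computed, higher addresses first:

	return address
	saved %ebp (if frame_pointer_needed)	<- hard_frame_pointer_offset
	saved registers (nregs words)
	va-arg save area (64-bit only)
	padding1				<- frame_pointer_offset
	local frame (get_frame_size ())
	outgoing arguments
	padding2				<- stack_pointer_offset

   to_allocate covers everything below the register save area, less
   any red zone a 64-bit leaf function may use without moving %rsp. */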
4229
0903fcab
JH
4230/* Emit code to save registers in the prologue. */
4231
4232static void
4233ix86_emit_save_regs ()
4234{
4235 register int regno;
0903fcab 4236 rtx insn;
0903fcab 4237
4dd2ac2c 4238 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
1020a5ab 4239 if (ix86_save_reg (regno, true))
0903fcab 4240 {
0d7d98ee 4241 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
0903fcab
JH
4242 RTX_FRAME_RELATED_P (insn) = 1;
4243 }
4244}
4245
c6036a37
JH
4246/* Emit code to save registers using MOV insns. First register
4247 is restored from POINTER + OFFSET. */
4248static void
4249ix86_emit_save_regs_using_mov (pointer, offset)
b72f00af
RK
4250 rtx pointer;
4251 HOST_WIDE_INT offset;
c6036a37
JH
4252{
4253 int regno;
4254 rtx insn;
4255
4256 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4257 if (ix86_save_reg (regno, true))
4258 {
b72f00af
RK
4259 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
4260 Pmode, offset),
c6036a37
JH
4261 gen_rtx_REG (Pmode, regno));
4262 RTX_FRAME_RELATED_P (insn) = 1;
4263 offset += UNITS_PER_WORD;
4264 }
4265}
4266
0f290768 4267/* Expand the prologue into a bunch of separate insns. */
e075ae69
RH
4268
4269void
4270ix86_expand_prologue ()
2a2ab3f9 4271{
564d80f4 4272 rtx insn;
bd09bdeb 4273 bool pic_reg_used;
4dd2ac2c 4274 struct ix86_frame frame;
6ab16dd9 4275 int use_mov = 0;
c6036a37 4276 HOST_WIDE_INT allocate;
4dd2ac2c 4277
2ab0437e 4278 if (!optimize_size)
6ab16dd9
JH
4279 {
4280 use_fast_prologue_epilogue
4281 = !expensive_function_p (FAST_PROLOGUE_INSN_COUNT);
2ab0437e
JH
4282 if (TARGET_PROLOGUE_USING_MOVE)
4283 use_mov = use_fast_prologue_epilogue;
6ab16dd9 4284 }
4dd2ac2c 4285 ix86_compute_frame_layout (&frame);
79325812 4286
e075ae69
RH
4287 /* Note: AT&T enter does NOT have reversed args. Enter is probably
4288 slower on all targets. Also sdb doesn't like it. */
e9a25f70 4289
2a2ab3f9
JVA
4290 if (frame_pointer_needed)
4291 {
564d80f4 4292 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
e075ae69 4293 RTX_FRAME_RELATED_P (insn) = 1;
e9a25f70 4294
564d80f4 4295 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
e075ae69 4296 RTX_FRAME_RELATED_P (insn) = 1;
2a2ab3f9
JVA
4297 }
4298
c6036a37
JH
4299 allocate = frame.to_allocate;
4300 /* In case we are dealing only with single register and empty frame,
4301 push is equivalent of the mov+add sequence. */
4302 if (allocate == 0 && frame.nregs <= 1)
4303 use_mov = 0;
4304
4305 if (!use_mov)
4306 ix86_emit_save_regs ();
4307 else
4308 allocate += frame.nregs * UNITS_PER_WORD;
564d80f4 4309
c6036a37 4310 if (allocate == 0)
8dfe5673 4311 ;
e323735c 4312 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
469ac993 4313 {
f2042df3
RH
4314 insn = emit_insn (gen_pro_epilogue_adjust_stack
4315 (stack_pointer_rtx, stack_pointer_rtx,
e323735c 4316 GEN_INT (-allocate)));
e075ae69 4317 RTX_FRAME_RELATED_P (insn) = 1;
469ac993 4318 }
79325812 4319 else
8dfe5673 4320 {
e075ae69 4321 /* ??? Is this only valid for Win32? */
e9a25f70 4322
e075ae69 4323 rtx arg0, sym;
e9a25f70 4324
8362f420 4325 if (TARGET_64BIT)
b531087a 4326 abort ();
8362f420 4327
e075ae69 4328 arg0 = gen_rtx_REG (SImode, 0);
c6036a37 4329 emit_move_insn (arg0, GEN_INT (allocate));
77a989d1 4330
e075ae69
RH
4331 sym = gen_rtx_MEM (FUNCTION_MODE,
4332 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
32ee7d1d 4333 insn = emit_call_insn (gen_call (sym, const0_rtx, constm1_rtx));
e075ae69
RH
4334
4335 CALL_INSN_FUNCTION_USAGE (insn)
276ab4a4
RH
4336 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
4337 CALL_INSN_FUNCTION_USAGE (insn));
e075ae69 4338 }
c6036a37
JH
4339 if (use_mov)
4340 {
4341 if (!frame_pointer_needed || !frame.to_allocate)
4342 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
4343 else
4344 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
4345 -frame.nregs * UNITS_PER_WORD);
4346 }
e9a25f70 4347
84530511
SC
4348#ifdef SUBTARGET_PROLOGUE
4349 SUBTARGET_PROLOGUE;
0f290768 4350#endif
84530511 4351
bd09bdeb
RH
4352 pic_reg_used = false;
4353 if (pic_offset_table_rtx
4354 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4355 || current_function_profile))
4356 {
4357 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
4358
4359 if (alt_pic_reg_used != INVALID_REGNUM)
4360 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
4361
4362 pic_reg_used = true;
4363 }
4364
e9a25f70 4365 if (pic_reg_used)
c8c03509
RH
4366 {
4367 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
4368
66edd3b4
RH
4369 /* Even with accurate pre-reload life analysis, we can wind up
4370 deleting all references to the pic register after reload.
4371 Consider if cross-jumping unifies two sides of a branch
4372 controlled by a comparison vs the only read from a global.
4373 In that case, allow the set_got to be deleted, though we're
4374 too late to do anything about the ebx save in the prologue. */
c8c03509
RH
4375 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
4376 }
77a989d1 4377
66edd3b4
RH
4378 /* Prevent function calls from being scheduled before the call to mcount.
4379 In the pic_reg_used case, make sure that the got load isn't deleted. */
4380 if (current_function_profile)
4381 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
77a989d1
SC
4382}
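/* For a frame-needing function the push strategy therefore expands to
   the familiar sequence (sketch):

	pushl	%ebp
	movl	%esp, %ebp
	pushl	%ebx		/* ix86_emit_save_regs */
	subl	$N, %esp	/* frame.to_allocate */

   while the mov strategy allocates first and then stores the saved
   registers with independent moves, which can schedule better. */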
4383
da2d1d3a
JH
4384/* Emit code to restore saved registers using MOV insns. First register
4385 is restored from POINTER + OFFSET. */
4386static void
1020a5ab
RH
4387ix86_emit_restore_regs_using_mov (pointer, offset, maybe_eh_return)
4388 rtx pointer;
4389 int offset;
37a58036 4390 int maybe_eh_return;
da2d1d3a
JH
4391{
4392 int regno;
da2d1d3a 4393
4dd2ac2c 4394 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
1020a5ab 4395 if (ix86_save_reg (regno, maybe_eh_return))
da2d1d3a 4396 {
4dd2ac2c 4397 emit_move_insn (gen_rtx_REG (Pmode, regno),
b72f00af
RK
4398 adjust_address (gen_rtx_MEM (Pmode, pointer),
4399 Pmode, offset));
4dd2ac2c 4400 offset += UNITS_PER_WORD;
da2d1d3a
JH
4401 }
4402}
4403
0f290768 4404/* Restore function stack, frame, and registers. */
e9a25f70 4405
2a2ab3f9 4406void
1020a5ab
RH
4407ix86_expand_epilogue (style)
4408 int style;
2a2ab3f9 4409{
1c71e60e 4410 int regno;
fdb8a883 4411 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
4dd2ac2c 4412 struct ix86_frame frame;
65954bd8 4413 HOST_WIDE_INT offset;
4dd2ac2c
JH
4414
4415 ix86_compute_frame_layout (&frame);
2a2ab3f9 4416
a4f31c00 4417 /* Calculate start of saved registers relative to ebp. Special care
84e306b4
RH
4418 must be taken for the normal return case of a function using
4419 eh_return: the eax and edx registers are marked as saved, but not
4420 restored along this path. */
4421 offset = frame.nregs;
4422 if (current_function_calls_eh_return && style != 2)
4423 offset -= 2;
4424 offset *= -UNITS_PER_WORD;
2a2ab3f9 4425
fdb8a883
JW
4426 /* If we're only restoring one register and sp is not valid, then
4427 use a move instruction to restore the register, since it's
0f290768 4428 less work than reloading sp and popping the register.
da2d1d3a
JH
4429
4430 The default code results in a stack adjustment using an add/lea instruction,
4431 while this code results in a LEAVE instruction (or discrete equivalent),
4432 so it is profitable in some other cases as well, especially when there
4433 are no registers to restore. We also use this code when TARGET_USE_LEAVE
4434 and there is exactly one register to pop. This heuristic may need some
4435 tuning in the future. */
4dd2ac2c 4436 if ((!sp_valid && frame.nregs <= 1)
2ab0437e 4437 || (TARGET_EPILOGUE_USING_MOVE
6ab16dd9 4438 && use_fast_prologue_epilogue
c6036a37 4439 && (frame.nregs > 1 || frame.to_allocate))
4dd2ac2c 4440 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
2ab0437e 4441 || (frame_pointer_needed && TARGET_USE_LEAVE
6ab16dd9 4442 && use_fast_prologue_epilogue && frame.nregs == 1)
2ab0437e 4443 || current_function_calls_eh_return)
2a2ab3f9 4444 {
da2d1d3a
JH
4445 /* Restore registers. We can use ebp or esp to address the memory
4446 locations. If both are available, default to ebp, since offsets
4447 are known to be small. The only exception is esp pointing directly to the
4448 end of the block of saved registers, where we may simplify the addressing
4449 mode. */
4450
4dd2ac2c 4451 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
1020a5ab
RH
4452 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
4453 frame.to_allocate, style == 2);
da2d1d3a 4454 else
1020a5ab
RH
4455 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
4456 offset, style == 2);
4457
4458 /* eh_return epilogues need %ecx added to the stack pointer. */
4459 if (style == 2)
4460 {
4461 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
2a2ab3f9 4462
1020a5ab
RH
4463 if (frame_pointer_needed)
4464 {
4465 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
4466 tmp = plus_constant (tmp, UNITS_PER_WORD);
4467 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
4468
4469 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
4470 emit_move_insn (hard_frame_pointer_rtx, tmp);
4471
4472 emit_insn (gen_pro_epilogue_adjust_stack
f2042df3 4473 (stack_pointer_rtx, sa, const0_rtx));
1020a5ab
RH
4474 }
4475 else
4476 {
4477 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
4478 tmp = plus_constant (tmp, (frame.to_allocate
4479 + frame.nregs * UNITS_PER_WORD));
4480 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
4481 }
4482 }
4483 else if (!frame_pointer_needed)
f2042df3
RH
4484 emit_insn (gen_pro_epilogue_adjust_stack
4485 (stack_pointer_rtx, stack_pointer_rtx,
4486 GEN_INT (frame.to_allocate
4487 + frame.nregs * UNITS_PER_WORD)));
0f290768 4488 /* If not an i386, mov & pop is faster than "leave". */
6ab16dd9 4489 else if (TARGET_USE_LEAVE || optimize_size || !use_fast_prologue_epilogue)
8362f420 4490 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
c8c5cb99 4491 else
2a2ab3f9 4492 {
1c71e60e
JH
4493 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4494 hard_frame_pointer_rtx,
f2042df3 4495 const0_rtx));
8362f420
JH
4496 if (TARGET_64BIT)
4497 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4498 else
4499 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
e9a25f70
JL
4500 }
4501 }
1c71e60e 4502 else
68f654ec 4503 {
1c71e60e
JH
4504 /* First step is to deallocate the stack frame so that we can
4505 pop the registers. */
4506 if (!sp_valid)
4507 {
4508 if (!frame_pointer_needed)
4509 abort ();
4510 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4511 hard_frame_pointer_rtx,
f2042df3 4512 GEN_INT (offset)));
1c71e60e 4513 }
4dd2ac2c 4514 else if (frame.to_allocate)
f2042df3
RH
4515 emit_insn (gen_pro_epilogue_adjust_stack
4516 (stack_pointer_rtx, stack_pointer_rtx,
4517 GEN_INT (frame.to_allocate)));
1c71e60e 4518
4dd2ac2c 4519 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
1020a5ab 4520 if (ix86_save_reg (regno, false))
8362f420
JH
4521 {
4522 if (TARGET_64BIT)
4523 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
4524 else
4525 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
4526 }
4dd2ac2c 4527 if (frame_pointer_needed)
8362f420 4528 {
f5143c46 4529 /* The leave insn results in shorter dependency chains on CPUs that are
2ab0437e
JH
4530 able to grok it fast. */
4531 if (TARGET_USE_LEAVE)
4532 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4533 else if (TARGET_64BIT)
8362f420
JH
4534 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4535 else
4536 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4537 }
68f654ec 4538 }
68f654ec 4539
cbbf65e0 4540 /* Sibcall epilogues don't want a return instruction. */
1020a5ab 4541 if (style == 0)
cbbf65e0
RH
4542 return;
4543
2a2ab3f9
JVA
4544 if (current_function_pops_args && current_function_args_size)
4545 {
e075ae69 4546 rtx popc = GEN_INT (current_function_pops_args);
2a2ab3f9 4547
b8c752c8
UD
4548 /* The i386 can only pop 64K bytes. If asked to pop more, pop the
4549 return address, do an explicit add, and jump indirectly to the
0f290768 4550 caller. */
2a2ab3f9 4551
b8c752c8 4552 if (current_function_pops_args >= 65536)
2a2ab3f9 4553 {
e075ae69 4554 rtx ecx = gen_rtx_REG (SImode, 2);
e9a25f70 4555
8362f420
JH
4556 /* There is no "pascal" calling convention in the 64-bit ABI. */
4557 if (TARGET_64BIT)
b531087a 4558 abort ();
8362f420 4559
e075ae69
RH
4560 emit_insn (gen_popsi1 (ecx));
4561 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
11837777 4562 emit_jump_insn (gen_return_indirect_internal (ecx));
e9a25f70 4563 }
79325812 4564 else
e075ae69
RH
4565 emit_jump_insn (gen_return_pop_internal (popc));
4566 }
4567 else
4568 emit_jump_insn (gen_return_internal ());
4569}
bd09bdeb
RH
4570
4571/* Reset from the function's potential modifications. */
4572
4573static void
4574ix86_output_function_epilogue (file, size)
4575 FILE *file ATTRIBUTE_UNUSED;
4576 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
4577{
4578 if (pic_offset_table_rtx)
4579 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
4580}
e075ae69
RH
4581\f
4582/* Extract the parts of an RTL expression that is a valid memory address
b446e5a2
JH
4583 for an instruction. Return 0 if the structure of the address is
4584 grossly off. Return -1 if the address contains ASHIFT, so it is not
4585 strictly valid, but still used for computing length of lea instruction.
4586 */
e075ae69
RH
4587
4588static int
4589ix86_decompose_address (addr, out)
4590 register rtx addr;
4591 struct ix86_address *out;
4592{
4593 rtx base = NULL_RTX;
4594 rtx index = NULL_RTX;
4595 rtx disp = NULL_RTX;
4596 HOST_WIDE_INT scale = 1;
4597 rtx scale_rtx = NULL_RTX;
b446e5a2 4598 int retval = 1;
e075ae69 4599
1540f9eb 4600 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
e075ae69
RH
4601 base = addr;
4602 else if (GET_CODE (addr) == PLUS)
4603 {
4604 rtx op0 = XEXP (addr, 0);
4605 rtx op1 = XEXP (addr, 1);
4606 enum rtx_code code0 = GET_CODE (op0);
4607 enum rtx_code code1 = GET_CODE (op1);
4608
4609 if (code0 == REG || code0 == SUBREG)
4610 {
4611 if (code1 == REG || code1 == SUBREG)
4612 index = op0, base = op1; /* index + base */
4613 else
4614 base = op0, disp = op1; /* base + displacement */
4615 }
4616 else if (code0 == MULT)
e9a25f70 4617 {
e075ae69
RH
4618 index = XEXP (op0, 0);
4619 scale_rtx = XEXP (op0, 1);
4620 if (code1 == REG || code1 == SUBREG)
4621 base = op1; /* index*scale + base */
e9a25f70 4622 else
e075ae69
RH
4623 disp = op1; /* index*scale + disp */
4624 }
4625 else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
4626 {
4627 index = XEXP (XEXP (op0, 0), 0); /* index*scale + base + disp */
4628 scale_rtx = XEXP (XEXP (op0, 0), 1);
4629 base = XEXP (op0, 1);
4630 disp = op1;
2a2ab3f9 4631 }
e075ae69
RH
4632 else if (code0 == PLUS)
4633 {
4634 index = XEXP (op0, 0); /* index + base + disp */
4635 base = XEXP (op0, 1);
4636 disp = op1;
4637 }
4638 else
b446e5a2 4639 return 0;
e075ae69
RH
4640 }
4641 else if (GET_CODE (addr) == MULT)
4642 {
4643 index = XEXP (addr, 0); /* index*scale */
4644 scale_rtx = XEXP (addr, 1);
4645 }
4646 else if (GET_CODE (addr) == ASHIFT)
4647 {
4648 rtx tmp;
4649
4650 /* We're called for lea too, which implements ashift on occasion. */
4651 index = XEXP (addr, 0);
4652 tmp = XEXP (addr, 1);
4653 if (GET_CODE (tmp) != CONST_INT)
b446e5a2 4654 return 0;
e075ae69
RH
4655 scale = INTVAL (tmp);
4656 if ((unsigned HOST_WIDE_INT) scale > 3)
b446e5a2 4657 return 0;
e075ae69 4658 scale = 1 << scale;
b446e5a2 4659 retval = -1;
2a2ab3f9 4660 }
2a2ab3f9 4661 else
e075ae69
RH
4662 disp = addr; /* displacement */
4663
4664 /* Extract the integral value of scale. */
4665 if (scale_rtx)
e9a25f70 4666 {
e075ae69 4667 if (GET_CODE (scale_rtx) != CONST_INT)
b446e5a2 4668 return 0;
e075ae69 4669 scale = INTVAL (scale_rtx);
e9a25f70 4670 }
3b3c6a3f 4671
e075ae69
RH
4672 /* Allow arg pointer and stack pointer as index if there is not scaling */
4673 if (base && index && scale == 1
564d80f4
JH
4674 && (index == arg_pointer_rtx || index == frame_pointer_rtx
4675 || index == stack_pointer_rtx))
e075ae69
RH
4676 {
4677 rtx tmp = base;
4678 base = index;
4679 index = tmp;
4680 }
4681
4682 /* Special case: %ebp cannot be encoded as a base without a displacement. */
564d80f4
JH
4683 if ((base == hard_frame_pointer_rtx
4684 || base == frame_pointer_rtx
4685 || base == arg_pointer_rtx) && !disp)
e075ae69
RH
4686 disp = const0_rtx;
4687
4688 /* Special case: on K6, [%esi] makes the instruction vector decoded.
4689 Avoid this by transforming to [%esi+0]. */
4690 if (ix86_cpu == PROCESSOR_K6 && !optimize_size
4691 && base && !index && !disp
329e1d01 4692 && REG_P (base)
e075ae69
RH
4693 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
4694 disp = const0_rtx;
4695
4696 /* Special case: encode reg+reg instead of reg*2. */
4697 if (!base && index && scale && scale == 2)
4698 base = index, scale = 1;
0f290768 4699
e075ae69
RH
4700 /* Special case: scaling cannot be encoded without base or displacement. */
4701 if (!base && !disp && index && scale != 1)
4702 disp = const0_rtx;
4703
4704 out->base = base;
4705 out->index = index;
4706 out->disp = disp;
4707 out->scale = scale;
3b3c6a3f 4708
b446e5a2 4709 return retval;
e075ae69 4710}
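/* Decomposition example: the address 16(%ebx,%eax,4), i.e.
   (plus (plus (mult (reg eax) (const_int 4)) (reg ebx)) (const_int 16)),
   yields base == %ebx, index == %eax, scale == 4, disp == 16; a bare
   (reg ebp) gets disp forced to const0_rtx, since ebp cannot be
   encoded as a base without a displacement byte. */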
01329426
JH
4711\f
4712/* Return cost of the memory address x.
4713 For i386, it is better to use a complex address than let gcc copy
4714 the address into a reg and make a new pseudo. But not if the address
4715 requires two regs - that would mean more pseudos with longer
4716 lifetimes. */
4717int
4718ix86_address_cost (x)
4719 rtx x;
4720{
4721 struct ix86_address parts;
4722 int cost = 1;
3b3c6a3f 4723
01329426
JH
4724 if (!ix86_decompose_address (x, &parts))
4725 abort ();
4726
1540f9eb
JH
4727 if (parts.base && GET_CODE (parts.base) == SUBREG)
4728 parts.base = SUBREG_REG (parts.base);
4729 if (parts.index && GET_CODE (parts.index) == SUBREG)
4730 parts.index = SUBREG_REG (parts.index);
4731
01329426
JH
4732 /* More complex memory references are better. */
4733 if (parts.disp && parts.disp != const0_rtx)
4734 cost--;
4735
4736 /* Attempt to minimize number of registers in the address. */
4737 if ((parts.base
4738 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
4739 || (parts.index
4740 && (!REG_P (parts.index)
4741 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
4742 cost++;
4743
4744 if (parts.base
4745 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
4746 && parts.index
4747 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
4748 && parts.base != parts.index)
4749 cost++;
4750
4751 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
4752 since its predecode logic can't detect the length of instructions
4753 and it degenerates to vector decoding. Increase cost of such
4754 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
0f290768 4755 to split such addresses or even refuse such addresses at all.
01329426
JH
4756
4757 Following addressing modes are affected:
4758 [base+scale*index]
4759 [scale*index+disp]
4760 [base+index]
0f290768 4761
01329426
JH
4762 The first and last case may be avoidable by explicitly coding the zero in
4763 the memory address, but I don't have an AMD-K6 machine handy to check this
4764 theory. */
4765
4766 if (TARGET_K6
4767 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
4768 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
4769 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
4770 cost += 10;
0f290768 4771
01329426
JH
4772 return cost;
4773}
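/* Example costs: "4(%ebp)" scores 0 (a displacement is present and the
   base is a hard register), while an address adding two distinct
   pseudos scores 3, so gcc prefers folding a displacement into an
   existing register over building new pseudos. */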
4774\f
b949ea8b
JW
4775/* If X is a machine specific address (i.e. a symbol or label being
4776 referenced as a displacement from the GOT implemented using an
4777 UNSPEC), then return the base term. Otherwise return X. */
4778
4779rtx
4780ix86_find_base_term (x)
4781 rtx x;
4782{
4783 rtx term;
4784
6eb791fc
JH
4785 if (TARGET_64BIT)
4786 {
4787 if (GET_CODE (x) != CONST)
4788 return x;
4789 term = XEXP (x, 0);
4790 if (GET_CODE (term) == PLUS
4791 && (GET_CODE (XEXP (term, 1)) == CONST_INT
4792 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
4793 term = XEXP (term, 0);
4794 if (GET_CODE (term) != UNSPEC
8ee41eaf 4795 || XINT (term, 1) != UNSPEC_GOTPCREL)
6eb791fc
JH
4796 return x;
4797
4798 term = XVECEXP (term, 0, 0);
4799
4800 if (GET_CODE (term) != SYMBOL_REF
4801 && GET_CODE (term) != LABEL_REF)
4802 return x;
4803
4804 return term;
4805 }
4806
b949ea8b
JW
4807 if (GET_CODE (x) != PLUS
4808 || XEXP (x, 0) != pic_offset_table_rtx
4809 || GET_CODE (XEXP (x, 1)) != CONST)
4810 return x;
4811
4812 term = XEXP (XEXP (x, 1), 0);
4813
4814 if (GET_CODE (term) == PLUS && GET_CODE (XEXP (term, 1)) == CONST_INT)
4815 term = XEXP (term, 0);
4816
4817 if (GET_CODE (term) != UNSPEC
8ee41eaf 4818 || XINT (term, 1) != UNSPEC_GOTOFF)
b949ea8b
JW
4819 return x;
4820
4821 term = XVECEXP (term, 0, 0);
4822
4823 if (GET_CODE (term) != SYMBOL_REF
4824 && GET_CODE (term) != LABEL_REF)
4825 return x;
4826
4827 return term;
4828}
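/* E.g. a 32-bit -fpic reference to a static variable appears as
   (plus (reg ebx) (const (unspec [(symbol_ref "x")] UNSPEC_GOTOFF)));
   peeling off the GOTOFF wrapper and returning the symbol_ref lets
   alias analysis see the underlying object. */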
4829\f
f996902d
RH
4830/* Determine if a given RTX is a valid constant. We already know this
4831 satisfies CONSTANT_P. */
4832
4833bool
4834legitimate_constant_p (x)
4835 rtx x;
4836{
4837 rtx inner;
4838
4839 switch (GET_CODE (x))
4840 {
4841 case SYMBOL_REF:
4842 /* TLS symbols are not constant. */
4843 if (tls_symbolic_operand (x, Pmode))
4844 return false;
4845 break;
4846
4847 case CONST:
4848 inner = XEXP (x, 0);
4849
4850 /* Offsets of TLS symbols are never valid.
4851 Discourage CSE from creating them. */
4852 if (GET_CODE (inner) == PLUS
4853 && tls_symbolic_operand (XEXP (inner, 0), Pmode))
4854 return false;
4855
4856 /* Only some unspecs are valid as "constants". */
4857 if (GET_CODE (inner) == UNSPEC)
4858 switch (XINT (inner, 1))
4859 {
4860 case UNSPEC_TPOFF:
4861 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
f996902d
RH
4862 default:
4863 return false;
4864 }
4865 break;
4866
4867 default:
4868 break;
4869 }
4870
4871 /* Otherwise we handle everything else in the move patterns. */
4872 return true;
4873}
4874
4875/* Determine if a given RTX is a valid constant address. */
4876
4877bool
4878constant_address_p (x)
4879 rtx x;
4880{
4881 switch (GET_CODE (x))
4882 {
4883 case LABEL_REF:
4884 case CONST_INT:
4885 return true;
4886
4887 case CONST_DOUBLE:
4888 return TARGET_64BIT;
4889
4890 case CONST:
b069de3b
SS
4891 /* For Mach-O, really believe the CONST. */
4892 if (TARGET_MACHO)
4893 return true;
4894 /* Otherwise fall through. */
f996902d
RH
4895 case SYMBOL_REF:
4896 return !flag_pic && legitimate_constant_p (x);
4897
4898 default:
4899 return false;
4900 }
4901}
4902
4903/* Nonzero if the constant value X is a legitimate general operand
fce5a9f2 4904 when generating PIC code. It is given that flag_pic is on and
f996902d
RH
4905 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
4906
4907bool
4908legitimate_pic_operand_p (x)
4909 rtx x;
4910{
4911 rtx inner;
4912
4913 switch (GET_CODE (x))
4914 {
4915 case CONST:
4916 inner = XEXP (x, 0);
4917
4918 /* Only some unspecs are valid as "constants". */
4919 if (GET_CODE (inner) == UNSPEC)
4920 switch (XINT (inner, 1))
4921 {
4922 case UNSPEC_TPOFF:
4923 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
f996902d
RH
4924 default:
4925 return false;
4926 }
4927 /* FALLTHRU */
4928
4929 case SYMBOL_REF:
4930 case LABEL_REF:
4931 return legitimate_pic_address_disp_p (x);
4932
4933 default:
4934 return true;
4935 }
4936}
4937
e075ae69
RH
4938/* Determine if a given CONST RTX is a valid memory displacement
4939 in PIC mode. */
0f290768 4940
59be65f6 4941int
91bb873f
RH
4942legitimate_pic_address_disp_p (disp)
4943 register rtx disp;
4944{
f996902d
RH
4945 bool saw_plus;
4946
6eb791fc
JH
4947 /* In 64bit mode we can allow direct addresses of symbols and labels
4948 when they are not dynamic symbols. */
4949 if (TARGET_64BIT)
4950 {
4951 rtx x = disp;
4952 if (GET_CODE (disp) == CONST)
4953 x = XEXP (disp, 0);
4954 /* ??? Handle PIC code models */
4955 if (GET_CODE (x) == PLUS
4956 && (GET_CODE (XEXP (x, 1)) == CONST_INT
4957 && ix86_cmodel == CM_SMALL_PIC
4958 && INTVAL (XEXP (x, 1)) < 1024*1024*1024
4959 && INTVAL (XEXP (x, 1)) > -1024*1024*1024))
4960 x = XEXP (x, 0);
4961 if (local_symbolic_operand (x, Pmode))
4962 return 1;
4963 }
91bb873f
RH
4964 if (GET_CODE (disp) != CONST)
4965 return 0;
4966 disp = XEXP (disp, 0);
4967
6eb791fc
JH
4968 if (TARGET_64BIT)
4969 {
4970 /* It is unsafe to allow PLUS expressions. This limits the allowed distance
4971 of GOT tables. We should not need these anyway. */
4972 if (GET_CODE (disp) != UNSPEC
8ee41eaf 4973 || XINT (disp, 1) != UNSPEC_GOTPCREL)
6eb791fc
JH
4974 return 0;
4975
4976 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
4977 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
4978 return 0;
4979 return 1;
4980 }
4981
f996902d 4982 saw_plus = false;
91bb873f
RH
4983 if (GET_CODE (disp) == PLUS)
4984 {
4985 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
4986 return 0;
4987 disp = XEXP (disp, 0);
f996902d 4988 saw_plus = true;
91bb873f
RH
4989 }
4990
b069de3b
SS
4991 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O. */
4992 if (TARGET_MACHO && GET_CODE (disp) == MINUS)
4993 {
4994 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
4995 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
4996 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
4997 {
4998 const char *sym_name = XSTR (XEXP (disp, 1), 0);
4999 if (strstr (sym_name, "$pb") != 0)
5000 return 1;
5001 }
5002 }
5003
8ee41eaf 5004 if (GET_CODE (disp) != UNSPEC)
91bb873f
RH
5005 return 0;
5006
623fe810
RH
5007 switch (XINT (disp, 1))
5008 {
8ee41eaf 5009 case UNSPEC_GOT:
f996902d
RH
5010 if (saw_plus)
5011 return false;
623fe810 5012 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
8ee41eaf 5013 case UNSPEC_GOTOFF:
623fe810 5014 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
f996902d
RH
5015 case UNSPEC_GOTTPOFF:
5016 if (saw_plus)
5017 return false;
5018 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5019 case UNSPEC_NTPOFF:
5020 /* ??? Could support offset here. */
5021 if (saw_plus)
5022 return false;
5023 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5024 case UNSPEC_DTPOFF:
5025 /* ??? Could support offset here. */
5026 if (saw_plus)
5027 return false;
5028 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
623fe810 5029 }
fce5a9f2 5030
623fe810 5031 return 0;
91bb873f
RH
5032}
5033
e075ae69
RH
5034/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5035 memory address for an instruction. The MODE argument is the machine mode
5036 for the MEM expression that wants to use this address.
5037
 5038 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
5039 convert common non-canonical forms to canonical form so that they will
5040 be recognized. */
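/* For example, a base + index*scale + displacement address arrives here
 in the canonical shape (plus (plus (mult (reg) (const_int 4)) (reg))
 (const_int 16)), which ix86_decompose_address below splits into the
 four parts that are then validated one by one. */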
5041
3b3c6a3f
MM
5042int
5043legitimate_address_p (mode, addr, strict)
5044 enum machine_mode mode;
5045 register rtx addr;
5046 int strict;
5047{
e075ae69
RH
5048 struct ix86_address parts;
5049 rtx base, index, disp;
5050 HOST_WIDE_INT scale;
5051 const char *reason = NULL;
5052 rtx reason_rtx = NULL_RTX;
3b3c6a3f
MM
5053
5054 if (TARGET_DEBUG_ADDR)
5055 {
5056 fprintf (stderr,
e9a25f70 5057 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
3b3c6a3f 5058 GET_MODE_NAME (mode), strict);
3b3c6a3f
MM
5059 debug_rtx (addr);
5060 }
5061
9e20be0c
JJ
5062 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP)
5063 {
5064 if (TARGET_DEBUG_ADDR)
5065 fprintf (stderr, "Success.\n");
5066 return TRUE;
5067 }
5068
b446e5a2 5069 if (ix86_decompose_address (addr, &parts) <= 0)
3b3c6a3f 5070 {
e075ae69 5071 reason = "decomposition failed";
50e60bc3 5072 goto report_error;
3b3c6a3f
MM
5073 }
5074
e075ae69
RH
5075 base = parts.base;
5076 index = parts.index;
5077 disp = parts.disp;
5078 scale = parts.scale;
91f0226f 5079
e075ae69 5080 /* Validate base register.
e9a25f70
JL
5081
 5082 Don't allow SUBREGs here; they can lead to spill failures when the base
3d771dfd
MM
5083 is one word out of a two word structure, which is represented internally
5084 as a DImode int. */
e9a25f70 5085
3b3c6a3f
MM
5086 if (base)
5087 {
1540f9eb 5088 rtx reg;
e075ae69
RH
5089 reason_rtx = base;
5090
1540f9eb
JH
5091 if (GET_CODE (base) == SUBREG)
5092 reg = SUBREG_REG (base);
5093 else
5094 reg = base;
5095
5096 if (GET_CODE (reg) != REG)
3b3c6a3f 5097 {
e075ae69 5098 reason = "base is not a register";
50e60bc3 5099 goto report_error;
3b3c6a3f
MM
5100 }
5101
c954bd01
RH
5102 if (GET_MODE (base) != Pmode)
5103 {
e075ae69 5104 reason = "base is not in Pmode";
50e60bc3 5105 goto report_error;
c954bd01
RH
5106 }
5107
1540f9eb
JH
5108 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
5109 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
3b3c6a3f 5110 {
e075ae69 5111 reason = "base is not valid";
50e60bc3 5112 goto report_error;
3b3c6a3f
MM
5113 }
5114 }
5115
e075ae69 5116 /* Validate index register.
e9a25f70
JL
5117
 5118 Don't allow SUBREGs here; they can lead to spill failures when the index
3d771dfd
MM
5119 is one word out of a two word structure, which is represented internally
5120 as a DImode int. */
e075ae69
RH
5121
5122 if (index)
3b3c6a3f 5123 {
1540f9eb 5124 rtx reg;
e075ae69
RH
5125 reason_rtx = index;
5126
1540f9eb
JH
5127 if (GET_CODE (index) == SUBREG)
5128 reg = SUBREG_REG (index);
5129 else
5130 reg = index;
5131
5132 if (GET_CODE (reg) != REG)
3b3c6a3f 5133 {
e075ae69 5134 reason = "index is not a register";
50e60bc3 5135 goto report_error;
3b3c6a3f
MM
5136 }
5137
e075ae69 5138 if (GET_MODE (index) != Pmode)
c954bd01 5139 {
e075ae69 5140 reason = "index is not in Pmode";
50e60bc3 5141 goto report_error;
c954bd01
RH
5142 }
5143
1540f9eb
JH
5144 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
5145 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
3b3c6a3f 5146 {
e075ae69 5147 reason = "index is not valid";
50e60bc3 5148 goto report_error;
3b3c6a3f
MM
5149 }
5150 }
3b3c6a3f 5151
e075ae69
RH
5152 /* Validate scale factor. */
5153 if (scale != 1)
3b3c6a3f 5154 {
e075ae69
RH
5155 reason_rtx = GEN_INT (scale);
5156 if (!index)
3b3c6a3f 5157 {
e075ae69 5158 reason = "scale without index";
50e60bc3 5159 goto report_error;
3b3c6a3f
MM
5160 }
5161
e075ae69 5162 if (scale != 2 && scale != 4 && scale != 8)
3b3c6a3f 5163 {
e075ae69 5164 reason = "scale is not a valid multiplier";
50e60bc3 5165 goto report_error;
3b3c6a3f
MM
5166 }
5167 }
5168
91bb873f 5169 /* Validate displacement. */
3b3c6a3f
MM
5170 if (disp)
5171 {
e075ae69
RH
5172 reason_rtx = disp;
5173
0d7d98ee 5174 if (TARGET_64BIT)
3b3c6a3f 5175 {
0d7d98ee
JH
5176 if (!x86_64_sign_extended_value (disp))
5177 {
5178 reason = "displacement is out of range";
5179 goto report_error;
5180 }
5181 }
5182 else
5183 {
5184 if (GET_CODE (disp) == CONST_DOUBLE)
5185 {
5186 reason = "displacement is a const_double";
5187 goto report_error;
5188 }
3b3c6a3f
MM
5189 }
5190
f996902d
RH
5191 if (GET_CODE (disp) == CONST
5192 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
5193 switch (XINT (XEXP (disp, 0), 1))
5194 {
5195 case UNSPEC_GOT:
5196 case UNSPEC_GOTOFF:
5197 case UNSPEC_GOTPCREL:
5198 if (!flag_pic)
5199 abort ();
5200 goto is_legitimate_pic;
5201
5202 case UNSPEC_GOTTPOFF:
5203 case UNSPEC_NTPOFF:
5204 case UNSPEC_DTPOFF:
5205 break;
5206
5207 default:
5208 reason = "invalid address unspec";
5209 goto report_error;
5210 }
5211
b069de3b
SS
5212 else if (flag_pic && (SYMBOLIC_CONST (disp)
5213#if TARGET_MACHO
5214 && !machopic_operand_p (disp)
5215#endif
5216 ))
3b3c6a3f 5217 {
f996902d 5218 is_legitimate_pic:
0d7d98ee
JH
5219 if (TARGET_64BIT && (index || base))
5220 {
5221 reason = "non-constant pic memory reference";
5222 goto report_error;
5223 }
91bb873f
RH
5224 if (! legitimate_pic_address_disp_p (disp))
5225 {
e075ae69 5226 reason = "displacement is an invalid pic construct";
50e60bc3 5227 goto report_error;
91bb873f
RH
5228 }
5229
4e9efe54 5230 /* This code used to verify that a symbolic pic displacement
0f290768
KH
5231 includes the pic_offset_table_rtx register.
5232
4e9efe54
JH
 5233 While this is a good idea, unfortunately these constructs may
 5234 be created by the "adds using lea" optimization for incorrect
 5235 code like:
5236
5237 int a;
5238 int foo(int i)
5239 {
5240 return *(&a+i);
5241 }
5242
50e60bc3 5243 This code is nonsensical, but results in addressing the
4e9efe54 5244 GOT table with pic_offset_table_rtx as the base. We can't
f710504c 5245 just refuse it easily, since it gets matched by the
4e9efe54
JH
 5246 "addsi3" pattern, which is later split to lea when the
 5247 output register differs from the input. While this
 5248 could be handled by a separate addsi pattern for this case
 5249 that never results in lea, disabling this test seems to be
 5250 the easier and correct fix for the crash. */
3b3c6a3f 5251 }
f996902d
RH
5252 else if (!CONSTANT_ADDRESS_P (disp))
5253 {
5254 reason = "displacement is not constant";
5255 goto report_error;
5256 }
3b3c6a3f
MM
5257 }
5258
e075ae69 5259 /* Everything looks valid. */
3b3c6a3f 5260 if (TARGET_DEBUG_ADDR)
e075ae69 5261 fprintf (stderr, "Success.\n");
3b3c6a3f 5262 return TRUE;
e075ae69 5263
5bf0ebab 5264 report_error:
e075ae69
RH
5265 if (TARGET_DEBUG_ADDR)
5266 {
5267 fprintf (stderr, "Error: %s\n", reason);
5268 debug_rtx (reason_rtx);
5269 }
5270 return FALSE;
3b3c6a3f 5271}
3b3c6a3f 5272\f
55efb413
JW
 5273/* Return a unique alias set for the GOT. */
5274
0f290768 5275static HOST_WIDE_INT
55efb413
JW
5276ix86_GOT_alias_set ()
5277{
5bf0ebab
RH
5278 static HOST_WIDE_INT set = -1;
5279 if (set == -1)
5280 set = new_alias_set ();
5281 return set;
0f290768 5282}
55efb413 5283
3b3c6a3f
MM
5284/* Return a legitimate reference for ORIG (an address) using the
5285 register REG. If REG is 0, a new pseudo is generated.
5286
91bb873f 5287 There are two types of references that must be handled:
3b3c6a3f
MM
5288
5289 1. Global data references must load the address from the GOT, via
5290 the PIC reg. An insn is emitted to do this load, and the reg is
5291 returned.
5292
91bb873f
RH
5293 2. Static data references, constant pool addresses, and code labels
5294 compute the address as an offset from the GOT, whose base is in
5295 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
5296 differentiate them from global data objects. The returned
5297 address is the PIC reg + an unspec constant.
3b3c6a3f
MM
5298
5299 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
91bb873f 5300 reg also appears in the address. */
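/* Concretely, for case 2 this routine builds
 (plus pic_offset_table_rtx (const (unspec [sym] UNSPEC_GOTOFF))),
 and for case 1 it builds a load from
 (mem (plus pic_offset_table_rtx (const (unspec [sym] UNSPEC_GOT)))). */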
3b3c6a3f
MM
5301
5302rtx
5303legitimize_pic_address (orig, reg)
5304 rtx orig;
5305 rtx reg;
5306{
5307 rtx addr = orig;
5308 rtx new = orig;
91bb873f 5309 rtx base;
3b3c6a3f 5310
b069de3b
SS
5311#if TARGET_MACHO
5312 if (reg == 0)
5313 reg = gen_reg_rtx (Pmode);
5314 /* Use the generic Mach-O PIC machinery. */
5315 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
5316#endif
5317
623fe810 5318 if (local_symbolic_operand (addr, Pmode))
3b3c6a3f 5319 {
14f73b5a
JH
5320 /* In 64bit mode we can address such objects directly. */
5321 if (TARGET_64BIT)
5322 new = addr;
5323 else
5324 {
5325 /* This symbol may be referenced via a displacement from the PIC
5326 base address (@GOTOFF). */
3b3c6a3f 5327
66edd3b4
RH
5328 if (reload_in_progress)
5329 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
8ee41eaf 5330 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
14f73b5a
JH
5331 new = gen_rtx_CONST (Pmode, new);
5332 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
3b3c6a3f 5333
14f73b5a
JH
5334 if (reg != 0)
5335 {
5336 emit_move_insn (reg, new);
5337 new = reg;
5338 }
5339 }
3b3c6a3f 5340 }
91bb873f 5341 else if (GET_CODE (addr) == SYMBOL_REF)
3b3c6a3f 5342 {
14f73b5a
JH
5343 if (TARGET_64BIT)
5344 {
8ee41eaf 5345 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
14f73b5a
JH
5346 new = gen_rtx_CONST (Pmode, new);
5347 new = gen_rtx_MEM (Pmode, new);
5348 RTX_UNCHANGING_P (new) = 1;
5349 set_mem_alias_set (new, ix86_GOT_alias_set ());
5350
5351 if (reg == 0)
5352 reg = gen_reg_rtx (Pmode);
 5353 /* Use gen_movsi directly; otherwise the address is loaded
 5354 into a register for CSE. We don't want to CSE these addresses;
 5355 instead we CSE the addresses loaded from the GOT table, so skip this. */
5356 emit_insn (gen_movsi (reg, new));
5357 new = reg;
5358 }
5359 else
5360 {
5361 /* This symbol must be referenced via a load from the
5362 Global Offset Table (@GOT). */
3b3c6a3f 5363
66edd3b4
RH
5364 if (reload_in_progress)
5365 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
8ee41eaf 5366 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
14f73b5a
JH
5367 new = gen_rtx_CONST (Pmode, new);
5368 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5369 new = gen_rtx_MEM (Pmode, new);
5370 RTX_UNCHANGING_P (new) = 1;
5371 set_mem_alias_set (new, ix86_GOT_alias_set ());
3b3c6a3f 5372
14f73b5a
JH
5373 if (reg == 0)
5374 reg = gen_reg_rtx (Pmode);
5375 emit_move_insn (reg, new);
5376 new = reg;
5377 }
0f290768 5378 }
91bb873f
RH
5379 else
5380 {
5381 if (GET_CODE (addr) == CONST)
3b3c6a3f 5382 {
91bb873f 5383 addr = XEXP (addr, 0);
e3c8ea67
RH
5384
5385 /* We must match stuff we generate before. Assume the only
5386 unspecs that can get here are ours. Not that we could do
5387 anything with them anyway... */
5388 if (GET_CODE (addr) == UNSPEC
5389 || (GET_CODE (addr) == PLUS
5390 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
5391 return orig;
5392 if (GET_CODE (addr) != PLUS)
564d80f4 5393 abort ();
3b3c6a3f 5394 }
91bb873f
RH
5395 if (GET_CODE (addr) == PLUS)
5396 {
5397 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
e9a25f70 5398
91bb873f
RH
5399 /* Check first to see if this is a constant offset from a @GOTOFF
5400 symbol reference. */
623fe810 5401 if (local_symbolic_operand (op0, Pmode)
91bb873f
RH
5402 && GET_CODE (op1) == CONST_INT)
5403 {
6eb791fc
JH
5404 if (!TARGET_64BIT)
5405 {
66edd3b4
RH
5406 if (reload_in_progress)
5407 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
8ee41eaf
RH
5408 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
5409 UNSPEC_GOTOFF);
6eb791fc
JH
5410 new = gen_rtx_PLUS (Pmode, new, op1);
5411 new = gen_rtx_CONST (Pmode, new);
5412 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
91bb873f 5413
6eb791fc
JH
5414 if (reg != 0)
5415 {
5416 emit_move_insn (reg, new);
5417 new = reg;
5418 }
5419 }
5420 else
91bb873f 5421 {
6eb791fc 5422 /* ??? We need to limit offsets here. */
91bb873f
RH
5423 }
5424 }
5425 else
5426 {
5427 base = legitimize_pic_address (XEXP (addr, 0), reg);
5428 new = legitimize_pic_address (XEXP (addr, 1),
5429 base == reg ? NULL_RTX : reg);
5430
5431 if (GET_CODE (new) == CONST_INT)
5432 new = plus_constant (base, INTVAL (new));
5433 else
5434 {
5435 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
5436 {
5437 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
5438 new = XEXP (new, 1);
5439 }
5440 new = gen_rtx_PLUS (Pmode, base, new);
5441 }
5442 }
5443 }
3b3c6a3f
MM
5444 }
5445 return new;
5446}
fb49053f 5447
fb49053f 5448static void
f996902d 5449ix86_encode_section_info (decl, first)
fb49053f
RH
5450 tree decl;
5451 int first ATTRIBUTE_UNUSED;
5452{
f996902d
RH
5453 bool local_p = (*targetm.binds_local_p) (decl);
5454 rtx rtl, symbol;
5455
5456 rtl = DECL_P (decl) ? DECL_RTL (decl) : TREE_CST_RTL (decl);
5457 if (GET_CODE (rtl) != MEM)
5458 return;
5459 symbol = XEXP (rtl, 0);
5460 if (GET_CODE (symbol) != SYMBOL_REF)
5461 return;
5462
5463 /* For basic x86, if using PIC, mark a SYMBOL_REF for a non-global
5464 symbol so that we may access it directly in the GOT. */
5465
fb49053f 5466 if (flag_pic)
f996902d
RH
5467 SYMBOL_REF_FLAG (symbol) = local_p;
5468
5469 /* For ELF, encode thread-local data with %[GLil] for "global dynamic",
5470 "local dynamic", "initial exec" or "local exec" TLS models
5471 respectively. */
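 /* For example, a thread-local variable "foo" chosen for the local-exec
 model has its SYMBOL_REF name rewritten to "%lfoo" below;
 ix86_strip_name_encoding undoes this two-character prefix when the
 name is printed. */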
5472
5473 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL (decl))
fb49053f 5474 {
f996902d
RH
5475 const char *symbol_str;
5476 char *newstr;
5477 size_t len;
5478 enum tls_model kind;
5479
5480 if (!flag_pic)
5481 {
5482 if (local_p)
5483 kind = TLS_MODEL_LOCAL_EXEC;
5484 else
5485 kind = TLS_MODEL_INITIAL_EXEC;
5486 }
5487 /* Local dynamic is inefficient when we're not combining the
5488 parts of the address. */
5489 else if (optimize && local_p)
5490 kind = TLS_MODEL_LOCAL_DYNAMIC;
5491 else
5492 kind = TLS_MODEL_GLOBAL_DYNAMIC;
5493 if (kind < flag_tls_default)
5494 kind = flag_tls_default;
5495
5496 symbol_str = XSTR (symbol, 0);
fb49053f 5497
f996902d
RH
5498 if (symbol_str[0] == '%')
5499 {
5500 if (symbol_str[1] == tls_model_chars[kind])
5501 return;
5502 symbol_str += 2;
5503 }
5504 len = strlen (symbol_str) + 1;
5505 newstr = alloca (len + 2);
5506
5507 newstr[0] = '%';
5508 newstr[1] = tls_model_chars[kind];
5509 memcpy (newstr + 2, symbol_str, len);
5510
5511 XSTR (symbol, 0) = ggc_alloc_string (newstr, len + 2 - 1);
fb49053f
RH
5512 }
5513}
f996902d
RH
5514
5515/* Undo the above when printing symbol names. */
5516
5517static const char *
5518ix86_strip_name_encoding (str)
5519 const char *str;
5520{
5521 if (str[0] == '%')
5522 str += 2;
5523 if (str [0] == '*')
5524 str += 1;
5525 return str;
5526}
3b3c6a3f 5527\f
f996902d
RH
5528/* Load the thread pointer into a register. */
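/* The (unspec [(const_int 0)] UNSPEC_TP) address built here is accepted
 as-is by legitimate_address_p and printed by print_operand_address as
 a %gs-relative reference ("%gs:0" in AT&T syntax). */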
5529
5530static rtx
5531get_thread_pointer ()
5532{
5533 rtx tp;
5534
5535 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
9e20be0c
JJ
5536 tp = gen_rtx_MEM (Pmode, tp);
5537 RTX_UNCHANGING_P (tp) = 1;
5538 set_mem_alias_set (tp, ix86_GOT_alias_set ());
f996902d
RH
5539 tp = force_reg (Pmode, tp);
5540
5541 return tp;
5542}
fce5a9f2 5543
3b3c6a3f
MM
5544/* Try machine-dependent ways of modifying an illegitimate address
5545 to be legitimate. If we find one, return the new, valid address.
5546 This macro is used in only one place: `memory_address' in explow.c.
5547
5548 OLDX is the address as it was before break_out_memory_refs was called.
5549 In some cases it is useful to look at this to decide what needs to be done.
5550
5551 MODE and WIN are passed so that this macro can use
5552 GO_IF_LEGITIMATE_ADDRESS.
5553
5554 It is always safe for this macro to do nothing. It exists to recognize
5555 opportunities to optimize the output.
5556
5557 For the 80386, we handle X+REG by loading X into a register R and
5558 using R+REG. R will go in a general reg and indexing will be used.
5559 However, if REG is a broken-out memory address or multiplication,
5560 nothing needs to be done because REG can certainly go in a general reg.
5561
5562 When -fpic is used, special handling is needed for symbolic references.
5563 See comments by legitimize_pic_address in i386.c for details. */
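/* For instance, the canonicalization below rewrites
 (plus (ashift (reg) (const_int 2)) (reg)) into
 (plus (mult (reg) (const_int 4)) (reg)), which maps directly onto the
 scaled-index addressing mode. */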
5564
5565rtx
5566legitimize_address (x, oldx, mode)
5567 register rtx x;
bb5177ac 5568 register rtx oldx ATTRIBUTE_UNUSED;
3b3c6a3f
MM
5569 enum machine_mode mode;
5570{
5571 int changed = 0;
5572 unsigned log;
5573
5574 if (TARGET_DEBUG_ADDR)
5575 {
e9a25f70
JL
5576 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
5577 GET_MODE_NAME (mode));
3b3c6a3f
MM
5578 debug_rtx (x);
5579 }
5580
f996902d
RH
5581 log = tls_symbolic_operand (x, mode);
5582 if (log)
5583 {
5584 rtx dest, base, off, pic;
5585
755ac5d4 5586 switch (log)
f996902d
RH
5587 {
5588 case TLS_MODEL_GLOBAL_DYNAMIC:
5589 dest = gen_reg_rtx (Pmode);
5590 emit_insn (gen_tls_global_dynamic (dest, x));
5591 break;
5592
5593 case TLS_MODEL_LOCAL_DYNAMIC:
5594 base = gen_reg_rtx (Pmode);
5595 emit_insn (gen_tls_local_dynamic_base (base));
5596
5597 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
5598 off = gen_rtx_CONST (Pmode, off);
5599
5600 return gen_rtx_PLUS (Pmode, base, off);
5601
5602 case TLS_MODEL_INITIAL_EXEC:
5603 if (flag_pic)
5604 {
66edd3b4
RH
5605 if (reload_in_progress)
5606 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
f996902d
RH
5607 pic = pic_offset_table_rtx;
5608 }
5609 else
5610 {
5611 pic = gen_reg_rtx (Pmode);
5612 emit_insn (gen_set_got (pic));
5613 }
5614
5615 base = get_thread_pointer ();
5616
5617 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_GOTTPOFF);
5618 off = gen_rtx_CONST (Pmode, off);
5619 off = gen_rtx_PLUS (Pmode, pic, off);
5620 off = gen_rtx_MEM (Pmode, off);
5621 RTX_UNCHANGING_P (off) = 1;
5622 set_mem_alias_set (off, ix86_GOT_alias_set ());
5623
 5624 /* Damn Sun for specifying a set of dynamic relocations without
5625 considering the two-operand nature of the architecture!
5626 We'd be much better off with a "GOTNTPOFF" relocation that
5627 already contained the negated constant. */
 5628 /* ??? Using negl and reg+reg addressing appears to be a loss
5629 size-wise. The negl is two bytes, just like the extra movl
5630 incurred by the two-operand subl, but reg+reg addressing
5631 uses the two-byte modrm form, unlike plain reg. */
5632
5633 dest = gen_reg_rtx (Pmode);
5634 emit_insn (gen_subsi3 (dest, base, off));
5635 break;
5636
5637 case TLS_MODEL_LOCAL_EXEC:
5638 base = get_thread_pointer ();
5639
5640 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
5641 TARGET_GNU_TLS ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
5642 off = gen_rtx_CONST (Pmode, off);
5643
5644 if (TARGET_GNU_TLS)
5645 return gen_rtx_PLUS (Pmode, base, off);
5646 else
5647 {
5648 dest = gen_reg_rtx (Pmode);
5649 emit_insn (gen_subsi3 (dest, base, off));
5650 }
5651 break;
5652
5653 default:
5654 abort ();
5655 }
5656
5657 return dest;
5658 }
5659
3b3c6a3f
MM
5660 if (flag_pic && SYMBOLIC_CONST (x))
5661 return legitimize_pic_address (x, 0);
5662
5663 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
5664 if (GET_CODE (x) == ASHIFT
5665 && GET_CODE (XEXP (x, 1)) == CONST_INT
b531087a 5666 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
3b3c6a3f
MM
5667 {
5668 changed = 1;
a269a03c
JC
5669 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
5670 GEN_INT (1 << log));
3b3c6a3f
MM
5671 }
5672
5673 if (GET_CODE (x) == PLUS)
5674 {
0f290768 5675 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
e9a25f70 5676
3b3c6a3f
MM
5677 if (GET_CODE (XEXP (x, 0)) == ASHIFT
5678 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
b531087a 5679 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
3b3c6a3f
MM
5680 {
5681 changed = 1;
c5c76735
JL
5682 XEXP (x, 0) = gen_rtx_MULT (Pmode,
5683 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
5684 GEN_INT (1 << log));
3b3c6a3f
MM
5685 }
5686
5687 if (GET_CODE (XEXP (x, 1)) == ASHIFT
5688 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
b531087a 5689 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
3b3c6a3f
MM
5690 {
5691 changed = 1;
c5c76735
JL
5692 XEXP (x, 1) = gen_rtx_MULT (Pmode,
5693 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
5694 GEN_INT (1 << log));
3b3c6a3f
MM
5695 }
5696
0f290768 5697 /* Put multiply first if it isn't already. */
3b3c6a3f
MM
5698 if (GET_CODE (XEXP (x, 1)) == MULT)
5699 {
5700 rtx tmp = XEXP (x, 0);
5701 XEXP (x, 0) = XEXP (x, 1);
5702 XEXP (x, 1) = tmp;
5703 changed = 1;
5704 }
5705
5706 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
5707 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
5708 created by virtual register instantiation, register elimination, and
5709 similar optimizations. */
5710 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
5711 {
5712 changed = 1;
c5c76735
JL
5713 x = gen_rtx_PLUS (Pmode,
5714 gen_rtx_PLUS (Pmode, XEXP (x, 0),
5715 XEXP (XEXP (x, 1), 0)),
5716 XEXP (XEXP (x, 1), 1));
3b3c6a3f
MM
5717 }
5718
e9a25f70
JL
5719 /* Canonicalize
5720 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
3b3c6a3f
MM
5721 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
5722 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
5723 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5724 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
5725 && CONSTANT_P (XEXP (x, 1)))
5726 {
00c79232
ML
5727 rtx constant;
5728 rtx other = NULL_RTX;
3b3c6a3f
MM
5729
5730 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5731 {
5732 constant = XEXP (x, 1);
5733 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
5734 }
5735 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
5736 {
5737 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
5738 other = XEXP (x, 1);
5739 }
5740 else
5741 constant = 0;
5742
5743 if (constant)
5744 {
5745 changed = 1;
c5c76735
JL
5746 x = gen_rtx_PLUS (Pmode,
5747 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
5748 XEXP (XEXP (XEXP (x, 0), 1), 0)),
5749 plus_constant (other, INTVAL (constant)));
3b3c6a3f
MM
5750 }
5751 }
5752
5753 if (changed && legitimate_address_p (mode, x, FALSE))
5754 return x;
5755
5756 if (GET_CODE (XEXP (x, 0)) == MULT)
5757 {
5758 changed = 1;
5759 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
5760 }
5761
5762 if (GET_CODE (XEXP (x, 1)) == MULT)
5763 {
5764 changed = 1;
5765 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
5766 }
5767
5768 if (changed
5769 && GET_CODE (XEXP (x, 1)) == REG
5770 && GET_CODE (XEXP (x, 0)) == REG)
5771 return x;
5772
5773 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
5774 {
5775 changed = 1;
5776 x = legitimize_pic_address (x, 0);
5777 }
5778
5779 if (changed && legitimate_address_p (mode, x, FALSE))
5780 return x;
5781
5782 if (GET_CODE (XEXP (x, 0)) == REG)
5783 {
5784 register rtx temp = gen_reg_rtx (Pmode);
5785 register rtx val = force_operand (XEXP (x, 1), temp);
5786 if (val != temp)
5787 emit_move_insn (temp, val);
5788
5789 XEXP (x, 1) = temp;
5790 return x;
5791 }
5792
5793 else if (GET_CODE (XEXP (x, 1)) == REG)
5794 {
5795 register rtx temp = gen_reg_rtx (Pmode);
5796 register rtx val = force_operand (XEXP (x, 0), temp);
5797 if (val != temp)
5798 emit_move_insn (temp, val);
5799
5800 XEXP (x, 0) = temp;
5801 return x;
5802 }
5803 }
5804
5805 return x;
5806}
2a2ab3f9
JVA
5807\f
5808/* Print an integer constant expression in assembler syntax. Addition
5809 and subtraction are the only arithmetic that may appear in these
5810 expressions. FILE is the stdio stream to write to, X is the rtx, and
5811 CODE is the operand print code from the output string. */
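/* For example, (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF)) is
 printed as "foo@GOTOFF", and a MINUS expression is wrapped in the
 grouping characters of the current assembler dialect. */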
5812
5813static void
5814output_pic_addr_const (file, x, code)
5815 FILE *file;
5816 rtx x;
5817 int code;
5818{
5819 char buf[256];
5820
5821 switch (GET_CODE (x))
5822 {
5823 case PC:
5824 if (flag_pic)
5825 putc ('.', file);
5826 else
5827 abort ();
5828 break;
5829
5830 case SYMBOL_REF:
91bb873f 5831 assemble_name (file, XSTR (x, 0));
b069de3b 5832 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_FLAG (x))
91bb873f 5833 fputs ("@PLT", file);
2a2ab3f9
JVA
5834 break;
5835
91bb873f
RH
5836 case LABEL_REF:
5837 x = XEXP (x, 0);
5838 /* FALLTHRU */
2a2ab3f9
JVA
5839 case CODE_LABEL:
5840 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
5841 assemble_name (asm_out_file, buf);
5842 break;
5843
5844 case CONST_INT:
f64cecad 5845 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
2a2ab3f9
JVA
5846 break;
5847
5848 case CONST:
5849 /* This used to output parentheses around the expression,
5850 but that does not work on the 386 (either ATT or BSD assembler). */
5851 output_pic_addr_const (file, XEXP (x, 0), code);
5852 break;
5853
5854 case CONST_DOUBLE:
5855 if (GET_MODE (x) == VOIDmode)
5856 {
5857 /* We can use %d if the number is <32 bits and positive. */
5858 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
f64cecad
JC
5859 fprintf (file, "0x%lx%08lx",
5860 (unsigned long) CONST_DOUBLE_HIGH (x),
5861 (unsigned long) CONST_DOUBLE_LOW (x));
2a2ab3f9 5862 else
f64cecad 5863 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
2a2ab3f9
JVA
5864 }
5865 else
5866 /* We can't handle floating point constants;
5867 PRINT_OPERAND must handle them. */
5868 output_operand_lossage ("floating constant misused");
5869 break;
5870
5871 case PLUS:
e9a25f70 5872 /* Some assemblers need integer constants to appear first. */
2a2ab3f9
JVA
5873 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
5874 {
2a2ab3f9 5875 output_pic_addr_const (file, XEXP (x, 0), code);
e075ae69 5876 putc ('+', file);
e9a25f70 5877 output_pic_addr_const (file, XEXP (x, 1), code);
2a2ab3f9 5878 }
91bb873f 5879 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
2a2ab3f9 5880 {
2a2ab3f9 5881 output_pic_addr_const (file, XEXP (x, 1), code);
e075ae69 5882 putc ('+', file);
e9a25f70 5883 output_pic_addr_const (file, XEXP (x, 0), code);
2a2ab3f9 5884 }
91bb873f
RH
5885 else
5886 abort ();
2a2ab3f9
JVA
5887 break;
5888
5889 case MINUS:
b069de3b
SS
5890 if (!TARGET_MACHO)
5891 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
2a2ab3f9 5892 output_pic_addr_const (file, XEXP (x, 0), code);
e075ae69 5893 putc ('-', file);
2a2ab3f9 5894 output_pic_addr_const (file, XEXP (x, 1), code);
b069de3b
SS
5895 if (!TARGET_MACHO)
5896 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
2a2ab3f9
JVA
5897 break;
5898
91bb873f
RH
5899 case UNSPEC:
5900 if (XVECLEN (x, 0) != 1)
5bf0ebab 5901 abort ();
91bb873f
RH
5902 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
5903 switch (XINT (x, 1))
77ebd435 5904 {
8ee41eaf 5905 case UNSPEC_GOT:
77ebd435
AJ
5906 fputs ("@GOT", file);
5907 break;
8ee41eaf 5908 case UNSPEC_GOTOFF:
77ebd435
AJ
5909 fputs ("@GOTOFF", file);
5910 break;
8ee41eaf 5911 case UNSPEC_GOTPCREL:
edfe8595 5912 fputs ("@GOTPCREL(%rip)", file);
6eb791fc 5913 break;
f996902d
RH
5914 case UNSPEC_GOTTPOFF:
5915 fputs ("@GOTTPOFF", file);
5916 break;
5917 case UNSPEC_TPOFF:
5918 fputs ("@TPOFF", file);
5919 break;
5920 case UNSPEC_NTPOFF:
5921 fputs ("@NTPOFF", file);
5922 break;
5923 case UNSPEC_DTPOFF:
5924 fputs ("@DTPOFF", file);
5925 break;
77ebd435
AJ
5926 default:
5927 output_operand_lossage ("invalid UNSPEC as operand");
5928 break;
5929 }
91bb873f
RH
5930 break;
5931
2a2ab3f9
JVA
5932 default:
5933 output_operand_lossage ("invalid expression as operand");
5934 }
5935}
1865dbb5 5936
0f290768 5937/* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
1865dbb5
JM
5938 We need to handle our special PIC relocations. */
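/* The address is emitted via ASM_LONG (ASM_QUAD for 64-bit targets),
 and with -fpic it goes through output_pic_addr_const so that unspec
 forms such as "foo@GOTOFF" keep their relocation suffix. */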
5939
0f290768 5940void
1865dbb5
JM
5941i386_dwarf_output_addr_const (file, x)
5942 FILE *file;
5943 rtx x;
5944{
14f73b5a 5945#ifdef ASM_QUAD
18b5b8d6 5946 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
14f73b5a
JH
5947#else
5948 if (TARGET_64BIT)
5949 abort ();
18b5b8d6 5950 fprintf (file, "%s", ASM_LONG);
14f73b5a 5951#endif
1865dbb5
JM
5952 if (flag_pic)
5953 output_pic_addr_const (file, x, '\0');
5954 else
5955 output_addr_const (file, x);
5956 fputc ('\n', file);
5957}
5958
b9203463
RH
5959/* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
5960 We need to emit DTP-relative relocations. */
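/* For a 4-byte entry this emits something like ".long foo@DTPOFF";
 8-byte entries use ASM_QUAD where the assembler provides it. */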
5961
5962void
5963i386_output_dwarf_dtprel (file, size, x)
5964 FILE *file;
5965 int size;
5966 rtx x;
5967{
5968 switch (size)
5969 {
5970 case 4:
5971 fputs (ASM_LONG, file);
5972 break;
5973 case 8:
5974#ifdef ASM_QUAD
5975 fputs (ASM_QUAD, file);
5976 break;
5977#endif
5978 default:
5979 abort ();
5980 }
5981
5982 output_addr_const (file, x);
5983 fputs ("@DTPOFF", file);
5984}
5985
1865dbb5
JM
5986/* In the name of slightly smaller debug output, and to cater to
 5987 general assembler lossage, recognize PIC+GOTOFF and turn it back
5988 into a direct symbol reference. */
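/* For example, (plus pic_reg (const (unspec [sym] UNSPEC_GOTOFF)))
 collapses back to the bare symbol; the 64-bit @GOTPCREL form is
 handled first since it carries no explicit PIC register. */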
5989
5990rtx
5991i386_simplify_dwarf_addr (orig_x)
5992 rtx orig_x;
5993{
ec65b2e3 5994 rtx x = orig_x, y;
1865dbb5 5995
4c8c0dec
JJ
5996 if (GET_CODE (x) == MEM)
5997 x = XEXP (x, 0);
5998
6eb791fc
JH
5999 if (TARGET_64BIT)
6000 {
6001 if (GET_CODE (x) != CONST
6002 || GET_CODE (XEXP (x, 0)) != UNSPEC
8ee41eaf 6003 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
4c8c0dec 6004 || GET_CODE (orig_x) != MEM)
6eb791fc
JH
6005 return orig_x;
6006 return XVECEXP (XEXP (x, 0), 0, 0);
6007 }
6008
1865dbb5 6009 if (GET_CODE (x) != PLUS
1865dbb5
JM
6010 || GET_CODE (XEXP (x, 1)) != CONST)
6011 return orig_x;
6012
ec65b2e3
JJ
6013 if (GET_CODE (XEXP (x, 0)) == REG
6014 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6015 /* %ebx + GOT/GOTOFF */
6016 y = NULL;
6017 else if (GET_CODE (XEXP (x, 0)) == PLUS)
6018 {
6019 /* %ebx + %reg * scale + GOT/GOTOFF */
6020 y = XEXP (x, 0);
6021 if (GET_CODE (XEXP (y, 0)) == REG
6022 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
6023 y = XEXP (y, 1);
6024 else if (GET_CODE (XEXP (y, 1)) == REG
6025 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
6026 y = XEXP (y, 0);
6027 else
6028 return orig_x;
6029 if (GET_CODE (y) != REG
6030 && GET_CODE (y) != MULT
6031 && GET_CODE (y) != ASHIFT)
6032 return orig_x;
6033 }
6034 else
6035 return orig_x;
6036
1865dbb5
JM
6037 x = XEXP (XEXP (x, 1), 0);
6038 if (GET_CODE (x) == UNSPEC
8ee41eaf
RH
6039 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6040 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
ec65b2e3
JJ
6041 {
6042 if (y)
6043 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
6044 return XVECEXP (x, 0, 0);
6045 }
1865dbb5
JM
6046
6047 if (GET_CODE (x) == PLUS
6048 && GET_CODE (XEXP (x, 0)) == UNSPEC
6049 && GET_CODE (XEXP (x, 1)) == CONST_INT
8ee41eaf
RH
6050 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6051 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
6052 && GET_CODE (orig_x) != MEM)))
ec65b2e3
JJ
6053 {
6054 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
6055 if (y)
6056 return gen_rtx_PLUS (Pmode, y, x);
6057 return x;
6058 }
1865dbb5
JM
6059
6060 return orig_x;
6061}
2a2ab3f9 6062\f
a269a03c 6063static void
e075ae69 6064put_condition_code (code, mode, reverse, fp, file)
a269a03c 6065 enum rtx_code code;
e075ae69
RH
6066 enum machine_mode mode;
6067 int reverse, fp;
a269a03c
JC
6068 FILE *file;
6069{
a269a03c
JC
6070 const char *suffix;
6071
9a915772
JH
6072 if (mode == CCFPmode || mode == CCFPUmode)
6073 {
6074 enum rtx_code second_code, bypass_code;
6075 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
6076 if (bypass_code != NIL || second_code != NIL)
b531087a 6077 abort ();
9a915772
JH
6078 code = ix86_fp_compare_code_to_integer (code);
6079 mode = CCmode;
6080 }
a269a03c
JC
6081 if (reverse)
6082 code = reverse_condition (code);
e075ae69 6083
a269a03c
JC
6084 switch (code)
6085 {
6086 case EQ:
6087 suffix = "e";
6088 break;
a269a03c
JC
6089 case NE:
6090 suffix = "ne";
6091 break;
a269a03c 6092 case GT:
7e08e190 6093 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
e075ae69
RH
6094 abort ();
6095 suffix = "g";
a269a03c 6096 break;
a269a03c 6097 case GTU:
e075ae69
RH
6098 /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
6099 Those same assemblers have the same but opposite losage on cmov. */
7e08e190 6100 if (mode != CCmode)
0f290768 6101 abort ();
e075ae69 6102 suffix = fp ? "nbe" : "a";
a269a03c 6103 break;
a269a03c 6104 case LT:
9076b9c1 6105 if (mode == CCNOmode || mode == CCGOCmode)
a269a03c 6106 suffix = "s";
7e08e190 6107 else if (mode == CCmode || mode == CCGCmode)
e075ae69 6108 suffix = "l";
9076b9c1 6109 else
0f290768 6110 abort ();
a269a03c 6111 break;
a269a03c 6112 case LTU:
9076b9c1 6113 if (mode != CCmode)
0f290768 6114 abort ();
a269a03c
JC
6115 suffix = "b";
6116 break;
a269a03c 6117 case GE:
9076b9c1 6118 if (mode == CCNOmode || mode == CCGOCmode)
a269a03c 6119 suffix = "ns";
7e08e190 6120 else if (mode == CCmode || mode == CCGCmode)
e075ae69 6121 suffix = "ge";
9076b9c1 6122 else
0f290768 6123 abort ();
a269a03c 6124 break;
a269a03c 6125 case GEU:
e075ae69 6126 /* ??? As above. */
7e08e190 6127 if (mode != CCmode)
0f290768 6128 abort ();
7e08e190 6129 suffix = fp ? "nb" : "ae";
a269a03c 6130 break;
a269a03c 6131 case LE:
7e08e190 6132 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
e075ae69
RH
6133 abort ();
6134 suffix = "le";
a269a03c 6135 break;
a269a03c 6136 case LEU:
9076b9c1
JH
6137 if (mode != CCmode)
6138 abort ();
7e08e190 6139 suffix = "be";
a269a03c 6140 break;
3a3677ff 6141 case UNORDERED:
9e7adcb3 6142 suffix = fp ? "u" : "p";
3a3677ff
RH
6143 break;
6144 case ORDERED:
9e7adcb3 6145 suffix = fp ? "nu" : "np";
3a3677ff 6146 break;
a269a03c
JC
6147 default:
6148 abort ();
6149 }
6150 fputs (suffix, file);
6151}
6152
e075ae69
RH
6153void
6154print_reg (x, code, file)
6155 rtx x;
6156 int code;
6157 FILE *file;
e5cb57e8 6158{
e075ae69 6159 if (REGNO (x) == ARG_POINTER_REGNUM
564d80f4 6160 || REGNO (x) == FRAME_POINTER_REGNUM
e075ae69
RH
6161 || REGNO (x) == FLAGS_REG
6162 || REGNO (x) == FPSR_REG)
6163 abort ();
e9a25f70 6164
5bf0ebab 6165 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
e075ae69
RH
6166 putc ('%', file);
6167
ef6257cd 6168 if (code == 'w' || MMX_REG_P (x))
e075ae69
RH
6169 code = 2;
6170 else if (code == 'b')
6171 code = 1;
6172 else if (code == 'k')
6173 code = 4;
3f3f2124
JH
6174 else if (code == 'q')
6175 code = 8;
e075ae69
RH
6176 else if (code == 'y')
6177 code = 3;
6178 else if (code == 'h')
6179 code = 0;
6180 else
6181 code = GET_MODE_SIZE (GET_MODE (x));
e9a25f70 6182
3f3f2124
JH
 6183 /* Irritatingly, AMD extended registers use a different naming convention
6184 from the normal registers. */
6185 if (REX_INT_REG_P (x))
6186 {
885a70fd
JH
6187 if (!TARGET_64BIT)
6188 abort ();
3f3f2124
JH
6189 switch (code)
6190 {
ef6257cd 6191 case 0:
c725bd79 6192 error ("extended registers have no high halves");
3f3f2124
JH
6193 break;
6194 case 1:
6195 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
6196 break;
6197 case 2:
6198 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
6199 break;
6200 case 4:
6201 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
6202 break;
6203 case 8:
6204 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
6205 break;
6206 default:
c725bd79 6207 error ("unsupported operand size for extended register");
3f3f2124
JH
6208 break;
6209 }
6210 return;
6211 }
e075ae69
RH
6212 switch (code)
6213 {
6214 case 3:
6215 if (STACK_TOP_P (x))
6216 {
6217 fputs ("st(0)", file);
6218 break;
6219 }
6220 /* FALLTHRU */
e075ae69 6221 case 8:
3f3f2124 6222 case 4:
e075ae69 6223 case 12:
446988df 6224 if (! ANY_FP_REG_P (x))
885a70fd 6225 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
e075ae69 6226 /* FALLTHRU */
a7180f70 6227 case 16:
e075ae69
RH
6228 case 2:
6229 fputs (hi_reg_name[REGNO (x)], file);
6230 break;
6231 case 1:
6232 fputs (qi_reg_name[REGNO (x)], file);
6233 break;
6234 case 0:
6235 fputs (qi_high_reg_name[REGNO (x)], file);
6236 break;
6237 default:
6238 abort ();
fe25fea3 6239 }
e5cb57e8
SC
6240}
6241
f996902d
RH
6242/* Locate some local-dynamic symbol still in use by this function
6243 so that we can print its name in some tls_local_dynamic_base
6244 pattern. */
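/* The name is found lazily: the insn stream is walked with for_each_rtx
 and the first local-dynamic SYMBOL_REF found is cached in
 cfun->machine->some_ld_name. */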
6245
6246static const char *
6247get_some_local_dynamic_name ()
6248{
6249 rtx insn;
6250
6251 if (cfun->machine->some_ld_name)
6252 return cfun->machine->some_ld_name;
6253
6254 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
6255 if (INSN_P (insn)
6256 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
6257 return cfun->machine->some_ld_name;
6258
6259 abort ();
6260}
6261
6262static int
6263get_some_local_dynamic_name_1 (px, data)
6264 rtx *px;
6265 void *data ATTRIBUTE_UNUSED;
6266{
6267 rtx x = *px;
6268
6269 if (GET_CODE (x) == SYMBOL_REF
6270 && local_dynamic_symbolic_operand (x, Pmode))
6271 {
6272 cfun->machine->some_ld_name = XSTR (x, 0);
6273 return 1;
6274 }
6275
6276 return 0;
6277}
6278
2a2ab3f9 6279/* Meaning of CODE:
fe25fea3 6280 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
e5cb57e8 6281 C -- print opcode suffix for set/cmov insn.
fe25fea3 6282 c -- like C, but print reversed condition
ef6257cd 6283 F,f -- likewise, but for floating-point.
048b1c95
JJ
6284 O -- if CMOV_SUN_AS_SYNTAX, expand to "w.", "l." or "q.", otherwise
6285 nothing
2a2ab3f9
JVA
6286 R -- print the prefix for register names.
6287 z -- print the opcode suffix for the size of the current operand.
6288 * -- print a star (in certain assembler syntax)
fb204271 6289 A -- print an absolute memory reference.
2a2ab3f9 6290 w -- print the operand as if it's a "word" (HImode) even if it isn't.
2d49677f
SC
 6291 s -- print a shift double count, followed by the assembler's argument
6292 delimiter.
fe25fea3
SC
6293 b -- print the QImode name of the register for the indicated operand.
6294 %b0 would print %al if operands[0] is reg 0.
6295 w -- likewise, print the HImode name of the register.
6296 k -- likewise, print the SImode name of the register.
3f3f2124 6297 q -- likewise, print the DImode name of the register.
ef6257cd
JH
6298 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
6299 y -- print "st(0)" instead of "st" as a register.
a46d1d38 6300 D -- print condition for SSE cmp instruction.
ef6257cd
JH
6301 P -- if PIC, print an @PLT suffix.
6302 X -- don't print any sort of PIC '@' suffix for a symbol.
f996902d 6303 & -- print some in-use local-dynamic symbol name.
a46d1d38 6304 */
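/* For example, with operands[0] = (reg:SI 0) (%eax), "%k0" prints "%eax"
 and "%b0" prints "%al"; "%z0" applied to a 4-byte integer operand
 emits the "l" suffix in AT&T syntax. */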
2a2ab3f9
JVA
6305
6306void
6307print_operand (file, x, code)
6308 FILE *file;
6309 rtx x;
6310 int code;
6311{
6312 if (code)
6313 {
6314 switch (code)
6315 {
6316 case '*':
80f33d06 6317 if (ASSEMBLER_DIALECT == ASM_ATT)
2a2ab3f9
JVA
6318 putc ('*', file);
6319 return;
6320
f996902d
RH
6321 case '&':
6322 assemble_name (file, get_some_local_dynamic_name ());
6323 return;
6324
fb204271 6325 case 'A':
80f33d06 6326 if (ASSEMBLER_DIALECT == ASM_ATT)
fb204271 6327 putc ('*', file);
80f33d06 6328 else if (ASSEMBLER_DIALECT == ASM_INTEL)
fb204271
DN
6329 {
6330 /* Intel syntax. For absolute addresses, registers should not
 6331 be surrounded by brackets. */
6332 if (GET_CODE (x) != REG)
6333 {
6334 putc ('[', file);
6335 PRINT_OPERAND (file, x, 0);
6336 putc (']', file);
6337 return;
6338 }
6339 }
80f33d06
GS
6340 else
6341 abort ();
fb204271
DN
6342
6343 PRINT_OPERAND (file, x, 0);
6344 return;
6345
6346
2a2ab3f9 6347 case 'L':
80f33d06 6348 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 6349 putc ('l', file);
2a2ab3f9
JVA
6350 return;
6351
6352 case 'W':
80f33d06 6353 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 6354 putc ('w', file);
2a2ab3f9
JVA
6355 return;
6356
6357 case 'B':
80f33d06 6358 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 6359 putc ('b', file);
2a2ab3f9
JVA
6360 return;
6361
6362 case 'Q':
80f33d06 6363 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 6364 putc ('l', file);
2a2ab3f9
JVA
6365 return;
6366
6367 case 'S':
80f33d06 6368 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 6369 putc ('s', file);
2a2ab3f9
JVA
6370 return;
6371
5f1ec3e6 6372 case 'T':
80f33d06 6373 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 6374 putc ('t', file);
5f1ec3e6
JVA
6375 return;
6376
2a2ab3f9
JVA
6377 case 'z':
6378 /* 387 opcodes don't get size suffixes if the operands are
0f290768 6379 registers. */
2a2ab3f9
JVA
6380 if (STACK_REG_P (x))
6381 return;
6382
831c4e87
KC
6383 /* Likewise if using Intel opcodes. */
6384 if (ASSEMBLER_DIALECT == ASM_INTEL)
6385 return;
6386
 6387 /* Derive the size of the op from the size of the operand. */
2a2ab3f9
JVA
6388 switch (GET_MODE_SIZE (GET_MODE (x)))
6389 {
2a2ab3f9 6390 case 2:
155d8a47
JW
6391#ifdef HAVE_GAS_FILDS_FISTS
6392 putc ('s', file);
6393#endif
2a2ab3f9
JVA
6394 return;
6395
6396 case 4:
6397 if (GET_MODE (x) == SFmode)
6398 {
e075ae69 6399 putc ('s', file);
2a2ab3f9
JVA
6400 return;
6401 }
6402 else
e075ae69 6403 putc ('l', file);
2a2ab3f9
JVA
6404 return;
6405
5f1ec3e6 6406 case 12:
2b589241 6407 case 16:
e075ae69
RH
6408 putc ('t', file);
6409 return;
5f1ec3e6 6410
2a2ab3f9
JVA
6411 case 8:
6412 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
56c0e8fa
JVA
6413 {
6414#ifdef GAS_MNEMONICS
e075ae69 6415 putc ('q', file);
56c0e8fa 6416#else
e075ae69
RH
6417 putc ('l', file);
6418 putc ('l', file);
56c0e8fa
JVA
6419#endif
6420 }
e075ae69
RH
6421 else
6422 putc ('l', file);
2a2ab3f9 6423 return;
155d8a47
JW
6424
6425 default:
6426 abort ();
2a2ab3f9 6427 }
4af3895e
JVA
6428
6429 case 'b':
6430 case 'w':
6431 case 'k':
3f3f2124 6432 case 'q':
4af3895e
JVA
6433 case 'h':
6434 case 'y':
5cb6195d 6435 case 'X':
e075ae69 6436 case 'P':
4af3895e
JVA
6437 break;
6438
2d49677f
SC
6439 case 's':
6440 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
6441 {
6442 PRINT_OPERAND (file, x, 0);
e075ae69 6443 putc (',', file);
2d49677f 6444 }
a269a03c
JC
6445 return;
6446
a46d1d38
JH
6447 case 'D':
 6448 /* A little bit of braindamage here. The SSE compare instructions
 6449 use completely different names for the comparisons than the
 6450 fp conditional moves do. */
6451 switch (GET_CODE (x))
6452 {
6453 case EQ:
6454 case UNEQ:
6455 fputs ("eq", file);
6456 break;
6457 case LT:
6458 case UNLT:
6459 fputs ("lt", file);
6460 break;
6461 case LE:
6462 case UNLE:
6463 fputs ("le", file);
6464 break;
6465 case UNORDERED:
6466 fputs ("unord", file);
6467 break;
6468 case NE:
6469 case LTGT:
6470 fputs ("neq", file);
6471 break;
6472 case UNGE:
6473 case GE:
6474 fputs ("nlt", file);
6475 break;
6476 case UNGT:
6477 case GT:
6478 fputs ("nle", file);
6479 break;
6480 case ORDERED:
6481 fputs ("ord", file);
6482 break;
6483 default:
6484 abort ();
6485 break;
6486 }
6487 return;
048b1c95
JJ
6488 case 'O':
6489#ifdef CMOV_SUN_AS_SYNTAX
6490 if (ASSEMBLER_DIALECT == ASM_ATT)
6491 {
6492 switch (GET_MODE (x))
6493 {
6494 case HImode: putc ('w', file); break;
6495 case SImode:
6496 case SFmode: putc ('l', file); break;
6497 case DImode:
6498 case DFmode: putc ('q', file); break;
6499 default: abort ();
6500 }
6501 putc ('.', file);
6502 }
6503#endif
6504 return;
1853aadd 6505 case 'C':
e075ae69 6506 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
1853aadd 6507 return;
fe25fea3 6508 case 'F':
048b1c95
JJ
6509#ifdef CMOV_SUN_AS_SYNTAX
6510 if (ASSEMBLER_DIALECT == ASM_ATT)
6511 putc ('.', file);
6512#endif
e075ae69 6513 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
fe25fea3
SC
6514 return;
6515
e9a25f70 6516 /* Like above, but reverse condition */
e075ae69 6517 case 'c':
fce5a9f2 6518 /* Check to see if argument to %c is really a constant
c1d5afc4
CR
6519 and not a condition code which needs to be reversed. */
6520 if (GET_RTX_CLASS (GET_CODE (x)) != '<')
6521 {
6522 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
6523 return;
6524 }
e075ae69
RH
6525 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
6526 return;
fe25fea3 6527 case 'f':
048b1c95
JJ
6528#ifdef CMOV_SUN_AS_SYNTAX
6529 if (ASSEMBLER_DIALECT == ASM_ATT)
6530 putc ('.', file);
6531#endif
e075ae69 6532 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
1853aadd 6533 return;
ef6257cd
JH
6534 case '+':
6535 {
6536 rtx x;
e5cb57e8 6537
ef6257cd
JH
6538 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
6539 return;
a4f31c00 6540
ef6257cd
JH
6541 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
6542 if (x)
6543 {
6544 int pred_val = INTVAL (XEXP (x, 0));
6545
6546 if (pred_val < REG_BR_PROB_BASE * 45 / 100
6547 || pred_val > REG_BR_PROB_BASE * 55 / 100)
6548 {
6549 int taken = pred_val > REG_BR_PROB_BASE / 2;
6550 int cputaken = final_forward_branch_p (current_output_insn) == 0;
6551
 6552 /* Emit hints only when the default branch prediction
 6553 heuristics would fail. */
6554 if (taken != cputaken)
6555 {
6556 /* We use 3e (DS) prefix for taken branches and
6557 2e (CS) prefix for not taken branches. */
6558 if (taken)
6559 fputs ("ds ; ", file);
6560 else
6561 fputs ("cs ; ", file);
6562 }
6563 }
6564 }
6565 return;
6566 }
4af3895e 6567 default:
a52453cc 6568 output_operand_lossage ("invalid operand code `%c'", code);
2a2ab3f9
JVA
6569 }
6570 }
e9a25f70 6571
2a2ab3f9
JVA
6572 if (GET_CODE (x) == REG)
6573 {
6574 PRINT_REG (x, code, file);
6575 }
e9a25f70 6576
2a2ab3f9
JVA
6577 else if (GET_CODE (x) == MEM)
6578 {
e075ae69 6579 /* No `byte ptr' prefix for call instructions. */
80f33d06 6580 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
2a2ab3f9 6581 {
69ddee61 6582 const char * size;
e075ae69
RH
6583 switch (GET_MODE_SIZE (GET_MODE (x)))
6584 {
6585 case 1: size = "BYTE"; break;
6586 case 2: size = "WORD"; break;
6587 case 4: size = "DWORD"; break;
6588 case 8: size = "QWORD"; break;
6589 case 12: size = "XWORD"; break;
a7180f70 6590 case 16: size = "XMMWORD"; break;
e075ae69 6591 default:
564d80f4 6592 abort ();
e075ae69 6593 }
fb204271
DN
6594
6595 /* Check for explicit size override (codes 'b', 'w' and 'k') */
6596 if (code == 'b')
6597 size = "BYTE";
6598 else if (code == 'w')
6599 size = "WORD";
6600 else if (code == 'k')
6601 size = "DWORD";
6602
e075ae69
RH
6603 fputs (size, file);
6604 fputs (" PTR ", file);
2a2ab3f9 6605 }
e075ae69
RH
6606
6607 x = XEXP (x, 0);
6608 if (flag_pic && CONSTANT_ADDRESS_P (x))
6609 output_pic_addr_const (file, x, code);
0d7d98ee 6610 /* Avoid (%rip) for call operands. */
5bf0ebab 6611 else if (CONSTANT_ADDRESS_P (x) && code == 'P'
0d7d98ee
JH
6612 && GET_CODE (x) != CONST_INT)
6613 output_addr_const (file, x);
c8b94768
RH
6614 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
6615 output_operand_lossage ("invalid constraints for operand");
2a2ab3f9 6616 else
e075ae69 6617 output_address (x);
2a2ab3f9 6618 }
e9a25f70 6619
2a2ab3f9
JVA
6620 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
6621 {
e9a25f70
JL
6622 REAL_VALUE_TYPE r;
6623 long l;
6624
5f1ec3e6
JVA
6625 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6626 REAL_VALUE_TO_TARGET_SINGLE (r, l);
e075ae69 6627
80f33d06 6628 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 6629 putc ('$', file);
52267fcb 6630 fprintf (file, "0x%lx", l);
5f1ec3e6 6631 }
e9a25f70 6632
0f290768 6633 /* These float cases don't actually occur as immediate operands. */
5f1ec3e6
JVA
6634 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
6635 {
e9a25f70
JL
6636 REAL_VALUE_TYPE r;
6637 char dstr[30];
6638
5f1ec3e6 6639 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4b67a274 6640 REAL_VALUE_TO_DECIMAL (r, dstr, -1);
5f1ec3e6 6641 fprintf (file, "%s", dstr);
2a2ab3f9 6642 }
e9a25f70 6643
2b589241
JH
6644 else if (GET_CODE (x) == CONST_DOUBLE
6645 && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
2a2ab3f9 6646 {
e9a25f70
JL
6647 REAL_VALUE_TYPE r;
6648 char dstr[30];
6649
5f1ec3e6 6650 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4b67a274 6651 REAL_VALUE_TO_DECIMAL (r, dstr, -1);
5f1ec3e6 6652 fprintf (file, "%s", dstr);
2a2ab3f9 6653 }
f996902d 6654
79325812 6655 else
2a2ab3f9 6656 {
4af3895e 6657 if (code != 'P')
2a2ab3f9 6658 {
695dac07 6659 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
e075ae69 6660 {
80f33d06 6661 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69
RH
6662 putc ('$', file);
6663 }
2a2ab3f9
JVA
6664 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
6665 || GET_CODE (x) == LABEL_REF)
e075ae69 6666 {
80f33d06 6667 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69
RH
6668 putc ('$', file);
6669 else
6670 fputs ("OFFSET FLAT:", file);
6671 }
2a2ab3f9 6672 }
e075ae69
RH
6673 if (GET_CODE (x) == CONST_INT)
6674 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6675 else if (flag_pic)
2a2ab3f9
JVA
6676 output_pic_addr_const (file, x, code);
6677 else
6678 output_addr_const (file, x);
6679 }
6680}
6681\f
6682/* Print a memory operand whose address is ADDR. */
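/* For example, the address (plus (reg %ebx) (mult (reg %ecx) (const_int 4)))
 is printed as "(%ebx,%ecx,4)" in AT&T syntax and roughly as
 "[ebx+ecx*4]" in Intel syntax. */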
6683
6684void
6685print_operand_address (file, addr)
6686 FILE *file;
6687 register rtx addr;
6688{
e075ae69
RH
6689 struct ix86_address parts;
6690 rtx base, index, disp;
6691 int scale;
e9a25f70 6692
9e20be0c
JJ
6693 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP)
6694 {
6695 if (ASSEMBLER_DIALECT == ASM_INTEL)
6696 fputs ("DWORD PTR ", file);
6697 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
6698 putc ('%', file);
6699 fputs ("gs:0", file);
6700 return;
6701 }
6702
e075ae69
RH
6703 if (! ix86_decompose_address (addr, &parts))
6704 abort ();
e9a25f70 6705
e075ae69
RH
6706 base = parts.base;
6707 index = parts.index;
6708 disp = parts.disp;
6709 scale = parts.scale;
e9a25f70 6710
e075ae69
RH
6711 if (!base && !index)
6712 {
 6713 /* A displacement-only address requires special attention. */
e9a25f70 6714
e075ae69 6715 if (GET_CODE (disp) == CONST_INT)
2a2ab3f9 6716 {
80f33d06 6717 if (ASSEMBLER_DIALECT == ASM_INTEL)
fb204271
DN
6718 {
6719 if (USER_LABEL_PREFIX[0] == 0)
6720 putc ('%', file);
6721 fputs ("ds:", file);
6722 }
e075ae69 6723 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
2a2ab3f9 6724 }
e075ae69
RH
6725 else if (flag_pic)
6726 output_pic_addr_const (file, addr, 0);
6727 else
6728 output_addr_const (file, addr);
0d7d98ee
JH
6729
6730 /* Use one byte shorter RIP relative addressing for 64bit mode. */
edfe8595
RH
6731 if (TARGET_64BIT
6732 && (GET_CODE (addr) == SYMBOL_REF
6733 || GET_CODE (addr) == LABEL_REF
6734 || (GET_CODE (addr) == CONST
6735 && GET_CODE (XEXP (addr, 0)) == PLUS
6736 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
6737 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)))
0d7d98ee 6738 fputs ("(%rip)", file);
e075ae69
RH
6739 }
6740 else
6741 {
80f33d06 6742 if (ASSEMBLER_DIALECT == ASM_ATT)
2a2ab3f9 6743 {
e075ae69 6744 if (disp)
2a2ab3f9 6745 {
c399861d 6746 if (flag_pic)
e075ae69
RH
6747 output_pic_addr_const (file, disp, 0);
6748 else if (GET_CODE (disp) == LABEL_REF)
6749 output_asm_label (disp);
2a2ab3f9 6750 else
e075ae69 6751 output_addr_const (file, disp);
2a2ab3f9
JVA
6752 }
6753
e075ae69
RH
6754 putc ('(', file);
6755 if (base)
6756 PRINT_REG (base, 0, file);
6757 if (index)
2a2ab3f9 6758 {
e075ae69
RH
6759 putc (',', file);
6760 PRINT_REG (index, 0, file);
6761 if (scale != 1)
6762 fprintf (file, ",%d", scale);
2a2ab3f9 6763 }
e075ae69 6764 putc (')', file);
2a2ab3f9 6765 }
2a2ab3f9
JVA
6766 else
6767 {
e075ae69 6768 rtx offset = NULL_RTX;
e9a25f70 6769
e075ae69
RH
6770 if (disp)
6771 {
6772 /* Pull out the offset of a symbol; print any symbol itself. */
6773 if (GET_CODE (disp) == CONST
6774 && GET_CODE (XEXP (disp, 0)) == PLUS
6775 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
6776 {
6777 offset = XEXP (XEXP (disp, 0), 1);
6778 disp = gen_rtx_CONST (VOIDmode,
6779 XEXP (XEXP (disp, 0), 0));
6780 }
ce193852 6781
e075ae69
RH
6782 if (flag_pic)
6783 output_pic_addr_const (file, disp, 0);
6784 else if (GET_CODE (disp) == LABEL_REF)
6785 output_asm_label (disp);
6786 else if (GET_CODE (disp) == CONST_INT)
6787 offset = disp;
6788 else
6789 output_addr_const (file, disp);
6790 }
e9a25f70 6791
e075ae69
RH
6792 putc ('[', file);
6793 if (base)
a8620236 6794 {
e075ae69
RH
6795 PRINT_REG (base, 0, file);
6796 if (offset)
6797 {
6798 if (INTVAL (offset) >= 0)
6799 putc ('+', file);
6800 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6801 }
a8620236 6802 }
e075ae69
RH
6803 else if (offset)
6804 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
2247a58c 6805 else
e075ae69 6806 putc ('0', file);
e9a25f70 6807
e075ae69
RH
6808 if (index)
6809 {
6810 putc ('+', file);
6811 PRINT_REG (index, 0, file);
6812 if (scale != 1)
6813 fprintf (file, "*%d", scale);
6814 }
6815 putc (']', file);
6816 }
2a2ab3f9
JVA
6817 }
6818}
f996902d
RH
6819
6820bool
6821output_addr_const_extra (file, x)
6822 FILE *file;
6823 rtx x;
6824{
6825 rtx op;
6826
6827 if (GET_CODE (x) != UNSPEC)
6828 return false;
6829
6830 op = XVECEXP (x, 0, 0);
6831 switch (XINT (x, 1))
6832 {
6833 case UNSPEC_GOTTPOFF:
6834 output_addr_const (file, op);
6835 fputs ("@GOTTPOFF", file);
6836 break;
6837 case UNSPEC_TPOFF:
6838 output_addr_const (file, op);
6839 fputs ("@TPOFF", file);
6840 break;
6841 case UNSPEC_NTPOFF:
6842 output_addr_const (file, op);
6843 fputs ("@NTPOFF", file);
6844 break;
6845 case UNSPEC_DTPOFF:
6846 output_addr_const (file, op);
6847 fputs ("@DTPOFF", file);
6848 break;
6849
6850 default:
6851 return false;
6852 }
6853
6854 return true;
6855}
2a2ab3f9
JVA
6856\f
6857/* Split one or more DImode RTL references into pairs of SImode
6858 references. The RTL can be REG, offsettable MEM, integer constant, or
6859 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
6860 split and "num" is its length. lo_half and hi_half are output arrays
0f290768 6861 that parallel "operands". */
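/* For example, a DImode memory reference (mem:DI addr) yields
 (mem:SI addr) as the low half and (mem:SI addr+4) as the high half;
 REG and constant operands go through simplify_gen_subreg with byte
 offsets 0 and 4 instead. */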
2a2ab3f9
JVA
6862
6863void
6864split_di (operands, num, lo_half, hi_half)
6865 rtx operands[];
6866 int num;
6867 rtx lo_half[], hi_half[];
6868{
6869 while (num--)
6870 {
57dbca5e 6871 rtx op = operands[num];
b932f770
JH
6872
 6873 /* simplify_subreg refuses to split volatile memory addresses,
 6874 but we still have to handle them. */
6875 if (GET_CODE (op) == MEM)
2a2ab3f9 6876 {
f4ef873c 6877 lo_half[num] = adjust_address (op, SImode, 0);
b72f00af 6878 hi_half[num] = adjust_address (op, SImode, 4);
2a2ab3f9
JVA
6879 }
6880 else
b932f770 6881 {
38ca929b
JH
6882 lo_half[num] = simplify_gen_subreg (SImode, op,
6883 GET_MODE (op) == VOIDmode
6884 ? DImode : GET_MODE (op), 0);
6885 hi_half[num] = simplify_gen_subreg (SImode, op,
6886 GET_MODE (op) == VOIDmode
6887 ? DImode : GET_MODE (op), 4);
b932f770 6888 }
2a2ab3f9
JVA
6889 }
6890}
44cf5b6a
JH
6891/* Split one or more TImode RTL references into pairs of SImode
6892 references. The RTL can be REG, offsettable MEM, integer constant, or
6893 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
6894 split and "num" is its length. lo_half and hi_half are output arrays
6895 that parallel "operands". */
6896
6897void
6898split_ti (operands, num, lo_half, hi_half)
6899 rtx operands[];
6900 int num;
6901 rtx lo_half[], hi_half[];
6902{
6903 while (num--)
6904 {
6905 rtx op = operands[num];
b932f770
JH
6906
 6907 /* simplify_subreg refuses to split volatile memory addresses, but we
 6908 still have to handle them. */
6909 if (GET_CODE (op) == MEM)
44cf5b6a
JH
6910 {
6911 lo_half[num] = adjust_address (op, DImode, 0);
6912 hi_half[num] = adjust_address (op, DImode, 8);
6913 }
6914 else
b932f770
JH
6915 {
6916 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
6917 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
6918 }
44cf5b6a
JH
6919 }
6920}
2a2ab3f9 6921\f
2a2ab3f9
JVA
6922/* Output code to perform a 387 binary operation in INSN, one of PLUS,
6923 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
6924 is the expression of the binary operation. The output may either be
6925 emitted here, or returned to the caller, like all output_* functions.
6926
6927 There is no guarantee that the operands are the same mode, as they
0f290768 6928 might be within FLOAT or FLOAT_EXTEND expressions. */
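/* For example, a PLUS with both inputs on the 387 stack is rendered as a
 form of "fadd", while the same operation on SSE registers in SFmode
 becomes "addss\t{%2, %0|%0, %2}". */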
2a2ab3f9 6929
e3c2afab
AM
6930#ifndef SYSV386_COMPAT
6931/* Set to 1 for compatibility with brain-damaged assemblers. No-one
6932 wants to fix the assemblers because that causes incompatibility
6933 with gcc. No-one wants to fix gcc because that causes
6934 incompatibility with assemblers... You can use the option of
6935 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
6936#define SYSV386_COMPAT 1
6937#endif
6938
69ddee61 6939const char *
2a2ab3f9
JVA
6940output_387_binary_op (insn, operands)
6941 rtx insn;
6942 rtx *operands;
6943{
e3c2afab 6944 static char buf[30];
69ddee61 6945 const char *p;
1deaa899
JH
6946 const char *ssep;
6947 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
2a2ab3f9 6948
e3c2afab
AM
6949#ifdef ENABLE_CHECKING
6950 /* Even if we do not want to check the inputs, this documents input
6951 constraints. Which helps in understanding the following code. */
6952 if (STACK_REG_P (operands[0])
6953 && ((REG_P (operands[1])
6954 && REGNO (operands[0]) == REGNO (operands[1])
6955 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
6956 || (REG_P (operands[2])
6957 && REGNO (operands[0]) == REGNO (operands[2])
6958 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
6959 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
6960 ; /* ok */
1deaa899 6961 else if (!is_sse)
e3c2afab
AM
6962 abort ();
6963#endif
6964
2a2ab3f9
JVA
6965 switch (GET_CODE (operands[3]))
6966 {
6967 case PLUS:
e075ae69
RH
6968 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6969 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6970 p = "fiadd";
6971 else
6972 p = "fadd";
1deaa899 6973 ssep = "add";
2a2ab3f9
JVA
6974 break;
6975
6976 case MINUS:
e075ae69
RH
6977 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6978 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6979 p = "fisub";
6980 else
6981 p = "fsub";
1deaa899 6982 ssep = "sub";
2a2ab3f9
JVA
6983 break;
6984
6985 case MULT:
e075ae69
RH
6986 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6987 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6988 p = "fimul";
6989 else
6990 p = "fmul";
1deaa899 6991 ssep = "mul";
2a2ab3f9
JVA
6992 break;
6993
6994 case DIV:
e075ae69
RH
6995 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6996 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6997 p = "fidiv";
6998 else
6999 p = "fdiv";
1deaa899 7000 ssep = "div";
2a2ab3f9
JVA
7001 break;
7002
7003 default:
7004 abort ();
7005 }
7006
1deaa899
JH
7007 if (is_sse)
7008 {
7009 strcpy (buf, ssep);
7010 if (GET_MODE (operands[0]) == SFmode)
7011 strcat (buf, "ss\t{%2, %0|%0, %2}");
7012 else
7013 strcat (buf, "sd\t{%2, %0|%0, %2}");
7014 return buf;
7015 }
e075ae69 7016 strcpy (buf, p);
2a2ab3f9
JVA
7017
7018 switch (GET_CODE (operands[3]))
7019 {
7020 case MULT:
7021 case PLUS:
7022 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
7023 {
e3c2afab 7024 rtx temp = operands[2];
2a2ab3f9
JVA
7025 operands[2] = operands[1];
7026 operands[1] = temp;
7027 }
7028
e3c2afab
AM
7029 /* know operands[0] == operands[1]. */
7030
2a2ab3f9 7031 if (GET_CODE (operands[2]) == MEM)
e075ae69
RH
7032 {
7033 p = "%z2\t%2";
7034 break;
7035 }
2a2ab3f9
JVA
7036
7037 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6b28fd63
JL
7038 {
7039 if (STACK_TOP_P (operands[0]))
e3c2afab
AM
7040 /* How is it that we are storing to a dead operand[2]?
7041 Well, presumably operands[1] is dead too. We can't
7042 store the result to st(0) as st(0) gets popped on this
7043 instruction. Instead store to operands[2] (which I
7044 think has to be st(1)). st(1) will be popped later.
7045 gcc <= 2.8.1 didn't have this check and generated
7046 assembly code that the Unixware assembler rejected. */
7047 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6b28fd63 7048 else
e3c2afab 7049 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
e075ae69 7050 break;
6b28fd63 7051 }
2a2ab3f9
JVA
7052
7053 if (STACK_TOP_P (operands[0]))
e3c2afab 7054 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
2a2ab3f9 7055 else
e3c2afab 7056 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
e075ae69 7057 break;
2a2ab3f9
JVA
7058
7059 case MINUS:
7060 case DIV:
7061 if (GET_CODE (operands[1]) == MEM)
e075ae69
RH
7062 {
7063 p = "r%z1\t%1";
7064 break;
7065 }
2a2ab3f9
JVA
7066
7067 if (GET_CODE (operands[2]) == MEM)
e075ae69
RH
7068 {
7069 p = "%z2\t%2";
7070 break;
7071 }
2a2ab3f9 7072
2a2ab3f9 7073 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6b28fd63 7074 {
e3c2afab
AM
7075#if SYSV386_COMPAT
7076 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7077 derived assemblers, confusingly reverse the direction of
7078 the operation for fsub{r} and fdiv{r} when the
7079 destination register is not st(0). The Intel assembler
7080 doesn't have this brain damage. Read !SYSV386_COMPAT to
7081 figure out what the hardware really does. */
7082 if (STACK_TOP_P (operands[0]))
7083 p = "{p\t%0, %2|rp\t%2, %0}";
7084 else
7085 p = "{rp\t%2, %0|p\t%0, %2}";
7086#else
6b28fd63 7087 if (STACK_TOP_P (operands[0]))
e3c2afab
AM
7088 /* As above for fmul/fadd, we can't store to st(0). */
7089 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6b28fd63 7090 else
e3c2afab
AM
7091 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7092#endif
e075ae69 7093 break;
6b28fd63 7094 }
2a2ab3f9
JVA
7095
7096 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
6b28fd63 7097 {
e3c2afab 7098#if SYSV386_COMPAT
6b28fd63 7099 if (STACK_TOP_P (operands[0]))
e3c2afab 7100 p = "{rp\t%0, %1|p\t%1, %0}";
6b28fd63 7101 else
e3c2afab
AM
7102 p = "{p\t%1, %0|rp\t%0, %1}";
7103#else
7104 if (STACK_TOP_P (operands[0]))
7105 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7106 else
7107 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7108#endif
e075ae69 7109 break;
6b28fd63 7110 }
2a2ab3f9
JVA
7111
7112 if (STACK_TOP_P (operands[0]))
7113 {
7114 if (STACK_TOP_P (operands[1]))
e3c2afab 7115 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
2a2ab3f9 7116 else
e3c2afab 7117 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
e075ae69 7118 break;
2a2ab3f9
JVA
7119 }
7120 else if (STACK_TOP_P (operands[1]))
e3c2afab
AM
7121 {
7122#if SYSV386_COMPAT
7123 p = "{\t%1, %0|r\t%0, %1}";
7124#else
7125 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
7126#endif
7127 }
2a2ab3f9 7128 else
e3c2afab
AM
7129 {
7130#if SYSV386_COMPAT
7131 p = "{r\t%2, %0|\t%0, %2}";
7132#else
7133 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7134#endif
7135 }
e075ae69 7136 break;
2a2ab3f9
JVA
7137
7138 default:
7139 abort ();
7140 }
e075ae69
RH
7141
7142 strcat (buf, p);
7143 return buf;
2a2ab3f9 7144}

/* Output code to initialize control word copies used by the trunc?f?i
   patterns.  NORMAL is set to the current control word, while ROUND_DOWN
   is set to a control word that rounds toward zero (truncation), which is
   what those patterns need.  */
void
emit_i387_cw_initialization (normal, round_down)
     rtx normal, round_down;
{
  rtx reg = gen_reg_rtx (HImode);

  emit_insn (gen_x86_fnstcw_1 (normal));
  emit_move_insn (reg, normal);
  if (!TARGET_PARTIAL_REG_STALL && !optimize_size
      && !TARGET_64BIT)
    emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
  else
    emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
  emit_move_insn (round_down, reg);
}
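
/* Illustration only, not part of GCC (guarded out of the build): the x87
   rounding-control field occupies bits 10-11 of the control word, so
   OR-ing in 0x0c00 forces RC=11, round-toward-zero.  The movsi_insv_1
   path above instead stores 0x0c into the whole high byte; that sets the
   same RC bits (it also zeroes the precision-control bits, which should
   not matter for the integer store) while avoiding a 16-bit OR and its
   operand-size prefix.  */
#if 0
#include <assert.h>

static unsigned short
cw_truncate (unsigned short cw)
{
  return (unsigned short) (cw | 0x0c00);  /* set RC (bits 10-11) to 11 */
}

static void
cw_truncate_example (void)
{
  /* 0x037f is the hardware default control word.  */
  assert (cw_truncate (0x037f) == 0x0f7f);
}
#endif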

/* Output code for INSN to convert a float to a signed int.  OPERANDS
   are the insn operands.  The output may be [HSD]Imode and the input
   operand may be [SDX]Fmode.  */

const char *
output_fix_trunc (insn, operands)
     rtx insn;
     rtx *operands;
{
  int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
  int dimode_p = GET_MODE (operands[0]) == DImode;

  /* Jump through a hoop or two for DImode, since the hardware has no
     non-popping instruction.  We used to do this a different way, but
     that was somewhat fragile and broke with post-reload splitters.  */
  if (dimode_p && !stack_top_dies)
    output_asm_insn ("fld\t%y1", operands);

  if (!STACK_TOP_P (operands[1]))
    abort ();

  if (GET_CODE (operands[0]) != MEM)
    abort ();

  output_asm_insn ("fldcw\t%3", operands);
  if (stack_top_dies || dimode_p)
    output_asm_insn ("fistp%z0\t%0", operands);
  else
    output_asm_insn ("fist%z0\t%0", operands);
  output_asm_insn ("fldcw\t%2", operands);

  return "";
}

/* Output code for INSN to compare OPERANDS.  EFLAGS_P is 1 when fcomi
   should be used and 2 when fnstsw should be used.  UNORDERED_P is true
   when fucom should be used.  */

const char *
output_fp_compare (insn, operands, eflags_p, unordered_p)
     rtx insn;
     rtx *operands;
     int eflags_p, unordered_p;
{
  int stack_top_dies;
  rtx cmp_op0 = operands[0];
  rtx cmp_op1 = operands[1];
  int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);

  if (eflags_p == 2)
    {
      cmp_op0 = cmp_op1;
      cmp_op1 = operands[2];
    }
  if (is_sse)
    {
      if (GET_MODE (operands[0]) == SFmode)
        {
          if (unordered_p)
            return "ucomiss\t{%1, %0|%0, %1}";
          else
            return "comiss\t{%1, %0|%0, %1}";
        }
      else
        {
          if (unordered_p)
            return "ucomisd\t{%1, %0|%0, %1}";
          else
            return "comisd\t{%1, %0|%0, %1}";
        }
    }

  if (! STACK_TOP_P (cmp_op0))
    abort ();

  stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;

  if (STACK_REG_P (cmp_op1)
      && stack_top_dies
      && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
      && REGNO (cmp_op1) != FIRST_STACK_REG)
    {
      /* If the top of the 387 stack dies, and the other operand is also a
         stack register that dies, then this must be an `fcompp' float
         compare.  */

      if (eflags_p == 1)
        {
          /* There is no double popping fcomi variant.  Fortunately,
             eflags is immune from the fstp's cc clobbering.  */
          if (unordered_p)
            output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
          else
            output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
          return "fstp\t%y0";
        }
      else
        {
          if (eflags_p == 2)
            {
              if (unordered_p)
                return "fucompp\n\tfnstsw\t%0";
              else
                return "fcompp\n\tfnstsw\t%0";
            }
          else
            {
              if (unordered_p)
                return "fucompp";
              else
                return "fcompp";
            }
        }
    }
  else
    {
      /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies.  */

      static const char * const alt[24] =
      {
        "fcom%z1\t%y1",
        "fcomp%z1\t%y1",
        "fucom%z1\t%y1",
        "fucomp%z1\t%y1",

        "ficom%z1\t%y1",
        "ficomp%z1\t%y1",
        NULL,
        NULL,

        "fcomi\t{%y1, %0|%0, %y1}",
        "fcomip\t{%y1, %0|%0, %y1}",
        "fucomi\t{%y1, %0|%0, %y1}",
        "fucomip\t{%y1, %0|%0, %y1}",

        NULL,
        NULL,
        NULL,
        NULL,

        "fcom%z2\t%y2\n\tfnstsw\t%0",
        "fcomp%z2\t%y2\n\tfnstsw\t%0",
        "fucom%z2\t%y2\n\tfnstsw\t%0",
        "fucomp%z2\t%y2\n\tfnstsw\t%0",

        "ficom%z2\t%y2\n\tfnstsw\t%0",
        "ficomp%z2\t%y2\n\tfnstsw\t%0",
        NULL,
        NULL
      };

      int mask;
      const char *ret;

      mask  = eflags_p << 3;
      mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
      mask |= unordered_p << 1;
      mask |= stack_top_dies;

      if (mask >= 24)
        abort ();
      ret = alt[mask];
      if (ret == NULL)
        abort ();

      return ret;
    }
}
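
/* Illustration only, not part of GCC (guarded out of the build): the
   table index above packs four predicates into one small integer; for
   example an fnstsw-based (eflags_p == 2), FP-operand, ordered compare
   whose stack top dies maps to (2 << 3) | 1 == 17, i.e. the
   "fcomp%z2...fnstsw" entry.  */
#if 0
static int
fp_compare_alt_index (int eflags_p, int int_operand_p,
                      int unordered_p, int stack_top_dies)
{
  return (eflags_p << 3) | (int_operand_p << 2)
         | (unordered_p << 1) | stack_top_dies;
}
#endif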

void
ix86_output_addr_vec_elt (file, value)
     FILE *file;
     int value;
{
  const char *directive = ASM_LONG;

  if (TARGET_64BIT)
    {
#ifdef ASM_QUAD
      directive = ASM_QUAD;
#else
      abort ();
#endif
    }

  fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
}

void
ix86_output_addr_diff_elt (file, value, rel)
     FILE *file;
     int value, rel;
{
  if (TARGET_64BIT)
    fprintf (file, "%s%s%d-%s%d\n",
             ASM_LONG, LPREFIX, value, LPREFIX, rel);
  else if (HAVE_AS_GOTOFF_IN_DATA)
    fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
#if TARGET_MACHO
  else if (TARGET_MACHO)
    fprintf (file, "%s%s%d-%s\n", ASM_LONG, LPREFIX, value,
             machopic_function_base_name () + 1);
#endif
  else
    asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
                 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
}
\f
/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
   for the target.  */

void
ix86_expand_clear (dest)
     rtx dest;
{
  rtx tmp;

  /* We play register width games, which are only valid after reload.  */
  if (!reload_completed)
    abort ();

  /* Avoid HImode and its attendant prefix byte.  */
  if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
    dest = gen_rtx_REG (SImode, REGNO (dest));

  tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);

  /* This predicate should match that for movsi_xor and movdi_xor_rex64.  */
  if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
    {
      rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
    }

  emit_insn (tmp);
}
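
/* Illustration only, not part of GCC: the xor form is preferred when
   small code is wanted because "xorl %eax, %eax" encodes in 2 bytes
   (31 c0) while "movl $0, %eax" takes 5 (b8 00 00 00 00).  Unlike mov,
   xor writes the flags, which is why the SET is wrapped in a PARALLEL
   with a CC-register clobber above.  */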

/* X is an unchanging MEM.  If it is a constant pool reference, return
   the constant pool rtx, else NULL.  */

static rtx
maybe_get_pool_constant (x)
     rtx x;
{
  x = XEXP (x, 0);

  if (flag_pic)
    {
      if (GET_CODE (x) != PLUS)
        return NULL_RTX;
      if (XEXP (x, 0) != pic_offset_table_rtx)
        return NULL_RTX;
      x = XEXP (x, 1);
      if (GET_CODE (x) != CONST)
        return NULL_RTX;
      x = XEXP (x, 0);
      if (GET_CODE (x) != UNSPEC)
        return NULL_RTX;
      if (XINT (x, 1) != UNSPEC_GOTOFF)
        return NULL_RTX;
      x = XVECEXP (x, 0, 0);
    }

  if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
    return get_pool_constant (x);

  return NULL_RTX;
}

void
ix86_expand_move (mode, operands)
     enum machine_mode mode;
     rtx operands[];
{
  int strict = (reload_in_progress || reload_completed);
  rtx insn, op0, op1, tmp;

  op0 = operands[0];
  op1 = operands[1];

  /* ??? We have a slight problem.  We need to say that tls symbols are
     not legitimate constants so that reload does not helpfully reload
     these constants from a REG_EQUIV, which we cannot handle.  (Recall
     that general- and local-dynamic address resolution requires a
     function call.)

     However, if we say that tls symbols are not legitimate constants,
     then emit_move_insn helpfully drops them into the constant pool.

     It is far easier to work around emit_move_insn than reload.  Recognize
     the MEM that we would have created and extract the symbol_ref.  */

  if (mode == Pmode
      && GET_CODE (op1) == MEM
      && RTX_UNCHANGING_P (op1))
    {
      tmp = maybe_get_pool_constant (op1);
      /* Note that we only care about symbolic constants here, which
         unlike CONST_INT will always have a proper mode.  */
      if (tmp && GET_MODE (tmp) == Pmode)
        op1 = tmp;
    }

  if (tls_symbolic_operand (op1, Pmode))
    {
      op1 = legitimize_address (op1, op1, VOIDmode);
      if (GET_CODE (op0) == MEM)
        {
          tmp = gen_reg_rtx (mode);
          emit_insn (gen_rtx_SET (VOIDmode, tmp, op1));
          op1 = tmp;
        }
    }
  else if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
    {
#if TARGET_MACHO
      if (MACHOPIC_PURE)
        {
          rtx temp = ((reload_in_progress
                       || ((op0 && GET_CODE (op0) == REG)
                           && mode == Pmode))
                      ? op0 : gen_reg_rtx (Pmode));
          op1 = machopic_indirect_data_reference (op1, temp);
          op1 = machopic_legitimize_pic_address (op1, mode,
                                                 temp == op1 ? 0 : temp);
        }
      else
        {
          if (MACHOPIC_INDIRECT)
            op1 = machopic_indirect_data_reference (op1, 0);
        }
      if (op0 != op1)
        {
          insn = gen_rtx_SET (VOIDmode, op0, op1);
          emit_insn (insn);
        }
      return;
#endif /* TARGET_MACHO */
      if (GET_CODE (op0) == MEM)
        op1 = force_reg (Pmode, op1);
      else
        {
          rtx temp = op0;
          if (GET_CODE (temp) != REG)
            temp = gen_reg_rtx (Pmode);
          temp = legitimize_pic_address (op1, temp);
          if (temp == op0)
            return;
          op1 = temp;
        }
    }
  else
    {
      if (GET_CODE (op0) == MEM
          && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
              || !push_operand (op0, mode))
          && GET_CODE (op1) == MEM)
        op1 = force_reg (mode, op1);

      if (push_operand (op0, mode)
          && ! general_no_elim_operand (op1, mode))
        op1 = copy_to_mode_reg (mode, op1);

      /* Force large constants in 64bit compilation into a register
         to get them CSEed.  */
      if (TARGET_64BIT && mode == DImode
          && immediate_operand (op1, mode)
          && !x86_64_zero_extended_value (op1)
          && !register_operand (op0, mode)
          && optimize && !reload_completed && !reload_in_progress)
        op1 = copy_to_mode_reg (mode, op1);

      if (FLOAT_MODE_P (mode))
        {
          /* If we are loading a floating point constant to a register,
             force the value to memory now, since we'll get better code
             out the back end.  */

          if (strict)
            ;
          else if (GET_CODE (op1) == CONST_DOUBLE
                   && register_operand (op0, mode))
            op1 = validize_mem (force_const_mem (mode, op1));
        }
    }

  insn = gen_rtx_SET (VOIDmode, op0, op1);

  emit_insn (insn);
}

void
ix86_expand_vector_move (mode, operands)
     enum machine_mode mode;
     rtx operands[];
{
  /* Force constants other than zero into memory.  We do not know how
     the instructions used to build constants modify the upper 64 bits
     of the register; once we have that information we may be able
     to handle some of them more efficiently.  */
  if ((reload_in_progress | reload_completed) == 0
      && register_operand (operands[0], mode)
      && CONSTANT_P (operands[1]))
    {
      rtx addr = gen_reg_rtx (Pmode);
      emit_move_insn (addr, XEXP (force_const_mem (mode, operands[1]), 0));
      operands[1] = gen_rtx_MEM (mode, addr);
    }

  /* Make operand1 a register if it isn't already.  */
  if ((reload_in_progress | reload_completed) == 0
      && !register_operand (operands[0], mode)
      && !register_operand (operands[1], mode)
      && operands[1] != CONST0_RTX (mode))
    {
      rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
      emit_move_insn (operands[0], temp);
      return;
    }

  emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
}

/* Attempt to expand a binary operator.  Make the expansion closer to the
   actual machine, than just general_operand, which will allow 3 separate
   memory references (one output, two input) in a single insn.  */

void
ix86_expand_binary_operator (code, mode, operands)
     enum rtx_code code;
     enum machine_mode mode;
     rtx operands[];
{
  int matching_memory;
  rtx src1, src2, dst, op, clob;

  dst = operands[0];
  src1 = operands[1];
  src2 = operands[2];

  /* Recognize <var1> = <value> <op> <var1> for commutative operators.  */
  if (GET_RTX_CLASS (code) == 'c'
      && (rtx_equal_p (dst, src2)
          || immediate_operand (src1, mode)))
    {
      rtx temp = src1;
      src1 = src2;
      src2 = temp;
    }

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  matching_memory = 0;
  if (GET_CODE (dst) == MEM)
    {
      if (rtx_equal_p (dst, src1))
        matching_memory = 1;
      else if (GET_RTX_CLASS (code) == 'c'
               && rtx_equal_p (dst, src2))
        matching_memory = 2;
      else
        dst = gen_reg_rtx (mode);
    }

  /* Both source operands cannot be in memory.  */
  if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
    {
      if (matching_memory != 2)
        src2 = force_reg (mode, src2);
      else
        src1 = force_reg (mode, src1);
    }

  /* If the operation is not commutative, source 1 cannot be a constant
     or non-matching memory.  */
  if ((CONSTANT_P (src1)
       || (!matching_memory && GET_CODE (src1) == MEM))
      && GET_RTX_CLASS (code) != 'c')
    src1 = force_reg (mode, src1);

  /* If optimizing, copy to regs to improve CSE.  */
  if (optimize && ! no_new_pseudos)
    {
      if (GET_CODE (dst) == MEM)
        dst = gen_reg_rtx (mode);
      if (GET_CODE (src1) == MEM)
        src1 = force_reg (mode, src1);
      if (GET_CODE (src2) == MEM)
        src2 = force_reg (mode, src2);
    }

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
  if (reload_in_progress)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
         it doesn't want to clobber it.  We can only do this with PLUS.  */
      if (code != PLUS)
        abort ();
      emit_insn (op);
    }
  else
    {
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}

/* Return TRUE or FALSE depending on whether the binary operator meets the
   appropriate constraints.  */

int
ix86_binary_operator_ok (code, mode, operands)
     enum rtx_code code;
     enum machine_mode mode ATTRIBUTE_UNUSED;
     rtx operands[3];
{
  /* Both source operands cannot be in memory.  */
  if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
    return 0;
  /* If the operation is not commutative, source 1 cannot be a constant.  */
  if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
    return 0;
  /* If the destination is memory, we must have a matching source operand.  */
  if (GET_CODE (operands[0]) == MEM
      && ! (rtx_equal_p (operands[0], operands[1])
            || (GET_RTX_CLASS (code) == 'c'
                && rtx_equal_p (operands[0], operands[2]))))
    return 0;
  /* If the operation is not commutative and source 1 is memory, we must
     have a matching destination.  */
  if (GET_CODE (operands[1]) == MEM
      && GET_RTX_CLASS (code) != 'c'
      && ! rtx_equal_p (operands[0], operands[1]))
    return 0;
  return 1;
}
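
/* Illustration only, not part of GCC: IA-32 integer ALU instructions take
   at most one memory operand and the destination must match the first
   source, so the forcing code above reshapes e.g. a three-address
   "a = b + c" with all operands in memory into

        movl  b, %reg
        addl  c, %reg
        movl  %reg, a

   which is the only shape ix86_binary_operator_ok accepts.  */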

/* Attempt to expand a unary operator.  Make the expansion closer to the
   actual machine, than just general_operand, which will allow 2 separate
   memory references (one output, one input) in a single insn.  */

void
ix86_expand_unary_operator (code, mode, operands)
     enum rtx_code code;
     enum machine_mode mode;
     rtx operands[];
{
  int matching_memory;
  rtx src, dst, op, clob;

  dst = operands[0];
  src = operands[1];

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  matching_memory = 0;
  if (GET_CODE (dst) == MEM)
    {
      if (rtx_equal_p (dst, src))
        matching_memory = 1;
      else
        dst = gen_reg_rtx (mode);
    }

  /* When the source operand is memory, the destination must match.  */
  if (!matching_memory && GET_CODE (src) == MEM)
    src = force_reg (mode, src);

  /* If optimizing, copy to regs to improve CSE.  */
  if (optimize && ! no_new_pseudos)
    {
      if (GET_CODE (dst) == MEM)
        dst = gen_reg_rtx (mode);
      if (GET_CODE (src) == MEM)
        src = force_reg (mode, src);
    }

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
  if (reload_in_progress || code == NOT)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
         it doesn't want to clobber it.  */
      if (code != NOT)
        abort ();
      emit_insn (op);
    }
  else
    {
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}

/* Return TRUE or FALSE depending on whether the unary operator meets the
   appropriate constraints.  */

int
ix86_unary_operator_ok (code, mode, operands)
     enum rtx_code code ATTRIBUTE_UNUSED;
     enum machine_mode mode ATTRIBUTE_UNUSED;
     rtx operands[2] ATTRIBUTE_UNUSED;
{
  /* If one of the operands is memory, source and destination must match.  */
  if ((GET_CODE (operands[0]) == MEM
       || GET_CODE (operands[1]) == MEM)
      && ! rtx_equal_p (operands[0], operands[1]))
    return FALSE;
  return TRUE;
}

/* Return TRUE or FALSE depending on whether the first SET in INSN
   has source and destination with matching CC modes, and that the
   CC mode is at least as constrained as REQ_MODE.  */

int
ix86_match_ccmode (insn, req_mode)
     rtx insn;
     enum machine_mode req_mode;
{
  rtx set;
  enum machine_mode set_mode;

  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  if (GET_CODE (set) != SET)
    abort ();
  if (GET_CODE (SET_SRC (set)) != COMPARE)
    abort ();

  set_mode = GET_MODE (SET_DEST (set));
  switch (set_mode)
    {
    case CCNOmode:
      if (req_mode != CCNOmode
          && (req_mode != CCmode
              || XEXP (SET_SRC (set), 1) != const0_rtx))
        return 0;
      break;
    case CCmode:
      if (req_mode == CCGCmode)
        return 0;
      /* FALLTHRU */
    case CCGCmode:
      if (req_mode == CCGOCmode || req_mode == CCNOmode)
        return 0;
      /* FALLTHRU */
    case CCGOCmode:
      if (req_mode == CCZmode)
        return 0;
      /* FALLTHRU */
    case CCZmode:
      break;

    default:
      abort ();
    }

  return (GET_MODE (SET_SRC (set)) == set_mode);
}
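
/* Illustration only, not part of GCC: the fallthrough chain above orders
   the CC modes with CCZmode as the most constrained and CCmode as the
   least.  A flags destination of mode M satisfies a request REQ_MODE
   only if M is at least as constrained as REQ_MODE, so a CCZmode
   destination satisfies every request, CCGOCmode everything but CCZmode,
   and a plain CCmode destination only CCmode itself; CCNOmode is
   special-cased since it only coincides with CCmode behaviour for
   comparisons against zero.  */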

/* Generate insn patterns to do an integer compare of OPERANDS.  */

static rtx
ix86_expand_int_compare (code, op0, op1)
     enum rtx_code code;
     rtx op0, op1;
{
  enum machine_mode cmpmode;
  rtx tmp, flags;

  cmpmode = SELECT_CC_MODE (code, op0, op1);
  flags = gen_rtx_REG (cmpmode, FLAGS_REG);

  /* This is very simple, but making the interface the same as in the
     FP case makes the rest of the code easier.  */
  tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
  emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
}

/* Figure out whether to use ordered or unordered fp comparisons.
   Return the appropriate mode to use.  */

enum machine_mode
ix86_fp_compare_mode (code)
     enum rtx_code code ATTRIBUTE_UNUSED;
{
  /* ??? In order to make all comparisons reversible, we do all comparisons
     non-trapping when compiling for IEEE.  Once gcc is able to distinguish
     all forms of trapping and nontrapping comparisons, we can make inequality
     comparisons trapping again, since it results in better code when using
     FCOM based compares.  */
  return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
}

enum machine_mode
ix86_cc_mode (code, op0, op1)
     enum rtx_code code;
     rtx op0, op1;
{
  if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
    return ix86_fp_compare_mode (code);
  switch (code)
    {
      /* Only the zero flag is needed.  */
    case EQ:                    /* ZF=0 */
    case NE:                    /* ZF!=0 */
      return CCZmode;
      /* Codes needing the carry flag.  */
    case GEU:                   /* CF=0 */
    case GTU:                   /* CF=0 & ZF=0 */
    case LTU:                   /* CF=1 */
    case LEU:                   /* CF=1 | ZF=1 */
      return CCmode;
      /* Codes possibly doable only with the sign flag when
         comparing against zero.  */
    case GE:                    /* SF=OF or SF=0 */
    case LT:                    /* SF<>OF or SF=1 */
      if (op1 == const0_rtx)
        return CCGOCmode;
      else
        /* For other cases the carry flag is not required.  */
        return CCGCmode;
      /* Codes doable only with the sign flag when comparing
         against zero, but we miss the jump instruction for it
         so we need to use relational tests against overflow,
         which thus needs to be zero.  */
    case GT:                    /* ZF=0 & SF=OF */
    case LE:                    /* ZF=1 | SF<>OF */
      if (op1 == const0_rtx)
        return CCNOmode;
      else
        return CCGCmode;
      /* The strcmp pattern does (use flags) and combine may ask us
         for a proper mode.  */
    case USE:
      return CCmode;
    default:
      abort ();
    }
}

/* Return true if we should use an FCOMI instruction for this fp comparison.  */

int
ix86_use_fcomi_compare (code)
     enum rtx_code code ATTRIBUTE_UNUSED;
{
  enum rtx_code swapped_code = swap_condition (code);
  return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
          || (ix86_fp_comparison_cost (swapped_code)
              == ix86_fp_comparison_fcomi_cost (swapped_code)));
}

/* Swap, force into registers, or otherwise massage the two operands
   to a fp comparison.  The operands are updated in place; the new
   comparison code is returned.  */

static enum rtx_code
ix86_prepare_fp_compare_args (code, pop0, pop1)
     enum rtx_code code;
     rtx *pop0, *pop1;
{
  enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
  rtx op0 = *pop0, op1 = *pop1;
  enum machine_mode op_mode = GET_MODE (op0);
  int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);

  /* All of the unordered compare instructions only work on registers.
     The same is true of the XFmode compare instructions.  The same is
     true of the fcomi compare instructions.  */

  if (!is_sse
      && (fpcmp_mode == CCFPUmode
          || op_mode == XFmode
          || op_mode == TFmode
          || ix86_use_fcomi_compare (code)))
    {
      op0 = force_reg (op_mode, op0);
      op1 = force_reg (op_mode, op1);
    }
  else
    {
      /* %%% We only allow op1 in memory; op0 must be st(0).  So swap
         things around if they appear profitable, otherwise force op0
         into a register.  */

      if (standard_80387_constant_p (op0) == 0
          || (GET_CODE (op0) == MEM
              && ! (standard_80387_constant_p (op1) == 0
                    || GET_CODE (op1) == MEM)))
        {
          rtx tmp;
          tmp = op0, op0 = op1, op1 = tmp;
          code = swap_condition (code);
        }

      if (GET_CODE (op0) != REG)
        op0 = force_reg (op_mode, op0);

      if (CONSTANT_P (op1))
        {
          if (standard_80387_constant_p (op1))
            op1 = force_reg (op_mode, op1);
          else
            op1 = validize_mem (force_const_mem (op_mode, op1));
        }
    }

  /* Try to rearrange the comparison to make it cheaper.  */
  if (ix86_fp_comparison_cost (code)
      > ix86_fp_comparison_cost (swap_condition (code))
      && (GET_CODE (op1) == REG || !no_new_pseudos))
    {
      rtx tmp;
      tmp = op0, op0 = op1, op1 = tmp;
      code = swap_condition (code);
      if (GET_CODE (op0) != REG)
        op0 = force_reg (op_mode, op0);
    }

  *pop0 = op0;
  *pop1 = op1;
  return code;
}

/* Convert comparison codes we use to represent FP comparison to integer
   code that will result in a proper branch.  Return UNKNOWN if no such
   code is available.  */
static enum rtx_code
ix86_fp_compare_code_to_integer (code)
     enum rtx_code code;
{
  switch (code)
    {
    case GT:
      return GTU;
    case GE:
      return GEU;
    case ORDERED:
    case UNORDERED:
      return code;
    case UNEQ:
      return EQ;
    case UNLT:
      return LTU;
    case UNLE:
      return LEU;
    case LTGT:
      return NE;
    default:
      return UNKNOWN;
    }
}

/* Split comparison code CODE into comparisons we can do using branch
   instructions.  BYPASS_CODE is the comparison code for the branch that
   will branch around FIRST_CODE and SECOND_CODE.  If one of the branches
   is not required, its value is set to NIL.
   We never require more than two branches.  */
static void
ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
     enum rtx_code code, *bypass_code, *first_code, *second_code;
{
  *first_code = code;
  *bypass_code = NIL;
  *second_code = NIL;

  /* The fcomi comparison sets flags as follows:

       cmp    ZF PF CF
       >      0  0  0
       <      0  0  1
       =      1  0  0
       un     1  1  1 */

  switch (code)
    {
    case GT:                    /* GTU - CF=0 & ZF=0 */
    case GE:                    /* GEU - CF=0 */
    case ORDERED:               /* PF=0 */
    case UNORDERED:             /* PF=1 */
    case UNEQ:                  /* EQ - ZF=1 */
    case UNLT:                  /* LTU - CF=1 */
    case UNLE:                  /* LEU - CF=1 | ZF=1 */
    case LTGT:                  /* EQ - ZF=0 */
      break;
    case LT:                    /* LTU - CF=1 - fails on unordered */
      *first_code = UNLT;
      *bypass_code = UNORDERED;
      break;
    case LE:                    /* LEU - CF=1 | ZF=1 - fails on unordered */
      *first_code = UNLE;
      *bypass_code = UNORDERED;
      break;
    case EQ:                    /* EQ - ZF=1 - fails on unordered */
      *first_code = UNEQ;
      *bypass_code = UNORDERED;
      break;
    case NE:                    /* NE - ZF=0 - fails on unordered */
      *first_code = LTGT;
      *second_code = UNORDERED;
      break;
    case UNGE:                  /* GEU - CF=0 - fails on unordered */
      *first_code = GE;
      *second_code = UNORDERED;
      break;
    case UNGT:                  /* GTU - CF=0 & ZF=0 - fails on unordered */
      *first_code = GT;
      *second_code = UNORDERED;
      break;
    default:
      abort ();
    }
  if (!TARGET_IEEE_FP)
    {
      *second_code = NIL;
      *bypass_code = NIL;
    }
}
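
/* Illustration only, not part of GCC (guarded out of the build): per the
   flag table above, a plain "CF=1" test after fcomi implements UNLT
   rather than LT, because unordered operands also set CF.  That is why
   LT gets *first_code = UNLT plus an UNORDERED bypass branch.  A
   standalone sketch of the same flag behaviour:  */
#if 0
#include <math.h>
#include <stdio.h>

/* ZF, PF, CF packed as bits 2, 1, 0, as fcomi would leave them.  */
static unsigned
fcomi_flags (double a, double b)
{
  if (isnan (a) || isnan (b))
    return 0x7;                 /* unordered: ZF=1 PF=1 CF=1 */
  if (a < b)
    return 0x1;                 /* CF=1 */
  if (a == b)
    return 0x4;                 /* ZF=1 */
  return 0x0;                   /* a > b */
}

int
main (void)
{
  /* A "jb" (CF=1) is also taken for NaN, hence the bypass branch.  */
  printf ("%u\n", fcomi_flags (1.0, NAN) & 0x1);        /* prints 1 */
  return 0;
}
#endif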

/* Return the cost of a comparison done using fcom + arithmetic operations
   on AX.  All following functions use the number of instructions as a cost
   metric.  In future this should be tweaked to compute bytes for
   optimize_size and take into account performance of various instructions
   on various CPUs.  */
static int
ix86_fp_comparison_arithmetics_cost (code)
     enum rtx_code code;
{
  if (!TARGET_IEEE_FP)
    return 4;
  /* The cost of code output by ix86_expand_fp_compare.  */
  switch (code)
    {
    case UNLE:
    case UNLT:
    case LTGT:
    case GT:
    case GE:
    case UNORDERED:
    case ORDERED:
    case UNEQ:
      return 4;
    case LT:
    case NE:
    case EQ:
    case UNGE:
      return 5;
    case LE:
    case UNGT:
      return 6;
    default:
      abort ();
    }
}

/* Return the cost of a comparison done using the fcomi operation.
   See ix86_fp_comparison_arithmetics_cost for the metric.  */
static int
ix86_fp_comparison_fcomi_cost (code)
     enum rtx_code code;
{
  enum rtx_code bypass_code, first_code, second_code;
  /* Return an arbitrarily high cost when the instruction is not
     supported - this keeps gcc from using it.  */
  if (!TARGET_CMOVE)
    return 1024;
  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
  return (bypass_code != NIL || second_code != NIL) + 2;
}

/* Return the cost of a comparison done using the sahf operation.
   See ix86_fp_comparison_arithmetics_cost for the metric.  */
static int
ix86_fp_comparison_sahf_cost (code)
     enum rtx_code code;
{
  enum rtx_code bypass_code, first_code, second_code;
  /* Return an arbitrarily high cost when the instruction is not
     preferred - this keeps gcc from using it.  */
  if (!TARGET_USE_SAHF && !optimize_size)
    return 1024;
  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
  return (bypass_code != NIL || second_code != NIL) + 3;
}

/* Compute the cost of the comparison done using any method.
   See ix86_fp_comparison_arithmetics_cost for the metric.  */
static int
ix86_fp_comparison_cost (code)
     enum rtx_code code;
{
  int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
  int min;

  fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
  sahf_cost = ix86_fp_comparison_sahf_cost (code);

  min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
  if (min > sahf_cost)
    min = sahf_cost;
  if (min > fcomi_cost)
    min = fcomi_cost;
  return min;
}

/* Generate insn patterns to do a floating point compare of OPERANDS.  */

static rtx
ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
     enum rtx_code code;
     rtx op0, op1, scratch;
     rtx *second_test;
     rtx *bypass_test;
{
  enum machine_mode fpcmp_mode, intcmp_mode;
  rtx tmp, tmp2;
  int cost = ix86_fp_comparison_cost (code);
  enum rtx_code bypass_code, first_code, second_code;

  fpcmp_mode = ix86_fp_compare_mode (code);
  code = ix86_prepare_fp_compare_args (code, &op0, &op1);

  if (second_test)
    *second_test = NULL_RTX;
  if (bypass_test)
    *bypass_test = NULL_RTX;

  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);

  /* Do fcomi/sahf based test when profitable.  */
  if ((bypass_code == NIL || bypass_test)
      && (second_code == NIL || second_test)
      && ix86_fp_comparison_arithmetics_cost (code) > cost)
    {
      if (TARGET_CMOVE)
        {
          tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
          tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
                             tmp);
          emit_insn (tmp);
        }
      else
        {
          tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
          tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
          if (!scratch)
            scratch = gen_reg_rtx (HImode);
          emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
          emit_insn (gen_x86_sahf_1 (scratch));
        }

      /* The FP codes work out to act like unsigned.  */
      intcmp_mode = fpcmp_mode;
      code = first_code;
      if (bypass_code != NIL)
        *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
                                       gen_rtx_REG (intcmp_mode, FLAGS_REG),
                                       const0_rtx);
      if (second_code != NIL)
        *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
                                       gen_rtx_REG (intcmp_mode, FLAGS_REG),
                                       const0_rtx);
    }
  else
    {
      /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first.  */
      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
      tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
      if (!scratch)
        scratch = gen_reg_rtx (HImode);
      emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));

      /* In the unordered case, we have to check C2 for NaN's, which
         doesn't happen to work out to anything nice combination-wise.
         So do some bit twiddling on the value we've got in AH to come
         up with an appropriate set of condition codes.  */

      intcmp_mode = CCNOmode;
      switch (code)
        {
        case GT:
        case UNGT:
          if (code == GT || !TARGET_IEEE_FP)
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
              code = EQ;
            }
          else
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
              emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
              intcmp_mode = CCmode;
              code = GEU;
            }
          break;
        case LT:
        case UNLT:
          if (code == LT && TARGET_IEEE_FP)
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
              intcmp_mode = CCmode;
              code = EQ;
            }
          else
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
              code = NE;
            }
          break;
        case GE:
        case UNGE:
          if (code == GE || !TARGET_IEEE_FP)
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
              code = EQ;
            }
          else
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
                                             GEN_INT (0x01)));
              code = NE;
            }
          break;
        case LE:
        case UNLE:
          if (code == LE && TARGET_IEEE_FP)
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
              emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
              intcmp_mode = CCmode;
              code = LTU;
            }
          else
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
              code = NE;
            }
          break;
        case EQ:
        case UNEQ:
          if (code == EQ && TARGET_IEEE_FP)
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
              intcmp_mode = CCmode;
              code = EQ;
            }
          else
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
              code = NE;
            }
          break;
        case NE:
        case LTGT:
          if (code == NE && TARGET_IEEE_FP)
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
                                             GEN_INT (0x40)));
              code = NE;
            }
          else
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
              code = EQ;
            }
          break;

        case UNORDERED:
          emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
          code = NE;
          break;
        case ORDERED:
          emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
          code = EQ;
          break;

        default:
          abort ();
        }
    }

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode,
                         gen_rtx_REG (intcmp_mode, FLAGS_REG),
                         const0_rtx);
}
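
/* Illustration only, not part of GCC: fnstsw leaves the FPU condition
   bits in the high byte of the status word (what ends up in %ah), with
   C0 at bit 0 (0x01), C2 at bit 2 (0x04) and C3 at bit 6 (0x40).  After
   fucom, a>b clears all three, a<b sets C0, a==b sets C3, and an
   unordered result sets all of them, so the masks above read, e.g.:

        test $0x45, %ah ; je    ->  ordered and a > b      (GT)
        test $0x01, %ah ; jne   ->  a < b or unordered     (UNLT)
        test $0x04, %ah ; jne   ->  unordered              (UNORDERED)
        test $0x40, %ah ; jne   ->  a == b or unordered    (UNEQ)  */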

rtx
ix86_expand_compare (code, second_test, bypass_test)
     enum rtx_code code;
     rtx *second_test, *bypass_test;
{
  rtx op0, op1, ret;
  op0 = ix86_compare_op0;
  op1 = ix86_compare_op1;

  if (second_test)
    *second_test = NULL_RTX;
  if (bypass_test)
    *bypass_test = NULL_RTX;

  if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
    ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
                                  second_test, bypass_test);
  else
    ret = ix86_expand_int_compare (code, op0, op1);

  return ret;
}

/* Return true if CODE will result in a nontrivial jump sequence.  */
bool
ix86_fp_jump_nontrivial_p (code)
     enum rtx_code code;
{
  enum rtx_code bypass_code, first_code, second_code;
  if (!TARGET_CMOVE)
    return true;
  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
  return bypass_code != NIL || second_code != NIL;
}

void
ix86_expand_branch (code, label)
     enum rtx_code code;
     rtx label;
{
  rtx tmp;

  switch (GET_MODE (ix86_compare_op0))
    {
    case QImode:
    case HImode:
    case SImode:
    simple:
      tmp = ix86_expand_compare (code, NULL, NULL);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
                                  gen_rtx_LABEL_REF (VOIDmode, label),
                                  pc_rtx);
      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
      return;

    case SFmode:
    case DFmode:
    case XFmode:
    case TFmode:
      {
        rtvec vec;
        int use_fcomi;
        enum rtx_code bypass_code, first_code, second_code;

        code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
                                             &ix86_compare_op1);

        ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);

        /* Check whether we will use the natural sequence with one jump.  If
           so, we can expand the jump early.  Otherwise delay expansion by
           creating a compound insn to not confuse optimizers.  */
        if (bypass_code == NIL && second_code == NIL
            && TARGET_CMOVE)
          {
            ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
                                  gen_rtx_LABEL_REF (VOIDmode, label),
                                  pc_rtx, NULL_RTX);
          }
        else
          {
            tmp = gen_rtx_fmt_ee (code, VOIDmode,
                                  ix86_compare_op0, ix86_compare_op1);
            tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
                                        gen_rtx_LABEL_REF (VOIDmode, label),
                                        pc_rtx);
            tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);

            use_fcomi = ix86_use_fcomi_compare (code);
            vec = rtvec_alloc (3 + !use_fcomi);
            RTVEC_ELT (vec, 0) = tmp;
            RTVEC_ELT (vec, 1)
              = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
            RTVEC_ELT (vec, 2)
              = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
            if (! use_fcomi)
              RTVEC_ELT (vec, 3)
                = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));

            emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
          }
        return;
      }

    case DImode:
      if (TARGET_64BIT)
        goto simple;
      /* Expand DImode branch into multiple compare+branch.  */
      {
        rtx lo[2], hi[2], label2;
        enum rtx_code code1, code2, code3;

        if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
          {
            tmp = ix86_compare_op0;
            ix86_compare_op0 = ix86_compare_op1;
            ix86_compare_op1 = tmp;
            code = swap_condition (code);
          }
        split_di (&ix86_compare_op0, 1, lo+0, hi+0);
        split_di (&ix86_compare_op1, 1, lo+1, hi+1);

        /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
           avoid two branches.  This costs one extra insn, so disable when
           optimizing for size.  */

        if ((code == EQ || code == NE)
            && (!optimize_size
                || hi[1] == const0_rtx || lo[1] == const0_rtx))
          {
            rtx xor0, xor1;

            xor1 = hi[0];
            if (hi[1] != const0_rtx)
              xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
                                   NULL_RTX, 0, OPTAB_WIDEN);

            xor0 = lo[0];
            if (lo[1] != const0_rtx)
              xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
                                   NULL_RTX, 0, OPTAB_WIDEN);

            tmp = expand_binop (SImode, ior_optab, xor1, xor0,
                                NULL_RTX, 0, OPTAB_WIDEN);

            ix86_compare_op0 = tmp;
            ix86_compare_op1 = const0_rtx;
            ix86_expand_branch (code, label);
            return;
          }

        /* Otherwise, if we are doing less-than or greater-or-equal-than,
           op1 is a constant and the low word is zero, then we can just
           examine the high word.  */

        if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
          switch (code)
            {
            case LT: case LTU: case GE: case GEU:
              ix86_compare_op0 = hi[0];
              ix86_compare_op1 = hi[1];
              ix86_expand_branch (code, label);
              return;
            default:
              break;
            }

        /* Otherwise, we need two or three jumps.  */

        label2 = gen_label_rtx ();

        code1 = code;
        code2 = swap_condition (code);
        code3 = unsigned_condition (code);

        switch (code)
          {
          case LT: case GT: case LTU: case GTU:
            break;

          case LE:   code1 = LT;  code2 = GT;  break;
          case GE:   code1 = GT;  code2 = LT;  break;
          case LEU:  code1 = LTU; code2 = GTU; break;
          case GEU:  code1 = GTU; code2 = LTU; break;

          case EQ:   code1 = NIL; code2 = NE;  break;
          case NE:   code2 = NIL; break;

          default:
            abort ();
          }

        /*
         * a < b =>
         *    if (hi(a) < hi(b)) goto true;
         *    if (hi(a) > hi(b)) goto false;
         *    if (lo(a) < lo(b)) goto true;
         *  false:
         */

        ix86_compare_op0 = hi[0];
        ix86_compare_op1 = hi[1];

        if (code1 != NIL)
          ix86_expand_branch (code1, label);
        if (code2 != NIL)
          ix86_expand_branch (code2, label2);

        ix86_compare_op0 = lo[0];
        ix86_compare_op1 = lo[1];
        ix86_expand_branch (code3, label);

        if (code2 != NIL)
          emit_label (label2);
        return;
      }

    default:
      abort ();
    }
}

/* Split a branch based on a floating point condition.  */
void
ix86_split_fp_branch (code, op1, op2, target1, target2, tmp)
     enum rtx_code code;
     rtx op1, op2, target1, target2, tmp;
{
  rtx second, bypass;
  rtx label = NULL_RTX;
  rtx condition;
  int bypass_probability = -1, second_probability = -1, probability = -1;
  rtx i;

  if (target2 != pc_rtx)
    {
      rtx tmp = target2;
      code = reverse_condition_maybe_unordered (code);
      target2 = target1;
      target1 = tmp;
    }

  condition = ix86_expand_fp_compare (code, op1, op2,
                                      tmp, &second, &bypass);

  if (split_branch_probability >= 0)
    {
      /* Distribute the probabilities across the jumps.
         Assume BYPASS and SECOND to always be a test
         for UNORDERED.  */
      probability = split_branch_probability;

      /* A value of 1 is low enough to make no need for the probability
         to be updated.  Later we may run some experiments and see
         if unordered values are more frequent in practice.  */
      if (bypass)
        bypass_probability = 1;
      if (second)
        second_probability = 1;
    }
  if (bypass != NULL_RTX)
    {
      label = gen_label_rtx ();
      i = emit_jump_insn (gen_rtx_SET
                          (VOIDmode, pc_rtx,
                           gen_rtx_IF_THEN_ELSE (VOIDmode,
                                                 bypass,
                                                 gen_rtx_LABEL_REF (VOIDmode,
                                                                    label),
                                                 pc_rtx)));
      if (bypass_probability >= 0)
        REG_NOTES (i)
          = gen_rtx_EXPR_LIST (REG_BR_PROB,
                               GEN_INT (bypass_probability),
                               REG_NOTES (i));
    }
  i = emit_jump_insn (gen_rtx_SET
                      (VOIDmode, pc_rtx,
                       gen_rtx_IF_THEN_ELSE (VOIDmode,
                                             condition, target1, target2)));
  if (probability >= 0)
    REG_NOTES (i)
      = gen_rtx_EXPR_LIST (REG_BR_PROB,
                           GEN_INT (probability),
                           REG_NOTES (i));
  if (second != NULL_RTX)
    {
      i = emit_jump_insn (gen_rtx_SET
                          (VOIDmode, pc_rtx,
                           gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
                                                 target2)));
      if (second_probability >= 0)
        REG_NOTES (i)
          = gen_rtx_EXPR_LIST (REG_BR_PROB,
                               GEN_INT (second_probability),
                               REG_NOTES (i));
    }
  if (label != NULL_RTX)
    emit_label (label);
}

int
ix86_expand_setcc (code, dest)
     enum rtx_code code;
     rtx dest;
{
  rtx ret, tmp, tmpreg;
  rtx second_test, bypass_test;

  if (GET_MODE (ix86_compare_op0) == DImode
      && !TARGET_64BIT)
    return 0; /* FAIL */

  if (GET_MODE (dest) != QImode)
    abort ();

  ret = ix86_expand_compare (code, &second_test, &bypass_test);
  PUT_MODE (ret, QImode);

  tmp = dest;
  tmpreg = dest;

  emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
  if (bypass_test || second_test)
    {
      rtx test = second_test;
      int bypass = 0;
      rtx tmp2 = gen_reg_rtx (QImode);
      if (bypass_test)
        {
          if (second_test)
            abort ();
          test = bypass_test;
          bypass = 1;
          PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
        }
      PUT_MODE (test, QImode);
      emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));

      if (bypass)
        emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
      else
        emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
    }

  return 1; /* DONE */
}
e075ae69 8728
32b5b1aa 8729int
e075ae69
RH
8730ix86_expand_int_movcc (operands)
8731 rtx operands[];
32b5b1aa 8732{
e075ae69
RH
8733 enum rtx_code code = GET_CODE (operands[1]), compare_code;
8734 rtx compare_seq, compare_op;
a1b8572c 8735 rtx second_test, bypass_test;
635559ab 8736 enum machine_mode mode = GET_MODE (operands[0]);
32b5b1aa 8737
36583fea
JH
8738 /* When the compare code is not LTU or GEU, we can not use sbbl case.
8739 In case comparsion is done with immediate, we can convert it to LTU or
8740 GEU by altering the integer. */
8741
8742 if ((code == LEU || code == GTU)
8743 && GET_CODE (ix86_compare_op1) == CONST_INT
635559ab 8744 && mode != HImode
261376e7
RH
8745 && INTVAL (ix86_compare_op1) != -1
8746 /* For x86-64, the immediate field in the instruction is 32-bit
8747 signed, so we can't increment a DImode value above 0x7fffffff. */
74411039
JH
8748 && (!TARGET_64BIT
8749 || GET_MODE (ix86_compare_op0) != DImode
261376e7 8750 || INTVAL (ix86_compare_op1) != 0x7fffffff)
0f290768 8751 && GET_CODE (operands[2]) == CONST_INT
36583fea
JH
8752 && GET_CODE (operands[3]) == CONST_INT)
8753 {
8754 if (code == LEU)
8755 code = LTU;
8756 else
8757 code = GEU;
261376e7
RH
8758 ix86_compare_op1 = gen_int_mode (INTVAL (ix86_compare_op1) + 1,
8759 GET_MODE (ix86_compare_op0));
36583fea 8760 }
3a3677ff 8761
e075ae69 8762 start_sequence ();
a1b8572c 8763 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
2f937369 8764 compare_seq = get_insns ();
8765 end_sequence ();
8766
8767 compare_code = GET_CODE (compare_op);
8768
8769 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
8770 HImode insns, we'd be swallowed in word prefix ops. */
8771
8772 if (mode != HImode
8773 && (mode != DImode || TARGET_64BIT)
0f290768 8774 && GET_CODE (operands[2]) == CONST_INT
8775 && GET_CODE (operands[3]) == CONST_INT)
8776 {
8777 rtx out = operands[0];
8778 HOST_WIDE_INT ct = INTVAL (operands[2]);
8779 HOST_WIDE_INT cf = INTVAL (operands[3]);
8780 HOST_WIDE_INT diff;
8781
8782 if ((compare_code == LTU || compare_code == GEU)
8783 && !second_test && !bypass_test)
e075ae69 8784 {
8785 /* Detect overlap between destination and compare sources. */
8786 rtx tmp = out;
8787
0f290768 8788	  /* To simplify the rest of the code, restrict to the GEU case.  */
8789 if (compare_code == LTU)
8790 {
8791 int tmp = ct;
8792 ct = cf;
8793 cf = tmp;
8794 compare_code = reverse_condition (compare_code);
8795 code = reverse_condition (code);
8796 }
8797 diff = ct - cf;
8798
e075ae69 8799 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
a500c31b 8800 || reg_overlap_mentioned_p (out, ix86_compare_op1))
635559ab 8801 tmp = gen_reg_rtx (mode);
8802
8803 emit_insn (compare_seq);
635559ab 8804 if (mode == DImode)
8805 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp));
8806 else
8807 emit_insn (gen_x86_movsicc_0_m1 (tmp));
e075ae69 8808
8809 if (diff == 1)
8810 {
8811 /*
8812 * cmpl op0,op1
8813 * sbbl dest,dest
8814 * [addl dest, ct]
8815 *
8816 * Size 5 - 8.
8817 */
8818 if (ct)
8819 tmp = expand_simple_binop (mode, PLUS,
8820 tmp, GEN_INT (ct),
8821 tmp, 1, OPTAB_DIRECT);
8822 }
8823 else if (cf == -1)
8824 {
8825 /*
8826 * cmpl op0,op1
8827 * sbbl dest,dest
8828 * orl $ct, dest
8829 *
8830 * Size 8.
8831 */
8832 tmp = expand_simple_binop (mode, IOR,
8833 tmp, GEN_INT (ct),
8834 tmp, 1, OPTAB_DIRECT);
8835 }
8836 else if (diff == -1 && ct)
8837 {
8838 /*
8839 * cmpl op0,op1
8840 * sbbl dest,dest
06ec023f 8841 * notl dest
8842 * [addl dest, cf]
8843 *
8844 * Size 8 - 11.
8845 */
8846 tmp = expand_simple_unop (mode, NOT, tmp, tmp, 1);
8847 if (cf)
8848 tmp = expand_simple_binop (mode, PLUS,
8849 tmp, GEN_INT (cf),
8850 tmp, 1, OPTAB_DIRECT);
8851 }
8852 else
8853 {
8854 /*
8855 * cmpl op0,op1
8856 * sbbl dest,dest
06ec023f 8857 * [notl dest]
8858 * andl cf - ct, dest
8859 * [addl dest, ct]
8860 *
8861 * Size 8 - 11.
8862 */
8863
8864 if (cf == 0)
8865 {
8866 cf = ct;
8867 ct = 0;
8868 tmp = expand_simple_unop (mode, NOT, tmp, tmp, 1);
8869 }
8870
8871 tmp = expand_simple_binop (mode, AND,
8872 tmp,
d8bf17f9 8873 gen_int_mode (cf - ct, mode),
8874 tmp, 1, OPTAB_DIRECT);
8875 if (ct)
8876 tmp = expand_simple_binop (mode, PLUS,
8877 tmp, GEN_INT (ct),
8878 tmp, 1, OPTAB_DIRECT);
36583fea 8879 }
8880
8881 if (tmp != out)
8882 emit_move_insn (out, tmp);
8883
8884 return 1; /* DONE */
8885 }
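/* A minimal standalone sketch, assuming 32-bit unsigned operands, of the
   branchless select built above: "sbbl dest,dest" after the compare yields
   all-ones when op0 < op1 (carry set) and zero otherwise, and the optional
   not/and/add steps then turn that mask into ct or cf.  */
static unsigned int
sbb_select_sketch (unsigned int op0, unsigned int op1,
		   unsigned int ct, unsigned int cf)
{
  unsigned int mask = op0 < op1 ? ~0u : 0u;	/* cmpl; sbbl dest,dest */
  return (mask & (ct - cf)) + cf;		/* andl; addl */
}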
8886
8887 diff = ct - cf;
8888 if (diff < 0)
8889 {
8890 HOST_WIDE_INT tmp;
8891 tmp = ct, ct = cf, cf = tmp;
8892 diff = -diff;
8893 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
8894 {
8895 /* We may be reversing unordered compare to normal compare, that
8896 is not valid in general (we may convert non-trapping condition
8897 to trapping one), however on i386 we currently emit all
8898 comparisons unordered. */
8899 compare_code = reverse_condition_maybe_unordered (compare_code);
8900 code = reverse_condition_maybe_unordered (code);
8901 }
8902 else
8903 {
8904 compare_code = reverse_condition (compare_code);
8905 code = reverse_condition (code);
8906 }
e075ae69 8907 }
8908
8909 compare_code = NIL;
8910 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
8911 && GET_CODE (ix86_compare_op1) == CONST_INT)
8912 {
8913 if (ix86_compare_op1 == const0_rtx
8914 && (code == LT || code == GE))
8915 compare_code = code;
8916 else if (ix86_compare_op1 == constm1_rtx)
8917 {
8918 if (code == LE)
8919 compare_code = LT;
8920 else if (code == GT)
8921 compare_code = GE;
8922 }
8923 }
8924
8925 /* Optimize dest = (op0 < 0) ? -1 : cf. */
8926 if (compare_code != NIL
8927 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
8928 && (cf == -1 || ct == -1))
8929 {
8930 /* If lea code below could be used, only optimize
8931 if it results in a 2 insn sequence. */
8932
8933 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
8934 || diff == 3 || diff == 5 || diff == 9)
8935 || (compare_code == LT && ct == -1)
8936 || (compare_code == GE && cf == -1))
8937 {
8938 /*
8939 * notl op1 (if necessary)
8940 * sarl $31, op1
8941 * orl cf, op1
8942 */
8943 if (ct != -1)
8944 {
8945 cf = ct;
8946 ct = -1;
8947 code = reverse_condition (code);
8948 }
8949
8950 out = emit_store_flag (out, code, ix86_compare_op0,
8951 ix86_compare_op1, VOIDmode, 0, -1);
8952
8953 out = expand_simple_binop (mode, IOR,
8954 out, GEN_INT (cf),
8955 out, 1, OPTAB_DIRECT);
8956 if (out != operands[0])
8957 emit_move_insn (operands[0], out);
8958
8959 return 1; /* DONE */
8960 }
8961 }
8962
8963 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
8964 || diff == 3 || diff == 5 || diff == 9)
8965 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
8966 {
8967 /*
8968 * xorl dest,dest
8969 * cmpl op1,op2
8970 * setcc dest
8971 * lea cf(dest*(ct-cf)),dest
8972 *
8973 * Size 14.
8974 *
8975 * This also catches the degenerate setcc-only case.
8976 */
8977
8978 rtx tmp;
8979 int nops;
8980
8981 out = emit_store_flag (out, code, ix86_compare_op0,
8982 ix86_compare_op1, VOIDmode, 0, 1);
8983
8984 nops = 0;
8985      /* On x86_64 the lea instruction operates on Pmode, so we need
8986	 to do the arithmetic in the proper mode to match.  */
e075ae69 8987 if (diff == 1)
14f73b5a 8988 tmp = out;
8989 else
8990 {
885a70fd 8991 rtx out1;
14f73b5a 8992 out1 = out;
635559ab 8993 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
8994 nops++;
8995 if (diff & 1)
8996 {
635559ab 8997 tmp = gen_rtx_PLUS (mode, tmp, out1);
8998 nops++;
8999 }
9000 }
9001 if (cf != 0)
9002 {
635559ab 9003 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
9004 nops++;
9005 }
9006 if (tmp != out
9007 && (GET_CODE (tmp) != SUBREG || SUBREG_REG (tmp) != out))
e075ae69 9008 {
14f73b5a 9009 if (nops == 1)
9010 {
9011 rtx clob;
9012
9013 clob = gen_rtx_REG (CCmode, FLAGS_REG);
9014 clob = gen_rtx_CLOBBER (VOIDmode, clob);
9015
9016 tmp = gen_rtx_SET (VOIDmode, out, tmp);
9017 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
9018 emit_insn (tmp);
9019 }
9020 else
9021 emit_insn (gen_rtx_SET (VOIDmode, out, tmp));
9022 }
9023 if (out != operands[0])
1985ef90 9024 emit_move_insn (operands[0], copy_rtx (out));
9025
9026 return 1; /* DONE */
9027 }
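/* A minimal standalone sketch of the setcc + lea path above, assuming
   diff = ct - cf is one of 1, 2, 3, 4, 5, 8 or 9 so that the whole
   expression cf + flag * diff fits the lea addressing modes
   (base + index * {1,2,4,8} + displacement).  */
static int
setcc_lea_sketch (int cond, int ct, int cf)
{
  int flag = cond != 0;			/* xorl dest,dest; cmpl; setcc */
  return cf + flag * (ct - cf);		/* a single lea when diff is apt */
}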
9028
9029 /*
9030 * General case: Jumpful:
9031 * xorl dest,dest cmpl op1, op2
9032 * cmpl op1, op2 movl ct, dest
9033 * setcc dest jcc 1f
9034 * decl dest movl cf, dest
9035 * andl (cf-ct),dest 1:
9036 * addl ct,dest
0f290768 9037 *
9038 * Size 20. Size 14.
9039 *
9040 * This is reasonably steep, but branch mispredict costs are
9041 * high on modern cpus, so consider failing only if optimizing
9042 * for space.
9043 *
9044 * %%% Parameterize branch_cost on the tuning architecture, then
9045 * use that. The 80386 couldn't care less about mispredicts.
9046 */
9047
9048 if (!optimize_size && !TARGET_CMOVE)
9049 {
97f51ac4 9050 if (cf == 0)
e075ae69 9051 {
9052 cf = ct;
9053 ct = 0;
734dba19 9054 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9055 /* We may be reversing unordered compare to normal compare,
9056 that is not valid in general (we may convert non-trapping
9057 condition to trapping one), however on i386 we currently
9058 emit all comparisons unordered. */
9059 code = reverse_condition_maybe_unordered (code);
9060 else
9061 {
9062 code = reverse_condition (code);
9063 if (compare_code != NIL)
9064 compare_code = reverse_condition (compare_code);
9065 }
9066 }
9067
9068 if (compare_code != NIL)
9069 {
9070 /* notl op1 (if needed)
9071 sarl $31, op1
9072 andl (cf-ct), op1
9073 addl ct, op1
9074
9075 For x < 0 (resp. x <= -1) there will be no notl,
9076 so if possible swap the constants to get rid of the
9077 complement.
9078 True/false will be -1/0 while code below (store flag
9079 followed by decrement) is 0/-1, so the constants need
9080 to be exchanged once more. */
9081
9082 if (compare_code == GE || !cf)
734dba19 9083 {
9084 code = reverse_condition (code);
9085 compare_code = LT;
9086 }
9087 else
9088 {
9089 HOST_WIDE_INT tmp = cf;
9090 cf = ct;
9091 ct = tmp;
734dba19 9092 }
9093
9094 out = emit_store_flag (out, code, ix86_compare_op0,
9095 ix86_compare_op1, VOIDmode, 0, -1);
e075ae69 9096 }
9097 else
9098 {
9099 out = emit_store_flag (out, code, ix86_compare_op0,
9100 ix86_compare_op1, VOIDmode, 0, 1);
e075ae69 9101
97f51ac4 9102 out = expand_simple_binop (mode, PLUS, out, constm1_rtx,
9103 out, 1, OPTAB_DIRECT);
9104 }
e075ae69 9105
97f51ac4 9106 out = expand_simple_binop (mode, AND, out,
d8bf17f9 9107 gen_int_mode (cf - ct, mode),
635559ab 9108 out, 1, OPTAB_DIRECT);
9109 if (ct)
9110 out = expand_simple_binop (mode, PLUS, out, GEN_INT (ct),
9111 out, 1, OPTAB_DIRECT);
9112 if (out != operands[0])
9113 emit_move_insn (operands[0], out);
9114
9115 return 1; /* DONE */
9116 }
9117 }
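/* A minimal standalone sketch of the jumpless general case above, assuming
   32-bit ints: setcc produces 0/1, the decrement turns that into 0/-1, and
   the mask then blends the two constants without a branch.  */
static int
setcc_blend_sketch (int cond, int ct, int cf)
{
  int t = (cond != 0) - 1;	/* xorl; cmpl; setcc; decl -> 0 or -1 */
  t &= cf - ct;			/* andl (cf - ct), dest */
  return t + ct;		/* addl ct, dest -> cond ? ct : cf */
}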
9118
9119 if (!TARGET_CMOVE)
9120 {
9121 /* Try a few things more with specific constants and a variable. */
9122
78a0d70c 9123 optab op;
9124 rtx var, orig_out, out, tmp;
9125
9126 if (optimize_size)
9127 return 0; /* FAIL */
9128
0f290768 9129 /* If one of the two operands is an interesting constant, load a
e075ae69 9130 constant with the above and mask it in with a logical operation. */
0f290768 9131
9132 if (GET_CODE (operands[2]) == CONST_INT)
9133 {
9134 var = operands[3];
9135 if (INTVAL (operands[2]) == 0)
9136 operands[3] = constm1_rtx, op = and_optab;
9137 else if (INTVAL (operands[2]) == -1)
9138 operands[3] = const0_rtx, op = ior_optab;
9139 else
9140 return 0; /* FAIL */
9141 }
9142 else if (GET_CODE (operands[3]) == CONST_INT)
9143 {
9144 var = operands[2];
9145 if (INTVAL (operands[3]) == 0)
9146 operands[2] = constm1_rtx, op = and_optab;
9147 else if (INTVAL (operands[3]) == -1)
9148 operands[2] = const0_rtx, op = ior_optab;
9149 else
9150 return 0; /* FAIL */
e075ae69 9151 }
78a0d70c 9152 else
9153 return 0; /* FAIL */
9154
9155 orig_out = operands[0];
635559ab 9156 tmp = gen_reg_rtx (mode);
9157 operands[0] = tmp;
9158
9159 /* Recurse to get the constant loaded. */
9160 if (ix86_expand_int_movcc (operands) == 0)
9161 return 0; /* FAIL */
9162
9163 /* Mask in the interesting variable. */
635559ab 9164 out = expand_binop (mode, op, var, tmp, orig_out, 0,
9165 OPTAB_WIDEN);
9166 if (out != orig_out)
9167 emit_move_insn (orig_out, out);
9168
9169 return 1; /* DONE */
9170 }
9171
9172 /*
9173 * For comparison with above,
9174 *
9175 * movl cf,dest
9176 * movl ct,tmp
9177 * cmpl op1,op2
9178 * cmovcc tmp,dest
9179 *
9180 * Size 15.
9181 */
9182
9183 if (! nonimmediate_operand (operands[2], mode))
9184 operands[2] = force_reg (mode, operands[2]);
9185 if (! nonimmediate_operand (operands[3], mode))
9186 operands[3] = force_reg (mode, operands[3]);
e075ae69 9187
9188 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9189 {
635559ab 9190 rtx tmp = gen_reg_rtx (mode);
9191 emit_move_insn (tmp, operands[3]);
9192 operands[3] = tmp;
9193 }
9194 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9195 {
635559ab 9196 rtx tmp = gen_reg_rtx (mode);
9197 emit_move_insn (tmp, operands[2]);
9198 operands[2] = tmp;
9199 }
9200 if (! register_operand (operands[2], VOIDmode)
9201 && ! register_operand (operands[3], VOIDmode))
635559ab 9202 operands[2] = force_reg (mode, operands[2]);
a1b8572c 9203
9204 emit_insn (compare_seq);
9205 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
635559ab 9206 gen_rtx_IF_THEN_ELSE (mode,
9207 compare_op, operands[2],
9208 operands[3])));
9209 if (bypass_test)
9210 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
635559ab 9211 gen_rtx_IF_THEN_ELSE (mode,
9212 bypass_test,
9213 operands[3],
9214 operands[0])));
9215 if (second_test)
9216 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
635559ab 9217 gen_rtx_IF_THEN_ELSE (mode,
9218 second_test,
9219 operands[2],
9220 operands[0])));
9221
9222 return 1; /* DONE */
e9a25f70 9223}
e075ae69 9224
32b5b1aa 9225int
9226ix86_expand_fp_movcc (operands)
9227 rtx operands[];
32b5b1aa 9228{
e075ae69 9229 enum rtx_code code;
e075ae69 9230 rtx tmp;
a1b8572c 9231 rtx compare_op, second_test, bypass_test;
32b5b1aa 9232
9233 /* For SF/DFmode conditional moves based on comparisons
9234     in the same mode, we may want to use SSE min/max instructions.  */
9235 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
9236 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
0073023d 9237 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
9238       /* The SSE comparisons do not support the LTGT/UNEQ pair.  */
9239 && (!TARGET_IEEE_FP
9240 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
9241 /* We may be called from the post-reload splitter. */
9242 && (!REG_P (operands[0])
9243 || SSE_REG_P (operands[0])
52a661a6 9244 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
9245 {
9246 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
9247 code = GET_CODE (operands[1]);
9248
9249 /* See if we have (cross) match between comparison operands and
9250 conditional move operands. */
9251 if (rtx_equal_p (operands[2], op1))
9252 {
9253 rtx tmp = op0;
9254 op0 = op1;
9255 op1 = tmp;
9256 code = reverse_condition_maybe_unordered (code);
9257 }
9258 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
9259 {
9260 /* Check for min operation. */
9261 if (code == LT)
9262 {
9263 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9264 if (memory_operand (op0, VOIDmode))
9265 op0 = force_reg (GET_MODE (operands[0]), op0);
9266 if (GET_MODE (operands[0]) == SFmode)
9267 emit_insn (gen_minsf3 (operands[0], op0, op1));
9268 else
9269 emit_insn (gen_mindf3 (operands[0], op0, op1));
9270 return 1;
9271 }
9272 /* Check for max operation. */
9273 if (code == GT)
9274 {
9275 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9276 if (memory_operand (op0, VOIDmode))
9277 op0 = force_reg (GET_MODE (operands[0]), op0);
9278 if (GET_MODE (operands[0]) == SFmode)
9279 emit_insn (gen_maxsf3 (operands[0], op0, op1));
9280 else
9281 emit_insn (gen_maxdf3 (operands[0], op0, op1));
9282 return 1;
9283 }
9284 }
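/* A minimal standalone sketch of the source pattern recognized above; with
   SSE math enabled this kind of conditional move can become a single
   minss/minsd (and GT the corresponding maxss/maxsd).  Operand order
   matters for NaNs: the hardware min/max returns the second operand when
   the comparison is unordered, hence the operand checks above.  */
static double
sse_min_sketch (double a, double b)
{
  return a < b ? a : b;		/* candidate for a single minsd */
}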
9285      /* Arrange for the condition to be an sse_comparison_operator.  When
9286	 we are in non-IEEE mode, try to canonicalize the destination operand
9287	 to be first in the comparison - this helps reload to avoid extra
9288	 moves.  */
9289 if (!sse_comparison_operator (operands[1], VOIDmode)
9290 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
9291 {
9292 rtx tmp = ix86_compare_op0;
9293 ix86_compare_op0 = ix86_compare_op1;
9294 ix86_compare_op1 = tmp;
9295 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
9296 VOIDmode, ix86_compare_op0,
9297 ix86_compare_op1);
9298 }
9299      /* Similarly, try to manage the result to be the first operand of the conditional
9300 move. We also don't support the NE comparison on SSE, so try to
9301 avoid it. */
9302 if ((rtx_equal_p (operands[0], operands[3])
9303 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
9304 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
9305 {
9306 rtx tmp = operands[2];
9307 operands[2] = operands[3];
92d0fb09 9308 operands[3] = tmp;
9309 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
9310 (GET_CODE (operands[1])),
9311 VOIDmode, ix86_compare_op0,
9312 ix86_compare_op1);
9313 }
9314 if (GET_MODE (operands[0]) == SFmode)
9315 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
9316 operands[2], operands[3],
9317 ix86_compare_op0, ix86_compare_op1));
9318 else
9319 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
9320 operands[2], operands[3],
9321 ix86_compare_op0, ix86_compare_op1));
9322 return 1;
9323 }
9324
e075ae69 9325 /* The floating point conditional move instructions don't directly
0f290768 9326 support conditions resulting from a signed integer comparison. */
32b5b1aa 9327
e075ae69 9328 code = GET_CODE (operands[1]);
a1b8572c 9329 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9330
9331 /* The floating point conditional move instructions don't directly
9332 support signed integer comparisons. */
9333
a1b8572c 9334 if (!fcmov_comparison_operator (compare_op, VOIDmode))
e075ae69 9335 {
a1b8572c 9336 if (second_test != NULL || bypass_test != NULL)
b531087a 9337 abort ();
e075ae69 9338 tmp = gen_reg_rtx (QImode);
3a3677ff 9339 ix86_expand_setcc (code, tmp);
9340 code = NE;
9341 ix86_compare_op0 = tmp;
9342 ix86_compare_op1 = const0_rtx;
9343 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9344 }
9345 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9346 {
9347 tmp = gen_reg_rtx (GET_MODE (operands[0]));
9348 emit_move_insn (tmp, operands[3]);
9349 operands[3] = tmp;
9350 }
9351 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9352 {
9353 tmp = gen_reg_rtx (GET_MODE (operands[0]));
9354 emit_move_insn (tmp, operands[2]);
9355 operands[2] = tmp;
e075ae69 9356 }
e9a25f70 9357
9358 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9359 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9e7adcb3 9360 compare_op,
9361 operands[2],
9362 operands[3])));
9363 if (bypass_test)
9364 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9365 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9366 bypass_test,
9367 operands[3],
9368 operands[0])));
9369 if (second_test)
9370 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9371 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9372 second_test,
9373 operands[2],
9374 operands[0])));
32b5b1aa 9375
e075ae69 9376 return 1;
9377}
9378
9379/* Split operands 0 and 1 into SImode parts. Similar to split_di, but
9380   works for floating point parameters and non-offsettable memories.
9381 For pushes, it returns just stack offsets; the values will be saved
9382 in the right order. Maximally three parts are generated. */
9383
2b589241 9384static int
9385ix86_split_to_parts (operand, parts, mode)
9386 rtx operand;
9387 rtx *parts;
9388 enum machine_mode mode;
32b5b1aa 9389{
9390 int size;
9391
9392 if (!TARGET_64BIT)
9393 size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
9394 else
9395 size = (GET_MODE_SIZE (mode) + 4) / 8;
2450a057 9396
9397 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
9398 abort ();
9399 if (size < 2 || size > 3)
9400 abort ();
9401
9402   /* Optimize constant pool references to immediates.  This is used by fp
9403      moves, which force all constants to memory to allow combining.  */
9404 if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
9405 {
9406 rtx tmp = maybe_get_pool_constant (operand);
9407 if (tmp)
9408 operand = tmp;
9409 }
d7a29404 9410
2450a057 9411 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
e075ae69 9412 {
9413       /* The only non-offsettable memories we handle are pushes.  */
9414 if (! push_operand (operand, VOIDmode))
9415 abort ();
9416
9417 operand = copy_rtx (operand);
9418 PUT_MODE (operand, Pmode);
9419 parts[0] = parts[1] = parts[2] = operand;
9420 }
26e5b205 9421 else if (!TARGET_64BIT)
9422 {
9423 if (mode == DImode)
9424 split_di (&operand, 1, &parts[0], &parts[1]);
9425 else
e075ae69 9426 {
9427 if (REG_P (operand))
9428 {
9429 if (!reload_completed)
9430 abort ();
9431 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
9432 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
9433 if (size == 3)
9434 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
9435 }
9436 else if (offsettable_memref_p (operand))
9437 {
f4ef873c 9438 operand = adjust_address (operand, SImode, 0);
2450a057 9439 parts[0] = operand;
b72f00af 9440 parts[1] = adjust_address (operand, SImode, 4);
2450a057 9441 if (size == 3)
b72f00af 9442 parts[2] = adjust_address (operand, SImode, 8);
9443 }
9444 else if (GET_CODE (operand) == CONST_DOUBLE)
9445 {
9446 REAL_VALUE_TYPE r;
2b589241 9447 long l[4];
9448
9449 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
9450 switch (mode)
9451 {
9452 case XFmode:
2b589241 9453 case TFmode:
2450a057 9454 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
d8bf17f9 9455 parts[2] = gen_int_mode (l[2], SImode);
2450a057
JH
9456 break;
9457 case DFmode:
9458 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
9459 break;
9460 default:
9461 abort ();
9462 }
9463 parts[1] = gen_int_mode (l[1], SImode);
9464 parts[0] = gen_int_mode (l[0], SImode);
9465 }
9466 else
9467 abort ();
e075ae69 9468 }
2450a057 9469 }
9470 else
9471 {
9472 if (mode == TImode)
9473 split_ti (&operand, 1, &parts[0], &parts[1]);
9474 if (mode == XFmode || mode == TFmode)
9475 {
9476 if (REG_P (operand))
9477 {
9478 if (!reload_completed)
9479 abort ();
9480 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
9481 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
9482 }
9483 else if (offsettable_memref_p (operand))
9484 {
b72f00af 9485 operand = adjust_address (operand, DImode, 0);
26e5b205 9486 parts[0] = operand;
b72f00af 9487 parts[1] = adjust_address (operand, SImode, 8);
9488 }
9489 else if (GET_CODE (operand) == CONST_DOUBLE)
9490 {
9491 REAL_VALUE_TYPE r;
9492 long l[3];
9493
9494 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
9495 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
9496 /* Do not use shift by 32 to avoid warning on 32bit systems. */
9497 if (HOST_BITS_PER_WIDE_INT >= 64)
523fbd9d 9498 parts[0]
d8bf17f9 9499 = gen_int_mode
44cf5b6a 9500 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
b531087a 9501 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
d8bf17f9 9502 DImode);
9503 else
9504 parts[0] = immed_double_const (l[0], l[1], DImode);
d8bf17f9 9505 parts[1] = gen_int_mode (l[2], SImode);
9506 }
9507 else
9508 abort ();
9509 }
9510 }
2450a057 9511
2b589241 9512 return size;
9513}
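/* A minimal standalone sketch, assuming a little-endian host with a 64-bit
   long long, of the 32-bit split performed above for a DImode value:
   part 0 is the low word at offset 0, part 1 the high word at offset 4.  */
static void
split_di_sketch (unsigned long long x, unsigned int parts[2])
{
  parts[0] = (unsigned int) x;		/* low SImode word */
  parts[1] = (unsigned int) (x >> 32);	/* high SImode word */
}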
9514
9515/* Emit insns to perform a move or push of DI, DF, and XF values.
9516 Return false when normal moves are needed; true when all required
9517 insns have been emitted. Operands 2-4 contain the input values
9518   in the correct order; operands 5-7 contain the output values.  */
9519
9520void
9521ix86_split_long_move (operands)
9522 rtx operands[];
9523{
9524 rtx part[2][3];
26e5b205 9525 int nparts;
9526 int push = 0;
9527 int collisions = 0;
9528 enum machine_mode mode = GET_MODE (operands[0]);
9529
9530   /* The DFmode expanders may ask us to move a double.
9531      For a 64-bit target this is a single move.  By hiding the fact
9532      here we simplify the i386.md splitters.  */
9533 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
9534 {
9535       /* Optimize constant pool references to immediates.  This is used by
9536	  fp moves, which force all constants to memory to allow combining.  */
9537
9538 if (GET_CODE (operands[1]) == MEM
9539 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
9540 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
9541 operands[1] = get_pool_constant (XEXP (operands[1], 0));
9542 if (push_operand (operands[0], VOIDmode))
9543 {
9544 operands[0] = copy_rtx (operands[0]);
9545 PUT_MODE (operands[0], Pmode);
9546 }
9547 else
9548 operands[0] = gen_lowpart (DImode, operands[0]);
9549 operands[1] = gen_lowpart (DImode, operands[1]);
9550 emit_move_insn (operands[0], operands[1]);
9551 return;
9552 }
2450a057 9553
9554 /* The only non-offsettable memory we handle is push. */
9555 if (push_operand (operands[0], VOIDmode))
9556 push = 1;
9557 else if (GET_CODE (operands[0]) == MEM
9558 && ! offsettable_memref_p (operands[0]))
9559 abort ();
9560
9561 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
9562 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
9563
9564 /* When emitting push, take care for source operands on the stack. */
9565 if (push && GET_CODE (operands[1]) == MEM
9566 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
9567 {
26e5b205 9568 if (nparts == 3)
9569 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
9570 XEXP (part[1][2], 0));
9571 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
9572 XEXP (part[1][1], 0));
9573 }
9574
0f290768 9575  /* We need to do the copy in the right order in case an address register
9576 of the source overlaps the destination. */
9577 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
9578 {
9579 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
9580 collisions++;
9581 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
9582 collisions++;
26e5b205 9583 if (nparts == 3
9584 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
9585 collisions++;
9586
9587 /* Collision in the middle part can be handled by reordering. */
26e5b205 9588 if (collisions == 1 && nparts == 3
2450a057 9589 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
e075ae69 9590 {
9591 rtx tmp;
9592 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
9593 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
9594 }
e075ae69 9595
9596 /* If there are more collisions, we can't handle it by reordering.
9597 Do an lea to the last part and use only one colliding move. */
9598 else if (collisions > 1)
9599 {
9600 collisions = 1;
26e5b205 9601 emit_insn (gen_rtx_SET (VOIDmode, part[0][nparts - 1],
2450a057 9602 XEXP (part[1][0], 0)));
9603 part[1][0] = change_address (part[1][0],
9604 TARGET_64BIT ? DImode : SImode,
9605 part[0][nparts - 1]);
b72f00af 9606 part[1][1] = adjust_address (part[1][0], VOIDmode, UNITS_PER_WORD);
26e5b205 9607 if (nparts == 3)
b72f00af 9608 part[1][2] = adjust_address (part[1][0], VOIDmode, 8);
9609 }
9610 }
9611
9612 if (push)
9613 {
26e5b205 9614 if (!TARGET_64BIT)
2b589241 9615 {
9616 if (nparts == 3)
9617 {
9618	      /* We use only the first 12 bytes of a TFmode value, but for
9619		 pushing we must adjust the stack as if we were pushing a real
9620		 16-byte value.  */
9621 if (mode == TFmode && !TARGET_64BIT)
9622 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
9623 GEN_INT (-4)));
9624 emit_move_insn (part[0][2], part[1][2]);
9625 }
2b589241 9626 }
9627 else
9628 {
9629	  /* In 64-bit mode we don't have a 32-bit push available.  If this is
9630	     a register, that is fine - we just use the larger counterpart.  We
9631	     also retype memory - this comes from an attempt to avoid a REX
9632	     prefix on moving the second half of a TFmode value.  */
9633 if (GET_MODE (part[1][1]) == SImode)
9634 {
9635 if (GET_CODE (part[1][1]) == MEM)
f4ef873c 9636 part[1][1] = adjust_address (part[1][1], DImode, 0);
9637 else if (REG_P (part[1][1]))
9638 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
9639 else
b531087a 9640 abort ();
9641 if (GET_MODE (part[1][0]) == SImode)
9642 part[1][0] = part[1][1];
9643 }
9644 }
9645 emit_move_insn (part[0][1], part[1][1]);
9646 emit_move_insn (part[0][0], part[1][0]);
9647 return;
9648 }
9649
9650 /* Choose correct order to not overwrite the source before it is copied. */
9651 if ((REG_P (part[0][0])
9652 && REG_P (part[1][1])
9653 && (REGNO (part[0][0]) == REGNO (part[1][1])
26e5b205 9654 || (nparts == 3
9655 && REGNO (part[0][0]) == REGNO (part[1][2]))))
9656 || (collisions > 0
9657 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
9658 {
26e5b205 9659 if (nparts == 3)
2450a057 9660 {
9661 operands[2] = part[0][2];
9662 operands[3] = part[0][1];
9663 operands[4] = part[0][0];
9664 operands[5] = part[1][2];
9665 operands[6] = part[1][1];
9666 operands[7] = part[1][0];
9667 }
9668 else
9669 {
9670 operands[2] = part[0][1];
9671 operands[3] = part[0][0];
9672 operands[5] = part[1][1];
9673 operands[6] = part[1][0];
9674 }
9675 }
9676 else
9677 {
26e5b205 9678 if (nparts == 3)
2450a057 9679 {
9680 operands[2] = part[0][0];
9681 operands[3] = part[0][1];
9682 operands[4] = part[0][2];
9683 operands[5] = part[1][0];
9684 operands[6] = part[1][1];
9685 operands[7] = part[1][2];
9686 }
9687 else
9688 {
9689 operands[2] = part[0][0];
9690 operands[3] = part[0][1];
9691 operands[5] = part[1][0];
9692 operands[6] = part[1][1];
9693 }
9694 }
9695 emit_move_insn (operands[2], operands[5]);
9696 emit_move_insn (operands[3], operands[6]);
9697 if (nparts == 3)
9698 emit_move_insn (operands[4], operands[7]);
32b5b1aa 9699
26e5b205 9700 return;
32b5b1aa 9701}
32b5b1aa 9702
9703void
9704ix86_split_ashldi (operands, scratch)
9705 rtx *operands, scratch;
32b5b1aa 9706{
9707 rtx low[2], high[2];
9708 int count;
b985a30f 9709
9710 if (GET_CODE (operands[2]) == CONST_INT)
9711 {
9712 split_di (operands, 2, low, high);
9713 count = INTVAL (operands[2]) & 63;
32b5b1aa 9714
9715 if (count >= 32)
9716 {
9717 emit_move_insn (high[0], low[1]);
9718 emit_move_insn (low[0], const0_rtx);
b985a30f 9719
9720 if (count > 32)
9721 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
9722 }
9723 else
9724 {
9725 if (!rtx_equal_p (operands[0], operands[1]))
9726 emit_move_insn (operands[0], operands[1]);
9727 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
9728 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
9729 }
9730 }
9731 else
9732 {
9733 if (!rtx_equal_p (operands[0], operands[1]))
9734 emit_move_insn (operands[0], operands[1]);
b985a30f 9735
e075ae69 9736 split_di (operands, 1, low, high);
b985a30f 9737
9738 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
9739 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
32b5b1aa 9740
fe577e58 9741 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
e075ae69 9742 {
fe577e58 9743 if (! no_new_pseudos)
9744 scratch = force_reg (SImode, const0_rtx);
9745 else
9746 emit_move_insn (scratch, const0_rtx);
9747
9748 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
9749 scratch));
9750 }
9751 else
9752 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
9753 }
e9a25f70 9754}
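/* A minimal standalone sketch of the DImode left-shift split above,
   assuming the value is held as two 32-bit halves and 0 < count < 64: the
   count >= 32 case moves low into high, and the small-count case is the
   shld/shl pair emitted for the constant path.  */
static void
ashldi_sketch (unsigned int *low, unsigned int *high, int count)
{
  if (count >= 32)
    {
      *high = *low << (count - 32);			/* mov high,low; shl */
      *low = 0;
    }
  else
    {
      *high = (*high << count) | (*low >> (32 - count));	/* shld */
      *low <<= count;						/* shl  */
    }
}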
32b5b1aa 9755
9756void
9757ix86_split_ashrdi (operands, scratch)
9758 rtx *operands, scratch;
32b5b1aa 9759{
9760 rtx low[2], high[2];
9761 int count;
32b5b1aa 9762
9763 if (GET_CODE (operands[2]) == CONST_INT)
9764 {
9765 split_di (operands, 2, low, high);
9766 count = INTVAL (operands[2]) & 63;
32b5b1aa 9767
9768 if (count >= 32)
9769 {
9770 emit_move_insn (low[0], high[1]);
32b5b1aa 9771
9772 if (! reload_completed)
9773 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
9774 else
9775 {
9776 emit_move_insn (high[0], low[0]);
9777 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
9778 }
9779
9780 if (count > 32)
9781 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
9782 }
9783 else
9784 {
9785 if (!rtx_equal_p (operands[0], operands[1]))
9786 emit_move_insn (operands[0], operands[1]);
9787 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
9788 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
9789 }
9790 }
9791 else
32b5b1aa 9792 {
9793 if (!rtx_equal_p (operands[0], operands[1]))
9794 emit_move_insn (operands[0], operands[1]);
9795
9796 split_di (operands, 1, low, high);
9797
9798 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
9799 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
9800
fe577e58 9801 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
e075ae69 9802 {
fe577e58 9803 if (! no_new_pseudos)
9804 scratch = gen_reg_rtx (SImode);
9805 emit_move_insn (scratch, high[0]);
9806 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
9807 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
9808 scratch));
9809 }
9810 else
9811 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
32b5b1aa 9812 }
e075ae69 9813}
32b5b1aa 9814
9815void
9816ix86_split_lshrdi (operands, scratch)
9817 rtx *operands, scratch;
9818{
9819 rtx low[2], high[2];
9820 int count;
32b5b1aa 9821
e075ae69 9822 if (GET_CODE (operands[2]) == CONST_INT)
32b5b1aa 9823 {
9824 split_di (operands, 2, low, high);
9825 count = INTVAL (operands[2]) & 63;
9826
9827 if (count >= 32)
c7271385 9828 {
9829 emit_move_insn (low[0], high[1]);
9830 emit_move_insn (high[0], const0_rtx);
32b5b1aa 9831
9832 if (count > 32)
9833 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
9834 }
9835 else
9836 {
9837 if (!rtx_equal_p (operands[0], operands[1]))
9838 emit_move_insn (operands[0], operands[1]);
9839 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
9840 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
9841 }
32b5b1aa 9842 }
9843 else
9844 {
9845 if (!rtx_equal_p (operands[0], operands[1]))
9846 emit_move_insn (operands[0], operands[1]);
32b5b1aa 9847
9848 split_di (operands, 1, low, high);
9849
9850 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
9851 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
9852
9853 /* Heh. By reversing the arguments, we can reuse this pattern. */
fe577e58 9854 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
e075ae69 9855 {
fe577e58 9856 if (! no_new_pseudos)
9857 scratch = force_reg (SImode, const0_rtx);
9858 else
9859 emit_move_insn (scratch, const0_rtx);
9860
9861 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
9862 scratch));
9863 }
9864 else
9865 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
9866 }
32b5b1aa 9867}
3f803cd9 9868
0407c02b 9869/* Helper function for the string operations below.  Test whether VARIABLE
9870   is aligned to VALUE bytes.  If it is, jump to the label.  */
9871static rtx
9872ix86_expand_aligntest (variable, value)
9873 rtx variable;
9874 int value;
9875{
9876 rtx label = gen_label_rtx ();
9877 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
9878 if (GET_MODE (variable) == DImode)
9879 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
9880 else
9881 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
9882 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
d43e0b7d 9883 1, label);
9884 return label;
9885}
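/* A minimal standalone sketch of the test emitted above: and the address
   with the alignment bit(s) in VALUE and skip the fixup code when they
   are clear.  */
static int
aligntest_sketch (unsigned long addr, unsigned long value)
{
  return (addr & value) == 0;	/* andl; je label */
}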
9886
9887/* Adjust COUNTER by the VALUE. */
9888static void
9889ix86_adjust_counter (countreg, value)
9890 rtx countreg;
9891 HOST_WIDE_INT value;
9892{
9893 if (GET_MODE (countreg) == DImode)
9894 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
9895 else
9896 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
9897}
9898
9899/* Zero extend possibly SImode EXP to Pmode register. */
d24b3457 9900rtx
9901ix86_zero_extend_to_Pmode (exp)
9902 rtx exp;
9903{
9904 rtx r;
9905 if (GET_MODE (exp) == VOIDmode)
9906 return force_reg (Pmode, exp);
9907 if (GET_MODE (exp) == Pmode)
9908 return copy_to_mode_reg (Pmode, exp);
9909 r = gen_reg_rtx (Pmode);
9910 emit_insn (gen_zero_extendsidi2 (r, exp));
9911 return r;
9912}
9913
9914/* Expand string move (memcpy) operation. Use i386 string operations when
9915 profitable. expand_clrstr contains similar code. */
9916int
9917ix86_expand_movstr (dst, src, count_exp, align_exp)
9918 rtx dst, src, count_exp, align_exp;
9919{
9920 rtx srcreg, destreg, countreg;
9921 enum machine_mode counter_mode;
9922 HOST_WIDE_INT align = 0;
9923 unsigned HOST_WIDE_INT count = 0;
9924 rtx insns;
9925
9926 start_sequence ();
9927
9928 if (GET_CODE (align_exp) == CONST_INT)
9929 align = INTVAL (align_exp);
9930
5519a4f9 9931 /* This simple hack avoids all inlining code and simplifies code below. */
9932 if (!TARGET_ALIGN_STRINGOPS)
9933 align = 64;
9934
9935 if (GET_CODE (count_exp) == CONST_INT)
9936 count = INTVAL (count_exp);
9937
9938 /* Figure out proper mode for counter. For 32bits it is always SImode,
9939 for 64bits use SImode when possible, otherwise DImode.
9940 Set count to number of bytes copied when known at compile time. */
9941 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
9942 || x86_64_zero_extended_value (count_exp))
9943 counter_mode = SImode;
9944 else
9945 counter_mode = DImode;
9946
9947 if (counter_mode != SImode && counter_mode != DImode)
9948 abort ();
9949
9950 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
9951 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
9952
9953 emit_insn (gen_cld ());
9954
9955   /* When optimizing for size, emit a simple rep ; movsb instruction for
9956 counts not divisible by 4. */
9957
9958 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
9959 {
9960 countreg = ix86_zero_extend_to_Pmode (count_exp);
9961 if (TARGET_64BIT)
9962 emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
9963 destreg, srcreg, countreg));
9964 else
9965 emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
9966 destreg, srcreg, countreg));
9967 }
9968
9969 /* For constant aligned (or small unaligned) copies use rep movsl
9970 followed by code copying the rest. For PentiumPro ensure 8 byte
9971 alignment to allow rep movsl acceleration. */
9972
9973 else if (count != 0
9974 && (align >= 8
9975 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
b531087a 9976 || optimize_size || count < (unsigned int) 64))
9977 {
9978 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
9979 if (count & ~(size - 1))
9980 {
9981 countreg = copy_to_mode_reg (counter_mode,
9982 GEN_INT ((count >> (size == 4 ? 2 : 3))
9983 & (TARGET_64BIT ? -1 : 0x3fffffff)));
9984 countreg = ix86_zero_extend_to_Pmode (countreg);
9985 if (size == 4)
9986 {
9987 if (TARGET_64BIT)
9988 emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
9989 destreg, srcreg, countreg));
9990 else
9991 emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
9992 destreg, srcreg, countreg));
9993 }
9994 else
9995 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
9996 destreg, srcreg, countreg));
9997 }
9998 if (size == 8 && (count & 0x04))
9999 emit_insn (gen_strmovsi (destreg, srcreg));
10000 if (count & 0x02)
10001 emit_insn (gen_strmovhi (destreg, srcreg));
10002 if (count & 0x01)
10003 emit_insn (gen_strmovqi (destreg, srcreg));
10004 }
10005 /* The generic code based on the glibc implementation:
10006 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
10007 allowing accelerated copying there)
10008 - copy the data using rep movsl
10009 - copy the rest. */
10010 else
10011 {
10012 rtx countreg2;
10013 rtx label = NULL;
10014 int desired_alignment = (TARGET_PENTIUMPRO
10015 && (count == 0 || count >= (unsigned int) 260)
10016 ? 8 : UNITS_PER_WORD);
10017
10018 /* In case we don't know anything about the alignment, default to
10019	 the library version, since it is usually equally fast and results in
10020	 shorter code.  */
10021 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
10022 {
10023 end_sequence ();
10024 return 0;
10025 }
10026
10027 if (TARGET_SINGLE_STRINGOP)
10028 emit_insn (gen_cld ());
10029
10030 countreg2 = gen_reg_rtx (Pmode);
10031 countreg = copy_to_mode_reg (counter_mode, count_exp);
10032
10033 /* We don't use loops to align destination and to copy parts smaller
10034 than 4 bytes, because gcc is able to optimize such code better (in
10035 the case the destination or the count really is aligned, gcc is often
10036 able to predict the branches) and also it is friendlier to the
a4f31c00 10037 hardware branch prediction.
10038
10039	 Using loops is beneficial for the generic case, because we can
10040	 handle small counts using the loops.  Many CPUs (such as Athlon)
10041	 have large REP prefix setup costs.
10042
10043	 This is quite costly.  Maybe we can revisit this decision later or
10044	 add some customizability to this code.  */
10045
37ad04a5 10046 if (count == 0 && align < desired_alignment)
10047 {
10048 label = gen_label_rtx ();
aaae0bb9 10049 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
d43e0b7d 10050 LEU, 0, counter_mode, 1, label);
10051 }
10052 if (align <= 1)
10053 {
10054 rtx label = ix86_expand_aligntest (destreg, 1);
10055 emit_insn (gen_strmovqi (destreg, srcreg));
10056 ix86_adjust_counter (countreg, 1);
10057 emit_label (label);
10058 LABEL_NUSES (label) = 1;
10059 }
10060 if (align <= 2)
10061 {
10062 rtx label = ix86_expand_aligntest (destreg, 2);
10063 emit_insn (gen_strmovhi (destreg, srcreg));
10064 ix86_adjust_counter (countreg, 2);
10065 emit_label (label);
10066 LABEL_NUSES (label) = 1;
10067 }
37ad04a5 10068 if (align <= 4 && desired_alignment > 4)
10069 {
10070 rtx label = ix86_expand_aligntest (destreg, 4);
10071 emit_insn (gen_strmovsi (destreg, srcreg));
10072 ix86_adjust_counter (countreg, 4);
10073 emit_label (label);
10074 LABEL_NUSES (label) = 1;
10075 }
10076
10077 if (label && desired_alignment > 4 && !TARGET_64BIT)
10078 {
10079 emit_label (label);
10080 LABEL_NUSES (label) = 1;
10081 label = NULL_RTX;
10082 }
10083 if (!TARGET_SINGLE_STRINGOP)
10084 emit_insn (gen_cld ());
10085 if (TARGET_64BIT)
10086 {
10087 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10088 GEN_INT (3)));
10089 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
10090 destreg, srcreg, countreg2));
10091 }
10092 else
10093 {
10094 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
10095 emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
10096 destreg, srcreg, countreg2));
10097 }
10098
10099 if (label)
10100 {
10101 emit_label (label);
10102 LABEL_NUSES (label) = 1;
10103 }
10104 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10105 emit_insn (gen_strmovsi (destreg, srcreg));
10106 if ((align <= 4 || count == 0) && TARGET_64BIT)
10107 {
10108 rtx label = ix86_expand_aligntest (countreg, 4);
10109 emit_insn (gen_strmovsi (destreg, srcreg));
10110 emit_label (label);
10111 LABEL_NUSES (label) = 1;
10112 }
10113 if (align > 2 && count != 0 && (count & 2))
10114 emit_insn (gen_strmovhi (destreg, srcreg));
10115 if (align <= 2 || count == 0)
10116 {
10117 rtx label = ix86_expand_aligntest (countreg, 2);
10118 emit_insn (gen_strmovhi (destreg, srcreg));
10119 emit_label (label);
10120 LABEL_NUSES (label) = 1;
10121 }
10122 if (align > 1 && count != 0 && (count & 1))
10123 emit_insn (gen_strmovqi (destreg, srcreg));
10124 if (align <= 1 || count == 0)
10125 {
10126 rtx label = ix86_expand_aligntest (countreg, 1);
10127 emit_insn (gen_strmovqi (destreg, srcreg));
10128 emit_label (label);
10129 LABEL_NUSES (label) = 1;
10130 }
10131 }
10132
10133 insns = get_insns ();
10134 end_sequence ();
10135
10136 ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
2f937369 10137 emit_insn (insns);
10138 return 1;
10139}
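/* A minimal standalone sketch of the constant-count decomposition used
   above, assuming a 32-bit target (size == 4): the bulk is copied as
   dwords (rep movsl) and the 2- and 1-byte tails follow, mirroring the
   count & 2 / count & 1 tests.  */
static void
movstr_sketch (unsigned char *dst, const unsigned char *src,
	       unsigned long count)
{
  unsigned long i;
  for (i = 0; i + 4 <= count; i += 4)	/* rep movsl */
    {
      dst[i] = src[i]; dst[i + 1] = src[i + 1];
      dst[i + 2] = src[i + 2]; dst[i + 3] = src[i + 3];
    }
  if (count & 2)			/* strmovhi tail */
    {
      dst[i] = src[i]; dst[i + 1] = src[i + 1];
      i += 2;
    }
  if (count & 1)			/* strmovqi tail */
    dst[i] = src[i];
}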
10140
10141/* Expand string clear operation (bzero). Use i386 string operations when
10142 profitable. expand_movstr contains similar code. */
10143int
10144ix86_expand_clrstr (src, count_exp, align_exp)
10145 rtx src, count_exp, align_exp;
10146{
10147 rtx destreg, zeroreg, countreg;
10148 enum machine_mode counter_mode;
10149 HOST_WIDE_INT align = 0;
10150 unsigned HOST_WIDE_INT count = 0;
10151
10152 if (GET_CODE (align_exp) == CONST_INT)
10153 align = INTVAL (align_exp);
10154
5519a4f9 10155 /* This simple hack avoids all inlining code and simplifies code below. */
10156 if (!TARGET_ALIGN_STRINGOPS)
10157 align = 32;
10158
10159 if (GET_CODE (count_exp) == CONST_INT)
10160 count = INTVAL (count_exp);
10161 /* Figure out proper mode for counter. For 32bits it is always SImode,
10162 for 64bits use SImode when possible, otherwise DImode.
10163 Set count to number of bytes copied when known at compile time. */
10164 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
10165 || x86_64_zero_extended_value (count_exp))
10166 counter_mode = SImode;
10167 else
10168 counter_mode = DImode;
10169
10170 destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
10171
10172 emit_insn (gen_cld ());
10173
10174 /* When optimizing for size emit simple rep ; movsb instruction for
10175 counts not divisible by 4. */
10176
10177 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
10178 {
10179 countreg = ix86_zero_extend_to_Pmode (count_exp);
10180 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
10181 if (TARGET_64BIT)
10182 emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
10183 destreg, countreg));
10184 else
10185 emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
10186 destreg, countreg));
10187 }
10188 else if (count != 0
10189 && (align >= 8
10190 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
b531087a 10191 || optimize_size || count < (unsigned int) 64))
10192 {
10193 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10194 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
10195 if (count & ~(size - 1))
10196 {
10197 countreg = copy_to_mode_reg (counter_mode,
10198 GEN_INT ((count >> (size == 4 ? 2 : 3))
10199 & (TARGET_64BIT ? -1 : 0x3fffffff)));
10200 countreg = ix86_zero_extend_to_Pmode (countreg);
10201 if (size == 4)
10202 {
10203 if (TARGET_64BIT)
10204 emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
10205 destreg, countreg));
10206 else
10207 emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
10208 destreg, countreg));
10209 }
10210 else
10211 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
10212 destreg, countreg));
10213 }
10214 if (size == 8 && (count & 0x04))
10215 emit_insn (gen_strsetsi (destreg,
10216 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10217 if (count & 0x02)
10218 emit_insn (gen_strsethi (destreg,
10219 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10220 if (count & 0x01)
10221 emit_insn (gen_strsetqi (destreg,
10222 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10223 }
10224 else
10225 {
10226 rtx countreg2;
10227 rtx label = NULL;
10228 /* Compute desired alignment of the string operation. */
10229 int desired_alignment = (TARGET_PENTIUMPRO
10230 && (count == 0 || count >= (unsigned int) 260)
10231 ? 8 : UNITS_PER_WORD);
10232
10233 /* In case we don't know anything about the alignment, default to
10234	 the library version, since it is usually equally fast and results in
10235	 shorter code.  */
10236 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
10237 return 0;
10238
10239 if (TARGET_SINGLE_STRINGOP)
10240 emit_insn (gen_cld ());
10241
10242 countreg2 = gen_reg_rtx (Pmode);
10243 countreg = copy_to_mode_reg (counter_mode, count_exp);
10244 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
10245
37ad04a5 10246 if (count == 0 && align < desired_alignment)
10247 {
10248 label = gen_label_rtx ();
37ad04a5 10249 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
d43e0b7d 10250 LEU, 0, counter_mode, 1, label);
10251 }
10252 if (align <= 1)
10253 {
10254 rtx label = ix86_expand_aligntest (destreg, 1);
10255 emit_insn (gen_strsetqi (destreg,
10256 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10257 ix86_adjust_counter (countreg, 1);
10258 emit_label (label);
10259 LABEL_NUSES (label) = 1;
10260 }
10261 if (align <= 2)
10262 {
10263 rtx label = ix86_expand_aligntest (destreg, 2);
10264 emit_insn (gen_strsethi (destreg,
10265 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10266 ix86_adjust_counter (countreg, 2);
10267 emit_label (label);
10268 LABEL_NUSES (label) = 1;
10269 }
37ad04a5 10270 if (align <= 4 && desired_alignment > 4)
10271 {
10272 rtx label = ix86_expand_aligntest (destreg, 4);
10273 emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
10274 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
10275 : zeroreg)));
10276 ix86_adjust_counter (countreg, 4);
10277 emit_label (label);
10278 LABEL_NUSES (label) = 1;
10279 }
10280
10281 if (label && desired_alignment > 4 && !TARGET_64BIT)
10282 {
10283 emit_label (label);
10284 LABEL_NUSES (label) = 1;
10285 label = NULL_RTX;
10286 }
10287
10288 if (!TARGET_SINGLE_STRINGOP)
10289 emit_insn (gen_cld ());
10290 if (TARGET_64BIT)
10291 {
10292 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10293 GEN_INT (3)));
10294 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
10295 destreg, countreg2));
10296 }
10297 else
10298 {
10299 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
10300 emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
10301 destreg, countreg2));
10302 }
10303 if (label)
10304 {
10305 emit_label (label);
10306 LABEL_NUSES (label) = 1;
10307 }
37ad04a5 10308
10309 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10310 emit_insn (gen_strsetsi (destreg,
10311 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10312 if (TARGET_64BIT && (align <= 4 || count == 0))
10313 {
79258dce 10314 rtx label = ix86_expand_aligntest (countreg, 4);
10315 emit_insn (gen_strsetsi (destreg,
10316 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10317 emit_label (label);
10318 LABEL_NUSES (label) = 1;
10319 }
10320 if (align > 2 && count != 0 && (count & 2))
10321 emit_insn (gen_strsethi (destreg,
10322 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10323 if (align <= 2 || count == 0)
10324 {
74411039 10325 rtx label = ix86_expand_aligntest (countreg, 2);
10326 emit_insn (gen_strsethi (destreg,
10327 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10328 emit_label (label);
10329 LABEL_NUSES (label) = 1;
10330 }
10331 if (align > 1 && count != 0 && (count & 1))
10332 emit_insn (gen_strsetqi (destreg,
10333 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10334 if (align <= 1 || count == 0)
10335 {
74411039 10336 rtx label = ix86_expand_aligntest (countreg, 1);
10337 emit_insn (gen_strsetqi (destreg,
10338 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10339 emit_label (label);
10340 LABEL_NUSES (label) = 1;
10341 }
10342 }
10343 return 1;
10344}
10345/* Expand strlen. */
10346int
10347ix86_expand_strlen (out, src, eoschar, align)
10348 rtx out, src, eoschar, align;
10349{
10350 rtx addr, scratch1, scratch2, scratch3, scratch4;
10351
10352  /* The generic case of the strlen expander is long.  Avoid its
10353     expansion unless TARGET_INLINE_ALL_STRINGOPS.  */
10354
10355 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
10356 && !TARGET_INLINE_ALL_STRINGOPS
10357 && !optimize_size
10358 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
10359 return 0;
10360
10361 addr = force_reg (Pmode, XEXP (src, 0));
10362 scratch1 = gen_reg_rtx (Pmode);
10363
10364 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
10365 && !optimize_size)
10366 {
10367 /* Well it seems that some optimizer does not combine a call like
10368 foo(strlen(bar), strlen(bar));
10369	 when the move and the subtraction are done here.  It does calculate
10370 the length just once when these instructions are done inside of
10371 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
10372 often used and I use one fewer register for the lifetime of
10373 output_strlen_unroll() this is better. */
10374
10375 emit_move_insn (out, addr);
10376
10377 ix86_expand_strlensi_unroll_1 (out, align);
10378
10379 /* strlensi_unroll_1 returns the address of the zero at the end of
10380 the string, like memchr(), so compute the length by subtracting
10381 the start address. */
10382 if (TARGET_64BIT)
10383 emit_insn (gen_subdi3 (out, out, addr));
10384 else
10385 emit_insn (gen_subsi3 (out, out, addr));
10386 }
10387 else
10388 {
10389 scratch2 = gen_reg_rtx (Pmode);
10390 scratch3 = gen_reg_rtx (Pmode);
10391 scratch4 = force_reg (Pmode, constm1_rtx);
10392
10393 emit_move_insn (scratch3, addr);
10394 eoschar = force_reg (QImode, eoschar);
10395
10396 emit_insn (gen_cld ());
10397 if (TARGET_64BIT)
10398 {
10399 emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
10400 align, scratch4, scratch3));
10401 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
10402 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
10403 }
10404 else
10405 {
10406 emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
10407 align, scratch4, scratch3));
10408 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
10409 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
10410 }
10411 }
10412 return 1;
10413}
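/* A minimal standalone sketch of the count arithmetic after the repnz
   scasb emitted above: the count register starts at -1 and is decremented
   for every byte examined, including the terminator, so the length is
   recovered as (~count) - 1, which is what the one_cmpl and add -1 insns
   compute.  */
static unsigned long
scasb_len_sketch (const char *s)
{
  unsigned long count = (unsigned long) -1;
  do
    count--;			/* repnz scasb: one decrement per byte */
  while (*s++ != 0);
  return ~count - 1;		/* one_cmpl; add constm1 */
}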
10414
10415/* Expand the appropriate insns for doing strlen if not just doing
10416 repnz; scasb
10417
10418 out = result, initialized with the start address
10419 align_rtx = alignment of the address.
10420    scratch = scratch register, initialized with the start address when
77ebd435 10421 not aligned, otherwise undefined
10422
10423 This is just the body. It needs the initialisations mentioned above and
10424 some address computing at the end. These things are done in i386.md. */
10425
10426static void
10427ix86_expand_strlensi_unroll_1 (out, align_rtx)
10428 rtx out, align_rtx;
3f803cd9 10429{
10430 int align;
10431 rtx tmp;
10432 rtx align_2_label = NULL_RTX;
10433 rtx align_3_label = NULL_RTX;
10434 rtx align_4_label = gen_label_rtx ();
10435 rtx end_0_label = gen_label_rtx ();
e075ae69 10436 rtx mem;
e2e52e1b 10437 rtx tmpreg = gen_reg_rtx (SImode);
0945b39d 10438 rtx scratch = gen_reg_rtx (SImode);
10439
10440 align = 0;
10441 if (GET_CODE (align_rtx) == CONST_INT)
10442 align = INTVAL (align_rtx);
3f803cd9 10443
e9a25f70 10444 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
3f803cd9 10445
e9a25f70 10446 /* Is there a known alignment and is it less than 4? */
e075ae69 10447 if (align < 4)
3f803cd9 10448 {
10449 rtx scratch1 = gen_reg_rtx (Pmode);
10450 emit_move_insn (scratch1, out);
e9a25f70 10451 /* Is there a known alignment and is it not 2? */
e075ae69 10452 if (align != 2)
3f803cd9 10453 {
e075ae69
RH
10454 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
10455 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
10456
10457 /* Leave just the 3 lower bits. */
0945b39d 10458 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
e075ae69
RH
10459 NULL_RTX, 0, OPTAB_WIDEN);
10460
9076b9c1 10461 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
d43e0b7d 10462 Pmode, 1, align_4_label);
9076b9c1 10463 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
d43e0b7d 10464 Pmode, 1, align_2_label);
9076b9c1 10465 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
d43e0b7d 10466 Pmode, 1, align_3_label);
3f803cd9
SC
10467 }
10468 else
10469 {
e9a25f70
JL
10470 /* Since the alignment is 2, we have to check 2 or 0 bytes;
10471 check if is aligned to 4 - byte. */
e9a25f70 10472
0945b39d 10473 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
e075ae69
RH
10474 NULL_RTX, 0, OPTAB_WIDEN);
10475
9076b9c1 10476 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
d43e0b7d 10477 Pmode, 1, align_4_label);
3f803cd9
SC
10478 }
10479
e075ae69 10480 mem = gen_rtx_MEM (QImode, out);
e9a25f70 10481
e075ae69 10482 /* Now compare the bytes. */
e9a25f70 10483
0f290768 10484 /* Compare the first n unaligned byte on a byte per byte basis. */
9076b9c1 10485 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
d43e0b7d 10486 QImode, 1, end_0_label);
3f803cd9 10487
0f290768 10488 /* Increment the address. */
0945b39d
JH
10489 if (TARGET_64BIT)
10490 emit_insn (gen_adddi3 (out, out, const1_rtx));
10491 else
10492 emit_insn (gen_addsi3 (out, out, const1_rtx));
e9a25f70 10493
e075ae69
RH
10494 /* Not needed with an alignment of 2 */
10495 if (align != 2)
10496 {
10497 emit_label (align_2_label);
3f803cd9 10498
d43e0b7d
RK
10499 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
10500 end_0_label);
e075ae69 10501
0945b39d
JH
10502 if (TARGET_64BIT)
10503 emit_insn (gen_adddi3 (out, out, const1_rtx));
10504 else
10505 emit_insn (gen_addsi3 (out, out, const1_rtx));
e075ae69
RH
10506
10507 emit_label (align_3_label);
10508 }
10509
d43e0b7d
RK
10510 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
10511 end_0_label);
e075ae69 10512
0945b39d
JH
10513 if (TARGET_64BIT)
10514 emit_insn (gen_adddi3 (out, out, const1_rtx));
10515 else
10516 emit_insn (gen_addsi3 (out, out, const1_rtx));
3f803cd9
SC
10517 }
10518
e075ae69
RH
10519 /* Generate loop to check 4 bytes at a time. It is not a good idea to
10520 align this loop. It gives only huge programs, but does not help to
10521 speed up. */
10522 emit_label (align_4_label);
3f803cd9 10523
e075ae69
RH
10524 mem = gen_rtx_MEM (SImode, out);
10525 emit_move_insn (scratch, mem);
0945b39d
JH
10526 if (TARGET_64BIT)
10527 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
10528 else
10529 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
e075ae69 10530
e2e52e1b
JH
10531 /* This formula yields a nonzero result iff one of the bytes is zero.
10532 This saves three branches inside loop and many cycles. */
10533
10534 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
10535 emit_insn (gen_one_cmplsi2 (scratch, scratch));
10536 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
7471a1f0 10537 emit_insn (gen_andsi3 (tmpreg, tmpreg,
d8bf17f9 10538 gen_int_mode (0x80808080, SImode)));
d43e0b7d
RK
10539 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
10540 align_4_label);
e2e52e1b
JH
10541
10542 if (TARGET_CMOVE)
10543 {
10544 rtx reg = gen_reg_rtx (SImode);
0945b39d 10545 rtx reg2 = gen_reg_rtx (Pmode);
e2e52e1b
JH
10546 emit_move_insn (reg, tmpreg);
10547 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
10548
0f290768 10549 /* If zero is not in the first two bytes, move two bytes forward. */
16189740 10550 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
e2e52e1b
JH
10551 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
10552 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
10553 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
10554 gen_rtx_IF_THEN_ELSE (SImode, tmp,
77ebd435
AJ
10555 reg,
10556 tmpreg)));
e2e52e1b 10557 /* Emit lea manually to avoid clobbering of flags. */
0945b39d
JH
10558 emit_insn (gen_rtx_SET (SImode, reg2,
10559 gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
e2e52e1b
JH
10560
10561 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
10562 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
10563 emit_insn (gen_rtx_SET (VOIDmode, out,
0945b39d 10564 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
a4f31c00
AJ
10565 reg2,
10566 out)));
e2e52e1b
JH
10567
10568 }
10569 else
10570 {
10571 rtx end_2_label = gen_label_rtx ();
10572 /* Is zero in the first two bytes? */
10573
16189740 10574 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
e2e52e1b
JH
10575 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
10576 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
10577 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10578 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
10579 pc_rtx);
10580 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
10581 JUMP_LABEL (tmp) = end_2_label;
10582
0f290768 10583 /* Not in the first two. Move two bytes forward. */
e2e52e1b 10584 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
0945b39d
JH
10585 if (TARGET_64BIT)
10586 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
10587 else
10588 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
e2e52e1b
JH
10589
10590 emit_label (end_2_label);
10591
10592 }
10593
0f290768 10594 /* Avoid branch in fixing the byte. */
e2e52e1b 10595 tmpreg = gen_lowpart (QImode, tmpreg);
7e08e190 10596 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
0945b39d
JH
10597 if (TARGET_64BIT)
10598 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3)));
10599 else
10600 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));
e075ae69
RH
10601
10602 emit_label (end_0_label);
10603}
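
/* Exposition only (not part of the compiler): the zero-byte test emitted
   above computes, for a 32-bit word V,

       (V - 0x01010101) & ~V & 0x80808080

   which is nonzero iff some byte of V is zero.  Subtracting 1 from a
   byte sets its top bit either when the byte was zero (it wraps to 0xff)
   or when its top bit was already set; the ~V factor removes the second
   case.  A borrow can spill into a higher byte only when a lower byte was
   itself zero, so there are no false positives.  A standalone equivalent,
   kept under #if 0 so it is not compiled:  */
#if 0
static int
word_has_zero_byte (v)
     unsigned int v;		/* 32 bits, like SImode above */
{
  return ((v - 0x01010101U) & ~v & 0x80808080U) != 0;
}
/* word_has_zero_byte (0x41004242) == 1   (byte 2 is zero)
   word_has_zero_byte (0x41414242) == 0  */
#endif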

void
ix86_expand_call (retval, fnaddr, callarg1, callarg2, pop)
     rtx retval, fnaddr, callarg1, callarg2, pop;
{
  rtx use = NULL, call;

  if (pop == const0_rtx)
    pop = NULL;
  if (TARGET_64BIT && pop)
    abort ();

#if TARGET_MACHO
  if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
    fnaddr = machopic_indirect_call_target (fnaddr);
#else
  /* Static functions and indirect calls don't need the pic register.  */
  if (! TARGET_64BIT && flag_pic
      && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
      && ! SYMBOL_REF_FLAG (XEXP (fnaddr, 0)))
    use_reg (&use, pic_offset_table_rtx);

  if (TARGET_64BIT && INTVAL (callarg2) >= 0)
    {
      rtx al = gen_rtx_REG (QImode, 0);
      emit_move_insn (al, callarg2);
      use_reg (&use, al);
    }
#endif /* TARGET_MACHO */

  if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
    {
      fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
      fnaddr = gen_rtx_MEM (QImode, fnaddr);
    }

  call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
  if (retval)
    call = gen_rtx_SET (VOIDmode, retval, call);
  if (pop)
    {
      pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
      pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
      call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
    }

  call = emit_call_insn (call);
  if (use)
    CALL_INSN_FUNCTION_USAGE (call) = use;
}

\f
/* Clear stack slot assignments remembered from previous functions.
   This is called from INIT_EXPANDERS once before RTL is emitted for each
   function.  */

static struct machine_function *
ix86_init_machine_status ()
{
  return ggc_alloc_cleared (sizeof (struct machine_function));
}

/* Return a MEM corresponding to a stack slot with mode MODE.
   Allocate a new slot if necessary.

   The RTL for a function can have several slots available: N is
   which slot to use.  */

rtx
assign_386_stack_local (mode, n)
     enum machine_mode mode;
     int n;
{
  if (n < 0 || n >= MAX_386_STACK_LOCALS)
    abort ();

  if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
    ix86_stack_locals[(int) mode][n]
      = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);

  return ix86_stack_locals[(int) mode][n];
}

/* Construct the SYMBOL_REF for the tls_get_addr function.  */

static GTY(()) rtx ix86_tls_symbol;

rtx
ix86_tls_get_addr ()
{
  if (!ix86_tls_symbol)
    {
      ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, (TARGET_GNU_TLS
						    ? "___tls_get_addr"
						    : "__tls_get_addr"));
    }

  return ix86_tls_symbol;
}
\f
/* Calculate the length of the memory address in the instruction
   encoding.  Does not include the one-byte modrm, opcode, or prefix.  */

static int
memory_address_length (addr)
     rtx addr;
{
  struct ix86_address parts;
  rtx base, index, disp;
  int len;

  if (GET_CODE (addr) == PRE_DEC
      || GET_CODE (addr) == POST_INC
      || GET_CODE (addr) == PRE_MODIFY
      || GET_CODE (addr) == POST_MODIFY)
    return 0;

  if (! ix86_decompose_address (addr, &parts))
    abort ();

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  len = 0;

  /* Register Indirect.  */
  if (base && !index && !disp)
    {
      /* Special cases: ebp and esp need the two-byte modrm form.  */
      if (addr == stack_pointer_rtx
	  || addr == arg_pointer_rtx
	  || addr == frame_pointer_rtx
	  || addr == hard_frame_pointer_rtx)
	len = 1;
    }

  /* Direct Addressing.  */
  else if (disp && !base && !index)
    len = 4;

  else
    {
      /* Find the length of the displacement constant.  */
      if (disp)
	{
	  if (GET_CODE (disp) == CONST_INT
	      && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
	    len = 1;
	  else
	    len = 4;
	}

      /* An index requires the two-byte modrm form.  */
      if (index)
	len += 1;
    }

  return len;
}
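
/* Exposition only: worked examples of the computation above, using
   AT&T-syntax operands (the decodings are our annotation, not part of
   the source):

     (%eax)           base only, not esp/ebp     -> len 0
     (%esp)           special case               -> len 1
     foo              displacement only          -> len 4
     8(%ebx)          disp fits 'K' (signed 8)   -> len 1
     8(%ebx,%ecx,2)   disp8 plus an index        -> len 1 + 1 = 2
     0x1234(%ebx)     disp32                     -> len 4

   The modrm byte itself, opcode, and prefixes are, per the comment
   above, counted elsewhere.  */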

/* Compute default value for "length_immediate" attribute.  When SHORTFORM
   is set, expect that the insn has an 8-bit immediate alternative.  */

int
ix86_attr_length_immediate_default (insn, shortform)
     rtx insn;
     int shortform;
{
  int len = 0;
  int i;
  extract_insn_cached (insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (CONSTANT_P (recog_data.operand[i]))
      {
	if (len)
	  abort ();
	if (shortform
	    && GET_CODE (recog_data.operand[i]) == CONST_INT
	    && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
	  len = 1;
	else
	  {
	    switch (get_attr_mode (insn))
	      {
	      case MODE_QI:
		len += 1;
		break;
	      case MODE_HI:
		len += 2;
		break;
	      case MODE_SI:
		len += 4;
		break;
	      /* Immediates for DImode instructions are encoded as
		 32-bit sign-extended values.  */
	      case MODE_DI:
		len += 4;
		break;
	      default:
		fatal_insn ("unknown insn mode", insn);
	      }
	  }
      }
  return len;
}
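
/* Exposition only: with SHORTFORM set, "addl $8, %eax" reports a 1-byte
   immediate (8 satisfies the 'K' constraint), while
   "addl $0x12345678, %eax" reports 4 bytes.  In MODE_DI the answer is
   still 4, since per the comment above 64-bit immediates are the
   sign-extended 32-bit form.  */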

/* Compute default value for "length_address" attribute.  */

int
ix86_attr_length_address_default (insn)
     rtx insn;
{
  int i;
  extract_insn_cached (insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (GET_CODE (recog_data.operand[i]) == MEM)
      return memory_address_length (XEXP (recog_data.operand[i], 0));
  return 0;
}
\f
/* Return the maximum number of instructions a cpu can issue.  */

static int
ix86_issue_rate ()
{
  switch (ix86_cpu)
    {
    case PROCESSOR_PENTIUM:
    case PROCESSOR_K6:
      return 2;

    case PROCESSOR_PENTIUMPRO:
    case PROCESSOR_PENTIUM4:
    case PROCESSOR_ATHLON:
      return 3;

    default:
      return 1;
    }
}

/* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
   by DEP_INSN and nothing else set by DEP_INSN.  */

static int
ix86_flags_dependant (insn, dep_insn, insn_type)
     rtx insn, dep_insn;
     enum attr_type insn_type;
{
  rtx set, set2;

  /* Simplify the test for uninteresting insns.  */
  if (insn_type != TYPE_SETCC
      && insn_type != TYPE_ICMOV
      && insn_type != TYPE_FCMOV
      && insn_type != TYPE_IBR)
    return 0;

  if ((set = single_set (dep_insn)) != 0)
    {
      set = SET_DEST (set);
      set2 = NULL_RTX;
    }
  else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
	   && XVECLEN (PATTERN (dep_insn), 0) == 2
	   && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
	   && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
    {
      set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
      /* Take the destination of the second SET; using element 0 twice
	 would ignore what the second SET writes.  */
      set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
    }
  else
    return 0;

  if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
    return 0;

  /* This test is true if the dependent insn reads the flags but
     not any other potentially set register.  */
  if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
    return 0;

  if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
    return 0;

  return 1;
}

/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
   address with operands set by DEP_INSN.  */

static int
ix86_agi_dependant (insn, dep_insn, insn_type)
     rtx insn, dep_insn;
     enum attr_type insn_type;
{
  rtx addr;

  if (insn_type == TYPE_LEA
      && TARGET_PENTIUM)
    {
      addr = PATTERN (insn);
      if (GET_CODE (addr) == SET)
	;
      else if (GET_CODE (addr) == PARALLEL
	       && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
	addr = XVECEXP (addr, 0, 0);
      else
	abort ();
      addr = SET_SRC (addr);
    }
  else
    {
      int i;
      extract_insn_cached (insn);
      for (i = recog_data.n_operands - 1; i >= 0; --i)
	if (GET_CODE (recog_data.operand[i]) == MEM)
	  {
	    addr = XEXP (recog_data.operand[i], 0);
	    goto found;
	  }
      return 0;
    found:;
    }

  return modified_in_p (addr, dep_insn);
}

static int
ix86_adjust_cost (insn, link, dep_insn, cost)
     rtx insn, link, dep_insn;
     int cost;
{
  enum attr_type insn_type, dep_insn_type;
  enum attr_memory memory, dep_memory;
  rtx set, set2;
  int dep_insn_code_number;

  /* Anti and output dependencies have zero cost on all CPUs.  */
  if (REG_NOTE_KIND (link) != 0)
    return 0;

  dep_insn_code_number = recog_memoized (dep_insn);

  /* If we can't recognize the insns, we can't really do anything.  */
  if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
    return cost;

  insn_type = get_attr_type (insn);
  dep_insn_type = get_attr_type (dep_insn);

  switch (ix86_cpu)
    {
    case PROCESSOR_PENTIUM:
      /* Address Generation Interlock adds a cycle of latency.  */
      if (ix86_agi_dependant (insn, dep_insn, insn_type))
	cost += 1;

      /* ??? Compares pair with jump/setcc.  */
      if (ix86_flags_dependant (insn, dep_insn, insn_type))
	cost = 0;

      /* Floating point stores require value to be ready one cycle
	 earlier.  */
      if (insn_type == TYPE_FMOV
	  && get_attr_memory (insn) == MEMORY_STORE
	  && !ix86_agi_dependant (insn, dep_insn, insn_type))
	cost += 1;
      break;

    case PROCESSOR_PENTIUMPRO:
      memory = get_attr_memory (insn);
      dep_memory = get_attr_memory (dep_insn);

      /* Since we can't represent delayed latencies of load+operation,
	 increase the cost here for non-imov insns.  */
      if (dep_insn_type != TYPE_IMOV
	  && dep_insn_type != TYPE_FMOV
	  && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
	cost += 1;

      /* INT->FP conversion is expensive.  */
      if (get_attr_fp_int_src (dep_insn))
	cost += 5;

      /* There is one cycle extra latency between an FP op and a store.  */
      if (insn_type == TYPE_FMOV
	  && (set = single_set (dep_insn)) != NULL_RTX
	  && (set2 = single_set (insn)) != NULL_RTX
	  && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
	  && GET_CODE (SET_DEST (set2)) == MEM)
	cost += 1;

      /* Show ability of reorder buffer to hide latency of load by executing
	 in parallel with previous instruction in case
	 previous instruction is not needed to compute the address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
	  && !ix86_agi_dependant (insn, dep_insn, insn_type))
	{
	  /* Claim moves to take one cycle, as the core can issue one load
	     at a time and the next load can start a cycle later.  */
	  if (dep_insn_type == TYPE_IMOV
	      || dep_insn_type == TYPE_FMOV)
	    cost = 1;
	  else if (cost > 1)
	    cost--;
	}
      break;

    case PROCESSOR_K6:
      memory = get_attr_memory (insn);
      dep_memory = get_attr_memory (dep_insn);
      /* The esp dependency is resolved before the instruction is really
	 finished.  */
      if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
	  && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
	return 1;

      /* Since we can't represent delayed latencies of load+operation,
	 increase the cost here for non-imov insns.  */
      if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
	cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;

      /* INT->FP conversion is expensive.  */
      if (get_attr_fp_int_src (dep_insn))
	cost += 5;

      /* Show ability of reorder buffer to hide latency of load by executing
	 in parallel with previous instruction in case
	 previous instruction is not needed to compute the address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
	  && !ix86_agi_dependant (insn, dep_insn, insn_type))
	{
	  /* Claim moves to take one cycle, as the core can issue one load
	     at a time and the next load can start a cycle later.  */
	  if (dep_insn_type == TYPE_IMOV
	      || dep_insn_type == TYPE_FMOV)
	    cost = 1;
	  else if (cost > 2)
	    cost -= 2;
	  else
	    cost = 1;
	}
      break;

    case PROCESSOR_ATHLON:
      memory = get_attr_memory (insn);
      dep_memory = get_attr_memory (dep_insn);

      if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
	{
	  if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV)
	    cost += 2;
	  else
	    cost += 3;
	}
      /* Show ability of reorder buffer to hide latency of load by executing
	 in parallel with previous instruction in case
	 previous instruction is not needed to compute the address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
	  && !ix86_agi_dependant (insn, dep_insn, insn_type))
	{
	  /* Claim moves to take one cycle, as the core can issue one load
	     at a time and the next load can start a cycle later.  */
	  if (dep_insn_type == TYPE_IMOV
	      || dep_insn_type == TYPE_FMOV)
	    cost = 0;
	  else if (cost >= 3)
	    cost -= 3;
	  else
	    cost = 0;
	}
      break;

    default:
      break;
    }

  return cost;
}

static union
{
  struct ppro_sched_data
  {
    rtx decode[3];
    int issued_this_cycle;
  } ppro;
} ix86_sched_data;

static enum attr_ppro_uops
ix86_safe_ppro_uops (insn)
     rtx insn;
{
  if (recog_memoized (insn) >= 0)
    return get_attr_ppro_uops (insn);
  else
    return PPRO_UOPS_MANY;
}

static void
ix86_dump_ppro_packet (dump)
     FILE *dump;
{
  if (ix86_sched_data.ppro.decode[0])
    {
      fprintf (dump, "PPRO packet: %d",
	       INSN_UID (ix86_sched_data.ppro.decode[0]));
      if (ix86_sched_data.ppro.decode[1])
	fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
      if (ix86_sched_data.ppro.decode[2])
	fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
      fputc ('\n', dump);
    }
}

/* We're beginning a new block.  Initialize data structures as necessary.  */

static void
ix86_sched_init (dump, sched_verbose, veclen)
     FILE *dump ATTRIBUTE_UNUSED;
     int sched_verbose ATTRIBUTE_UNUSED;
     int veclen ATTRIBUTE_UNUSED;
{
  memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
}

/* Shift INSN to SLOT, and shift everything else down.  */

static void
ix86_reorder_insn (insnp, slot)
     rtx *insnp, *slot;
{
  if (insnp != slot)
    {
      rtx insn = *insnp;
      do
	insnp[0] = insnp[1];
      while (++insnp != slot);
      *insnp = insn;
    }
}
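
/* Exposition only: the rotation above, shown on a plain array.  With
   q = { 10, 20, 30, 40, 50 }, the analogous call rotate (&q[1], &q[4])
   leaves q = { 10, 30, 40, 50, 20 }: the chosen element moves to the
   last (head-of-queue) slot and the elements it skipped shift down one.
   Kept under #if 0 so it is not compiled.  */
#if 0
static void
rotate (fromp, slot)
     int *fromp, *slot;
{
  int tmp = *fromp;
  do
    fromp[0] = fromp[1];
  while (++fromp != slot);
  *fromp = tmp;
}
#endif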

static void
ix86_sched_reorder_ppro (ready, e_ready)
     rtx *ready;
     rtx *e_ready;
{
  rtx decode[3];
  enum attr_ppro_uops cur_uops;
  int issued_this_cycle;
  rtx *insnp;
  int i;

  /* At this point .ppro.decode contains the state of the three
     decoders from last "cycle".  That is, those insns that were
     actually independent.  But here we're scheduling for the
     decoder, and we may find things that are decodable in the
     same cycle.  */

  memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
  issued_this_cycle = 0;

  insnp = e_ready;
  cur_uops = ix86_safe_ppro_uops (*insnp);

  /* If the decoders are empty, and we've a complex insn at the
     head of the priority queue, let it issue without complaint.  */
  if (decode[0] == NULL)
    {
      if (cur_uops == PPRO_UOPS_MANY)
	{
	  decode[0] = *insnp;
	  goto ppro_done;
	}

      /* Otherwise, search for a 2-4 uop insn to issue.  */
      while (cur_uops != PPRO_UOPS_FEW)
	{
	  if (insnp == ready)
	    break;
	  cur_uops = ix86_safe_ppro_uops (*--insnp);
	}

      /* If so, move it to the head of the line.  */
      if (cur_uops == PPRO_UOPS_FEW)
	ix86_reorder_insn (insnp, e_ready);

      /* Issue the head of the queue.  */
      issued_this_cycle = 1;
      decode[0] = *e_ready--;
    }

  /* Look for simple insns to fill in the other two slots.  */
  for (i = 1; i < 3; ++i)
    if (decode[i] == NULL)
      {
	if (ready > e_ready)
	  goto ppro_done;

	insnp = e_ready;
	cur_uops = ix86_safe_ppro_uops (*insnp);
	while (cur_uops != PPRO_UOPS_ONE)
	  {
	    if (insnp == ready)
	      break;
	    cur_uops = ix86_safe_ppro_uops (*--insnp);
	  }

	/* Found one.  Move it to the head of the queue and issue it.  */
	if (cur_uops == PPRO_UOPS_ONE)
	  {
	    ix86_reorder_insn (insnp, e_ready);
	    decode[i] = *e_ready--;
	    issued_this_cycle++;
	    continue;
	  }

	/* ??? Didn't find one.  Ideally, here we would do a lazy split
	   of 2-uop insns, issue one and queue the other.  */
      }

 ppro_done:
  if (issued_this_cycle == 0)
    issued_this_cycle = 1;
  ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
}

/* We are about to begin issuing insns for this clock cycle.
   Override the default sort algorithm to better slot instructions.  */

static int
ix86_sched_reorder (dump, sched_verbose, ready, n_readyp, clock_var)
     FILE *dump ATTRIBUTE_UNUSED;
     int sched_verbose ATTRIBUTE_UNUSED;
     rtx *ready;
     int *n_readyp;
     int clock_var ATTRIBUTE_UNUSED;
{
  int n_ready = *n_readyp;
  rtx *e_ready = ready + n_ready - 1;

  /* Make sure to go ahead and initialize key items in
     ix86_sched_data if we are not going to bother trying to
     reorder the ready queue.  */
  if (n_ready < 2)
    {
      ix86_sched_data.ppro.issued_this_cycle = 1;
      goto out;
    }

  switch (ix86_cpu)
    {
    default:
      break;

    case PROCESSOR_PENTIUMPRO:
      ix86_sched_reorder_ppro (ready, e_ready);
      break;
    }

out:
  return ix86_issue_rate ();
}

/* We are about to issue INSN.  Return the number of insns left on the
   ready queue that can be issued this cycle.  */

static int
ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
     FILE *dump;
     int sched_verbose;
     rtx insn;
     int can_issue_more;
{
  int i;
  switch (ix86_cpu)
    {
    default:
      return can_issue_more - 1;

    case PROCESSOR_PENTIUMPRO:
      {
	enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);

	if (uops == PPRO_UOPS_MANY)
	  {
	    if (sched_verbose)
	      ix86_dump_ppro_packet (dump);
	    ix86_sched_data.ppro.decode[0] = insn;
	    ix86_sched_data.ppro.decode[1] = NULL;
	    ix86_sched_data.ppro.decode[2] = NULL;
	    if (sched_verbose)
	      ix86_dump_ppro_packet (dump);
	    ix86_sched_data.ppro.decode[0] = NULL;
	  }
	else if (uops == PPRO_UOPS_FEW)
	  {
	    if (sched_verbose)
	      ix86_dump_ppro_packet (dump);
	    ix86_sched_data.ppro.decode[0] = insn;
	    ix86_sched_data.ppro.decode[1] = NULL;
	    ix86_sched_data.ppro.decode[2] = NULL;
	  }
	else
	  {
	    for (i = 0; i < 3; ++i)
	      if (ix86_sched_data.ppro.decode[i] == NULL)
		{
		  ix86_sched_data.ppro.decode[i] = insn;
		  break;
		}
	    if (i == 3)
	      abort ();
	    if (i == 2)
	      {
		if (sched_verbose)
		  ix86_dump_ppro_packet (dump);
		ix86_sched_data.ppro.decode[0] = NULL;
		ix86_sched_data.ppro.decode[1] = NULL;
		ix86_sched_data.ppro.decode[2] = NULL;
	      }
	  }
      }
      return --ix86_sched_data.ppro.issued_this_cycle;
    }
}

static int
ia32_use_dfa_pipeline_interface ()
{
  if (ix86_cpu == PROCESSOR_PENTIUM)
    return 1;
  return 0;
}

/* How many alternative schedules to try.  This should be as wide as the
   scheduling freedom in the DFA, but no wider.  Making this value too
   large results in extra work for the scheduler.  */

static int
ia32_multipass_dfa_lookahead ()
{
  if (ix86_cpu == PROCESSOR_PENTIUM)
    return 2;
  else
    return 0;
}

\f
/* Walk through INSNS and look for MEM references whose address is DSTREG or
   SRCREG and set the memory attribute to those of DSTREF and SRCREF, as
   appropriate.  */

void
ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
     rtx insns;
     rtx dstref, srcref, dstreg, srcreg;
{
  rtx insn;

  for (insn = insns; insn != 0; insn = NEXT_INSN (insn))
    if (INSN_P (insn))
      ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
				 dstreg, srcreg);
}

/* Subroutine of above to actually do the updating by recursively walking
   the rtx.  */

static void
ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
     rtx x;
     rtx dstref, srcref, dstreg, srcreg;
{
  enum rtx_code code = GET_CODE (x);
  const char *format_ptr = GET_RTX_FORMAT (code);
  int i, j;

  if (code == MEM && XEXP (x, 0) == dstreg)
    MEM_COPY_ATTRIBUTES (x, dstref);
  else if (code == MEM && XEXP (x, 0) == srcreg)
    MEM_COPY_ATTRIBUTES (x, srcref);

  for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
    {
      if (*format_ptr == 'e')
	ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
				   dstreg, srcreg);
      else if (*format_ptr == 'E')
	for (j = XVECLEN (x, i) - 1; j >= 0; j--)
	  ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
				     dstreg, srcreg);
    }
}
\f
/* Compute the alignment given to a constant that is being placed in memory.
   EXP is the constant and ALIGN is the alignment that the object would
   ordinarily have.
   The value of this function is used instead of that alignment to align
   the object.  */

int
ix86_constant_alignment (exp, align)
     tree exp;
     int align;
{
  if (TREE_CODE (exp) == REAL_CST)
    {
      if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
	return 64;
      else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
	   && align < 256)
    return 256;

  return align;
}

/* Compute the alignment for a static variable.
   TYPE is the data type, and ALIGN is the alignment that
   the object would ordinarily have.  The value of this function is used
   instead of that alignment to align the object.  */

int
ix86_data_alignment (type, align)
     tree type;
     int align;
{
  if (AGGREGATE_TYPE_P (type)
      && TYPE_SIZE (type)
      && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
      && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
	  || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
    return 256;

  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to a 16-byte boundary.  */
  if (TARGET_64BIT)
    {
      if (AGGREGATE_TYPE_P (type)
	  && TYPE_SIZE (type)
	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	  && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
	      || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
	return 128;
    }

  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      if (TYPE_MODE (type) == DCmode && align < 64)
	return 64;
      if (TYPE_MODE (type) == XCmode && align < 128)
	return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
	    || TREE_CODE (type) == UNION_TYPE
	    || TREE_CODE (type) == QUAL_UNION_TYPE)
	   && TYPE_FIELDS (type))
    {
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
	   || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
	return 128;
    }

  return align;
}

/* Compute the alignment for a local variable.
   TYPE is the data type, and ALIGN is the alignment that
   the object would ordinarily have.  The value of this macro is used
   instead of that alignment to align the object.  */

int
ix86_local_alignment (type, align)
     tree type;
     int align;
{
  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to a 16-byte boundary.  */
  if (TARGET_64BIT)
    {
      if (AGGREGATE_TYPE_P (type)
	  && TYPE_SIZE (type)
	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	  && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
	      || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
	return 128;
    }
  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      if (TYPE_MODE (type) == DCmode && align < 64)
	return 64;
      if (TYPE_MODE (type) == XCmode && align < 128)
	return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
	    || TREE_CODE (type) == UNION_TYPE
	    || TREE_CODE (type) == QUAL_UNION_TYPE)
	   && TYPE_FIELDS (type))
    {
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
	   || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
	return 128;
    }
  return align;
}
\f
/* Emit RTL insns to initialize the variable parts of a trampoline.
   FNADDR is an RTX for the address of the function's pure code.
   CXT is an RTX for the static chain value for the function.  */

void
x86_initialize_trampoline (tramp, fnaddr, cxt)
     rtx tramp, fnaddr, cxt;
{
  if (!TARGET_64BIT)
    {
      /* Compute offset from the end of the jmp to the target function.  */
      rtx disp = expand_binop (SImode, sub_optab, fnaddr,
			       plus_constant (tramp, 10),
			       NULL_RTX, 1, OPTAB_DIRECT);
      emit_move_insn (gen_rtx_MEM (QImode, tramp),
		      gen_int_mode (0xb9, QImode));
      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
		      gen_int_mode (0xe9, QImode));
      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
    }
  else
    {
      int offset = 0;
      /* Try to load address using shorter movl instead of movabs.
	 We may want to support movq for kernel mode, but kernel does not
	 use trampolines at the moment.  */
      if (x86_64_zero_extended_value (fnaddr))
	{
	  fnaddr = copy_to_mode_reg (DImode, fnaddr);
	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
			  gen_int_mode (0xbb41, HImode));
	  emit_move_insn (gen_rtx_MEM (SImode,
				       plus_constant (tramp, offset + 2)),
			  gen_lowpart (SImode, fnaddr));
	  offset += 6;
	}
      else
	{
	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
			  gen_int_mode (0xbb49, HImode));
	  emit_move_insn (gen_rtx_MEM (DImode,
				       plus_constant (tramp, offset + 2)),
			  fnaddr);
	  offset += 10;
	}
      /* Load static chain using movabs to r10.  */
      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
		      gen_int_mode (0xba49, HImode));
      emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
		      cxt);
      offset += 10;
      /* Jump to r11.  */
      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
		      gen_int_mode (0xff49, HImode));
      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset + 2)),
		      gen_int_mode (0xe3, QImode));
      offset += 3;
      if (offset > TRAMPOLINE_SIZE)
	abort ();
    }
}
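
/* Exposition only: the 32-bit trampoline emitted above is 10 bytes,

     offset 0:  b9 <cxt:4>	movl  $cxt, %ecx
     offset 5:  e9 <disp:4>	jmp   fnaddr	(disp relative to tramp+10)

   and the 64-bit form, when fnaddr does not fit the movl shortcut, is

     offset 0:  49 bb <fnaddr:8>   movabs $fnaddr, %r11
     offset 10: 49 ba <cxt:8>	   movabs $cxt, %r10
     offset 20: 49 ff e3	   jmp    *%r11

   The byte decodings are our annotation, not part of the source.  */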
\f
#define def_builtin(MASK, NAME, TYPE, CODE)			\
do {								\
  if ((MASK) & target_flags)					\
    builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD,	\
		      NULL, NULL_TREE);				\
} while (0)
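
/* For illustration: a typical use of def_builtin from the SSE setup
   code.  The type tree operand (here the hypothetical
   v4sf_ftype_v4sf_v4sf, built elsewhere and not shown in this excerpt)
   describes the builtin's signature, and nothing is registered when
   MASK is absent from target_flags:

     def_builtin (MASK_SSE1, "__builtin_ia32_addps",
		  v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ADDPS);  */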

struct builtin_description
{
  const unsigned int mask;
  const enum insn_code icode;
  const char *const name;
  const enum ix86_builtins code;
  const enum rtx_code comparison;
  const unsigned int flag;
};

/* Used for builtins that are enabled both by -msse and -msse2.  */
#define MASK_SSE1 (MASK_SSE | MASK_SSE2)

static const struct builtin_description bdesc_comi[] =
{
  { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, EQ, 0 },
  { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, LT, 0 },
  { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, LE, 0 },
  { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, LT, 1 },
  { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, LE, 1 },
  { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, NE, 0 },
  { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, EQ, 0 },
  { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, LT, 0 },
  { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, LE, 0 },
  { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, LT, 1 },
  { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, LE, 1 },
  { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, NE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, EQ, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, LT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, LE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, LT, 1 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, LE, 1 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, NE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, EQ, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, LT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, LE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, LT, 1 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, LE, 1 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, NE, 0 },
};

static const struct builtin_description bdesc_2arg[] =
{
  /* SSE */
  { MASK_SSE1, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
  { MASK_SSE1, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
  { MASK_SSE1, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
  { MASK_SSE1, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
  { MASK_SSE1, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
  { MASK_SSE1, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
  { MASK_SSE1, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
  { MASK_SSE1, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },

  { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
  { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
  { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
  { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
  { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
  { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
  { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
  { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
  { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
  { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
  { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
  { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
  { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
  { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
  { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
  { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgtss", IX86_BUILTIN_CMPGTSS, LT, 1 },
  { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgess", IX86_BUILTIN_CMPGESS, LE, 1 },
  { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
  { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
  { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
  { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
  { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, LT, 1 },
  { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, LE, 1 },
  { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },

  { MASK_SSE1, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
  { MASK_SSE1, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
  { MASK_SSE1, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
  { MASK_SSE1, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },

  { MASK_SSE1, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
  { MASK_SSE1, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
  { MASK_SSE1, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
  { MASK_SSE1, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
  { MASK_SSE1, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },

  /* MMX */
  { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
  { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
  { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
  { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
  { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
  { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },

  { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
  { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
  { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
  { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
  { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
  { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
  { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
  { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },

  { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
  { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
  { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },

  { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },

  { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
  { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },

  { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
  { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
  { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
  { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
  { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
  { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },

  { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
  { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
  { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
  { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },

  { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },

  /* Special.  */
  { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },

  { MASK_SSE1, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
  { MASK_SSE1, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },

  { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
  { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
  { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
  { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },

  { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
  { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
  { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
  { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },

  { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
  { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
  { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
  { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },

  { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },

  /* SSE2 */
  { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
  { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
  { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
  { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },

  { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
  { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
  { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
  { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
  { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
  { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
  { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
  { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
  { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
  { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
  { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
  { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
  { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
  { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
  { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
  { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpgtsd", IX86_BUILTIN_CMPGTSD, LT, 1 },
  { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpgesd", IX86_BUILTIN_CMPGESD, LE, 1 },
  { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
  { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
  { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
  { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
  { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpngtsd", IX86_BUILTIN_CMPNGTSD, LT, 1 },
  { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpngesd", IX86_BUILTIN_CMPNGESD, LE, 1 },
  { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },

  { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
  { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_anddf3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_nanddf3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_iordf3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_xordf3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },

  /* SSE2 MMX */
  { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
  { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },

  { MASK_MMX, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
  { MASK_MMX, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
  { MASK_MMX, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
  { MASK_MMX, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
  { MASK_MMX, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
  { MASK_MMX, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
  { MASK_MMX, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
  { MASK_MMX, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },

  { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },

  { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },

  { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },

  { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },

  { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },

  { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
  { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },

  { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },

  { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 }
};

static const struct builtin_description bdesc_1arg[] =
{
  { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
  { MASK_SSE1, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },

  { MASK_SSE1, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
  { MASK_SSE1, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
  { MASK_SSE1, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },

  { MASK_SSE1, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
  { MASK_SSE1, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
  { MASK_SSE1, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
  { MASK_SSE1, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },

  { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },

  { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },

  { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },

  { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },

  { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },

  { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 }
};

void
ix86_init_builtins ()
{
  if (TARGET_MMX)
    ix86_init_mmx_sse_builtins ();
}
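
/* Illustrative only, not part of the compiler: once registered, these
   builtins are callable directly from user code compiled with the
   matching -m flags.  A minimal sketch, assuming -msse2 and the
   mode-attribute spelling of the vector types used in this era:

     typedef int __v4si __attribute__ ((mode (V4SI)));

     __v4si
     add_packed_ints (__v4si a, __v4si b)
     {
       return __builtin_ia32_paddd128 (a, b);
     }  */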

/* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
   is zero.  Otherwise, if TARGET_SSE is not set, only expand the MMX
   builtins.  */
static void
ix86_init_mmx_sse_builtins ()
{
  const struct builtin_description * d;
  size_t i;

  tree pchar_type_node = build_pointer_type (char_type_node);
  tree pfloat_type_node = build_pointer_type (float_type_node);
  tree pv2si_type_node = build_pointer_type (V2SI_type_node);
  tree pv2di_type_node = build_pointer_type (V2DI_type_node);
  tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);

  /* Comparisons.  */
  tree int_ftype_v4sf_v4sf
    = build_function_type_list (integer_type_node,
                                V4SF_type_node, V4SF_type_node, NULL_TREE);
  tree v4si_ftype_v4sf_v4sf
    = build_function_type_list (V4SI_type_node,
                                V4SF_type_node, V4SF_type_node, NULL_TREE);
  /* MMX/SSE/integer conversions.  */
  tree int_ftype_v4sf
    = build_function_type_list (integer_type_node,
                                V4SF_type_node, NULL_TREE);
  tree int_ftype_v8qi
    = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
  tree v4sf_ftype_v4sf_int
    = build_function_type_list (V4SF_type_node,
                                V4SF_type_node, integer_type_node, NULL_TREE);
  tree v4sf_ftype_v4sf_v2si
    = build_function_type_list (V4SF_type_node,
                                V4SF_type_node, V2SI_type_node, NULL_TREE);
  tree int_ftype_v4hi_int
    = build_function_type_list (integer_type_node,
                                V4HI_type_node, integer_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_int_int
    = build_function_type_list (V4HI_type_node, V4HI_type_node,
                                integer_type_node, integer_type_node,
                                NULL_TREE);
  /* Miscellaneous.  */
  tree v8qi_ftype_v4hi_v4hi
    = build_function_type_list (V8QI_type_node,
                                V4HI_type_node, V4HI_type_node, NULL_TREE);
  tree v4hi_ftype_v2si_v2si
    = build_function_type_list (V4HI_type_node,
                                V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree v4sf_ftype_v4sf_v4sf_int
    = build_function_type_list (V4SF_type_node,
                                V4SF_type_node, V4SF_type_node,
                                integer_type_node, NULL_TREE);
  tree v2si_ftype_v4hi_v4hi
    = build_function_type_list (V2SI_type_node,
                                V4HI_type_node, V4HI_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_int
    = build_function_type_list (V4HI_type_node,
                                V4HI_type_node, integer_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_di
    = build_function_type_list (V4HI_type_node,
                                V4HI_type_node, long_long_unsigned_type_node,
                                NULL_TREE);
  tree v2si_ftype_v2si_di
    = build_function_type_list (V2SI_type_node,
                                V2SI_type_node, long_long_unsigned_type_node,
                                NULL_TREE);
  tree void_ftype_void
    = build_function_type (void_type_node, void_list_node);
  tree void_ftype_unsigned
    = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
  tree unsigned_ftype_void
    = build_function_type (unsigned_type_node, void_list_node);
  tree di_ftype_void
    = build_function_type (long_long_unsigned_type_node, void_list_node);
  tree v4sf_ftype_void
    = build_function_type (V4SF_type_node, void_list_node);
  tree v2si_ftype_v4sf
    = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
  /* Loads/stores.  */
  tree void_ftype_v8qi_v8qi_pchar
    = build_function_type_list (void_type_node,
                                V8QI_type_node, V8QI_type_node,
                                pchar_type_node, NULL_TREE);
  tree v4sf_ftype_pfloat
    = build_function_type_list (V4SF_type_node, pfloat_type_node, NULL_TREE);
  /* @@@ the type is bogus */
  tree v4sf_ftype_v4sf_pv2si
    = build_function_type_list (V4SF_type_node,
                                V4SF_type_node, pv2di_type_node, NULL_TREE);
  tree void_ftype_pv2si_v4sf
    = build_function_type_list (void_type_node,
                                pv2di_type_node, V4SF_type_node, NULL_TREE);
  tree void_ftype_pfloat_v4sf
    = build_function_type_list (void_type_node,
                                pfloat_type_node, V4SF_type_node, NULL_TREE);
  tree void_ftype_pdi_di
    = build_function_type_list (void_type_node,
                                pdi_type_node, long_long_unsigned_type_node,
                                NULL_TREE);
  tree void_ftype_pv2di_v2di
    = build_function_type_list (void_type_node,
                                pv2di_type_node, V2DI_type_node, NULL_TREE);
  /* Normal vector unops.  */
  tree v4sf_ftype_v4sf
    = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);

  /* Normal vector binops.  */
  tree v4sf_ftype_v4sf_v4sf
    = build_function_type_list (V4SF_type_node,
                                V4SF_type_node, V4SF_type_node, NULL_TREE);
  tree v8qi_ftype_v8qi_v8qi
    = build_function_type_list (V8QI_type_node,
                                V8QI_type_node, V8QI_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_v4hi
    = build_function_type_list (V4HI_type_node,
                                V4HI_type_node, V4HI_type_node, NULL_TREE);
  tree v2si_ftype_v2si_v2si
    = build_function_type_list (V2SI_type_node,
                                V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree di_ftype_di_di
    = build_function_type_list (long_long_unsigned_type_node,
                                long_long_unsigned_type_node,
                                long_long_unsigned_type_node, NULL_TREE);

  tree v2si_ftype_v2sf
    = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
  tree v2sf_ftype_v2si
    = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
  tree v2si_ftype_v2si
    = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree v2sf_ftype_v2sf
    = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
  tree v2sf_ftype_v2sf_v2sf
    = build_function_type_list (V2SF_type_node,
                                V2SF_type_node, V2SF_type_node, NULL_TREE);
  tree v2si_ftype_v2sf_v2sf
    = build_function_type_list (V2SI_type_node,
                                V2SF_type_node, V2SF_type_node, NULL_TREE);
  tree pint_type_node = build_pointer_type (integer_type_node);
  tree pdouble_type_node = build_pointer_type (double_type_node);
  tree int_ftype_v2df_v2df
    = build_function_type_list (integer_type_node,
                                V2DF_type_node, V2DF_type_node, NULL_TREE);

  tree ti_ftype_void
    = build_function_type (intTI_type_node, void_list_node);
  tree ti_ftype_ti_ti
    = build_function_type_list (intTI_type_node,
                                intTI_type_node, intTI_type_node, NULL_TREE);
  tree void_ftype_pvoid
    = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);
  tree v2di_ftype_di
    = build_function_type_list (V2DI_type_node,
                                long_long_unsigned_type_node, NULL_TREE);
  tree v4sf_ftype_v4si
    = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
  tree v4si_ftype_v4sf
    = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
  tree v2df_ftype_v4si
    = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
  tree v4si_ftype_v2df
    = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
  tree v2si_ftype_v2df
    = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
  tree v4sf_ftype_v2df
    = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2si
    = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
  tree v2df_ftype_v4sf
    = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
  tree int_ftype_v2df
    = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2df_int
    = build_function_type_list (V2DF_type_node,
                                V2DF_type_node, integer_type_node, NULL_TREE);
  tree v4sf_ftype_v4sf_v2df
    = build_function_type_list (V4SF_type_node,
                                V4SF_type_node, V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2df_v4sf
    = build_function_type_list (V2DF_type_node,
                                V2DF_type_node, V4SF_type_node, NULL_TREE);
  tree v2df_ftype_v2df_v2df_int
    = build_function_type_list (V2DF_type_node,
                                V2DF_type_node, V2DF_type_node,
                                integer_type_node,
                                NULL_TREE);
  tree v2df_ftype_v2df_pv2si
    = build_function_type_list (V2DF_type_node,
                                V2DF_type_node, pv2si_type_node, NULL_TREE);
  tree void_ftype_pv2si_v2df
    = build_function_type_list (void_type_node,
                                pv2si_type_node, V2DF_type_node, NULL_TREE);
  tree void_ftype_pdouble_v2df
    = build_function_type_list (void_type_node,
                                pdouble_type_node, V2DF_type_node, NULL_TREE);
  tree void_ftype_pint_int
    = build_function_type_list (void_type_node,
                                pint_type_node, integer_type_node, NULL_TREE);
  tree void_ftype_v16qi_v16qi_pchar
    = build_function_type_list (void_type_node,
                                V16QI_type_node, V16QI_type_node,
                                pchar_type_node, NULL_TREE);
  tree v2df_ftype_pdouble
    = build_function_type_list (V2DF_type_node, pdouble_type_node, NULL_TREE);
  tree v2df_ftype_v2df_v2df
    = build_function_type_list (V2DF_type_node,
                                V2DF_type_node, V2DF_type_node, NULL_TREE);
  tree v16qi_ftype_v16qi_v16qi
    = build_function_type_list (V16QI_type_node,
                                V16QI_type_node, V16QI_type_node, NULL_TREE);
  tree v8hi_ftype_v8hi_v8hi
    = build_function_type_list (V8HI_type_node,
                                V8HI_type_node, V8HI_type_node, NULL_TREE);
  tree v4si_ftype_v4si_v4si
    = build_function_type_list (V4SI_type_node,
                                V4SI_type_node, V4SI_type_node, NULL_TREE);
  tree v2di_ftype_v2di_v2di
    = build_function_type_list (V2DI_type_node,
                                V2DI_type_node, V2DI_type_node, NULL_TREE);
  tree v2di_ftype_v2df_v2df
    = build_function_type_list (V2DI_type_node,
                                V2DF_type_node, V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2df
    = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
  tree v2df_ftype_double
    = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
  tree v2df_ftype_double_double
    = build_function_type_list (V2DF_type_node,
                                double_type_node, double_type_node, NULL_TREE);
  tree int_ftype_v8hi_int
    = build_function_type_list (integer_type_node,
                                V8HI_type_node, integer_type_node, NULL_TREE);
  tree v8hi_ftype_v8hi_int_int
    = build_function_type_list (V8HI_type_node,
                                V8HI_type_node, integer_type_node,
                                integer_type_node, NULL_TREE);
  tree v2di_ftype_v2di_int
    = build_function_type_list (V2DI_type_node,
                                V2DI_type_node, integer_type_node, NULL_TREE);
  tree v4si_ftype_v4si_int
    = build_function_type_list (V4SI_type_node,
                                V4SI_type_node, integer_type_node, NULL_TREE);
  tree v8hi_ftype_v8hi_int
    = build_function_type_list (V8HI_type_node,
                                V8HI_type_node, integer_type_node, NULL_TREE);
  tree v8hi_ftype_v8hi_v2di
    = build_function_type_list (V8HI_type_node,
                                V8HI_type_node, V2DI_type_node, NULL_TREE);
  tree v4si_ftype_v4si_v2di
    = build_function_type_list (V4SI_type_node,
                                V4SI_type_node, V2DI_type_node, NULL_TREE);
  tree v4si_ftype_v8hi_v8hi
    = build_function_type_list (V4SI_type_node,
                                V8HI_type_node, V8HI_type_node, NULL_TREE);
  tree di_ftype_v8qi_v8qi
    = build_function_type_list (long_long_unsigned_type_node,
                                V8QI_type_node, V8QI_type_node, NULL_TREE);
  tree v2di_ftype_v16qi_v16qi
    = build_function_type_list (V2DI_type_node,
                                V16QI_type_node, V16QI_type_node, NULL_TREE);
  tree int_ftype_v16qi
    = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
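
  /* Reading aid (illustrative, not compiled): each *_ftype_* tree above
     is simply a function prototype, e.g.

       int_ftype_v4sf_v4sf     ~   int f (__v4sf, __v4sf);
       v8hi_ftype_v8hi_int     ~   __v8hi f (__v8hi, int);
       void_ftype_pfloat_v4sf  ~   void f (float *, __v4sf);  */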

  /* Add all builtins that are more or less simple operations on two
     operands.  */
  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    {
      /* Use one of the operands; the target can have a different mode for
         mask-generating compares.  */
      enum machine_mode mode;
      tree type;

      if (d->name == 0)
        continue;
      mode = insn_data[d->icode].operand[1].mode;

      switch (mode)
        {
        case V16QImode:
          type = v16qi_ftype_v16qi_v16qi;
          break;
        case V8HImode:
          type = v8hi_ftype_v8hi_v8hi;
          break;
        case V4SImode:
          type = v4si_ftype_v4si_v4si;
          break;
        case V2DImode:
          type = v2di_ftype_v2di_v2di;
          break;
        case V2DFmode:
          type = v2df_ftype_v2df_v2df;
          break;
        case TImode:
          type = ti_ftype_ti_ti;
          break;
        case V4SFmode:
          type = v4sf_ftype_v4sf_v4sf;
          break;
        case V8QImode:
          type = v8qi_ftype_v8qi_v8qi;
          break;
        case V4HImode:
          type = v4hi_ftype_v4hi_v4hi;
          break;
        case V2SImode:
          type = v2si_ftype_v2si_v2si;
          break;
        case DImode:
          type = di_ftype_di_di;
          break;

        default:
          abort ();
        }

      /* Override for comparisons.  */
      if (d->icode == CODE_FOR_maskcmpv4sf3
          || d->icode == CODE_FOR_maskncmpv4sf3
          || d->icode == CODE_FOR_vmmaskcmpv4sf3
          || d->icode == CODE_FOR_vmmaskncmpv4sf3)
        type = v4si_ftype_v4sf_v4sf;

      if (d->icode == CODE_FOR_maskcmpv2df3
          || d->icode == CODE_FOR_maskncmpv2df3
          || d->icode == CODE_FOR_vmmaskcmpv2df3
          || d->icode == CODE_FOR_vmmaskncmpv2df3)
        type = v2di_ftype_v2df_v2df;

      def_builtin (d->mask, d->name, type, d->code);
    }
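
  /* Illustrative walk-through (not compiled): for the bdesc_2arg row
     { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", ... },
     insn_data[CODE_FOR_addv16qi3].operand[1].mode is V16QImode, so the
     switch above selects v16qi_ftype_v16qi_v16qi and the builtin is
     registered as __v16qi __builtin_ia32_paddb128 (__v16qi, __v16qi).  */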

  /* Add the remaining MMX insns with somewhat more complicated types.  */
  def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
  def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
  def_builtin (MASK_MMX, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
  def_builtin (MASK_MMX, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
  def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
  def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
  def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);

  def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
  def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
  def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);

  def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
  def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);

  def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
  def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);

  /* comi/ucomi insns.  */
  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    if (d->mask == MASK_SSE2)
      def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
    else
      def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);

  def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
  def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
  def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);

  def_builtin (MASK_SSE1, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
  def_builtin (MASK_SSE1, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
  def_builtin (MASK_SSE1, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
  def_builtin (MASK_SSE1, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
  def_builtin (MASK_SSE1, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
  def_builtin (MASK_SSE1, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);

  def_builtin (MASK_SSE1, "__builtin_ia32_andps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDPS);
  def_builtin (MASK_SSE1, "__builtin_ia32_andnps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDNPS);
  def_builtin (MASK_SSE1, "__builtin_ia32_orps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ORPS);
  def_builtin (MASK_SSE1, "__builtin_ia32_xorps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_XORPS);

  def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
  def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);

  def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);

  def_builtin (MASK_SSE1, "__builtin_ia32_loadaps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADAPS);
  def_builtin (MASK_SSE1, "__builtin_ia32_loadups", v4sf_ftype_pfloat, IX86_BUILTIN_LOADUPS);
  def_builtin (MASK_SSE1, "__builtin_ia32_loadss", v4sf_ftype_pfloat, IX86_BUILTIN_LOADSS);
  def_builtin (MASK_SSE1, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
  def_builtin (MASK_SSE1, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
  def_builtin (MASK_SSE1, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);

  def_builtin (MASK_SSE1, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
  def_builtin (MASK_SSE1, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
  def_builtin (MASK_SSE1, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
  def_builtin (MASK_SSE1, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);

  def_builtin (MASK_SSE1, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
  def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
  def_builtin (MASK_SSE1, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
  def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);

  def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);

  def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);

  def_builtin (MASK_SSE1, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
  def_builtin (MASK_SSE1, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
  def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
  def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
  def_builtin (MASK_SSE1, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
  def_builtin (MASK_SSE1, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);

  def_builtin (MASK_SSE1, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);

  /* Original 3DNow!  */
  def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);

  /* 3DNow! extension as used in the Athlon CPU.  */
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);

  def_builtin (MASK_SSE1, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);

  /* SSE2 */
  def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
  def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);

  def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
  def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);

  def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pdouble, IX86_BUILTIN_LOADAPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pdouble, IX86_BUILTIN_LOADUPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pdouble, IX86_BUILTIN_LOADSD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);

  def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);

  def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
  def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
  def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);

  def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
  def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
  def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
  def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);

  def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);

  def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);

  def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
  def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pdouble, IX86_BUILTIN_LOADPD1);
  def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pdouble, IX86_BUILTIN_LOADRPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
  def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);

  def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pvoid, IX86_BUILTIN_CLFLUSH);
  def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
  def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);

  def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
  def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);

  def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
}

/* Errors in the source file can cause expand_expr to return const0_rtx
   where we expect a vector.  To avoid crashing, use one of the vector
   clear instructions.  */
static rtx
safe_vector_operand (x, mode)
     rtx x;
     enum machine_mode mode;
{
  if (x != const0_rtx)
    return x;
  x = gen_reg_rtx (mode);

  if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
    emit_insn (gen_mmx_clrdi (mode == DImode ? x
                              : gen_rtx_SUBREG (DImode, x, 0)));
  else
    emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
                                : gen_rtx_SUBREG (V4SFmode, x, 0)));
  return x;
}
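
/* An assumed example of the situation handled above (not from the
   testsuite): ill-typed source such as

     __v4sf f (void) { return __builtin_ia32_sqrtps (42); }

   can leave expand_expr returning const0_rtx for the vector argument;
   substituting a freshly cleared vector register lets expansion
   continue after the error has been diagnosed.  */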

/* Subroutine of ix86_expand_builtin to take care of binop insns.  */

static rtx
ix86_expand_binop_builtin (icode, arglist, target)
     enum insn_code icode;
     tree arglist;
     rtx target;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  /* In case the insn wants input operands in modes different from
     the result, abort.  */
  if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
    abort ();

  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
     yet one of the two must not be a memory.  This is normally enforced
     by expanders, but we didn't bother to create one here.  */
  if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
    op0 = copy_to_mode_reg (mode0, op0);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
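
/* Shape of the arglist consumed above (schematic):

     TREE_VALUE (arglist)              = arg0
     TREE_VALUE (TREE_CHAIN (arglist)) = arg1
     TREE_CHAIN (TREE_CHAIN (arglist)) = NULL_TREE  */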

/* In type_for_mode we restrict the ability to create TImode types
   to hosts with 64-bit H_W_I.  So we've defined the SSE logicals
   to have a V4SFmode signature.  Convert them in-place to TImode.  */

static rtx
ix86_expand_timode_binop_builtin (icode, arglist, target)
     enum insn_code icode;
     tree arglist;
     rtx target;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);

  op0 = gen_lowpart (TImode, op0);
  op1 = gen_lowpart (TImode, op1);
  target = gen_reg_rtx (TImode);

  if (! (*insn_data[icode].operand[1].predicate) (op0, TImode))
    op0 = copy_to_mode_reg (TImode, op0);
  if (! (*insn_data[icode].operand[2].predicate) (op1, TImode))
    op1 = copy_to_mode_reg (TImode, op1);

  /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
     yet one of the two must not be a memory.  This is normally enforced
     by expanders, but we didn't bother to create one here.  */
  if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
    op0 = copy_to_mode_reg (TImode, op0);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);

  return gen_lowpart (V4SFmode, target);
}
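
/* Illustrative only: at the source level the logicals keep their
   V4SFmode signature, e.g.

     __v4sf t = __builtin_ia32_andps (a, b);

   while the bitwise AND itself is emitted on the TImode lowparts
   created above, so no float<->integer conversion code results.  */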

/* Subroutine of ix86_expand_builtin to take care of stores.  */

static rtx
ix86_expand_store_builtin (icode, arglist)
     enum insn_code icode;
     tree arglist;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  enum machine_mode mode0 = insn_data[icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[icode].operand[1].mode;

  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));

  if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (op0, op1);
  if (pat)
    emit_insn (pat);
  return 0;
}
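
/* Example mapping (illustrative): for IX86_BUILTIN_STOREAPS the call
   is __builtin_ia32_storeaps (float *p, __v4sf v); P becomes the MEM
   in operand 0 (mode0 here is V4SFmode) and V is operand 1, so the
   pattern emitted is a plain vector store and nothing is returned.  */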

/* Subroutine of ix86_expand_builtin to take care of unop insns.  */

static rtx
ix86_expand_unop_builtin (icode, arglist, target, do_load)
     enum insn_code icode;
     tree arglist;
     rtx target;
     int do_load;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);
  if (do_load)
    op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  else
    {
      if (VECTOR_MODE_P (mode0))
        op0 = safe_vector_operand (op0, mode0);

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
    }

  pat = GEN_FCN (icode) (target, op0);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
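
/* Usage note: DO_LOAD distinguishes true unops from the load-style
   builtins routed through this helper; e.g. IX86_BUILTIN_LOADAPS below
   passes do_load = 1, so OP0 is treated as an address and wrapped in a
   MEM instead of being copied into a vector register.  */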

/* Subroutine of ix86_expand_builtin to take care of three special unop insns:
   sqrtss, rsqrtss, rcpss.  */

static rtx
ix86_expand_unop1_builtin (icode, arglist, target)
     enum insn_code icode;
     tree arglist;
     rtx target;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);

  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);

  op1 = op0;
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
    op1 = copy_to_mode_reg (mode0, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
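
/* Why OP1 duplicates OP0 above: the vm* patterns for these scalar
   insns take the source twice, once as input of the low-element
   operation and once to supply the untouched upper elements of the
   result, so the same register is passed in both operand slots.  */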

/* Subroutine of ix86_expand_builtin to take care of comparison insns.  */

static rtx
ix86_expand_sse_compare (d, arglist, target)
     const struct builtin_description *d;
     tree arglist;
     rtx target;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  rtx op2;
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag)
    {
      rtx tmp = gen_reg_rtx (mode1);
      emit_move_insn (tmp, op1);
      op1 = op0;
      op0 = tmp;
    }

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
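
/* Worked example (illustrative): SSE has no cmpgtps insn, so the
   bdesc_2arg row for __builtin_ia32_cmpgtps sets the flag field and
   the code above emits cmpltps with the operands exchanged, which
   produces the same mask.  */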

/* Subroutine of ix86_expand_builtin to take care of comi insns.  */

static rtx
ix86_expand_sse_comi (d, arglist, target)
     const struct builtin_description *d;
     tree arglist;
     rtx target;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  rtx op2;
  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag)
    {
      rtx tmp = op1;
      op1 = op0;
      op0 = tmp;
    }

  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  emit_insn (gen_rtx_SET (VOIDmode,
                          gen_rtx_STRICT_LOW_PART (VOIDmode, target),
                          gen_rtx_fmt_ee (comparison, QImode,
                                          gen_rtx_REG (CCmode, FLAGS_REG),
                                          const0_rtx)));

  return SUBREG_REG (target);
}
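
/* Roughly what the expansion above produces for a comi builtin such
   as __builtin_ia32_comilt (a, b) (schematic, exact insns may vary):

     comiss  b-operand, a-operand   ; set EFLAGS
     set<cc> low byte of PSEUDO     ; materialize the comparison;
                                    ; PSEUDO was zeroed first, so its
                                    ; upper bits are already clear  */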

/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.  */

rtx
ix86_expand_builtin (exp, target, subtarget, mode, ignore)
     tree exp;
     rtx target;
     rtx subtarget ATTRIBUTE_UNUSED;
     enum machine_mode mode ATTRIBUTE_UNUSED;
     int ignore ATTRIBUTE_UNUSED;
{
  const struct builtin_description *d;
  size_t i;
  enum insn_code icode;
  tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
  tree arglist = TREE_OPERAND (exp, 1);
  tree arg0, arg1, arg2;
  rtx op0, op1, op2, pat;
  enum machine_mode tmode, mode0, mode1, mode2;
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);

  switch (fcode)
    {
    case IX86_BUILTIN_EMMS:
      emit_insn (gen_emms ());
      return 0;

    case IX86_BUILTIN_SFENCE:
      emit_insn (gen_sfence ());
      return 0;

    case IX86_BUILTIN_PEXTRW:
    case IX86_BUILTIN_PEXTRW128:
      icode = (fcode == IX86_BUILTIN_PEXTRW
               ? CODE_FOR_mmx_pextrw
               : CODE_FOR_sse2_pextrw);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
        {
          /* @@@ better error message */
          error ("selector must be an immediate");
          return gen_reg_rtx (tmode);
        }
      if (target == 0
          || GET_MODE (target) != tmode
          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_PINSRW:
    case IX86_BUILTIN_PINSRW128:
      icode = (fcode == IX86_BUILTIN_PINSRW
               ? CODE_FOR_mmx_pinsrw
               : CODE_FOR_sse2_pinsrw);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
        op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
        {
          /* @@@ better error message */
          error ("selector must be an immediate");
          return const0_rtx;
        }
      if (target == 0
          || GET_MODE (target) != tmode
          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_MASKMOVQ:
    case IX86_BUILTIN_MASKMOVDQU:
12914 icode = (fcode == IX86_BUILTIN_MASKMOVQ
12915 ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
12916 : CODE_FOR_sse2_maskmovdqu);
bd793c65
BS
12917 /* Note the arg order is different from the operand order. */
12918 arg1 = TREE_VALUE (arglist);
12919 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
12920 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
12921 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12922 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12923 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
12924 mode0 = insn_data[icode].operand[0].mode;
12925 mode1 = insn_data[icode].operand[1].mode;
12926 mode2 = insn_data[icode].operand[2].mode;
12927
5c464583 12928 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
bd793c65
BS
12929 op0 = copy_to_mode_reg (mode0, op0);
12930 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
12931 op1 = copy_to_mode_reg (mode1, op1);
12932 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
12933 op2 = copy_to_mode_reg (mode2, op2);
12934 pat = GEN_FCN (icode) (op0, op1, op2);
12935 if (! pat)
12936 return 0;
12937 emit_insn (pat);
12938 return 0;
12939
12940 case IX86_BUILTIN_SQRTSS:
12941 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
12942 case IX86_BUILTIN_RSQRTSS:
12943 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
    case IX86_BUILTIN_RCPSS:
      return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);

    case IX86_BUILTIN_ANDPS:
      return ix86_expand_timode_binop_builtin (CODE_FOR_sse_andti3,
                                               arglist, target);
    case IX86_BUILTIN_ANDNPS:
      return ix86_expand_timode_binop_builtin (CODE_FOR_sse_nandti3,
                                               arglist, target);
    case IX86_BUILTIN_ORPS:
      return ix86_expand_timode_binop_builtin (CODE_FOR_sse_iorti3,
                                               arglist, target);
    case IX86_BUILTIN_XORPS:
      return ix86_expand_timode_binop_builtin (CODE_FOR_sse_xorti3,
                                               arglist, target);

    case IX86_BUILTIN_LOADAPS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);

    case IX86_BUILTIN_LOADUPS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);

    case IX86_BUILTIN_STOREAPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
    case IX86_BUILTIN_STOREUPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);

    case IX86_BUILTIN_LOADSS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);

    case IX86_BUILTIN_STORESS:
      return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);

    case IX86_BUILTIN_LOADHPS:
    case IX86_BUILTIN_LOADLPS:
    case IX86_BUILTIN_LOADHPD:
    case IX86_BUILTIN_LOADLPD:
      icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
               : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
               : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
               : CODE_FOR_sse2_movlpd);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
      op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
      if (target == 0
          || GET_MODE (target) != tmode
          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_STOREHPS:
    case IX86_BUILTIN_STORELPS:
    case IX86_BUILTIN_STOREHPD:
    case IX86_BUILTIN_STORELPD:
      icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
               : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
               : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
               : CODE_FOR_sse2_movlpd);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
        op1 = copy_to_mode_reg (mode1, op1);

      pat = GEN_FCN (icode) (op0, op0, op1);
      if (! pat)
        return 0;
      emit_insn (pat);
      return 0;

    case IX86_BUILTIN_MOVNTPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
    case IX86_BUILTIN_MOVNTQ:
      return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);

    case IX86_BUILTIN_LDMXCSR:
      op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
      target = assign_386_stack_local (SImode, 0);
      emit_move_insn (target, op0);
      emit_insn (gen_ldmxcsr (target));
      return 0;

    case IX86_BUILTIN_STMXCSR:
      target = assign_386_stack_local (SImode, 0);
      emit_insn (gen_stmxcsr (target));
      return copy_to_mode_reg (SImode, target);

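      /* Illustrative only (not in the original sources): user code such as

           unsigned int csr = _mm_getcsr ();
           _mm_setcsr (csr | 0x8000);

         from <xmmintrin.h> reaches the two cases above.  Both go through
         a stack slot because the ldmxcsr and stmxcsr instructions accept
         only a memory operand.  */
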
    case IX86_BUILTIN_SHUFPS:
    case IX86_BUILTIN_SHUFPD:
      icode = (fcode == IX86_BUILTIN_SHUFPS
               ? CODE_FOR_sse_shufps
               : CODE_FOR_sse2_shufpd);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
        op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
        {
          /* @@@ better error message */
          error ("mask must be an immediate");
          return gen_reg_rtx (tmode);
        }
      if (target == 0
          || GET_MODE (target) != tmode
          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;

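      /* Illustrative only (not in the original sources): the mask operand
         must fold to a compile-time constant, e.g.

           __builtin_ia32_shufps (a, b, 0x1b);

         Passing a run-time variable as the mask lands in the
         "mask must be an immediate" error above.  */
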
    case IX86_BUILTIN_PSHUFW:
    case IX86_BUILTIN_PSHUFD:
    case IX86_BUILTIN_PSHUFHW:
    case IX86_BUILTIN_PSHUFLW:
      icode = (fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
               : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
               : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
               : CODE_FOR_mmx_pshufw);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
        op0 = copy_to_mode_reg (mode1, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
        {
          /* @@@ better error message */
          error ("mask must be an immediate");
          return const0_rtx;
        }
      if (target == 0
          || GET_MODE (target) != tmode
          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_FEMMS:
      emit_insn (gen_femms ());
      return NULL_RTX;

    case IX86_BUILTIN_PAVGUSB:
      return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);

    case IX86_BUILTIN_PF2ID:
      return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);

    case IX86_BUILTIN_PFACC:
      return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);

    case IX86_BUILTIN_PFADD:
      return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPEQ:
      return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPGE:
      return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPGT:
      return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);

    case IX86_BUILTIN_PFMAX:
      return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);

    case IX86_BUILTIN_PFMIN:
      return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);

    case IX86_BUILTIN_PFMUL:
      return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);

    case IX86_BUILTIN_PFRCP:
      return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);

    case IX86_BUILTIN_PFRCPIT1:
      return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);

    case IX86_BUILTIN_PFRCPIT2:
      return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);

    case IX86_BUILTIN_PFRSQIT1:
      return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);

    case IX86_BUILTIN_PFRSQRT:
      return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);

    case IX86_BUILTIN_PFSUB:
      return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);

    case IX86_BUILTIN_PFSUBR:
      return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);

    case IX86_BUILTIN_PI2FD:
      return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);

    case IX86_BUILTIN_PMULHRW:
      return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);

    case IX86_BUILTIN_PF2IW:
      return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);

    case IX86_BUILTIN_PFNACC:
      return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);

    case IX86_BUILTIN_PFPNACC:
      return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);

    case IX86_BUILTIN_PI2FW:
      return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);

    case IX86_BUILTIN_PSWAPDSI:
      return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);

    case IX86_BUILTIN_PSWAPDSF:
      return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);

    case IX86_BUILTIN_SSE_ZERO:
      target = gen_reg_rtx (V4SFmode);
      emit_insn (gen_sse_clrv4sf (target));
      return target;

    case IX86_BUILTIN_MMX_ZERO:
      target = gen_reg_rtx (DImode);
      emit_insn (gen_mmx_clrdi (target));
      return target;

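      /* Illustrative only (not in the original sources): these are the
         expansions behind the zero-initializing intrinsics, e.g.

           __m128 z = _mm_setzero_ps ();     -- IX86_BUILTIN_SSE_ZERO
           __m64  m = _mm_setzero_si64 ();   -- IX86_BUILTIN_MMX_ZERO

         each of which just clears a freshly allocated register.  */
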
    case IX86_BUILTIN_SQRTSD:
      return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
    case IX86_BUILTIN_LOADAPD:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
    case IX86_BUILTIN_LOADUPD:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);

    case IX86_BUILTIN_STOREAPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
    case IX86_BUILTIN_STOREUPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);

    case IX86_BUILTIN_LOADSD:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);

    case IX86_BUILTIN_STORESD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);

    case IX86_BUILTIN_SETPD1:
      target = assign_386_stack_local (DFmode, 0);
      arg0 = TREE_VALUE (arglist);
      emit_move_insn (adjust_address (target, DFmode, 0),
                      expand_expr (arg0, NULL_RTX, VOIDmode, 0));
      op0 = gen_reg_rtx (V2DFmode);
      emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
      emit_insn (gen_sse2_shufpd (op0, op0, op0, GEN_INT (0)));
      return op0;

    case IX86_BUILTIN_SETPD:
      target = assign_386_stack_local (V2DFmode, 0);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      emit_move_insn (adjust_address (target, DFmode, 0),
                      expand_expr (arg0, NULL_RTX, VOIDmode, 0));
      emit_move_insn (adjust_address (target, DFmode, 8),
                      expand_expr (arg1, NULL_RTX, VOIDmode, 0));
      op0 = gen_reg_rtx (V2DFmode);
      emit_insn (gen_sse2_movapd (op0, target));
      return op0;

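      /* Illustrative only (not in the original sources): both SETPD
         variants build a V2DF value through a stack slot, so e.g.

           __m128d v = __builtin_ia32_setpd (2.0, 1.0);

         stores the two doubles into the slot and reads them back with a
         single movapd.  */
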
    case IX86_BUILTIN_LOADRPD:
      target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
                                         gen_reg_rtx (V2DFmode), 1);
      emit_insn (gen_sse2_shufpd (target, target, target, GEN_INT (1)));
      return target;

    case IX86_BUILTIN_LOADPD1:
      target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
                                         gen_reg_rtx (V2DFmode), 1);
      emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
      return target;

    case IX86_BUILTIN_STOREPD1:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
    case IX86_BUILTIN_STORERPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);

    case IX86_BUILTIN_MFENCE:
      emit_insn (gen_sse2_mfence ());
      return 0;
    case IX86_BUILTIN_LFENCE:
      emit_insn (gen_sse2_lfence ());
      return 0;

    case IX86_BUILTIN_CLFLUSH:
      arg0 = TREE_VALUE (arglist);
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      icode = CODE_FOR_sse2_clflush;
      mode0 = insn_data[icode].operand[0].mode;
      if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);

      emit_insn (gen_sse2_clflush (op0));
      return 0;

    case IX86_BUILTIN_MOVNTPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
    case IX86_BUILTIN_MOVNTDQ:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
    case IX86_BUILTIN_MOVNTI:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);

    default:
      break;
    }

  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    if (d->code == fcode)
      {
        /* Compares are treated specially.  */
        if (d->icode == CODE_FOR_maskcmpv4sf3
            || d->icode == CODE_FOR_vmmaskcmpv4sf3
            || d->icode == CODE_FOR_maskncmpv4sf3
            || d->icode == CODE_FOR_vmmaskncmpv4sf3
            || d->icode == CODE_FOR_maskcmpv2df3
            || d->icode == CODE_FOR_vmmaskcmpv2df3
            || d->icode == CODE_FOR_maskncmpv2df3
            || d->icode == CODE_FOR_vmmaskncmpv2df3)
          return ix86_expand_sse_compare (d, arglist, target);

        return ix86_expand_binop_builtin (d->icode, arglist, target);
      }

  for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
    if (d->code == fcode)
      return ix86_expand_unop_builtin (d->icode, arglist, target, 0);

  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_comi (d, arglist, target);

  /* @@@ Should really do something sensible here.  */
  return 0;
}

/* Store OPERAND to memory after reload is completed.  This means
   that we can't easily use assign_stack_local.  */
rtx
ix86_force_to_memory (mode, operand)
     enum machine_mode mode;
     rtx operand;
{
  rtx result;
  if (!reload_completed)
    abort ();
  if (TARGET_64BIT && TARGET_RED_ZONE)
    {
      result = gen_rtx_MEM (mode,
                            gen_rtx_PLUS (Pmode,
                                          stack_pointer_rtx,
                                          GEN_INT (-RED_ZONE_SIZE)));
      emit_move_insn (result, operand);
    }
  else if (TARGET_64BIT && !TARGET_RED_ZONE)
    {
      switch (mode)
        {
        case HImode:
        case SImode:
          operand = gen_lowpart (DImode, operand);
          /* FALLTHRU */
        case DImode:
          emit_insn (
            gen_rtx_SET (VOIDmode,
                         gen_rtx_MEM (DImode,
                                      gen_rtx_PRE_DEC (DImode,
                                                       stack_pointer_rtx)),
                         operand));
          break;
        default:
          abort ();
        }
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  else
    {
      switch (mode)
        {
        case DImode:
          {
            rtx operands[2];
            split_di (&operand, 1, operands, operands + 1);
            emit_insn (
              gen_rtx_SET (VOIDmode,
                           gen_rtx_MEM (SImode,
                                        gen_rtx_PRE_DEC (Pmode,
                                                         stack_pointer_rtx)),
                           operands[1]));
            emit_insn (
              gen_rtx_SET (VOIDmode,
                           gen_rtx_MEM (SImode,
                                        gen_rtx_PRE_DEC (Pmode,
                                                         stack_pointer_rtx)),
                           operands[0]));
          }
          break;
        case HImode:
          /* It is better to store HImodes as SImodes.  */
          if (!TARGET_PARTIAL_REG_STALL)
            operand = gen_lowpart (SImode, operand);
          /* FALLTHRU */
        case SImode:
          emit_insn (
            gen_rtx_SET (VOIDmode,
                         gen_rtx_MEM (GET_MODE (operand),
                                      gen_rtx_PRE_DEC (SImode,
                                                       stack_pointer_rtx)),
                         operand));
          break;
        default:
          abort ();
        }
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  return result;
}

/* Free the operand from memory.  */
void
ix86_free_from_memory (mode)
     enum machine_mode mode;
{
  if (!TARGET_64BIT || !TARGET_RED_ZONE)
    {
      int size;

      if (mode == DImode || TARGET_64BIT)
        size = 8;
      else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
        size = 2;
      else
        size = 4;
      /* Use LEA to deallocate stack space.  In peephole2 it will be
         converted to a pop or add instruction if registers are available.  */
      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                              gen_rtx_PLUS (Pmode, stack_pointer_rtx,
                                            GEN_INT (size))));
    }
}
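
/* Illustrative only (not in the original sources): a typical caller
   pairs the two routines above, e.g.

     rtx mem = ix86_force_to_memory (DImode, operand);
     ... emit insns that need a memory operand ...
     ix86_free_from_memory (DImode);

   On 64-bit targets with a red zone the slot needs no deallocation;
   otherwise the value is pushed below the stack pointer and the space
   is released by the LEA emitted above.  */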

/* Put float CONST_DOUBLE in the constant pool instead of fp regs.
   QImode must go into class Q_REGS.
   Narrow ALL_REGS to GENERAL_REGS.  This supports allowing movsf and
   movdf to do mem-to-mem moves through integer regs.  */
enum reg_class
ix86_preferred_reload_class (x, class)
     rtx x;
     enum reg_class class;
{
  if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
    {
      /* SSE can't load any constant directly yet.  */
      if (SSE_CLASS_P (class))
        return NO_REGS;
      /* Floats can load 0 and 1.  */
      if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
        {
          /* Limit class to non-SSE.  Use GENERAL_REGS if possible.  */
          if (MAYBE_SSE_CLASS_P (class))
            return (reg_class_subset_p (class, GENERAL_REGS)
                    ? GENERAL_REGS : FLOAT_REGS);
          else
            return class;
        }
      /* General regs can load everything.  */
      if (reg_class_subset_p (class, GENERAL_REGS))
        return GENERAL_REGS;
      /* In case we haven't resolved FLOAT or SSE yet, give up.  */
      if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
        return NO_REGS;
    }
  if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
    return NO_REGS;
  if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
    return Q_REGS;
  return class;
}
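
/* Illustrative only (not in the original sources): reloading the
   constant 1.0 into an SSE class yields NO_REGS, so it must come from
   the constant pool, whereas for an x87 class standard_80387_constant_p
   accepts it and the class is kept, allowing fld1 to be used.  */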

/* If we are copying between general and FP registers, we need a memory
   location.  The same is true for SSE and MMX registers.

   The macro can't work reliably when one of the CLASSES is a class
   containing registers from multiple units (SSE, MMX, integer).  We
   avoid this by never combining those units in a single alternative
   in the machine description.  Ensure that this constraint holds to
   avoid unexpected surprises.

   When STRICT is false, we are being called from REGISTER_MOVE_COST,
   so do not enforce these sanity checks.  */
int
ix86_secondary_memory_needed (class1, class2, mode, strict)
     enum reg_class class1, class2;
     enum machine_mode mode;
     int strict;
{
  if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
      || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
      || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
      || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
      || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
      || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
    {
      if (strict)
        abort ();
      else
        return 1;
    }
  return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
          || (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
              && (mode) != SImode)
          || (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
              && (mode) != SImode));
}
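
/* Illustrative only (not in the original sources): a DFmode copy
   between FLOAT_REGS and GENERAL_REGS needs a memory intermediate,
   whereas an SImode copy between SSE_REGS and GENERAL_REGS does not,
   since 32 bits can move between the units directly (movd).  */
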
/* Return the cost of moving data from a register in class CLASS1 to
   one in class CLASS2.

   It is not required that the cost always equal 2 when FROM is the same as TO;
   on some machines it is expensive to move between registers if they are not
   general registers.  */
int
ix86_register_move_cost (mode, class1, class2)
     enum machine_mode mode;
     enum reg_class class1, class2;
{
  /* In case we require secondary memory, compute the cost of the store
     followed by the load.  When copying from a general purpose register
     we may emit multiple stores followed by a single load, causing a
     memory size mismatch stall.  Count this as an arbitrarily high cost
     of 20.  */
  if (ix86_secondary_memory_needed (class1, class2, mode, 0))
    {
      int add_cost = 0;
      if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
        add_cost = 20;
      return (MEMORY_MOVE_COST (mode, class1, 0)
              + MEMORY_MOVE_COST (mode, class2, 1) + add_cost);
    }
  /* Moves between the SSE/MMX units and the integer unit are expensive.  */
  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
      || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
    return ix86_cost->mmxsse_to_integer;
  if (MAYBE_FLOAT_CLASS_P (class1))
    return ix86_cost->fp_move;
  if (MAYBE_SSE_CLASS_P (class1))
    return ix86_cost->sse_move;
  if (MAYBE_MMX_CLASS_P (class1))
    return ix86_cost->mmx_move;
  return 2;
}
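
/* Illustrative only (not in the original sources): for a DFmode move
   between FLOAT_REGS and GENERAL_REGS the secondary-memory path applies,
   so the result is MEMORY_MOVE_COST of the store plus that of the load,
   plus 20 when the source class needs more hard registers than the
   destination.  */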

/* Return 1 if hard register REGNO can hold a value of machine-mode MODE.  */
int
ix86_hard_regno_mode_ok (regno, mode)
     int regno;
     enum machine_mode mode;
{
  /* Flags and only flags can hold CCmode values.  */
  if (CC_REGNO_P (regno))
    return GET_MODE_CLASS (mode) == MODE_CC;
  if (GET_MODE_CLASS (mode) == MODE_CC
      || GET_MODE_CLASS (mode) == MODE_RANDOM
      || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
    return 0;
  if (FP_REGNO_P (regno))
    return VALID_FP_MODE_P (mode);
  if (SSE_REGNO_P (regno))
    return VALID_SSE_REG_MODE (mode);
  if (MMX_REGNO_P (regno))
    return VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode);
  /* We handle both integers and floats in the general purpose registers.
     In the future we should be able to handle vector modes as well.  */
  if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
    return 0;
  /* Take care of QImode values - they can be in non-QI regs, but then
     they do cause partial register stalls.  */
  if (regno < 4 || mode != QImode || TARGET_64BIT)
    return 1;
  return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
}
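
/* Illustrative only (not in the original sources): on 32-bit targets a
   QImode value is always fine in the four Q registers (regno < 4), but
   is allowed in %esi or %edi only during or after reload, or when the
   target does not suffer partial register stalls.  */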

/* Return the cost of moving data of mode M between a
   register and memory.  A value of 2 is the default; this cost is
   relative to those in `REGISTER_MOVE_COST'.

   If moving between registers and memory is more expensive than
   between two registers, you should define this macro to express the
   relative cost.

   Also model the increased moving costs of QImode registers in
   non-Q_REGS classes.  */
int
ix86_memory_move_cost (mode, class, in)
     enum machine_mode mode;
     enum reg_class class;
     int in;
{
  if (FLOAT_CLASS_P (class))
    {
      int index;
      switch (mode)
        {
        case SFmode:
          index = 0;
          break;
        case DFmode:
          index = 1;
          break;
        case XFmode:
        case TFmode:
          index = 2;
          break;
        default:
          return 100;
        }
      return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
    }
  if (SSE_CLASS_P (class))
    {
      int index;
      switch (GET_MODE_SIZE (mode))
        {
        case 4:
          index = 0;
          break;
        case 8:
          index = 1;
          break;
        case 16:
          index = 2;
          break;
        default:
          return 100;
        }
      return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
    }
  if (MMX_CLASS_P (class))
    {
      int index;
      switch (GET_MODE_SIZE (mode))
        {
        case 4:
          index = 0;
          break;
        case 8:
          index = 1;
          break;
        default:
          return 100;
        }
      return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
    }
  switch (GET_MODE_SIZE (mode))
    {
    case 1:
      if (in)
        return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
                : ix86_cost->movzbl_load);
      else
        return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
                : ix86_cost->int_store[0] + 4);
      break;
    case 2:
      return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
    default:
      /* Compute the number of 32-bit moves needed.  TFmode is moved
         as XFmode.  */
      if (mode == TFmode)
        mode = XFmode;
      return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
              * (int) GET_MODE_SIZE (mode) / 4);
    }
}
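
/* Illustrative only (not in the original sources): a DImode load into
   GENERAL_REGS falls through to the default case, giving
   int_load[2] * 8 / 4, i.e. the cost of two 32-bit loads.  */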

#if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
static void
ix86_svr3_asm_out_constructor (symbol, priority)
     rtx symbol;
     int priority ATTRIBUTE_UNUSED;
{
  init_section ();
  fputs ("\tpushl $", asm_out_file);
  assemble_name (asm_out_file, XSTR (symbol, 0));
  fputc ('\n', asm_out_file);
}
#endif
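
/* Illustrative only (not in the original sources): for a constructor
   `ctor' the routine above emits, into the init section,

	pushl $ctor

   leaving the collected addresses to be popped and called at startup,
   presumably by the DO_GLOBAL_CTORS_BODY machinery.  */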

#if TARGET_MACHO

static int current_machopic_label_num;

/* Given a symbol name and its associated stub, write out the
   definition of the stub.  */

void
machopic_output_stub (file, symb, stub)
     FILE *file;
     const char *symb, *stub;
{
  unsigned int length;
  char *binder_name, *symbol_name, lazy_ptr_name[32];
  int label = ++current_machopic_label_num;

  /* Lose our funky encoding stuff so it doesn't contaminate the stub.  */
  symb = (*targetm.strip_name_encoding) (symb);

  length = strlen (stub);
  binder_name = alloca (length + 32);
  GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);

  length = strlen (symb);
  symbol_name = alloca (length + 32);
  GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);

  sprintf (lazy_ptr_name, "L%d$lz", label);

  if (MACHOPIC_PURE)
    machopic_picsymbol_stub_section ();
  else
    machopic_symbol_stub_section ();

  fprintf (file, "%s:\n", stub);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);

  if (MACHOPIC_PURE)
    {
      fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
      fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
      fprintf (file, "\tjmp %%edx\n");
    }
  else
    fprintf (file, "\tjmp *%s\n", lazy_ptr_name);

  fprintf (file, "%s:\n", binder_name);

  if (MACHOPIC_PURE)
    {
      fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
      fprintf (file, "\tpushl %%eax\n");
    }
  else
    fprintf (file, "\t pushl $%s\n", lazy_ptr_name);

  fprintf (file, "\tjmp dyld_stub_binding_helper\n");

  machopic_lazy_symbol_ptr_section ();
  fprintf (file, "%s:\n", lazy_ptr_name);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
  fprintf (file, "\t.long %s\n", binder_name);
}
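
/* Illustrative only (not in the original sources): with MACHOPIC_PURE
   clear, the stub emitted above for a symbol `_foo' looks roughly like

	stub:	.indirect_symbol _foo
		jmp *L1$lz
	binder:	pushl $L1$lz
		jmp dyld_stub_binding_helper
	L1$lz:	.long binder

   (the real stub and binder labels come from GEN_BINDER_NAME_FOR_STUB
   and GEN_SYMBOL_NAME_FOR_SYMBOL).  The first call goes through the
   binder, which patches the lazy pointer to the resolved address.  */
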
#endif /* TARGET_MACHO */

/* Order the registers for the register allocator.  */

void
x86_order_regs_for_local_alloc ()
{
  int pos = 0;
  int i;

  /* First allocate the local general purpose registers.  */
  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    if (GENERAL_REGNO_P (i) && call_used_regs[i])
      reg_alloc_order [pos++] = i;

  /* Global general purpose registers.  */
  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    if (GENERAL_REGNO_P (i) && !call_used_regs[i])
      reg_alloc_order [pos++] = i;

  /* x87 registers come first in case we are doing FP math
     using them.  */
  if (!TARGET_SSE_MATH)
    for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
      reg_alloc_order [pos++] = i;

  /* SSE registers.  */
  for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
    reg_alloc_order [pos++] = i;
  for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
    reg_alloc_order [pos++] = i;

  /* x87 registers.  */
  if (TARGET_SSE_MATH)
    for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
      reg_alloc_order [pos++] = i;

  for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
    reg_alloc_order [pos++] = i;

  /* Initialize the rest of the array, as we do not allocate some
     registers at all.  */
  while (pos < FIRST_PSEUDO_REGISTER)
    reg_alloc_order [pos++] = 0;
}
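
/* Illustrative only (not in the original sources): the resulting order
   is caller-saved GPRs, callee-saved GPRs, then x87 before SSE when the
   x87 unit does the FP math (and after SSE otherwise), then MMX.  */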

void
x86_output_mi_thunk (file, delta, function)
     FILE *file;
     int delta;
     tree function;
{
  tree parm;
  rtx xops[3];

  if (ix86_regparm > 0)
    parm = TYPE_ARG_TYPES (TREE_TYPE (function));
  else
    parm = NULL_TREE;
  for (; parm; parm = TREE_CHAIN (parm))
    if (TREE_VALUE (parm) == void_type_node)
      break;

  xops[0] = GEN_INT (delta);
  if (TARGET_64BIT)
    {
      int n = aggregate_value_p (TREE_TYPE (TREE_TYPE (function))) != 0;
      xops[1] = gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
      output_asm_insn ("add{q} {%0, %1|%1, %0}", xops);
      if (flag_pic)
        {
          fprintf (file, "\tjmp *");
          assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
          fprintf (file, "@GOTPCREL(%%rip)\n");
        }
      else
        {
          fprintf (file, "\tjmp ");
          assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
          fprintf (file, "\n");
        }
    }
  else
    {
      if (parm)
        xops[1] = gen_rtx_REG (SImode, 0);
      else if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function))))
        xops[1] = gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
      else
        xops[1] = gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
      output_asm_insn ("add{l} {%0, %1|%1, %0}", xops);

      if (flag_pic)
        {
          xops[0] = pic_offset_table_rtx;
          xops[1] = gen_label_rtx ();
          xops[2] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);

          if (ix86_regparm > 2)
            abort ();
          output_asm_insn ("push{l}\t%0", xops);
          output_asm_insn ("call\t%P1", xops);
          ASM_OUTPUT_INTERNAL_LABEL (file, "L", CODE_LABEL_NUMBER (xops[1]));
          output_asm_insn ("pop{l}\t%0", xops);
          output_asm_insn
            ("add{l}\t{%2+[.-%P1], %0|%0, OFFSET FLAT: %2+[.-%P1]}", xops);
          xops[0] = gen_rtx_MEM (SImode, XEXP (DECL_RTL (function), 0));
          output_asm_insn
            ("mov{l}\t{%0@GOT(%%ebx), %%ecx|%%ecx, %0@GOT[%%ebx]}", xops);
          asm_fprintf (file, "\tpop{l\t%%ebx|\t%%ebx}\n");
          asm_fprintf (file, "\tjmp\t{*%%ecx|%%ecx}\n");
        }
      else
        {
          fprintf (file, "\tjmp ");
          assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
          fprintf (file, "\n");
        }
    }
}
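
/* Illustrative only (not in the original sources): for a non-PIC ia32
   thunk with DELTA == -4 and the `this' pointer passed on the stack,
   the routine above emits roughly

	addl $-4, 4(%esp)
	jmp function

   i.e. it adjusts `this' in place and tail-jumps to the real method.  */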

int
x86_field_alignment (field, computed)
     tree field;
     int computed;
{
  enum machine_mode mode;
  tree type = TREE_TYPE (field);

  if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
    return computed;
  mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
                    ? get_inner_array_type (type) : type);
  if (mode == DFmode || mode == DCmode
      || GET_MODE_CLASS (mode) == MODE_INT
      || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
    return MIN (32, computed);
  return computed;
}
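
/* Illustrative only (not in the original sources): without
   -malign-double a `double' field in a struct gets at most 32-bit
   alignment from the hook above on 32-bit targets, matching the
   traditional ia32 ABI.  */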

/* Implement machine specific optimizations.
   At the moment we implement a single transformation: AMD Athlon works
   faster when RET is not the destination of a conditional jump or
   directly preceded by another jump instruction.  We avoid the penalty
   by inserting a NOP just before the RET instructions in such cases.  */
void
x86_machine_dependent_reorg (first)
     rtx first ATTRIBUTE_UNUSED;
{
  edge e;

  if (!TARGET_ATHLON || !optimize || optimize_size)
    return;
  for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
    {
      basic_block bb = e->src;
      rtx ret = bb->end;
      rtx prev;
      bool insert = false;

      if (!returnjump_p (ret) || !maybe_hot_bb_p (bb))
        continue;
      prev = prev_nonnote_insn (ret);
      if (prev && GET_CODE (prev) == CODE_LABEL)
        {
          edge e;
          for (e = bb->pred; e; e = e->pred_next)
            if (EDGE_FREQUENCY (e) && e->src->index > 0
                && !(e->flags & EDGE_FALLTHRU))
              insert = 1;
        }
      if (!insert)
        {
          prev = prev_real_insn (ret);
          if (prev && GET_CODE (prev) == JUMP_INSN
              && any_condjump_p (prev))
            insert = 1;
        }
      if (insert)
        emit_insn_before (gen_nop (), ret);
    }
}
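
/* Illustrative only (not in the original sources): the pass turns

	jne	.L1
	ret

   into

	jne	.L1
	nop
	ret

   so that the RET no longer directly follows a conditional jump.  */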

#include "gt-i386.h"