]> gcc.gnu.org Git - gcc.git/blame - gcc/config/i386/i386.c
2002-07-14  Mark Wielaard  <mark@klomp.org>
[gcc.git] / gcc / config / i386 / i386.c
CommitLineData
e075ae69 1/* Subroutines used for code generation on IA-32.
07933f72
GS
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002 Free Software Foundation, Inc.
2a2ab3f9
JVA
4
5This file is part of GNU CC.
6
7GNU CC is free software; you can redistribute it and/or modify
8it under the terms of the GNU General Public License as published by
9the Free Software Foundation; either version 2, or (at your option)
10any later version.
11
12GNU CC is distributed in the hope that it will be useful,
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15GNU General Public License for more details.
16
17You should have received a copy of the GNU General Public License
18along with GNU CC; see the file COPYING. If not, write to
97aadbb9 19the Free Software Foundation, 59 Temple Place - Suite 330,
0f290768 20Boston, MA 02111-1307, USA. */
2a2ab3f9 21
2a2ab3f9 22#include "config.h"
bb5177ac 23#include "system.h"
2a2ab3f9 24#include "rtl.h"
6baf1cc8
BS
25#include "tree.h"
26#include "tm_p.h"
2a2ab3f9
JVA
27#include "regs.h"
28#include "hard-reg-set.h"
29#include "real.h"
30#include "insn-config.h"
31#include "conditions.h"
2a2ab3f9
JVA
32#include "output.h"
33#include "insn-attr.h"
2a2ab3f9 34#include "flags.h"
a8ffcc81 35#include "except.h"
ecbc4695 36#include "function.h"
00c79232 37#include "recog.h"
ced8dd8c 38#include "expr.h"
e78d8e51 39#include "optabs.h"
f103890b 40#include "toplev.h"
e075ae69 41#include "basic-block.h"
1526a060 42#include "ggc.h"
672a6f42
NB
43#include "target.h"
44#include "target-def.h"
f1e639b1 45#include "langhooks.h"
2a2ab3f9 46
/* Fallback for targets that do not define a stack-probe limit:
   -1 (all bits set) means "no limit".  */
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
2ab0437e 51/* Processor costs (relative to an add) */
fce5a9f2 52static const
2ab0437e
JH
53struct processor_costs size_cost = { /* costs for tunning for size */
54 2, /* cost of an add instruction */
55 3, /* cost of a lea instruction */
56 2, /* variable shift costs */
57 3, /* constant shift costs */
58 3, /* cost of starting a multiply */
59 0, /* cost of multiply per each bit set */
60 3, /* cost of a divide/mod */
44cf5b6a
JH
61 3, /* cost of movsx */
62 3, /* cost of movzx */
2ab0437e
JH
63 0, /* "large" insn */
64 2, /* MOVE_RATIO */
65 2, /* cost for loading QImode using movzbl */
66 {2, 2, 2}, /* cost of loading integer registers
67 in QImode, HImode and SImode.
68 Relative to reg-reg move (2). */
69 {2, 2, 2}, /* cost of storing integer registers */
70 2, /* cost of reg,reg fld/fst */
71 {2, 2, 2}, /* cost of loading fp registers
72 in SFmode, DFmode and XFmode */
73 {2, 2, 2}, /* cost of loading integer registers */
74 3, /* cost of moving MMX register */
75 {3, 3}, /* cost of loading MMX registers
76 in SImode and DImode */
77 {3, 3}, /* cost of storing MMX registers
78 in SImode and DImode */
79 3, /* cost of moving SSE register */
80 {3, 3, 3}, /* cost of loading SSE registers
81 in SImode, DImode and TImode */
82 {3, 3, 3}, /* cost of storing SSE registers
83 in SImode, DImode and TImode */
84 3, /* MMX or SSE register to integer */
f4365627
JH
85 0, /* size of prefetch block */
86 0, /* number of parallel prefetches */
2ab0437e 87};
32b5b1aa 88/* Processor costs (relative to an add) */
fce5a9f2 89static const
32b5b1aa 90struct processor_costs i386_cost = { /* 386 specific costs */
e9a25f70 91 1, /* cost of an add instruction */
32b5b1aa
SC
92 1, /* cost of a lea instruction */
93 3, /* variable shift costs */
94 2, /* constant shift costs */
95 6, /* cost of starting a multiply */
96 1, /* cost of multiply per each bit set */
e075ae69 97 23, /* cost of a divide/mod */
44cf5b6a
JH
98 3, /* cost of movsx */
99 2, /* cost of movzx */
96e7ae40 100 15, /* "large" insn */
e2e52e1b 101 3, /* MOVE_RATIO */
7c6b971d 102 4, /* cost for loading QImode using movzbl */
96e7ae40
JH
103 {2, 4, 2}, /* cost of loading integer registers
104 in QImode, HImode and SImode.
0f290768 105 Relative to reg-reg move (2). */
96e7ae40
JH
106 {2, 4, 2}, /* cost of storing integer registers */
107 2, /* cost of reg,reg fld/fst */
108 {8, 8, 8}, /* cost of loading fp registers
109 in SFmode, DFmode and XFmode */
fa79946e
JH
110 {8, 8, 8}, /* cost of loading integer registers */
111 2, /* cost of moving MMX register */
112 {4, 8}, /* cost of loading MMX registers
113 in SImode and DImode */
114 {4, 8}, /* cost of storing MMX registers
115 in SImode and DImode */
116 2, /* cost of moving SSE register */
117 {4, 8, 16}, /* cost of loading SSE registers
118 in SImode, DImode and TImode */
119 {4, 8, 16}, /* cost of storing SSE registers
120 in SImode, DImode and TImode */
121 3, /* MMX or SSE register to integer */
f4365627
JH
122 0, /* size of prefetch block */
123 0, /* number of parallel prefetches */
32b5b1aa
SC
124};
125
fce5a9f2 126static const
32b5b1aa
SC
127struct processor_costs i486_cost = { /* 486 specific costs */
128 1, /* cost of an add instruction */
129 1, /* cost of a lea instruction */
130 3, /* variable shift costs */
131 2, /* constant shift costs */
132 12, /* cost of starting a multiply */
133 1, /* cost of multiply per each bit set */
e075ae69 134 40, /* cost of a divide/mod */
44cf5b6a
JH
135 3, /* cost of movsx */
136 2, /* cost of movzx */
96e7ae40 137 15, /* "large" insn */
e2e52e1b 138 3, /* MOVE_RATIO */
7c6b971d 139 4, /* cost for loading QImode using movzbl */
96e7ae40
JH
140 {2, 4, 2}, /* cost of loading integer registers
141 in QImode, HImode and SImode.
0f290768 142 Relative to reg-reg move (2). */
96e7ae40
JH
143 {2, 4, 2}, /* cost of storing integer registers */
144 2, /* cost of reg,reg fld/fst */
145 {8, 8, 8}, /* cost of loading fp registers
146 in SFmode, DFmode and XFmode */
fa79946e
JH
147 {8, 8, 8}, /* cost of loading integer registers */
148 2, /* cost of moving MMX register */
149 {4, 8}, /* cost of loading MMX registers
150 in SImode and DImode */
151 {4, 8}, /* cost of storing MMX registers
152 in SImode and DImode */
153 2, /* cost of moving SSE register */
154 {4, 8, 16}, /* cost of loading SSE registers
155 in SImode, DImode and TImode */
156 {4, 8, 16}, /* cost of storing SSE registers
157 in SImode, DImode and TImode */
f4365627
JH
158 3, /* MMX or SSE register to integer */
159 0, /* size of prefetch block */
160 0, /* number of parallel prefetches */
32b5b1aa
SC
161};
162
fce5a9f2 163static const
e5cb57e8 164struct processor_costs pentium_cost = {
32b5b1aa
SC
165 1, /* cost of an add instruction */
166 1, /* cost of a lea instruction */
856b07a1 167 4, /* variable shift costs */
e5cb57e8 168 1, /* constant shift costs */
856b07a1
SC
169 11, /* cost of starting a multiply */
170 0, /* cost of multiply per each bit set */
e075ae69 171 25, /* cost of a divide/mod */
44cf5b6a
JH
172 3, /* cost of movsx */
173 2, /* cost of movzx */
96e7ae40 174 8, /* "large" insn */
e2e52e1b 175 6, /* MOVE_RATIO */
7c6b971d 176 6, /* cost for loading QImode using movzbl */
96e7ae40
JH
177 {2, 4, 2}, /* cost of loading integer registers
178 in QImode, HImode and SImode.
0f290768 179 Relative to reg-reg move (2). */
96e7ae40
JH
180 {2, 4, 2}, /* cost of storing integer registers */
181 2, /* cost of reg,reg fld/fst */
182 {2, 2, 6}, /* cost of loading fp registers
183 in SFmode, DFmode and XFmode */
fa79946e
JH
184 {4, 4, 6}, /* cost of loading integer registers */
185 8, /* cost of moving MMX register */
186 {8, 8}, /* cost of loading MMX registers
187 in SImode and DImode */
188 {8, 8}, /* cost of storing MMX registers
189 in SImode and DImode */
190 2, /* cost of moving SSE register */
191 {4, 8, 16}, /* cost of loading SSE registers
192 in SImode, DImode and TImode */
193 {4, 8, 16}, /* cost of storing SSE registers
194 in SImode, DImode and TImode */
f4365627
JH
195 3, /* MMX or SSE register to integer */
196 0, /* size of prefetch block */
197 0, /* number of parallel prefetches */
32b5b1aa
SC
198};
199
fce5a9f2 200static const
856b07a1
SC
201struct processor_costs pentiumpro_cost = {
202 1, /* cost of an add instruction */
203 1, /* cost of a lea instruction */
e075ae69 204 1, /* variable shift costs */
856b07a1 205 1, /* constant shift costs */
369e59b1 206 4, /* cost of starting a multiply */
856b07a1 207 0, /* cost of multiply per each bit set */
e075ae69 208 17, /* cost of a divide/mod */
44cf5b6a
JH
209 1, /* cost of movsx */
210 1, /* cost of movzx */
96e7ae40 211 8, /* "large" insn */
e2e52e1b 212 6, /* MOVE_RATIO */
7c6b971d 213 2, /* cost for loading QImode using movzbl */
96e7ae40
JH
214 {4, 4, 4}, /* cost of loading integer registers
215 in QImode, HImode and SImode.
0f290768 216 Relative to reg-reg move (2). */
96e7ae40
JH
217 {2, 2, 2}, /* cost of storing integer registers */
218 2, /* cost of reg,reg fld/fst */
219 {2, 2, 6}, /* cost of loading fp registers
220 in SFmode, DFmode and XFmode */
fa79946e
JH
221 {4, 4, 6}, /* cost of loading integer registers */
222 2, /* cost of moving MMX register */
223 {2, 2}, /* cost of loading MMX registers
224 in SImode and DImode */
225 {2, 2}, /* cost of storing MMX registers
226 in SImode and DImode */
227 2, /* cost of moving SSE register */
228 {2, 2, 8}, /* cost of loading SSE registers
229 in SImode, DImode and TImode */
230 {2, 2, 8}, /* cost of storing SSE registers
231 in SImode, DImode and TImode */
f4365627
JH
232 3, /* MMX or SSE register to integer */
233 32, /* size of prefetch block */
234 6, /* number of parallel prefetches */
856b07a1
SC
235};
236
fce5a9f2 237static const
a269a03c
JC
238struct processor_costs k6_cost = {
239 1, /* cost of an add instruction */
e075ae69 240 2, /* cost of a lea instruction */
a269a03c
JC
241 1, /* variable shift costs */
242 1, /* constant shift costs */
73fe76e4 243 3, /* cost of starting a multiply */
a269a03c 244 0, /* cost of multiply per each bit set */
e075ae69 245 18, /* cost of a divide/mod */
44cf5b6a
JH
246 2, /* cost of movsx */
247 2, /* cost of movzx */
96e7ae40 248 8, /* "large" insn */
e2e52e1b 249 4, /* MOVE_RATIO */
7c6b971d 250 3, /* cost for loading QImode using movzbl */
96e7ae40
JH
251 {4, 5, 4}, /* cost of loading integer registers
252 in QImode, HImode and SImode.
0f290768 253 Relative to reg-reg move (2). */
96e7ae40
JH
254 {2, 3, 2}, /* cost of storing integer registers */
255 4, /* cost of reg,reg fld/fst */
256 {6, 6, 6}, /* cost of loading fp registers
257 in SFmode, DFmode and XFmode */
fa79946e
JH
258 {4, 4, 4}, /* cost of loading integer registers */
259 2, /* cost of moving MMX register */
260 {2, 2}, /* cost of loading MMX registers
261 in SImode and DImode */
262 {2, 2}, /* cost of storing MMX registers
263 in SImode and DImode */
264 2, /* cost of moving SSE register */
265 {2, 2, 8}, /* cost of loading SSE registers
266 in SImode, DImode and TImode */
267 {2, 2, 8}, /* cost of storing SSE registers
268 in SImode, DImode and TImode */
f4365627
JH
269 6, /* MMX or SSE register to integer */
270 32, /* size of prefetch block */
271 1, /* number of parallel prefetches */
a269a03c
JC
272};
273
fce5a9f2 274static const
309ada50
JH
275struct processor_costs athlon_cost = {
276 1, /* cost of an add instruction */
0b5107cf 277 2, /* cost of a lea instruction */
309ada50
JH
278 1, /* variable shift costs */
279 1, /* constant shift costs */
280 5, /* cost of starting a multiply */
281 0, /* cost of multiply per each bit set */
0b5107cf 282 42, /* cost of a divide/mod */
44cf5b6a
JH
283 1, /* cost of movsx */
284 1, /* cost of movzx */
309ada50 285 8, /* "large" insn */
e2e52e1b 286 9, /* MOVE_RATIO */
309ada50
JH
287 4, /* cost for loading QImode using movzbl */
288 {4, 5, 4}, /* cost of loading integer registers
289 in QImode, HImode and SImode.
0f290768 290 Relative to reg-reg move (2). */
309ada50
JH
291 {2, 3, 2}, /* cost of storing integer registers */
292 4, /* cost of reg,reg fld/fst */
0b5107cf 293 {6, 6, 20}, /* cost of loading fp registers
309ada50 294 in SFmode, DFmode and XFmode */
fa79946e
JH
295 {4, 4, 16}, /* cost of loading integer registers */
296 2, /* cost of moving MMX register */
297 {2, 2}, /* cost of loading MMX registers
298 in SImode and DImode */
299 {2, 2}, /* cost of storing MMX registers
300 in SImode and DImode */
301 2, /* cost of moving SSE register */
302 {2, 2, 8}, /* cost of loading SSE registers
303 in SImode, DImode and TImode */
304 {2, 2, 8}, /* cost of storing SSE registers
305 in SImode, DImode and TImode */
f4365627
JH
306 6, /* MMX or SSE register to integer */
307 64, /* size of prefetch block */
308 6, /* number of parallel prefetches */
309ada50
JH
309};
310
fce5a9f2 311static const
b4e89e2d
JH
312struct processor_costs pentium4_cost = {
313 1, /* cost of an add instruction */
314 1, /* cost of a lea instruction */
315 8, /* variable shift costs */
316 8, /* constant shift costs */
317 30, /* cost of starting a multiply */
318 0, /* cost of multiply per each bit set */
319 112, /* cost of a divide/mod */
44cf5b6a
JH
320 1, /* cost of movsx */
321 1, /* cost of movzx */
b4e89e2d
JH
322 16, /* "large" insn */
323 6, /* MOVE_RATIO */
324 2, /* cost for loading QImode using movzbl */
325 {4, 5, 4}, /* cost of loading integer registers
326 in QImode, HImode and SImode.
327 Relative to reg-reg move (2). */
328 {2, 3, 2}, /* cost of storing integer registers */
329 2, /* cost of reg,reg fld/fst */
330 {2, 2, 6}, /* cost of loading fp registers
331 in SFmode, DFmode and XFmode */
332 {4, 4, 6}, /* cost of loading integer registers */
333 2, /* cost of moving MMX register */
334 {2, 2}, /* cost of loading MMX registers
335 in SImode and DImode */
336 {2, 2}, /* cost of storing MMX registers
337 in SImode and DImode */
338 12, /* cost of moving SSE register */
339 {12, 12, 12}, /* cost of loading SSE registers
340 in SImode, DImode and TImode */
341 {2, 2, 8}, /* cost of storing SSE registers
342 in SImode, DImode and TImode */
343 10, /* MMX or SSE register to integer */
f4365627
JH
344 64, /* size of prefetch block */
345 6, /* number of parallel prefetches */
b4e89e2d
JH
346};
347
8b60264b 348const struct processor_costs *ix86_cost = &pentium_cost;
32b5b1aa 349
a269a03c
JC
350/* Processor feature/optimization bitmasks. */
351#define m_386 (1<<PROCESSOR_I386)
352#define m_486 (1<<PROCESSOR_I486)
353#define m_PENT (1<<PROCESSOR_PENTIUM)
354#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
355#define m_K6 (1<<PROCESSOR_K6)
309ada50 356#define m_ATHLON (1<<PROCESSOR_ATHLON)
b4e89e2d 357#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
a269a03c 358
309ada50 359const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
b4e89e2d 360const int x86_push_memory = m_386 | m_K6 | m_ATHLON | m_PENT4;
a269a03c 361const int x86_zero_extend_with_and = m_486 | m_PENT;
b4e89e2d 362const int x86_movx = m_ATHLON | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
e075ae69 363const int x86_double_with_add = ~m_386;
a269a03c 364const int x86_use_bit_test = m_386;
e2e52e1b 365const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
b4e89e2d 366const int x86_cmove = m_PPRO | m_ATHLON | m_PENT4;
47f339cf 367const int x86_3dnow_a = m_ATHLON;
b4e89e2d 368const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON | m_PENT4;
ef6257cd 369const int x86_branch_hints = m_PENT4;
b4e89e2d 370const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
e075ae69
RH
371const int x86_partial_reg_stall = m_PPRO;
372const int x86_use_loop = m_K6;
309ada50 373const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
e075ae69
RH
374const int x86_use_mov0 = m_K6;
375const int x86_use_cltd = ~(m_PENT | m_K6);
376const int x86_read_modify_write = ~m_PENT;
377const int x86_read_modify = ~(m_PENT | m_PPRO);
378const int x86_split_long_moves = m_PPRO;
285464d0
JH
379const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON;
380const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
b4e89e2d 381const int x86_single_stringop = m_386 | m_PENT4;
d9f32422
JH
382const int x86_qimode_math = ~(0);
383const int x86_promote_qi_regs = 0;
384const int x86_himode_math = ~(m_PPRO);
385const int x86_promote_hi_regs = m_PPRO;
b4e89e2d
JH
386const int x86_sub_esp_4 = m_ATHLON | m_PPRO | m_PENT4;
387const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486 | m_PENT4;
388const int x86_add_esp_4 = m_ATHLON | m_K6 | m_PENT4;
389const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
77966be3 390const int x86_integer_DFmode_moves = ~(m_ATHLON | m_PENT4 | m_PPRO);
b4e89e2d
JH
391const int x86_partial_reg_dependency = m_ATHLON | m_PENT4;
392const int x86_memory_mismatch_stall = m_ATHLON | m_PENT4;
c6036a37
JH
393const int x86_accumulate_outgoing_args = m_ATHLON | m_PENT4 | m_PPRO;
394const int x86_prologue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
395const int x86_epilogue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
b972dd02 396const int x86_decompose_lea = m_PENT4;
495333a6 397const int x86_shift1 = ~m_486;
285464d0 398const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON | m_PENT4;
a269a03c 399
/* In case the average insn count for single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 30

/* Set by prologue expander and used by epilogue expander to determine
   the style used.  */
static int use_fast_prologue_epilogue;

/* Memory reference through the hard frame pointer, in the given mode.  */
#define AT_BP(MODE) (gen_rtx_MEM ((MODE), hard_frame_pointer_rtx))
5bf0ebab
RH
411/* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
412static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
413static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
414static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
4c0d89b5
RS
415
416/* Array of the smallest class containing reg number REGNO, indexed by
0f290768 417 REGNO. Used by REGNO_REG_CLASS in i386.h. */
4c0d89b5 418
e075ae69 419enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
4c0d89b5
RS
420{
421 /* ax, dx, cx, bx */
ab408a86 422 AREG, DREG, CREG, BREG,
4c0d89b5 423 /* si, di, bp, sp */
e075ae69 424 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
4c0d89b5
RS
425 /* FP registers */
426 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
79325812 427 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
4c0d89b5 428 /* arg pointer */
83774849 429 NON_Q_REGS,
564d80f4 430 /* flags, fpsr, dirflag, frame */
a7180f70
BS
431 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
432 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
433 SSE_REGS, SSE_REGS,
434 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
3d117b30
JH
435 MMX_REGS, MMX_REGS,
436 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
437 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
438 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
439 SSE_REGS, SSE_REGS,
4c0d89b5 440};
c572e5ba 441
3d117b30 442/* The "default" register map used in 32bit mode. */
83774849 443
0f290768 444int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
83774849
RH
445{
446 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
447 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
3d117b30 448 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
a7180f70
BS
449 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
450 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
3d117b30
JH
451 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
452 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
83774849
RH
453};
454
5bf0ebab
RH
455static int const x86_64_int_parameter_registers[6] =
456{
457 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
458 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
459};
460
461static int const x86_64_int_return_registers[4] =
462{
463 0 /*RAX*/, 1 /*RDI*/, 5 /*RDI*/, 4 /*RSI*/
464};
53c17031 465
0f7fa3d0
JH
466/* The "default" register map used in 64bit mode. */
467int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
468{
469 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
3de72741 470 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
0f7fa3d0
JH
471 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
472 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
473 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
474 8,9,10,11,12,13,14,15, /* extended integer registers */
475 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
476};
477
83774849
RH
478/* Define the register numbers to be used in Dwarf debugging information.
479 The SVR4 reference port C compiler uses the following register numbers
480 in its Dwarf output code:
481 0 for %eax (gcc regno = 0)
482 1 for %ecx (gcc regno = 2)
483 2 for %edx (gcc regno = 1)
484 3 for %ebx (gcc regno = 3)
485 4 for %esp (gcc regno = 7)
486 5 for %ebp (gcc regno = 6)
487 6 for %esi (gcc regno = 4)
488 7 for %edi (gcc regno = 5)
489 The following three DWARF register numbers are never generated by
490 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
491 believes these numbers have these meanings.
492 8 for %eip (no gcc equivalent)
493 9 for %eflags (gcc regno = 17)
494 10 for %trapno (no gcc equivalent)
495 It is not at all clear how we should number the FP stack registers
496 for the x86 architecture. If the version of SDB on x86/svr4 were
497 a bit less brain dead with respect to floating-point then we would
498 have a precedent to follow with respect to DWARF register numbers
499 for x86 FP registers, but the SDB on x86/svr4 is so completely
500 broken with respect to FP registers that it is hardly worth thinking
501 of it as something to strive for compatibility with.
502 The version of x86/svr4 SDB I have at the moment does (partially)
503 seem to believe that DWARF register number 11 is associated with
504 the x86 register %st(0), but that's about all. Higher DWARF
505 register numbers don't seem to be associated with anything in
506 particular, and even for DWARF regno 11, SDB only seems to under-
507 stand that it should say that a variable lives in %st(0) (when
508 asked via an `=' command) if we said it was in DWARF regno 11,
509 but SDB still prints garbage when asked for the value of the
510 variable in question (via a `/' command).
511 (Also note that the labels SDB prints for various FP stack regs
512 when doing an `x' command are all wrong.)
513 Note that these problems generally don't affect the native SVR4
514 C compiler because it doesn't allow the use of -O with -g and
515 because when it is *not* optimizing, it allocates a memory
516 location for each floating-point variable, and the memory
517 location is what gets described in the DWARF AT_location
518 attribute for the variable in question.
519 Regardless of the severe mental illness of the x86/svr4 SDB, we
520 do something sensible here and we use the following DWARF
521 register numbers. Note that these are all stack-top-relative
522 numbers.
523 11 for %st(0) (gcc regno = 8)
524 12 for %st(1) (gcc regno = 9)
525 13 for %st(2) (gcc regno = 10)
526 14 for %st(3) (gcc regno = 11)
527 15 for %st(4) (gcc regno = 12)
528 16 for %st(5) (gcc regno = 13)
529 17 for %st(6) (gcc regno = 14)
530 18 for %st(7) (gcc regno = 15)
531*/
0f290768 532int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
83774849
RH
533{
534 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
535 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
3f3f2124 536 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
a7180f70
BS
537 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
538 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
3f3f2124
JH
539 -1, -1, -1, -1, -1, -1, -1, -1, /* extemded integer registers */
540 -1, -1, -1, -1, -1, -1, -1, -1, /* extemded SSE registers */
83774849
RH
541};
542
c572e5ba
JVA
543/* Test and compare insns in i386.md store the information needed to
544 generate branch and scc insns here. */
545
07933f72
GS
546rtx ix86_compare_op0 = NULL_RTX;
547rtx ix86_compare_op1 = NULL_RTX;
f5316dfe 548
/* The encoding characters for the four TLS models present in ELF;
   index 0 (a space) means "no TLS model".  */

static char const tls_model_chars[] = " GLil";
/* Number of per-function scratch stack slots tracked in
   struct machine_function.  */
#define MAX_386_STACK_LOCALS 3
/* Size of the register save area.  */
#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
36edd3cc
BS
556
557/* Define the structure for the machine field in struct function. */
e2500fed 558struct machine_function GTY(())
36edd3cc
BS
559{
560 rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
f996902d 561 const char *some_ld_name;
8362f420 562 int save_varrargs_registers;
6fca22eb 563 int accesses_prev_frame;
36edd3cc
BS
564};
565
01d939e8 566#define ix86_stack_locals (cfun->machine->stack_locals)
8362f420 567#define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)
36edd3cc 568
4dd2ac2c
JH
569/* Structure describing stack frame layout.
570 Stack grows downward:
571
572 [arguments]
573 <- ARG_POINTER
574 saved pc
575
576 saved frame pointer if frame_pointer_needed
577 <- HARD_FRAME_POINTER
578 [saved regs]
579
580 [padding1] \
581 )
582 [va_arg registers] (
583 > to_allocate <- FRAME_POINTER
584 [frame] (
585 )
586 [padding2] /
587 */
588struct ix86_frame
589{
590 int nregs;
591 int padding1;
8362f420 592 int va_arg_size;
4dd2ac2c
JH
593 HOST_WIDE_INT frame;
594 int padding2;
595 int outgoing_arguments_size;
8362f420 596 int red_zone_size;
4dd2ac2c
JH
597
598 HOST_WIDE_INT to_allocate;
599 /* The offsets relative to ARG_POINTER. */
600 HOST_WIDE_INT frame_pointer_offset;
601 HOST_WIDE_INT hard_frame_pointer_offset;
602 HOST_WIDE_INT stack_pointer_offset;
603};
604
c93e80a5
JH
605/* Used to enable/disable debugging features. */
606const char *ix86_debug_arg_string, *ix86_debug_addr_string;
6189a572
JH
607/* Code model option as passed by user. */
608const char *ix86_cmodel_string;
609/* Parsed value. */
610enum cmodel ix86_cmodel;
80f33d06
GS
611/* Asm dialect. */
612const char *ix86_asm_string;
613enum asm_dialect ix86_asm_dialect = ASM_ATT;
f996902d
RH
614/* TLS dialext. */
615const char *ix86_tls_dialect_string;
616enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
6189a572 617
5bf0ebab 618/* Which unit we are generating floating point math for. */
965f5423
JH
619enum fpmath_unit ix86_fpmath;
620
5bf0ebab
RH
621/* Which cpu are we scheduling for. */
622enum processor_type ix86_cpu;
623/* Which instruction set architecture to use. */
624enum processor_type ix86_arch;
c8c5cb99
SC
625
626/* Strings to hold which cpu and instruction set architecture to use. */
9c23aa47
ZW
627const char *ix86_cpu_string; /* for -mcpu=<xxx> */
628const char *ix86_arch_string; /* for -march=<xxx> */
965f5423 629const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
c8c5cb99 630
0f290768 631/* # of registers to use to pass arguments. */
e075ae69 632const char *ix86_regparm_string;
e9a25f70 633
f4365627
JH
634/* true if sse prefetch instruction is not NOOP. */
635int x86_prefetch_sse;
636
e075ae69
RH
637/* ix86_regparm_string as a number */
638int ix86_regparm;
e9a25f70
JL
639
640/* Alignment to use for loops and jumps: */
641
0f290768 642/* Power of two alignment for loops. */
e075ae69 643const char *ix86_align_loops_string;
e9a25f70 644
0f290768 645/* Power of two alignment for non-loop jumps. */
e075ae69 646const char *ix86_align_jumps_string;
e9a25f70 647
3af4bd89 648/* Power of two alignment for stack boundary in bytes. */
e075ae69 649const char *ix86_preferred_stack_boundary_string;
3af4bd89
JH
650
651/* Preferred alignment for stack boundary in bits. */
e075ae69 652int ix86_preferred_stack_boundary;
3af4bd89 653
e9a25f70 654/* Values 1-5: see jump.c */
e075ae69
RH
655int ix86_branch_cost;
656const char *ix86_branch_cost_string;
e9a25f70 657
0f290768 658/* Power of two alignment for functions. */
e075ae69 659const char *ix86_align_funcs_string;
623fe810
RH
660
661/* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
662static char internal_label_prefix[16];
663static int internal_label_prefix_len;
e075ae69 664\f
623fe810 665static int local_symbolic_operand PARAMS ((rtx, enum machine_mode));
f996902d 666static int tls_symbolic_operand_1 PARAMS ((rtx, enum tls_model));
f6da8bc3
KG
667static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
668static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
e075ae69 669 int, int, FILE *));
f996902d
RH
670static const char *get_some_local_dynamic_name PARAMS ((void));
671static int get_some_local_dynamic_name_1 PARAMS ((rtx *, void *));
672static rtx maybe_get_pool_constant PARAMS ((rtx));
f6da8bc3 673static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
3a3677ff
RH
674static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
675 rtx *, rtx *));
f996902d 676static rtx get_thread_pointer PARAMS ((void));
145aacc2 677static void get_pc_thunk_name PARAMS ((char [32], unsigned int));
f6da8bc3
KG
678static rtx gen_push PARAMS ((rtx));
679static int memory_address_length PARAMS ((rtx addr));
680static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
681static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
f6da8bc3
KG
682static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
683static void ix86_dump_ppro_packet PARAMS ((FILE *));
684static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
e2500fed 685static struct machine_function * ix86_init_machine_status PARAMS ((void));
2b589241 686static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
b531087a
KH
687static int ix86_nsaved_regs PARAMS ((void));
688static void ix86_emit_save_regs PARAMS ((void));
c6036a37 689static void ix86_emit_save_regs_using_mov PARAMS ((rtx, HOST_WIDE_INT));
37a58036 690static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int, int));
bd09bdeb 691static void ix86_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
0e4970d7 692static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
b531087a 693static void ix86_sched_reorder_ppro PARAMS ((rtx *, rtx *));
55efb413 694static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
0945b39d 695static void ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT));
0945b39d
JH
696static rtx ix86_expand_aligntest PARAMS ((rtx, int));
697static void ix86_expand_strlensi_unroll_1 PARAMS ((rtx, rtx));
c237e94a
ZW
698static int ix86_issue_rate PARAMS ((void));
699static int ix86_adjust_cost PARAMS ((rtx, rtx, rtx, int));
700static void ix86_sched_init PARAMS ((FILE *, int, int));
701static int ix86_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
702static int ix86_variable_issue PARAMS ((FILE *, int, rtx, int));
9b690711
RH
703static int ia32_use_dfa_pipeline_interface PARAMS ((void));
704static int ia32_multipass_dfa_lookahead PARAMS ((void));
e37af218 705static void ix86_init_mmx_sse_builtins PARAMS ((void));
e075ae69
RH
706
707struct ix86_address
708{
709 rtx base, index, disp;
710 HOST_WIDE_INT scale;
711};
b08de47e 712
e075ae69 713static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
bd793c65 714
f996902d
RH
715static void ix86_encode_section_info PARAMS ((tree, int)) ATTRIBUTE_UNUSED;
716static const char *ix86_strip_name_encoding PARAMS ((const char *))
717 ATTRIBUTE_UNUSED;
fb49053f 718
bd793c65 719struct builtin_description;
8b60264b
KG
720static rtx ix86_expand_sse_comi PARAMS ((const struct builtin_description *,
721 tree, rtx));
722static rtx ix86_expand_sse_compare PARAMS ((const struct builtin_description *,
723 tree, rtx));
bd793c65
BS
724static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
725static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
726static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
e37af218
RH
727static rtx ix86_expand_timode_binop_builtin PARAMS ((enum insn_code,
728 tree, rtx));
729static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree));
bd793c65 730static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
c0c102a9
JH
731static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
732static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
733 enum rtx_code *,
734 enum rtx_code *,
735 enum rtx_code *));
9e7adcb3
JH
736static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
737 rtx *, rtx *));
738static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
739static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
740static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
741static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
bd09bdeb 742static unsigned int ix86_select_alt_pic_regnum PARAMS ((void));
9b690711 743static int ix86_save_reg PARAMS ((unsigned int, int));
4dd2ac2c 744static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));
8d8e52be 745static int ix86_comp_type_attributes PARAMS ((tree, tree));
91d231cb
JM
746const struct attribute_spec ix86_attribute_table[];
747static tree ix86_handle_cdecl_attribute PARAMS ((tree *, tree, tree, int, bool *));
748static tree ix86_handle_regparm_attribute PARAMS ((tree *, tree, tree, int, bool *));
7c262518 749
21c318ba 750#if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
2cc07db4
RH
751static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int));
752#endif
e56feed6 753
53c17031
JH
/* Register class used for passing given 64bit part of the argument.
   These represent classes as documented by the PS ABI, with the exception
   of SSESF, SSEDF classes, that are basically SSE class, just gcc will
   use SF or DFmode move instead of DImode to avoid reformatting penalties.

   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
   whenever possible (upper half does contain padding).
   */
enum x86_64_reg_class
  {
    X86_64_NO_CLASS,
    X86_64_INTEGER_CLASS,
    X86_64_INTEGERSI_CLASS,
    X86_64_SSE_CLASS,
    X86_64_SSESF_CLASS,
    X86_64_SSEDF_CLASS,
    X86_64_SSEUP_CLASS,
    X86_64_X87_CLASS,
    X86_64_X87UP_CLASS,
    X86_64_MEMORY_CLASS
  };
/* Human-readable names for the classes above, for -d debug dumps.  */
static const char * const x86_64_reg_class_name[] =
  {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
777
778#define MAX_CLASSES 4
779static int classify_argument PARAMS ((enum machine_mode, tree,
780 enum x86_64_reg_class [MAX_CLASSES],
781 int));
782static int examine_argument PARAMS ((enum machine_mode, tree, int, int *,
783 int *));
784static rtx construct_container PARAMS ((enum machine_mode, tree, int, int, int,
07933f72 785 const int *, int));
53c17031
JH
786static enum x86_64_reg_class merge_classes PARAMS ((enum x86_64_reg_class,
787 enum x86_64_reg_class));
672a6f42
NB
788\f
789/* Initialize the GCC target structure. */
91d231cb
JM
790#undef TARGET_ATTRIBUTE_TABLE
791#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
672a6f42 792#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
f5f4be42
NB
793# undef TARGET_MERGE_DECL_ATTRIBUTES
794# define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
672a6f42
NB
795#endif
796
8d8e52be
JM
797#undef TARGET_COMP_TYPE_ATTRIBUTES
798#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
799
f6155fda
SS
800#undef TARGET_INIT_BUILTINS
801#define TARGET_INIT_BUILTINS ix86_init_builtins
802
803#undef TARGET_EXPAND_BUILTIN
804#define TARGET_EXPAND_BUILTIN ix86_expand_builtin
805
bd09bdeb
RH
806#undef TARGET_ASM_FUNCTION_EPILOGUE
807#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
08c148a8 808
17b53c33
NB
809#undef TARGET_ASM_OPEN_PAREN
810#define TARGET_ASM_OPEN_PAREN ""
811#undef TARGET_ASM_CLOSE_PAREN
812#define TARGET_ASM_CLOSE_PAREN ""
813
301d03af
RS
814#undef TARGET_ASM_ALIGNED_HI_OP
815#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
816#undef TARGET_ASM_ALIGNED_SI_OP
817#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
818#ifdef ASM_QUAD
819#undef TARGET_ASM_ALIGNED_DI_OP
820#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
821#endif
822
823#undef TARGET_ASM_UNALIGNED_HI_OP
824#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
825#undef TARGET_ASM_UNALIGNED_SI_OP
826#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
827#undef TARGET_ASM_UNALIGNED_DI_OP
828#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
829
c237e94a
ZW
830#undef TARGET_SCHED_ADJUST_COST
831#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
832#undef TARGET_SCHED_ISSUE_RATE
833#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
834#undef TARGET_SCHED_VARIABLE_ISSUE
835#define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
836#undef TARGET_SCHED_INIT
837#define TARGET_SCHED_INIT ix86_sched_init
838#undef TARGET_SCHED_REORDER
839#define TARGET_SCHED_REORDER ix86_sched_reorder
fce5a9f2 840#undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
9b690711
RH
841#define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
842 ia32_use_dfa_pipeline_interface
843#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
844#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
845 ia32_multipass_dfa_lookahead
c237e94a 846
f996902d
RH
847#ifdef HAVE_AS_TLS
848#undef TARGET_HAVE_TLS
849#define TARGET_HAVE_TLS true
850#endif
851
f6897b10 852struct gcc_target targetm = TARGET_INITIALIZER;
e075ae69 853\f
f5316dfe
MM
854/* Sometimes certain combinations of command options do not make
855 sense on a particular target machine. You can define a macro
856 `OVERRIDE_OPTIONS' to take account of this. This macro, if
857 defined, is executed once just after all the command options have
858 been parsed.
859
860 Don't use this macro to turn on various extra optimizations for
861 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
862
863void
864override_options ()
865{
400500c4 866 int i;
e075ae69
RH
867 /* Comes from final.c -- no real reason to change it. */
868#define MAX_CODE_ALIGN 16
f5316dfe 869
c8c5cb99
SC
870 static struct ptt
871 {
8b60264b
KG
872 const struct processor_costs *cost; /* Processor costs */
873 const int target_enable; /* Target flags to enable. */
874 const int target_disable; /* Target flags to disable. */
875 const int align_loop; /* Default alignments. */
2cca7283 876 const int align_loop_max_skip;
8b60264b 877 const int align_jump;
2cca7283 878 const int align_jump_max_skip;
8b60264b
KG
879 const int align_func;
880 const int branch_cost;
e075ae69 881 }
0f290768 882 const processor_target_table[PROCESSOR_max] =
e075ae69 883 {
2cca7283
JH
884 {&i386_cost, 0, 0, 4, 3, 4, 3, 4, 1},
885 {&i486_cost, 0, 0, 16, 15, 16, 15, 16, 1},
886 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16, 1},
887 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16, 1},
888 {&k6_cost, 0, 0, 32, 7, 32, 7, 32, 1},
889 {&athlon_cost, 0, 0, 16, 7, 64, 7, 16, 1},
890 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0, 1}
e075ae69
RH
891 };
892
f4365627 893 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
e075ae69
RH
894 static struct pta
895 {
8b60264b
KG
896 const char *const name; /* processor name or nickname. */
897 const enum processor_type processor;
0dd0e980
JH
898 const enum pta_flags
899 {
900 PTA_SSE = 1,
901 PTA_SSE2 = 2,
902 PTA_MMX = 4,
f4365627 903 PTA_PREFETCH_SSE = 8,
0dd0e980
JH
904 PTA_3DNOW = 16,
905 PTA_3DNOW_A = 64
906 } flags;
e075ae69 907 }
0f290768 908 const processor_alias_table[] =
e075ae69 909 {
0dd0e980
JH
910 {"i386", PROCESSOR_I386, 0},
911 {"i486", PROCESSOR_I486, 0},
912 {"i586", PROCESSOR_PENTIUM, 0},
913 {"pentium", PROCESSOR_PENTIUM, 0},
914 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
915 {"i686", PROCESSOR_PENTIUMPRO, 0},
916 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
917 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
f4365627 918 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
0dd0e980 919 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 |
f4365627 920 PTA_MMX | PTA_PREFETCH_SSE},
0dd0e980
JH
921 {"k6", PROCESSOR_K6, PTA_MMX},
922 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
923 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
f4365627 924 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
0dd0e980 925 | PTA_3DNOW_A},
f4365627 926 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
0dd0e980 927 | PTA_3DNOW | PTA_3DNOW_A},
f4365627 928 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
0dd0e980 929 | PTA_3DNOW_A | PTA_SSE},
f4365627 930 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
0dd0e980 931 | PTA_3DNOW_A | PTA_SSE},
f4365627 932 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
0dd0e980 933 | PTA_3DNOW_A | PTA_SSE},
3af4bd89 934 };
c8c5cb99 935
ca7558fc 936 int const pta_size = ARRAY_SIZE (processor_alias_table);
c8c5cb99 937
f5316dfe
MM
938#ifdef SUBTARGET_OVERRIDE_OPTIONS
939 SUBTARGET_OVERRIDE_OPTIONS;
940#endif
941
f4365627
JH
942 if (!ix86_cpu_string && ix86_arch_string)
943 ix86_cpu_string = ix86_arch_string;
944 if (!ix86_cpu_string)
945 ix86_cpu_string = cpu_names [TARGET_CPU_DEFAULT];
946 if (!ix86_arch_string)
947 ix86_arch_string = TARGET_64BIT ? "athlon-4" : "i386";
e075ae69 948
6189a572
JH
949 if (ix86_cmodel_string != 0)
950 {
951 if (!strcmp (ix86_cmodel_string, "small"))
952 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
953 else if (flag_pic)
c725bd79 954 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
6189a572
JH
955 else if (!strcmp (ix86_cmodel_string, "32"))
956 ix86_cmodel = CM_32;
957 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
958 ix86_cmodel = CM_KERNEL;
959 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
960 ix86_cmodel = CM_MEDIUM;
961 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
962 ix86_cmodel = CM_LARGE;
963 else
964 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
965 }
966 else
967 {
968 ix86_cmodel = CM_32;
969 if (TARGET_64BIT)
970 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
971 }
c93e80a5
JH
972 if (ix86_asm_string != 0)
973 {
974 if (!strcmp (ix86_asm_string, "intel"))
975 ix86_asm_dialect = ASM_INTEL;
976 else if (!strcmp (ix86_asm_string, "att"))
977 ix86_asm_dialect = ASM_ATT;
978 else
979 error ("bad value (%s) for -masm= switch", ix86_asm_string);
980 }
6189a572 981 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
c725bd79 982 error ("code model `%s' not supported in the %s bit mode",
6189a572
JH
983 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
984 if (ix86_cmodel == CM_LARGE)
c725bd79 985 sorry ("code model `large' not supported yet");
0c2dc519 986 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
c725bd79 987 sorry ("%i-bit mode not compiled in",
0c2dc519 988 (target_flags & MASK_64BIT) ? 64 : 32);
6189a572 989
f4365627
JH
990 for (i = 0; i < pta_size; i++)
991 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
992 {
993 ix86_arch = processor_alias_table[i].processor;
994 /* Default cpu tuning to the architecture. */
995 ix86_cpu = ix86_arch;
996 if (processor_alias_table[i].flags & PTA_MMX
997 && !(target_flags & MASK_MMX_SET))
998 target_flags |= MASK_MMX;
999 if (processor_alias_table[i].flags & PTA_3DNOW
1000 && !(target_flags & MASK_3DNOW_SET))
1001 target_flags |= MASK_3DNOW;
1002 if (processor_alias_table[i].flags & PTA_3DNOW_A
1003 && !(target_flags & MASK_3DNOW_A_SET))
1004 target_flags |= MASK_3DNOW_A;
1005 if (processor_alias_table[i].flags & PTA_SSE
1006 && !(target_flags & MASK_SSE_SET))
1007 target_flags |= MASK_SSE;
1008 if (processor_alias_table[i].flags & PTA_SSE2
1009 && !(target_flags & MASK_SSE2_SET))
1010 target_flags |= MASK_SSE2;
1011 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1012 x86_prefetch_sse = true;
1013 break;
1014 }
400500c4 1015
f4365627
JH
1016 if (i == pta_size)
1017 error ("bad value (%s) for -march= switch", ix86_arch_string);
e075ae69 1018
f4365627
JH
1019 for (i = 0; i < pta_size; i++)
1020 if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
1021 {
1022 ix86_cpu = processor_alias_table[i].processor;
1023 break;
1024 }
1025 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1026 x86_prefetch_sse = true;
1027 if (i == pta_size)
1028 error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);
e075ae69 1029
2ab0437e
JH
1030 if (optimize_size)
1031 ix86_cost = &size_cost;
1032 else
1033 ix86_cost = processor_target_table[ix86_cpu].cost;
e075ae69
RH
1034 target_flags |= processor_target_table[ix86_cpu].target_enable;
1035 target_flags &= ~processor_target_table[ix86_cpu].target_disable;
1036
36edd3cc
BS
1037 /* Arrange to set up i386_stack_locals for all functions. */
1038 init_machine_status = ix86_init_machine_status;
fce5a9f2 1039
0f290768 1040 /* Validate -mregparm= value. */
e075ae69 1041 if (ix86_regparm_string)
b08de47e 1042 {
400500c4
RK
1043 i = atoi (ix86_regparm_string);
1044 if (i < 0 || i > REGPARM_MAX)
1045 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1046 else
1047 ix86_regparm = i;
b08de47e 1048 }
0d7d98ee
JH
1049 else
1050 if (TARGET_64BIT)
1051 ix86_regparm = REGPARM_MAX;
b08de47e 1052
3e18fdf6 1053 /* If the user has provided any of the -malign-* options,
a4f31c00 1054 warn and use that value only if -falign-* is not set.
3e18fdf6 1055 Remove this code in GCC 3.2 or later. */
e075ae69 1056 if (ix86_align_loops_string)
b08de47e 1057 {
3e18fdf6
GK
1058 warning ("-malign-loops is obsolete, use -falign-loops");
1059 if (align_loops == 0)
1060 {
1061 i = atoi (ix86_align_loops_string);
1062 if (i < 0 || i > MAX_CODE_ALIGN)
1063 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1064 else
1065 align_loops = 1 << i;
1066 }
b08de47e 1067 }
3af4bd89 1068
e075ae69 1069 if (ix86_align_jumps_string)
b08de47e 1070 {
3e18fdf6
GK
1071 warning ("-malign-jumps is obsolete, use -falign-jumps");
1072 if (align_jumps == 0)
1073 {
1074 i = atoi (ix86_align_jumps_string);
1075 if (i < 0 || i > MAX_CODE_ALIGN)
1076 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1077 else
1078 align_jumps = 1 << i;
1079 }
b08de47e 1080 }
b08de47e 1081
e075ae69 1082 if (ix86_align_funcs_string)
b08de47e 1083 {
3e18fdf6
GK
1084 warning ("-malign-functions is obsolete, use -falign-functions");
1085 if (align_functions == 0)
1086 {
1087 i = atoi (ix86_align_funcs_string);
1088 if (i < 0 || i > MAX_CODE_ALIGN)
1089 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1090 else
1091 align_functions = 1 << i;
1092 }
b08de47e 1093 }
3af4bd89 1094
3e18fdf6 1095 /* Default align_* from the processor table. */
3e18fdf6 1096 if (align_loops == 0)
2cca7283
JH
1097 {
1098 align_loops = processor_target_table[ix86_cpu].align_loop;
1099 align_loops_max_skip = processor_target_table[ix86_cpu].align_loop_max_skip;
1100 }
3e18fdf6 1101 if (align_jumps == 0)
2cca7283
JH
1102 {
1103 align_jumps = processor_target_table[ix86_cpu].align_jump;
1104 align_jumps_max_skip = processor_target_table[ix86_cpu].align_jump_max_skip;
1105 }
3e18fdf6 1106 if (align_functions == 0)
2cca7283
JH
1107 {
1108 align_functions = processor_target_table[ix86_cpu].align_func;
1109 }
3e18fdf6 1110
e4c0478d 1111 /* Validate -mpreferred-stack-boundary= value, or provide default.
fbb83b43
AO
1112 The default of 128 bits is for Pentium III's SSE __m128, but we
1113 don't want additional code to keep the stack aligned when
1114 optimizing for code size. */
1115 ix86_preferred_stack_boundary = (optimize_size
1116 ? TARGET_64BIT ? 64 : 32
1117 : 128);
e075ae69 1118 if (ix86_preferred_stack_boundary_string)
3af4bd89 1119 {
400500c4 1120 i = atoi (ix86_preferred_stack_boundary_string);
c6257c5d
AO
1121 if (i < (TARGET_64BIT ? 3 : 2) || i > 12)
1122 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
0d7d98ee 1123 TARGET_64BIT ? 3 : 2);
400500c4
RK
1124 else
1125 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
3af4bd89 1126 }
77a989d1 1127
0f290768 1128 /* Validate -mbranch-cost= value, or provide default. */
e075ae69
RH
1129 ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost;
1130 if (ix86_branch_cost_string)
804a8ee0 1131 {
400500c4
RK
1132 i = atoi (ix86_branch_cost_string);
1133 if (i < 0 || i > 5)
1134 error ("-mbranch-cost=%d is not between 0 and 5", i);
1135 else
1136 ix86_branch_cost = i;
804a8ee0 1137 }
804a8ee0 1138
f996902d
RH
1139 if (ix86_tls_dialect_string)
1140 {
1141 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1142 ix86_tls_dialect = TLS_DIALECT_GNU;
1143 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1144 ix86_tls_dialect = TLS_DIALECT_SUN;
1145 else
1146 error ("bad value (%s) for -mtls-dialect= switch",
1147 ix86_tls_dialect_string);
1148 }
1149
e9a25f70
JL
1150 /* Keep nonleaf frame pointers. */
1151 if (TARGET_OMIT_LEAF_FRAME_POINTER)
77a989d1 1152 flag_omit_frame_pointer = 1;
e075ae69
RH
1153
1154 /* If we're doing fast math, we don't care about comparison order
1155 wrt NaNs. This lets us use a shorter comparison sequence. */
de6c5979 1156 if (flag_unsafe_math_optimizations)
e075ae69
RH
1157 target_flags &= ~MASK_IEEE_FP;
1158
30c99a84
RH
1159 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1160 since the insns won't need emulation. */
1161 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1162 target_flags &= ~MASK_NO_FANCY_MATH_387;
1163
14f73b5a
JH
1164 if (TARGET_64BIT)
1165 {
1166 if (TARGET_ALIGN_DOUBLE)
c725bd79 1167 error ("-malign-double makes no sense in the 64bit mode");
14f73b5a 1168 if (TARGET_RTD)
c725bd79 1169 error ("-mrtd calling convention not supported in the 64bit mode");
14f73b5a 1170 /* Enable by default the SSE and MMX builtins. */
965f5423
JH
1171 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1172 ix86_fpmath = FPMATH_SSE;
14f73b5a 1173 }
965f5423
JH
1174 else
1175 ix86_fpmath = FPMATH_387;
1176
1177 if (ix86_fpmath_string != 0)
1178 {
1179 if (! strcmp (ix86_fpmath_string, "387"))
1180 ix86_fpmath = FPMATH_387;
1181 else if (! strcmp (ix86_fpmath_string, "sse"))
1182 {
1183 if (!TARGET_SSE)
1184 {
1185 warning ("SSE instruction set disabled, using 387 arithmetics");
1186 ix86_fpmath = FPMATH_387;
1187 }
1188 else
1189 ix86_fpmath = FPMATH_SSE;
1190 }
1191 else if (! strcmp (ix86_fpmath_string, "387,sse")
1192 || ! strcmp (ix86_fpmath_string, "sse,387"))
1193 {
1194 if (!TARGET_SSE)
1195 {
1196 warning ("SSE instruction set disabled, using 387 arithmetics");
1197 ix86_fpmath = FPMATH_387;
1198 }
1199 else if (!TARGET_80387)
1200 {
1201 warning ("387 instruction set disabled, using SSE arithmetics");
1202 ix86_fpmath = FPMATH_SSE;
1203 }
1204 else
1205 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1206 }
fce5a9f2 1207 else
965f5423
JH
1208 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1209 }
14f73b5a 1210
a7180f70
BS
1211 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1212 on by -msse. */
1213 if (TARGET_SSE)
e37af218
RH
1214 {
1215 target_flags |= MASK_MMX;
1216 x86_prefetch_sse = true;
1217 }
c6036a37 1218
47f339cf
BS
1219 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
1220 if (TARGET_3DNOW)
1221 {
1222 target_flags |= MASK_MMX;
1223 /* If we are targetting the Athlon architecture, enable the 3Dnow/MMX
1224 extensions it adds. */
1225 if (x86_3dnow_a & (1 << ix86_arch))
1226 target_flags |= MASK_3DNOW_A;
1227 }
c6036a37 1228 if ((x86_accumulate_outgoing_args & CPUMASK)
0dd0e980 1229 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS_SET)
c6036a37
JH
1230 && !optimize_size)
1231 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
623fe810
RH
1232
1233 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1234 {
1235 char *p;
1236 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1237 p = strchr (internal_label_prefix, 'X');
1238 internal_label_prefix_len = p - internal_label_prefix;
1239 *p = '\0';
1240 }
f5316dfe
MM
1241}
1242\f
32b5b1aa 1243void
c6aded7c 1244optimization_options (level, size)
32b5b1aa 1245 int level;
bb5177ac 1246 int size ATTRIBUTE_UNUSED;
32b5b1aa 1247{
e9a25f70
JL
1248 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1249 make the problem with not enough registers even worse. */
32b5b1aa
SC
1250#ifdef INSN_SCHEDULING
1251 if (level > 1)
1252 flag_schedule_insns = 0;
1253#endif
53c17031
JH
1254 if (TARGET_64BIT && optimize >= 1)
1255 flag_omit_frame_pointer = 1;
1256 if (TARGET_64BIT)
b932f770
JH
1257 {
1258 flag_pcc_struct_return = 0;
1259 flag_asynchronous_unwind_tables = 1;
1260 }
32b5b1aa 1261}
b08de47e 1262\f
91d231cb
JM
1263/* Table of valid machine attributes. */
1264const struct attribute_spec ix86_attribute_table[] =
b08de47e 1265{
91d231cb 1266 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
b08de47e
MM
1267 /* Stdcall attribute says callee is responsible for popping arguments
1268 if they are not variable. */
91d231cb
JM
1269 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1270 /* Cdecl attribute says the callee is a normal C declaration */
1271 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
b08de47e 1272 /* Regparm attribute specifies how many integer arguments are to be
0f290768 1273 passed in registers. */
91d231cb
JM
1274 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1275#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
3da1eb0b
DS
1276 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1277 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1278 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
91d231cb
JM
1279#endif
1280 { NULL, 0, 0, false, false, false, NULL }
1281};
1282
1283/* Handle a "cdecl" or "stdcall" attribute;
1284 arguments as in struct attribute_spec.handler. */
1285static tree
1286ix86_handle_cdecl_attribute (node, name, args, flags, no_add_attrs)
1287 tree *node;
1288 tree name;
1289 tree args ATTRIBUTE_UNUSED;
1290 int flags ATTRIBUTE_UNUSED;
1291 bool *no_add_attrs;
1292{
1293 if (TREE_CODE (*node) != FUNCTION_TYPE
1294 && TREE_CODE (*node) != METHOD_TYPE
1295 && TREE_CODE (*node) != FIELD_DECL
1296 && TREE_CODE (*node) != TYPE_DECL)
b08de47e 1297 {
91d231cb
JM
1298 warning ("`%s' attribute only applies to functions",
1299 IDENTIFIER_POINTER (name));
1300 *no_add_attrs = true;
1301 }
b08de47e 1302
91d231cb
JM
1303 if (TARGET_64BIT)
1304 {
1305 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1306 *no_add_attrs = true;
1307 }
b08de47e 1308
91d231cb
JM
1309 return NULL_TREE;
1310}
b08de47e 1311
91d231cb
JM
1312/* Handle a "regparm" attribute;
1313 arguments as in struct attribute_spec.handler. */
1314static tree
1315ix86_handle_regparm_attribute (node, name, args, flags, no_add_attrs)
1316 tree *node;
1317 tree name;
1318 tree args;
1319 int flags ATTRIBUTE_UNUSED;
1320 bool *no_add_attrs;
1321{
1322 if (TREE_CODE (*node) != FUNCTION_TYPE
1323 && TREE_CODE (*node) != METHOD_TYPE
1324 && TREE_CODE (*node) != FIELD_DECL
1325 && TREE_CODE (*node) != TYPE_DECL)
1326 {
1327 warning ("`%s' attribute only applies to functions",
1328 IDENTIFIER_POINTER (name));
1329 *no_add_attrs = true;
1330 }
1331 else
1332 {
1333 tree cst;
b08de47e 1334
91d231cb
JM
1335 cst = TREE_VALUE (args);
1336 if (TREE_CODE (cst) != INTEGER_CST)
1337 {
1338 warning ("`%s' attribute requires an integer constant argument",
1339 IDENTIFIER_POINTER (name));
1340 *no_add_attrs = true;
1341 }
1342 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1343 {
1344 warning ("argument to `%s' attribute larger than %d",
1345 IDENTIFIER_POINTER (name), REGPARM_MAX);
1346 *no_add_attrs = true;
1347 }
b08de47e
MM
1348 }
1349
91d231cb 1350 return NULL_TREE;
b08de47e
MM
1351}
1352
1353/* Return 0 if the attributes for two types are incompatible, 1 if they
1354 are compatible, and 2 if they are nearly compatible (which causes a
1355 warning to be generated). */
1356
8d8e52be 1357static int
e075ae69 1358ix86_comp_type_attributes (type1, type2)
afcfe58c
MM
1359 tree type1;
1360 tree type2;
b08de47e 1361{
0f290768 1362 /* Check for mismatch of non-default calling convention. */
27c38fbe 1363 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
afcfe58c
MM
1364
1365 if (TREE_CODE (type1) != FUNCTION_TYPE)
1366 return 1;
1367
1368 /* Check for mismatched return types (cdecl vs stdcall). */
6093f019
RH
1369 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1370 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
afcfe58c 1371 return 0;
b08de47e
MM
1372 return 1;
1373}
b08de47e
MM
1374\f
1375/* Value is the number of bytes of arguments automatically
1376 popped when returning from a subroutine call.
1377 FUNDECL is the declaration node of the function (as a tree),
1378 FUNTYPE is the data type of the function (as a tree),
1379 or for a library call it is an identifier node for the subroutine name.
1380 SIZE is the number of bytes of arguments passed on the stack.
1381
1382 On the 80386, the RTD insn may be used to pop them if the number
1383 of args is fixed, but if the number is variable then the caller
1384 must pop them all. RTD can't be used for library calls now
1385 because the library is compiled with the Unix compiler.
1386 Use of RTD is a selectable option, since it is incompatible with
1387 standard Unix calling sequences. If the option is not selected,
1388 the caller must always pop the args.
1389
1390 The attribute stdcall is equivalent to RTD on a per module basis. */
1391
1392int
e075ae69 1393ix86_return_pops_args (fundecl, funtype, size)
b08de47e
MM
1394 tree fundecl;
1395 tree funtype;
1396 int size;
79325812 1397{
3345ee7d 1398 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
b08de47e 1399
0f290768 1400 /* Cdecl functions override -mrtd, and never pop the stack. */
e9a25f70 1401 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
79325812 1402
0f290768 1403 /* Stdcall functions will pop the stack if not variable args. */
698cdd84
SC
1404 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)))
1405 rtd = 1;
79325812 1406
698cdd84
SC
1407 if (rtd
1408 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
e9a25f70
JL
1409 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1410 == void_type_node)))
698cdd84
SC
1411 return size;
1412 }
79325812 1413
232b8f52 1414 /* Lose any fake structure return argument if it is passed on the stack. */
0d7d98ee
JH
1415 if (aggregate_value_p (TREE_TYPE (funtype))
1416 && !TARGET_64BIT)
232b8f52
JJ
1417 {
1418 int nregs = ix86_regparm;
79325812 1419
232b8f52
JJ
1420 if (funtype)
1421 {
1422 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (funtype));
1423
1424 if (attr)
1425 nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1426 }
1427
1428 if (!nregs)
1429 return GET_MODE_SIZE (Pmode);
1430 }
1431
1432 return 0;
b08de47e 1433}
b08de47e
MM
1434\f
1435/* Argument support functions. */
1436
53c17031
JH
1437/* Return true when register may be used to pass function parameters. */
1438bool
1439ix86_function_arg_regno_p (regno)
1440 int regno;
1441{
1442 int i;
1443 if (!TARGET_64BIT)
0333394e
JJ
1444 return (regno < REGPARM_MAX
1445 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
53c17031
JH
1446 if (SSE_REGNO_P (regno) && TARGET_SSE)
1447 return true;
1448 /* RAX is used as hidden argument to va_arg functions. */
1449 if (!regno)
1450 return true;
1451 for (i = 0; i < REGPARM_MAX; i++)
1452 if (regno == x86_64_int_parameter_registers[i])
1453 return true;
1454 return false;
1455}
1456
b08de47e
MM
1457/* Initialize a variable CUM of type CUMULATIVE_ARGS
1458 for a call to a function whose data type is FNTYPE.
1459 For a library call, FNTYPE is 0. */
1460
1461void
1462init_cumulative_args (cum, fntype, libname)
e9a25f70 1463 CUMULATIVE_ARGS *cum; /* Argument info to initialize */
b08de47e
MM
1464 tree fntype; /* tree ptr for function decl */
1465 rtx libname; /* SYMBOL_REF of library name or 0 */
1466{
1467 static CUMULATIVE_ARGS zero_cum;
1468 tree param, next_param;
1469
1470 if (TARGET_DEBUG_ARG)
1471 {
1472 fprintf (stderr, "\ninit_cumulative_args (");
1473 if (fntype)
e9a25f70
JL
1474 fprintf (stderr, "fntype code = %s, ret code = %s",
1475 tree_code_name[(int) TREE_CODE (fntype)],
1476 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
b08de47e
MM
1477 else
1478 fprintf (stderr, "no fntype");
1479
1480 if (libname)
1481 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1482 }
1483
1484 *cum = zero_cum;
1485
1486 /* Set up the number of registers to use for passing arguments. */
e075ae69 1487 cum->nregs = ix86_regparm;
53c17031
JH
1488 cum->sse_nregs = SSE_REGPARM_MAX;
1489 if (fntype && !TARGET_64BIT)
b08de47e
MM
1490 {
1491 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));
e9a25f70 1492
b08de47e
MM
1493 if (attr)
1494 cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1495 }
53c17031 1496 cum->maybe_vaarg = false;
b08de47e
MM
1497
1498 /* Determine if this function has variable arguments. This is
1499 indicated by the last argument being 'void_type_mode' if there
1500 are no variable arguments. If there are variable arguments, then
1501 we won't pass anything in registers */
1502
1503 if (cum->nregs)
1504 {
1505 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
e9a25f70 1506 param != 0; param = next_param)
b08de47e
MM
1507 {
1508 next_param = TREE_CHAIN (param);
e9a25f70 1509 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
53c17031
JH
1510 {
1511 if (!TARGET_64BIT)
1512 cum->nregs = 0;
1513 cum->maybe_vaarg = true;
1514 }
b08de47e
MM
1515 }
1516 }
53c17031
JH
1517 if ((!fntype && !libname)
1518 || (fntype && !TYPE_ARG_TYPES (fntype)))
1519 cum->maybe_vaarg = 1;
b08de47e
MM
1520
1521 if (TARGET_DEBUG_ARG)
1522 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1523
1524 return;
1525}
1526
53c17031 1527/* x86-64 register passing impleemntation. See x86-64 ABI for details. Goal
f710504c 1528 of this code is to classify each 8bytes of incoming argument by the register
53c17031
JH
1529 class and assign registers accordingly. */
1530
1531/* Return the union class of CLASS1 and CLASS2.
1532 See the x86-64 PS ABI for details. */
1533
1534static enum x86_64_reg_class
1535merge_classes (class1, class2)
1536 enum x86_64_reg_class class1, class2;
1537{
1538 /* Rule #1: If both classes are equal, this is the resulting class. */
1539 if (class1 == class2)
1540 return class1;
1541
1542 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1543 the other class. */
1544 if (class1 == X86_64_NO_CLASS)
1545 return class2;
1546 if (class2 == X86_64_NO_CLASS)
1547 return class1;
1548
1549 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1550 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1551 return X86_64_MEMORY_CLASS;
1552
1553 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1554 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1555 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1556 return X86_64_INTEGERSI_CLASS;
1557 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1558 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1559 return X86_64_INTEGER_CLASS;
1560
1561 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1562 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1563 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1564 return X86_64_MEMORY_CLASS;
1565
1566 /* Rule #6: Otherwise class SSE is used. */
1567 return X86_64_SSE_CLASS;
1568}
1569
1570/* Classify the argument of type TYPE and mode MODE.
1571 CLASSES will be filled by the register class used to pass each word
1572 of the operand. The number of words is returned. In case the parameter
1573 should be passed in memory, 0 is returned. As a special case for zero
1574 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1575
1576 BIT_OFFSET is used internally for handling records and specifies offset
1577 of the offset in bits modulo 256 to avoid overflow cases.
1578
1579 See the x86-64 PS ABI for details.
1580*/
1581
1582static int
1583classify_argument (mode, type, classes, bit_offset)
1584 enum machine_mode mode;
1585 tree type;
1586 enum x86_64_reg_class classes[MAX_CLASSES];
1587 int bit_offset;
1588{
1589 int bytes =
1590 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1591 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1592
1593 if (type && AGGREGATE_TYPE_P (type))
1594 {
1595 int i;
1596 tree field;
1597 enum x86_64_reg_class subclasses[MAX_CLASSES];
1598
1599 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
1600 if (bytes > 16)
1601 return 0;
1602
1603 for (i = 0; i < words; i++)
1604 classes[i] = X86_64_NO_CLASS;
1605
1606 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
1607 signalize memory class, so handle it as special case. */
1608 if (!words)
1609 {
1610 classes[0] = X86_64_NO_CLASS;
1611 return 1;
1612 }
1613
1614 /* Classify each field of record and merge classes. */
1615 if (TREE_CODE (type) == RECORD_TYPE)
1616 {
91ea38f9
JH
1617 /* For classes first merge in the field of the subclasses. */
1618 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1619 {
1620 tree bases = TYPE_BINFO_BASETYPES (type);
1621 int n_bases = TREE_VEC_LENGTH (bases);
1622 int i;
1623
1624 for (i = 0; i < n_bases; ++i)
1625 {
1626 tree binfo = TREE_VEC_ELT (bases, i);
1627 int num;
1628 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1629 tree type = BINFO_TYPE (binfo);
1630
1631 num = classify_argument (TYPE_MODE (type),
1632 type, subclasses,
1633 (offset + bit_offset) % 256);
1634 if (!num)
1635 return 0;
1636 for (i = 0; i < num; i++)
1637 {
1638 int pos = (offset + bit_offset) / 8 / 8;
1639 classes[i + pos] =
1640 merge_classes (subclasses[i], classes[i + pos]);
1641 }
1642 }
1643 }
1644 /* And now merge the fields of structure. */
53c17031
JH
1645 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1646 {
1647 if (TREE_CODE (field) == FIELD_DECL)
1648 {
1649 int num;
1650
1651 /* Bitfields are always classified as integer. Handle them
1652 early, since later code would consider them to be
1653 misaligned integers. */
1654 if (DECL_BIT_FIELD (field))
1655 {
1656 for (i = int_bit_position (field) / 8 / 8;
1657 i < (int_bit_position (field)
1658 + tree_low_cst (DECL_SIZE (field), 0)
1659 + 63) / 8 / 8; i++)
1660 classes[i] =
1661 merge_classes (X86_64_INTEGER_CLASS,
1662 classes[i]);
1663 }
1664 else
1665 {
1666 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1667 TREE_TYPE (field), subclasses,
1668 (int_bit_position (field)
1669 + bit_offset) % 256);
1670 if (!num)
1671 return 0;
1672 for (i = 0; i < num; i++)
1673 {
1674 int pos =
1675 (int_bit_position (field) + bit_offset) / 8 / 8;
1676 classes[i + pos] =
1677 merge_classes (subclasses[i], classes[i + pos]);
1678 }
1679 }
1680 }
1681 }
1682 }
1683 /* Arrays are handled as small records. */
1684 else if (TREE_CODE (type) == ARRAY_TYPE)
1685 {
1686 int num;
1687 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
1688 TREE_TYPE (type), subclasses, bit_offset);
1689 if (!num)
1690 return 0;
1691
1692 /* The partial classes are now full classes. */
1693 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
1694 subclasses[0] = X86_64_SSE_CLASS;
1695 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
1696 subclasses[0] = X86_64_INTEGER_CLASS;
1697
1698 for (i = 0; i < words; i++)
1699 classes[i] = subclasses[i % num];
1700 }
1701 /* Unions are similar to RECORD_TYPE but offset is always 0. */
e4dbaed5
AS
1702 else if (TREE_CODE (type) == UNION_TYPE
1703 || TREE_CODE (type) == QUAL_UNION_TYPE)
53c17031 1704 {
91ea38f9
JH
1705 /* For classes first merge in the field of the subclasses. */
1706 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1707 {
1708 tree bases = TYPE_BINFO_BASETYPES (type);
1709 int n_bases = TREE_VEC_LENGTH (bases);
1710 int i;
1711
1712 for (i = 0; i < n_bases; ++i)
1713 {
1714 tree binfo = TREE_VEC_ELT (bases, i);
1715 int num;
1716 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1717 tree type = BINFO_TYPE (binfo);
1718
1719 num = classify_argument (TYPE_MODE (type),
1720 type, subclasses,
1721 (offset + bit_offset) % 256);
1722 if (!num)
1723 return 0;
1724 for (i = 0; i < num; i++)
1725 {
1726 int pos = (offset + bit_offset) / 8 / 8;
1727 classes[i + pos] =
1728 merge_classes (subclasses[i], classes[i + pos]);
1729 }
1730 }
1731 }
53c17031
JH
1732 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1733 {
1734 if (TREE_CODE (field) == FIELD_DECL)
1735 {
1736 int num;
1737 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1738 TREE_TYPE (field), subclasses,
1739 bit_offset);
1740 if (!num)
1741 return 0;
1742 for (i = 0; i < num; i++)
1743 classes[i] = merge_classes (subclasses[i], classes[i]);
1744 }
1745 }
1746 }
1747 else
1748 abort ();
1749
1750 /* Final merger cleanup. */
1751 for (i = 0; i < words; i++)
1752 {
1753 /* If one class is MEMORY, everything should be passed in
1754 memory. */
1755 if (classes[i] == X86_64_MEMORY_CLASS)
1756 return 0;
1757
d6a7951f 1758 /* The X86_64_SSEUP_CLASS should be always preceded by
53c17031
JH
1759 X86_64_SSE_CLASS. */
1760 if (classes[i] == X86_64_SSEUP_CLASS
1761 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
1762 classes[i] = X86_64_SSE_CLASS;
1763
d6a7951f 1764 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
53c17031
JH
1765 if (classes[i] == X86_64_X87UP_CLASS
1766 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
1767 classes[i] = X86_64_SSE_CLASS;
1768 }
1769 return words;
1770 }
1771
1772 /* Compute alignment needed. We align all types to natural boundaries with
1773 exception of XFmode that is aligned to 64bits. */
1774 if (mode != VOIDmode && mode != BLKmode)
1775 {
1776 int mode_alignment = GET_MODE_BITSIZE (mode);
1777
1778 if (mode == XFmode)
1779 mode_alignment = 128;
1780 else if (mode == XCmode)
1781 mode_alignment = 256;
f5143c46 1782 /* Misaligned fields are always returned in memory. */
53c17031
JH
1783 if (bit_offset % mode_alignment)
1784 return 0;
1785 }
1786
1787 /* Classification of atomic types. */
1788 switch (mode)
1789 {
1790 case DImode:
1791 case SImode:
1792 case HImode:
1793 case QImode:
1794 case CSImode:
1795 case CHImode:
1796 case CQImode:
1797 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
1798 classes[0] = X86_64_INTEGERSI_CLASS;
1799 else
1800 classes[0] = X86_64_INTEGER_CLASS;
1801 return 1;
1802 case CDImode:
1803 case TImode:
1804 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1805 return 2;
1806 case CTImode:
1807 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1808 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
1809 return 4;
1810 case SFmode:
1811 if (!(bit_offset % 64))
1812 classes[0] = X86_64_SSESF_CLASS;
1813 else
1814 classes[0] = X86_64_SSE_CLASS;
1815 return 1;
1816 case DFmode:
1817 classes[0] = X86_64_SSEDF_CLASS;
1818 return 1;
1819 case TFmode:
1820 classes[0] = X86_64_X87_CLASS;
1821 classes[1] = X86_64_X87UP_CLASS;
1822 return 2;
1823 case TCmode:
1824 classes[0] = X86_64_X87_CLASS;
1825 classes[1] = X86_64_X87UP_CLASS;
1826 classes[2] = X86_64_X87_CLASS;
1827 classes[3] = X86_64_X87UP_CLASS;
1828 return 4;
1829 case DCmode:
1830 classes[0] = X86_64_SSEDF_CLASS;
1831 classes[1] = X86_64_SSEDF_CLASS;
1832 return 2;
1833 case SCmode:
1834 classes[0] = X86_64_SSE_CLASS;
1835 return 1;
e95d6b23
JH
1836 case V4SFmode:
1837 case V4SImode:
495333a6
JH
1838 case V16QImode:
1839 case V8HImode:
1840 case V2DFmode:
1841 case V2DImode:
e95d6b23
JH
1842 classes[0] = X86_64_SSE_CLASS;
1843 classes[1] = X86_64_SSEUP_CLASS;
1844 return 2;
1845 case V2SFmode:
1846 case V2SImode:
1847 case V4HImode:
1848 case V8QImode:
1849 classes[0] = X86_64_SSE_CLASS;
1850 return 1;
53c17031 1851 case BLKmode:
e95d6b23 1852 case VOIDmode:
53c17031
JH
1853 return 0;
1854 default:
1855 abort ();
1856 }
1857}
1858
1859/* Examine the argument and return set number of register required in each
f5143c46 1860 class. Return 0 iff parameter should be passed in memory. */
53c17031
JH
1861static int
1862examine_argument (mode, type, in_return, int_nregs, sse_nregs)
1863 enum machine_mode mode;
1864 tree type;
1865 int *int_nregs, *sse_nregs;
1866 int in_return;
1867{
1868 enum x86_64_reg_class class[MAX_CLASSES];
1869 int n = classify_argument (mode, type, class, 0);
1870
1871 *int_nregs = 0;
1872 *sse_nregs = 0;
1873 if (!n)
1874 return 0;
1875 for (n--; n >= 0; n--)
1876 switch (class[n])
1877 {
1878 case X86_64_INTEGER_CLASS:
1879 case X86_64_INTEGERSI_CLASS:
1880 (*int_nregs)++;
1881 break;
1882 case X86_64_SSE_CLASS:
1883 case X86_64_SSESF_CLASS:
1884 case X86_64_SSEDF_CLASS:
1885 (*sse_nregs)++;
1886 break;
1887 case X86_64_NO_CLASS:
1888 case X86_64_SSEUP_CLASS:
1889 break;
1890 case X86_64_X87_CLASS:
1891 case X86_64_X87UP_CLASS:
1892 if (!in_return)
1893 return 0;
1894 break;
1895 case X86_64_MEMORY_CLASS:
1896 abort ();
1897 }
1898 return 1;
1899}
1900/* Construct container for the argument used by GCC interface. See
1901 FUNCTION_ARG for the detailed description. */
1902static rtx
1903construct_container (mode, type, in_return, nintregs, nsseregs, intreg, sse_regno)
1904 enum machine_mode mode;
1905 tree type;
1906 int in_return;
1907 int nintregs, nsseregs;
07933f72
GS
1908 const int * intreg;
1909 int sse_regno;
53c17031
JH
1910{
1911 enum machine_mode tmpmode;
1912 int bytes =
1913 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1914 enum x86_64_reg_class class[MAX_CLASSES];
1915 int n;
1916 int i;
1917 int nexps = 0;
1918 int needed_sseregs, needed_intregs;
1919 rtx exp[MAX_CLASSES];
1920 rtx ret;
1921
1922 n = classify_argument (mode, type, class, 0);
1923 if (TARGET_DEBUG_ARG)
1924 {
1925 if (!n)
1926 fprintf (stderr, "Memory class\n");
1927 else
1928 {
1929 fprintf (stderr, "Classes:");
1930 for (i = 0; i < n; i++)
1931 {
1932 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
1933 }
1934 fprintf (stderr, "\n");
1935 }
1936 }
1937 if (!n)
1938 return NULL;
1939 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
1940 return NULL;
1941 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
1942 return NULL;
1943
1944 /* First construct simple cases. Avoid SCmode, since we want to use
1945 single register to pass this type. */
1946 if (n == 1 && mode != SCmode)
1947 switch (class[0])
1948 {
1949 case X86_64_INTEGER_CLASS:
1950 case X86_64_INTEGERSI_CLASS:
1951 return gen_rtx_REG (mode, intreg[0]);
1952 case X86_64_SSE_CLASS:
1953 case X86_64_SSESF_CLASS:
1954 case X86_64_SSEDF_CLASS:
1955 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
1956 case X86_64_X87_CLASS:
1957 return gen_rtx_REG (mode, FIRST_STACK_REG);
1958 case X86_64_NO_CLASS:
1959 /* Zero sized array, struct or class. */
1960 return NULL;
1961 default:
1962 abort ();
1963 }
1964 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
e95d6b23 1965 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
53c17031
JH
1966 if (n == 2
1967 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
1968 return gen_rtx_REG (TFmode, FIRST_STACK_REG);
1969 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
1970 && class[1] == X86_64_INTEGER_CLASS
1971 && (mode == CDImode || mode == TImode)
1972 && intreg[0] + 1 == intreg[1])
1973 return gen_rtx_REG (mode, intreg[0]);
1974 if (n == 4
1975 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
1976 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
1977 return gen_rtx_REG (TCmode, FIRST_STACK_REG);
1978
1979 /* Otherwise figure out the entries of the PARALLEL. */
1980 for (i = 0; i < n; i++)
1981 {
1982 switch (class[i])
1983 {
1984 case X86_64_NO_CLASS:
1985 break;
1986 case X86_64_INTEGER_CLASS:
1987 case X86_64_INTEGERSI_CLASS:
1988 /* Merge TImodes on aligned occassions here too. */
1989 if (i * 8 + 8 > bytes)
1990 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
1991 else if (class[i] == X86_64_INTEGERSI_CLASS)
1992 tmpmode = SImode;
1993 else
1994 tmpmode = DImode;
1995 /* We've requested 24 bytes we don't have mode for. Use DImode. */
1996 if (tmpmode == BLKmode)
1997 tmpmode = DImode;
1998 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
1999 gen_rtx_REG (tmpmode, *intreg),
2000 GEN_INT (i*8));
2001 intreg++;
2002 break;
2003 case X86_64_SSESF_CLASS:
2004 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2005 gen_rtx_REG (SFmode,
2006 SSE_REGNO (sse_regno)),
2007 GEN_INT (i*8));
2008 sse_regno++;
2009 break;
2010 case X86_64_SSEDF_CLASS:
2011 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2012 gen_rtx_REG (DFmode,
2013 SSE_REGNO (sse_regno)),
2014 GEN_INT (i*8));
2015 sse_regno++;
2016 break;
2017 case X86_64_SSE_CLASS:
2018 if (i < n && class[i + 1] == X86_64_SSEUP_CLASS)
2019 tmpmode = TImode, i++;
2020 else
2021 tmpmode = DImode;
2022 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2023 gen_rtx_REG (tmpmode,
2024 SSE_REGNO (sse_regno)),
2025 GEN_INT (i*8));
2026 sse_regno++;
2027 break;
2028 default:
2029 abort ();
2030 }
2031 }
2032 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2033 for (i = 0; i < nexps; i++)
2034 XVECEXP (ret, 0, i) = exp [i];
2035 return ret;
2036}
2037
b08de47e
MM
2038/* Update the data in CUM to advance over an argument
2039 of mode MODE and data type TYPE.
2040 (TYPE is null for libcalls where that information may not be available.) */
2041
2042void
2043function_arg_advance (cum, mode, type, named)
2044 CUMULATIVE_ARGS *cum; /* current arg information */
2045 enum machine_mode mode; /* current arg mode */
2046 tree type; /* type of the argument or 0 if lib support */
2047 int named; /* whether or not the argument was named */
2048{
5ac9118e
KG
2049 int bytes =
2050 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
b08de47e
MM
2051 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2052
2053 if (TARGET_DEBUG_ARG)
2054 fprintf (stderr,
e9a25f70 2055 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
b08de47e 2056 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
53c17031 2057 if (TARGET_64BIT)
b08de47e 2058 {
53c17031
JH
2059 int int_nregs, sse_nregs;
2060 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2061 cum->words += words;
2062 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
82a127a9 2063 {
53c17031
JH
2064 cum->nregs -= int_nregs;
2065 cum->sse_nregs -= sse_nregs;
2066 cum->regno += int_nregs;
2067 cum->sse_regno += sse_nregs;
82a127a9 2068 }
53c17031
JH
2069 else
2070 cum->words += words;
b08de47e 2071 }
a4f31c00 2072 else
82a127a9 2073 {
53c17031
JH
2074 if (TARGET_SSE && mode == TImode)
2075 {
2076 cum->sse_words += words;
2077 cum->sse_nregs -= 1;
2078 cum->sse_regno += 1;
2079 if (cum->sse_nregs <= 0)
2080 {
2081 cum->sse_nregs = 0;
2082 cum->sse_regno = 0;
2083 }
2084 }
2085 else
82a127a9 2086 {
53c17031
JH
2087 cum->words += words;
2088 cum->nregs -= words;
2089 cum->regno += words;
2090
2091 if (cum->nregs <= 0)
2092 {
2093 cum->nregs = 0;
2094 cum->regno = 0;
2095 }
82a127a9
CM
2096 }
2097 }
b08de47e
MM
2098 return;
2099}
2100
2101/* Define where to put the arguments to a function.
2102 Value is zero to push the argument on the stack,
2103 or a hard register in which to store the argument.
2104
2105 MODE is the argument's machine mode.
2106 TYPE is the data type of the argument (as a tree).
2107 This is null for libcalls where that information may
2108 not be available.
2109 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2110 the preceding args and about the function being called.
2111 NAMED is nonzero if this argument is a named parameter
2112 (otherwise it is an extra parameter matching an ellipsis). */
2113
07933f72 2114rtx
b08de47e
MM
2115function_arg (cum, mode, type, named)
2116 CUMULATIVE_ARGS *cum; /* current arg information */
2117 enum machine_mode mode; /* current arg mode */
2118 tree type; /* type of the argument or 0 if lib support */
2119 int named; /* != 0 for normal args, == 0 for ... args */
2120{
2121 rtx ret = NULL_RTX;
5ac9118e
KG
2122 int bytes =
2123 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
b08de47e
MM
2124 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2125
53c17031
JH
2126 /* Handle an hidden AL argument containing number of registers for varargs
2127 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
2128 any AL settings. */
32ee7d1d 2129 if (mode == VOIDmode)
b08de47e 2130 {
53c17031
JH
2131 if (TARGET_64BIT)
2132 return GEN_INT (cum->maybe_vaarg
2133 ? (cum->sse_nregs < 0
2134 ? SSE_REGPARM_MAX
2135 : cum->sse_regno)
2136 : -1);
2137 else
2138 return constm1_rtx;
b08de47e 2139 }
53c17031
JH
2140 if (TARGET_64BIT)
2141 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2142 &x86_64_int_parameter_registers [cum->regno],
2143 cum->sse_regno);
2144 else
2145 switch (mode)
2146 {
2147 /* For now, pass fp/complex values on the stack. */
2148 default:
2149 break;
2150
2151 case BLKmode:
2152 case DImode:
2153 case SImode:
2154 case HImode:
2155 case QImode:
2156 if (words <= cum->nregs)
2157 ret = gen_rtx_REG (mode, cum->regno);
2158 break;
2159 case TImode:
2160 if (cum->sse_nregs)
2161 ret = gen_rtx_REG (mode, cum->sse_regno);
2162 break;
2163 }
b08de47e
MM
2164
2165 if (TARGET_DEBUG_ARG)
2166 {
2167 fprintf (stderr,
91ea38f9 2168 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
b08de47e
MM
2169 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2170
2171 if (ret)
91ea38f9 2172 print_simple_rtl (stderr, ret);
b08de47e
MM
2173 else
2174 fprintf (stderr, ", stack");
2175
2176 fprintf (stderr, " )\n");
2177 }
2178
2179 return ret;
2180}
53c17031
JH
2181
2182/* Gives the alignment boundary, in bits, of an argument with the specified mode
2183 and type. */
2184
2185int
2186ix86_function_arg_boundary (mode, type)
2187 enum machine_mode mode;
2188 tree type;
2189{
2190 int align;
2191 if (!TARGET_64BIT)
2192 return PARM_BOUNDARY;
2193 if (type)
2194 align = TYPE_ALIGN (type);
2195 else
2196 align = GET_MODE_ALIGNMENT (mode);
2197 if (align < PARM_BOUNDARY)
2198 align = PARM_BOUNDARY;
2199 if (align > 128)
2200 align = 128;
2201 return align;
2202}
2203
2204/* Return true if N is a possible register number of function value. */
2205bool
2206ix86_function_value_regno_p (regno)
2207 int regno;
2208{
2209 if (!TARGET_64BIT)
2210 {
2211 return ((regno) == 0
2212 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2213 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2214 }
2215 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2216 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2217 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2218}
2219
2220/* Define how to find the value returned by a function.
2221 VALTYPE is the data type of the value (as a tree).
2222 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2223 otherwise, FUNC is 0. */
2224rtx
2225ix86_function_value (valtype)
2226 tree valtype;
2227{
2228 if (TARGET_64BIT)
2229 {
2230 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2231 REGPARM_MAX, SSE_REGPARM_MAX,
2232 x86_64_int_return_registers, 0);
2233 /* For zero sized structures, construct_continer return NULL, but we need
2234 to keep rest of compiler happy by returning meaningfull value. */
2235 if (!ret)
2236 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2237 return ret;
2238 }
2239 else
2240 return gen_rtx_REG (TYPE_MODE (valtype), VALUE_REGNO (TYPE_MODE (valtype)));
2241}
2242
f5143c46 2243/* Return false iff type is returned in memory. */
53c17031
JH
2244int
2245ix86_return_in_memory (type)
2246 tree type;
2247{
2248 int needed_intregs, needed_sseregs;
2249 if (TARGET_64BIT)
2250 {
2251 return !examine_argument (TYPE_MODE (type), type, 1,
2252 &needed_intregs, &needed_sseregs);
2253 }
2254 else
2255 {
2256 if (TYPE_MODE (type) == BLKmode
2257 || (VECTOR_MODE_P (TYPE_MODE (type))
2258 && int_size_in_bytes (type) == 8)
2259 || (int_size_in_bytes (type) > 12 && TYPE_MODE (type) != TImode
2260 && TYPE_MODE (type) != TFmode
2261 && !VECTOR_MODE_P (TYPE_MODE (type))))
2262 return 1;
2263 return 0;
2264 }
2265}
2266
2267/* Define how to find the value returned by a library function
2268 assuming the value has mode MODE. */
2269rtx
2270ix86_libcall_value (mode)
2271 enum machine_mode mode;
2272{
2273 if (TARGET_64BIT)
2274 {
2275 switch (mode)
2276 {
2277 case SFmode:
2278 case SCmode:
2279 case DFmode:
2280 case DCmode:
2281 return gen_rtx_REG (mode, FIRST_SSE_REG);
2282 case TFmode:
2283 case TCmode:
2284 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2285 default:
2286 return gen_rtx_REG (mode, 0);
2287 }
2288 }
2289 else
2290 return gen_rtx_REG (mode, VALUE_REGNO (mode));
2291}
ad919812
JH
2292\f
2293/* Create the va_list data type. */
53c17031 2294
ad919812
JH
2295tree
2296ix86_build_va_list ()
2297{
2298 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
53c17031 2299
ad919812
JH
2300 /* For i386 we use plain pointer to argument area. */
2301 if (!TARGET_64BIT)
2302 return build_pointer_type (char_type_node);
2303
f1e639b1 2304 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
ad919812
JH
2305 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2306
fce5a9f2 2307 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
ad919812 2308 unsigned_type_node);
fce5a9f2 2309 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
ad919812
JH
2310 unsigned_type_node);
2311 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2312 ptr_type_node);
2313 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2314 ptr_type_node);
2315
2316 DECL_FIELD_CONTEXT (f_gpr) = record;
2317 DECL_FIELD_CONTEXT (f_fpr) = record;
2318 DECL_FIELD_CONTEXT (f_ovf) = record;
2319 DECL_FIELD_CONTEXT (f_sav) = record;
2320
2321 TREE_CHAIN (record) = type_decl;
2322 TYPE_NAME (record) = type_decl;
2323 TYPE_FIELDS (record) = f_gpr;
2324 TREE_CHAIN (f_gpr) = f_fpr;
2325 TREE_CHAIN (f_fpr) = f_ovf;
2326 TREE_CHAIN (f_ovf) = f_sav;
2327
2328 layout_type (record);
2329
2330 /* The correct type is an array type of one element. */
2331 return build_array_type (record, build_index_type (size_zero_node));
2332}
2333
2334/* Perform any needed actions needed for a function that is receiving a
fce5a9f2 2335 variable number of arguments.
ad919812
JH
2336
2337 CUM is as above.
2338
2339 MODE and TYPE are the mode and type of the current parameter.
2340
2341 PRETEND_SIZE is a variable that should be set to the amount of stack
2342 that must be pushed by the prolog to pretend that our caller pushed
2343 it.
2344
2345 Normally, this macro will push all remaining incoming registers on the
2346 stack and set PRETEND_SIZE to the length of the registers pushed. */
2347
2348void
2349ix86_setup_incoming_varargs (cum, mode, type, pretend_size, no_rtl)
2350 CUMULATIVE_ARGS *cum;
2351 enum machine_mode mode;
2352 tree type;
2353 int *pretend_size ATTRIBUTE_UNUSED;
2354 int no_rtl;
2355
2356{
2357 CUMULATIVE_ARGS next_cum;
2358 rtx save_area = NULL_RTX, mem;
2359 rtx label;
2360 rtx label_ref;
2361 rtx tmp_reg;
2362 rtx nsse_reg;
2363 int set;
2364 tree fntype;
2365 int stdarg_p;
2366 int i;
2367
2368 if (!TARGET_64BIT)
2369 return;
2370
2371 /* Indicate to allocate space on the stack for varargs save area. */
2372 ix86_save_varrargs_registers = 1;
2373
2374 fntype = TREE_TYPE (current_function_decl);
2375 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2376 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2377 != void_type_node));
2378
2379 /* For varargs, we do not want to skip the dummy va_dcl argument.
2380 For stdargs, we do want to skip the last named argument. */
2381 next_cum = *cum;
2382 if (stdarg_p)
2383 function_arg_advance (&next_cum, mode, type, 1);
2384
2385 if (!no_rtl)
2386 save_area = frame_pointer_rtx;
2387
2388 set = get_varargs_alias_set ();
2389
2390 for (i = next_cum.regno; i < ix86_regparm; i++)
2391 {
2392 mem = gen_rtx_MEM (Pmode,
2393 plus_constant (save_area, i * UNITS_PER_WORD));
0692acba 2394 set_mem_alias_set (mem, set);
ad919812
JH
2395 emit_move_insn (mem, gen_rtx_REG (Pmode,
2396 x86_64_int_parameter_registers[i]));
2397 }
2398
2399 if (next_cum.sse_nregs)
2400 {
2401 /* Now emit code to save SSE registers. The AX parameter contains number
2402 of SSE parameter regsiters used to call this function. We use
2403 sse_prologue_save insn template that produces computed jump across
2404 SSE saves. We need some preparation work to get this working. */
2405
2406 label = gen_label_rtx ();
2407 label_ref = gen_rtx_LABEL_REF (Pmode, label);
2408
2409 /* Compute address to jump to :
2410 label - 5*eax + nnamed_sse_arguments*5 */
2411 tmp_reg = gen_reg_rtx (Pmode);
2412 nsse_reg = gen_reg_rtx (Pmode);
2413 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
2414 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
44cf5b6a 2415 gen_rtx_MULT (Pmode, nsse_reg,
ad919812
JH
2416 GEN_INT (4))));
2417 if (next_cum.sse_regno)
2418 emit_move_insn
2419 (nsse_reg,
2420 gen_rtx_CONST (DImode,
2421 gen_rtx_PLUS (DImode,
2422 label_ref,
2423 GEN_INT (next_cum.sse_regno * 4))));
2424 else
2425 emit_move_insn (nsse_reg, label_ref);
2426 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
2427
2428 /* Compute address of memory block we save into. We always use pointer
2429 pointing 127 bytes after first byte to store - this is needed to keep
2430 instruction size limited by 4 bytes. */
2431 tmp_reg = gen_reg_rtx (Pmode);
8ac61af7
RK
2432 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2433 plus_constant (save_area,
2434 8 * REGPARM_MAX + 127)));
ad919812 2435 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
14f73b5a 2436 set_mem_alias_set (mem, set);
8ac61af7 2437 set_mem_align (mem, BITS_PER_WORD);
ad919812
JH
2438
2439 /* And finally do the dirty job! */
8ac61af7
RK
2440 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
2441 GEN_INT (next_cum.sse_regno), label));
ad919812
JH
2442 }
2443
2444}
2445
2446/* Implement va_start. */
2447
2448void
2449ix86_va_start (stdarg_p, valist, nextarg)
2450 int stdarg_p;
2451 tree valist;
2452 rtx nextarg;
2453{
2454 HOST_WIDE_INT words, n_gpr, n_fpr;
2455 tree f_gpr, f_fpr, f_ovf, f_sav;
2456 tree gpr, fpr, ovf, sav, t;
2457
2458 /* Only 64bit target needs something special. */
2459 if (!TARGET_64BIT)
2460 {
2461 std_expand_builtin_va_start (stdarg_p, valist, nextarg);
2462 return;
2463 }
2464
2465 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2466 f_fpr = TREE_CHAIN (f_gpr);
2467 f_ovf = TREE_CHAIN (f_fpr);
2468 f_sav = TREE_CHAIN (f_ovf);
2469
2470 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2471 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2472 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2473 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2474 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2475
2476 /* Count number of gp and fp argument registers used. */
2477 words = current_function_args_info.words;
2478 n_gpr = current_function_args_info.regno;
2479 n_fpr = current_function_args_info.sse_regno;
2480
2481 if (TARGET_DEBUG_ARG)
2482 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
b531087a 2483 (int) words, (int) n_gpr, (int) n_fpr);
ad919812
JH
2484
2485 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
2486 build_int_2 (n_gpr * 8, 0));
2487 TREE_SIDE_EFFECTS (t) = 1;
2488 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2489
2490 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
2491 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
2492 TREE_SIDE_EFFECTS (t) = 1;
2493 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2494
2495 /* Find the overflow area. */
2496 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
2497 if (words != 0)
2498 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
2499 build_int_2 (words * UNITS_PER_WORD, 0));
2500 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2501 TREE_SIDE_EFFECTS (t) = 1;
2502 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2503
2504 /* Find the register save area.
2505 Prologue of the function save it right above stack frame. */
2506 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
2507 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
2508 TREE_SIDE_EFFECTS (t) = 1;
2509 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2510}
2511
2512/* Implement va_arg. */
2513rtx
2514ix86_va_arg (valist, type)
2515 tree valist, type;
2516{
0139adca 2517 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
ad919812
JH
2518 tree f_gpr, f_fpr, f_ovf, f_sav;
2519 tree gpr, fpr, ovf, sav, t;
b932f770 2520 int size, rsize;
ad919812
JH
2521 rtx lab_false, lab_over = NULL_RTX;
2522 rtx addr_rtx, r;
2523 rtx container;
2524
2525 /* Only 64bit target needs something special. */
2526 if (!TARGET_64BIT)
2527 {
2528 return std_expand_builtin_va_arg (valist, type);
2529 }
2530
2531 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2532 f_fpr = TREE_CHAIN (f_gpr);
2533 f_ovf = TREE_CHAIN (f_fpr);
2534 f_sav = TREE_CHAIN (f_ovf);
2535
2536 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2537 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2538 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2539 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2540 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2541
2542 size = int_size_in_bytes (type);
2543 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2544
2545 container = construct_container (TYPE_MODE (type), type, 0,
2546 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
2547 /*
2548 * Pull the value out of the saved registers ...
2549 */
2550
2551 addr_rtx = gen_reg_rtx (Pmode);
2552
2553 if (container)
2554 {
2555 rtx int_addr_rtx, sse_addr_rtx;
2556 int needed_intregs, needed_sseregs;
2557 int need_temp;
2558
2559 lab_over = gen_label_rtx ();
2560 lab_false = gen_label_rtx ();
8bad7136 2561
ad919812
JH
2562 examine_argument (TYPE_MODE (type), type, 0,
2563 &needed_intregs, &needed_sseregs);
2564
2565
2566 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
2567 || TYPE_ALIGN (type) > 128);
2568
2569 /* In case we are passing structure, verify that it is consetuctive block
2570 on the register save area. If not we need to do moves. */
2571 if (!need_temp && !REG_P (container))
2572 {
2573 /* Verify that all registers are strictly consetuctive */
2574 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
2575 {
2576 int i;
2577
2578 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2579 {
2580 rtx slot = XVECEXP (container, 0, i);
b531087a 2581 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
ad919812
JH
2582 || INTVAL (XEXP (slot, 1)) != i * 16)
2583 need_temp = 1;
2584 }
2585 }
2586 else
2587 {
2588 int i;
2589
2590 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2591 {
2592 rtx slot = XVECEXP (container, 0, i);
b531087a 2593 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
ad919812
JH
2594 || INTVAL (XEXP (slot, 1)) != i * 8)
2595 need_temp = 1;
2596 }
2597 }
2598 }
2599 if (!need_temp)
2600 {
2601 int_addr_rtx = addr_rtx;
2602 sse_addr_rtx = addr_rtx;
2603 }
2604 else
2605 {
2606 int_addr_rtx = gen_reg_rtx (Pmode);
2607 sse_addr_rtx = gen_reg_rtx (Pmode);
2608 }
2609 /* First ensure that we fit completely in registers. */
2610 if (needed_intregs)
2611 {
2612 emit_cmp_and_jump_insns (expand_expr
2613 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
2614 GEN_INT ((REGPARM_MAX - needed_intregs +
2615 1) * 8), GE, const1_rtx, SImode,
d43e0b7d 2616 1, lab_false);
ad919812
JH
2617 }
2618 if (needed_sseregs)
2619 {
2620 emit_cmp_and_jump_insns (expand_expr
2621 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
2622 GEN_INT ((SSE_REGPARM_MAX -
2623 needed_sseregs + 1) * 16 +
2624 REGPARM_MAX * 8), GE, const1_rtx,
d43e0b7d 2625 SImode, 1, lab_false);
ad919812
JH
2626 }
2627
2628 /* Compute index to start of area used for integer regs. */
2629 if (needed_intregs)
2630 {
2631 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
2632 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
2633 if (r != int_addr_rtx)
2634 emit_move_insn (int_addr_rtx, r);
2635 }
2636 if (needed_sseregs)
2637 {
2638 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
2639 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
2640 if (r != sse_addr_rtx)
2641 emit_move_insn (sse_addr_rtx, r);
2642 }
2643 if (need_temp)
2644 {
2645 int i;
2646 rtx mem;
2647
b932f770
JH
2648 /* Never use the memory itself, as it has the alias set. */
2649 addr_rtx = XEXP (assign_temp (type, 0, 1, 0), 0);
2650 mem = gen_rtx_MEM (BLKmode, addr_rtx);
0692acba 2651 set_mem_alias_set (mem, get_varargs_alias_set ());
8ac61af7 2652 set_mem_align (mem, BITS_PER_UNIT);
b932f770 2653
ad919812
JH
2654 for (i = 0; i < XVECLEN (container, 0); i++)
2655 {
2656 rtx slot = XVECEXP (container, 0, i);
2657 rtx reg = XEXP (slot, 0);
2658 enum machine_mode mode = GET_MODE (reg);
2659 rtx src_addr;
2660 rtx src_mem;
2661 int src_offset;
2662 rtx dest_mem;
2663
2664 if (SSE_REGNO_P (REGNO (reg)))
2665 {
2666 src_addr = sse_addr_rtx;
2667 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
2668 }
2669 else
2670 {
2671 src_addr = int_addr_rtx;
2672 src_offset = REGNO (reg) * 8;
2673 }
2674 src_mem = gen_rtx_MEM (mode, src_addr);
0692acba 2675 set_mem_alias_set (src_mem, get_varargs_alias_set ());
ad919812
JH
2676 src_mem = adjust_address (src_mem, mode, src_offset);
2677 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
ad919812
JH
2678 emit_move_insn (dest_mem, src_mem);
2679 }
2680 }
2681
2682 if (needed_intregs)
2683 {
2684 t =
2685 build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
2686 build_int_2 (needed_intregs * 8, 0));
2687 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
2688 TREE_SIDE_EFFECTS (t) = 1;
2689 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2690 }
2691 if (needed_sseregs)
2692 {
2693 t =
2694 build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
2695 build_int_2 (needed_sseregs * 16, 0));
2696 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
2697 TREE_SIDE_EFFECTS (t) = 1;
2698 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2699 }
2700
2701 emit_jump_insn (gen_jump (lab_over));
2702 emit_barrier ();
2703 emit_label (lab_false);
2704 }
2705
2706 /* ... otherwise out of the overflow area. */
2707
2708 /* Care for on-stack alignment if needed. */
2709 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
2710 t = ovf;
2711 else
2712 {
2713 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
2714 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
2715 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
2716 }
2717 t = save_expr (t);
2718
2719 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
2720 if (r != addr_rtx)
2721 emit_move_insn (addr_rtx, r);
2722
2723 t =
2724 build (PLUS_EXPR, TREE_TYPE (t), t,
2725 build_int_2 (rsize * UNITS_PER_WORD, 0));
2726 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2727 TREE_SIDE_EFFECTS (t) = 1;
2728 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2729
2730 if (container)
2731 emit_label (lab_over);
2732
ad919812
JH
2733 return addr_rtx;
2734}
2735\f
7dd4b4a3
JH
2736/* Return nonzero if OP is general operand representable on x86_64. */
2737
2738int
2739x86_64_general_operand (op, mode)
2740 rtx op;
2741 enum machine_mode mode;
2742{
2743 if (!TARGET_64BIT)
2744 return general_operand (op, mode);
2745 if (nonimmediate_operand (op, mode))
2746 return 1;
2747 return x86_64_sign_extended_value (op);
2748}
2749
2750/* Return nonzero if OP is general operand representable on x86_64
d6a7951f 2751 as either sign extended or zero extended constant. */
7dd4b4a3
JH
2752
2753int
2754x86_64_szext_general_operand (op, mode)
2755 rtx op;
2756 enum machine_mode mode;
2757{
2758 if (!TARGET_64BIT)
2759 return general_operand (op, mode);
2760 if (nonimmediate_operand (op, mode))
2761 return 1;
2762 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
2763}
2764
2765/* Return nonzero if OP is nonmemory operand representable on x86_64. */
2766
2767int
2768x86_64_nonmemory_operand (op, mode)
2769 rtx op;
2770 enum machine_mode mode;
2771{
2772 if (!TARGET_64BIT)
2773 return nonmemory_operand (op, mode);
2774 if (register_operand (op, mode))
2775 return 1;
2776 return x86_64_sign_extended_value (op);
2777}
2778
2779/* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
2780
2781int
2782x86_64_movabs_operand (op, mode)
2783 rtx op;
2784 enum machine_mode mode;
2785{
2786 if (!TARGET_64BIT || !flag_pic)
2787 return nonmemory_operand (op, mode);
2788 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
2789 return 1;
2790 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
2791 return 1;
2792 return 0;
2793}
2794
2795/* Return nonzero if OP is nonmemory operand representable on x86_64. */
2796
2797int
2798x86_64_szext_nonmemory_operand (op, mode)
2799 rtx op;
2800 enum machine_mode mode;
2801{
2802 if (!TARGET_64BIT)
2803 return nonmemory_operand (op, mode);
2804 if (register_operand (op, mode))
2805 return 1;
2806 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
2807}
2808
2809/* Return nonzero if OP is immediate operand representable on x86_64. */
2810
2811int
2812x86_64_immediate_operand (op, mode)
2813 rtx op;
2814 enum machine_mode mode;
2815{
2816 if (!TARGET_64BIT)
2817 return immediate_operand (op, mode);
2818 return x86_64_sign_extended_value (op);
2819}
2820
2821/* Return nonzero if OP is immediate operand representable on x86_64. */
2822
2823int
2824x86_64_zext_immediate_operand (op, mode)
2825 rtx op;
2826 enum machine_mode mode ATTRIBUTE_UNUSED;
2827{
2828 return x86_64_zero_extended_value (op);
2829}
2830
8bad7136
JL
2831/* Return nonzero if OP is (const_int 1), else return zero. */
2832
2833int
2834const_int_1_operand (op, mode)
2835 rtx op;
2836 enum machine_mode mode ATTRIBUTE_UNUSED;
2837{
2838 return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
2839}
2840
e075ae69
RH
2841/* Returns 1 if OP is either a symbol reference or a sum of a symbol
2842 reference and a constant. */
b08de47e
MM
2843
2844int
e075ae69
RH
2845symbolic_operand (op, mode)
2846 register rtx op;
2847 enum machine_mode mode ATTRIBUTE_UNUSED;
2a2ab3f9 2848{
e075ae69 2849 switch (GET_CODE (op))
2a2ab3f9 2850 {
e075ae69
RH
2851 case SYMBOL_REF:
2852 case LABEL_REF:
2853 return 1;
2854
2855 case CONST:
2856 op = XEXP (op, 0);
2857 if (GET_CODE (op) == SYMBOL_REF
2858 || GET_CODE (op) == LABEL_REF
2859 || (GET_CODE (op) == UNSPEC
8ee41eaf
RH
2860 && (XINT (op, 1) == UNSPEC_GOT
2861 || XINT (op, 1) == UNSPEC_GOTOFF
2862 || XINT (op, 1) == UNSPEC_GOTPCREL)))
e075ae69
RH
2863 return 1;
2864 if (GET_CODE (op) != PLUS
2865 || GET_CODE (XEXP (op, 1)) != CONST_INT)
2866 return 0;
2867
2868 op = XEXP (op, 0);
2869 if (GET_CODE (op) == SYMBOL_REF
2870 || GET_CODE (op) == LABEL_REF)
2871 return 1;
2872 /* Only @GOTOFF gets offsets. */
2873 if (GET_CODE (op) != UNSPEC
8ee41eaf 2874 || XINT (op, 1) != UNSPEC_GOTOFF)
e075ae69
RH
2875 return 0;
2876
2877 op = XVECEXP (op, 0, 0);
2878 if (GET_CODE (op) == SYMBOL_REF
2879 || GET_CODE (op) == LABEL_REF)
2880 return 1;
2881 return 0;
2882
2883 default:
2884 return 0;
2a2ab3f9
JVA
2885 }
2886}
2a2ab3f9 2887
e075ae69 2888/* Return true if the operand contains a @GOT or @GOTOFF reference. */
3b3c6a3f 2889
e075ae69
RH
2890int
2891pic_symbolic_operand (op, mode)
2892 register rtx op;
2893 enum machine_mode mode ATTRIBUTE_UNUSED;
2894{
6eb791fc
JH
2895 if (GET_CODE (op) != CONST)
2896 return 0;
2897 op = XEXP (op, 0);
2898 if (TARGET_64BIT)
2899 {
2900 if (GET_CODE (XEXP (op, 0)) == UNSPEC)
2901 return 1;
2902 }
fce5a9f2 2903 else
2a2ab3f9 2904 {
e075ae69
RH
2905 if (GET_CODE (op) == UNSPEC)
2906 return 1;
2907 if (GET_CODE (op) != PLUS
2908 || GET_CODE (XEXP (op, 1)) != CONST_INT)
2909 return 0;
2910 op = XEXP (op, 0);
2911 if (GET_CODE (op) == UNSPEC)
2912 return 1;
2a2ab3f9 2913 }
e075ae69 2914 return 0;
2a2ab3f9 2915}
2a2ab3f9 2916
623fe810
RH
2917/* Return true if OP is a symbolic operand that resolves locally. */
2918
2919static int
2920local_symbolic_operand (op, mode)
2921 rtx op;
2922 enum machine_mode mode ATTRIBUTE_UNUSED;
2923{
2924 if (GET_CODE (op) == LABEL_REF)
2925 return 1;
2926
2927 if (GET_CODE (op) == CONST
2928 && GET_CODE (XEXP (op, 0)) == PLUS
2929 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
2930 op = XEXP (XEXP (op, 0), 0);
2931
2932 if (GET_CODE (op) != SYMBOL_REF)
2933 return 0;
2934
2935 /* These we've been told are local by varasm and encode_section_info
2936 respectively. */
2937 if (CONSTANT_POOL_ADDRESS_P (op) || SYMBOL_REF_FLAG (op))
2938 return 1;
2939
2940 /* There is, however, a not insubstantial body of code in the rest of
fce5a9f2 2941 the compiler that assumes it can just stick the results of
623fe810
RH
2942 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
2943 /* ??? This is a hack. Should update the body of the compiler to
fb49053f 2944 always create a DECL an invoke targetm.encode_section_info. */
623fe810
RH
2945 if (strncmp (XSTR (op, 0), internal_label_prefix,
2946 internal_label_prefix_len) == 0)
2947 return 1;
2948
2949 return 0;
2950}
2951
f996902d
RH
2952/* Test for various thread-local symbols. See ix86_encode_section_info. */
2953
2954int
2955tls_symbolic_operand (op, mode)
2956 register rtx op;
2957 enum machine_mode mode ATTRIBUTE_UNUSED;
2958{
2959 const char *symbol_str;
2960
2961 if (GET_CODE (op) != SYMBOL_REF)
2962 return 0;
2963 symbol_str = XSTR (op, 0);
2964
2965 if (symbol_str[0] != '%')
2966 return 0;
755ac5d4 2967 return strchr (tls_model_chars, symbol_str[1]) - tls_model_chars;
f996902d
RH
2968}
2969
2970static int
2971tls_symbolic_operand_1 (op, kind)
2972 rtx op;
2973 enum tls_model kind;
2974{
2975 const char *symbol_str;
2976
2977 if (GET_CODE (op) != SYMBOL_REF)
2978 return 0;
2979 symbol_str = XSTR (op, 0);
2980
2981 return symbol_str[0] == '%' && symbol_str[1] == tls_model_chars[kind];
2982}
2983
2984int
2985global_dynamic_symbolic_operand (op, mode)
2986 register rtx op;
2987 enum machine_mode mode ATTRIBUTE_UNUSED;
2988{
2989 return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
2990}
2991
2992int
2993local_dynamic_symbolic_operand (op, mode)
2994 register rtx op;
2995 enum machine_mode mode ATTRIBUTE_UNUSED;
2996{
2997 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
2998}
2999
3000int
3001initial_exec_symbolic_operand (op, mode)
3002 register rtx op;
3003 enum machine_mode mode ATTRIBUTE_UNUSED;
3004{
3005 return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
3006}
3007
3008int
3009local_exec_symbolic_operand (op, mode)
3010 register rtx op;
3011 enum machine_mode mode ATTRIBUTE_UNUSED;
3012{
3013 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
3014}
3015
28d52ffb
RH
3016/* Test for a valid operand for a call instruction. Don't allow the
3017 arg pointer register or virtual regs since they may decay into
3018 reg + const, which the patterns can't handle. */
2a2ab3f9 3019
e075ae69
RH
3020int
3021call_insn_operand (op, mode)
3022 rtx op;
3023 enum machine_mode mode ATTRIBUTE_UNUSED;
3024{
e075ae69
RH
3025 /* Disallow indirect through a virtual register. This leads to
3026 compiler aborts when trying to eliminate them. */
3027 if (GET_CODE (op) == REG
3028 && (op == arg_pointer_rtx
564d80f4 3029 || op == frame_pointer_rtx
e075ae69
RH
3030 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3031 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3032 return 0;
2a2ab3f9 3033
28d52ffb
RH
3034 /* Disallow `call 1234'. Due to varying assembler lameness this
3035 gets either rejected or translated to `call .+1234'. */
3036 if (GET_CODE (op) == CONST_INT)
3037 return 0;
3038
cbbf65e0
RH
3039 /* Explicitly allow SYMBOL_REF even if pic. */
3040 if (GET_CODE (op) == SYMBOL_REF)
e075ae69 3041 return 1;
2a2ab3f9 3042
cbbf65e0
RH
3043 /* Otherwise we can allow any general_operand in the address. */
3044 return general_operand (op, Pmode);
e075ae69 3045}
79325812 3046
e075ae69
RH
3047int
3048constant_call_address_operand (op, mode)
3049 rtx op;
3050 enum machine_mode mode ATTRIBUTE_UNUSED;
3051{
eaf19aba
JJ
3052 if (GET_CODE (op) == CONST
3053 && GET_CODE (XEXP (op, 0)) == PLUS
3054 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3055 op = XEXP (XEXP (op, 0), 0);
e1ff012c 3056 return GET_CODE (op) == SYMBOL_REF;
e075ae69 3057}
2a2ab3f9 3058
e075ae69 3059/* Match exactly zero and one. */
e9a25f70 3060
0f290768 3061int
e075ae69
RH
3062const0_operand (op, mode)
3063 register rtx op;
3064 enum machine_mode mode;
3065{
3066 return op == CONST0_RTX (mode);
3067}
e9a25f70 3068
0f290768 3069int
e075ae69
RH
3070const1_operand (op, mode)
3071 register rtx op;
3072 enum machine_mode mode ATTRIBUTE_UNUSED;
3073{
3074 return op == const1_rtx;
3075}
2a2ab3f9 3076
e075ae69 3077/* Match 2, 4, or 8. Used for leal multiplicands. */
e9a25f70 3078
e075ae69
RH
3079int
3080const248_operand (op, mode)
3081 register rtx op;
3082 enum machine_mode mode ATTRIBUTE_UNUSED;
3083{
3084 return (GET_CODE (op) == CONST_INT
3085 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
3086}
e9a25f70 3087
e075ae69 3088/* True if this is a constant appropriate for an increment or decremenmt. */
81fd0956 3089
e075ae69
RH
3090int
3091incdec_operand (op, mode)
3092 register rtx op;
0631e0bf 3093 enum machine_mode mode ATTRIBUTE_UNUSED;
e075ae69 3094{
f5143c46 3095 /* On Pentium4, the inc and dec operations causes extra dependency on flag
b4e89e2d
JH
3096 registers, since carry flag is not set. */
3097 if (TARGET_PENTIUM4 && !optimize_size)
3098 return 0;
2b1c08f5 3099 return op == const1_rtx || op == constm1_rtx;
e075ae69 3100}
2a2ab3f9 3101
371bc54b
JH
3102/* Return nonzero if OP is acceptable as operand of DImode shift
3103 expander. */
3104
3105int
3106shiftdi_operand (op, mode)
3107 rtx op;
3108 enum machine_mode mode ATTRIBUTE_UNUSED;
3109{
3110 if (TARGET_64BIT)
3111 return nonimmediate_operand (op, mode);
3112 else
3113 return register_operand (op, mode);
3114}
3115
0f290768 3116/* Return false if this is the stack pointer, or any other fake
e075ae69
RH
3117 register eliminable to the stack pointer. Otherwise, this is
3118 a register operand.
2a2ab3f9 3119
e075ae69
RH
3120 This is used to prevent esp from being used as an index reg.
3121 Which would only happen in pathological cases. */
5f1ec3e6 3122
e075ae69
RH
3123int
3124reg_no_sp_operand (op, mode)
3125 register rtx op;
3126 enum machine_mode mode;
3127{
3128 rtx t = op;
3129 if (GET_CODE (t) == SUBREG)
3130 t = SUBREG_REG (t);
564d80f4 3131 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
e075ae69 3132 return 0;
2a2ab3f9 3133
e075ae69 3134 return register_operand (op, mode);
2a2ab3f9 3135}
b840bfb0 3136
915119a5
BS
3137int
3138mmx_reg_operand (op, mode)
3139 register rtx op;
bd793c65 3140 enum machine_mode mode ATTRIBUTE_UNUSED;
915119a5
BS
3141{
3142 return MMX_REG_P (op);
3143}
3144
2c5a510c
RH
3145/* Return false if this is any eliminable register. Otherwise
3146 general_operand. */
3147
3148int
3149general_no_elim_operand (op, mode)
3150 register rtx op;
3151 enum machine_mode mode;
3152{
3153 rtx t = op;
3154 if (GET_CODE (t) == SUBREG)
3155 t = SUBREG_REG (t);
3156 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3157 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3158 || t == virtual_stack_dynamic_rtx)
3159 return 0;
1020a5ab
RH
3160 if (REG_P (t)
3161 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3162 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3163 return 0;
2c5a510c
RH
3164
3165 return general_operand (op, mode);
3166}
3167
3168/* Return false if this is any eliminable register. Otherwise
3169 register_operand or const_int. */
3170
3171int
3172nonmemory_no_elim_operand (op, mode)
3173 register rtx op;
3174 enum machine_mode mode;
3175{
3176 rtx t = op;
3177 if (GET_CODE (t) == SUBREG)
3178 t = SUBREG_REG (t);
3179 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3180 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3181 || t == virtual_stack_dynamic_rtx)
3182 return 0;
3183
3184 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3185}
3186
e075ae69 3187/* Return true if op is a Q_REGS class register. */
b840bfb0 3188
e075ae69
RH
3189int
3190q_regs_operand (op, mode)
3191 register rtx op;
3192 enum machine_mode mode;
b840bfb0 3193{
e075ae69
RH
3194 if (mode != VOIDmode && GET_MODE (op) != mode)
3195 return 0;
3196 if (GET_CODE (op) == SUBREG)
3197 op = SUBREG_REG (op);
7799175f 3198 return ANY_QI_REG_P (op);
0f290768 3199}
b840bfb0 3200
e075ae69 3201/* Return true if op is a NON_Q_REGS class register. */
b840bfb0 3202
e075ae69
RH
3203int
3204non_q_regs_operand (op, mode)
3205 register rtx op;
3206 enum machine_mode mode;
3207{
3208 if (mode != VOIDmode && GET_MODE (op) != mode)
3209 return 0;
3210 if (GET_CODE (op) == SUBREG)
3211 op = SUBREG_REG (op);
3212 return NON_QI_REG_P (op);
0f290768 3213}
b840bfb0 3214
915119a5
BS
3215/* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
3216 insns. */
3217int
3218sse_comparison_operator (op, mode)
3219 rtx op;
3220 enum machine_mode mode ATTRIBUTE_UNUSED;
3221{
3222 enum rtx_code code = GET_CODE (op);
a46d1d38
JH
3223 switch (code)
3224 {
3225 /* Operations supported directly. */
3226 case EQ:
3227 case LT:
3228 case LE:
3229 case UNORDERED:
3230 case NE:
3231 case UNGE:
3232 case UNGT:
3233 case ORDERED:
3234 return 1;
3235 /* These are equivalent to ones above in non-IEEE comparisons. */
3236 case UNEQ:
3237 case UNLT:
3238 case UNLE:
3239 case LTGT:
3240 case GE:
3241 case GT:
3242 return !TARGET_IEEE_FP;
3243 default:
3244 return 0;
3245 }
915119a5 3246}
9076b9c1 3247/* Return 1 if OP is a valid comparison operator in valid mode. */
e075ae69 3248int
9076b9c1
JH
3249ix86_comparison_operator (op, mode)
3250 register rtx op;
3251 enum machine_mode mode;
e075ae69 3252{
9076b9c1 3253 enum machine_mode inmode;
9a915772 3254 enum rtx_code code = GET_CODE (op);
3a3677ff
RH
3255 if (mode != VOIDmode && GET_MODE (op) != mode)
3256 return 0;
9a915772
JH
3257 if (GET_RTX_CLASS (code) != '<')
3258 return 0;
3259 inmode = GET_MODE (XEXP (op, 0));
3260
3261 if (inmode == CCFPmode || inmode == CCFPUmode)
3262 {
3263 enum rtx_code second_code, bypass_code;
3264 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3265 return (bypass_code == NIL && second_code == NIL);
3266 }
3267 switch (code)
3a3677ff
RH
3268 {
3269 case EQ: case NE:
3a3677ff 3270 return 1;
9076b9c1 3271 case LT: case GE:
7e08e190 3272 if (inmode == CCmode || inmode == CCGCmode
9076b9c1
JH
3273 || inmode == CCGOCmode || inmode == CCNOmode)
3274 return 1;
3275 return 0;
7e08e190 3276 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
7e08e190 3277 if (inmode == CCmode)
9076b9c1
JH
3278 return 1;
3279 return 0;
3280 case GT: case LE:
7e08e190 3281 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
9076b9c1
JH
3282 return 1;
3283 return 0;
3a3677ff
RH
3284 default:
3285 return 0;
3286 }
3287}
3288
9076b9c1 3289/* Return 1 if OP is a comparison operator that can be issued by fcmov. */
3a3677ff 3290
9076b9c1
JH
3291int
3292fcmov_comparison_operator (op, mode)
3a3677ff
RH
3293 register rtx op;
3294 enum machine_mode mode;
3295{
b62d22a2 3296 enum machine_mode inmode;
9a915772 3297 enum rtx_code code = GET_CODE (op);
3a3677ff
RH
3298 if (mode != VOIDmode && GET_MODE (op) != mode)
3299 return 0;
9a915772
JH
3300 if (GET_RTX_CLASS (code) != '<')
3301 return 0;
3302 inmode = GET_MODE (XEXP (op, 0));
3303 if (inmode == CCFPmode || inmode == CCFPUmode)
3a3677ff 3304 {
9a915772
JH
3305 enum rtx_code second_code, bypass_code;
3306 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3307 if (bypass_code != NIL || second_code != NIL)
3308 return 0;
3309 code = ix86_fp_compare_code_to_integer (code);
3310 }
3311 /* i387 supports just limited amount of conditional codes. */
3312 switch (code)
3313 {
3314 case LTU: case GTU: case LEU: case GEU:
3315 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
9076b9c1
JH
3316 return 1;
3317 return 0;
9a915772
JH
3318 case ORDERED: case UNORDERED:
3319 case EQ: case NE:
3320 return 1;
3a3677ff
RH
3321 default:
3322 return 0;
3323 }
e075ae69 3324}
b840bfb0 3325
e9e80858
JH
3326/* Return 1 if OP is a binary operator that can be promoted to wider mode. */
3327
3328int
3329promotable_binary_operator (op, mode)
3330 register rtx op;
3331 enum machine_mode mode ATTRIBUTE_UNUSED;
3332{
3333 switch (GET_CODE (op))
3334 {
3335 case MULT:
3336 /* Modern CPUs have same latency for HImode and SImode multiply,
3337 but 386 and 486 do HImode multiply faster. */
3338 return ix86_cpu > PROCESSOR_I486;
3339 case PLUS:
3340 case AND:
3341 case IOR:
3342 case XOR:
3343 case ASHIFT:
3344 return 1;
3345 default:
3346 return 0;
3347 }
3348}
3349
e075ae69
RH
3350/* Nearly general operand, but accept any const_double, since we wish
3351 to be able to drop them into memory rather than have them get pulled
3352 into registers. */
b840bfb0 3353
2a2ab3f9 3354int
e075ae69
RH
3355cmp_fp_expander_operand (op, mode)
3356 register rtx op;
3357 enum machine_mode mode;
2a2ab3f9 3358{
e075ae69 3359 if (mode != VOIDmode && mode != GET_MODE (op))
0b6b2900 3360 return 0;
e075ae69 3361 if (GET_CODE (op) == CONST_DOUBLE)
2a2ab3f9 3362 return 1;
e075ae69 3363 return general_operand (op, mode);
2a2ab3f9
JVA
3364}
3365
e075ae69 3366/* Match an SI or HImode register for a zero_extract. */
2a2ab3f9
JVA
3367
3368int
e075ae69 3369ext_register_operand (op, mode)
2a2ab3f9 3370 register rtx op;
bb5177ac 3371 enum machine_mode mode ATTRIBUTE_UNUSED;
2a2ab3f9 3372{
3522082b 3373 int regno;
0d7d98ee
JH
3374 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
3375 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
e075ae69 3376 return 0;
3522082b
JH
3377
3378 if (!register_operand (op, VOIDmode))
3379 return 0;
3380
3381 /* Be curefull to accept only registers having upper parts. */
3382 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
3383 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
e075ae69
RH
3384}
3385
3386/* Return 1 if this is a valid binary floating-point operation.
0f290768 3387 OP is the expression matched, and MODE is its mode. */
e075ae69
RH
3388
3389int
3390binary_fp_operator (op, mode)
3391 register rtx op;
3392 enum machine_mode mode;
3393{
3394 if (mode != VOIDmode && mode != GET_MODE (op))
3395 return 0;
3396
2a2ab3f9
JVA
3397 switch (GET_CODE (op))
3398 {
e075ae69
RH
3399 case PLUS:
3400 case MINUS:
3401 case MULT:
3402 case DIV:
3403 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
e9a25f70 3404
2a2ab3f9
JVA
3405 default:
3406 return 0;
3407 }
3408}
fee2770d 3409
e075ae69 3410int
b531087a 3411mult_operator (op, mode)
e075ae69
RH
3412 register rtx op;
3413 enum machine_mode mode ATTRIBUTE_UNUSED;
3414{
3415 return GET_CODE (op) == MULT;
3416}
3417
3418int
b531087a 3419div_operator (op, mode)
e075ae69
RH
3420 register rtx op;
3421 enum machine_mode mode ATTRIBUTE_UNUSED;
3422{
3423 return GET_CODE (op) == DIV;
3424}
0a726ef1
JL
3425
3426int
e075ae69
RH
3427arith_or_logical_operator (op, mode)
3428 rtx op;
3429 enum machine_mode mode;
0a726ef1 3430{
e075ae69
RH
3431 return ((mode == VOIDmode || GET_MODE (op) == mode)
3432 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
3433 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
0a726ef1
JL
3434}
3435
e075ae69 3436/* Returns 1 if OP is memory operand with a displacement. */
fee2770d
RS
3437
3438int
e075ae69
RH
3439memory_displacement_operand (op, mode)
3440 register rtx op;
3441 enum machine_mode mode;
4f2c8ebb 3442{
e075ae69 3443 struct ix86_address parts;
e9a25f70 3444
e075ae69
RH
3445 if (! memory_operand (op, mode))
3446 return 0;
3447
3448 if (! ix86_decompose_address (XEXP (op, 0), &parts))
3449 abort ();
3450
3451 return parts.disp != NULL_RTX;
4f2c8ebb
RS
3452}
3453
16189740 3454/* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
e075ae69
RH
3455 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
3456
3457 ??? It seems likely that this will only work because cmpsi is an
3458 expander, and no actual insns use this. */
4f2c8ebb
RS
3459
3460int
e075ae69
RH
3461cmpsi_operand (op, mode)
3462 rtx op;
3463 enum machine_mode mode;
fee2770d 3464{
b9b2c339 3465 if (nonimmediate_operand (op, mode))
e075ae69
RH
3466 return 1;
3467
3468 if (GET_CODE (op) == AND
3469 && GET_MODE (op) == SImode
3470 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
3471 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
3472 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
3473 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
3474 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
3475 && GET_CODE (XEXP (op, 1)) == CONST_INT)
fee2770d 3476 return 1;
e9a25f70 3477
fee2770d
RS
3478 return 0;
3479}
d784886d 3480
e075ae69
RH
3481/* Returns 1 if OP is memory operand that can not be represented by the
3482 modRM array. */
d784886d
RK
3483
3484int
e075ae69 3485long_memory_operand (op, mode)
d784886d
RK
3486 register rtx op;
3487 enum machine_mode mode;
3488{
e075ae69 3489 if (! memory_operand (op, mode))
d784886d
RK
3490 return 0;
3491
e075ae69 3492 return memory_address_length (op) != 0;
d784886d 3493}
2247f6ed
JH
3494
3495/* Return nonzero if the rtx is known aligned. */
3496
3497int
3498aligned_operand (op, mode)
3499 rtx op;
3500 enum machine_mode mode;
3501{
3502 struct ix86_address parts;
3503
3504 if (!general_operand (op, mode))
3505 return 0;
3506
0f290768 3507 /* Registers and immediate operands are always "aligned". */
2247f6ed
JH
3508 if (GET_CODE (op) != MEM)
3509 return 1;
3510
0f290768 3511 /* Don't even try to do any aligned optimizations with volatiles. */
2247f6ed
JH
3512 if (MEM_VOLATILE_P (op))
3513 return 0;
3514
3515 op = XEXP (op, 0);
3516
3517 /* Pushes and pops are only valid on the stack pointer. */
3518 if (GET_CODE (op) == PRE_DEC
3519 || GET_CODE (op) == POST_INC)
3520 return 1;
3521
3522 /* Decode the address. */
3523 if (! ix86_decompose_address (op, &parts))
3524 abort ();
3525
1540f9eb
JH
3526 if (parts.base && GET_CODE (parts.base) == SUBREG)
3527 parts.base = SUBREG_REG (parts.base);
3528 if (parts.index && GET_CODE (parts.index) == SUBREG)
3529 parts.index = SUBREG_REG (parts.index);
3530
2247f6ed
JH
3531 /* Look for some component that isn't known to be aligned. */
3532 if (parts.index)
3533 {
3534 if (parts.scale < 4
bdb429a5 3535 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
2247f6ed
JH
3536 return 0;
3537 }
3538 if (parts.base)
3539 {
bdb429a5 3540 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
2247f6ed
JH
3541 return 0;
3542 }
3543 if (parts.disp)
3544 {
3545 if (GET_CODE (parts.disp) != CONST_INT
3546 || (INTVAL (parts.disp) & 3) != 0)
3547 return 0;
3548 }
3549
3550 /* Didn't find one -- this must be an aligned address. */
3551 return 1;
3552}
e075ae69
RH
3553\f
3554/* Return true if the constant is something that can be loaded with
3555 a special instruction. Only handle 0.0 and 1.0; others are less
3556 worthwhile. */
57dbca5e
BS
3557
3558int
e075ae69
RH
3559standard_80387_constant_p (x)
3560 rtx x;
57dbca5e 3561{
2b04e52b 3562 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
e075ae69 3563 return -1;
2b04e52b
JH
3564 /* Note that on the 80387, other constants, such as pi, that we should support
3565 too. On some machines, these are much slower to load as standard constant,
3566 than to load from doubles in memory. */
3567 if (x == CONST0_RTX (GET_MODE (x)))
3568 return 1;
3569 if (x == CONST1_RTX (GET_MODE (x)))
3570 return 2;
e075ae69 3571 return 0;
57dbca5e
BS
3572}
3573
2b04e52b
JH
3574/* Return 1 if X is FP constant we can load to SSE register w/o using memory.
3575 */
3576int
3577standard_sse_constant_p (x)
3578 rtx x;
3579{
3580 if (GET_CODE (x) != CONST_DOUBLE)
3581 return -1;
3582 return (x == CONST0_RTX (GET_MODE (x)));
3583}
3584
2a2ab3f9
JVA
3585/* Returns 1 if OP contains a symbol reference */
3586
3587int
3588symbolic_reference_mentioned_p (op)
3589 rtx op;
3590{
6f7d635c 3591 register const char *fmt;
2a2ab3f9
JVA
3592 register int i;
3593
3594 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
3595 return 1;
3596
3597 fmt = GET_RTX_FORMAT (GET_CODE (op));
3598 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3599 {
3600 if (fmt[i] == 'E')
3601 {
3602 register int j;
3603
3604 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3605 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3606 return 1;
3607 }
e9a25f70 3608
2a2ab3f9
JVA
3609 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3610 return 1;
3611 }
3612
3613 return 0;
3614}
e075ae69
RH
3615
3616/* Return 1 if it is appropriate to emit `ret' instructions in the
3617 body of a function. Do this only if the epilogue is simple, needing a
3618 couple of insns. Prior to reloading, we can't tell how many registers
3619 must be saved, so return 0 then. Return 0 if there is no frame
3620 marker to de-allocate.
3621
3622 If NON_SAVING_SETJMP is defined and true, then it is not possible
3623 for the epilogue to be simple, so return 0. This is a special case
3624 since NON_SAVING_SETJMP will not cause regs_ever_live to change
3625 until final, but jump_optimize may need to know sooner if a
3626 `return' is OK. */
32b5b1aa
SC
3627
3628int
e075ae69 3629ix86_can_use_return_insn_p ()
32b5b1aa 3630{
4dd2ac2c 3631 struct ix86_frame frame;
9a7372d6 3632
e075ae69
RH
3633#ifdef NON_SAVING_SETJMP
3634 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
3635 return 0;
3636#endif
9a7372d6
RH
3637
3638 if (! reload_completed || frame_pointer_needed)
3639 return 0;
32b5b1aa 3640
9a7372d6
RH
3641 /* Don't allow more than 32 pop, since that's all we can do
3642 with one instruction. */
3643 if (current_function_pops_args
3644 && current_function_args_size >= 32768)
e075ae69 3645 return 0;
32b5b1aa 3646
4dd2ac2c
JH
3647 ix86_compute_frame_layout (&frame);
3648 return frame.to_allocate == 0 && frame.nregs == 0;
e075ae69 3649}
6189a572
JH
3650\f
/* Return 1 if VALUE can be stored in the sign extended immediate field.
   Used on x86-64, where most instruction immediates are 32 bits that the
   hardware sign extends to 64 bits.  */
int
x86_64_sign_extended_value (value)
     rtx value;
{
  switch (GET_CODE (value))
    {
      /* CONST_DOUBLEs never match, since HOST_BITS_PER_WIDE_INT is known
	 to be at least 32 and thus all acceptable constants are
	 represented as CONST_INT.  */
    case CONST_INT:
      if (HOST_BITS_PER_WIDE_INT == 32)
	/* A 32-bit host integer always sign extends losslessly.  */
	return 1;
      else
	{
	  /* Accept VALUE iff its DImode truncation survives a round trip
	     through SImode, i.e. it fits in 32 signed bits.  */
	  HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
	  return trunc_int_for_mode (val, SImode) == val;
	}
      break;

      /* For certain code models, the symbolic references are known to fit.  */
    case SYMBOL_REF:
      return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL;

      /* For certain code models, the code is near as well.  */
    case LABEL_REF:
      return ix86_cmodel != CM_LARGE && ix86_cmodel != CM_SMALL_PIC;

      /* We also may accept the offsetted memory references in certain special
	 cases.  */
    case CONST:
      /* GOT-relative references are always acceptable.  */
      if (GET_CODE (XEXP (value, 0)) == UNSPEC
	  && XINT (XEXP (value, 0), 1) == UNSPEC_GOTPCREL)
	return 1;
      else if (GET_CODE (XEXP (value, 0)) == PLUS)
	{
	  rtx op1 = XEXP (XEXP (value, 0), 0);
	  rtx op2 = XEXP (XEXP (value, 0), 1);
	  HOST_WIDE_INT offset;

	  if (ix86_cmodel == CM_LARGE)
	    return 0;
	  if (GET_CODE (op2) != CONST_INT)
	    return 0;
	  offset = trunc_int_for_mode (INTVAL (op2), DImode);
	  switch (GET_CODE (op1))
	    {
	    case SYMBOL_REF:
	      /* For CM_SMALL assume that latest object is 1MB before
		 end of 31bits boundary.  We may also accept pretty
		 large negative constants knowing that all objects are
		 in the positive half of address space.  */
	      if (ix86_cmodel == CM_SMALL
		  && offset < 1024*1024*1024
		  && trunc_int_for_mode (offset, SImode) == offset)
		return 1;
	      /* For CM_KERNEL we know that all objects reside in the
		 negative half of 32bits address space.  We may not
		 accept negative offsets, since they may be just off
		 and we may accept pretty large positive ones.  */
	      if (ix86_cmodel == CM_KERNEL
		  && offset > 0
		  && trunc_int_for_mode (offset, SImode) == offset)
		return 1;
	      break;
	    case LABEL_REF:
	      /* These conditions are similar to SYMBOL_REF ones, just the
		 constraints for code models differ.  */
	      if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
		  && offset < 1024*1024*1024
		  && trunc_int_for_mode (offset, SImode) == offset)
		return 1;
	      if (ix86_cmodel == CM_KERNEL
		  && offset > 0
		  && trunc_int_for_mode (offset, SImode) == offset)
		return 1;
	      break;
	    default:
	      return 0;
	    }
	}
      return 0;
    default:
      return 0;
    }
}
3737
/* Return 1 if VALUE can be stored in the zero extended immediate field,
   i.e. its value fits in 32 unsigned bits.  */
int
x86_64_zero_extended_value (value)
     rtx value;
{
  switch (GET_CODE (value))
    {
    case CONST_DOUBLE:
      /* On a 32-bit host a 64-bit constant may arrive as a VOIDmode
	 CONST_DOUBLE; it zero extends exactly when its high half is 0.  */
      if (HOST_BITS_PER_WIDE_INT == 32)
	return (GET_MODE (value) == VOIDmode
		&& !CONST_DOUBLE_HIGH (value));
      else
	return 0;
    case CONST_INT:
      if (HOST_BITS_PER_WIDE_INT == 32)
	return INTVAL (value) >= 0;
      else
	/* Fits iff the upper 32 bits are all clear.  */
	return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
      break;

      /* For certain code models, the symbolic references are known to fit.  */
    case SYMBOL_REF:
      return ix86_cmodel == CM_SMALL;

      /* For certain code models, the code is near as well.  */
    case LABEL_REF:
      return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;

      /* We also may accept the offsetted memory references in certain special
	 cases.  */
    case CONST:
      if (GET_CODE (XEXP (value, 0)) == PLUS)
	{
	  rtx op1 = XEXP (XEXP (value, 0), 0);
	  rtx op2 = XEXP (XEXP (value, 0), 1);

	  if (ix86_cmodel == CM_LARGE)
	    return 0;
	  switch (GET_CODE (op1))
	    {
	    case SYMBOL_REF:
	      return 0;
	      /* NOTE(review): the "return 0" above makes everything below
		 in this case unreachable, so symbol+offset is always
		 rejected here -- confirm whether that is intentional or
		 whether the checks below were meant to run.  */
	      /* For small code model we may accept pretty large positive
		 offsets, since one bit is available for free.  Negative
		 offsets are limited by the size of NULL pointer area
		 specified by the ABI.  */
	      if (ix86_cmodel == CM_SMALL
		  && GET_CODE (op2) == CONST_INT
		  && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
		  && (trunc_int_for_mode (INTVAL (op2), SImode)
		      == INTVAL (op2)))
		return 1;
	      /* ??? For the kernel, we may accept adjustment of
		 -0x10000000, since we know that it will just convert
		 negative address space to positive, but perhaps this
		 is not worthwhile.  */
	      break;
	    case LABEL_REF:
	      /* These conditions are similar to SYMBOL_REF ones, just the
		 constraints for code models differ.  */
	      if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
		  && GET_CODE (op2) == CONST_INT
		  && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
		  && (trunc_int_for_mode (INTVAL (op2), SImode)
		      == INTVAL (op2)))
		return 1;
	      break;
	    default:
	      return 0;
	    }
	}
      return 0;
    default:
      return 0;
    }
}
6fca22eb
RH
3814
3815/* Value should be nonzero if functions must have frame pointers.
3816 Zero means the frame pointer need not be set up (and parms may
3817 be accessed via the stack pointer) in functions that seem suitable. */
3818
3819int
3820ix86_frame_pointer_required ()
3821{
3822 /* If we accessed previous frames, then the generated code expects
3823 to be able to access the saved ebp value in our frame. */
3824 if (cfun->machine->accesses_prev_frame)
3825 return 1;
a4f31c00 3826
6fca22eb
RH
3827 /* Several x86 os'es need a frame pointer for other reasons,
3828 usually pertaining to setjmp. */
3829 if (SUBTARGET_FRAME_POINTER_REQUIRED)
3830 return 1;
3831
3832 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
3833 the frame pointer by default. Turn it back on now if we've not
3834 got a leaf function. */
a7943381
RH
3835 if (TARGET_OMIT_LEAF_FRAME_POINTER
3836 && (!current_function_is_leaf || current_function_profile))
6fca22eb
RH
3837 return 1;
3838
3839 return 0;
3840}
3841
/* Record that the current function accesses previous call frames.
   ix86_frame_pointer_required checks this flag so that such functions
   always get a frame pointer.  */

void
ix86_setup_frame_addresses ()
{
  cfun->machine->accesses_prev_frame = 1;
}
e075ae69 3849\f
145aacc2
RH
/* Nonzero when the assembler supports .hidden and the target supports
   one-only (comdat) sections, so pc thunks can be emitted as shared,
   linker-folded functions instead of local labels.  */
#if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
# define USE_HIDDEN_LINKONCE 1
#else
# define USE_HIDDEN_LINKONCE 0
#endif

/* Bitmask over register numbers 0-7: bit N is set by output_set_got when
   a pc thunk for register N was referenced; ix86_asm_file_end reads it
   to emit the corresponding thunk bodies at end of file.  */
static int pic_labels_used;
e9a25f70 3857
145aacc2
RH
3858/* Fills in the label name that should be used for a pc thunk for
3859 the given register. */
3860
3861static void
3862get_pc_thunk_name (name, regno)
3863 char name[32];
3864 unsigned int regno;
3865{
3866 if (USE_HIDDEN_LINKONCE)
3867 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
3868 else
3869 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
3870}
3871
3872
e075ae69
RH
/* This function generates code for -fpic that loads %ebx with
   the return address of the caller and then returns.  Runs at end of
   file and emits one pc thunk for every register recorded in
   pic_labels_used by output_set_got.  */

void
ix86_asm_file_end (file)
     FILE *file;
{
  rtx xops[2];
  int regno;

  /* Only the eight 32-bit integer registers can carry the PIC base.  */
  for (regno = 0; regno < 8; ++regno)
    {
      char name[32];

      /* Skip registers for which no thunk was referenced.  */
      if (! ((pic_labels_used >> regno) & 1))
	continue;

      get_pc_thunk_name (name, regno);

      if (USE_HIDDEN_LINKONCE)
	{
	  tree decl;

	  /* Emit the thunk as a public, hidden, one-only function so
	     the linker folds duplicate copies from other objects.  */
	  decl = build_decl (FUNCTION_DECL, get_identifier (name),
			     error_mark_node);
	  TREE_PUBLIC (decl) = 1;
	  TREE_STATIC (decl) = 1;
	  DECL_ONE_ONLY (decl) = 1;

	  (*targetm.asm_out.unique_section) (decl, 0);
	  named_section (decl, NULL, 0);

	  ASM_GLOBALIZE_LABEL (file, name);
	  fputs ("\t.hidden\t", file);
	  assemble_name (file, name);
	  fputc ('\n', file);
	  ASM_DECLARE_FUNCTION_NAME (file, name, decl);
	}
      else
	{
	  /* Otherwise just drop a local label in the text section.  */
	  text_section ();
	  ASM_OUTPUT_LABEL (file, name);
	}

      /* Thunk body: load the return address (top of stack) into the
	 register, then return.  */
      xops[0] = gen_rtx_REG (SImode, regno);
      xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
      output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
      output_asm_insn ("ret", xops);
    }
}
32b5b1aa 3923
/* Emit code for the SET_GOT patterns: load DEST with the address of the
   global offset table.  Returns an empty template since all assembly is
   emitted through output_asm_insn.  */

const char *
output_set_got (dest)
     rtx dest;
{
  rtx xops[3];

  xops[0] = dest;
  xops[1] = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");

  if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
    {
      xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());

      if (!flag_pic)
	output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
      else
	/* The call targets the label emitted immediately below, so it
	   pushes the address of the next instruction ...  */
	output_asm_insn ("call\t%a2", xops);

      ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
				 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));

      if (flag_pic)
	/* ... which we then pop into DEST.  */
	output_asm_insn ("pop{l}\t%0", xops);
    }
  else
    {
      /* With TARGET_DEEP_BRANCH_PREDICTION, call a pc thunk instead of
	 the inline call/pop pair (presumably friendlier to the return
	 predictor -- see target documentation), and record that the
	 thunk body must be emitted at end of file.  */
      char name[32];
      get_pc_thunk_name (name, REGNO (dest));
      pic_labels_used |= 1 << REGNO (dest);

      xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
      xops[2] = gen_rtx_MEM (QImode, xops[2]);
      output_asm_insn ("call\t%X2", xops);
    }

  /* Finally add the GOT base offset to the loaded pc.  */
  if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
    output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
  else
    output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);

  return "";
}
8dfe5673 3968
0d7d98ee 3969/* Generate an "push" pattern for input ARG. */
e9a25f70 3970
e075ae69
RH
3971static rtx
3972gen_push (arg)
3973 rtx arg;
e9a25f70 3974{
c5c76735 3975 return gen_rtx_SET (VOIDmode,
0d7d98ee
JH
3976 gen_rtx_MEM (Pmode,
3977 gen_rtx_PRE_DEC (Pmode,
c5c76735
JL
3978 stack_pointer_rtx)),
3979 arg);
e9a25f70
JL
3980}
3981
bd09bdeb
RH
3982/* Return >= 0 if there is an unused call-clobbered register available
3983 for the entire function. */
3984
3985static unsigned int
3986ix86_select_alt_pic_regnum ()
3987{
3988 if (current_function_is_leaf && !current_function_profile)
3989 {
3990 int i;
3991 for (i = 2; i >= 0; --i)
3992 if (!regs_ever_live[i])
3993 return i;
3994 }
3995
3996 return INVALID_REGNUM;
3997}
fce5a9f2 3998
4dd2ac2c
JH
3999/* Return 1 if we need to save REGNO. */
4000static int
1020a5ab 4001ix86_save_reg (regno, maybe_eh_return)
9b690711 4002 unsigned int regno;
37a58036 4003 int maybe_eh_return;
1020a5ab 4004{
bd09bdeb
RH
4005 if (pic_offset_table_rtx
4006 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4007 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
66edd3b4 4008 || current_function_profile
1020a5ab 4009 || current_function_calls_eh_return))
bd09bdeb
RH
4010 {
4011 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4012 return 0;
4013 return 1;
4014 }
1020a5ab
RH
4015
4016 if (current_function_calls_eh_return && maybe_eh_return)
4017 {
4018 unsigned i;
4019 for (i = 0; ; i++)
4020 {
b531087a 4021 unsigned test = EH_RETURN_DATA_REGNO (i);
1020a5ab
RH
4022 if (test == INVALID_REGNUM)
4023 break;
9b690711 4024 if (test == regno)
1020a5ab
RH
4025 return 1;
4026 }
4027 }
4dd2ac2c 4028
1020a5ab
RH
4029 return (regs_ever_live[regno]
4030 && !call_used_regs[regno]
4031 && !fixed_regs[regno]
4032 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4dd2ac2c
JH
4033}
4034
0903fcab
JH
4035/* Return number of registers to be saved on the stack. */
4036
4037static int
4038ix86_nsaved_regs ()
4039{
4040 int nregs = 0;
0903fcab
JH
4041 int regno;
4042
4dd2ac2c 4043 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
1020a5ab 4044 if (ix86_save_reg (regno, true))
4dd2ac2c 4045 nregs++;
0903fcab
JH
4046 return nregs;
4047}
4048
4049/* Return the offset between two registers, one to be eliminated, and the other
4050 its replacement, at the start of a routine. */
4051
4052HOST_WIDE_INT
4053ix86_initial_elimination_offset (from, to)
4054 int from;
4055 int to;
4056{
4dd2ac2c
JH
4057 struct ix86_frame frame;
4058 ix86_compute_frame_layout (&frame);
564d80f4
JH
4059
4060 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4dd2ac2c 4061 return frame.hard_frame_pointer_offset;
564d80f4
JH
4062 else if (from == FRAME_POINTER_REGNUM
4063 && to == HARD_FRAME_POINTER_REGNUM)
4dd2ac2c 4064 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
0903fcab
JH
4065 else
4066 {
564d80f4
JH
4067 if (to != STACK_POINTER_REGNUM)
4068 abort ();
4069 else if (from == ARG_POINTER_REGNUM)
4dd2ac2c 4070 return frame.stack_pointer_offset;
564d80f4
JH
4071 else if (from != FRAME_POINTER_REGNUM)
4072 abort ();
0903fcab 4073 else
4dd2ac2c 4074 return frame.stack_pointer_offset - frame.frame_pointer_offset;
0903fcab
JH
4075 }
4076}
4077
/* Fill structure ix86_frame about frame of currently computed function.
   The layout is accumulated in OFFSET, growing away from the return
   address: saved regs, va-arg area, padding, locals, outgoing args.  */

static void
ix86_compute_frame_layout (frame)
     struct ix86_frame *frame;
{
  HOST_WIDE_INT total_size;
  int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
  int offset;
  int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
  HOST_WIDE_INT size = get_frame_size ();

  frame->nregs = ix86_nsaved_regs ();
  total_size = size;

  /* Skip return address and saved base pointer.  */
  offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;

  frame->hard_frame_pointer_offset = offset;

  /* Do some sanity checking of stack_alignment_needed and
     preferred_alignment, since the i386 port is the only one using
     those features that may break easily.  */

  if (size && !stack_alignment_needed)
    abort ();
  if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
    abort ();
  if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
    abort ();
  if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
    abort ();

  if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
    stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;

  /* Register save area.  */
  offset += frame->nregs * UNITS_PER_WORD;

  /* Va-arg area.  */
  if (ix86_save_varrargs_registers)
    {
      offset += X86_64_VARARGS_SIZE;
      frame->va_arg_size = X86_64_VARARGS_SIZE;
    }
  else
    frame->va_arg_size = 0;

  /* Align start of frame for local function.  */
  frame->padding1 = ((offset + stack_alignment_needed - 1)
		     & -stack_alignment_needed) - offset;

  offset += frame->padding1;

  /* Frame pointer points here.  */
  frame->frame_pointer_offset = offset;

  offset += size;

  /* Add outgoing arguments area.  Can be skipped if we eliminated
     all the function calls as dead code.  */
  if (ACCUMULATE_OUTGOING_ARGS && !current_function_is_leaf)
    {
      offset += current_function_outgoing_args_size;
      frame->outgoing_arguments_size = current_function_outgoing_args_size;
    }
  else
    frame->outgoing_arguments_size = 0;

  /* Align stack boundary.  Only needed if we're calling another function
     or using alloca.  */
  if (!current_function_is_leaf || current_function_calls_alloca)
    frame->padding2 = ((offset + preferred_alignment - 1)
		       & -preferred_alignment) - offset;
  else
    frame->padding2 = 0;

  offset += frame->padding2;

  /* We've reached end of stack frame.  */
  frame->stack_pointer_offset = offset;

  /* Size prologue needs to allocate.  */
  frame->to_allocate =
    (size + frame->padding1 + frame->padding2
     + frame->outgoing_arguments_size + frame->va_arg_size);

  /* 64-bit leaf functions with an unchanging stack pointer may use the
     red zone below the stack pointer instead of allocating explicitly;
     the usable amount is capped at RED_ZONE_SIZE - RED_ZONE_RESERVE.  */
  if (TARGET_64BIT && TARGET_RED_ZONE && current_function_sp_is_unchanging
      && current_function_is_leaf)
    {
      frame->red_zone_size = frame->to_allocate;
      if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
	frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
    }
  else
    frame->red_zone_size = 0;
  frame->to_allocate -= frame->red_zone_size;
  frame->stack_pointer_offset -= frame->red_zone_size;
#if 0
  fprintf (stderr, "nregs: %i\n", frame->nregs);
  fprintf (stderr, "size: %i\n", size);
  fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
  fprintf (stderr, "padding1: %i\n", frame->padding1);
  fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
  fprintf (stderr, "padding2: %i\n", frame->padding2);
  fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
  fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
  fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
  fprintf (stderr, "hard_frame_pointer_offset: %i\n",
	   frame->hard_frame_pointer_offset);
  fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
#endif
}
4191
0903fcab
JH
4192/* Emit code to save registers in the prologue. */
4193
4194static void
4195ix86_emit_save_regs ()
4196{
4197 register int regno;
0903fcab 4198 rtx insn;
0903fcab 4199
4dd2ac2c 4200 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
1020a5ab 4201 if (ix86_save_reg (regno, true))
0903fcab 4202 {
0d7d98ee 4203 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
0903fcab
JH
4204 RTX_FRAME_RELATED_P (insn) = 1;
4205 }
4206}
4207
c6036a37
JH
4208/* Emit code to save registers using MOV insns. First register
4209 is restored from POINTER + OFFSET. */
4210static void
4211ix86_emit_save_regs_using_mov (pointer, offset)
b72f00af
RK
4212 rtx pointer;
4213 HOST_WIDE_INT offset;
c6036a37
JH
4214{
4215 int regno;
4216 rtx insn;
4217
4218 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4219 if (ix86_save_reg (regno, true))
4220 {
b72f00af
RK
4221 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
4222 Pmode, offset),
c6036a37
JH
4223 gen_rtx_REG (Pmode, regno));
4224 RTX_FRAME_RELATED_P (insn) = 1;
4225 offset += UNITS_PER_WORD;
4226 }
4227}
4228
/* Expand the prologue into a bunch of separate insns: save the frame
   pointer, save call-saved registers (by pushes or by moves), allocate
   the frame, and set up the PIC register when one is needed.  */

void
ix86_expand_prologue ()
{
  rtx insn;
  bool pic_reg_used;
  struct ix86_frame frame;
  int use_mov = 0;
  HOST_WIDE_INT allocate;

  if (!optimize_size)
    {
      /* Fast prologue/epilogue only pays off in functions cheap enough
	 that the extra instructions are not amortized.  */
      use_fast_prologue_epilogue
	= !expensive_function_p (FAST_PROLOGUE_INSN_COUNT);
      if (TARGET_PROLOGUE_USING_MOVE)
	use_mov = use_fast_prologue_epilogue;
    }
  ix86_compute_frame_layout (&frame);

  /* Note: AT&T enter does NOT have reversed args.  Enter is probably
     slower on all targets.  Also sdb doesn't like it.  */

  if (frame_pointer_needed)
    {
      insn = emit_insn (gen_push (hard_frame_pointer_rtx));
      RTX_FRAME_RELATED_P (insn) = 1;

      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  allocate = frame.to_allocate;
  /* In case we are dealing only with single register and empty frame,
     push is equivalent of the mov+add sequence.  */
  if (allocate == 0 && frame.nregs <= 1)
    use_mov = 0;

  if (!use_mov)
    ix86_emit_save_regs ();
  else
    /* Registers will be stored by moves into the allocated area, so
       grow the allocation to cover them.  */
    allocate += frame.nregs * UNITS_PER_WORD;

  if (allocate == 0)
    ;
  else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
    {
      insn = emit_insn (gen_pro_epilogue_adjust_stack
			(stack_pointer_rtx, stack_pointer_rtx,
			 GEN_INT (-allocate)));
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  else
    {
      /* Large allocation with stack probing: call _alloca with the
	 size in %eax.  ??? Is this only valid for Win32?  */

      rtx arg0, sym;

      if (TARGET_64BIT)
	abort ();

      arg0 = gen_rtx_REG (SImode, 0);
      emit_move_insn (arg0, GEN_INT (allocate));

      sym = gen_rtx_MEM (FUNCTION_MODE,
			 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
      insn = emit_call_insn (gen_call (sym, const0_rtx, constm1_rtx));

      /* Record that the call consumes arg0 so it is not deleted.  */
      CALL_INSN_FUNCTION_USAGE (insn)
	= gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
			     CALL_INSN_FUNCTION_USAGE (insn));
    }
  if (use_mov)
    {
      if (!frame_pointer_needed || !frame.to_allocate)
	ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
      else
	ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
				       -frame.nregs * UNITS_PER_WORD);
    }

#ifdef SUBTARGET_PROLOGUE
  SUBTARGET_PROLOGUE;
#endif

  /* Decide whether the PIC register must be initialized; if a leaf
     function can use a spare call-clobbered register, switch
     pic_offset_table_rtx over to it to avoid the ebx save.  */
  pic_reg_used = false;
  if (pic_offset_table_rtx
      && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
	  || current_function_profile))
    {
      unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();

      if (alt_pic_reg_used != INVALID_REGNUM)
	REGNO (pic_offset_table_rtx) = alt_pic_reg_used;

      pic_reg_used = true;
    }

  if (pic_reg_used)
    {
      insn = emit_insn (gen_set_got (pic_offset_table_rtx));

      /* Even with accurate pre-reload life analysis, we can wind up
	 deleting all references to the pic register after reload.
	 Consider if cross-jumping unifies two sides of a branch
	 controlled by a comparison vs the only read from a global.
	 In which case, allow the set_got to be deleted, though we're
	 too late to do anything about the ebx save in the prologue.  */
      REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
    }

  /* Prevent function calls from being scheduled before the call to mcount.
     In the pic_reg_used case, make sure that the got load isn't deleted.  */
  if (current_function_profile)
    emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
}
4345
da2d1d3a
JH
4346/* Emit code to restore saved registers using MOV insns. First register
4347 is restored from POINTER + OFFSET. */
4348static void
1020a5ab
RH
4349ix86_emit_restore_regs_using_mov (pointer, offset, maybe_eh_return)
4350 rtx pointer;
4351 int offset;
37a58036 4352 int maybe_eh_return;
da2d1d3a
JH
4353{
4354 int regno;
da2d1d3a 4355
4dd2ac2c 4356 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
1020a5ab 4357 if (ix86_save_reg (regno, maybe_eh_return))
da2d1d3a 4358 {
4dd2ac2c 4359 emit_move_insn (gen_rtx_REG (Pmode, regno),
b72f00af
RK
4360 adjust_address (gen_rtx_MEM (Pmode, pointer),
4361 Pmode, offset));
4dd2ac2c 4362 offset += UNITS_PER_WORD;
da2d1d3a
JH
4363 }
4364}
4365
/* Restore function stack, frame, and registers.  STYLE is 0 for a
   sibcall epilogue (no return emitted), 2 for an eh_return epilogue,
   and 1 otherwise.  */

void
ix86_expand_epilogue (style)
     int style;
{
  int regno;
  int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
  struct ix86_frame frame;
  HOST_WIDE_INT offset;

  ix86_compute_frame_layout (&frame);

  /* Calculate start of saved registers relative to ebp.  Special care
     must be taken for the normal return case of a function using
     eh_return: the eax and edx registers are marked as saved, but not
     restored along this path.  */
  offset = frame.nregs;
  if (current_function_calls_eh_return && style != 2)
    offset -= 2;
  offset *= -UNITS_PER_WORD;

  /* If we're only restoring one register and sp is not valid then
     using a move instruction to restore the register since it's
     less work than reloading sp and popping the register.

     The default code result in stack adjustment using add/lea instruction,
     while this code results in LEAVE instruction (or discrete equivalent),
     so it is profitable in some other cases as well.  Especially when there
     are no registers to restore.  We also use this code when TARGET_USE_LEAVE
     and there is exactly one register to pop.  This heuristic may need some
     tuning in future.  */
  if ((!sp_valid && frame.nregs <= 1)
      || (TARGET_EPILOGUE_USING_MOVE
	  && use_fast_prologue_epilogue
	  && (frame.nregs > 1 || frame.to_allocate))
      || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
      || (frame_pointer_needed && TARGET_USE_LEAVE
	  && use_fast_prologue_epilogue && frame.nregs == 1)
      || current_function_calls_eh_return)
    {
      /* Restore registers.  We can use ebp or esp to address the memory
	 locations.  If both are available, default to ebp, since offsets
	 are known to be small.  Only exception is esp pointing directly to the
	 end of block of saved registers, where we may simplify addressing
	 mode.  */

      if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
	ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
					  frame.to_allocate, style == 2);
      else
	ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
					  offset, style == 2);

      /* eh_return epilogues need %ecx added to the stack pointer.  */
      if (style == 2)
	{
	  rtx tmp, sa = EH_RETURN_STACKADJ_RTX;

	  if (frame_pointer_needed)
	    {
	      /* Fold the stack adjustment into the frame pointer
		 restore: sa += ebp + wordsize, reload ebp, then point
		 esp at sa.  */
	      tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
	      tmp = plus_constant (tmp, UNITS_PER_WORD);
	      emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));

	      tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
	      emit_move_insn (hard_frame_pointer_rtx, tmp);

	      emit_insn (gen_pro_epilogue_adjust_stack
			 (stack_pointer_rtx, sa, const0_rtx));
	    }
	  else
	    {
	      /* No frame pointer: add the whole frame plus the
		 adjustment to esp directly.  */
	      tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
	      tmp = plus_constant (tmp, (frame.to_allocate
					 + frame.nregs * UNITS_PER_WORD));
	      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
	    }
	}
      else if (!frame_pointer_needed)
	emit_insn (gen_pro_epilogue_adjust_stack
		   (stack_pointer_rtx, stack_pointer_rtx,
		    GEN_INT (frame.to_allocate
			     + frame.nregs * UNITS_PER_WORD)));
      /* If not an i386, mov & pop is faster than "leave".  */
      else if (TARGET_USE_LEAVE || optimize_size || !use_fast_prologue_epilogue)
	emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
      else
	{
	  emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
						    hard_frame_pointer_rtx,
						    const0_rtx));
	  if (TARGET_64BIT)
	    emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
	  else
	    emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
	}
    }
  else
    {
      /* First step is to deallocate the stack frame so that we can
	 pop the registers.  */
      if (!sp_valid)
	{
	  if (!frame_pointer_needed)
	    abort ();
	  emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
						    hard_frame_pointer_rtx,
						    GEN_INT (offset)));
	}
      else if (frame.to_allocate)
	emit_insn (gen_pro_epilogue_adjust_stack
		   (stack_pointer_rtx, stack_pointer_rtx,
		    GEN_INT (frame.to_allocate)));

      /* Pop registers in the reverse of the order they were pushed.  */
      for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
	if (ix86_save_reg (regno, false))
	  {
	    if (TARGET_64BIT)
	      emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
	    else
	      emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
	  }
      if (frame_pointer_needed)
	{
	  /* Leave results in shorter dependency chains on CPUs that are
	     able to grok it fast.  */
	  if (TARGET_USE_LEAVE)
	    emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
	  else if (TARGET_64BIT)
	    emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
	  else
	    emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
	}
    }

  /* Sibcall epilogues don't want a return instruction.  */
  if (style == 0)
    return;

  if (current_function_pops_args && current_function_args_size)
    {
      rtx popc = GEN_INT (current_function_pops_args);

      /* i386 can only pop 64K bytes.  If asked to pop more, pop
	 return address, do explicit add, and jump indirectly to the
	 caller.  */

      if (current_function_pops_args >= 65536)
	{
	  rtx ecx = gen_rtx_REG (SImode, 2);

	  /* There is no "pascal" calling convention in 64bit ABI.  */
	  if (TARGET_64BIT)
	    abort ();

	  emit_insn (gen_popsi1 (ecx));
	  emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
	  emit_jump_insn (gen_return_indirect_internal (ecx));
	}
      else
	emit_jump_insn (gen_return_pop_internal (popc));
    }
  else
    emit_jump_insn (gen_return_internal ());
}
bd09bdeb
RH
4532
/* Reset from the function's potential modifications.  In particular,
   undo the register switch ix86_expand_prologue may have performed on
   pic_offset_table_rtx (it can point it at an alternate call-clobbered
   register for leaf functions).  */

static void
ix86_output_function_epilogue (file, size)
     FILE *file ATTRIBUTE_UNUSED;
     HOST_WIDE_INT size ATTRIBUTE_UNUSED;
{
  if (pic_offset_table_rtx)
    REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
}
e075ae69
RH
4543\f
/* Extract the parts of an RTL expression that is a valid memory address
   for an instruction into OUT (base, index, displacement, scale).
   Return 0 if the structure of the address is grossly off.  Return -1
   if the address contains ASHIFT, so it is not strictly valid, but
   still used for computing length of lea instruction.  Return 1 for a
   fully valid address.  */

static int
ix86_decompose_address (addr, out)
     register rtx addr;
     struct ix86_address *out;
{
  rtx base = NULL_RTX;
  rtx index = NULL_RTX;
  rtx disp = NULL_RTX;
  HOST_WIDE_INT scale = 1;
  rtx scale_rtx = NULL_RTX;
  int retval = 1;

  if (REG_P (addr) || GET_CODE (addr) == SUBREG)
    base = addr;
  else if (GET_CODE (addr) == PLUS)
    {
      rtx op0 = XEXP (addr, 0);
      rtx op1 = XEXP (addr, 1);
      enum rtx_code code0 = GET_CODE (op0);
      enum rtx_code code1 = GET_CODE (op1);

      if (code0 == REG || code0 == SUBREG)
	{
	  if (code1 == REG || code1 == SUBREG)
	    index = op0, base = op1;	/* index + base */
	  else
	    base = op0, disp = op1;	/* base + displacement */
	}
      else if (code0 == MULT)
	{
	  index = XEXP (op0, 0);
	  scale_rtx = XEXP (op0, 1);
	  if (code1 == REG || code1 == SUBREG)
	    base = op1;			/* index*scale + base */
	  else
	    disp = op1;			/* index*scale + disp */
	}
      else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
	{
	  index = XEXP (XEXP (op0, 0), 0);	/* index*scale + base + disp */
	  scale_rtx = XEXP (XEXP (op0, 0), 1);
	  base = XEXP (op0, 1);
	  disp = op1;
	}
      else if (code0 == PLUS)
	{
	  index = XEXP (op0, 0);	/* index + base + disp */
	  base = XEXP (op0, 1);
	  disp = op1;
	}
      else
	return 0;
    }
  else if (GET_CODE (addr) == MULT)
    {
      index = XEXP (addr, 0);		/* index*scale */
      scale_rtx = XEXP (addr, 1);
    }
  else if (GET_CODE (addr) == ASHIFT)
    {
      rtx tmp;

      /* We're called for lea too, which implements ashift on occasion.  */
      index = XEXP (addr, 0);
      tmp = XEXP (addr, 1);
      if (GET_CODE (tmp) != CONST_INT)
	return 0;
      scale = INTVAL (tmp);
      /* Only shifts by 0..3 correspond to encodable scales 1/2/4/8.  */
      if ((unsigned HOST_WIDE_INT) scale > 3)
	return 0;
      scale = 1 << scale;
      retval = -1;
    }
  else
    disp = addr;			/* displacement */

  /* Extract the integral value of scale.  */
  if (scale_rtx)
    {
      if (GET_CODE (scale_rtx) != CONST_INT)
	return 0;
      scale = INTVAL (scale_rtx);
    }

  /* Allow arg pointer and stack pointer as index if there is not scaling;
     swap them into the base slot since they cannot be encoded as index.  */
  if (base && index && scale == 1
      && (index == arg_pointer_rtx || index == frame_pointer_rtx
	  || index == stack_pointer_rtx))
    {
      rtx tmp = base;
      base = index;
      index = tmp;
    }

  /* Special case: %ebp cannot be encoded as a base without a displacement.  */
  if ((base == hard_frame_pointer_rtx
       || base == frame_pointer_rtx
       || base == arg_pointer_rtx) && !disp)
    disp = const0_rtx;

  /* Special case: on K6, [%esi] makes the instruction vector decoded.
     Avoid this by transforming to [%esi+0].  */
  if (ix86_cpu == PROCESSOR_K6 && !optimize_size
      && base && !index && !disp
      && REG_P (base)
      && REGNO_REG_CLASS (REGNO (base)) == SIREG)
    disp = const0_rtx;

  /* Special case: encode reg+reg instead of reg*2.  */
  if (!base && index && scale && scale == 2)
    base = index, scale = 1;

  /* Special case: scaling cannot be encoded without base or displacement.  */
  if (!base && !disp && index && scale != 1)
    disp = const0_rtx;

  out->base = base;
  out->index = index;
  out->disp = disp;
  out->scale = scale;

  return retval;
}
01329426
JH
4673\f
/* Return cost of the memory address x.
   For i386, it is better to use a complex address than let gcc copy
   the address into a reg and make a new pseudo.  But not if the address
   requires two regs - that would mean more pseudos with longer
   lifetimes.  */
int
ix86_address_cost (x)
     rtx x;
{
  struct ix86_address parts;
  /* Baseline cost; adjusted up or down by the heuristics below.  */
  int cost = 1;

  /* X must already be a valid address; the caller guarantees this.  */
  if (!ix86_decompose_address (x, &parts))
    abort ();

  /* Look through SUBREGs so the register tests below see the
     underlying register.  */
  if (parts.base && GET_CODE (parts.base) == SUBREG)
    parts.base = SUBREG_REG (parts.base);
  if (parts.index && GET_CODE (parts.index) == SUBREG)
    parts.index = SUBREG_REG (parts.index);

  /* More complex memory references are better.  */
  if (parts.disp && parts.disp != const0_rtx)
    cost--;

  /* Attempt to minimize number of registers in the address: penalize
     a base or index that is not a hard register.  */
  if ((parts.base
       && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
      || (parts.index
	  && (!REG_P (parts.index)
	      || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
    cost++;

  /* Two distinct non-hard registers in one address cost extra.  */
  if (parts.base
      && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
      && parts.index
      && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
      && parts.base != parts.index)
    cost++;

  /* AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
     since its predecode logic can't detect the length of instructions
     and it degenerates to vector decoded.  Increase cost of such
     addresses here.  The penalty is minimally 2 cycles.  It may be worthwhile
     to split such addresses or even refuse such addresses at all.

     Following addressing modes are affected:
      [base+scale*index]
      [scale*index+disp]
      [base+index]

     The first and last case may be avoidable by explicitly coding the zero in
     memory address, but I don't have AMD-K6 machine handy to check this
     theory.  */

  if (TARGET_K6
      && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
	  || (parts.disp && !parts.base && parts.index && parts.scale != 1)
	  || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
    cost += 10;

  return cost;
}
4736\f
b949ea8b
JW
/* If X is a machine specific address (i.e. a symbol or label being
   referenced as a displacement from the GOT implemented using an
   UNSPEC), then return the base term.  Otherwise return X.  */

rtx
ix86_find_base_term (x)
     rtx x;
{
  rtx term;

  if (TARGET_64BIT)
    {
      /* 64-bit form: (const (unspec [sym] UNSPEC_GOTPCREL)) optionally
	 wrapped in a PLUS with a constant offset.  */
      if (GET_CODE (x) != CONST)
	return x;
      term = XEXP (x, 0);
      if (GET_CODE (term) == PLUS
	  && (GET_CODE (XEXP (term, 1)) == CONST_INT
	      || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
	term = XEXP (term, 0);
      if (GET_CODE (term) != UNSPEC
	  || XINT (term, 1) != UNSPEC_GOTPCREL)
	return x;

      /* The symbol or label is the sole element of the unspec vector.  */
      term = XVECEXP (term, 0, 0);

      if (GET_CODE (term) != SYMBOL_REF
	  && GET_CODE (term) != LABEL_REF)
	return x;

      return term;
    }

  /* 32-bit form: (plus pic_offset_table_rtx
		       (const (unspec [sym] UNSPEC_GOTOFF))).  */
  if (GET_CODE (x) != PLUS
      || XEXP (x, 0) != pic_offset_table_rtx
      || GET_CODE (XEXP (x, 1)) != CONST)
    return x;

  term = XEXP (XEXP (x, 1), 0);

  /* Skip over an optional constant offset.  */
  if (GET_CODE (term) == PLUS && GET_CODE (XEXP (term, 1)) == CONST_INT)
    term = XEXP (term, 0);

  if (GET_CODE (term) != UNSPEC
      || XINT (term, 1) != UNSPEC_GOTOFF)
    return x;

  term = XVECEXP (term, 0, 0);

  if (GET_CODE (term) != SYMBOL_REF
      && GET_CODE (term) != LABEL_REF)
    return x;

  return term;
}
4791\f
f996902d
RH
/* Determine if a given RTX is a valid constant.  We already know this
   satisfies CONSTANT_P.  */

bool
legitimate_constant_p (x)
     rtx x;
{
  rtx inner;

  switch (GET_CODE (x))
    {
    case SYMBOL_REF:
      /* TLS symbols are not constant.  */
      if (tls_symbolic_operand (x, Pmode))
	return false;
      break;

    case CONST:
      inner = XEXP (x, 0);

      /* Offsets of TLS symbols are never valid.
	 Discourage CSE from creating them.  */
      if (GET_CODE (inner) == PLUS
	  && tls_symbolic_operand (XEXP (inner, 0), Pmode))
	return false;

      /* Only some unspecs are valid as "constants".  */
      if (GET_CODE (inner) == UNSPEC)
	switch (XINT (inner, 1))
	  {
	  case UNSPEC_TPOFF:
	    /* Valid only for a local-exec TLS symbol operand.  */
	    return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
	  case UNSPEC_TP:
	    return true;
	  default:
	    return false;
	  }
      break;

    default:
      break;
    }

  /* Otherwise we handle everything else in the move patterns.  */
  return true;
}
4838
4839/* Determine if a given RTX is a valid constant address. */
4840
4841bool
4842constant_address_p (x)
4843 rtx x;
4844{
4845 switch (GET_CODE (x))
4846 {
4847 case LABEL_REF:
4848 case CONST_INT:
4849 return true;
4850
4851 case CONST_DOUBLE:
4852 return TARGET_64BIT;
4853
4854 case CONST:
4855 case SYMBOL_REF:
4856 return !flag_pic && legitimate_constant_p (x);
4857
4858 default:
4859 return false;
4860 }
4861}
4862
/* Nonzero if the constant value X is a legitimate general operand
   when generating PIC code.  It is given that flag_pic is on and
   that X satisfies CONSTANT_P or is a CONST_DOUBLE.  */

bool
legitimate_pic_operand_p (x)
     rtx x;
{
  rtx inner;

  switch (GET_CODE (x))
    {
    case CONST:
      inner = XEXP (x, 0);

      /* Only some unspecs are valid as "constants".  */
      if (GET_CODE (inner) == UNSPEC)
	switch (XINT (inner, 1))
	  {
	  case UNSPEC_TPOFF:
	    /* Valid only for a local-exec TLS symbol operand.  */
	    return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
	  case UNSPEC_TP:
	    return true;
	  default:
	    return false;
	  }
      /* FALLTHRU */

    case SYMBOL_REF:
    case LABEL_REF:
      /* Symbolic operands must be valid PIC displacements.  */
      return legitimate_pic_address_disp_p (x);

    default:
      /* Everything else (e.g. CONST_INT, CONST_DOUBLE) is fine.  */
      return true;
    }
}
4899
e075ae69
RH
/* Determine if a given CONST RTX is a valid memory displacement
   in PIC mode.  */

int
legitimate_pic_address_disp_p (disp)
     register rtx disp;
{
  /* Set when we strip an outer (plus ... const_int); some unspecs
     below are invalid when combined with an offset.  */
  bool saw_plus;

  /* In 64bit mode we can allow direct addresses of symbols and labels
     when they are not dynamic symbols.  */
  if (TARGET_64BIT)
    {
      rtx x = disp;
      if (GET_CODE (disp) == CONST)
	x = XEXP (disp, 0);
      /* ??? Handle PIC code models */
      if (GET_CODE (x) == PLUS
	  && (GET_CODE (XEXP (x, 1)) == CONST_INT
	      && ix86_cmodel == CM_SMALL_PIC
	      && INTVAL (XEXP (x, 1)) < 1024*1024*1024
	      && INTVAL (XEXP (x, 1)) > -1024*1024*1024))
	x = XEXP (x, 0);
      if (local_symbolic_operand (x, Pmode))
	return 1;
    }
  if (GET_CODE (disp) != CONST)
    return 0;
  disp = XEXP (disp, 0);

  if (TARGET_64BIT)
    {
      /* It is unsafe to allow PLUS expressions here, since they could
	 exceed the allowed distance of GOT tables.  We should not need
	 them anyway.  */
      if (GET_CODE (disp) != UNSPEC
	  || XINT (disp, 1) != UNSPEC_GOTPCREL)
	return 0;

      if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
	  && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
	return 0;
      return 1;
    }

  saw_plus = false;
  if (GET_CODE (disp) == PLUS)
    {
      if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
	return 0;
      disp = XEXP (disp, 0);
      saw_plus = true;
    }

  if (GET_CODE (disp) != UNSPEC)
    return 0;

  switch (XINT (disp, 1))
    {
    case UNSPEC_GOT:
      /* @GOT references cannot carry an addend.  */
      if (saw_plus)
	return false;
      return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
    case UNSPEC_GOTOFF:
      /* @GOTOFF allows an addend, but only for local symbols.  */
      return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
    case UNSPEC_GOTTPOFF:
      if (saw_plus)
	return false;
      return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
    case UNSPEC_NTPOFF:
      /* ??? Could support offset here.  */
      if (saw_plus)
	return false;
      return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
    case UNSPEC_DTPOFF:
      /* ??? Could support offset here.  */
      if (saw_plus)
	return false;
      return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
    }

  return 0;
}
4982
e075ae69
RH
/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
   memory address for an instruction.  The MODE argument is the machine mode
   for the MEM expression that wants to use this address.

   It only recognizes address in canonical form.  LEGITIMIZE_ADDRESS should
   convert common non-canonical forms to canonical form so that they will
   be recognized.  */

int
legitimate_address_p (mode, addr, strict)
     enum machine_mode mode;
     register rtx addr;
     int strict;
{
  struct ix86_address parts;
  rtx base, index, disp;
  HOST_WIDE_INT scale;
  /* On failure, REASON/REASON_RTX describe why for -mdebug-addr.  */
  const char *reason = NULL;
  rtx reason_rtx = NULL_RTX;

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr,
	       "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
	       GET_MODE_NAME (mode), strict);
      debug_rtx (addr);
    }

  if (ix86_decompose_address (addr, &parts) <= 0)
    {
      reason = "decomposition failed";
      goto report_error;
    }

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;

  /* Validate base register.

     Don't allow SUBREG's here, it can lead to spill failures when the base
     is one word out of a two word structure, which is represented internally
     as a DImode int.  */

  if (base)
    {
      rtx reg;
      reason_rtx = base;

      if (GET_CODE (base) == SUBREG)
	reg = SUBREG_REG (base);
      else
	reg = base;

      if (GET_CODE (reg) != REG)
	{
	  reason = "base is not a register";
	  goto report_error;
	}

      if (GET_MODE (base) != Pmode)
	{
	  reason = "base is not in Pmode";
	  goto report_error;
	}

      /* Strict checking requires a hard register valid as a base;
	 non-strict also accepts pseudos.  */
      if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
	  || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
	{
	  reason = "base is not valid";
	  goto report_error;
	}
    }

  /* Validate index register.

     Don't allow SUBREG's here, it can lead to spill failures when the index
     is one word out of a two word structure, which is represented internally
     as a DImode int.  */

  if (index)
    {
      rtx reg;
      reason_rtx = index;

      if (GET_CODE (index) == SUBREG)
	reg = SUBREG_REG (index);
      else
	reg = index;

      if (GET_CODE (reg) != REG)
	{
	  reason = "index is not a register";
	  goto report_error;
	}

      if (GET_MODE (index) != Pmode)
	{
	  reason = "index is not in Pmode";
	  goto report_error;
	}

      if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
	  || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
	{
	  reason = "index is not valid";
	  goto report_error;
	}
    }

  /* Validate scale factor.  */
  if (scale != 1)
    {
      reason_rtx = GEN_INT (scale);
      if (!index)
	{
	  reason = "scale without index";
	  goto report_error;
	}

      /* Hardware only encodes scale factors 1, 2, 4 and 8.  */
      if (scale != 2 && scale != 4 && scale != 8)
	{
	  reason = "scale is not a valid multiplier";
	  goto report_error;
	}
    }

  /* Validate displacement.  */
  if (disp)
    {
      reason_rtx = disp;

      if (TARGET_64BIT)
	{
	  /* Displacements must fit in a sign-extended 32-bit field.  */
	  if (!x86_64_sign_extended_value (disp))
	    {
	      reason = "displacement is out of range";
	      goto report_error;
	    }
	}
      else
	{
	  if (GET_CODE (disp) == CONST_DOUBLE)
	    {
	      reason = "displacement is a const_double";
	      goto report_error;
	    }
	}

      if (GET_CODE (disp) == CONST
	  && GET_CODE (XEXP (disp, 0)) == UNSPEC)
	switch (XINT (XEXP (disp, 0), 1))
	  {
	  case UNSPEC_GOT:
	  case UNSPEC_GOTOFF:
	  case UNSPEC_GOTPCREL:
	    /* GOT-relative unspecs only make sense under -fpic.  */
	    if (!flag_pic)
	      abort ();
	    goto is_legitimate_pic;

	  case UNSPEC_GOTTPOFF:
	  case UNSPEC_NTPOFF:
	  case UNSPEC_DTPOFF:
	    /* TLS displacements are checked by the move patterns.  */
	    break;

	  default:
	    reason = "invalid address unspec";
	    goto report_error;
	  }

      else if (flag_pic && SYMBOLIC_CONST (disp))
	{
	is_legitimate_pic:
	  if (TARGET_64BIT && (index || base))
	    {
	      reason = "non-constant pic memory reference";
	      goto report_error;
	    }
	  if (! legitimate_pic_address_disp_p (disp))
	    {
	      reason = "displacement is an invalid pic construct";
	      goto report_error;
	    }

	  /* This code used to verify that a symbolic pic displacement
	     includes the pic_offset_table_rtx register.

	     While this is good idea, unfortunately these constructs may
	     be created by "adds using lea" optimization for incorrect
	     code like:

	     int a;
	     int foo(int i)
	       {
	         return *(&a+i);
	       }

	     This code is nonsensical, but results in addressing
	     GOT table with pic_offset_table_rtx base.  We can't
	     just refuse it easily, since it gets matched by
	     "addsi3" pattern, that later gets split to lea in the
	     case output register differs from input.  While this
	     can be handled by separate addsi pattern for this case
	     that never results in lea, this seems to be easier and
	     correct fix for crash to disable this test.  */
	}
      else if (!CONSTANT_ADDRESS_P (disp))
	{
	  reason = "displacement is not constant";
	  goto report_error;
	}
    }

  /* Everything looks valid.  */
  if (TARGET_DEBUG_ADDR)
    fprintf (stderr, "Success.\n");
  return TRUE;

 report_error:
  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr, "Error: %s\n", reason);
      debug_rtx (reason_rtx);
    }
  return FALSE;
}
3b3c6a3f 5210\f
55efb413
JW
5211/* Return an unique alias set for the GOT. */
5212
0f290768 5213static HOST_WIDE_INT
55efb413
JW
5214ix86_GOT_alias_set ()
5215{
5bf0ebab
RH
5216 static HOST_WIDE_INT set = -1;
5217 if (set == -1)
5218 set = new_alias_set ();
5219 return set;
0f290768 5220}
55efb413 5221
3b3c6a3f
MM
/* Return a legitimate reference for ORIG (an address) using the
   register REG.  If REG is 0, a new pseudo is generated.

   There are two types of references that must be handled:

   1. Global data references must load the address from the GOT, via
      the PIC reg.  An insn is emitted to do this load, and the reg is
      returned.

   2. Static data references, constant pool addresses, and code labels
      compute the address as an offset from the GOT, whose base is in
      the PIC reg.  Static data objects have SYMBOL_REF_FLAG set to
      differentiate them from global data objects.  The returned
      address is the PIC reg + an unspec constant.

   GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
   reg also appears in the address.  */

rtx
legitimize_pic_address (orig, reg)
     rtx orig;
     rtx reg;
{
  rtx addr = orig;
  rtx new = orig;
  rtx base;

  if (local_symbolic_operand (addr, Pmode))
    {
      /* In 64bit mode we can address such objects directly.  */
      if (TARGET_64BIT)
	new = addr;
      else
	{
	  /* This symbol may be referenced via a displacement from the PIC
	     base address (@GOTOFF).  */

	  if (reload_in_progress)
	    regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
	  new = gen_rtx_CONST (Pmode, new);
	  new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);

	  if (reg != 0)
	    {
	      emit_move_insn (reg, new);
	      new = reg;
	    }
	}
    }
  else if (GET_CODE (addr) == SYMBOL_REF)
    {
      if (TARGET_64BIT)
	{
	  /* Non-local symbol: load its address @GOTPCREL.  */
	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
	  new = gen_rtx_CONST (Pmode, new);
	  new = gen_rtx_MEM (Pmode, new);
	  RTX_UNCHANGING_P (new) = 1;
	  set_mem_alias_set (new, ix86_GOT_alias_set ());

	  if (reg == 0)
	    reg = gen_reg_rtx (Pmode);
	  /* Use directly gen_movsi, otherwise the address is loaded
	     into register for CSE.  We don't want to CSE this addresses,
	     instead we CSE addresses from the GOT table, so skip this.  */
	  emit_insn (gen_movsi (reg, new));
	  new = reg;
	}
      else
	{
	  /* This symbol must be referenced via a load from the
	     Global Offset Table (@GOT).  */

	  if (reload_in_progress)
	    regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
	  new = gen_rtx_CONST (Pmode, new);
	  new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
	  new = gen_rtx_MEM (Pmode, new);
	  RTX_UNCHANGING_P (new) = 1;
	  set_mem_alias_set (new, ix86_GOT_alias_set ());

	  if (reg == 0)
	    reg = gen_reg_rtx (Pmode);
	  emit_move_insn (reg, new);
	  new = reg;
	}
    }
  else
    {
      if (GET_CODE (addr) == CONST)
	{
	  addr = XEXP (addr, 0);

	  /* We must match stuff we generate before.  Assume the only
	     unspecs that can get here are ours.  Not that we could do
	     anything with them anyway...  */
	  if (GET_CODE (addr) == UNSPEC
	      || (GET_CODE (addr) == PLUS
		  && GET_CODE (XEXP (addr, 0)) == UNSPEC))
	    return orig;
	  if (GET_CODE (addr) != PLUS)
	    abort ();
	}
      if (GET_CODE (addr) == PLUS)
	{
	  rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);

	  /* Check first to see if this is a constant offset from a @GOTOFF
	     symbol reference.  */
	  if (local_symbolic_operand (op0, Pmode)
	      && GET_CODE (op1) == CONST_INT)
	    {
	      if (!TARGET_64BIT)
		{
		  if (reload_in_progress)
		    regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
		  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
					UNSPEC_GOTOFF);
		  new = gen_rtx_PLUS (Pmode, new, op1);
		  new = gen_rtx_CONST (Pmode, new);
		  new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);

		  if (reg != 0)
		    {
		      emit_move_insn (reg, new);
		      new = reg;
		    }
		}
	      else
		{
		  /* ??? We need to limit offsets here.  */
		}
	    }
	  else
	    {
	      /* Legitimize both operands recursively, then recombine,
		 keeping any constant part outermost.  */
	      base = legitimize_pic_address (XEXP (addr, 0), reg);
	      new  = legitimize_pic_address (XEXP (addr, 1),
					     base == reg ? NULL_RTX : reg);

	      if (GET_CODE (new) == CONST_INT)
		new = plus_constant (base, INTVAL (new));
	      else
		{
		  if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
		    {
		      base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
		      new = XEXP (new, 1);
		    }
		  new = gen_rtx_PLUS (Pmode, base, new);
		}
	    }
	}
    }
  return new;
}
fb49053f 5378
/* Record in SYMBOL_REF encoding how DECL may be accessed: under -fpic,
   flag symbols that bind locally so they can be addressed @GOTOFF; for
   thread-local variables, prefix the symbol name with '%' plus a
   character identifying the chosen TLS access model.  FIRST is unused
   here.  */

static void
ix86_encode_section_info (decl, first)
     tree decl;
     int first ATTRIBUTE_UNUSED;
{
  bool local_p = (*targetm.binds_local_p) (decl);
  rtx rtl, symbol;

  rtl = DECL_P (decl) ? DECL_RTL (decl) : TREE_CST_RTL (decl);
  if (GET_CODE (rtl) != MEM)
    return;
  symbol = XEXP (rtl, 0);
  if (GET_CODE (symbol) != SYMBOL_REF)
    return;

  /* For basic x86, if using PIC, mark a SYMBOL_REF for a non-global
     symbol so that we may access it directly in the GOT.  */

  if (flag_pic)
    SYMBOL_REF_FLAG (symbol) = local_p;

  /* For ELF, encode thread-local data with %[GLil] for "global dynamic",
     "local dynamic", "initial exec" or "local exec" TLS models
     respectively.  */

  if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL (decl))
    {
      const char *symbol_str;
      char *newstr;
      size_t len;
      enum tls_model kind;

      /* Choose the cheapest model that is valid for this symbol.  */
      if (!flag_pic)
	{
	  if (local_p)
	    kind = TLS_MODEL_LOCAL_EXEC;
	  else
	    kind = TLS_MODEL_INITIAL_EXEC;
	}
      /* Local dynamic is inefficient when we're not combining the
	 parts of the address.  */
      else if (optimize && local_p)
	kind = TLS_MODEL_LOCAL_DYNAMIC;
      else
	kind = TLS_MODEL_GLOBAL_DYNAMIC;
      /* Never use a model more aggressive than the user requested.  */
      if (kind < flag_tls_default)
	kind = flag_tls_default;

      symbol_str = XSTR (symbol, 0);

      /* If already encoded, either we are done or we must strip the
	 old encoding before applying the new one.  */
      if (symbol_str[0] == '%')
	{
	  if (symbol_str[1] == tls_model_chars[kind])
	    return;
	  symbol_str += 2;
	}
      len = strlen (symbol_str) + 1;
      newstr = alloca (len + 2);

      newstr[0] = '%';
      newstr[1] = tls_model_chars[kind];
      memcpy (newstr + 2, symbol_str, len);

      /* len counts the NUL; ggc_alloc_string wants the length without it.  */
      XSTR (symbol, 0) = ggc_alloc_string (newstr, len + 2 - 1);
    }
}
f996902d
RH
5445
5446/* Undo the above when printing symbol names. */
5447
5448static const char *
5449ix86_strip_name_encoding (str)
5450 const char *str;
5451{
5452 if (str[0] == '%')
5453 str += 2;
5454 if (str [0] == '*')
5455 str += 1;
5456 return str;
5457}
3b3c6a3f 5458\f
f996902d
RH
/* Load the thread pointer into a register.  Builds
   (const (unspec [0] UNSPEC_TP)) and forces it into a fresh register,
   returning that register.  */

static rtx
get_thread_pointer ()
{
  rtx tp;

  tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
  tp = gen_rtx_CONST (Pmode, tp);
  tp = force_reg (Pmode, tp);

  return tp;
}
fce5a9f2 5472
3b3c6a3f
MM
/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   MODE and WIN are passed so that this macro can use
   GO_IF_LEGITIMATE_ADDRESS.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the 80386, we handle X+REG by loading X into a register R and
   using R+REG.  R will go in a general reg and indexing will be used.
   However, if REG is a broken-out memory address or multiplication,
   nothing needs to be done because REG can certainly go in a general reg.

   When -fpic is used, special handling is needed for symbolic references.
   See comments by legitimize_pic_address in i386.c for details.  */

rtx
legitimize_address (x, oldx, mode)
     register rtx x;
     register rtx oldx ATTRIBUTE_UNUSED;
     enum machine_mode mode;
{
  int changed = 0;
  /* Reused both for the TLS model code and for shift counts below.  */
  unsigned log;

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
	       GET_MODE_NAME (mode));
      debug_rtx (x);
    }

  /* TLS symbols get model-specific address sequences.  */
  log = tls_symbolic_operand (x, mode);
  if (log)
    {
      rtx dest, base, off, pic;

      switch (log)
	{
	case TLS_MODEL_GLOBAL_DYNAMIC:
	  dest = gen_reg_rtx (Pmode);
	  emit_insn (gen_tls_global_dynamic (dest, x));
	  break;

	case TLS_MODEL_LOCAL_DYNAMIC:
	  base = gen_reg_rtx (Pmode);
	  emit_insn (gen_tls_local_dynamic_base (base));

	  off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
	  off = gen_rtx_CONST (Pmode, off);

	  return gen_rtx_PLUS (Pmode, base, off);

	case TLS_MODEL_INITIAL_EXEC:
	  if (flag_pic)
	    {
	      if (reload_in_progress)
		regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
	      pic = pic_offset_table_rtx;
	    }
	  else
	    {
	      pic = gen_reg_rtx (Pmode);
	      emit_insn (gen_set_got (pic));
	    }

	  base = get_thread_pointer ();

	  off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_GOTTPOFF);
	  off = gen_rtx_CONST (Pmode, off);
	  off = gen_rtx_PLUS (Pmode, pic, off);
	  off = gen_rtx_MEM (Pmode, off);
	  RTX_UNCHANGING_P (off) = 1;
	  set_mem_alias_set (off, ix86_GOT_alias_set ());

	  /* Damn Sun for specifying a set of dynamic relocations without
	     considering the two-operand nature of the architecture!
	     We'd be much better off with a "GOTNTPOFF" relocation that
	     already contained the negated constant.  */
	  /* ??? Using negl and reg+reg addressing appears to be a lose
	     size-wise.  The negl is two bytes, just like the extra movl
	     incurred by the two-operand subl, but reg+reg addressing
	     uses the two-byte modrm form, unlike plain reg.  */

	  dest = gen_reg_rtx (Pmode);
	  emit_insn (gen_subsi3 (dest, base, off));
	  break;

	case TLS_MODEL_LOCAL_EXEC:
	  base = get_thread_pointer ();

	  off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
				TARGET_GNU_TLS ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
	  off = gen_rtx_CONST (Pmode, off);

	  if (TARGET_GNU_TLS)
	    return gen_rtx_PLUS (Pmode, base, off);
	  else
	    {
	      dest = gen_reg_rtx (Pmode);
	      emit_insn (gen_subsi3 (dest, base, off));
	    }
	  break;

	default:
	  abort ();
	}

      return dest;
    }

  if (flag_pic && SYMBOLIC_CONST (x))
    return legitimize_pic_address (x, 0);

  /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
  if (GET_CODE (x) == ASHIFT
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
    {
      changed = 1;
      x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
			GEN_INT (1 << log));
    }

  if (GET_CODE (x) == PLUS)
    {
      /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */

      if (GET_CODE (XEXP (x, 0)) == ASHIFT
	  && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
	  && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
	{
	  changed = 1;
	  XEXP (x, 0) = gen_rtx_MULT (Pmode,
				      force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
				      GEN_INT (1 << log));
	}

      if (GET_CODE (XEXP (x, 1)) == ASHIFT
	  && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
	  && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
	{
	  changed = 1;
	  XEXP (x, 1) = gen_rtx_MULT (Pmode,
				      force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
				      GEN_INT (1 << log));
	}

      /* Put multiply first if it isn't already.  */
      if (GET_CODE (XEXP (x, 1)) == MULT)
	{
	  rtx tmp = XEXP (x, 0);
	  XEXP (x, 0) = XEXP (x, 1);
	  XEXP (x, 1) = tmp;
	  changed = 1;
	}

      /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can be
	 created by virtual register instantiation, register elimination, and
	 similar optimizations.  */
      if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
	{
	  changed = 1;
	  x = gen_rtx_PLUS (Pmode,
			    gen_rtx_PLUS (Pmode, XEXP (x, 0),
					  XEXP (XEXP (x, 1), 0)),
			    XEXP (XEXP (x, 1), 1));
	}

      /* Canonicalize
	 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
      else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
	       && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	       && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
	       && CONSTANT_P (XEXP (x, 1)))
	{
	  rtx constant;
	  rtx other = NULL_RTX;

	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
	    {
	      constant = XEXP (x, 1);
	      other = XEXP (XEXP (XEXP (x, 0), 1), 1);
	    }
	  else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
	    {
	      constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
	      other = XEXP (x, 1);
	    }
	  else
	    constant = 0;

	  if (constant)
	    {
	      changed = 1;
	      x = gen_rtx_PLUS (Pmode,
				gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
					      XEXP (XEXP (XEXP (x, 0), 1), 0)),
				plus_constant (other, INTVAL (constant)));
	    }
	}

      if (changed && legitimate_address_p (mode, x, FALSE))
	return x;

      /* Force any remaining multiplications into registers.  */
      if (GET_CODE (XEXP (x, 0)) == MULT)
	{
	  changed = 1;
	  XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
	}

      if (GET_CODE (XEXP (x, 1)) == MULT)
	{
	  changed = 1;
	  XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
	}

      if (changed
	  && GET_CODE (XEXP (x, 1)) == REG
	  && GET_CODE (XEXP (x, 0)) == REG)
	return x;

      if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
	{
	  changed = 1;
	  x = legitimize_pic_address (x, 0);
	}

      if (changed && legitimate_address_p (mode, x, FALSE))
	return x;

      /* Last resort: force the non-register operand into a register.  */
      if (GET_CODE (XEXP (x, 0)) == REG)
	{
	  register rtx temp = gen_reg_rtx (Pmode);
	  register rtx val  = force_operand (XEXP (x, 1), temp);
	  if (val != temp)
	    emit_move_insn (temp, val);

	  XEXP (x, 1) = temp;
	  return x;
	}

      else if (GET_CODE (XEXP (x, 1)) == REG)
	{
	  register rtx temp = gen_reg_rtx (Pmode);
	  register rtx val  = force_operand (XEXP (x, 0), temp);
	  if (val != temp)
	    emit_move_insn (temp, val);

	  XEXP (x, 0) = temp;
	  return x;
	}
    }

  return x;
}
2a2ab3f9
JVA
5736\f
5737/* Print an integer constant expression in assembler syntax. Addition
5738 and subtraction are the only arithmetic that may appear in these
5739 expressions. FILE is the stdio stream to write to, X is the rtx, and
5740 CODE is the operand print code from the output string. */
5741
5742static void
5743output_pic_addr_const (file, x, code)
5744 FILE *file;
5745 rtx x;
5746 int code;
5747{
5748 char buf[256];
5749
5750 switch (GET_CODE (x))
5751 {
5752 case PC:
5753 if (flag_pic)
5754 putc ('.', file);
5755 else
5756 abort ();
5757 break;
5758
5759 case SYMBOL_REF:
91bb873f
RH
5760 assemble_name (file, XSTR (x, 0));
5761 if (code == 'P' && ! SYMBOL_REF_FLAG (x))
5762 fputs ("@PLT", file);
2a2ab3f9
JVA
5763 break;
5764
91bb873f
RH
5765 case LABEL_REF:
5766 x = XEXP (x, 0);
5767 /* FALLTHRU */
2a2ab3f9
JVA
5768 case CODE_LABEL:
5769 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
5770 assemble_name (asm_out_file, buf);
5771 break;
5772
5773 case CONST_INT:
f64cecad 5774 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
2a2ab3f9
JVA
5775 break;
5776
5777 case CONST:
5778 /* This used to output parentheses around the expression,
5779 but that does not work on the 386 (either ATT or BSD assembler). */
5780 output_pic_addr_const (file, XEXP (x, 0), code);
5781 break;
5782
5783 case CONST_DOUBLE:
5784 if (GET_MODE (x) == VOIDmode)
5785 {
5786 /* We can use %d if the number is <32 bits and positive. */
5787 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
f64cecad
JC
5788 fprintf (file, "0x%lx%08lx",
5789 (unsigned long) CONST_DOUBLE_HIGH (x),
5790 (unsigned long) CONST_DOUBLE_LOW (x));
2a2ab3f9 5791 else
f64cecad 5792 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
2a2ab3f9
JVA
5793 }
5794 else
5795 /* We can't handle floating point constants;
5796 PRINT_OPERAND must handle them. */
5797 output_operand_lossage ("floating constant misused");
5798 break;
5799
5800 case PLUS:
e9a25f70 5801 /* Some assemblers need integer constants to appear first. */
2a2ab3f9
JVA
5802 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
5803 {
2a2ab3f9 5804 output_pic_addr_const (file, XEXP (x, 0), code);
e075ae69 5805 putc ('+', file);
e9a25f70 5806 output_pic_addr_const (file, XEXP (x, 1), code);
2a2ab3f9 5807 }
91bb873f 5808 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
2a2ab3f9 5809 {
2a2ab3f9 5810 output_pic_addr_const (file, XEXP (x, 1), code);
e075ae69 5811 putc ('+', file);
e9a25f70 5812 output_pic_addr_const (file, XEXP (x, 0), code);
2a2ab3f9 5813 }
91bb873f
RH
5814 else
5815 abort ();
2a2ab3f9
JVA
5816 break;
5817
5818 case MINUS:
80f33d06 5819 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
2a2ab3f9 5820 output_pic_addr_const (file, XEXP (x, 0), code);
e075ae69 5821 putc ('-', file);
2a2ab3f9 5822 output_pic_addr_const (file, XEXP (x, 1), code);
80f33d06 5823 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
2a2ab3f9
JVA
5824 break;
5825
91bb873f
RH
5826 case UNSPEC:
5827 if (XVECLEN (x, 0) != 1)
5bf0ebab 5828 abort ();
91bb873f
RH
5829 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
5830 switch (XINT (x, 1))
77ebd435 5831 {
8ee41eaf 5832 case UNSPEC_GOT:
77ebd435
AJ
5833 fputs ("@GOT", file);
5834 break;
8ee41eaf 5835 case UNSPEC_GOTOFF:
77ebd435
AJ
5836 fputs ("@GOTOFF", file);
5837 break;
8ee41eaf 5838 case UNSPEC_GOTPCREL:
edfe8595 5839 fputs ("@GOTPCREL(%rip)", file);
6eb791fc 5840 break;
f996902d
RH
5841 case UNSPEC_GOTTPOFF:
5842 fputs ("@GOTTPOFF", file);
5843 break;
5844 case UNSPEC_TPOFF:
5845 fputs ("@TPOFF", file);
5846 break;
5847 case UNSPEC_NTPOFF:
5848 fputs ("@NTPOFF", file);
5849 break;
5850 case UNSPEC_DTPOFF:
5851 fputs ("@DTPOFF", file);
5852 break;
77ebd435
AJ
5853 default:
5854 output_operand_lossage ("invalid UNSPEC as operand");
5855 break;
5856 }
91bb873f
RH
5857 break;
5858
2a2ab3f9
JVA
5859 default:
5860 output_operand_lossage ("invalid expression as operand");
5861 }
5862}
1865dbb5 5863
0f290768 5864/* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
1865dbb5
JM
5865 We need to handle our special PIC relocations. */
5866
0f290768 5867void
1865dbb5
JM
5868i386_dwarf_output_addr_const (file, x)
5869 FILE *file;
5870 rtx x;
5871{
14f73b5a 5872#ifdef ASM_QUAD
18b5b8d6 5873 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
14f73b5a
JH
5874#else
5875 if (TARGET_64BIT)
5876 abort ();
18b5b8d6 5877 fprintf (file, "%s", ASM_LONG);
14f73b5a 5878#endif
1865dbb5
JM
5879 if (flag_pic)
5880 output_pic_addr_const (file, x, '\0');
5881 else
5882 output_addr_const (file, x);
5883 fputc ('\n', file);
5884}
5885
5886/* In the name of slightly smaller debug output, and to cater to
5887 general assembler losage, recognize PIC+GOTOFF and turn it back
5888 into a direct symbol reference. */
5889
5890rtx
5891i386_simplify_dwarf_addr (orig_x)
5892 rtx orig_x;
5893{
ec65b2e3 5894 rtx x = orig_x, y;
1865dbb5 5895
4c8c0dec
JJ
5896 if (GET_CODE (x) == MEM)
5897 x = XEXP (x, 0);
5898
6eb791fc
JH
5899 if (TARGET_64BIT)
5900 {
5901 if (GET_CODE (x) != CONST
5902 || GET_CODE (XEXP (x, 0)) != UNSPEC
8ee41eaf 5903 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
4c8c0dec 5904 || GET_CODE (orig_x) != MEM)
6eb791fc
JH
5905 return orig_x;
5906 return XVECEXP (XEXP (x, 0), 0, 0);
5907 }
5908
1865dbb5 5909 if (GET_CODE (x) != PLUS
1865dbb5
JM
5910 || GET_CODE (XEXP (x, 1)) != CONST)
5911 return orig_x;
5912
ec65b2e3
JJ
5913 if (GET_CODE (XEXP (x, 0)) == REG
5914 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
5915 /* %ebx + GOT/GOTOFF */
5916 y = NULL;
5917 else if (GET_CODE (XEXP (x, 0)) == PLUS)
5918 {
5919 /* %ebx + %reg * scale + GOT/GOTOFF */
5920 y = XEXP (x, 0);
5921 if (GET_CODE (XEXP (y, 0)) == REG
5922 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
5923 y = XEXP (y, 1);
5924 else if (GET_CODE (XEXP (y, 1)) == REG
5925 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
5926 y = XEXP (y, 0);
5927 else
5928 return orig_x;
5929 if (GET_CODE (y) != REG
5930 && GET_CODE (y) != MULT
5931 && GET_CODE (y) != ASHIFT)
5932 return orig_x;
5933 }
5934 else
5935 return orig_x;
5936
1865dbb5
JM
5937 x = XEXP (XEXP (x, 1), 0);
5938 if (GET_CODE (x) == UNSPEC
8ee41eaf
RH
5939 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
5940 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
ec65b2e3
JJ
5941 {
5942 if (y)
5943 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
5944 return XVECEXP (x, 0, 0);
5945 }
1865dbb5
JM
5946
5947 if (GET_CODE (x) == PLUS
5948 && GET_CODE (XEXP (x, 0)) == UNSPEC
5949 && GET_CODE (XEXP (x, 1)) == CONST_INT
8ee41eaf
RH
5950 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
5951 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
5952 && GET_CODE (orig_x) != MEM)))
ec65b2e3
JJ
5953 {
5954 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
5955 if (y)
5956 return gen_rtx_PLUS (Pmode, y, x);
5957 return x;
5958 }
1865dbb5
JM
5959
5960 return orig_x;
5961}
2a2ab3f9 5962\f
a269a03c 5963static void
e075ae69 5964put_condition_code (code, mode, reverse, fp, file)
a269a03c 5965 enum rtx_code code;
e075ae69
RH
5966 enum machine_mode mode;
5967 int reverse, fp;
a269a03c
JC
5968 FILE *file;
5969{
a269a03c
JC
5970 const char *suffix;
5971
9a915772
JH
5972 if (mode == CCFPmode || mode == CCFPUmode)
5973 {
5974 enum rtx_code second_code, bypass_code;
5975 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
5976 if (bypass_code != NIL || second_code != NIL)
b531087a 5977 abort ();
9a915772
JH
5978 code = ix86_fp_compare_code_to_integer (code);
5979 mode = CCmode;
5980 }
a269a03c
JC
5981 if (reverse)
5982 code = reverse_condition (code);
e075ae69 5983
a269a03c
JC
5984 switch (code)
5985 {
5986 case EQ:
5987 suffix = "e";
5988 break;
a269a03c
JC
5989 case NE:
5990 suffix = "ne";
5991 break;
a269a03c 5992 case GT:
7e08e190 5993 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
e075ae69
RH
5994 abort ();
5995 suffix = "g";
a269a03c 5996 break;
a269a03c 5997 case GTU:
e075ae69
RH
5998 /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
5999 Those same assemblers have the same but opposite losage on cmov. */
7e08e190 6000 if (mode != CCmode)
0f290768 6001 abort ();
e075ae69 6002 suffix = fp ? "nbe" : "a";
a269a03c 6003 break;
a269a03c 6004 case LT:
9076b9c1 6005 if (mode == CCNOmode || mode == CCGOCmode)
a269a03c 6006 suffix = "s";
7e08e190 6007 else if (mode == CCmode || mode == CCGCmode)
e075ae69 6008 suffix = "l";
9076b9c1 6009 else
0f290768 6010 abort ();
a269a03c 6011 break;
a269a03c 6012 case LTU:
9076b9c1 6013 if (mode != CCmode)
0f290768 6014 abort ();
a269a03c
JC
6015 suffix = "b";
6016 break;
a269a03c 6017 case GE:
9076b9c1 6018 if (mode == CCNOmode || mode == CCGOCmode)
a269a03c 6019 suffix = "ns";
7e08e190 6020 else if (mode == CCmode || mode == CCGCmode)
e075ae69 6021 suffix = "ge";
9076b9c1 6022 else
0f290768 6023 abort ();
a269a03c 6024 break;
a269a03c 6025 case GEU:
e075ae69 6026 /* ??? As above. */
7e08e190 6027 if (mode != CCmode)
0f290768 6028 abort ();
7e08e190 6029 suffix = fp ? "nb" : "ae";
a269a03c 6030 break;
a269a03c 6031 case LE:
7e08e190 6032 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
e075ae69
RH
6033 abort ();
6034 suffix = "le";
a269a03c 6035 break;
a269a03c 6036 case LEU:
9076b9c1
JH
6037 if (mode != CCmode)
6038 abort ();
7e08e190 6039 suffix = "be";
a269a03c 6040 break;
3a3677ff 6041 case UNORDERED:
9e7adcb3 6042 suffix = fp ? "u" : "p";
3a3677ff
RH
6043 break;
6044 case ORDERED:
9e7adcb3 6045 suffix = fp ? "nu" : "np";
3a3677ff 6046 break;
a269a03c
JC
6047 default:
6048 abort ();
6049 }
6050 fputs (suffix, file);
6051}
6052
e075ae69
RH
6053void
6054print_reg (x, code, file)
6055 rtx x;
6056 int code;
6057 FILE *file;
e5cb57e8 6058{
e075ae69 6059 if (REGNO (x) == ARG_POINTER_REGNUM
564d80f4 6060 || REGNO (x) == FRAME_POINTER_REGNUM
e075ae69
RH
6061 || REGNO (x) == FLAGS_REG
6062 || REGNO (x) == FPSR_REG)
6063 abort ();
e9a25f70 6064
5bf0ebab 6065 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
e075ae69
RH
6066 putc ('%', file);
6067
ef6257cd 6068 if (code == 'w' || MMX_REG_P (x))
e075ae69
RH
6069 code = 2;
6070 else if (code == 'b')
6071 code = 1;
6072 else if (code == 'k')
6073 code = 4;
3f3f2124
JH
6074 else if (code == 'q')
6075 code = 8;
e075ae69
RH
6076 else if (code == 'y')
6077 code = 3;
6078 else if (code == 'h')
6079 code = 0;
6080 else
6081 code = GET_MODE_SIZE (GET_MODE (x));
e9a25f70 6082
3f3f2124
JH
6083 /* Irritatingly, AMD extended registers use different naming convention
6084 from the normal registers. */
6085 if (REX_INT_REG_P (x))
6086 {
885a70fd
JH
6087 if (!TARGET_64BIT)
6088 abort ();
3f3f2124
JH
6089 switch (code)
6090 {
ef6257cd 6091 case 0:
c725bd79 6092 error ("extended registers have no high halves");
3f3f2124
JH
6093 break;
6094 case 1:
6095 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
6096 break;
6097 case 2:
6098 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
6099 break;
6100 case 4:
6101 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
6102 break;
6103 case 8:
6104 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
6105 break;
6106 default:
c725bd79 6107 error ("unsupported operand size for extended register");
3f3f2124
JH
6108 break;
6109 }
6110 return;
6111 }
e075ae69
RH
6112 switch (code)
6113 {
6114 case 3:
6115 if (STACK_TOP_P (x))
6116 {
6117 fputs ("st(0)", file);
6118 break;
6119 }
6120 /* FALLTHRU */
e075ae69 6121 case 8:
3f3f2124 6122 case 4:
e075ae69 6123 case 12:
446988df 6124 if (! ANY_FP_REG_P (x))
885a70fd 6125 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
e075ae69 6126 /* FALLTHRU */
a7180f70 6127 case 16:
e075ae69
RH
6128 case 2:
6129 fputs (hi_reg_name[REGNO (x)], file);
6130 break;
6131 case 1:
6132 fputs (qi_reg_name[REGNO (x)], file);
6133 break;
6134 case 0:
6135 fputs (qi_high_reg_name[REGNO (x)], file);
6136 break;
6137 default:
6138 abort ();
fe25fea3 6139 }
e5cb57e8
SC
6140}
6141
f996902d
RH
6142/* Locate some local-dynamic symbol still in use by this function
6143 so that we can print its name in some tls_local_dynamic_base
6144 pattern. */
6145
6146static const char *
6147get_some_local_dynamic_name ()
6148{
6149 rtx insn;
6150
6151 if (cfun->machine->some_ld_name)
6152 return cfun->machine->some_ld_name;
6153
6154 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
6155 if (INSN_P (insn)
6156 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
6157 return cfun->machine->some_ld_name;
6158
6159 abort ();
6160}
6161
6162static int
6163get_some_local_dynamic_name_1 (px, data)
6164 rtx *px;
6165 void *data ATTRIBUTE_UNUSED;
6166{
6167 rtx x = *px;
6168
6169 if (GET_CODE (x) == SYMBOL_REF
6170 && local_dynamic_symbolic_operand (x, Pmode))
6171 {
6172 cfun->machine->some_ld_name = XSTR (x, 0);
6173 return 1;
6174 }
6175
6176 return 0;
6177}
6178
2a2ab3f9 6179/* Meaning of CODE:
fe25fea3 6180 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
e5cb57e8 6181 C -- print opcode suffix for set/cmov insn.
fe25fea3 6182 c -- like C, but print reversed condition
ef6257cd 6183 F,f -- likewise, but for floating-point.
048b1c95
JJ
6184 O -- if CMOV_SUN_AS_SYNTAX, expand to "w.", "l." or "q.", otherwise
6185 nothing
2a2ab3f9
JVA
6186 R -- print the prefix for register names.
6187 z -- print the opcode suffix for the size of the current operand.
6188 * -- print a star (in certain assembler syntax)
fb204271 6189 A -- print an absolute memory reference.
2a2ab3f9 6190 w -- print the operand as if it's a "word" (HImode) even if it isn't.
2d49677f
SC
6191 s -- print a shift double count, followed by the assemblers argument
6192 delimiter.
fe25fea3
SC
6193 b -- print the QImode name of the register for the indicated operand.
6194 %b0 would print %al if operands[0] is reg 0.
6195 w -- likewise, print the HImode name of the register.
6196 k -- likewise, print the SImode name of the register.
3f3f2124 6197 q -- likewise, print the DImode name of the register.
ef6257cd
JH
6198 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
6199 y -- print "st(0)" instead of "st" as a register.
a46d1d38 6200 D -- print condition for SSE cmp instruction.
ef6257cd
JH
6201 P -- if PIC, print an @PLT suffix.
6202 X -- don't print any sort of PIC '@' suffix for a symbol.
f996902d 6203 & -- print some in-use local-dynamic symbol name.
a46d1d38 6204 */
2a2ab3f9
JVA
6205
6206void
6207print_operand (file, x, code)
6208 FILE *file;
6209 rtx x;
6210 int code;
6211{
6212 if (code)
6213 {
6214 switch (code)
6215 {
6216 case '*':
80f33d06 6217 if (ASSEMBLER_DIALECT == ASM_ATT)
2a2ab3f9
JVA
6218 putc ('*', file);
6219 return;
6220
f996902d
RH
6221 case '&':
6222 assemble_name (file, get_some_local_dynamic_name ());
6223 return;
6224
fb204271 6225 case 'A':
80f33d06 6226 if (ASSEMBLER_DIALECT == ASM_ATT)
fb204271 6227 putc ('*', file);
80f33d06 6228 else if (ASSEMBLER_DIALECT == ASM_INTEL)
fb204271
DN
6229 {
6230 /* Intel syntax. For absolute addresses, registers should not
6231 be surrounded by braces. */
6232 if (GET_CODE (x) != REG)
6233 {
6234 putc ('[', file);
6235 PRINT_OPERAND (file, x, 0);
6236 putc (']', file);
6237 return;
6238 }
6239 }
80f33d06
GS
6240 else
6241 abort ();
fb204271
DN
6242
6243 PRINT_OPERAND (file, x, 0);
6244 return;
6245
6246
2a2ab3f9 6247 case 'L':
80f33d06 6248 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 6249 putc ('l', file);
2a2ab3f9
JVA
6250 return;
6251
6252 case 'W':
80f33d06 6253 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 6254 putc ('w', file);
2a2ab3f9
JVA
6255 return;
6256
6257 case 'B':
80f33d06 6258 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 6259 putc ('b', file);
2a2ab3f9
JVA
6260 return;
6261
6262 case 'Q':
80f33d06 6263 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 6264 putc ('l', file);
2a2ab3f9
JVA
6265 return;
6266
6267 case 'S':
80f33d06 6268 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 6269 putc ('s', file);
2a2ab3f9
JVA
6270 return;
6271
5f1ec3e6 6272 case 'T':
80f33d06 6273 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 6274 putc ('t', file);
5f1ec3e6
JVA
6275 return;
6276
2a2ab3f9
JVA
6277 case 'z':
6278 /* 387 opcodes don't get size suffixes if the operands are
0f290768 6279 registers. */
2a2ab3f9
JVA
6280 if (STACK_REG_P (x))
6281 return;
6282
831c4e87
KC
6283 /* Likewise if using Intel opcodes. */
6284 if (ASSEMBLER_DIALECT == ASM_INTEL)
6285 return;
6286
6287 /* This is the size of op from size of operand. */
2a2ab3f9
JVA
6288 switch (GET_MODE_SIZE (GET_MODE (x)))
6289 {
2a2ab3f9 6290 case 2:
155d8a47
JW
6291#ifdef HAVE_GAS_FILDS_FISTS
6292 putc ('s', file);
6293#endif
2a2ab3f9
JVA
6294 return;
6295
6296 case 4:
6297 if (GET_MODE (x) == SFmode)
6298 {
e075ae69 6299 putc ('s', file);
2a2ab3f9
JVA
6300 return;
6301 }
6302 else
e075ae69 6303 putc ('l', file);
2a2ab3f9
JVA
6304 return;
6305
5f1ec3e6 6306 case 12:
2b589241 6307 case 16:
e075ae69
RH
6308 putc ('t', file);
6309 return;
5f1ec3e6 6310
2a2ab3f9
JVA
6311 case 8:
6312 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
56c0e8fa
JVA
6313 {
6314#ifdef GAS_MNEMONICS
e075ae69 6315 putc ('q', file);
56c0e8fa 6316#else
e075ae69
RH
6317 putc ('l', file);
6318 putc ('l', file);
56c0e8fa
JVA
6319#endif
6320 }
e075ae69
RH
6321 else
6322 putc ('l', file);
2a2ab3f9 6323 return;
155d8a47
JW
6324
6325 default:
6326 abort ();
2a2ab3f9 6327 }
4af3895e
JVA
6328
6329 case 'b':
6330 case 'w':
6331 case 'k':
3f3f2124 6332 case 'q':
4af3895e
JVA
6333 case 'h':
6334 case 'y':
5cb6195d 6335 case 'X':
e075ae69 6336 case 'P':
4af3895e
JVA
6337 break;
6338
2d49677f
SC
6339 case 's':
6340 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
6341 {
6342 PRINT_OPERAND (file, x, 0);
e075ae69 6343 putc (',', file);
2d49677f 6344 }
a269a03c
JC
6345 return;
6346
a46d1d38
JH
6347 case 'D':
6348 /* Little bit of braindamage here. The SSE compare instructions
6349 does use completely different names for the comparisons that the
6350 fp conditional moves. */
6351 switch (GET_CODE (x))
6352 {
6353 case EQ:
6354 case UNEQ:
6355 fputs ("eq", file);
6356 break;
6357 case LT:
6358 case UNLT:
6359 fputs ("lt", file);
6360 break;
6361 case LE:
6362 case UNLE:
6363 fputs ("le", file);
6364 break;
6365 case UNORDERED:
6366 fputs ("unord", file);
6367 break;
6368 case NE:
6369 case LTGT:
6370 fputs ("neq", file);
6371 break;
6372 case UNGE:
6373 case GE:
6374 fputs ("nlt", file);
6375 break;
6376 case UNGT:
6377 case GT:
6378 fputs ("nle", file);
6379 break;
6380 case ORDERED:
6381 fputs ("ord", file);
6382 break;
6383 default:
6384 abort ();
6385 break;
6386 }
6387 return;
048b1c95
JJ
6388 case 'O':
6389#ifdef CMOV_SUN_AS_SYNTAX
6390 if (ASSEMBLER_DIALECT == ASM_ATT)
6391 {
6392 switch (GET_MODE (x))
6393 {
6394 case HImode: putc ('w', file); break;
6395 case SImode:
6396 case SFmode: putc ('l', file); break;
6397 case DImode:
6398 case DFmode: putc ('q', file); break;
6399 default: abort ();
6400 }
6401 putc ('.', file);
6402 }
6403#endif
6404 return;
1853aadd 6405 case 'C':
e075ae69 6406 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
1853aadd 6407 return;
fe25fea3 6408 case 'F':
048b1c95
JJ
6409#ifdef CMOV_SUN_AS_SYNTAX
6410 if (ASSEMBLER_DIALECT == ASM_ATT)
6411 putc ('.', file);
6412#endif
e075ae69 6413 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
fe25fea3
SC
6414 return;
6415
e9a25f70 6416 /* Like above, but reverse condition */
e075ae69 6417 case 'c':
fce5a9f2 6418 /* Check to see if argument to %c is really a constant
c1d5afc4
CR
6419 and not a condition code which needs to be reversed. */
6420 if (GET_RTX_CLASS (GET_CODE (x)) != '<')
6421 {
6422 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
6423 return;
6424 }
e075ae69
RH
6425 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
6426 return;
fe25fea3 6427 case 'f':
048b1c95
JJ
6428#ifdef CMOV_SUN_AS_SYNTAX
6429 if (ASSEMBLER_DIALECT == ASM_ATT)
6430 putc ('.', file);
6431#endif
e075ae69 6432 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
1853aadd 6433 return;
ef6257cd
JH
6434 case '+':
6435 {
6436 rtx x;
e5cb57e8 6437
ef6257cd
JH
6438 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
6439 return;
a4f31c00 6440
ef6257cd
JH
6441 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
6442 if (x)
6443 {
6444 int pred_val = INTVAL (XEXP (x, 0));
6445
6446 if (pred_val < REG_BR_PROB_BASE * 45 / 100
6447 || pred_val > REG_BR_PROB_BASE * 55 / 100)
6448 {
6449 int taken = pred_val > REG_BR_PROB_BASE / 2;
6450 int cputaken = final_forward_branch_p (current_output_insn) == 0;
6451
6452 /* Emit hints only in the case default branch prediction
6453 heruistics would fail. */
6454 if (taken != cputaken)
6455 {
6456 /* We use 3e (DS) prefix for taken branches and
6457 2e (CS) prefix for not taken branches. */
6458 if (taken)
6459 fputs ("ds ; ", file);
6460 else
6461 fputs ("cs ; ", file);
6462 }
6463 }
6464 }
6465 return;
6466 }
4af3895e 6467 default:
a52453cc 6468 output_operand_lossage ("invalid operand code `%c'", code);
2a2ab3f9
JVA
6469 }
6470 }
e9a25f70 6471
2a2ab3f9
JVA
6472 if (GET_CODE (x) == REG)
6473 {
6474 PRINT_REG (x, code, file);
6475 }
e9a25f70 6476
2a2ab3f9
JVA
6477 else if (GET_CODE (x) == MEM)
6478 {
e075ae69 6479 /* No `byte ptr' prefix for call instructions. */
80f33d06 6480 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
2a2ab3f9 6481 {
69ddee61 6482 const char * size;
e075ae69
RH
6483 switch (GET_MODE_SIZE (GET_MODE (x)))
6484 {
6485 case 1: size = "BYTE"; break;
6486 case 2: size = "WORD"; break;
6487 case 4: size = "DWORD"; break;
6488 case 8: size = "QWORD"; break;
6489 case 12: size = "XWORD"; break;
a7180f70 6490 case 16: size = "XMMWORD"; break;
e075ae69 6491 default:
564d80f4 6492 abort ();
e075ae69 6493 }
fb204271
DN
6494
6495 /* Check for explicit size override (codes 'b', 'w' and 'k') */
6496 if (code == 'b')
6497 size = "BYTE";
6498 else if (code == 'w')
6499 size = "WORD";
6500 else if (code == 'k')
6501 size = "DWORD";
6502
e075ae69
RH
6503 fputs (size, file);
6504 fputs (" PTR ", file);
2a2ab3f9 6505 }
e075ae69
RH
6506
6507 x = XEXP (x, 0);
6508 if (flag_pic && CONSTANT_ADDRESS_P (x))
6509 output_pic_addr_const (file, x, code);
0d7d98ee 6510 /* Avoid (%rip) for call operands. */
5bf0ebab 6511 else if (CONSTANT_ADDRESS_P (x) && code == 'P'
0d7d98ee
JH
6512 && GET_CODE (x) != CONST_INT)
6513 output_addr_const (file, x);
c8b94768
RH
6514 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
6515 output_operand_lossage ("invalid constraints for operand");
2a2ab3f9 6516 else
e075ae69 6517 output_address (x);
2a2ab3f9 6518 }
e9a25f70 6519
2a2ab3f9
JVA
6520 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
6521 {
e9a25f70
JL
6522 REAL_VALUE_TYPE r;
6523 long l;
6524
5f1ec3e6
JVA
6525 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6526 REAL_VALUE_TO_TARGET_SINGLE (r, l);
e075ae69 6527
80f33d06 6528 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 6529 putc ('$', file);
52267fcb 6530 fprintf (file, "0x%lx", l);
5f1ec3e6 6531 }
e9a25f70 6532
0f290768 6533 /* These float cases don't actually occur as immediate operands. */
5f1ec3e6
JVA
6534 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
6535 {
e9a25f70
JL
6536 REAL_VALUE_TYPE r;
6537 char dstr[30];
6538
5f1ec3e6
JVA
6539 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6540 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
6541 fprintf (file, "%s", dstr);
2a2ab3f9 6542 }
e9a25f70 6543
2b589241
JH
6544 else if (GET_CODE (x) == CONST_DOUBLE
6545 && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
2a2ab3f9 6546 {
e9a25f70
JL
6547 REAL_VALUE_TYPE r;
6548 char dstr[30];
6549
5f1ec3e6
JVA
6550 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6551 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
6552 fprintf (file, "%s", dstr);
2a2ab3f9 6553 }
f996902d
RH
6554
6555 else if (GET_CODE (x) == CONST
6556 && GET_CODE (XEXP (x, 0)) == UNSPEC
6557 && XINT (XEXP (x, 0), 1) == UNSPEC_TP)
6558 {
6559 if (ASSEMBLER_DIALECT == ASM_INTEL)
6560 fputs ("DWORD PTR ", file);
6561 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
6562 putc ('%', file);
6563 fputs ("gs:0", file);
6564 }
6565
79325812 6566 else
2a2ab3f9 6567 {
4af3895e 6568 if (code != 'P')
2a2ab3f9 6569 {
695dac07 6570 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
e075ae69 6571 {
80f33d06 6572 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69
RH
6573 putc ('$', file);
6574 }
2a2ab3f9
JVA
6575 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
6576 || GET_CODE (x) == LABEL_REF)
e075ae69 6577 {
80f33d06 6578 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69
RH
6579 putc ('$', file);
6580 else
6581 fputs ("OFFSET FLAT:", file);
6582 }
2a2ab3f9 6583 }
e075ae69
RH
6584 if (GET_CODE (x) == CONST_INT)
6585 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6586 else if (flag_pic)
2a2ab3f9
JVA
6587 output_pic_addr_const (file, x, code);
6588 else
6589 output_addr_const (file, x);
6590 }
6591}
6592\f
6593/* Print a memory operand whose address is ADDR. */
6594
6595void
6596print_operand_address (file, addr)
6597 FILE *file;
6598 register rtx addr;
6599{
e075ae69
RH
6600 struct ix86_address parts;
6601 rtx base, index, disp;
6602 int scale;
e9a25f70 6603
e075ae69
RH
6604 if (! ix86_decompose_address (addr, &parts))
6605 abort ();
e9a25f70 6606
e075ae69
RH
6607 base = parts.base;
6608 index = parts.index;
6609 disp = parts.disp;
6610 scale = parts.scale;
e9a25f70 6611
e075ae69
RH
6612 if (!base && !index)
6613 {
6614 /* Displacement only requires special attention. */
e9a25f70 6615
e075ae69 6616 if (GET_CODE (disp) == CONST_INT)
2a2ab3f9 6617 {
80f33d06 6618 if (ASSEMBLER_DIALECT == ASM_INTEL)
fb204271
DN
6619 {
6620 if (USER_LABEL_PREFIX[0] == 0)
6621 putc ('%', file);
6622 fputs ("ds:", file);
6623 }
e075ae69 6624 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
2a2ab3f9 6625 }
e075ae69
RH
6626 else if (flag_pic)
6627 output_pic_addr_const (file, addr, 0);
6628 else
6629 output_addr_const (file, addr);
0d7d98ee
JH
6630
6631 /* Use one byte shorter RIP relative addressing for 64bit mode. */
edfe8595
RH
6632 if (TARGET_64BIT
6633 && (GET_CODE (addr) == SYMBOL_REF
6634 || GET_CODE (addr) == LABEL_REF
6635 || (GET_CODE (addr) == CONST
6636 && GET_CODE (XEXP (addr, 0)) == PLUS
6637 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
6638 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)))
0d7d98ee 6639 fputs ("(%rip)", file);
e075ae69
RH
6640 }
6641 else
6642 {
80f33d06 6643 if (ASSEMBLER_DIALECT == ASM_ATT)
2a2ab3f9 6644 {
e075ae69 6645 if (disp)
2a2ab3f9 6646 {
c399861d 6647 if (flag_pic)
e075ae69
RH
6648 output_pic_addr_const (file, disp, 0);
6649 else if (GET_CODE (disp) == LABEL_REF)
6650 output_asm_label (disp);
2a2ab3f9 6651 else
e075ae69 6652 output_addr_const (file, disp);
2a2ab3f9
JVA
6653 }
6654
e075ae69
RH
6655 putc ('(', file);
6656 if (base)
6657 PRINT_REG (base, 0, file);
6658 if (index)
2a2ab3f9 6659 {
e075ae69
RH
6660 putc (',', file);
6661 PRINT_REG (index, 0, file);
6662 if (scale != 1)
6663 fprintf (file, ",%d", scale);
2a2ab3f9 6664 }
e075ae69 6665 putc (')', file);
2a2ab3f9 6666 }
2a2ab3f9
JVA
6667 else
6668 {
e075ae69 6669 rtx offset = NULL_RTX;
e9a25f70 6670
e075ae69
RH
6671 if (disp)
6672 {
6673 /* Pull out the offset of a symbol; print any symbol itself. */
6674 if (GET_CODE (disp) == CONST
6675 && GET_CODE (XEXP (disp, 0)) == PLUS
6676 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
6677 {
6678 offset = XEXP (XEXP (disp, 0), 1);
6679 disp = gen_rtx_CONST (VOIDmode,
6680 XEXP (XEXP (disp, 0), 0));
6681 }
ce193852 6682
e075ae69
RH
6683 if (flag_pic)
6684 output_pic_addr_const (file, disp, 0);
6685 else if (GET_CODE (disp) == LABEL_REF)
6686 output_asm_label (disp);
6687 else if (GET_CODE (disp) == CONST_INT)
6688 offset = disp;
6689 else
6690 output_addr_const (file, disp);
6691 }
e9a25f70 6692
e075ae69
RH
6693 putc ('[', file);
6694 if (base)
a8620236 6695 {
e075ae69
RH
6696 PRINT_REG (base, 0, file);
6697 if (offset)
6698 {
6699 if (INTVAL (offset) >= 0)
6700 putc ('+', file);
6701 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6702 }
a8620236 6703 }
e075ae69
RH
6704 else if (offset)
6705 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
2247a58c 6706 else
e075ae69 6707 putc ('0', file);
e9a25f70 6708
e075ae69
RH
6709 if (index)
6710 {
6711 putc ('+', file);
6712 PRINT_REG (index, 0, file);
6713 if (scale != 1)
6714 fprintf (file, "*%d", scale);
6715 }
6716 putc (']', file);
6717 }
2a2ab3f9
JVA
6718 }
6719}
f996902d
RH
6720
6721bool
6722output_addr_const_extra (file, x)
6723 FILE *file;
6724 rtx x;
6725{
6726 rtx op;
6727
6728 if (GET_CODE (x) != UNSPEC)
6729 return false;
6730
6731 op = XVECEXP (x, 0, 0);
6732 switch (XINT (x, 1))
6733 {
6734 case UNSPEC_GOTTPOFF:
6735 output_addr_const (file, op);
6736 fputs ("@GOTTPOFF", file);
6737 break;
6738 case UNSPEC_TPOFF:
6739 output_addr_const (file, op);
6740 fputs ("@TPOFF", file);
6741 break;
6742 case UNSPEC_NTPOFF:
6743 output_addr_const (file, op);
6744 fputs ("@NTPOFF", file);
6745 break;
6746 case UNSPEC_DTPOFF:
6747 output_addr_const (file, op);
6748 fputs ("@DTPOFF", file);
6749 break;
6750
6751 default:
6752 return false;
6753 }
6754
6755 return true;
6756}
2a2ab3f9
JVA
6757\f
6758/* Split one or more DImode RTL references into pairs of SImode
6759 references. The RTL can be REG, offsettable MEM, integer constant, or
6760 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
6761 split and "num" is its length. lo_half and hi_half are output arrays
0f290768 6762 that parallel "operands". */
2a2ab3f9
JVA
6763
6764void
6765split_di (operands, num, lo_half, hi_half)
6766 rtx operands[];
6767 int num;
6768 rtx lo_half[], hi_half[];
6769{
6770 while (num--)
6771 {
57dbca5e 6772 rtx op = operands[num];
b932f770
JH
6773
6774 /* simplify_subreg refuse to split volatile memory addresses,
6775 but we still have to handle it. */
6776 if (GET_CODE (op) == MEM)
2a2ab3f9 6777 {
f4ef873c 6778 lo_half[num] = adjust_address (op, SImode, 0);
b72f00af 6779 hi_half[num] = adjust_address (op, SImode, 4);
2a2ab3f9
JVA
6780 }
6781 else
b932f770 6782 {
38ca929b
JH
6783 lo_half[num] = simplify_gen_subreg (SImode, op,
6784 GET_MODE (op) == VOIDmode
6785 ? DImode : GET_MODE (op), 0);
6786 hi_half[num] = simplify_gen_subreg (SImode, op,
6787 GET_MODE (op) == VOIDmode
6788 ? DImode : GET_MODE (op), 4);
b932f770 6789 }
2a2ab3f9
JVA
6790 }
6791}
44cf5b6a
JH
6792/* Split one or more TImode RTL references into pairs of SImode
6793 references. The RTL can be REG, offsettable MEM, integer constant, or
6794 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
6795 split and "num" is its length. lo_half and hi_half are output arrays
6796 that parallel "operands". */
6797
6798void
6799split_ti (operands, num, lo_half, hi_half)
6800 rtx operands[];
6801 int num;
6802 rtx lo_half[], hi_half[];
6803{
6804 while (num--)
6805 {
6806 rtx op = operands[num];
b932f770
JH
6807
6808 /* simplify_subreg refuse to split volatile memory addresses, but we
6809 still have to handle it. */
6810 if (GET_CODE (op) == MEM)
44cf5b6a
JH
6811 {
6812 lo_half[num] = adjust_address (op, DImode, 0);
6813 hi_half[num] = adjust_address (op, DImode, 8);
6814 }
6815 else
b932f770
JH
6816 {
6817 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
6818 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
6819 }
44cf5b6a
JH
6820 }
6821}
2a2ab3f9 6822\f
2a2ab3f9
JVA
6823/* Output code to perform a 387 binary operation in INSN, one of PLUS,
6824 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
6825 is the expression of the binary operation. The output may either be
6826 emitted here, or returned to the caller, like all output_* functions.
6827
6828 There is no guarantee that the operands are the same mode, as they
0f290768 6829 might be within FLOAT or FLOAT_EXTEND expressions. */
2a2ab3f9 6830
e3c2afab
AM
6831#ifndef SYSV386_COMPAT
6832/* Set to 1 for compatibility with brain-damaged assemblers. No-one
6833 wants to fix the assemblers because that causes incompatibility
6834 with gcc. No-one wants to fix gcc because that causes
6835 incompatibility with assemblers... You can use the option of
6836 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
6837#define SYSV386_COMPAT 1
6838#endif
6839
69ddee61 6840const char *
2a2ab3f9
JVA
6841output_387_binary_op (insn, operands)
6842 rtx insn;
6843 rtx *operands;
6844{
e3c2afab 6845 static char buf[30];
69ddee61 6846 const char *p;
1deaa899
JH
6847 const char *ssep;
6848 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
2a2ab3f9 6849
e3c2afab
AM
6850#ifdef ENABLE_CHECKING
6851 /* Even if we do not want to check the inputs, this documents input
6852 constraints. Which helps in understanding the following code. */
6853 if (STACK_REG_P (operands[0])
6854 && ((REG_P (operands[1])
6855 && REGNO (operands[0]) == REGNO (operands[1])
6856 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
6857 || (REG_P (operands[2])
6858 && REGNO (operands[0]) == REGNO (operands[2])
6859 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
6860 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
6861 ; /* ok */
1deaa899 6862 else if (!is_sse)
e3c2afab
AM
6863 abort ();
6864#endif
6865
2a2ab3f9
JVA
6866 switch (GET_CODE (operands[3]))
6867 {
6868 case PLUS:
e075ae69
RH
6869 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6870 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6871 p = "fiadd";
6872 else
6873 p = "fadd";
1deaa899 6874 ssep = "add";
2a2ab3f9
JVA
6875 break;
6876
6877 case MINUS:
e075ae69
RH
6878 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6879 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6880 p = "fisub";
6881 else
6882 p = "fsub";
1deaa899 6883 ssep = "sub";
2a2ab3f9
JVA
6884 break;
6885
6886 case MULT:
e075ae69
RH
6887 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6888 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6889 p = "fimul";
6890 else
6891 p = "fmul";
1deaa899 6892 ssep = "mul";
2a2ab3f9
JVA
6893 break;
6894
6895 case DIV:
e075ae69
RH
6896 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6897 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6898 p = "fidiv";
6899 else
6900 p = "fdiv";
1deaa899 6901 ssep = "div";
2a2ab3f9
JVA
6902 break;
6903
6904 default:
6905 abort ();
6906 }
6907
1deaa899
JH
6908 if (is_sse)
6909 {
6910 strcpy (buf, ssep);
6911 if (GET_MODE (operands[0]) == SFmode)
6912 strcat (buf, "ss\t{%2, %0|%0, %2}");
6913 else
6914 strcat (buf, "sd\t{%2, %0|%0, %2}");
6915 return buf;
6916 }
e075ae69 6917 strcpy (buf, p);
2a2ab3f9
JVA
6918
6919 switch (GET_CODE (operands[3]))
6920 {
6921 case MULT:
6922 case PLUS:
6923 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
6924 {
e3c2afab 6925 rtx temp = operands[2];
2a2ab3f9
JVA
6926 operands[2] = operands[1];
6927 operands[1] = temp;
6928 }
6929
e3c2afab
AM
6930 /* know operands[0] == operands[1]. */
6931
2a2ab3f9 6932 if (GET_CODE (operands[2]) == MEM)
e075ae69
RH
6933 {
6934 p = "%z2\t%2";
6935 break;
6936 }
2a2ab3f9
JVA
6937
6938 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6b28fd63
JL
6939 {
6940 if (STACK_TOP_P (operands[0]))
e3c2afab
AM
6941 /* How is it that we are storing to a dead operand[2]?
6942 Well, presumably operands[1] is dead too. We can't
6943 store the result to st(0) as st(0) gets popped on this
6944 instruction. Instead store to operands[2] (which I
6945 think has to be st(1)). st(1) will be popped later.
6946 gcc <= 2.8.1 didn't have this check and generated
6947 assembly code that the Unixware assembler rejected. */
6948 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6b28fd63 6949 else
e3c2afab 6950 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
e075ae69 6951 break;
6b28fd63 6952 }
2a2ab3f9
JVA
6953
6954 if (STACK_TOP_P (operands[0]))
e3c2afab 6955 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
2a2ab3f9 6956 else
e3c2afab 6957 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
e075ae69 6958 break;
2a2ab3f9
JVA
6959
6960 case MINUS:
6961 case DIV:
6962 if (GET_CODE (operands[1]) == MEM)
e075ae69
RH
6963 {
6964 p = "r%z1\t%1";
6965 break;
6966 }
2a2ab3f9
JVA
6967
6968 if (GET_CODE (operands[2]) == MEM)
e075ae69
RH
6969 {
6970 p = "%z2\t%2";
6971 break;
6972 }
2a2ab3f9 6973
2a2ab3f9 6974 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6b28fd63 6975 {
e3c2afab
AM
6976#if SYSV386_COMPAT
6977 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
6978 derived assemblers, confusingly reverse the direction of
6979 the operation for fsub{r} and fdiv{r} when the
6980 destination register is not st(0). The Intel assembler
6981 doesn't have this brain damage. Read !SYSV386_COMPAT to
6982 figure out what the hardware really does. */
6983 if (STACK_TOP_P (operands[0]))
6984 p = "{p\t%0, %2|rp\t%2, %0}";
6985 else
6986 p = "{rp\t%2, %0|p\t%0, %2}";
6987#else
6b28fd63 6988 if (STACK_TOP_P (operands[0]))
e3c2afab
AM
6989 /* As above for fmul/fadd, we can't store to st(0). */
6990 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6b28fd63 6991 else
e3c2afab
AM
6992 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
6993#endif
e075ae69 6994 break;
6b28fd63 6995 }
2a2ab3f9
JVA
6996
6997 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
6b28fd63 6998 {
e3c2afab 6999#if SYSV386_COMPAT
6b28fd63 7000 if (STACK_TOP_P (operands[0]))
e3c2afab 7001 p = "{rp\t%0, %1|p\t%1, %0}";
6b28fd63 7002 else
e3c2afab
AM
7003 p = "{p\t%1, %0|rp\t%0, %1}";
7004#else
7005 if (STACK_TOP_P (operands[0]))
7006 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7007 else
7008 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7009#endif
e075ae69 7010 break;
6b28fd63 7011 }
2a2ab3f9
JVA
7012
7013 if (STACK_TOP_P (operands[0]))
7014 {
7015 if (STACK_TOP_P (operands[1]))
e3c2afab 7016 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
2a2ab3f9 7017 else
e3c2afab 7018 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
e075ae69 7019 break;
2a2ab3f9
JVA
7020 }
7021 else if (STACK_TOP_P (operands[1]))
e3c2afab
AM
7022 {
7023#if SYSV386_COMPAT
7024 p = "{\t%1, %0|r\t%0, %1}";
7025#else
7026 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
7027#endif
7028 }
2a2ab3f9 7029 else
e3c2afab
AM
7030 {
7031#if SYSV386_COMPAT
7032 p = "{r\t%2, %0|\t%0, %2}";
7033#else
7034 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7035#endif
7036 }
e075ae69 7037 break;
2a2ab3f9
JVA
7038
7039 default:
7040 abort ();
7041 }
e075ae69
RH
7042
7043 strcat (buf, p);
7044 return buf;
2a2ab3f9 7045}
e075ae69 7046
a4f31c00 7047/* Output code to initialize control word copies used by
7a2e09f4
JH
7048 trunc?f?i patterns. NORMAL is set to current control word, while ROUND_DOWN
7049 is set to control word rounding downwards. */
7050void
7051emit_i387_cw_initialization (normal, round_down)
7052 rtx normal, round_down;
7053{
7054 rtx reg = gen_reg_rtx (HImode);
7055
7056 emit_insn (gen_x86_fnstcw_1 (normal));
7057 emit_move_insn (reg, normal);
7058 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
7059 && !TARGET_64BIT)
7060 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
7061 else
7062 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
7063 emit_move_insn (round_down, reg);
7064}
7065
2a2ab3f9 7066/* Output code for INSN to convert a float to a signed int. OPERANDS
46d21d2c 7067 are the insn operands. The output may be [HSD]Imode and the input
e075ae69 7068 operand may be [SDX]Fmode. */
2a2ab3f9 7069
69ddee61 7070const char *
2a2ab3f9
JVA
7071output_fix_trunc (insn, operands)
7072 rtx insn;
7073 rtx *operands;
7074{
7075 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
e075ae69 7076 int dimode_p = GET_MODE (operands[0]) == DImode;
2a2ab3f9 7077
e075ae69
RH
7078 /* Jump through a hoop or two for DImode, since the hardware has no
7079 non-popping instruction. We used to do this a different way, but
7080 that was somewhat fragile and broke with post-reload splitters. */
a05924f9
JH
7081 if (dimode_p && !stack_top_dies)
7082 output_asm_insn ("fld\t%y1", operands);
e075ae69 7083
7a2e09f4 7084 if (!STACK_TOP_P (operands[1]))
10195bd8
JW
7085 abort ();
7086
e075ae69 7087 if (GET_CODE (operands[0]) != MEM)
7a2e09f4 7088 abort ();
e9a25f70 7089
7a2e09f4 7090 output_asm_insn ("fldcw\t%3", operands);
e075ae69 7091 if (stack_top_dies || dimode_p)
7a2e09f4 7092 output_asm_insn ("fistp%z0\t%0", operands);
10195bd8 7093 else
7a2e09f4 7094 output_asm_insn ("fist%z0\t%0", operands);
e075ae69 7095 output_asm_insn ("fldcw\t%2", operands);
10195bd8 7096
e075ae69 7097 return "";
2a2ab3f9 7098}
cda749b1 7099
e075ae69
RH
7100/* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
7101 should be used and 2 when fnstsw should be used. UNORDERED_P is true
7102 when fucom should be used. */
7103
69ddee61 7104const char *
e075ae69 7105output_fp_compare (insn, operands, eflags_p, unordered_p)
cda749b1
JW
7106 rtx insn;
7107 rtx *operands;
e075ae69 7108 int eflags_p, unordered_p;
cda749b1 7109{
e075ae69
RH
7110 int stack_top_dies;
7111 rtx cmp_op0 = operands[0];
7112 rtx cmp_op1 = operands[1];
0644b628 7113 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
e075ae69
RH
7114
7115 if (eflags_p == 2)
7116 {
7117 cmp_op0 = cmp_op1;
7118 cmp_op1 = operands[2];
7119 }
0644b628
JH
7120 if (is_sse)
7121 {
7122 if (GET_MODE (operands[0]) == SFmode)
7123 if (unordered_p)
7124 return "ucomiss\t{%1, %0|%0, %1}";
7125 else
7126 return "comiss\t{%1, %0|%0, %y}";
7127 else
7128 if (unordered_p)
7129 return "ucomisd\t{%1, %0|%0, %1}";
7130 else
7131 return "comisd\t{%1, %0|%0, %y}";
7132 }
cda749b1 7133
e075ae69 7134 if (! STACK_TOP_P (cmp_op0))
cda749b1
JW
7135 abort ();
7136
e075ae69 7137 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
cda749b1 7138
e075ae69
RH
7139 if (STACK_REG_P (cmp_op1)
7140 && stack_top_dies
7141 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
7142 && REGNO (cmp_op1) != FIRST_STACK_REG)
cda749b1 7143 {
e075ae69
RH
7144 /* If both the top of the 387 stack dies, and the other operand
7145 is also a stack register that dies, then this must be a
7146 `fcompp' float compare */
7147
7148 if (eflags_p == 1)
7149 {
7150 /* There is no double popping fcomi variant. Fortunately,
7151 eflags is immune from the fstp's cc clobbering. */
7152 if (unordered_p)
7153 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
7154 else
7155 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
7156 return "fstp\t%y0";
7157 }
7158 else
cda749b1 7159 {
e075ae69
RH
7160 if (eflags_p == 2)
7161 {
7162 if (unordered_p)
7163 return "fucompp\n\tfnstsw\t%0";
7164 else
7165 return "fcompp\n\tfnstsw\t%0";
7166 }
cda749b1
JW
7167 else
7168 {
e075ae69
RH
7169 if (unordered_p)
7170 return "fucompp";
7171 else
7172 return "fcompp";
cda749b1
JW
7173 }
7174 }
cda749b1
JW
7175 }
7176 else
7177 {
e075ae69 7178 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
cda749b1 7179
0f290768 7180 static const char * const alt[24] =
e075ae69
RH
7181 {
7182 "fcom%z1\t%y1",
7183 "fcomp%z1\t%y1",
7184 "fucom%z1\t%y1",
7185 "fucomp%z1\t%y1",
0f290768 7186
e075ae69
RH
7187 "ficom%z1\t%y1",
7188 "ficomp%z1\t%y1",
7189 NULL,
7190 NULL,
7191
7192 "fcomi\t{%y1, %0|%0, %y1}",
7193 "fcomip\t{%y1, %0|%0, %y1}",
7194 "fucomi\t{%y1, %0|%0, %y1}",
7195 "fucomip\t{%y1, %0|%0, %y1}",
7196
7197 NULL,
7198 NULL,
7199 NULL,
7200 NULL,
7201
7202 "fcom%z2\t%y2\n\tfnstsw\t%0",
7203 "fcomp%z2\t%y2\n\tfnstsw\t%0",
7204 "fucom%z2\t%y2\n\tfnstsw\t%0",
7205 "fucomp%z2\t%y2\n\tfnstsw\t%0",
0f290768 7206
e075ae69
RH
7207 "ficom%z2\t%y2\n\tfnstsw\t%0",
7208 "ficomp%z2\t%y2\n\tfnstsw\t%0",
7209 NULL,
7210 NULL
7211 };
7212
7213 int mask;
69ddee61 7214 const char *ret;
e075ae69
RH
7215
7216 mask = eflags_p << 3;
7217 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
7218 mask |= unordered_p << 1;
7219 mask |= stack_top_dies;
7220
7221 if (mask >= 24)
7222 abort ();
7223 ret = alt[mask];
7224 if (ret == NULL)
7225 abort ();
cda749b1 7226
e075ae69 7227 return ret;
cda749b1
JW
7228 }
7229}
2a2ab3f9 7230
f88c65f7
RH
7231void
7232ix86_output_addr_vec_elt (file, value)
7233 FILE *file;
7234 int value;
7235{
7236 const char *directive = ASM_LONG;
7237
7238 if (TARGET_64BIT)
7239 {
7240#ifdef ASM_QUAD
7241 directive = ASM_QUAD;
7242#else
7243 abort ();
7244#endif
7245 }
7246
7247 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
7248}
7249
7250void
7251ix86_output_addr_diff_elt (file, value, rel)
7252 FILE *file;
7253 int value, rel;
7254{
7255 if (TARGET_64BIT)
74411039 7256 fprintf (file, "%s%s%d-%s%d\n",
f88c65f7
RH
7257 ASM_LONG, LPREFIX, value, LPREFIX, rel);
7258 else if (HAVE_AS_GOTOFF_IN_DATA)
7259 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
7260 else
7261 asm_fprintf (file, "%s%U_GLOBAL_OFFSET_TABLE_+[.-%s%d]\n",
7262 ASM_LONG, LPREFIX, value);
7263}
32b5b1aa 7264\f
a8bac9ab
RH
7265/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
7266 for the target. */
7267
7268void
7269ix86_expand_clear (dest)
7270 rtx dest;
7271{
7272 rtx tmp;
7273
7274 /* We play register width games, which are only valid after reload. */
7275 if (!reload_completed)
7276 abort ();
7277
7278 /* Avoid HImode and its attendant prefix byte. */
7279 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
7280 dest = gen_rtx_REG (SImode, REGNO (dest));
7281
7282 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
7283
7284 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
7285 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
7286 {
7287 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
7288 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
7289 }
7290
7291 emit_insn (tmp);
7292}
7293
f996902d
RH
7294/* X is an unchanging MEM. If it is a constant pool reference, return
7295 the constant pool rtx, else NULL. */
7296
7297static rtx
7298maybe_get_pool_constant (x)
7299 rtx x;
7300{
7301 x = XEXP (x, 0);
7302
7303 if (flag_pic)
7304 {
7305 if (GET_CODE (x) != PLUS)
7306 return NULL_RTX;
7307 if (XEXP (x, 0) != pic_offset_table_rtx)
7308 return NULL_RTX;
7309 x = XEXP (x, 1);
7310 if (GET_CODE (x) != CONST)
7311 return NULL_RTX;
7312 x = XEXP (x, 0);
7313 if (GET_CODE (x) != UNSPEC)
7314 return NULL_RTX;
7315 if (XINT (x, 1) != UNSPEC_GOTOFF)
7316 return NULL_RTX;
7317 x = XVECEXP (x, 0, 0);
7318 }
7319
7320 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7321 return get_pool_constant (x);
7322
7323 return NULL_RTX;
7324}
7325
79325812 7326void
e075ae69
RH
7327ix86_expand_move (mode, operands)
7328 enum machine_mode mode;
7329 rtx operands[];
32b5b1aa 7330{
e075ae69 7331 int strict = (reload_in_progress || reload_completed);
f996902d
RH
7332 rtx insn, op0, op1, tmp;
7333
7334 op0 = operands[0];
7335 op1 = operands[1];
7336
7337 /* ??? We have a slight problem. We need to say that tls symbols are
7338 not legitimate constants so that reload does not helpfully reload
7339 these constants from a REG_EQUIV, which we cannot handle. (Recall
7340 that general- and local-dynamic address resolution requires a
7341 function call.)
e9a25f70 7342
f996902d
RH
7343 However, if we say that tls symbols are not legitimate constants,
7344 then emit_move_insn helpfully drop them into the constant pool.
7345
7346 It is far easier to work around emit_move_insn than reload. Recognize
7347 the MEM that we would have created and extract the symbol_ref. */
7348
7349 if (mode == Pmode
7350 && GET_CODE (op1) == MEM
7351 && RTX_UNCHANGING_P (op1))
32b5b1aa 7352 {
f996902d
RH
7353 tmp = maybe_get_pool_constant (op1);
7354 /* Note that we only care about symbolic constants here, which
7355 unlike CONST_INT will always have a proper mode. */
7356 if (tmp && GET_MODE (tmp) == Pmode)
7357 op1 = tmp;
7358 }
e9a25f70 7359
f996902d
RH
7360 if (tls_symbolic_operand (op1, Pmode))
7361 {
7362 op1 = legitimize_address (op1, op1, VOIDmode);
7363 if (GET_CODE (op0) == MEM)
7364 {
7365 tmp = gen_reg_rtx (mode);
7366 emit_insn (gen_rtx_SET (VOIDmode, tmp, op1));
7367 op1 = tmp;
7368 }
7369 }
7370 else if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
7371 {
7372 if (GET_CODE (op0) == MEM)
7373 op1 = force_reg (Pmode, op1);
e075ae69 7374 else
32b5b1aa 7375 {
f996902d 7376 rtx temp = op0;
e075ae69
RH
7377 if (GET_CODE (temp) != REG)
7378 temp = gen_reg_rtx (Pmode);
f996902d
RH
7379 temp = legitimize_pic_address (op1, temp);
7380 if (temp == op0)
e075ae69 7381 return;
f996902d 7382 op1 = temp;
32b5b1aa 7383 }
e075ae69
RH
7384 }
7385 else
7386 {
f996902d 7387 if (GET_CODE (op0) == MEM
44cf5b6a 7388 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
f996902d
RH
7389 || !push_operand (op0, mode))
7390 && GET_CODE (op1) == MEM)
7391 op1 = force_reg (mode, op1);
e9a25f70 7392
f996902d
RH
7393 if (push_operand (op0, mode)
7394 && ! general_no_elim_operand (op1, mode))
7395 op1 = copy_to_mode_reg (mode, op1);
2c5a510c 7396
44cf5b6a
JH
7397 /* Force large constants in 64bit compilation into register
7398 to get them CSEed. */
7399 if (TARGET_64BIT && mode == DImode
f996902d
RH
7400 && immediate_operand (op1, mode)
7401 && !x86_64_zero_extended_value (op1)
7402 && !register_operand (op0, mode)
44cf5b6a 7403 && optimize && !reload_completed && !reload_in_progress)
f996902d 7404 op1 = copy_to_mode_reg (mode, op1);
44cf5b6a 7405
e075ae69 7406 if (FLOAT_MODE_P (mode))
32b5b1aa 7407 {
d7a29404
JH
7408 /* If we are loading a floating point constant to a register,
7409 force the value to memory now, since we'll get better code
7410 out the back end. */
e075ae69
RH
7411
7412 if (strict)
7413 ;
f996902d
RH
7414 else if (GET_CODE (op1) == CONST_DOUBLE
7415 && register_operand (op0, mode))
7416 op1 = validize_mem (force_const_mem (mode, op1));
32b5b1aa 7417 }
32b5b1aa 7418 }
e9a25f70 7419
f996902d 7420 insn = gen_rtx_SET (VOIDmode, op0, op1);
e9a25f70 7421
e075ae69
RH
7422 emit_insn (insn);
7423}
e9a25f70 7424
e37af218
RH
7425void
7426ix86_expand_vector_move (mode, operands)
7427 enum machine_mode mode;
7428 rtx operands[];
7429{
7430 /* Force constants other than zero into memory. We do not know how
7431 the instructions used to build constants modify the upper 64 bits
7432 of the register, once we have that information we may be able
7433 to handle some of them more efficiently. */
7434 if ((reload_in_progress | reload_completed) == 0
7435 && register_operand (operands[0], mode)
7436 && CONSTANT_P (operands[1]))
7437 {
7438 rtx addr = gen_reg_rtx (Pmode);
7439 emit_move_insn (addr, XEXP (force_const_mem (mode, operands[1]), 0));
7440 operands[1] = gen_rtx_MEM (mode, addr);
7441 }
7442
7443 /* Make operand1 a register if it isn't already. */
7444 if ((reload_in_progress | reload_completed) == 0
7445 && !register_operand (operands[0], mode)
7446 && !register_operand (operands[1], mode)
7447 && operands[1] != CONST0_RTX (mode))
7448 {
59bef189 7449 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
e37af218
RH
7450 emit_move_insn (operands[0], temp);
7451 return;
7452 }
7453
7454 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
fce5a9f2 7455}
e37af218 7456
e075ae69
RH
7457/* Attempt to expand a binary operator. Make the expansion closer to the
7458 actual machine, then just general_operand, which will allow 3 separate
9d81fc27 7459 memory references (one output, two input) in a single insn. */
e9a25f70 7460
e075ae69
RH
7461void
7462ix86_expand_binary_operator (code, mode, operands)
7463 enum rtx_code code;
7464 enum machine_mode mode;
7465 rtx operands[];
7466{
7467 int matching_memory;
7468 rtx src1, src2, dst, op, clob;
7469
7470 dst = operands[0];
7471 src1 = operands[1];
7472 src2 = operands[2];
7473
7474 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
7475 if (GET_RTX_CLASS (code) == 'c'
7476 && (rtx_equal_p (dst, src2)
7477 || immediate_operand (src1, mode)))
7478 {
7479 rtx temp = src1;
7480 src1 = src2;
7481 src2 = temp;
32b5b1aa 7482 }
e9a25f70 7483
e075ae69
RH
7484 /* If the destination is memory, and we do not have matching source
7485 operands, do things in registers. */
7486 matching_memory = 0;
7487 if (GET_CODE (dst) == MEM)
32b5b1aa 7488 {
e075ae69
RH
7489 if (rtx_equal_p (dst, src1))
7490 matching_memory = 1;
7491 else if (GET_RTX_CLASS (code) == 'c'
7492 && rtx_equal_p (dst, src2))
7493 matching_memory = 2;
7494 else
7495 dst = gen_reg_rtx (mode);
7496 }
0f290768 7497
e075ae69
RH
7498 /* Both source operands cannot be in memory. */
7499 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
7500 {
7501 if (matching_memory != 2)
7502 src2 = force_reg (mode, src2);
7503 else
7504 src1 = force_reg (mode, src1);
32b5b1aa 7505 }
e9a25f70 7506
06a964de
JH
7507 /* If the operation is not commutable, source 1 cannot be a constant
7508 or non-matching memory. */
0f290768 7509 if ((CONSTANT_P (src1)
06a964de
JH
7510 || (!matching_memory && GET_CODE (src1) == MEM))
7511 && GET_RTX_CLASS (code) != 'c')
e075ae69 7512 src1 = force_reg (mode, src1);
0f290768 7513
e075ae69 7514 /* If optimizing, copy to regs to improve CSE */
fe577e58 7515 if (optimize && ! no_new_pseudos)
32b5b1aa 7516 {
e075ae69
RH
7517 if (GET_CODE (dst) == MEM)
7518 dst = gen_reg_rtx (mode);
7519 if (GET_CODE (src1) == MEM)
7520 src1 = force_reg (mode, src1);
7521 if (GET_CODE (src2) == MEM)
7522 src2 = force_reg (mode, src2);
32b5b1aa 7523 }
e9a25f70 7524
e075ae69
RH
7525 /* Emit the instruction. */
7526
7527 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
7528 if (reload_in_progress)
7529 {
7530 /* Reload doesn't know about the flags register, and doesn't know that
7531 it doesn't want to clobber it. We can only do this with PLUS. */
7532 if (code != PLUS)
7533 abort ();
7534 emit_insn (op);
7535 }
7536 else
32b5b1aa 7537 {
e075ae69
RH
7538 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
7539 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
32b5b1aa 7540 }
e9a25f70 7541
e075ae69
RH
7542 /* Fix up the destination if needed. */
7543 if (dst != operands[0])
7544 emit_move_insn (operands[0], dst);
7545}
7546
7547/* Return TRUE or FALSE depending on whether the binary operator meets the
7548 appropriate constraints. */
7549
7550int
7551ix86_binary_operator_ok (code, mode, operands)
7552 enum rtx_code code;
7553 enum machine_mode mode ATTRIBUTE_UNUSED;
7554 rtx operands[3];
7555{
7556 /* Both source operands cannot be in memory. */
7557 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
7558 return 0;
7559 /* If the operation is not commutable, source 1 cannot be a constant. */
7560 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
7561 return 0;
7562 /* If the destination is memory, we must have a matching source operand. */
7563 if (GET_CODE (operands[0]) == MEM
7564 && ! (rtx_equal_p (operands[0], operands[1])
7565 || (GET_RTX_CLASS (code) == 'c'
7566 && rtx_equal_p (operands[0], operands[2]))))
7567 return 0;
06a964de 7568 /* If the operation is not commutable and the source 1 is memory, we must
d6a7951f 7569 have a matching destination. */
06a964de
JH
7570 if (GET_CODE (operands[1]) == MEM
7571 && GET_RTX_CLASS (code) != 'c'
7572 && ! rtx_equal_p (operands[0], operands[1]))
7573 return 0;
e075ae69
RH
7574 return 1;
7575}
7576
7577/* Attempt to expand a unary operator. Make the expansion closer to the
7578 actual machine, then just general_operand, which will allow 2 separate
9d81fc27 7579 memory references (one output, one input) in a single insn. */
e075ae69 7580
9d81fc27 7581void
e075ae69
RH
7582ix86_expand_unary_operator (code, mode, operands)
7583 enum rtx_code code;
7584 enum machine_mode mode;
7585 rtx operands[];
7586{
06a964de
JH
7587 int matching_memory;
7588 rtx src, dst, op, clob;
7589
7590 dst = operands[0];
7591 src = operands[1];
e075ae69 7592
06a964de
JH
7593 /* If the destination is memory, and we do not have matching source
7594 operands, do things in registers. */
7595 matching_memory = 0;
7596 if (GET_CODE (dst) == MEM)
32b5b1aa 7597 {
06a964de
JH
7598 if (rtx_equal_p (dst, src))
7599 matching_memory = 1;
e075ae69 7600 else
06a964de 7601 dst = gen_reg_rtx (mode);
32b5b1aa 7602 }
e9a25f70 7603
06a964de
JH
7604 /* When source operand is memory, destination must match. */
7605 if (!matching_memory && GET_CODE (src) == MEM)
7606 src = force_reg (mode, src);
0f290768 7607
06a964de 7608 /* If optimizing, copy to regs to improve CSE */
fe577e58 7609 if (optimize && ! no_new_pseudos)
06a964de
JH
7610 {
7611 if (GET_CODE (dst) == MEM)
7612 dst = gen_reg_rtx (mode);
7613 if (GET_CODE (src) == MEM)
7614 src = force_reg (mode, src);
7615 }
7616
7617 /* Emit the instruction. */
7618
7619 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
7620 if (reload_in_progress || code == NOT)
7621 {
7622 /* Reload doesn't know about the flags register, and doesn't know that
7623 it doesn't want to clobber it. */
7624 if (code != NOT)
7625 abort ();
7626 emit_insn (op);
7627 }
7628 else
7629 {
7630 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
7631 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
7632 }
7633
7634 /* Fix up the destination if needed. */
7635 if (dst != operands[0])
7636 emit_move_insn (operands[0], dst);
e075ae69
RH
7637}
7638
7639/* Return TRUE or FALSE depending on whether the unary operator meets the
7640 appropriate constraints. */
7641
7642int
7643ix86_unary_operator_ok (code, mode, operands)
7644 enum rtx_code code ATTRIBUTE_UNUSED;
7645 enum machine_mode mode ATTRIBUTE_UNUSED;
7646 rtx operands[2] ATTRIBUTE_UNUSED;
7647{
06a964de
JH
7648 /* If one of operands is memory, source and destination must match. */
7649 if ((GET_CODE (operands[0]) == MEM
7650 || GET_CODE (operands[1]) == MEM)
7651 && ! rtx_equal_p (operands[0], operands[1]))
7652 return FALSE;
e075ae69
RH
7653 return TRUE;
7654}
7655
16189740
RH
7656/* Return TRUE or FALSE depending on whether the first SET in INSN
7657 has source and destination with matching CC modes, and that the
7658 CC mode is at least as constrained as REQ_MODE. */
7659
7660int
7661ix86_match_ccmode (insn, req_mode)
7662 rtx insn;
7663 enum machine_mode req_mode;
7664{
7665 rtx set;
7666 enum machine_mode set_mode;
7667
7668 set = PATTERN (insn);
7669 if (GET_CODE (set) == PARALLEL)
7670 set = XVECEXP (set, 0, 0);
7671 if (GET_CODE (set) != SET)
7672 abort ();
9076b9c1
JH
7673 if (GET_CODE (SET_SRC (set)) != COMPARE)
7674 abort ();
16189740
RH
7675
7676 set_mode = GET_MODE (SET_DEST (set));
7677 switch (set_mode)
7678 {
9076b9c1
JH
7679 case CCNOmode:
7680 if (req_mode != CCNOmode
7681 && (req_mode != CCmode
7682 || XEXP (SET_SRC (set), 1) != const0_rtx))
7683 return 0;
7684 break;
16189740 7685 case CCmode:
9076b9c1 7686 if (req_mode == CCGCmode)
16189740
RH
7687 return 0;
7688 /* FALLTHRU */
9076b9c1
JH
7689 case CCGCmode:
7690 if (req_mode == CCGOCmode || req_mode == CCNOmode)
7691 return 0;
7692 /* FALLTHRU */
7693 case CCGOCmode:
16189740
RH
7694 if (req_mode == CCZmode)
7695 return 0;
7696 /* FALLTHRU */
7697 case CCZmode:
7698 break;
7699
7700 default:
7701 abort ();
7702 }
7703
7704 return (GET_MODE (SET_SRC (set)) == set_mode);
7705}
7706
e075ae69
RH
7707/* Generate insn patterns to do an integer compare of OPERANDS. */
7708
7709static rtx
7710ix86_expand_int_compare (code, op0, op1)
7711 enum rtx_code code;
7712 rtx op0, op1;
7713{
7714 enum machine_mode cmpmode;
7715 rtx tmp, flags;
7716
7717 cmpmode = SELECT_CC_MODE (code, op0, op1);
7718 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
7719
7720 /* This is very simple, but making the interface the same as in the
7721 FP case makes the rest of the code easier. */
7722 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
7723 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
7724
7725 /* Return the test that should be put into the flags user, i.e.
7726 the bcc, scc, or cmov instruction. */
7727 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
7728}
7729
3a3677ff
RH
7730/* Figure out whether to use ordered or unordered fp comparisons.
7731 Return the appropriate mode to use. */
e075ae69 7732
b1cdafbb 7733enum machine_mode
3a3677ff 7734ix86_fp_compare_mode (code)
8752c357 7735 enum rtx_code code ATTRIBUTE_UNUSED;
e075ae69 7736{
9e7adcb3
JH
7737 /* ??? In order to make all comparisons reversible, we do all comparisons
7738 non-trapping when compiling for IEEE. Once gcc is able to distinguish
7739 all forms trapping and nontrapping comparisons, we can make inequality
7740 comparisons trapping again, since it results in better code when using
7741 FCOM based compares. */
7742 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
3a3677ff
RH
7743}
7744
9076b9c1
JH
7745enum machine_mode
7746ix86_cc_mode (code, op0, op1)
7747 enum rtx_code code;
7748 rtx op0, op1;
7749{
7750 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
7751 return ix86_fp_compare_mode (code);
7752 switch (code)
7753 {
7754 /* Only zero flag is needed. */
7755 case EQ: /* ZF=0 */
7756 case NE: /* ZF!=0 */
7757 return CCZmode;
7758 /* Codes needing carry flag. */
265dab10
JH
7759 case GEU: /* CF=0 */
7760 case GTU: /* CF=0 & ZF=0 */
7e08e190
JH
7761 case LTU: /* CF=1 */
7762 case LEU: /* CF=1 | ZF=1 */
265dab10 7763 return CCmode;
9076b9c1
JH
7764 /* Codes possibly doable only with sign flag when
7765 comparing against zero. */
7766 case GE: /* SF=OF or SF=0 */
7e08e190 7767 case LT: /* SF<>OF or SF=1 */
9076b9c1
JH
7768 if (op1 == const0_rtx)
7769 return CCGOCmode;
7770 else
7771 /* For other cases Carry flag is not required. */
7772 return CCGCmode;
7773 /* Codes doable only with sign flag when comparing
7774 against zero, but we miss jump instruction for it
7775 so we need to use relational tests agains overflow
7776 that thus needs to be zero. */
7777 case GT: /* ZF=0 & SF=OF */
7778 case LE: /* ZF=1 | SF<>OF */
7779 if (op1 == const0_rtx)
7780 return CCNOmode;
7781 else
7782 return CCGCmode;
7fcd7218
JH
7783 /* strcmp pattern do (use flags) and combine may ask us for proper
7784 mode. */
7785 case USE:
7786 return CCmode;
9076b9c1 7787 default:
0f290768 7788 abort ();
9076b9c1
JH
7789 }
7790}
7791
3a3677ff
RH
7792/* Return true if we should use an FCOMI instruction for this fp comparison. */
7793
a940d8bd 7794int
3a3677ff 7795ix86_use_fcomi_compare (code)
9e7adcb3 7796 enum rtx_code code ATTRIBUTE_UNUSED;
3a3677ff 7797{
9e7adcb3
JH
7798 enum rtx_code swapped_code = swap_condition (code);
7799 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
7800 || (ix86_fp_comparison_cost (swapped_code)
7801 == ix86_fp_comparison_fcomi_cost (swapped_code)));
3a3677ff
RH
7802}
7803
0f290768 7804/* Swap, force into registers, or otherwise massage the two operands
3a3677ff
RH
7805 to a fp comparison. The operands are updated in place; the new
7806 comparsion code is returned. */
7807
7808static enum rtx_code
7809ix86_prepare_fp_compare_args (code, pop0, pop1)
7810 enum rtx_code code;
7811 rtx *pop0, *pop1;
7812{
7813 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
7814 rtx op0 = *pop0, op1 = *pop1;
7815 enum machine_mode op_mode = GET_MODE (op0);
0644b628 7816 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
3a3677ff 7817
e075ae69 7818 /* All of the unordered compare instructions only work on registers.
3a3677ff
RH
7819 The same is true of the XFmode compare instructions. The same is
7820 true of the fcomi compare instructions. */
7821
0644b628
JH
7822 if (!is_sse
7823 && (fpcmp_mode == CCFPUmode
7824 || op_mode == XFmode
7825 || op_mode == TFmode
7826 || ix86_use_fcomi_compare (code)))
e075ae69 7827 {
3a3677ff
RH
7828 op0 = force_reg (op_mode, op0);
7829 op1 = force_reg (op_mode, op1);
e075ae69
RH
7830 }
7831 else
7832 {
7833 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
7834 things around if they appear profitable, otherwise force op0
7835 into a register. */
7836
7837 if (standard_80387_constant_p (op0) == 0
7838 || (GET_CODE (op0) == MEM
7839 && ! (standard_80387_constant_p (op1) == 0
7840 || GET_CODE (op1) == MEM)))
32b5b1aa 7841 {
e075ae69
RH
7842 rtx tmp;
7843 tmp = op0, op0 = op1, op1 = tmp;
7844 code = swap_condition (code);
7845 }
7846
7847 if (GET_CODE (op0) != REG)
3a3677ff 7848 op0 = force_reg (op_mode, op0);
e075ae69
RH
7849
7850 if (CONSTANT_P (op1))
7851 {
7852 if (standard_80387_constant_p (op1))
3a3677ff 7853 op1 = force_reg (op_mode, op1);
e075ae69 7854 else
3a3677ff 7855 op1 = validize_mem (force_const_mem (op_mode, op1));
32b5b1aa
SC
7856 }
7857 }
e9a25f70 7858
9e7adcb3
JH
7859 /* Try to rearrange the comparison to make it cheaper. */
7860 if (ix86_fp_comparison_cost (code)
7861 > ix86_fp_comparison_cost (swap_condition (code))
558740bf 7862 && (GET_CODE (op1) == REG || !no_new_pseudos))
9e7adcb3
JH
7863 {
7864 rtx tmp;
7865 tmp = op0, op0 = op1, op1 = tmp;
7866 code = swap_condition (code);
7867 if (GET_CODE (op0) != REG)
7868 op0 = force_reg (op_mode, op0);
7869 }
7870
3a3677ff
RH
7871 *pop0 = op0;
7872 *pop1 = op1;
7873 return code;
7874}
7875
c0c102a9
JH
7876/* Convert comparison codes we use to represent FP comparison to integer
7877 code that will result in proper branch. Return UNKNOWN if no such code
7878 is available. */
7879static enum rtx_code
7880ix86_fp_compare_code_to_integer (code)
7881 enum rtx_code code;
7882{
7883 switch (code)
7884 {
7885 case GT:
7886 return GTU;
7887 case GE:
7888 return GEU;
7889 case ORDERED:
7890 case UNORDERED:
7891 return code;
7892 break;
7893 case UNEQ:
7894 return EQ;
7895 break;
7896 case UNLT:
7897 return LTU;
7898 break;
7899 case UNLE:
7900 return LEU;
7901 break;
7902 case LTGT:
7903 return NE;
7904 break;
7905 default:
7906 return UNKNOWN;
7907 }
7908}
7909
7910/* Split comparison code CODE into comparisons we can do using branch
7911 instructions. BYPASS_CODE is comparison code for branch that will
7912 branch around FIRST_CODE and SECOND_CODE. If some of branches
7913 is not required, set value to NIL.
7914 We never require more than two branches. */
7915static void
7916ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
7917 enum rtx_code code, *bypass_code, *first_code, *second_code;
7918{
7919 *first_code = code;
7920 *bypass_code = NIL;
7921 *second_code = NIL;
7922
7923 /* The fcomi comparison sets flags as follows:
7924
7925 cmp ZF PF CF
7926 > 0 0 0
7927 < 0 0 1
7928 = 1 0 0
7929 un 1 1 1 */
7930
7931 switch (code)
7932 {
7933 case GT: /* GTU - CF=0 & ZF=0 */
7934 case GE: /* GEU - CF=0 */
7935 case ORDERED: /* PF=0 */
7936 case UNORDERED: /* PF=1 */
7937 case UNEQ: /* EQ - ZF=1 */
7938 case UNLT: /* LTU - CF=1 */
7939 case UNLE: /* LEU - CF=1 | ZF=1 */
7940 case LTGT: /* EQ - ZF=0 */
7941 break;
7942 case LT: /* LTU - CF=1 - fails on unordered */
7943 *first_code = UNLT;
7944 *bypass_code = UNORDERED;
7945 break;
7946 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
7947 *first_code = UNLE;
7948 *bypass_code = UNORDERED;
7949 break;
7950 case EQ: /* EQ - ZF=1 - fails on unordered */
7951 *first_code = UNEQ;
7952 *bypass_code = UNORDERED;
7953 break;
7954 case NE: /* NE - ZF=0 - fails on unordered */
7955 *first_code = LTGT;
7956 *second_code = UNORDERED;
7957 break;
7958 case UNGE: /* GEU - CF=0 - fails on unordered */
7959 *first_code = GE;
7960 *second_code = UNORDERED;
7961 break;
7962 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
7963 *first_code = GT;
7964 *second_code = UNORDERED;
7965 break;
7966 default:
7967 abort ();
7968 }
7969 if (!TARGET_IEEE_FP)
7970 {
7971 *second_code = NIL;
7972 *bypass_code = NIL;
7973 }
7974}
7975
9e7adcb3
JH
7976/* Return cost of comparison done fcom + arithmetics operations on AX.
7977 All following functions do use number of instructions as an cost metrics.
7978 In future this should be tweaked to compute bytes for optimize_size and
7979 take into account performance of various instructions on various CPUs. */
7980static int
7981ix86_fp_comparison_arithmetics_cost (code)
7982 enum rtx_code code;
7983{
7984 if (!TARGET_IEEE_FP)
7985 return 4;
7986 /* The cost of code output by ix86_expand_fp_compare. */
7987 switch (code)
7988 {
7989 case UNLE:
7990 case UNLT:
7991 case LTGT:
7992 case GT:
7993 case GE:
7994 case UNORDERED:
7995 case ORDERED:
7996 case UNEQ:
7997 return 4;
7998 break;
7999 case LT:
8000 case NE:
8001 case EQ:
8002 case UNGE:
8003 return 5;
8004 break;
8005 case LE:
8006 case UNGT:
8007 return 6;
8008 break;
8009 default:
8010 abort ();
8011 }
8012}
8013
8014/* Return cost of comparison done using fcomi operation.
8015 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8016static int
8017ix86_fp_comparison_fcomi_cost (code)
8018 enum rtx_code code;
8019{
8020 enum rtx_code bypass_code, first_code, second_code;
8021 /* Return arbitarily high cost when instruction is not supported - this
8022 prevents gcc from using it. */
8023 if (!TARGET_CMOVE)
8024 return 1024;
8025 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8026 return (bypass_code != NIL || second_code != NIL) + 2;
8027}
8028
8029/* Return cost of comparison done using sahf operation.
8030 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8031static int
8032ix86_fp_comparison_sahf_cost (code)
8033 enum rtx_code code;
8034{
8035 enum rtx_code bypass_code, first_code, second_code;
8036 /* Return arbitarily high cost when instruction is not preferred - this
8037 avoids gcc from using it. */
8038 if (!TARGET_USE_SAHF && !optimize_size)
8039 return 1024;
8040 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8041 return (bypass_code != NIL || second_code != NIL) + 3;
8042}
8043
8044/* Compute cost of the comparison done using any method.
8045 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8046static int
8047ix86_fp_comparison_cost (code)
8048 enum rtx_code code;
8049{
8050 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
8051 int min;
8052
8053 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
8054 sahf_cost = ix86_fp_comparison_sahf_cost (code);
8055
8056 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
8057 if (min > sahf_cost)
8058 min = sahf_cost;
8059 if (min > fcomi_cost)
8060 min = fcomi_cost;
8061 return min;
8062}
c0c102a9 8063
3a3677ff
RH
8064/* Generate insn patterns to do a floating point compare of OPERANDS. */
8065
9e7adcb3
JH
8066static rtx
8067ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
3a3677ff
RH
8068 enum rtx_code code;
8069 rtx op0, op1, scratch;
9e7adcb3
JH
8070 rtx *second_test;
8071 rtx *bypass_test;
3a3677ff
RH
8072{
8073 enum machine_mode fpcmp_mode, intcmp_mode;
c0c102a9 8074 rtx tmp, tmp2;
9e7adcb3 8075 int cost = ix86_fp_comparison_cost (code);
c0c102a9 8076 enum rtx_code bypass_code, first_code, second_code;
3a3677ff
RH
8077
8078 fpcmp_mode = ix86_fp_compare_mode (code);
8079 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
8080
9e7adcb3
JH
8081 if (second_test)
8082 *second_test = NULL_RTX;
8083 if (bypass_test)
8084 *bypass_test = NULL_RTX;
8085
c0c102a9
JH
8086 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8087
9e7adcb3
JH
8088 /* Do fcomi/sahf based test when profitable. */
8089 if ((bypass_code == NIL || bypass_test)
8090 && (second_code == NIL || second_test)
8091 && ix86_fp_comparison_arithmetics_cost (code) > cost)
32b5b1aa 8092 {
c0c102a9
JH
8093 if (TARGET_CMOVE)
8094 {
8095 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8096 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
8097 tmp);
8098 emit_insn (tmp);
8099 }
8100 else
8101 {
8102 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8ee41eaf 8103 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
bf71a4f8
JH
8104 if (!scratch)
8105 scratch = gen_reg_rtx (HImode);
c0c102a9
JH
8106 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8107 emit_insn (gen_x86_sahf_1 (scratch));
8108 }
e075ae69
RH
8109
8110 /* The FP codes work out to act like unsigned. */
9a915772 8111 intcmp_mode = fpcmp_mode;
9e7adcb3
JH
8112 code = first_code;
8113 if (bypass_code != NIL)
8114 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
8115 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8116 const0_rtx);
8117 if (second_code != NIL)
8118 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
8119 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8120 const0_rtx);
e075ae69
RH
8121 }
8122 else
8123 {
8124 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
e075ae69 8125 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8ee41eaf 8126 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
bf71a4f8
JH
8127 if (!scratch)
8128 scratch = gen_reg_rtx (HImode);
3a3677ff 8129 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
e075ae69 8130
9a915772
JH
8131 /* In the unordered case, we have to check C2 for NaN's, which
8132 doesn't happen to work out to anything nice combination-wise.
8133 So do some bit twiddling on the value we've got in AH to come
8134 up with an appropriate set of condition codes. */
e075ae69 8135
9a915772
JH
8136 intcmp_mode = CCNOmode;
8137 switch (code)
32b5b1aa 8138 {
9a915772
JH
8139 case GT:
8140 case UNGT:
8141 if (code == GT || !TARGET_IEEE_FP)
32b5b1aa 8142 {
3a3677ff 8143 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
e075ae69 8144 code = EQ;
9a915772
JH
8145 }
8146 else
8147 {
8148 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8149 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8150 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
8151 intcmp_mode = CCmode;
8152 code = GEU;
8153 }
8154 break;
8155 case LT:
8156 case UNLT:
8157 if (code == LT && TARGET_IEEE_FP)
8158 {
3a3677ff
RH
8159 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8160 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
e075ae69
RH
8161 intcmp_mode = CCmode;
8162 code = EQ;
9a915772
JH
8163 }
8164 else
8165 {
8166 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
8167 code = NE;
8168 }
8169 break;
8170 case GE:
8171 case UNGE:
8172 if (code == GE || !TARGET_IEEE_FP)
8173 {
3a3677ff 8174 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
e075ae69 8175 code = EQ;
9a915772
JH
8176 }
8177 else
8178 {
8179 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8180 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8181 GEN_INT (0x01)));
8182 code = NE;
8183 }
8184 break;
8185 case LE:
8186 case UNLE:
8187 if (code == LE && TARGET_IEEE_FP)
8188 {
3a3677ff
RH
8189 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8190 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8191 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
e075ae69
RH
8192 intcmp_mode = CCmode;
8193 code = LTU;
9a915772
JH
8194 }
8195 else
8196 {
8197 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8198 code = NE;
8199 }
8200 break;
8201 case EQ:
8202 case UNEQ:
8203 if (code == EQ && TARGET_IEEE_FP)
8204 {
3a3677ff
RH
8205 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8206 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
e075ae69
RH
8207 intcmp_mode = CCmode;
8208 code = EQ;
9a915772
JH
8209 }
8210 else
8211 {
3a3677ff
RH
8212 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8213 code = NE;
8214 break;
9a915772
JH
8215 }
8216 break;
8217 case NE:
8218 case LTGT:
8219 if (code == NE && TARGET_IEEE_FP)
8220 {
3a3677ff 8221 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9a915772
JH
8222 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8223 GEN_INT (0x40)));
3a3677ff 8224 code = NE;
9a915772
JH
8225 }
8226 else
8227 {
3a3677ff
RH
8228 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8229 code = EQ;
32b5b1aa 8230 }
9a915772
JH
8231 break;
8232
8233 case UNORDERED:
8234 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8235 code = NE;
8236 break;
8237 case ORDERED:
8238 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8239 code = EQ;
8240 break;
8241
8242 default:
8243 abort ();
32b5b1aa 8244 }
32b5b1aa 8245 }
e075ae69
RH
8246
8247 /* Return the test that should be put into the flags user, i.e.
8248 the bcc, scc, or cmov instruction. */
8249 return gen_rtx_fmt_ee (code, VOIDmode,
8250 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8251 const0_rtx);
8252}
8253
9e3e266c 8254rtx
a1b8572c 8255ix86_expand_compare (code, second_test, bypass_test)
e075ae69 8256 enum rtx_code code;
a1b8572c 8257 rtx *second_test, *bypass_test;
e075ae69
RH
8258{
8259 rtx op0, op1, ret;
8260 op0 = ix86_compare_op0;
8261 op1 = ix86_compare_op1;
8262
a1b8572c
JH
8263 if (second_test)
8264 *second_test = NULL_RTX;
8265 if (bypass_test)
8266 *bypass_test = NULL_RTX;
8267
e075ae69 8268 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
bf71a4f8 8269 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
77ebd435 8270 second_test, bypass_test);
32b5b1aa 8271 else
e075ae69
RH
8272 ret = ix86_expand_int_compare (code, op0, op1);
8273
8274 return ret;
8275}
8276
03598dea
JH
8277/* Return true if the CODE will result in nontrivial jump sequence. */
8278bool
8279ix86_fp_jump_nontrivial_p (code)
8280 enum rtx_code code;
8281{
8282 enum rtx_code bypass_code, first_code, second_code;
8283 if (!TARGET_CMOVE)
8284 return true;
8285 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8286 return bypass_code != NIL || second_code != NIL;
8287}
8288
e075ae69 8289void
3a3677ff 8290ix86_expand_branch (code, label)
e075ae69 8291 enum rtx_code code;
e075ae69
RH
8292 rtx label;
8293{
3a3677ff 8294 rtx tmp;
e075ae69 8295
3a3677ff 8296 switch (GET_MODE (ix86_compare_op0))
32b5b1aa 8297 {
3a3677ff
RH
8298 case QImode:
8299 case HImode:
8300 case SImode:
0d7d98ee 8301 simple:
a1b8572c 8302 tmp = ix86_expand_compare (code, NULL, NULL);
e075ae69
RH
8303 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8304 gen_rtx_LABEL_REF (VOIDmode, label),
8305 pc_rtx);
8306 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
32b5b1aa 8307 return;
e075ae69 8308
3a3677ff
RH
8309 case SFmode:
8310 case DFmode:
0f290768 8311 case XFmode:
2b589241 8312 case TFmode:
3a3677ff
RH
8313 {
8314 rtvec vec;
8315 int use_fcomi;
03598dea 8316 enum rtx_code bypass_code, first_code, second_code;
3a3677ff
RH
8317
8318 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
8319 &ix86_compare_op1);
fce5a9f2 8320
03598dea
JH
8321 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8322
8323 /* Check whether we will use the natural sequence with one jump. If
8324 so, we can expand jump early. Otherwise delay expansion by
8325 creating compound insn to not confuse optimizers. */
8326 if (bypass_code == NIL && second_code == NIL
8327 && TARGET_CMOVE)
8328 {
8329 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
8330 gen_rtx_LABEL_REF (VOIDmode, label),
8331 pc_rtx, NULL_RTX);
8332 }
8333 else
8334 {
8335 tmp = gen_rtx_fmt_ee (code, VOIDmode,
8336 ix86_compare_op0, ix86_compare_op1);
8337 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8338 gen_rtx_LABEL_REF (VOIDmode, label),
8339 pc_rtx);
8340 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
8341
8342 use_fcomi = ix86_use_fcomi_compare (code);
8343 vec = rtvec_alloc (3 + !use_fcomi);
8344 RTVEC_ELT (vec, 0) = tmp;
8345 RTVEC_ELT (vec, 1)
8346 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
8347 RTVEC_ELT (vec, 2)
8348 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
8349 if (! use_fcomi)
8350 RTVEC_ELT (vec, 3)
8351 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
8352
8353 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
8354 }
3a3677ff
RH
8355 return;
8356 }
32b5b1aa 8357
3a3677ff 8358 case DImode:
0d7d98ee
JH
8359 if (TARGET_64BIT)
8360 goto simple;
3a3677ff
RH
8361 /* Expand DImode branch into multiple compare+branch. */
8362 {
8363 rtx lo[2], hi[2], label2;
8364 enum rtx_code code1, code2, code3;
32b5b1aa 8365
3a3677ff
RH
8366 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
8367 {
8368 tmp = ix86_compare_op0;
8369 ix86_compare_op0 = ix86_compare_op1;
8370 ix86_compare_op1 = tmp;
8371 code = swap_condition (code);
8372 }
8373 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
8374 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
32b5b1aa 8375
3a3677ff
RH
8376 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
8377 avoid two branches. This costs one extra insn, so disable when
8378 optimizing for size. */
32b5b1aa 8379
3a3677ff
RH
8380 if ((code == EQ || code == NE)
8381 && (!optimize_size
8382 || hi[1] == const0_rtx || lo[1] == const0_rtx))
8383 {
8384 rtx xor0, xor1;
32b5b1aa 8385
3a3677ff
RH
8386 xor1 = hi[0];
8387 if (hi[1] != const0_rtx)
8388 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
8389 NULL_RTX, 0, OPTAB_WIDEN);
32b5b1aa 8390
3a3677ff
RH
8391 xor0 = lo[0];
8392 if (lo[1] != const0_rtx)
8393 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
8394 NULL_RTX, 0, OPTAB_WIDEN);
e075ae69 8395
3a3677ff
RH
8396 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
8397 NULL_RTX, 0, OPTAB_WIDEN);
32b5b1aa 8398
3a3677ff
RH
8399 ix86_compare_op0 = tmp;
8400 ix86_compare_op1 = const0_rtx;
8401 ix86_expand_branch (code, label);
8402 return;
8403 }
e075ae69 8404
1f9124e4
JJ
8405 /* Otherwise, if we are doing less-than or greater-or-equal-than,
8406 op1 is a constant and the low word is zero, then we can just
8407 examine the high word. */
32b5b1aa 8408
1f9124e4
JJ
8409 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
8410 switch (code)
8411 {
8412 case LT: case LTU: case GE: case GEU:
8413 ix86_compare_op0 = hi[0];
8414 ix86_compare_op1 = hi[1];
8415 ix86_expand_branch (code, label);
8416 return;
8417 default:
8418 break;
8419 }
e075ae69 8420
3a3677ff 8421 /* Otherwise, we need two or three jumps. */
e075ae69 8422
3a3677ff 8423 label2 = gen_label_rtx ();
e075ae69 8424
3a3677ff
RH
8425 code1 = code;
8426 code2 = swap_condition (code);
8427 code3 = unsigned_condition (code);
e075ae69 8428
3a3677ff
RH
8429 switch (code)
8430 {
8431 case LT: case GT: case LTU: case GTU:
8432 break;
e075ae69 8433
3a3677ff
RH
8434 case LE: code1 = LT; code2 = GT; break;
8435 case GE: code1 = GT; code2 = LT; break;
8436 case LEU: code1 = LTU; code2 = GTU; break;
8437 case GEU: code1 = GTU; code2 = LTU; break;
e075ae69 8438
3a3677ff
RH
8439 case EQ: code1 = NIL; code2 = NE; break;
8440 case NE: code2 = NIL; break;
e075ae69 8441
3a3677ff
RH
8442 default:
8443 abort ();
8444 }
e075ae69 8445
3a3677ff
RH
8446 /*
8447 * a < b =>
8448 * if (hi(a) < hi(b)) goto true;
8449 * if (hi(a) > hi(b)) goto false;
8450 * if (lo(a) < lo(b)) goto true;
8451 * false:
8452 */
8453
8454 ix86_compare_op0 = hi[0];
8455 ix86_compare_op1 = hi[1];
8456
8457 if (code1 != NIL)
8458 ix86_expand_branch (code1, label);
8459 if (code2 != NIL)
8460 ix86_expand_branch (code2, label2);
8461
8462 ix86_compare_op0 = lo[0];
8463 ix86_compare_op1 = lo[1];
8464 ix86_expand_branch (code3, label);
8465
8466 if (code2 != NIL)
8467 emit_label (label2);
8468 return;
8469 }
e075ae69 8470
3a3677ff
RH
8471 default:
8472 abort ();
8473 }
32b5b1aa 8474}
e075ae69 8475
9e7adcb3
JH
8476/* Split branch based on floating point condition. */
8477void
03598dea
JH
8478ix86_split_fp_branch (code, op1, op2, target1, target2, tmp)
8479 enum rtx_code code;
8480 rtx op1, op2, target1, target2, tmp;
9e7adcb3
JH
8481{
8482 rtx second, bypass;
8483 rtx label = NULL_RTX;
03598dea 8484 rtx condition;
6b24c259
JH
8485 int bypass_probability = -1, second_probability = -1, probability = -1;
8486 rtx i;
9e7adcb3
JH
8487
8488 if (target2 != pc_rtx)
8489 {
8490 rtx tmp = target2;
8491 code = reverse_condition_maybe_unordered (code);
8492 target2 = target1;
8493 target1 = tmp;
8494 }
8495
8496 condition = ix86_expand_fp_compare (code, op1, op2,
8497 tmp, &second, &bypass);
6b24c259
JH
8498
8499 if (split_branch_probability >= 0)
8500 {
8501 /* Distribute the probabilities across the jumps.
8502 Assume the BYPASS and SECOND to be always test
8503 for UNORDERED. */
8504 probability = split_branch_probability;
8505
d6a7951f 8506 /* Value of 1 is low enough to make no need for probability
6b24c259
JH
8507 to be updated. Later we may run some experiments and see
8508 if unordered values are more frequent in practice. */
8509 if (bypass)
8510 bypass_probability = 1;
8511 if (second)
8512 second_probability = 1;
8513 }
9e7adcb3
JH
8514 if (bypass != NULL_RTX)
8515 {
8516 label = gen_label_rtx ();
6b24c259
JH
8517 i = emit_jump_insn (gen_rtx_SET
8518 (VOIDmode, pc_rtx,
8519 gen_rtx_IF_THEN_ELSE (VOIDmode,
8520 bypass,
8521 gen_rtx_LABEL_REF (VOIDmode,
8522 label),
8523 pc_rtx)));
8524 if (bypass_probability >= 0)
8525 REG_NOTES (i)
8526 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8527 GEN_INT (bypass_probability),
8528 REG_NOTES (i));
8529 }
8530 i = emit_jump_insn (gen_rtx_SET
9e7adcb3
JH
8531 (VOIDmode, pc_rtx,
8532 gen_rtx_IF_THEN_ELSE (VOIDmode,
6b24c259
JH
8533 condition, target1, target2)));
8534 if (probability >= 0)
8535 REG_NOTES (i)
8536 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8537 GEN_INT (probability),
8538 REG_NOTES (i));
8539 if (second != NULL_RTX)
9e7adcb3 8540 {
6b24c259
JH
8541 i = emit_jump_insn (gen_rtx_SET
8542 (VOIDmode, pc_rtx,
8543 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
8544 target2)));
8545 if (second_probability >= 0)
8546 REG_NOTES (i)
8547 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8548 GEN_INT (second_probability),
8549 REG_NOTES (i));
9e7adcb3 8550 }
9e7adcb3
JH
8551 if (label != NULL_RTX)
8552 emit_label (label);
8553}
8554
32b5b1aa 8555int
3a3677ff 8556ix86_expand_setcc (code, dest)
e075ae69 8557 enum rtx_code code;
e075ae69 8558 rtx dest;
32b5b1aa 8559{
a1b8572c
JH
8560 rtx ret, tmp, tmpreg;
8561 rtx second_test, bypass_test;
e075ae69 8562
885a70fd
JH
8563 if (GET_MODE (ix86_compare_op0) == DImode
8564 && !TARGET_64BIT)
e075ae69
RH
8565 return 0; /* FAIL */
8566
b932f770
JH
8567 if (GET_MODE (dest) != QImode)
8568 abort ();
e075ae69 8569
a1b8572c 8570 ret = ix86_expand_compare (code, &second_test, &bypass_test);
e075ae69
RH
8571 PUT_MODE (ret, QImode);
8572
8573 tmp = dest;
a1b8572c 8574 tmpreg = dest;
32b5b1aa 8575
e075ae69 8576 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
a1b8572c
JH
8577 if (bypass_test || second_test)
8578 {
8579 rtx test = second_test;
8580 int bypass = 0;
8581 rtx tmp2 = gen_reg_rtx (QImode);
8582 if (bypass_test)
8583 {
8584 if (second_test)
b531087a 8585 abort ();
a1b8572c
JH
8586 test = bypass_test;
8587 bypass = 1;
8588 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
8589 }
8590 PUT_MODE (test, QImode);
8591 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
8592
8593 if (bypass)
8594 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
8595 else
8596 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
8597 }
e075ae69 8598
e075ae69 8599 return 1; /* DONE */
32b5b1aa 8600}
e075ae69 8601
32b5b1aa 8602int
e075ae69
RH
8603ix86_expand_int_movcc (operands)
8604 rtx operands[];
32b5b1aa 8605{
e075ae69
RH
8606 enum rtx_code code = GET_CODE (operands[1]), compare_code;
8607 rtx compare_seq, compare_op;
a1b8572c 8608 rtx second_test, bypass_test;
635559ab 8609 enum machine_mode mode = GET_MODE (operands[0]);
32b5b1aa 8610
36583fea
JH
8611 /* When the compare code is not LTU or GEU, we can not use sbbl case.
8612 In case comparsion is done with immediate, we can convert it to LTU or
8613 GEU by altering the integer. */
8614
8615 if ((code == LEU || code == GTU)
8616 && GET_CODE (ix86_compare_op1) == CONST_INT
635559ab 8617 && mode != HImode
261376e7
RH
8618 && INTVAL (ix86_compare_op1) != -1
8619 /* For x86-64, the immediate field in the instruction is 32-bit
8620 signed, so we can't increment a DImode value above 0x7fffffff. */
74411039
JH
8621 && (!TARGET_64BIT
8622 || GET_MODE (ix86_compare_op0) != DImode
261376e7 8623 || INTVAL (ix86_compare_op1) != 0x7fffffff)
0f290768 8624 && GET_CODE (operands[2]) == CONST_INT
36583fea
JH
8625 && GET_CODE (operands[3]) == CONST_INT)
8626 {
8627 if (code == LEU)
8628 code = LTU;
8629 else
8630 code = GEU;
261376e7
RH
8631 ix86_compare_op1 = gen_int_mode (INTVAL (ix86_compare_op1) + 1,
8632 GET_MODE (ix86_compare_op0));
36583fea 8633 }
3a3677ff 8634
e075ae69 8635 start_sequence ();
a1b8572c 8636 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
2f937369 8637 compare_seq = get_insns ();
e075ae69
RH
8638 end_sequence ();
8639
8640 compare_code = GET_CODE (compare_op);
8641
8642 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
8643 HImode insns, we'd be swallowed in word prefix ops. */
8644
635559ab
JH
8645 if (mode != HImode
8646 && (mode != DImode || TARGET_64BIT)
0f290768 8647 && GET_CODE (operands[2]) == CONST_INT
e075ae69
RH
8648 && GET_CODE (operands[3]) == CONST_INT)
8649 {
8650 rtx out = operands[0];
8651 HOST_WIDE_INT ct = INTVAL (operands[2]);
8652 HOST_WIDE_INT cf = INTVAL (operands[3]);
8653 HOST_WIDE_INT diff;
8654
a1b8572c
JH
8655 if ((compare_code == LTU || compare_code == GEU)
8656 && !second_test && !bypass_test)
e075ae69 8657 {
e075ae69
RH
8658
8659 /* Detect overlap between destination and compare sources. */
8660 rtx tmp = out;
8661
0f290768 8662 /* To simplify rest of code, restrict to the GEU case. */
36583fea
JH
8663 if (compare_code == LTU)
8664 {
8665 int tmp = ct;
8666 ct = cf;
8667 cf = tmp;
8668 compare_code = reverse_condition (compare_code);
8669 code = reverse_condition (code);
8670 }
8671 diff = ct - cf;
8672
e075ae69 8673 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
a500c31b 8674 || reg_overlap_mentioned_p (out, ix86_compare_op1))
635559ab 8675 tmp = gen_reg_rtx (mode);
e075ae69
RH
8676
8677 emit_insn (compare_seq);
635559ab 8678 if (mode == DImode)
14f73b5a
JH
8679 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp));
8680 else
8681 emit_insn (gen_x86_movsicc_0_m1 (tmp));
e075ae69 8682
36583fea
JH
8683 if (diff == 1)
8684 {
8685 /*
8686 * cmpl op0,op1
8687 * sbbl dest,dest
8688 * [addl dest, ct]
8689 *
8690 * Size 5 - 8.
8691 */
8692 if (ct)
635559ab
JH
8693 tmp = expand_simple_binop (mode, PLUS,
8694 tmp, GEN_INT (ct),
8695 tmp, 1, OPTAB_DIRECT);
36583fea
JH
8696 }
8697 else if (cf == -1)
8698 {
8699 /*
8700 * cmpl op0,op1
8701 * sbbl dest,dest
8702 * orl $ct, dest
8703 *
8704 * Size 8.
8705 */
635559ab
JH
8706 tmp = expand_simple_binop (mode, IOR,
8707 tmp, GEN_INT (ct),
8708 tmp, 1, OPTAB_DIRECT);
36583fea
JH
8709 }
8710 else if (diff == -1 && ct)
8711 {
8712 /*
8713 * cmpl op0,op1
8714 * sbbl dest,dest
8715 * xorl $-1, dest
8716 * [addl dest, cf]
8717 *
8718 * Size 8 - 11.
8719 */
635559ab
JH
8720 tmp = expand_simple_unop (mode, NOT, tmp, tmp, 1);
8721 if (cf)
8722 tmp = expand_simple_binop (mode, PLUS,
8723 tmp, GEN_INT (cf),
8724 tmp, 1, OPTAB_DIRECT);
36583fea
JH
8725 }
8726 else
8727 {
8728 /*
8729 * cmpl op0,op1
8730 * sbbl dest,dest
8731 * andl cf - ct, dest
8732 * [addl dest, ct]
8733 *
8734 * Size 8 - 11.
8735 */
635559ab
JH
8736 tmp = expand_simple_binop (mode, AND,
8737 tmp,
d8bf17f9 8738 gen_int_mode (cf - ct, mode),
635559ab
JH
8739 tmp, 1, OPTAB_DIRECT);
8740 if (ct)
8741 tmp = expand_simple_binop (mode, PLUS,
8742 tmp, GEN_INT (ct),
8743 tmp, 1, OPTAB_DIRECT);
36583fea 8744 }
e075ae69
RH
8745
8746 if (tmp != out)
8747 emit_move_insn (out, tmp);
8748
8749 return 1; /* DONE */
8750 }
8751
8752 diff = ct - cf;
8753 if (diff < 0)
8754 {
8755 HOST_WIDE_INT tmp;
8756 tmp = ct, ct = cf, cf = tmp;
8757 diff = -diff;
734dba19
JH
8758 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
8759 {
8760 /* We may be reversing unordered compare to normal compare, that
8761 is not valid in general (we may convert non-trapping condition
8762 to trapping one), however on i386 we currently emit all
8763 comparisons unordered. */
8764 compare_code = reverse_condition_maybe_unordered (compare_code);
8765 code = reverse_condition_maybe_unordered (code);
8766 }
8767 else
8768 {
8769 compare_code = reverse_condition (compare_code);
8770 code = reverse_condition (code);
8771 }
e075ae69 8772 }
0f2a3457
JJ
8773
8774 compare_code = NIL;
8775 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
8776 && GET_CODE (ix86_compare_op1) == CONST_INT)
8777 {
8778 if (ix86_compare_op1 == const0_rtx
8779 && (code == LT || code == GE))
8780 compare_code = code;
8781 else if (ix86_compare_op1 == constm1_rtx)
8782 {
8783 if (code == LE)
8784 compare_code = LT;
8785 else if (code == GT)
8786 compare_code = GE;
8787 }
8788 }
8789
8790 /* Optimize dest = (op0 < 0) ? -1 : cf. */
8791 if (compare_code != NIL
8792 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
8793 && (cf == -1 || ct == -1))
8794 {
8795 /* If lea code below could be used, only optimize
8796 if it results in a 2 insn sequence. */
8797
8798 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
8799 || diff == 3 || diff == 5 || diff == 9)
8800 || (compare_code == LT && ct == -1)
8801 || (compare_code == GE && cf == -1))
8802 {
8803 /*
8804 * notl op1 (if necessary)
8805 * sarl $31, op1
8806 * orl cf, op1
8807 */
8808 if (ct != -1)
8809 {
8810 cf = ct;
8811 ct = -1;
8812 code = reverse_condition (code);
8813 }
8814
8815 out = emit_store_flag (out, code, ix86_compare_op0,
8816 ix86_compare_op1, VOIDmode, 0, -1);
8817
8818 out = expand_simple_binop (mode, IOR,
8819 out, GEN_INT (cf),
8820 out, 1, OPTAB_DIRECT);
8821 if (out != operands[0])
8822 emit_move_insn (operands[0], out);
8823
8824 return 1; /* DONE */
8825 }
8826 }
8827
635559ab
JH
8828 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
8829 || diff == 3 || diff == 5 || diff == 9)
8830 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
e075ae69
RH
8831 {
8832 /*
8833 * xorl dest,dest
8834 * cmpl op1,op2
8835 * setcc dest
8836 * lea cf(dest*(ct-cf)),dest
8837 *
8838 * Size 14.
8839 *
8840 * This also catches the degenerate setcc-only case.
8841 */
8842
8843 rtx tmp;
8844 int nops;
8845
8846 out = emit_store_flag (out, code, ix86_compare_op0,
8847 ix86_compare_op1, VOIDmode, 0, 1);
8848
8849 nops = 0;
885a70fd
JH
8850 /* On x86_64 the lea instruction operates on Pmode, so we need to get arithmetics
8851 done in proper mode to match. */
e075ae69 8852 if (diff == 1)
14f73b5a 8853 tmp = out;
e075ae69
RH
8854 else
8855 {
885a70fd 8856 rtx out1;
14f73b5a 8857 out1 = out;
635559ab 8858 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
e075ae69
RH
8859 nops++;
8860 if (diff & 1)
8861 {
635559ab 8862 tmp = gen_rtx_PLUS (mode, tmp, out1);
e075ae69
RH
8863 nops++;
8864 }
8865 }
8866 if (cf != 0)
8867 {
635559ab 8868 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
e075ae69
RH
8869 nops++;
8870 }
885a70fd
JH
8871 if (tmp != out
8872 && (GET_CODE (tmp) != SUBREG || SUBREG_REG (tmp) != out))
e075ae69 8873 {
14f73b5a 8874 if (nops == 1)
e075ae69
RH
8875 {
8876 rtx clob;
8877
8878 clob = gen_rtx_REG (CCmode, FLAGS_REG);
8879 clob = gen_rtx_CLOBBER (VOIDmode, clob);
8880
8881 tmp = gen_rtx_SET (VOIDmode, out, tmp);
8882 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8883 emit_insn (tmp);
8884 }
8885 else
8886 emit_insn (gen_rtx_SET (VOIDmode, out, tmp));
8887 }
8888 if (out != operands[0])
8889 emit_move_insn (operands[0], out);
8890
8891 return 1; /* DONE */
8892 }
8893
8894 /*
8895 * General case: Jumpful:
8896 * xorl dest,dest cmpl op1, op2
8897 * cmpl op1, op2 movl ct, dest
8898 * setcc dest jcc 1f
8899 * decl dest movl cf, dest
8900 * andl (cf-ct),dest 1:
8901 * addl ct,dest
0f290768 8902 *
e075ae69
RH
8903 * Size 20. Size 14.
8904 *
8905 * This is reasonably steep, but branch mispredict costs are
8906 * high on modern cpus, so consider failing only if optimizing
8907 * for space.
8908 *
8909 * %%% Parameterize branch_cost on the tuning architecture, then
8910 * use that. The 80386 couldn't care less about mispredicts.
8911 */
8912
8913 if (!optimize_size && !TARGET_CMOVE)
8914 {
8915 if (ct == 0)
8916 {
8917 ct = cf;
8918 cf = 0;
734dba19 8919 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
0f2a3457
JJ
8920 /* We may be reversing unordered compare to normal compare,
8921 that is not valid in general (we may convert non-trapping
8922 condition to trapping one), however on i386 we currently
8923 emit all comparisons unordered. */
8924 code = reverse_condition_maybe_unordered (code);
8925 else
8926 {
8927 code = reverse_condition (code);
8928 if (compare_code != NIL)
8929 compare_code = reverse_condition (compare_code);
8930 }
8931 }
8932
8933 if (compare_code != NIL)
8934 {
8935 /* notl op1 (if needed)
8936 sarl $31, op1
8937 andl (cf-ct), op1
8938 addl ct, op1
8939
8940 For x < 0 (resp. x <= -1) there will be no notl,
8941 so if possible swap the constants to get rid of the
8942 complement.
8943 True/false will be -1/0 while code below (store flag
8944 followed by decrement) is 0/-1, so the constants need
8945 to be exchanged once more. */
8946
8947 if (compare_code == GE || !cf)
734dba19 8948 {
0f2a3457
JJ
8949 code = reverse_condition (code);
8950 compare_code = LT;
734dba19
JH
8951 }
8952 else
8953 {
0f2a3457
JJ
8954 HOST_WIDE_INT tmp = cf;
8955 cf = ct;
8956 ct = tmp;
734dba19 8957 }
0f2a3457
JJ
8958
8959 out = emit_store_flag (out, code, ix86_compare_op0,
8960 ix86_compare_op1, VOIDmode, 0, -1);
e075ae69 8961 }
0f2a3457
JJ
8962 else
8963 {
8964 out = emit_store_flag (out, code, ix86_compare_op0,
8965 ix86_compare_op1, VOIDmode, 0, 1);
e075ae69 8966
0f2a3457
JJ
8967 out = expand_simple_binop (mode, PLUS,
8968 out, constm1_rtx,
8969 out, 1, OPTAB_DIRECT);
8970 }
e075ae69 8971
635559ab
JH
8972 out = expand_simple_binop (mode, AND,
8973 out,
d8bf17f9 8974 gen_int_mode (cf - ct, mode),
635559ab
JH
8975 out, 1, OPTAB_DIRECT);
8976 out = expand_simple_binop (mode, PLUS,
8977 out, GEN_INT (ct),
8978 out, 1, OPTAB_DIRECT);
e075ae69
RH
8979 if (out != operands[0])
8980 emit_move_insn (operands[0], out);
8981
8982 return 1; /* DONE */
8983 }
8984 }
8985
8986 if (!TARGET_CMOVE)
8987 {
8988 /* Try a few things more with specific constants and a variable. */
8989
78a0d70c 8990 optab op;
e075ae69
RH
8991 rtx var, orig_out, out, tmp;
8992
8993 if (optimize_size)
8994 return 0; /* FAIL */
8995
0f290768 8996 /* If one of the two operands is an interesting constant, load a
e075ae69 8997 constant with the above and mask it in with a logical operation. */
0f290768 8998
e075ae69
RH
8999 if (GET_CODE (operands[2]) == CONST_INT)
9000 {
9001 var = operands[3];
9002 if (INTVAL (operands[2]) == 0)
9003 operands[3] = constm1_rtx, op = and_optab;
9004 else if (INTVAL (operands[2]) == -1)
9005 operands[3] = const0_rtx, op = ior_optab;
78a0d70c
ZW
9006 else
9007 return 0; /* FAIL */
e075ae69
RH
9008 }
9009 else if (GET_CODE (operands[3]) == CONST_INT)
9010 {
9011 var = operands[2];
9012 if (INTVAL (operands[3]) == 0)
9013 operands[2] = constm1_rtx, op = and_optab;
9014 else if (INTVAL (operands[3]) == -1)
9015 operands[2] = const0_rtx, op = ior_optab;
78a0d70c
ZW
9016 else
9017 return 0; /* FAIL */
e075ae69 9018 }
78a0d70c 9019 else
e075ae69
RH
9020 return 0; /* FAIL */
9021
9022 orig_out = operands[0];
635559ab 9023 tmp = gen_reg_rtx (mode);
e075ae69
RH
9024 operands[0] = tmp;
9025
9026 /* Recurse to get the constant loaded. */
9027 if (ix86_expand_int_movcc (operands) == 0)
9028 return 0; /* FAIL */
9029
9030 /* Mask in the interesting variable. */
635559ab 9031 out = expand_binop (mode, op, var, tmp, orig_out, 0,
e075ae69
RH
9032 OPTAB_WIDEN);
9033 if (out != orig_out)
9034 emit_move_insn (orig_out, out);
9035
9036 return 1; /* DONE */
9037 }
9038
9039 /*
9040 * For comparison with above,
9041 *
9042 * movl cf,dest
9043 * movl ct,tmp
9044 * cmpl op1,op2
9045 * cmovcc tmp,dest
9046 *
9047 * Size 15.
9048 */
9049
635559ab
JH
9050 if (! nonimmediate_operand (operands[2], mode))
9051 operands[2] = force_reg (mode, operands[2]);
9052 if (! nonimmediate_operand (operands[3], mode))
9053 operands[3] = force_reg (mode, operands[3]);
e075ae69 9054
a1b8572c
JH
9055 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9056 {
635559ab 9057 rtx tmp = gen_reg_rtx (mode);
a1b8572c
JH
9058 emit_move_insn (tmp, operands[3]);
9059 operands[3] = tmp;
9060 }
9061 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9062 {
635559ab 9063 rtx tmp = gen_reg_rtx (mode);
a1b8572c
JH
9064 emit_move_insn (tmp, operands[2]);
9065 operands[2] = tmp;
9066 }
c9682caf
JH
9067 if (! register_operand (operands[2], VOIDmode)
9068 && ! register_operand (operands[3], VOIDmode))
635559ab 9069 operands[2] = force_reg (mode, operands[2]);
a1b8572c 9070
e075ae69
RH
9071 emit_insn (compare_seq);
9072 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
635559ab 9073 gen_rtx_IF_THEN_ELSE (mode,
e075ae69
RH
9074 compare_op, operands[2],
9075 operands[3])));
a1b8572c
JH
9076 if (bypass_test)
9077 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
635559ab 9078 gen_rtx_IF_THEN_ELSE (mode,
a1b8572c
JH
9079 bypass_test,
9080 operands[3],
9081 operands[0])));
9082 if (second_test)
9083 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
635559ab 9084 gen_rtx_IF_THEN_ELSE (mode,
a1b8572c
JH
9085 second_test,
9086 operands[2],
9087 operands[0])));
e075ae69
RH
9088
9089 return 1; /* DONE */
e9a25f70 9090}
e075ae69 9091
32b5b1aa 9092int
e075ae69
RH
9093ix86_expand_fp_movcc (operands)
9094 rtx operands[];
32b5b1aa 9095{
e075ae69 9096 enum rtx_code code;
e075ae69 9097 rtx tmp;
a1b8572c 9098 rtx compare_op, second_test, bypass_test;
32b5b1aa 9099
0073023d
JH
9100 /* For SF/DFmode conditional moves based on comparisons
9101 in same mode, we may want to use SSE min/max instructions. */
965f5423
JH
9102 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
9103 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
0073023d 9104 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
fa9f36a1
JH
9105 /* The SSE comparisons does not support the LTGT/UNEQ pair. */
9106 && (!TARGET_IEEE_FP
9107 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
0073023d
JH
9108 /* We may be called from the post-reload splitter. */
9109 && (!REG_P (operands[0])
9110 || SSE_REG_P (operands[0])
52a661a6 9111 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
0073023d
JH
9112 {
9113 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
9114 code = GET_CODE (operands[1]);
9115
9116 /* See if we have (cross) match between comparison operands and
9117 conditional move operands. */
9118 if (rtx_equal_p (operands[2], op1))
9119 {
9120 rtx tmp = op0;
9121 op0 = op1;
9122 op1 = tmp;
9123 code = reverse_condition_maybe_unordered (code);
9124 }
9125 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
9126 {
9127 /* Check for min operation. */
9128 if (code == LT)
9129 {
9130 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9131 if (memory_operand (op0, VOIDmode))
9132 op0 = force_reg (GET_MODE (operands[0]), op0);
9133 if (GET_MODE (operands[0]) == SFmode)
9134 emit_insn (gen_minsf3 (operands[0], op0, op1));
9135 else
9136 emit_insn (gen_mindf3 (operands[0], op0, op1));
9137 return 1;
9138 }
9139 /* Check for max operation. */
9140 if (code == GT)
9141 {
9142 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9143 if (memory_operand (op0, VOIDmode))
9144 op0 = force_reg (GET_MODE (operands[0]), op0);
9145 if (GET_MODE (operands[0]) == SFmode)
9146 emit_insn (gen_maxsf3 (operands[0], op0, op1));
9147 else
9148 emit_insn (gen_maxdf3 (operands[0], op0, op1));
9149 return 1;
9150 }
9151 }
9152 /* Manage condition to be sse_comparison_operator. In case we are
9153 in non-ieee mode, try to canonicalize the destination operand
9154 to be first in the comparison - this helps reload to avoid extra
9155 moves. */
9156 if (!sse_comparison_operator (operands[1], VOIDmode)
9157 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
9158 {
9159 rtx tmp = ix86_compare_op0;
9160 ix86_compare_op0 = ix86_compare_op1;
9161 ix86_compare_op1 = tmp;
9162 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
9163 VOIDmode, ix86_compare_op0,
9164 ix86_compare_op1);
9165 }
9166 /* Similary try to manage result to be first operand of conditional
fa9f36a1
JH
9167 move. We also don't support the NE comparison on SSE, so try to
9168 avoid it. */
037f20f1
JH
9169 if ((rtx_equal_p (operands[0], operands[3])
9170 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
9171 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
0073023d
JH
9172 {
9173 rtx tmp = operands[2];
9174 operands[2] = operands[3];
92d0fb09 9175 operands[3] = tmp;
0073023d
JH
9176 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
9177 (GET_CODE (operands[1])),
9178 VOIDmode, ix86_compare_op0,
9179 ix86_compare_op1);
9180 }
9181 if (GET_MODE (operands[0]) == SFmode)
9182 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
9183 operands[2], operands[3],
9184 ix86_compare_op0, ix86_compare_op1));
9185 else
9186 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
9187 operands[2], operands[3],
9188 ix86_compare_op0, ix86_compare_op1));
9189 return 1;
9190 }
9191
e075ae69 9192 /* The floating point conditional move instructions don't directly
0f290768 9193 support conditions resulting from a signed integer comparison. */
32b5b1aa 9194
e075ae69 9195 code = GET_CODE (operands[1]);
a1b8572c 9196 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9e7adcb3
JH
9197
9198 /* The floating point conditional move instructions don't directly
9199 support signed integer comparisons. */
9200
a1b8572c 9201 if (!fcmov_comparison_operator (compare_op, VOIDmode))
e075ae69 9202 {
a1b8572c 9203 if (second_test != NULL || bypass_test != NULL)
b531087a 9204 abort ();
e075ae69 9205 tmp = gen_reg_rtx (QImode);
3a3677ff 9206 ix86_expand_setcc (code, tmp);
e075ae69
RH
9207 code = NE;
9208 ix86_compare_op0 = tmp;
9209 ix86_compare_op1 = const0_rtx;
a1b8572c
JH
9210 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9211 }
9212 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9213 {
9214 tmp = gen_reg_rtx (GET_MODE (operands[0]));
9215 emit_move_insn (tmp, operands[3]);
9216 operands[3] = tmp;
9217 }
9218 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9219 {
9220 tmp = gen_reg_rtx (GET_MODE (operands[0]));
9221 emit_move_insn (tmp, operands[2]);
9222 operands[2] = tmp;
e075ae69 9223 }
e9a25f70 9224
e075ae69
RH
9225 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9226 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9e7adcb3 9227 compare_op,
e075ae69
RH
9228 operands[2],
9229 operands[3])));
a1b8572c
JH
9230 if (bypass_test)
9231 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9232 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9233 bypass_test,
9234 operands[3],
9235 operands[0])));
9236 if (second_test)
9237 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9238 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9239 second_test,
9240 operands[2],
9241 operands[0])));
32b5b1aa 9242
e075ae69 9243 return 1;
32b5b1aa
SC
9244}
9245
2450a057
JH
9246/* Split operands 0 and 1 into SImode parts. Similar to split_di, but
9247 works for floating pointer parameters and nonoffsetable memories.
9248 For pushes, it returns just stack offsets; the values will be saved
9249 in the right order. Maximally three parts are generated. */
9250
2b589241 9251static int
2450a057
JH
9252ix86_split_to_parts (operand, parts, mode)
9253 rtx operand;
9254 rtx *parts;
9255 enum machine_mode mode;
32b5b1aa 9256{
26e5b205
JH
9257 int size;
9258
9259 if (!TARGET_64BIT)
9260 size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
9261 else
9262 size = (GET_MODE_SIZE (mode) + 4) / 8;
2450a057 9263
a7180f70
BS
9264 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
9265 abort ();
2450a057
JH
9266 if (size < 2 || size > 3)
9267 abort ();
9268
f996902d
RH
9269 /* Optimize constant pool reference to immediates. This is used by fp
9270 moves, that force all constants to memory to allow combining. */
9271 if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
9272 {
9273 rtx tmp = maybe_get_pool_constant (operand);
9274 if (tmp)
9275 operand = tmp;
9276 }
d7a29404 9277
2450a057 9278 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
e075ae69 9279 {
2450a057
JH
9280 /* The only non-offsetable memories we handle are pushes. */
9281 if (! push_operand (operand, VOIDmode))
9282 abort ();
9283
26e5b205
JH
9284 operand = copy_rtx (operand);
9285 PUT_MODE (operand, Pmode);
2450a057
JH
9286 parts[0] = parts[1] = parts[2] = operand;
9287 }
26e5b205 9288 else if (!TARGET_64BIT)
2450a057
JH
9289 {
9290 if (mode == DImode)
9291 split_di (&operand, 1, &parts[0], &parts[1]);
9292 else
e075ae69 9293 {
2450a057
JH
9294 if (REG_P (operand))
9295 {
9296 if (!reload_completed)
9297 abort ();
9298 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
9299 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
9300 if (size == 3)
9301 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
9302 }
9303 else if (offsettable_memref_p (operand))
9304 {
f4ef873c 9305 operand = adjust_address (operand, SImode, 0);
2450a057 9306 parts[0] = operand;
b72f00af 9307 parts[1] = adjust_address (operand, SImode, 4);
2450a057 9308 if (size == 3)
b72f00af 9309 parts[2] = adjust_address (operand, SImode, 8);
2450a057
JH
9310 }
9311 else if (GET_CODE (operand) == CONST_DOUBLE)
9312 {
9313 REAL_VALUE_TYPE r;
2b589241 9314 long l[4];
2450a057
JH
9315
9316 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
9317 switch (mode)
9318 {
9319 case XFmode:
2b589241 9320 case TFmode:
2450a057 9321 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
d8bf17f9 9322 parts[2] = gen_int_mode (l[2], SImode);
2450a057
JH
9323 break;
9324 case DFmode:
9325 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
9326 break;
9327 default:
9328 abort ();
9329 }
d8bf17f9
LB
9330 parts[1] = gen_int_mode (l[1], SImode);
9331 parts[0] = gen_int_mode (l[0], SImode);
2450a057
JH
9332 }
9333 else
9334 abort ();
e075ae69 9335 }
2450a057 9336 }
26e5b205
JH
9337 else
9338 {
44cf5b6a
JH
9339 if (mode == TImode)
9340 split_ti (&operand, 1, &parts[0], &parts[1]);
26e5b205
JH
9341 if (mode == XFmode || mode == TFmode)
9342 {
9343 if (REG_P (operand))
9344 {
9345 if (!reload_completed)
9346 abort ();
9347 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
9348 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
9349 }
9350 else if (offsettable_memref_p (operand))
9351 {
b72f00af 9352 operand = adjust_address (operand, DImode, 0);
26e5b205 9353 parts[0] = operand;
b72f00af 9354 parts[1] = adjust_address (operand, SImode, 8);
26e5b205
JH
9355 }
9356 else if (GET_CODE (operand) == CONST_DOUBLE)
9357 {
9358 REAL_VALUE_TYPE r;
9359 long l[3];
9360
9361 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
9362 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
9363 /* Do not use shift by 32 to avoid warning on 32bit systems. */
9364 if (HOST_BITS_PER_WIDE_INT >= 64)
523fbd9d 9365 parts[0]
d8bf17f9 9366 = gen_int_mode
44cf5b6a 9367 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
b531087a 9368 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
d8bf17f9 9369 DImode);
26e5b205
JH
9370 else
9371 parts[0] = immed_double_const (l[0], l[1], DImode);
d8bf17f9 9372 parts[1] = gen_int_mode (l[2], SImode);
26e5b205
JH
9373 }
9374 else
9375 abort ();
9376 }
9377 }
2450a057 9378
2b589241 9379 return size;
2450a057
JH
9380}
9381
9382/* Emit insns to perform a move or push of DI, DF, and XF values.
9383 Return false when normal moves are needed; true when all required
9384 insns have been emitted. Operands 2-4 contain the input values
9385 int the correct order; operands 5-7 contain the output values. */
9386
26e5b205
JH
9387void
9388ix86_split_long_move (operands)
9389 rtx operands[];
2450a057
JH
9390{
9391 rtx part[2][3];
26e5b205 9392 int nparts;
2450a057
JH
9393 int push = 0;
9394 int collisions = 0;
26e5b205
JH
9395 enum machine_mode mode = GET_MODE (operands[0]);
9396
9397 /* The DFmode expanders may ask us to move double.
9398 For 64bit target this is single move. By hiding the fact
9399 here we simplify i386.md splitters. */
9400 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
9401 {
8cdfa312
RH
9402 /* Optimize constant pool reference to immediates. This is used by
9403 fp moves, that force all constants to memory to allow combining. */
26e5b205
JH
9404
9405 if (GET_CODE (operands[1]) == MEM
9406 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
9407 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
9408 operands[1] = get_pool_constant (XEXP (operands[1], 0));
9409 if (push_operand (operands[0], VOIDmode))
b47b4f21
JH
9410 {
9411 operands[0] = copy_rtx (operands[0]);
9412 PUT_MODE (operands[0], Pmode);
9413 }
26e5b205
JH
9414 else
9415 operands[0] = gen_lowpart (DImode, operands[0]);
9416 operands[1] = gen_lowpart (DImode, operands[1]);
9417 emit_move_insn (operands[0], operands[1]);
9418 return;
9419 }
2450a057 9420
2450a057
JH
9421 /* The only non-offsettable memory we handle is push. */
9422 if (push_operand (operands[0], VOIDmode))
9423 push = 1;
9424 else if (GET_CODE (operands[0]) == MEM
9425 && ! offsettable_memref_p (operands[0]))
9426 abort ();
9427
26e5b205
JH
9428 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
9429 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
2450a057
JH
9430
9431 /* When emitting push, take care for source operands on the stack. */
9432 if (push && GET_CODE (operands[1]) == MEM
9433 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
9434 {
26e5b205 9435 if (nparts == 3)
886cbb88
JH
9436 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
9437 XEXP (part[1][2], 0));
9438 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
9439 XEXP (part[1][1], 0));
2450a057
JH
9440 }
9441
0f290768 9442 /* We need to do copy in the right order in case an address register
2450a057
JH
9443 of the source overlaps the destination. */
9444 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
9445 {
9446 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
9447 collisions++;
9448 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
9449 collisions++;
26e5b205 9450 if (nparts == 3
2450a057
JH
9451 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
9452 collisions++;
9453
9454 /* Collision in the middle part can be handled by reordering. */
26e5b205 9455 if (collisions == 1 && nparts == 3
2450a057 9456 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
e075ae69 9457 {
2450a057
JH
9458 rtx tmp;
9459 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
9460 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
9461 }
e075ae69 9462
2450a057
JH
9463 /* If there are more collisions, we can't handle it by reordering.
9464 Do an lea to the last part and use only one colliding move. */
9465 else if (collisions > 1)
9466 {
9467 collisions = 1;
26e5b205 9468 emit_insn (gen_rtx_SET (VOIDmode, part[0][nparts - 1],
2450a057 9469 XEXP (part[1][0], 0)));
26e5b205
JH
9470 part[1][0] = change_address (part[1][0],
9471 TARGET_64BIT ? DImode : SImode,
9472 part[0][nparts - 1]);
b72f00af 9473 part[1][1] = adjust_address (part[1][0], VOIDmode, UNITS_PER_WORD);
26e5b205 9474 if (nparts == 3)
b72f00af 9475 part[1][2] = adjust_address (part[1][0], VOIDmode, 8);
2450a057
JH
9476 }
9477 }
9478
9479 if (push)
9480 {
26e5b205 9481 if (!TARGET_64BIT)
2b589241 9482 {
26e5b205
JH
9483 if (nparts == 3)
9484 {
9485 /* We use only first 12 bytes of TFmode value, but for pushing we
9486 are required to adjust stack as if we were pushing real 16byte
9487 value. */
9488 if (mode == TFmode && !TARGET_64BIT)
9489 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
9490 GEN_INT (-4)));
9491 emit_move_insn (part[0][2], part[1][2]);
9492 }
2b589241 9493 }
26e5b205
JH
9494 else
9495 {
9496 /* In 64bit mode we don't have 32bit push available. In case this is
9497 register, it is OK - we will just use larger counterpart. We also
9498 retype memory - these comes from attempt to avoid REX prefix on
9499 moving of second half of TFmode value. */
9500 if (GET_MODE (part[1][1]) == SImode)
9501 {
9502 if (GET_CODE (part[1][1]) == MEM)
f4ef873c 9503 part[1][1] = adjust_address (part[1][1], DImode, 0);
26e5b205
JH
9504 else if (REG_P (part[1][1]))
9505 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
9506 else
b531087a 9507 abort ();
886cbb88
JH
9508 if (GET_MODE (part[1][0]) == SImode)
9509 part[1][0] = part[1][1];
26e5b205
JH
9510 }
9511 }
9512 emit_move_insn (part[0][1], part[1][1]);
9513 emit_move_insn (part[0][0], part[1][0]);
9514 return;
2450a057
JH
9515 }
9516
9517 /* Choose correct order to not overwrite the source before it is copied. */
9518 if ((REG_P (part[0][0])
9519 && REG_P (part[1][1])
9520 && (REGNO (part[0][0]) == REGNO (part[1][1])
26e5b205 9521 || (nparts == 3
2450a057
JH
9522 && REGNO (part[0][0]) == REGNO (part[1][2]))))
9523 || (collisions > 0
9524 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
9525 {
26e5b205 9526 if (nparts == 3)
2450a057 9527 {
26e5b205
JH
9528 operands[2] = part[0][2];
9529 operands[3] = part[0][1];
9530 operands[4] = part[0][0];
9531 operands[5] = part[1][2];
9532 operands[6] = part[1][1];
9533 operands[7] = part[1][0];
2450a057
JH
9534 }
9535 else
9536 {
26e5b205
JH
9537 operands[2] = part[0][1];
9538 operands[3] = part[0][0];
9539 operands[5] = part[1][1];
9540 operands[6] = part[1][0];
2450a057
JH
9541 }
9542 }
9543 else
9544 {
26e5b205 9545 if (nparts == 3)
2450a057 9546 {
26e5b205
JH
9547 operands[2] = part[0][0];
9548 operands[3] = part[0][1];
9549 operands[4] = part[0][2];
9550 operands[5] = part[1][0];
9551 operands[6] = part[1][1];
9552 operands[7] = part[1][2];
2450a057
JH
9553 }
9554 else
9555 {
26e5b205
JH
9556 operands[2] = part[0][0];
9557 operands[3] = part[0][1];
9558 operands[5] = part[1][0];
9559 operands[6] = part[1][1];
e075ae69
RH
9560 }
9561 }
26e5b205
JH
9562 emit_move_insn (operands[2], operands[5]);
9563 emit_move_insn (operands[3], operands[6]);
9564 if (nparts == 3)
9565 emit_move_insn (operands[4], operands[7]);
32b5b1aa 9566
26e5b205 9567 return;
32b5b1aa 9568}
32b5b1aa 9569
e075ae69
RH
9570void
9571ix86_split_ashldi (operands, scratch)
9572 rtx *operands, scratch;
32b5b1aa 9573{
e075ae69
RH
9574 rtx low[2], high[2];
9575 int count;
b985a30f 9576
e075ae69
RH
9577 if (GET_CODE (operands[2]) == CONST_INT)
9578 {
9579 split_di (operands, 2, low, high);
9580 count = INTVAL (operands[2]) & 63;
32b5b1aa 9581
e075ae69
RH
9582 if (count >= 32)
9583 {
9584 emit_move_insn (high[0], low[1]);
9585 emit_move_insn (low[0], const0_rtx);
b985a30f 9586
e075ae69
RH
9587 if (count > 32)
9588 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
9589 }
9590 else
9591 {
9592 if (!rtx_equal_p (operands[0], operands[1]))
9593 emit_move_insn (operands[0], operands[1]);
9594 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
9595 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
9596 }
9597 }
9598 else
9599 {
9600 if (!rtx_equal_p (operands[0], operands[1]))
9601 emit_move_insn (operands[0], operands[1]);
b985a30f 9602
e075ae69 9603 split_di (operands, 1, low, high);
b985a30f 9604
e075ae69
RH
9605 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
9606 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
32b5b1aa 9607
fe577e58 9608 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
e075ae69 9609 {
fe577e58 9610 if (! no_new_pseudos)
e075ae69
RH
9611 scratch = force_reg (SImode, const0_rtx);
9612 else
9613 emit_move_insn (scratch, const0_rtx);
9614
9615 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
9616 scratch));
9617 }
9618 else
9619 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
9620 }
e9a25f70 9621}
32b5b1aa 9622
e075ae69
RH
9623void
9624ix86_split_ashrdi (operands, scratch)
9625 rtx *operands, scratch;
32b5b1aa 9626{
e075ae69
RH
9627 rtx low[2], high[2];
9628 int count;
32b5b1aa 9629
e075ae69
RH
9630 if (GET_CODE (operands[2]) == CONST_INT)
9631 {
9632 split_di (operands, 2, low, high);
9633 count = INTVAL (operands[2]) & 63;
32b5b1aa 9634
e075ae69
RH
9635 if (count >= 32)
9636 {
9637 emit_move_insn (low[0], high[1]);
32b5b1aa 9638
e075ae69
RH
9639 if (! reload_completed)
9640 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
9641 else
9642 {
9643 emit_move_insn (high[0], low[0]);
9644 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
9645 }
9646
9647 if (count > 32)
9648 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
9649 }
9650 else
9651 {
9652 if (!rtx_equal_p (operands[0], operands[1]))
9653 emit_move_insn (operands[0], operands[1]);
9654 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
9655 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
9656 }
9657 }
9658 else
32b5b1aa 9659 {
e075ae69
RH
9660 if (!rtx_equal_p (operands[0], operands[1]))
9661 emit_move_insn (operands[0], operands[1]);
9662
9663 split_di (operands, 1, low, high);
9664
9665 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
9666 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
9667
fe577e58 9668 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
e075ae69 9669 {
fe577e58 9670 if (! no_new_pseudos)
e075ae69
RH
9671 scratch = gen_reg_rtx (SImode);
9672 emit_move_insn (scratch, high[0]);
9673 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
9674 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
9675 scratch));
9676 }
9677 else
9678 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
32b5b1aa 9679 }
e075ae69 9680}
32b5b1aa 9681
e075ae69
RH
9682void
9683ix86_split_lshrdi (operands, scratch)
9684 rtx *operands, scratch;
9685{
9686 rtx low[2], high[2];
9687 int count;
32b5b1aa 9688
e075ae69 9689 if (GET_CODE (operands[2]) == CONST_INT)
32b5b1aa 9690 {
e075ae69
RH
9691 split_di (operands, 2, low, high);
9692 count = INTVAL (operands[2]) & 63;
9693
9694 if (count >= 32)
c7271385 9695 {
e075ae69
RH
9696 emit_move_insn (low[0], high[1]);
9697 emit_move_insn (high[0], const0_rtx);
32b5b1aa 9698
e075ae69
RH
9699 if (count > 32)
9700 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
9701 }
9702 else
9703 {
9704 if (!rtx_equal_p (operands[0], operands[1]))
9705 emit_move_insn (operands[0], operands[1]);
9706 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
9707 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
9708 }
32b5b1aa 9709 }
e075ae69
RH
9710 else
9711 {
9712 if (!rtx_equal_p (operands[0], operands[1]))
9713 emit_move_insn (operands[0], operands[1]);
32b5b1aa 9714
e075ae69
RH
9715 split_di (operands, 1, low, high);
9716
9717 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
9718 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
9719
9720 /* Heh. By reversing the arguments, we can reuse this pattern. */
fe577e58 9721 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
e075ae69 9722 {
fe577e58 9723 if (! no_new_pseudos)
e075ae69
RH
9724 scratch = force_reg (SImode, const0_rtx);
9725 else
9726 emit_move_insn (scratch, const0_rtx);
9727
9728 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
9729 scratch));
9730 }
9731 else
9732 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
9733 }
32b5b1aa 9734}
3f803cd9 9735
0407c02b 9736/* Helper function for the string operations below. Dest VARIABLE whether
0945b39d
JH
9737 it is aligned to VALUE bytes. If true, jump to the label. */
9738static rtx
9739ix86_expand_aligntest (variable, value)
9740 rtx variable;
9741 int value;
9742{
9743 rtx label = gen_label_rtx ();
9744 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
9745 if (GET_MODE (variable) == DImode)
9746 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
9747 else
9748 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
9749 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
d43e0b7d 9750 1, label);
0945b39d
JH
9751 return label;
9752}
9753
9754/* Adjust COUNTER by the VALUE. */
9755static void
9756ix86_adjust_counter (countreg, value)
9757 rtx countreg;
9758 HOST_WIDE_INT value;
9759{
9760 if (GET_MODE (countreg) == DImode)
9761 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
9762 else
9763 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
9764}
9765
9766/* Zero extend possibly SImode EXP to Pmode register. */
d24b3457 9767rtx
0945b39d
JH
9768ix86_zero_extend_to_Pmode (exp)
9769 rtx exp;
9770{
9771 rtx r;
9772 if (GET_MODE (exp) == VOIDmode)
9773 return force_reg (Pmode, exp);
9774 if (GET_MODE (exp) == Pmode)
9775 return copy_to_mode_reg (Pmode, exp);
9776 r = gen_reg_rtx (Pmode);
9777 emit_insn (gen_zero_extendsidi2 (r, exp));
9778 return r;
9779}
9780
9781/* Expand string move (memcpy) operation. Use i386 string operations when
9782 profitable. expand_clrstr contains similar code. */
9783int
9784ix86_expand_movstr (dst, src, count_exp, align_exp)
9785 rtx dst, src, count_exp, align_exp;
9786{
9787 rtx srcreg, destreg, countreg;
9788 enum machine_mode counter_mode;
9789 HOST_WIDE_INT align = 0;
9790 unsigned HOST_WIDE_INT count = 0;
9791 rtx insns;
9792
9793 start_sequence ();
9794
9795 if (GET_CODE (align_exp) == CONST_INT)
9796 align = INTVAL (align_exp);
9797
5519a4f9 9798 /* This simple hack avoids all inlining code and simplifies code below. */
0945b39d
JH
9799 if (!TARGET_ALIGN_STRINGOPS)
9800 align = 64;
9801
9802 if (GET_CODE (count_exp) == CONST_INT)
9803 count = INTVAL (count_exp);
9804
9805 /* Figure out proper mode for counter. For 32bits it is always SImode,
9806 for 64bits use SImode when possible, otherwise DImode.
9807 Set count to number of bytes copied when known at compile time. */
9808 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
9809 || x86_64_zero_extended_value (count_exp))
9810 counter_mode = SImode;
9811 else
9812 counter_mode = DImode;
9813
9814 if (counter_mode != SImode && counter_mode != DImode)
9815 abort ();
9816
9817 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
9818 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
9819
9820 emit_insn (gen_cld ());
9821
9822 /* When optimizing for size emit simple rep ; movsb instruction for
9823 counts not divisible by 4. */
9824
9825 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
9826 {
9827 countreg = ix86_zero_extend_to_Pmode (count_exp);
9828 if (TARGET_64BIT)
9829 emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
9830 destreg, srcreg, countreg));
9831 else
9832 emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
9833 destreg, srcreg, countreg));
9834 }
9835
9836 /* For constant aligned (or small unaligned) copies use rep movsl
9837 followed by code copying the rest. For PentiumPro ensure 8 byte
9838 alignment to allow rep movsl acceleration. */
9839
9840 else if (count != 0
9841 && (align >= 8
9842 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
b531087a 9843 || optimize_size || count < (unsigned int) 64))
0945b39d
JH
9844 {
9845 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
9846 if (count & ~(size - 1))
9847 {
9848 countreg = copy_to_mode_reg (counter_mode,
9849 GEN_INT ((count >> (size == 4 ? 2 : 3))
9850 & (TARGET_64BIT ? -1 : 0x3fffffff)));
9851 countreg = ix86_zero_extend_to_Pmode (countreg);
9852 if (size == 4)
9853 {
9854 if (TARGET_64BIT)
9855 emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
9856 destreg, srcreg, countreg));
9857 else
9858 emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
9859 destreg, srcreg, countreg));
9860 }
9861 else
9862 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
9863 destreg, srcreg, countreg));
9864 }
9865 if (size == 8 && (count & 0x04))
9866 emit_insn (gen_strmovsi (destreg, srcreg));
9867 if (count & 0x02)
9868 emit_insn (gen_strmovhi (destreg, srcreg));
9869 if (count & 0x01)
9870 emit_insn (gen_strmovqi (destreg, srcreg));
9871 }
9872 /* The generic code based on the glibc implementation:
9873 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
9874 allowing accelerated copying there)
9875 - copy the data using rep movsl
9876 - copy the rest. */
9877 else
9878 {
9879 rtx countreg2;
9880 rtx label = NULL;
37ad04a5
JH
9881 int desired_alignment = (TARGET_PENTIUMPRO
9882 && (count == 0 || count >= (unsigned int) 260)
9883 ? 8 : UNITS_PER_WORD);
0945b39d
JH
9884
9885 /* In case we don't know anything about the alignment, default to
9886 library version, since it is usually equally fast and result in
9887 shorter code. */
9888 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
9889 {
9890 end_sequence ();
9891 return 0;
9892 }
9893
9894 if (TARGET_SINGLE_STRINGOP)
9895 emit_insn (gen_cld ());
9896
9897 countreg2 = gen_reg_rtx (Pmode);
9898 countreg = copy_to_mode_reg (counter_mode, count_exp);
9899
9900 /* We don't use loops to align destination and to copy parts smaller
9901 than 4 bytes, because gcc is able to optimize such code better (in
9902 the case the destination or the count really is aligned, gcc is often
9903 able to predict the branches) and also it is friendlier to the
a4f31c00 9904 hardware branch prediction.
0945b39d
JH
9905
9906 Using loops is benefical for generic case, because we can
9907 handle small counts using the loops. Many CPUs (such as Athlon)
9908 have large REP prefix setup costs.
9909
9910 This is quite costy. Maybe we can revisit this decision later or
9911 add some customizability to this code. */
9912
37ad04a5 9913 if (count == 0 && align < desired_alignment)
0945b39d
JH
9914 {
9915 label = gen_label_rtx ();
aaae0bb9 9916 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
d43e0b7d 9917 LEU, 0, counter_mode, 1, label);
0945b39d
JH
9918 }
9919 if (align <= 1)
9920 {
9921 rtx label = ix86_expand_aligntest (destreg, 1);
9922 emit_insn (gen_strmovqi (destreg, srcreg));
9923 ix86_adjust_counter (countreg, 1);
9924 emit_label (label);
9925 LABEL_NUSES (label) = 1;
9926 }
9927 if (align <= 2)
9928 {
9929 rtx label = ix86_expand_aligntest (destreg, 2);
9930 emit_insn (gen_strmovhi (destreg, srcreg));
9931 ix86_adjust_counter (countreg, 2);
9932 emit_label (label);
9933 LABEL_NUSES (label) = 1;
9934 }
37ad04a5 9935 if (align <= 4 && desired_alignment > 4)
0945b39d
JH
9936 {
9937 rtx label = ix86_expand_aligntest (destreg, 4);
9938 emit_insn (gen_strmovsi (destreg, srcreg));
9939 ix86_adjust_counter (countreg, 4);
9940 emit_label (label);
9941 LABEL_NUSES (label) = 1;
9942 }
9943
37ad04a5
JH
9944 if (label && desired_alignment > 4 && !TARGET_64BIT)
9945 {
9946 emit_label (label);
9947 LABEL_NUSES (label) = 1;
9948 label = NULL_RTX;
9949 }
0945b39d
JH
9950 if (!TARGET_SINGLE_STRINGOP)
9951 emit_insn (gen_cld ());
9952 if (TARGET_64BIT)
9953 {
9954 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
9955 GEN_INT (3)));
9956 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
9957 destreg, srcreg, countreg2));
9958 }
9959 else
9960 {
9961 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
9962 emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
9963 destreg, srcreg, countreg2));
9964 }
9965
9966 if (label)
9967 {
9968 emit_label (label);
9969 LABEL_NUSES (label) = 1;
9970 }
9971 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
9972 emit_insn (gen_strmovsi (destreg, srcreg));
9973 if ((align <= 4 || count == 0) && TARGET_64BIT)
9974 {
9975 rtx label = ix86_expand_aligntest (countreg, 4);
9976 emit_insn (gen_strmovsi (destreg, srcreg));
9977 emit_label (label);
9978 LABEL_NUSES (label) = 1;
9979 }
9980 if (align > 2 && count != 0 && (count & 2))
9981 emit_insn (gen_strmovhi (destreg, srcreg));
9982 if (align <= 2 || count == 0)
9983 {
9984 rtx label = ix86_expand_aligntest (countreg, 2);
9985 emit_insn (gen_strmovhi (destreg, srcreg));
9986 emit_label (label);
9987 LABEL_NUSES (label) = 1;
9988 }
9989 if (align > 1 && count != 0 && (count & 1))
9990 emit_insn (gen_strmovqi (destreg, srcreg));
9991 if (align <= 1 || count == 0)
9992 {
9993 rtx label = ix86_expand_aligntest (countreg, 1);
9994 emit_insn (gen_strmovqi (destreg, srcreg));
9995 emit_label (label);
9996 LABEL_NUSES (label) = 1;
9997 }
9998 }
9999
10000 insns = get_insns ();
10001 end_sequence ();
10002
10003 ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
2f937369 10004 emit_insn (insns);
0945b39d
JH
10005 return 1;
10006}
10007
10008/* Expand string clear operation (bzero). Use i386 string operations when
10009 profitable. expand_movstr contains similar code. */
10010int
10011ix86_expand_clrstr (src, count_exp, align_exp)
10012 rtx src, count_exp, align_exp;
10013{
10014 rtx destreg, zeroreg, countreg;
10015 enum machine_mode counter_mode;
10016 HOST_WIDE_INT align = 0;
10017 unsigned HOST_WIDE_INT count = 0;
10018
10019 if (GET_CODE (align_exp) == CONST_INT)
10020 align = INTVAL (align_exp);
10021
5519a4f9 10022 /* This simple hack avoids all inlining code and simplifies code below. */
0945b39d
JH
10023 if (!TARGET_ALIGN_STRINGOPS)
10024 align = 32;
10025
10026 if (GET_CODE (count_exp) == CONST_INT)
10027 count = INTVAL (count_exp);
10028 /* Figure out proper mode for counter. For 32bits it is always SImode,
10029 for 64bits use SImode when possible, otherwise DImode.
10030 Set count to number of bytes copied when known at compile time. */
10031 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
10032 || x86_64_zero_extended_value (count_exp))
10033 counter_mode = SImode;
10034 else
10035 counter_mode = DImode;
10036
10037 destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
10038
10039 emit_insn (gen_cld ());
10040
10041 /* When optimizing for size emit simple rep ; movsb instruction for
10042 counts not divisible by 4. */
10043
10044 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
10045 {
10046 countreg = ix86_zero_extend_to_Pmode (count_exp);
10047 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
10048 if (TARGET_64BIT)
10049 emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
10050 destreg, countreg));
10051 else
10052 emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
10053 destreg, countreg));
10054 }
10055 else if (count != 0
10056 && (align >= 8
10057 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
b531087a 10058 || optimize_size || count < (unsigned int) 64))
0945b39d
JH
10059 {
10060 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10061 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
10062 if (count & ~(size - 1))
10063 {
10064 countreg = copy_to_mode_reg (counter_mode,
10065 GEN_INT ((count >> (size == 4 ? 2 : 3))
10066 & (TARGET_64BIT ? -1 : 0x3fffffff)));
10067 countreg = ix86_zero_extend_to_Pmode (countreg);
10068 if (size == 4)
10069 {
10070 if (TARGET_64BIT)
10071 emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
10072 destreg, countreg));
10073 else
10074 emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
10075 destreg, countreg));
10076 }
10077 else
10078 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
10079 destreg, countreg));
10080 }
10081 if (size == 8 && (count & 0x04))
10082 emit_insn (gen_strsetsi (destreg,
10083 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10084 if (count & 0x02)
10085 emit_insn (gen_strsethi (destreg,
10086 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10087 if (count & 0x01)
10088 emit_insn (gen_strsetqi (destreg,
10089 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10090 }
10091 else
10092 {
10093 rtx countreg2;
10094 rtx label = NULL;
37ad04a5
JH
10095 /* Compute desired alignment of the string operation. */
10096 int desired_alignment = (TARGET_PENTIUMPRO
10097 && (count == 0 || count >= (unsigned int) 260)
10098 ? 8 : UNITS_PER_WORD);
0945b39d
JH
10099
10100 /* In case we don't know anything about the alignment, default to
10101 library version, since it is usually equally fast and result in
10102 shorter code. */
10103 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
10104 return 0;
10105
10106 if (TARGET_SINGLE_STRINGOP)
10107 emit_insn (gen_cld ());
10108
10109 countreg2 = gen_reg_rtx (Pmode);
10110 countreg = copy_to_mode_reg (counter_mode, count_exp);
10111 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
10112
37ad04a5 10113 if (count == 0 && align < desired_alignment)
0945b39d
JH
10114 {
10115 label = gen_label_rtx ();
37ad04a5 10116 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
d43e0b7d 10117 LEU, 0, counter_mode, 1, label);
0945b39d
JH
10118 }
10119 if (align <= 1)
10120 {
10121 rtx label = ix86_expand_aligntest (destreg, 1);
10122 emit_insn (gen_strsetqi (destreg,
10123 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10124 ix86_adjust_counter (countreg, 1);
10125 emit_label (label);
10126 LABEL_NUSES (label) = 1;
10127 }
10128 if (align <= 2)
10129 {
10130 rtx label = ix86_expand_aligntest (destreg, 2);
10131 emit_insn (gen_strsethi (destreg,
10132 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10133 ix86_adjust_counter (countreg, 2);
10134 emit_label (label);
10135 LABEL_NUSES (label) = 1;
10136 }
37ad04a5 10137 if (align <= 4 && desired_alignment > 4)
0945b39d
JH
10138 {
10139 rtx label = ix86_expand_aligntest (destreg, 4);
10140 emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
10141 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
10142 : zeroreg)));
10143 ix86_adjust_counter (countreg, 4);
10144 emit_label (label);
10145 LABEL_NUSES (label) = 1;
10146 }
10147
37ad04a5
JH
10148 if (label && desired_alignment > 4 && !TARGET_64BIT)
10149 {
10150 emit_label (label);
10151 LABEL_NUSES (label) = 1;
10152 label = NULL_RTX;
10153 }
10154
0945b39d
JH
10155 if (!TARGET_SINGLE_STRINGOP)
10156 emit_insn (gen_cld ());
10157 if (TARGET_64BIT)
10158 {
10159 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10160 GEN_INT (3)));
10161 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
10162 destreg, countreg2));
10163 }
10164 else
10165 {
10166 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
10167 emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
10168 destreg, countreg2));
10169 }
0945b39d
JH
10170 if (label)
10171 {
10172 emit_label (label);
10173 LABEL_NUSES (label) = 1;
10174 }
37ad04a5 10175
0945b39d
JH
10176 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10177 emit_insn (gen_strsetsi (destreg,
10178 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10179 if (TARGET_64BIT && (align <= 4 || count == 0))
10180 {
74411039 10181 rtx label = ix86_expand_aligntest (countreg, 2);
0945b39d
JH
10182 emit_insn (gen_strsetsi (destreg,
10183 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10184 emit_label (label);
10185 LABEL_NUSES (label) = 1;
10186 }
10187 if (align > 2 && count != 0 && (count & 2))
10188 emit_insn (gen_strsethi (destreg,
10189 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10190 if (align <= 2 || count == 0)
10191 {
74411039 10192 rtx label = ix86_expand_aligntest (countreg, 2);
0945b39d
JH
10193 emit_insn (gen_strsethi (destreg,
10194 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10195 emit_label (label);
10196 LABEL_NUSES (label) = 1;
10197 }
10198 if (align > 1 && count != 0 && (count & 1))
10199 emit_insn (gen_strsetqi (destreg,
10200 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10201 if (align <= 1 || count == 0)
10202 {
74411039 10203 rtx label = ix86_expand_aligntest (countreg, 1);
0945b39d
JH
10204 emit_insn (gen_strsetqi (destreg,
10205 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10206 emit_label (label);
10207 LABEL_NUSES (label) = 1;
10208 }
10209 }
10210 return 1;
10211}
10212/* Expand strlen. */
10213int
10214ix86_expand_strlen (out, src, eoschar, align)
10215 rtx out, src, eoschar, align;
10216{
10217 rtx addr, scratch1, scratch2, scratch3, scratch4;
10218
10219 /* The generic case of strlen expander is long. Avoid it's
10220 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
10221
10222 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
10223 && !TARGET_INLINE_ALL_STRINGOPS
10224 && !optimize_size
10225 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
10226 return 0;
10227
10228 addr = force_reg (Pmode, XEXP (src, 0));
10229 scratch1 = gen_reg_rtx (Pmode);
10230
10231 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
10232 && !optimize_size)
10233 {
10234 /* Well it seems that some optimizer does not combine a call like
10235 foo(strlen(bar), strlen(bar));
10236 when the move and the subtraction is done here. It does calculate
10237 the length just once when these instructions are done inside of
10238 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
10239 often used and I use one fewer register for the lifetime of
10240 output_strlen_unroll() this is better. */
10241
10242 emit_move_insn (out, addr);
10243
10244 ix86_expand_strlensi_unroll_1 (out, align);
10245
10246 /* strlensi_unroll_1 returns the address of the zero at the end of
10247 the string, like memchr(), so compute the length by subtracting
10248 the start address. */
10249 if (TARGET_64BIT)
10250 emit_insn (gen_subdi3 (out, out, addr));
10251 else
10252 emit_insn (gen_subsi3 (out, out, addr));
10253 }
10254 else
10255 {
10256 scratch2 = gen_reg_rtx (Pmode);
10257 scratch3 = gen_reg_rtx (Pmode);
10258 scratch4 = force_reg (Pmode, constm1_rtx);
10259
10260 emit_move_insn (scratch3, addr);
10261 eoschar = force_reg (QImode, eoschar);
10262
10263 emit_insn (gen_cld ());
10264 if (TARGET_64BIT)
10265 {
10266 emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
10267 align, scratch4, scratch3));
10268 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
10269 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
10270 }
10271 else
10272 {
10273 emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
10274 align, scratch4, scratch3));
10275 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
10276 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
10277 }
10278 }
10279 return 1;
10280}
10281
/* Expand the appropriate insns for doing strlen if not just doing
   repnz; scasb

   out = result register, initialized with the start address; on exit it
	 holds the address of the terminating zero byte (like memchr()).
   align_rtx = alignment of the address (a CONST_INT when known).

   This is just the body.  It needs the initialisations mentioned above and
   some address computing at the end.  These things are done in i386.md.  */

static void
ix86_expand_strlensi_unroll_1 (out, align_rtx)
     rtx out, align_rtx;
{
  int align;
  rtx tmp;
  rtx align_2_label = NULL_RTX;
  rtx align_3_label = NULL_RTX;
  rtx align_4_label = gen_label_rtx ();
  rtx end_0_label = gen_label_rtx ();
  rtx mem;
  rtx tmpreg = gen_reg_rtx (SImode);
  rtx scratch = gen_reg_rtx (SImode);

  align = 0;
  if (GET_CODE (align_rtx) == CONST_INT)
    align = INTVAL (align_rtx);

  /* Loop to check 1..3 bytes for null to get an aligned pointer.  */

  /* Is there a known alignment and is it less than 4?  */
  if (align < 4)
    {
      rtx scratch1 = gen_reg_rtx (Pmode);
      emit_move_insn (scratch1, out);
      /* Is there a known alignment and is it not 2? */
      if (align != 2)
	{
	  align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
	  align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */

	  /* Leave just the 3 lower bits.  */
	  align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
				    NULL_RTX, 0, OPTAB_WIDEN);

	  /* Dispatch on the low bits: 0 -> already aligned, 2 -> two
	     bytes to check, 3 -> one byte, otherwise fall through to
	     check up to three bytes.  */
	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
				   Pmode, 1, align_4_label);
	  emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
				   Pmode, 1, align_2_label);
	  emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
				   Pmode, 1, align_3_label);
	}
      else
	{
	  /* Since the alignment is 2, we have to check 2 or 0 bytes;
	     check if is aligned to 4 - byte.  */

	  align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
				    NULL_RTX, 0, OPTAB_WIDEN);

	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
				   Pmode, 1, align_4_label);
	}

      mem = gen_rtx_MEM (QImode, out);

      /* Now compare the bytes.  */

      /* Compare the first n unaligned byte on a byte per byte basis.  */
      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
			       QImode, 1, end_0_label);

      /* Increment the address.  */
      if (TARGET_64BIT)
	emit_insn (gen_adddi3 (out, out, const1_rtx));
      else
	emit_insn (gen_addsi3 (out, out, const1_rtx));

      /* Not needed with an alignment of 2 */
      if (align != 2)
	{
	  emit_label (align_2_label);

	  emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
				   end_0_label);

	  if (TARGET_64BIT)
	    emit_insn (gen_adddi3 (out, out, const1_rtx));
	  else
	    emit_insn (gen_addsi3 (out, out, const1_rtx));

	  emit_label (align_3_label);
	}

      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
			       end_0_label);

      if (TARGET_64BIT)
	emit_insn (gen_adddi3 (out, out, const1_rtx));
      else
	emit_insn (gen_addsi3 (out, out, const1_rtx));
    }

  /* Generate loop to check 4 bytes at a time.  It is not a good idea to
     align this loop.  It gives only huge programs, but does not help to
     speed up.  */
  emit_label (align_4_label);

  mem = gen_rtx_MEM (SImode, out);
  emit_move_insn (scratch, mem);
  if (TARGET_64BIT)
    emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
  else
    emit_insn (gen_addsi3 (out, out, GEN_INT (4)));

  /* This formula yields a nonzero result iff one of the bytes is zero.
     This saves three branches inside loop and many cycles.
     tmpreg = (scratch - 0x01010101) & ~scratch & 0x80808080.  */

  emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
  emit_insn (gen_one_cmplsi2 (scratch, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg,
			 gen_int_mode (0x80808080, SImode)));
  emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
			   align_4_label);

  if (TARGET_CMOVE)
    {
      /* Branch-free variant: conditionally move past the first two
	 bytes when neither of them is the zero.  */
      rtx reg = gen_reg_rtx (SImode);
      rtx reg2 = gen_reg_rtx (Pmode);
      emit_move_insn (reg, tmpreg);
      emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));

      /* If zero is not in the first two bytes, move two bytes forward.  */
      emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
			      gen_rtx_IF_THEN_ELSE (SImode, tmp,
						    reg,
						    tmpreg)));
      /* Emit lea manually to avoid clobbering of flags.  */
      emit_insn (gen_rtx_SET (SImode, reg2,
			      gen_rtx_PLUS (Pmode, out, GEN_INT (2))));

      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, out,
			      gen_rtx_IF_THEN_ELSE (Pmode, tmp,
						    reg2,
						    out)));

    }
  else
    {
      rtx end_2_label = gen_label_rtx ();
      /* Is zero in the first two bytes? */

      emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				  gen_rtx_LABEL_REF (VOIDmode, end_2_label),
				  pc_rtx);
      tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
      JUMP_LABEL (tmp) = end_2_label;

      /* Not in the first two.  Move two bytes forward.  */
      emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
      if (TARGET_64BIT)
	emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
      else
	emit_insn (gen_addsi3 (out, out, GEN_INT (2)));

      emit_label (end_2_label);

    }

  /* Avoid branch in fixing the byte.  The carry from the doubled high
     bit selects whether OUT must step back 3 or 2 bytes.  */
  tmpreg = gen_lowpart (QImode, tmpreg);
  emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
  if (TARGET_64BIT)
    emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3)));
  else
    emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));

  emit_label (end_0_label);
}
0e07aff3
RH
10471
10472void
10473ix86_expand_call (retval, fnaddr, callarg1, callarg2, pop)
10474 rtx retval, fnaddr, callarg1, callarg2, pop;
10475{
10476 rtx use = NULL, call;
10477
10478 if (pop == const0_rtx)
10479 pop = NULL;
10480 if (TARGET_64BIT && pop)
10481 abort ();
10482
10483 /* Static functions and indirect calls don't need the pic register. */
10484 if (! TARGET_64BIT && flag_pic
10485 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
10486 && ! SYMBOL_REF_FLAG (XEXP (fnaddr, 0)))
66edd3b4 10487 use_reg (&use, pic_offset_table_rtx);
0e07aff3
RH
10488
10489 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
10490 {
10491 rtx al = gen_rtx_REG (QImode, 0);
10492 emit_move_insn (al, callarg2);
10493 use_reg (&use, al);
10494 }
10495
10496 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
10497 {
10498 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
10499 fnaddr = gen_rtx_MEM (QImode, fnaddr);
10500 }
10501
10502 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
10503 if (retval)
10504 call = gen_rtx_SET (VOIDmode, retval, call);
10505 if (pop)
10506 {
10507 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
10508 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
10509 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
10510 }
10511
10512 call = emit_call_insn (call);
10513 if (use)
10514 CALL_INSN_FUNCTION_USAGE (call) = use;
10515}
fce5a9f2 10516
e075ae69 10517\f
e075ae69
RH
10518/* Clear stack slot assignments remembered from previous functions.
10519 This is called from INIT_EXPANDERS once before RTL is emitted for each
10520 function. */
10521
e2500fed
GK
10522static struct machine_function *
10523ix86_init_machine_status ()
37b15744 10524{
e2500fed 10525 return ggc_alloc_cleared (sizeof (struct machine_function));
1526a060
BS
10526}
10527
e075ae69
RH
10528/* Return a MEM corresponding to a stack slot with mode MODE.
10529 Allocate a new slot if necessary.
10530
10531 The RTL for a function can have several slots available: N is
10532 which slot to use. */
10533
10534rtx
10535assign_386_stack_local (mode, n)
10536 enum machine_mode mode;
10537 int n;
10538{
10539 if (n < 0 || n >= MAX_386_STACK_LOCALS)
10540 abort ();
10541
10542 if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
10543 ix86_stack_locals[(int) mode][n]
10544 = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
10545
10546 return ix86_stack_locals[(int) mode][n];
10547}
f996902d
RH
10548
10549/* Construct the SYMBOL_REF for the tls_get_addr function. */
10550
e2500fed 10551static GTY(()) rtx ix86_tls_symbol;
f996902d
RH
10552rtx
10553ix86_tls_get_addr ()
10554{
f996902d 10555
e2500fed 10556 if (!ix86_tls_symbol)
f996902d 10557 {
e2500fed 10558 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, (TARGET_GNU_TLS
f996902d
RH
10559 ? "___tls_get_addr"
10560 : "__tls_get_addr"));
f996902d
RH
10561 }
10562
e2500fed 10563 return ix86_tls_symbol;
f996902d 10564}
e075ae69
RH
10565\f
10566/* Calculate the length of the memory address in the instruction
10567 encoding. Does not include the one-byte modrm, opcode, or prefix. */
10568
10569static int
10570memory_address_length (addr)
10571 rtx addr;
10572{
10573 struct ix86_address parts;
10574 rtx base, index, disp;
10575 int len;
10576
10577 if (GET_CODE (addr) == PRE_DEC
0d7d98ee
JH
10578 || GET_CODE (addr) == POST_INC
10579 || GET_CODE (addr) == PRE_MODIFY
10580 || GET_CODE (addr) == POST_MODIFY)
e075ae69 10581 return 0;
3f803cd9 10582
e075ae69
RH
10583 if (! ix86_decompose_address (addr, &parts))
10584 abort ();
3f803cd9 10585
e075ae69
RH
10586 base = parts.base;
10587 index = parts.index;
10588 disp = parts.disp;
10589 len = 0;
3f803cd9 10590
e075ae69
RH
10591 /* Register Indirect. */
10592 if (base && !index && !disp)
10593 {
10594 /* Special cases: ebp and esp need the two-byte modrm form. */
10595 if (addr == stack_pointer_rtx
10596 || addr == arg_pointer_rtx
564d80f4
JH
10597 || addr == frame_pointer_rtx
10598 || addr == hard_frame_pointer_rtx)
e075ae69 10599 len = 1;
3f803cd9 10600 }
e9a25f70 10601
e075ae69
RH
10602 /* Direct Addressing. */
10603 else if (disp && !base && !index)
10604 len = 4;
10605
3f803cd9
SC
10606 else
10607 {
e075ae69
RH
10608 /* Find the length of the displacement constant. */
10609 if (disp)
10610 {
10611 if (GET_CODE (disp) == CONST_INT
10612 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
10613 len = 1;
10614 else
10615 len = 4;
10616 }
3f803cd9 10617
e075ae69
RH
10618 /* An index requires the two-byte modrm form. */
10619 if (index)
10620 len += 1;
3f803cd9
SC
10621 }
10622
e075ae69
RH
10623 return len;
10624}
79325812 10625
5bf0ebab
RH
10626/* Compute default value for "length_immediate" attribute. When SHORTFORM
10627 is set, expect that insn have 8bit immediate alternative. */
e075ae69 10628int
6ef67412 10629ix86_attr_length_immediate_default (insn, shortform)
e075ae69 10630 rtx insn;
6ef67412 10631 int shortform;
e075ae69 10632{
6ef67412
JH
10633 int len = 0;
10634 int i;
6c698a6d 10635 extract_insn_cached (insn);
6ef67412
JH
10636 for (i = recog_data.n_operands - 1; i >= 0; --i)
10637 if (CONSTANT_P (recog_data.operand[i]))
3071fab5 10638 {
6ef67412 10639 if (len)
3071fab5 10640 abort ();
6ef67412
JH
10641 if (shortform
10642 && GET_CODE (recog_data.operand[i]) == CONST_INT
10643 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
10644 len = 1;
10645 else
10646 {
10647 switch (get_attr_mode (insn))
10648 {
10649 case MODE_QI:
10650 len+=1;
10651 break;
10652 case MODE_HI:
10653 len+=2;
10654 break;
10655 case MODE_SI:
10656 len+=4;
10657 break;
14f73b5a
JH
10658 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
10659 case MODE_DI:
10660 len+=4;
10661 break;
6ef67412 10662 default:
c725bd79 10663 fatal_insn ("unknown insn mode", insn);
6ef67412
JH
10664 }
10665 }
3071fab5 10666 }
6ef67412
JH
10667 return len;
10668}
10669/* Compute default value for "length_address" attribute. */
10670int
10671ix86_attr_length_address_default (insn)
10672 rtx insn;
10673{
10674 int i;
6c698a6d 10675 extract_insn_cached (insn);
1ccbefce
RH
10676 for (i = recog_data.n_operands - 1; i >= 0; --i)
10677 if (GET_CODE (recog_data.operand[i]) == MEM)
e075ae69 10678 {
6ef67412 10679 return memory_address_length (XEXP (recog_data.operand[i], 0));
e075ae69
RH
10680 break;
10681 }
6ef67412 10682 return 0;
3f803cd9 10683}
e075ae69
RH
10684\f
10685/* Return the maximum number of instructions a cpu can issue. */
b657fc39 10686
c237e94a 10687static int
e075ae69 10688ix86_issue_rate ()
b657fc39 10689{
e075ae69 10690 switch (ix86_cpu)
b657fc39 10691 {
e075ae69
RH
10692 case PROCESSOR_PENTIUM:
10693 case PROCESSOR_K6:
10694 return 2;
79325812 10695
e075ae69 10696 case PROCESSOR_PENTIUMPRO:
b4e89e2d
JH
10697 case PROCESSOR_PENTIUM4:
10698 case PROCESSOR_ATHLON:
e075ae69 10699 return 3;
b657fc39 10700
b657fc39 10701 default:
e075ae69 10702 return 1;
b657fc39 10703 }
b657fc39
L
10704}
10705
e075ae69
RH
10706/* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
10707 by DEP_INSN and nothing set by DEP_INSN. */
b657fc39 10708
e075ae69
RH
10709static int
10710ix86_flags_dependant (insn, dep_insn, insn_type)
10711 rtx insn, dep_insn;
10712 enum attr_type insn_type;
10713{
10714 rtx set, set2;
b657fc39 10715
e075ae69
RH
10716 /* Simplify the test for uninteresting insns. */
10717 if (insn_type != TYPE_SETCC
10718 && insn_type != TYPE_ICMOV
10719 && insn_type != TYPE_FCMOV
10720 && insn_type != TYPE_IBR)
10721 return 0;
b657fc39 10722
e075ae69
RH
10723 if ((set = single_set (dep_insn)) != 0)
10724 {
10725 set = SET_DEST (set);
10726 set2 = NULL_RTX;
10727 }
10728 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
10729 && XVECLEN (PATTERN (dep_insn), 0) == 2
10730 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
10731 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
10732 {
10733 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
10734 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
10735 }
78a0d70c
ZW
10736 else
10737 return 0;
b657fc39 10738
78a0d70c
ZW
10739 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
10740 return 0;
b657fc39 10741
f5143c46 10742 /* This test is true if the dependent insn reads the flags but
78a0d70c
ZW
10743 not any other potentially set register. */
10744 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
10745 return 0;
10746
10747 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
10748 return 0;
10749
10750 return 1;
e075ae69 10751}
b657fc39 10752
e075ae69
RH
10753/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
10754 address with operands set by DEP_INSN. */
10755
10756static int
10757ix86_agi_dependant (insn, dep_insn, insn_type)
10758 rtx insn, dep_insn;
10759 enum attr_type insn_type;
10760{
10761 rtx addr;
10762
6ad48e84
JH
10763 if (insn_type == TYPE_LEA
10764 && TARGET_PENTIUM)
5fbdde42
RH
10765 {
10766 addr = PATTERN (insn);
10767 if (GET_CODE (addr) == SET)
10768 ;
10769 else if (GET_CODE (addr) == PARALLEL
10770 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
10771 addr = XVECEXP (addr, 0, 0);
10772 else
10773 abort ();
10774 addr = SET_SRC (addr);
10775 }
e075ae69
RH
10776 else
10777 {
10778 int i;
6c698a6d 10779 extract_insn_cached (insn);
1ccbefce
RH
10780 for (i = recog_data.n_operands - 1; i >= 0; --i)
10781 if (GET_CODE (recog_data.operand[i]) == MEM)
e075ae69 10782 {
1ccbefce 10783 addr = XEXP (recog_data.operand[i], 0);
e075ae69
RH
10784 goto found;
10785 }
10786 return 0;
10787 found:;
b657fc39
L
10788 }
10789
e075ae69 10790 return modified_in_p (addr, dep_insn);
b657fc39 10791}
a269a03c 10792
c237e94a 10793static int
e075ae69 10794ix86_adjust_cost (insn, link, dep_insn, cost)
a269a03c
JC
10795 rtx insn, link, dep_insn;
10796 int cost;
10797{
e075ae69 10798 enum attr_type insn_type, dep_insn_type;
6ad48e84 10799 enum attr_memory memory, dep_memory;
e075ae69 10800 rtx set, set2;
9b00189f 10801 int dep_insn_code_number;
a269a03c 10802
309ada50 10803 /* Anti and output depenancies have zero cost on all CPUs. */
e075ae69 10804 if (REG_NOTE_KIND (link) != 0)
309ada50 10805 return 0;
a269a03c 10806
9b00189f
JH
10807 dep_insn_code_number = recog_memoized (dep_insn);
10808
e075ae69 10809 /* If we can't recognize the insns, we can't really do anything. */
9b00189f 10810 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
e075ae69 10811 return cost;
a269a03c 10812
1c71e60e
JH
10813 insn_type = get_attr_type (insn);
10814 dep_insn_type = get_attr_type (dep_insn);
9b00189f 10815
a269a03c
JC
10816 switch (ix86_cpu)
10817 {
10818 case PROCESSOR_PENTIUM:
e075ae69
RH
10819 /* Address Generation Interlock adds a cycle of latency. */
10820 if (ix86_agi_dependant (insn, dep_insn, insn_type))
10821 cost += 1;
10822
10823 /* ??? Compares pair with jump/setcc. */
10824 if (ix86_flags_dependant (insn, dep_insn, insn_type))
10825 cost = 0;
10826
10827 /* Floating point stores require value to be ready one cycle ealier. */
0f290768 10828 if (insn_type == TYPE_FMOV
e075ae69
RH
10829 && get_attr_memory (insn) == MEMORY_STORE
10830 && !ix86_agi_dependant (insn, dep_insn, insn_type))
10831 cost += 1;
10832 break;
a269a03c 10833
e075ae69 10834 case PROCESSOR_PENTIUMPRO:
6ad48e84
JH
10835 memory = get_attr_memory (insn);
10836 dep_memory = get_attr_memory (dep_insn);
10837
0f290768 10838 /* Since we can't represent delayed latencies of load+operation,
e075ae69
RH
10839 increase the cost here for non-imov insns. */
10840 if (dep_insn_type != TYPE_IMOV
6ad48e84
JH
10841 && dep_insn_type != TYPE_FMOV
10842 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
e075ae69
RH
10843 cost += 1;
10844
10845 /* INT->FP conversion is expensive. */
10846 if (get_attr_fp_int_src (dep_insn))
10847 cost += 5;
10848
10849 /* There is one cycle extra latency between an FP op and a store. */
10850 if (insn_type == TYPE_FMOV
10851 && (set = single_set (dep_insn)) != NULL_RTX
10852 && (set2 = single_set (insn)) != NULL_RTX
10853 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
10854 && GET_CODE (SET_DEST (set2)) == MEM)
10855 cost += 1;
6ad48e84
JH
10856
10857 /* Show ability of reorder buffer to hide latency of load by executing
10858 in parallel with previous instruction in case
10859 previous instruction is not needed to compute the address. */
10860 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
10861 && !ix86_agi_dependant (insn, dep_insn, insn_type))
10862 {
10863 /* Claim moves to take one cycle, as core can issue one load
10864 at time and the next load can start cycle later. */
10865 if (dep_insn_type == TYPE_IMOV
10866 || dep_insn_type == TYPE_FMOV)
10867 cost = 1;
10868 else if (cost > 1)
10869 cost--;
10870 }
e075ae69 10871 break;
a269a03c 10872
e075ae69 10873 case PROCESSOR_K6:
6ad48e84
JH
10874 memory = get_attr_memory (insn);
10875 dep_memory = get_attr_memory (dep_insn);
e075ae69
RH
10876 /* The esp dependency is resolved before the instruction is really
10877 finished. */
10878 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
10879 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
10880 return 1;
a269a03c 10881
0f290768 10882 /* Since we can't represent delayed latencies of load+operation,
e075ae69 10883 increase the cost here for non-imov insns. */
6ad48e84 10884 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
e075ae69
RH
10885 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
10886
10887 /* INT->FP conversion is expensive. */
10888 if (get_attr_fp_int_src (dep_insn))
10889 cost += 5;
6ad48e84
JH
10890
10891 /* Show ability of reorder buffer to hide latency of load by executing
10892 in parallel with previous instruction in case
10893 previous instruction is not needed to compute the address. */
10894 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
10895 && !ix86_agi_dependant (insn, dep_insn, insn_type))
10896 {
10897 /* Claim moves to take one cycle, as core can issue one load
10898 at time and the next load can start cycle later. */
10899 if (dep_insn_type == TYPE_IMOV
10900 || dep_insn_type == TYPE_FMOV)
10901 cost = 1;
10902 else if (cost > 2)
10903 cost -= 2;
10904 else
10905 cost = 1;
10906 }
a14003ee 10907 break;
e075ae69 10908
309ada50 10909 case PROCESSOR_ATHLON:
6ad48e84
JH
10910 memory = get_attr_memory (insn);
10911 dep_memory = get_attr_memory (dep_insn);
10912
10913 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
0b5107cf
JH
10914 {
10915 if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV)
10916 cost += 2;
10917 else
10918 cost += 3;
10919 }
6ad48e84
JH
10920 /* Show ability of reorder buffer to hide latency of load by executing
10921 in parallel with previous instruction in case
10922 previous instruction is not needed to compute the address. */
10923 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
10924 && !ix86_agi_dependant (insn, dep_insn, insn_type))
10925 {
10926 /* Claim moves to take one cycle, as core can issue one load
10927 at time and the next load can start cycle later. */
10928 if (dep_insn_type == TYPE_IMOV
10929 || dep_insn_type == TYPE_FMOV)
10930 cost = 0;
10931 else if (cost >= 3)
10932 cost -= 3;
10933 else
10934 cost = 0;
10935 }
309ada50 10936
a269a03c 10937 default:
a269a03c
JC
10938 break;
10939 }
10940
10941 return cost;
10942}
0a726ef1 10943
/* Per-cycle scheduling state shared by the scheduler hooks below.
   Currently only the PentiumPro model keeps state: the insns assigned
   to the three decoders in the current cycle (NULL = slot empty) and
   the number of insns issued so far this cycle.  */
static union
{
  struct ppro_sched_data
  {
    rtx decode[3];
    int issued_this_cycle;
  } ppro;
} ix86_sched_data;
0a726ef1 10952
e075ae69
RH
10953static enum attr_ppro_uops
10954ix86_safe_ppro_uops (insn)
10955 rtx insn;
10956{
10957 if (recog_memoized (insn) >= 0)
10958 return get_attr_ppro_uops (insn);
10959 else
10960 return PPRO_UOPS_MANY;
10961}
0a726ef1 10962
e075ae69
RH
10963static void
10964ix86_dump_ppro_packet (dump)
10965 FILE *dump;
0a726ef1 10966{
e075ae69 10967 if (ix86_sched_data.ppro.decode[0])
0a726ef1 10968 {
e075ae69
RH
10969 fprintf (dump, "PPRO packet: %d",
10970 INSN_UID (ix86_sched_data.ppro.decode[0]));
10971 if (ix86_sched_data.ppro.decode[1])
10972 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
10973 if (ix86_sched_data.ppro.decode[2])
10974 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
10975 fputc ('\n', dump);
10976 }
10977}
0a726ef1 10978
/* We're beginning a new block.  Initialize data structures as necessary.  */

/* Scheduler init hook: clear the per-cycle decode/issue state so a new
   scheduling region starts with empty decoder slots.  All parameters of
   the standard hook signature are unused here.  */
static void
ix86_sched_init (dump, sched_verbose, veclen)
     FILE *dump ATTRIBUTE_UNUSED;
     int sched_verbose ATTRIBUTE_UNUSED;
     int veclen ATTRIBUTE_UNUSED;
{
  memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
}
10989
10990/* Shift INSN to SLOT, and shift everything else down. */
10991
10992static void
10993ix86_reorder_insn (insnp, slot)
10994 rtx *insnp, *slot;
10995{
10996 if (insnp != slot)
10997 {
10998 rtx insn = *insnp;
0f290768 10999 do
e075ae69
RH
11000 insnp[0] = insnp[1];
11001 while (++insnp != slot);
11002 *insnp = insn;
0a726ef1 11003 }
e075ae69
RH
11004}
11005
/* Reorder the ready queue (READY .. E_READY, lowest priority first) for
   the PentiumPro decoder model: slot 0 takes a complex (many/few uop)
   insn, slots 1-2 take single-uop insns.  Updates
   ix86_sched_data.ppro.issued_this_cycle as a side effect.  */
static void
ix86_sched_reorder_ppro (ready, e_ready)
     rtx *ready;
     rtx *e_ready;
{
  rtx decode[3];
  enum attr_ppro_uops cur_uops;
  int issued_this_cycle;
  rtx *insnp;
  int i;

  /* At this point .ppro.decode contains the state of the three
     decoders from last "cycle".  That is, those insns that were
     actually independent.  But here we're scheduling for the
     decoder, and we may find things that are decodable in the
     same cycle.  */

  memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
  issued_this_cycle = 0;

  insnp = e_ready;
  cur_uops = ix86_safe_ppro_uops (*insnp);

  /* If the decoders are empty, and we've a complex insn at the
     head of the priority queue, let it issue without complaint.  */
  if (decode[0] == NULL)
    {
      if (cur_uops == PPRO_UOPS_MANY)
	{
	  decode[0] = *insnp;
	  goto ppro_done;
	}

      /* Otherwise, search for a 2-4 uop insn to issue.  */
      while (cur_uops != PPRO_UOPS_FEW)
	{
	  if (insnp == ready)
	    break;
	  cur_uops = ix86_safe_ppro_uops (*--insnp);
	}

      /* If so, move it to the head of the line.  */
      if (cur_uops == PPRO_UOPS_FEW)
	ix86_reorder_insn (insnp, e_ready);

      /* Issue the head of the queue.  */
      issued_this_cycle = 1;
      decode[0] = *e_ready--;
    }

  /* Look for simple insns to fill in the other two slots.  */
  for (i = 1; i < 3; ++i)
    if (decode[i] == NULL)
      {
	/* Queue exhausted; nothing left to slot.  */
	if (ready > e_ready)
	  goto ppro_done;

	insnp = e_ready;
	cur_uops = ix86_safe_ppro_uops (*insnp);
	while (cur_uops != PPRO_UOPS_ONE)
	  {
	    if (insnp == ready)
	      break;
	    cur_uops = ix86_safe_ppro_uops (*--insnp);
	  }

	/* Found one.  Move it to the head of the queue and issue it.  */
	if (cur_uops == PPRO_UOPS_ONE)
	  {
	    ix86_reorder_insn (insnp, e_ready);
	    decode[i] = *e_ready--;
	    issued_this_cycle++;
	    continue;
	  }

	/* ??? Didn't find one.  Ideally, here we would do a lazy split
	   of 2-uop insns, issue one and queue the other.  */
      }

 ppro_done:
  if (issued_this_cycle == 0)
    issued_this_cycle = 1;
  ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
}
fb693d44 11090
/* We are about to begin issuing insns for this clock cycle.
   Override the default sort algorithm to better slot instructions.  */

/* Scheduler reorder hook.  READY/N_READYP describe the ready list;
   returns the number of insns that may issue this cycle (the CPU's
   issue rate).  Only the PentiumPro model actually reorders.  */
static int
ix86_sched_reorder (dump, sched_verbose, ready, n_readyp, clock_var)
     FILE *dump ATTRIBUTE_UNUSED;
     int sched_verbose ATTRIBUTE_UNUSED;
     rtx *ready;
     int *n_readyp;
     int clock_var ATTRIBUTE_UNUSED;
{
  int n_ready = *n_readyp;
  rtx *e_ready = ready + n_ready - 1;  /* highest-priority end of queue */

  /* Make sure to go ahead and initialize key items in
     ix86_sched_data if we are not going to bother trying to
     reorder the ready queue.  */
  if (n_ready < 2)
    {
      ix86_sched_data.ppro.issued_this_cycle = 1;
      goto out;
    }

  switch (ix86_cpu)
    {
    default:
      break;

    case PROCESSOR_PENTIUMPRO:
      ix86_sched_reorder_ppro (ready, e_ready);
      break;
    }

out:
  return ix86_issue_rate ();
}
fb693d44 11126
e075ae69
RH
/* We are about to issue INSN.  Return the number of insns left on the
   ready queue that can be issued this cycle.  */

static int
ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
     FILE *dump;
     int sched_verbose;
     rtx insn;
     int can_issue_more;
{
  int i;
  switch (ix86_cpu)
    {
    default:
      /* No per-CPU modeling: just count down.  */
      return can_issue_more - 1;

    case PROCESSOR_PENTIUMPRO:
      {
	enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);

	if (uops == PPRO_UOPS_MANY)
	  {
	    /* A many-uop insn occupies the whole decode group by
	       itself: flush (and optionally dump) the pending packet,
	       dump INSN as its own packet, then leave the slots
	       empty.  */
	    if (sched_verbose)
	      ix86_dump_ppro_packet (dump);
	    ix86_sched_data.ppro.decode[0] = insn;
	    ix86_sched_data.ppro.decode[1] = NULL;
	    ix86_sched_data.ppro.decode[2] = NULL;
	    if (sched_verbose)
	      ix86_dump_ppro_packet (dump);
	    ix86_sched_data.ppro.decode[0] = NULL;
	  }
	else if (uops == PPRO_UOPS_FEW)
	  {
	    /* A 2-4 uop insn starts a fresh packet in slot 0.  */
	    if (sched_verbose)
	      ix86_dump_ppro_packet (dump);
	    ix86_sched_data.ppro.decode[0] = insn;
	    ix86_sched_data.ppro.decode[1] = NULL;
	    ix86_sched_data.ppro.decode[2] = NULL;
	  }
	else
	  {
	    /* Single-uop insn: drop it into the first free slot; a
	       full packet (slot 2 just filled) is dumped and reset.  */
	    for (i = 0; i < 3; ++i)
	      if (ix86_sched_data.ppro.decode[i] == NULL)
		{
		  ix86_sched_data.ppro.decode[i] = insn;
		  break;
		}
	    if (i == 3)
	      abort ();
	    if (i == 2)
	      {
		if (sched_verbose)
		  ix86_dump_ppro_packet (dump);
		ix86_sched_data.ppro.decode[0] = NULL;
		ix86_sched_data.ppro.decode[1] = NULL;
		ix86_sched_data.ppro.decode[2] = NULL;
	      }
	  }
      }
      return --ix86_sched_data.ppro.issued_this_cycle;
    }
}
9b690711
RH
11189
11190static int
11191ia32_use_dfa_pipeline_interface ()
11192{
11193 if (ix86_cpu == PROCESSOR_PENTIUM)
11194 return 1;
11195 return 0;
11196}
11197
11198/* How many alternative schedules to try. This should be as wide as the
11199 scheduling freedom in the DFA, but no wider. Making this value too
11200 large results extra work for the scheduler. */
11201
11202static int
11203ia32_multipass_dfa_lookahead ()
11204{
11205 if (ix86_cpu == PROCESSOR_PENTIUM)
11206 return 2;
11207 else
11208 return 0;
11209}
11210
a7180f70 11211\f
0e4970d7
RK
11212/* Walk through INSNS and look for MEM references whose address is DSTREG or
11213 SRCREG and set the memory attribute to those of DSTREF and SRCREF, as
11214 appropriate. */
11215
11216void
11217ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
11218 rtx insns;
11219 rtx dstref, srcref, dstreg, srcreg;
11220{
11221 rtx insn;
11222
11223 for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
11224 if (INSN_P (insn))
11225 ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
11226 dstreg, srcreg);
11227}
11228
/* Subroutine of above to actually do the updating by recursively walking
   the rtx.  */

static void
ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
     rtx x;
     rtx dstref, srcref, dstreg, srcreg;
{
  enum rtx_code code = GET_CODE (x);
  const char *format_ptr = GET_RTX_FORMAT (code);
  int i, j;

  /* Pointer equality is intentional: only MEMs whose address is exactly
     DSTREG/SRCREG (the same rtx object) inherit the attributes.  */
  if (code == MEM && XEXP (x, 0) == dstreg)
    MEM_COPY_ATTRIBUTES (x, dstref);
  else if (code == MEM && XEXP (x, 0) == srcreg)
    MEM_COPY_ATTRIBUTES (x, srcref);

  /* Recurse into every rtx ('e') and rtx-vector ('E') operand.  */
  for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
    {
      if (*format_ptr == 'e')
	ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
				   dstreg, srcreg);
      else if (*format_ptr == 'E')
	for (j = XVECLEN (x, i) - 1; j >= 0; j--)
	  ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
				     dstreg, srcreg);
    }
}
11257\f
a7180f70
BS
11258/* Compute the alignment given to a constant that is being placed in memory.
11259 EXP is the constant and ALIGN is the alignment that the object would
11260 ordinarily have.
11261 The value of this function is used instead of that alignment to align
11262 the object. */
11263
11264int
11265ix86_constant_alignment (exp, align)
11266 tree exp;
11267 int align;
11268{
11269 if (TREE_CODE (exp) == REAL_CST)
11270 {
11271 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
11272 return 64;
11273 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
11274 return 128;
11275 }
11276 else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
11277 && align < 256)
11278 return 256;
11279
11280 return align;
11281}
11282
/* Compute the alignment for a static variable.
   TYPE is the data type, and ALIGN is the alignment that
   the object would ordinarily have.  The value of this function is used
   instead of that alignment to align the object.  */

int
ix86_data_alignment (type, align)
     tree type;
     int align;
{
  /* Large aggregates (>= 256 bits) get 256-bit alignment; the
     TREE_INT_CST_HIGH test catches sizes too big for the low word.  */
  if (AGGREGATE_TYPE_P (type)
      && TYPE_SIZE (type)
      && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
      && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
	  || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
    return 256;

  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to 16byte boundary.  */
  if (TARGET_64BIT)
    {
      if (AGGREGATE_TYPE_P (type)
	  && TYPE_SIZE (type)
	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	  && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
	      || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
	return 128;
    }

  /* Below: bump DFmode data to 64-bit and 128-bit-mode data to 128-bit
     alignment, keyed off the element/field/scalar mode of TYPE.  */
  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {

      if (TYPE_MODE (type) == DCmode && align < 64)
	return 64;
      if (TYPE_MODE (type) == XCmode && align < 128)
	return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
	    || TREE_CODE (type) == UNION_TYPE
	    || TREE_CODE (type) == QUAL_UNION_TYPE)
	   && TYPE_FIELDS (type))
    {
      /* Only the first field's mode is inspected here.  */
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
	   || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
	return 128;
    }

  return align;
}
11348
/* Compute the alignment for a local variable.
   TYPE is the data type, and ALIGN is the alignment that
   the object would ordinarily have.  The value of this macro is used
   instead of that alignment to align the object.  */

int
ix86_local_alignment (type, align)
     tree type;
     int align;
{
  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to 16byte boundary.  */
  if (TARGET_64BIT)
    {
      if (AGGREGATE_TYPE_P (type)
	  && TYPE_SIZE (type)
	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	  && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
	      || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
	return 128;
    }
  /* Same mode-based 64/128-bit bumps as ix86_data_alignment, minus the
     256-bit rule for large static aggregates.  */
  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      if (TYPE_MODE (type) == DCmode && align < 64)
	return 64;
      if (TYPE_MODE (type) == XCmode && align < 128)
	return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
	    || TREE_CODE (type) == UNION_TYPE
	    || TREE_CODE (type) == QUAL_UNION_TYPE)
	   && TYPE_FIELDS (type))
    {
      /* Only the first field's mode is inspected here.  */
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
	   || TREE_CODE (type) == INTEGER_TYPE)
    {

      if (TYPE_MODE (type) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
	return 128;
    }
  return align;
}
0ed08620
JH
11405\f
/* Emit RTL insns to initialize the variable parts of a trampoline.
   FNADDR is an RTX for the address of the function's pure code.
   CXT is an RTX for the static chain value for the function.  */
void
x86_initialize_trampoline (tramp, fnaddr, cxt)
     rtx tramp, fnaddr, cxt;
{
  if (!TARGET_64BIT)
    {
      /* Compute offset from the end of the jmp to the target function.  */
      rtx disp = expand_binop (SImode, sub_optab, fnaddr,
			       plus_constant (tramp, 10),
			       NULL_RTX, 1, OPTAB_DIRECT);
      /* 0xb9 = movl $imm32, %ecx -- loads the static chain.  */
      emit_move_insn (gen_rtx_MEM (QImode, tramp),
		      gen_int_mode (0xb9, QImode));
      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
      /* 0xe9 = jmp rel32 -- followed by the displacement computed above.  */
      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
		      gen_int_mode (0xe9, QImode));
      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
    }
  else
    {
      int offset = 0;
      /* Try to load address using shorter movl instead of movabs.
         We may want to support movq for kernel mode, but kernel does not use
         trampolines at the moment.  */
      if (x86_64_zero_extended_value (fnaddr))
	{
	  fnaddr = copy_to_mode_reg (DImode, fnaddr);
	  /* Bytes 41 bb (stored little-endian as HImode 0xbb41):
	     movl $imm32, %r11d.  */
	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
			  gen_int_mode (0xbb41, HImode));
	  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
			  gen_lowpart (SImode, fnaddr));
	  offset += 6;
	}
      else
	{
	  /* Bytes 49 bb: movabs $imm64, %r11.  */
	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
			  gen_int_mode (0xbb49, HImode));
	  emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
			  fnaddr);
	  offset += 10;
	}
      /* Load static chain using movabs to r10.  Bytes 49 ba:
	 movabs $imm64, %r10.  */
      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
		      gen_int_mode (0xba49, HImode));
      emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
		      cxt);
      offset += 10;
      /* Jump to the r11.  Bytes 49 ff e3: jmp *%r11.  */
      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
		      gen_int_mode (0xff49, HImode));
      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
		      gen_int_mode (0xe3, QImode));
      offset += 3;
      /* Sanity check: the emitted sequence must fit the trampoline.  */
      if (offset > TRAMPOLINE_SIZE)
	abort ();
    }
}
eeb06b1b 11465\f
6a2dd09a
RS
/* Register builtin NAME (with type TYPE and builtin code CODE) only when
   the ISA bits in MASK are enabled in target_flags.  */
#define def_builtin(MASK, NAME, TYPE, CODE)			\
do {								\
  if ((MASK) & target_flags)					\
    builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD,	\
		      NULL, NULL_TREE);				\
} while (0)
bd793c65 11472
bd793c65
BS
/* Table entry describing one ix86 builtin: the target_flags mask that
   enables it, the insn pattern implementing it, its user-visible name,
   its builtin code, and (for comparison builtins) the rtx comparison
   code plus a flag -- NOTE(review): flag appears to request swapped
   operands for GT/GE variants; confirm against the expanders.  */
struct builtin_description
{
  const unsigned int mask;
  const enum insn_code icode;
  const char *const name;
  const enum ix86_builtins code;
  const enum rtx_code comparison;
  const unsigned int flag;
};
11482
fbe5eb6d
BS
/* Used for builtins that are enabled both by -msse and -msse2.  */
#define MASK_SSE1 (MASK_SSE | MASK_SSE2)

/* COMISS/UCOMISS (SSE) and COMISD/UCOMISD (SSE2) comparison builtins.
   The GT/GE entries reuse the LT/LE comparison codes with flag = 1.  */
static const struct builtin_description bdesc_comi[] =
{
  { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, EQ, 0 },
  { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, LT, 0 },
  { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, LE, 0 },
  { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, LT, 1 },
  { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, LE, 1 },
  { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, NE, 0 },
  { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, EQ, 0 },
  { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, LT, 0 },
  { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, LE, 0 },
  { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, LT, 1 },
  { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, LE, 1 },
  { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, NE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, EQ, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, LT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, LE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, LT, 1 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, LE, 1 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, NE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, EQ, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, LT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, LE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, LT, 1 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, LE, 1 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, NE, 0 },
};
11513
8b60264b 11514static const struct builtin_description bdesc_2arg[] =
bd793c65
BS
11515{
11516 /* SSE */
fbe5eb6d
BS
11517 { MASK_SSE1, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
11518 { MASK_SSE1, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
11519 { MASK_SSE1, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
11520 { MASK_SSE1, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
11521 { MASK_SSE1, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
11522 { MASK_SSE1, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
11523 { MASK_SSE1, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
11524 { MASK_SSE1, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
11525
11526 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
11527 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
11528 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
11529 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
11530 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
11531 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
11532 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
11533 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
11534 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
11535 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
11536 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
11537 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
11538 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
11539 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
11540 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
11541 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgtss", IX86_BUILTIN_CMPGTSS, LT, 1 },
11542 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgess", IX86_BUILTIN_CMPGESS, LE, 1 },
11543 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
11544 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
11545 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
11546 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
11547 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, LT, 1 },
11548 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, LE, 1 },
11549 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
11550
11551 { MASK_SSE1, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
11552 { MASK_SSE1, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
11553 { MASK_SSE1, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
11554 { MASK_SSE1, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
11555
11556 { MASK_SSE1, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
11557 { MASK_SSE1, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
11558 { MASK_SSE1, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
11559 { MASK_SSE1, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
11560 { MASK_SSE1, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
bd793c65
BS
11561
11562 /* MMX */
eeb06b1b
BS
11563 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
11564 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
11565 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
11566 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
11567 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
11568 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
11569
11570 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
11571 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
11572 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
11573 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
11574 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
11575 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
11576 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
11577 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
11578
11579 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
11580 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
fbe5eb6d 11581 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
eeb06b1b
BS
11582
11583 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
11584 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
11585 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
11586 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
11587
fbe5eb6d
BS
11588 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
11589 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
eeb06b1b
BS
11590
11591 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
11592 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
11593 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
11594 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
11595 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
11596 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
11597
fbe5eb6d
BS
11598 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
11599 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
11600 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
11601 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
eeb06b1b
BS
11602
11603 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
11604 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
11605 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
11606 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
11607 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
11608 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
bd793c65
BS
11609
11610 /* Special. */
eeb06b1b
BS
11611 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
11612 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
11613 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
11614
fbe5eb6d
BS
11615 { MASK_SSE1, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
11616 { MASK_SSE1, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
eeb06b1b
BS
11617
11618 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
11619 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
11620 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
11621 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
11622 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
11623 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
11624
11625 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
11626 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
11627 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
11628 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
11629 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
11630 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
11631
11632 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
11633 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
11634 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
11635 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
11636
fbe5eb6d
BS
11637 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
11638 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
11639
11640 /* SSE2 */
11641 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
11642 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
11643 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
11644 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
11645 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
11646 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
11647 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
11648 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
11649
11650 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
11651 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
11652 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
11653 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
11654 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
11655 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
11656 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
11657 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
11658 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
11659 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
11660 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
11661 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
11662 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
11663 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
11664 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
11665 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpgtsd", IX86_BUILTIN_CMPGTSD, LT, 1 },
11666 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpgesd", IX86_BUILTIN_CMPGESD, LE, 1 },
11667 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
11668 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
11669 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
11670 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
11671 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpngtsd", IX86_BUILTIN_CMPNGTSD, LT, 1 },
11672 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpngesd", IX86_BUILTIN_CMPNGESD, LE, 1 },
11673 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
11674
11675 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
11676 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
11677 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
11678 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
11679
11680 { MASK_SSE2, CODE_FOR_sse2_anddf3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
11681 { MASK_SSE2, CODE_FOR_sse2_nanddf3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
11682 { MASK_SSE2, CODE_FOR_sse2_iordf3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
11683 { MASK_SSE2, CODE_FOR_sse2_xordf3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
11684
11685 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
11686 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
11687 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
11688
11689 /* SSE2 MMX */
11690 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
11691 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
11692 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
11693 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
11694 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
11695 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
11696 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
11697 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
11698
11699 { MASK_MMX, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
11700 { MASK_MMX, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
11701 { MASK_MMX, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
11702 { MASK_MMX, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
11703 { MASK_MMX, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
11704 { MASK_MMX, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
11705 { MASK_MMX, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
11706 { MASK_MMX, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
11707
11708 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
11709 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
11710 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
11711 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },
11712
916b60b7
BS
11713 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
11714 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
11715 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
11716 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
fbe5eb6d
BS
11717
11718 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
11719 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
11720
11721 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
11722 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
11723 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
11724 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
11725 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
11726 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
11727
11728 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
11729 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
11730 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
11731 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
11732
11733 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
11734 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
11735 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
11736 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
11737 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
11738 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
11739
916b60b7
BS
11740 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
11741 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
11742 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
11743
11744 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
11745 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
11746
11747 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
11748 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
11749 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
11750 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
11751 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
11752 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
11753
11754 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
11755 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
11756 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
11757 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
11758 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
11759 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
11760
11761 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
11762 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
11763 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
11764 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
11765
11766 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
11767
fbe5eb6d
BS
11768 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
11769 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
11770 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 }
bd793c65
BS
11771};
11772
/* Builtins that take a single vector operand.  Each entry supplies:
   the target mask that must be enabled for the builtin to exist, the
   insn pattern used to expand it, the user-visible name (0 when the
   builtin is registered by hand in ix86_init_mmx_sse_builtins), the
   builtin enumerator, and two trailing fields (comparison code and
   swap-operands flag) that are unused -- always 0 -- for unops.  */
static const struct builtin_description bdesc_1arg[] =
{
  /* Move-mask insns: extract the sign bits of each element.
     pmovmskb is shared by SSE and the 3DNow! Athlon extensions.  */
  { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
  { MASK_SSE1, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },

  /* SSE single-precision square root and reciprocal approximations.  */
  { MASK_SSE1, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
  { MASK_SSE1, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
  { MASK_SSE1, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },

  /* SSE float <-> integer conversions (cvtt* variants truncate).  */
  { MASK_SSE1, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
  { MASK_SSE1, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
  { MASK_SSE1, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
  { MASK_SSE1, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },

  /* SSE2 move-mask and MMX->XMM register move.  */
  { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },

  /* SSE2 double-precision square root.  */
  { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },

  /* SSE2 packed-integer -> packed-float conversions.  */
  { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },

  /* SSE2 packed-double -> integer/float conversions.  */
  { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },

  /* MMX packed-integer -> packed-double conversion.  */
  { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },

  /* SSE2 scalar-double -> integer conversions.  */
  { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },

  /* SSE2 packed-single -> integer/double conversions.  */
  { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 }
};
11811
f6155fda
SS
11812void
11813ix86_init_builtins ()
11814{
11815 if (TARGET_MMX)
11816 ix86_init_mmx_sse_builtins ();
11817}
11818
11819/* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
bd793c65
BS
11820 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
11821 builtins. */
e37af218 11822static void
f6155fda 11823ix86_init_mmx_sse_builtins ()
bd793c65 11824{
8b60264b 11825 const struct builtin_description * d;
77ebd435 11826 size_t i;
bd793c65
BS
11827
11828 tree pchar_type_node = build_pointer_type (char_type_node);
11829 tree pfloat_type_node = build_pointer_type (float_type_node);
11830 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
916b60b7 11831 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
bd793c65
BS
11832 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
11833
11834 /* Comparisons. */
11835 tree int_ftype_v4sf_v4sf
b4de2f7d
AH
11836 = build_function_type_list (integer_type_node,
11837 V4SF_type_node, V4SF_type_node, NULL_TREE);
bd793c65 11838 tree v4si_ftype_v4sf_v4sf
b4de2f7d
AH
11839 = build_function_type_list (V4SI_type_node,
11840 V4SF_type_node, V4SF_type_node, NULL_TREE);
bd793c65 11841 /* MMX/SSE/integer conversions. */
bd793c65 11842 tree int_ftype_v4sf
b4de2f7d
AH
11843 = build_function_type_list (integer_type_node,
11844 V4SF_type_node, NULL_TREE);
bd793c65 11845 tree int_ftype_v8qi
b4de2f7d 11846 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
bd793c65 11847 tree v4sf_ftype_v4sf_int
b4de2f7d
AH
11848 = build_function_type_list (V4SF_type_node,
11849 V4SF_type_node, integer_type_node, NULL_TREE);
bd793c65 11850 tree v4sf_ftype_v4sf_v2si
b4de2f7d
AH
11851 = build_function_type_list (V4SF_type_node,
11852 V4SF_type_node, V2SI_type_node, NULL_TREE);
bd793c65 11853 tree int_ftype_v4hi_int
b4de2f7d
AH
11854 = build_function_type_list (integer_type_node,
11855 V4HI_type_node, integer_type_node, NULL_TREE);
bd793c65 11856 tree v4hi_ftype_v4hi_int_int
e7a60f56 11857 = build_function_type_list (V4HI_type_node, V4HI_type_node,
b4de2f7d
AH
11858 integer_type_node, integer_type_node,
11859 NULL_TREE);
bd793c65
BS
11860 /* Miscellaneous. */
11861 tree v8qi_ftype_v4hi_v4hi
b4de2f7d
AH
11862 = build_function_type_list (V8QI_type_node,
11863 V4HI_type_node, V4HI_type_node, NULL_TREE);
bd793c65 11864 tree v4hi_ftype_v2si_v2si
b4de2f7d
AH
11865 = build_function_type_list (V4HI_type_node,
11866 V2SI_type_node, V2SI_type_node, NULL_TREE);
bd793c65 11867 tree v4sf_ftype_v4sf_v4sf_int
b4de2f7d
AH
11868 = build_function_type_list (V4SF_type_node,
11869 V4SF_type_node, V4SF_type_node,
11870 integer_type_node, NULL_TREE);
bd793c65 11871 tree v2si_ftype_v4hi_v4hi
b4de2f7d
AH
11872 = build_function_type_list (V2SI_type_node,
11873 V4HI_type_node, V4HI_type_node, NULL_TREE);
bd793c65 11874 tree v4hi_ftype_v4hi_int
b4de2f7d 11875 = build_function_type_list (V4HI_type_node,
e7a60f56 11876 V4HI_type_node, integer_type_node, NULL_TREE);
bd793c65 11877 tree v4hi_ftype_v4hi_di
b4de2f7d
AH
11878 = build_function_type_list (V4HI_type_node,
11879 V4HI_type_node, long_long_unsigned_type_node,
11880 NULL_TREE);
bd793c65 11881 tree v2si_ftype_v2si_di
b4de2f7d
AH
11882 = build_function_type_list (V2SI_type_node,
11883 V2SI_type_node, long_long_unsigned_type_node,
11884 NULL_TREE);
bd793c65 11885 tree void_ftype_void
b4de2f7d 11886 = build_function_type (void_type_node, void_list_node);
bd793c65 11887 tree void_ftype_unsigned
b4de2f7d 11888 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
bd793c65 11889 tree unsigned_ftype_void
b4de2f7d 11890 = build_function_type (unsigned_type_node, void_list_node);
bd793c65 11891 tree di_ftype_void
b4de2f7d 11892 = build_function_type (long_long_unsigned_type_node, void_list_node);
e37af218 11893 tree v4sf_ftype_void
b4de2f7d 11894 = build_function_type (V4SF_type_node, void_list_node);
bd793c65 11895 tree v2si_ftype_v4sf
b4de2f7d 11896 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
bd793c65 11897 /* Loads/stores. */
bd793c65 11898 tree void_ftype_v8qi_v8qi_pchar
b4de2f7d
AH
11899 = build_function_type_list (void_type_node,
11900 V8QI_type_node, V8QI_type_node,
11901 pchar_type_node, NULL_TREE);
bd793c65 11902 tree v4sf_ftype_pfloat
b4de2f7d 11903 = build_function_type_list (V4SF_type_node, pfloat_type_node, NULL_TREE);
bd793c65
BS
11904 /* @@@ the type is bogus */
11905 tree v4sf_ftype_v4sf_pv2si
b4de2f7d
AH
11906 = build_function_type_list (V4SF_type_node,
11907 V4SF_type_node, pv2di_type_node, NULL_TREE);
1255c85c 11908 tree void_ftype_pv2si_v4sf
b4de2f7d
AH
11909 = build_function_type_list (void_type_node,
11910 pv2di_type_node, V4SF_type_node, NULL_TREE);
bd793c65 11911 tree void_ftype_pfloat_v4sf
b4de2f7d
AH
11912 = build_function_type_list (void_type_node,
11913 pfloat_type_node, V4SF_type_node, NULL_TREE);
bd793c65 11914 tree void_ftype_pdi_di
b4de2f7d
AH
11915 = build_function_type_list (void_type_node,
11916 pdi_type_node, long_long_unsigned_type_node,
11917 NULL_TREE);
916b60b7 11918 tree void_ftype_pv2di_v2di
b4de2f7d
AH
11919 = build_function_type_list (void_type_node,
11920 pv2di_type_node, V2DI_type_node, NULL_TREE);
bd793c65
BS
11921 /* Normal vector unops. */
11922 tree v4sf_ftype_v4sf
b4de2f7d 11923 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
0f290768 11924
bd793c65
BS
11925 /* Normal vector binops. */
11926 tree v4sf_ftype_v4sf_v4sf
b4de2f7d
AH
11927 = build_function_type_list (V4SF_type_node,
11928 V4SF_type_node, V4SF_type_node, NULL_TREE);
bd793c65 11929 tree v8qi_ftype_v8qi_v8qi
b4de2f7d
AH
11930 = build_function_type_list (V8QI_type_node,
11931 V8QI_type_node, V8QI_type_node, NULL_TREE);
bd793c65 11932 tree v4hi_ftype_v4hi_v4hi
b4de2f7d
AH
11933 = build_function_type_list (V4HI_type_node,
11934 V4HI_type_node, V4HI_type_node, NULL_TREE);
bd793c65 11935 tree v2si_ftype_v2si_v2si
b4de2f7d
AH
11936 = build_function_type_list (V2SI_type_node,
11937 V2SI_type_node, V2SI_type_node, NULL_TREE);
bd793c65 11938 tree di_ftype_di_di
b4de2f7d
AH
11939 = build_function_type_list (long_long_unsigned_type_node,
11940 long_long_unsigned_type_node,
11941 long_long_unsigned_type_node, NULL_TREE);
bd793c65 11942
47f339cf 11943 tree v2si_ftype_v2sf
ae3aa00d 11944 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
47f339cf 11945 tree v2sf_ftype_v2si
b4de2f7d 11946 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
47f339cf 11947 tree v2si_ftype_v2si
b4de2f7d 11948 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
47f339cf 11949 tree v2sf_ftype_v2sf
b4de2f7d 11950 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
47f339cf 11951 tree v2sf_ftype_v2sf_v2sf
b4de2f7d
AH
11952 = build_function_type_list (V2SF_type_node,
11953 V2SF_type_node, V2SF_type_node, NULL_TREE);
47f339cf 11954 tree v2si_ftype_v2sf_v2sf
b4de2f7d
AH
11955 = build_function_type_list (V2SI_type_node,
11956 V2SF_type_node, V2SF_type_node, NULL_TREE);
fbe5eb6d
BS
11957 tree pint_type_node = build_pointer_type (integer_type_node);
11958 tree pdouble_type_node = build_pointer_type (double_type_node);
11959 tree int_ftype_v2df_v2df
b4de2f7d
AH
11960 = build_function_type_list (integer_type_node,
11961 V2DF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d
BS
11962
11963 tree ti_ftype_void
b4de2f7d 11964 = build_function_type (intTI_type_node, void_list_node);
fbe5eb6d 11965 tree ti_ftype_ti_ti
b4de2f7d
AH
11966 = build_function_type_list (intTI_type_node,
11967 intTI_type_node, intTI_type_node, NULL_TREE);
fbe5eb6d 11968 tree void_ftype_pvoid
b4de2f7d 11969 = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);
fbe5eb6d 11970 tree v2di_ftype_di
b4de2f7d
AH
11971 = build_function_type_list (V2DI_type_node,
11972 long_long_unsigned_type_node, NULL_TREE);
fbe5eb6d 11973 tree v4sf_ftype_v4si
b4de2f7d 11974 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
fbe5eb6d 11975 tree v4si_ftype_v4sf
b4de2f7d 11976 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
fbe5eb6d 11977 tree v2df_ftype_v4si
b4de2f7d 11978 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
fbe5eb6d 11979 tree v4si_ftype_v2df
b4de2f7d 11980 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 11981 tree v2si_ftype_v2df
b4de2f7d 11982 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 11983 tree v4sf_ftype_v2df
b4de2f7d 11984 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 11985 tree v2df_ftype_v2si
b4de2f7d 11986 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
fbe5eb6d 11987 tree v2df_ftype_v4sf
b4de2f7d 11988 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
fbe5eb6d 11989 tree int_ftype_v2df
b4de2f7d 11990 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 11991 tree v2df_ftype_v2df_int
b4de2f7d
AH
11992 = build_function_type_list (V2DF_type_node,
11993 V2DF_type_node, integer_type_node, NULL_TREE);
fbe5eb6d 11994 tree v4sf_ftype_v4sf_v2df
b4de2f7d
AH
11995 = build_function_type_list (V4SF_type_node,
11996 V4SF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 11997 tree v2df_ftype_v2df_v4sf
b4de2f7d
AH
11998 = build_function_type_list (V2DF_type_node,
11999 V2DF_type_node, V4SF_type_node, NULL_TREE);
fbe5eb6d 12000 tree v2df_ftype_v2df_v2df_int
b4de2f7d
AH
12001 = build_function_type_list (V2DF_type_node,
12002 V2DF_type_node, V2DF_type_node,
12003 integer_type_node,
12004 NULL_TREE);
fbe5eb6d 12005 tree v2df_ftype_v2df_pv2si
b4de2f7d
AH
12006 = build_function_type_list (V2DF_type_node,
12007 V2DF_type_node, pv2si_type_node, NULL_TREE);
fbe5eb6d 12008 tree void_ftype_pv2si_v2df
b4de2f7d
AH
12009 = build_function_type_list (void_type_node,
12010 pv2si_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 12011 tree void_ftype_pdouble_v2df
b4de2f7d
AH
12012 = build_function_type_list (void_type_node,
12013 pdouble_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 12014 tree void_ftype_pint_int
b4de2f7d
AH
12015 = build_function_type_list (void_type_node,
12016 pint_type_node, integer_type_node, NULL_TREE);
fbe5eb6d 12017 tree void_ftype_v16qi_v16qi_pchar
b4de2f7d
AH
12018 = build_function_type_list (void_type_node,
12019 V16QI_type_node, V16QI_type_node,
12020 pchar_type_node, NULL_TREE);
fbe5eb6d 12021 tree v2df_ftype_pdouble
b4de2f7d 12022 = build_function_type_list (V2DF_type_node, pdouble_type_node, NULL_TREE);
fbe5eb6d 12023 tree v2df_ftype_v2df_v2df
b4de2f7d
AH
12024 = build_function_type_list (V2DF_type_node,
12025 V2DF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 12026 tree v16qi_ftype_v16qi_v16qi
b4de2f7d
AH
12027 = build_function_type_list (V16QI_type_node,
12028 V16QI_type_node, V16QI_type_node, NULL_TREE);
fbe5eb6d 12029 tree v8hi_ftype_v8hi_v8hi
b4de2f7d
AH
12030 = build_function_type_list (V8HI_type_node,
12031 V8HI_type_node, V8HI_type_node, NULL_TREE);
fbe5eb6d 12032 tree v4si_ftype_v4si_v4si
b4de2f7d
AH
12033 = build_function_type_list (V4SI_type_node,
12034 V4SI_type_node, V4SI_type_node, NULL_TREE);
fbe5eb6d 12035 tree v2di_ftype_v2di_v2di
b4de2f7d
AH
12036 = build_function_type_list (V2DI_type_node,
12037 V2DI_type_node, V2DI_type_node, NULL_TREE);
fbe5eb6d 12038 tree v2di_ftype_v2df_v2df
b4de2f7d
AH
12039 = build_function_type_list (V2DI_type_node,
12040 V2DF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 12041 tree v2df_ftype_v2df
b4de2f7d 12042 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 12043 tree v2df_ftype_double
b4de2f7d 12044 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
fbe5eb6d 12045 tree v2df_ftype_double_double
b4de2f7d
AH
12046 = build_function_type_list (V2DF_type_node,
12047 double_type_node, double_type_node, NULL_TREE);
fbe5eb6d 12048 tree int_ftype_v8hi_int
b4de2f7d
AH
12049 = build_function_type_list (integer_type_node,
12050 V8HI_type_node, integer_type_node, NULL_TREE);
fbe5eb6d 12051 tree v8hi_ftype_v8hi_int_int
b4de2f7d
AH
12052 = build_function_type_list (V8HI_type_node,
12053 V8HI_type_node, integer_type_node,
12054 integer_type_node, NULL_TREE);
916b60b7 12055 tree v2di_ftype_v2di_int
b4de2f7d
AH
12056 = build_function_type_list (V2DI_type_node,
12057 V2DI_type_node, integer_type_node, NULL_TREE);
fbe5eb6d 12058 tree v4si_ftype_v4si_int
b4de2f7d
AH
12059 = build_function_type_list (V4SI_type_node,
12060 V4SI_type_node, integer_type_node, NULL_TREE);
fbe5eb6d 12061 tree v8hi_ftype_v8hi_int
b4de2f7d
AH
12062 = build_function_type_list (V8HI_type_node,
12063 V8HI_type_node, integer_type_node, NULL_TREE);
916b60b7 12064 tree v8hi_ftype_v8hi_v2di
b4de2f7d
AH
12065 = build_function_type_list (V8HI_type_node,
12066 V8HI_type_node, V2DI_type_node, NULL_TREE);
916b60b7 12067 tree v4si_ftype_v4si_v2di
b4de2f7d
AH
12068 = build_function_type_list (V4SI_type_node,
12069 V4SI_type_node, V2DI_type_node, NULL_TREE);
916b60b7 12070 tree v4si_ftype_v8hi_v8hi
b4de2f7d
AH
12071 = build_function_type_list (V4SI_type_node,
12072 V8HI_type_node, V8HI_type_node, NULL_TREE);
916b60b7 12073 tree di_ftype_v8qi_v8qi
b4de2f7d
AH
12074 = build_function_type_list (long_long_unsigned_type_node,
12075 V8QI_type_node, V8QI_type_node, NULL_TREE);
916b60b7 12076 tree v2di_ftype_v16qi_v16qi
b4de2f7d
AH
12077 = build_function_type_list (V2DI_type_node,
12078 V16QI_type_node, V16QI_type_node, NULL_TREE);
916b60b7 12079 tree int_ftype_v16qi
b4de2f7d 12080 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
47f339cf 12081
bd793c65
BS
12082 /* Add all builtins that are more or less simple operations on two
12083 operands. */
ca7558fc 12084 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
bd793c65
BS
12085 {
12086 /* Use one of the operands; the target can have a different mode for
12087 mask-generating compares. */
12088 enum machine_mode mode;
12089 tree type;
12090
12091 if (d->name == 0)
12092 continue;
12093 mode = insn_data[d->icode].operand[1].mode;
12094
bd793c65
BS
12095 switch (mode)
12096 {
fbe5eb6d
BS
12097 case V16QImode:
12098 type = v16qi_ftype_v16qi_v16qi;
12099 break;
12100 case V8HImode:
12101 type = v8hi_ftype_v8hi_v8hi;
12102 break;
12103 case V4SImode:
12104 type = v4si_ftype_v4si_v4si;
12105 break;
12106 case V2DImode:
12107 type = v2di_ftype_v2di_v2di;
12108 break;
12109 case V2DFmode:
12110 type = v2df_ftype_v2df_v2df;
12111 break;
12112 case TImode:
12113 type = ti_ftype_ti_ti;
12114 break;
bd793c65
BS
12115 case V4SFmode:
12116 type = v4sf_ftype_v4sf_v4sf;
12117 break;
12118 case V8QImode:
12119 type = v8qi_ftype_v8qi_v8qi;
12120 break;
12121 case V4HImode:
12122 type = v4hi_ftype_v4hi_v4hi;
12123 break;
12124 case V2SImode:
12125 type = v2si_ftype_v2si_v2si;
12126 break;
bd793c65
BS
12127 case DImode:
12128 type = di_ftype_di_di;
12129 break;
12130
12131 default:
12132 abort ();
12133 }
0f290768 12134
bd793c65
BS
12135 /* Override for comparisons. */
12136 if (d->icode == CODE_FOR_maskcmpv4sf3
12137 || d->icode == CODE_FOR_maskncmpv4sf3
12138 || d->icode == CODE_FOR_vmmaskcmpv4sf3
12139 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
12140 type = v4si_ftype_v4sf_v4sf;
12141
fbe5eb6d
BS
12142 if (d->icode == CODE_FOR_maskcmpv2df3
12143 || d->icode == CODE_FOR_maskncmpv2df3
12144 || d->icode == CODE_FOR_vmmaskcmpv2df3
12145 || d->icode == CODE_FOR_vmmaskncmpv2df3)
12146 type = v2di_ftype_v2df_v2df;
12147
eeb06b1b 12148 def_builtin (d->mask, d->name, type, d->code);
bd793c65
BS
12149 }
12150
12151 /* Add the remaining MMX insns with somewhat more complicated types. */
eeb06b1b
BS
12152 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
12153 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
12154 def_builtin (MASK_MMX, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
12155 def_builtin (MASK_MMX, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
12156 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
12157 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
12158 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
12159
12160 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
12161 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
12162 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
12163
12164 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
12165 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
12166
12167 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
12168 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
0f290768 12169
bd793c65 12170 /* comi/ucomi insns. */
ca7558fc 12171 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
fbe5eb6d
BS
12172 if (d->mask == MASK_SSE2)
12173 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
12174 else
12175 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
bd793c65 12176
1255c85c
BS
12177 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
12178 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
12179 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
bd793c65 12180
fbe5eb6d
BS
12181 def_builtin (MASK_SSE1, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
12182 def_builtin (MASK_SSE1, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
12183 def_builtin (MASK_SSE1, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
12184 def_builtin (MASK_SSE1, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
12185 def_builtin (MASK_SSE1, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
12186 def_builtin (MASK_SSE1, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
bd793c65 12187
fbe5eb6d
BS
12188 def_builtin (MASK_SSE1, "__builtin_ia32_andps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDPS);
12189 def_builtin (MASK_SSE1, "__builtin_ia32_andnps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDNPS);
12190 def_builtin (MASK_SSE1, "__builtin_ia32_orps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ORPS);
12191 def_builtin (MASK_SSE1, "__builtin_ia32_xorps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_XORPS);
e37af218 12192
fbe5eb6d
BS
12193 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
12194 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
bd793c65 12195
fbe5eb6d 12196 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
bd793c65 12197
fbe5eb6d
BS
12198 def_builtin (MASK_SSE1, "__builtin_ia32_loadaps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADAPS);
12199 def_builtin (MASK_SSE1, "__builtin_ia32_loadups", v4sf_ftype_pfloat, IX86_BUILTIN_LOADUPS);
12200 def_builtin (MASK_SSE1, "__builtin_ia32_loadss", v4sf_ftype_pfloat, IX86_BUILTIN_LOADSS);
12201 def_builtin (MASK_SSE1, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
12202 def_builtin (MASK_SSE1, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
12203 def_builtin (MASK_SSE1, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
bd793c65 12204
fbe5eb6d
BS
12205 def_builtin (MASK_SSE1, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
12206 def_builtin (MASK_SSE1, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
12207 def_builtin (MASK_SSE1, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
12208 def_builtin (MASK_SSE1, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
bd793c65 12209
fbe5eb6d
BS
12210 def_builtin (MASK_SSE1, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
12211 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
12212 def_builtin (MASK_SSE1, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
12213 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
bd793c65 12214
fbe5eb6d 12215 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
bd793c65 12216
916b60b7 12217 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
bd793c65 12218
fbe5eb6d
BS
12219 def_builtin (MASK_SSE1, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
12220 def_builtin (MASK_SSE1, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
12221 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
12222 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
12223 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
12224 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
bd793c65 12225
fbe5eb6d 12226 def_builtin (MASK_SSE1, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
bd793c65 12227
47f339cf
BS
12228 /* Original 3DNow! */
12229 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
12230 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
12231 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
12232 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
12233 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
12234 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
12235 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
12236 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
12237 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
12238 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
12239 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
12240 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
12241 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
12242 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
12243 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
12244 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
12245 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
12246 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
12247 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
12248 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
47f339cf
BS
12249
12250 /* 3DNow! extension as used in the Athlon CPU. */
12251 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
12252 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
12253 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
12254 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
12255 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
12256 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
12257
fbe5eb6d
BS
12258 def_builtin (MASK_SSE1, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
12259
12260 /* SSE2 */
12261 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
12262 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
12263
12264 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
12265 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
12266
12267 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pdouble, IX86_BUILTIN_LOADAPD);
12268 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pdouble, IX86_BUILTIN_LOADUPD);
12269 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pdouble, IX86_BUILTIN_LOADSD);
12270 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
12271 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
12272 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
12273
12274 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
12275 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
12276 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
12277 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
12278
12279 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
916b60b7 12280 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
fbe5eb6d
BS
12281 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
12282 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
916b60b7 12283 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
fbe5eb6d
BS
12284
12285 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
12286 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
12287 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
916b60b7 12288 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
fbe5eb6d
BS
12289
12290 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
12291 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
12292
12293 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
12294
12295 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
916b60b7 12296 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
fbe5eb6d
BS
12297
12298 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
12299 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
12300 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
12301 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
12302 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
12303
12304 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
12305
12306 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
12307 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
12308
12309 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
12310 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
12311 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
12312
12313 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
12314 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
12315 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
12316
12317 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
12318 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
12319 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
12320 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pdouble, IX86_BUILTIN_LOADPD1);
12321 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pdouble, IX86_BUILTIN_LOADRPD);
12322 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
12323 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
12324
12325 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pvoid, IX86_BUILTIN_CLFLUSH);
12326 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
12327 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
916b60b7
BS
12328
12329 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
12330 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
12331 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
12332
12333 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
12334 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
12335 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
12336
12337 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
12338 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
12339
12340 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
12341 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
12342 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
12343
12344 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
12345 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
12346 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
12347
12348 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
12349 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
12350
12351 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
bd793c65
BS
12352}
12353
12354/* Errors in the source file can cause expand_expr to return const0_rtx
12355 where we expect a vector. To avoid crashing, use one of the vector
12356 clear instructions. */
12357static rtx
12358safe_vector_operand (x, mode)
12359 rtx x;
12360 enum machine_mode mode;
12361{
12362 if (x != const0_rtx)
12363 return x;
12364 x = gen_reg_rtx (mode);
12365
47f339cf 12366 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
bd793c65
BS
12367 emit_insn (gen_mmx_clrdi (mode == DImode ? x
12368 : gen_rtx_SUBREG (DImode, x, 0)));
12369 else
e37af218
RH
12370 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
12371 : gen_rtx_SUBREG (V4SFmode, x, 0)));
bd793c65
BS
12372 return x;
12373}
12374
12375/* Subroutine of ix86_expand_builtin to take care of binop insns. */
12376
12377static rtx
12378ix86_expand_binop_builtin (icode, arglist, target)
12379 enum insn_code icode;
12380 tree arglist;
12381 rtx target;
12382{
12383 rtx pat;
12384 tree arg0 = TREE_VALUE (arglist);
12385 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12386 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12387 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12388 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12389 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12390 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
12391
12392 if (VECTOR_MODE_P (mode0))
12393 op0 = safe_vector_operand (op0, mode0);
12394 if (VECTOR_MODE_P (mode1))
12395 op1 = safe_vector_operand (op1, mode1);
12396
12397 if (! target
12398 || GET_MODE (target) != tmode
12399 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12400 target = gen_reg_rtx (tmode);
12401
12402 /* In case the insn wants input operands in modes different from
12403 the result, abort. */
12404 if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
12405 abort ();
12406
12407 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12408 op0 = copy_to_mode_reg (mode0, op0);
12409 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12410 op1 = copy_to_mode_reg (mode1, op1);
12411
59bef189
RH
12412 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
12413 yet one of the two must not be a memory. This is normally enforced
12414 by expanders, but we didn't bother to create one here. */
12415 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
12416 op0 = copy_to_mode_reg (mode0, op0);
12417
bd793c65
BS
12418 pat = GEN_FCN (icode) (target, op0, op1);
12419 if (! pat)
12420 return 0;
12421 emit_insn (pat);
12422 return target;
12423}
12424
fce5a9f2 12425/* In type_for_mode we restrict the ability to create TImode types
e37af218
RH
12426 to hosts with 64-bit H_W_I. So we've defined the SSE logicals
12427 to have a V4SFmode signature. Convert them in-place to TImode. */
12428
12429static rtx
12430ix86_expand_timode_binop_builtin (icode, arglist, target)
12431 enum insn_code icode;
12432 tree arglist;
12433 rtx target;
12434{
12435 rtx pat;
12436 tree arg0 = TREE_VALUE (arglist);
12437 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12438 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12439 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12440
12441 op0 = gen_lowpart (TImode, op0);
12442 op1 = gen_lowpart (TImode, op1);
12443 target = gen_reg_rtx (TImode);
12444
12445 if (! (*insn_data[icode].operand[1].predicate) (op0, TImode))
12446 op0 = copy_to_mode_reg (TImode, op0);
12447 if (! (*insn_data[icode].operand[2].predicate) (op1, TImode))
12448 op1 = copy_to_mode_reg (TImode, op1);
12449
59bef189
RH
12450 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
12451 yet one of the two must not be a memory. This is normally enforced
12452 by expanders, but we didn't bother to create one here. */
12453 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
12454 op0 = copy_to_mode_reg (TImode, op0);
12455
e37af218
RH
12456 pat = GEN_FCN (icode) (target, op0, op1);
12457 if (! pat)
12458 return 0;
12459 emit_insn (pat);
12460
12461 return gen_lowpart (V4SFmode, target);
12462}
12463
bd793c65
BS
12464/* Subroutine of ix86_expand_builtin to take care of stores. */
12465
12466static rtx
e37af218 12467ix86_expand_store_builtin (icode, arglist)
bd793c65
BS
12468 enum insn_code icode;
12469 tree arglist;
bd793c65
BS
12470{
12471 rtx pat;
12472 tree arg0 = TREE_VALUE (arglist);
12473 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12474 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12475 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12476 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
12477 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
12478
12479 if (VECTOR_MODE_P (mode1))
12480 op1 = safe_vector_operand (op1, mode1);
12481
12482 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
59bef189
RH
12483
12484 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
12485 op1 = copy_to_mode_reg (mode1, op1);
12486
bd793c65
BS
12487 pat = GEN_FCN (icode) (op0, op1);
12488 if (pat)
12489 emit_insn (pat);
12490 return 0;
12491}
12492
12493/* Subroutine of ix86_expand_builtin to take care of unop insns. */
12494
12495static rtx
12496ix86_expand_unop_builtin (icode, arglist, target, do_load)
12497 enum insn_code icode;
12498 tree arglist;
12499 rtx target;
12500 int do_load;
12501{
12502 rtx pat;
12503 tree arg0 = TREE_VALUE (arglist);
12504 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12505 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12506 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12507
12508 if (! target
12509 || GET_MODE (target) != tmode
12510 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12511 target = gen_reg_rtx (tmode);
12512 if (do_load)
12513 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
12514 else
12515 {
12516 if (VECTOR_MODE_P (mode0))
12517 op0 = safe_vector_operand (op0, mode0);
12518
12519 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12520 op0 = copy_to_mode_reg (mode0, op0);
12521 }
12522
12523 pat = GEN_FCN (icode) (target, op0);
12524 if (! pat)
12525 return 0;
12526 emit_insn (pat);
12527 return target;
12528}
12529
12530/* Subroutine of ix86_expand_builtin to take care of three special unop insns:
12531 sqrtss, rsqrtss, rcpss. */
12532
12533static rtx
12534ix86_expand_unop1_builtin (icode, arglist, target)
12535 enum insn_code icode;
12536 tree arglist;
12537 rtx target;
12538{
12539 rtx pat;
12540 tree arg0 = TREE_VALUE (arglist);
59bef189 12541 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
bd793c65
BS
12542 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12543 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12544
12545 if (! target
12546 || GET_MODE (target) != tmode
12547 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12548 target = gen_reg_rtx (tmode);
12549
12550 if (VECTOR_MODE_P (mode0))
12551 op0 = safe_vector_operand (op0, mode0);
12552
12553 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12554 op0 = copy_to_mode_reg (mode0, op0);
fce5a9f2 12555
59bef189
RH
12556 op1 = op0;
12557 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
12558 op1 = copy_to_mode_reg (mode0, op1);
fce5a9f2 12559
59bef189 12560 pat = GEN_FCN (icode) (target, op0, op1);
bd793c65
BS
12561 if (! pat)
12562 return 0;
12563 emit_insn (pat);
12564 return target;
12565}
12566
12567/* Subroutine of ix86_expand_builtin to take care of comparison insns. */
12568
12569static rtx
12570ix86_expand_sse_compare (d, arglist, target)
8b60264b 12571 const struct builtin_description *d;
bd793c65
BS
12572 tree arglist;
12573 rtx target;
12574{
12575 rtx pat;
12576 tree arg0 = TREE_VALUE (arglist);
12577 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12578 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12579 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12580 rtx op2;
12581 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
12582 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
12583 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
12584 enum rtx_code comparison = d->comparison;
12585
12586 if (VECTOR_MODE_P (mode0))
12587 op0 = safe_vector_operand (op0, mode0);
12588 if (VECTOR_MODE_P (mode1))
12589 op1 = safe_vector_operand (op1, mode1);
12590
12591 /* Swap operands if we have a comparison that isn't available in
12592 hardware. */
12593 if (d->flag)
12594 {
21e1b5f1
BS
12595 rtx tmp = gen_reg_rtx (mode1);
12596 emit_move_insn (tmp, op1);
bd793c65 12597 op1 = op0;
21e1b5f1 12598 op0 = tmp;
bd793c65 12599 }
21e1b5f1
BS
12600
12601 if (! target
12602 || GET_MODE (target) != tmode
12603 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
bd793c65
BS
12604 target = gen_reg_rtx (tmode);
12605
12606 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
12607 op0 = copy_to_mode_reg (mode0, op0);
12608 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
12609 op1 = copy_to_mode_reg (mode1, op1);
12610
12611 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
12612 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
12613 if (! pat)
12614 return 0;
12615 emit_insn (pat);
12616 return target;
12617}
12618
12619/* Subroutine of ix86_expand_builtin to take care of comi insns. */
12620
12621static rtx
12622ix86_expand_sse_comi (d, arglist, target)
8b60264b 12623 const struct builtin_description *d;
bd793c65
BS
12624 tree arglist;
12625 rtx target;
12626{
12627 rtx pat;
12628 tree arg0 = TREE_VALUE (arglist);
12629 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12630 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12631 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12632 rtx op2;
12633 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
12634 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
12635 enum rtx_code comparison = d->comparison;
12636
12637 if (VECTOR_MODE_P (mode0))
12638 op0 = safe_vector_operand (op0, mode0);
12639 if (VECTOR_MODE_P (mode1))
12640 op1 = safe_vector_operand (op1, mode1);
12641
12642 /* Swap operands if we have a comparison that isn't available in
12643 hardware. */
12644 if (d->flag)
12645 {
12646 rtx tmp = op1;
12647 op1 = op0;
12648 op0 = tmp;
bd793c65
BS
12649 }
12650
12651 target = gen_reg_rtx (SImode);
12652 emit_move_insn (target, const0_rtx);
12653 target = gen_rtx_SUBREG (QImode, target, 0);
12654
12655 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
12656 op0 = copy_to_mode_reg (mode0, op0);
12657 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
12658 op1 = copy_to_mode_reg (mode1, op1);
12659
12660 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
12661 pat = GEN_FCN (d->icode) (op0, op1, op2);
12662 if (! pat)
12663 return 0;
12664 emit_insn (pat);
29628f27
BS
12665 emit_insn (gen_rtx_SET (VOIDmode,
12666 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
12667 gen_rtx_fmt_ee (comparison, QImode,
12668 gen_rtx_REG (CCmode, FLAGS_REG),
12669 const0_rtx)));
bd793c65 12670
6f1a6c5b 12671 return SUBREG_REG (target);
bd793c65
BS
12672}
12673
12674/* Expand an expression EXP that calls a built-in function,
12675 with result going to TARGET if that's convenient
12676 (and in mode MODE if that's convenient).
12677 SUBTARGET may be used as the target for computing one of EXP's operands.
12678 IGNORE is nonzero if the value is to be ignored. */
12679
12680rtx
12681ix86_expand_builtin (exp, target, subtarget, mode, ignore)
12682 tree exp;
12683 rtx target;
12684 rtx subtarget ATTRIBUTE_UNUSED;
12685 enum machine_mode mode ATTRIBUTE_UNUSED;
12686 int ignore ATTRIBUTE_UNUSED;
12687{
8b60264b 12688 const struct builtin_description *d;
77ebd435 12689 size_t i;
bd793c65
BS
12690 enum insn_code icode;
12691 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
12692 tree arglist = TREE_OPERAND (exp, 1);
e37af218 12693 tree arg0, arg1, arg2;
bd793c65
BS
12694 rtx op0, op1, op2, pat;
12695 enum machine_mode tmode, mode0, mode1, mode2;
8752c357 12696 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
bd793c65
BS
12697
12698 switch (fcode)
12699 {
12700 case IX86_BUILTIN_EMMS:
12701 emit_insn (gen_emms ());
12702 return 0;
12703
12704 case IX86_BUILTIN_SFENCE:
12705 emit_insn (gen_sfence ());
12706 return 0;
12707
bd793c65 12708 case IX86_BUILTIN_PEXTRW:
fbe5eb6d
BS
12709 case IX86_BUILTIN_PEXTRW128:
12710 icode = (fcode == IX86_BUILTIN_PEXTRW
12711 ? CODE_FOR_mmx_pextrw
12712 : CODE_FOR_sse2_pextrw);
bd793c65
BS
12713 arg0 = TREE_VALUE (arglist);
12714 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12715 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12716 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12717 tmode = insn_data[icode].operand[0].mode;
12718 mode0 = insn_data[icode].operand[1].mode;
12719 mode1 = insn_data[icode].operand[2].mode;
12720
12721 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12722 op0 = copy_to_mode_reg (mode0, op0);
12723 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12724 {
12725 /* @@@ better error message */
12726 error ("selector must be an immediate");
6f1a6c5b 12727 return gen_reg_rtx (tmode);
bd793c65
BS
12728 }
12729 if (target == 0
12730 || GET_MODE (target) != tmode
12731 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12732 target = gen_reg_rtx (tmode);
12733 pat = GEN_FCN (icode) (target, op0, op1);
12734 if (! pat)
12735 return 0;
12736 emit_insn (pat);
12737 return target;
12738
12739 case IX86_BUILTIN_PINSRW:
fbe5eb6d
BS
12740 case IX86_BUILTIN_PINSRW128:
12741 icode = (fcode == IX86_BUILTIN_PINSRW
12742 ? CODE_FOR_mmx_pinsrw
12743 : CODE_FOR_sse2_pinsrw);
bd793c65
BS
12744 arg0 = TREE_VALUE (arglist);
12745 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12746 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
12747 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12748 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12749 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
12750 tmode = insn_data[icode].operand[0].mode;
12751 mode0 = insn_data[icode].operand[1].mode;
12752 mode1 = insn_data[icode].operand[2].mode;
12753 mode2 = insn_data[icode].operand[3].mode;
12754
12755 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12756 op0 = copy_to_mode_reg (mode0, op0);
12757 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12758 op1 = copy_to_mode_reg (mode1, op1);
12759 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
12760 {
12761 /* @@@ better error message */
12762 error ("selector must be an immediate");
12763 return const0_rtx;
12764 }
12765 if (target == 0
12766 || GET_MODE (target) != tmode
12767 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12768 target = gen_reg_rtx (tmode);
12769 pat = GEN_FCN (icode) (target, op0, op1, op2);
12770 if (! pat)
12771 return 0;
12772 emit_insn (pat);
12773 return target;
12774
12775 case IX86_BUILTIN_MASKMOVQ:
fbe5eb6d
BS
12776 icode = (fcode == IX86_BUILTIN_MASKMOVQ
12777 ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
12778 : CODE_FOR_sse2_maskmovdqu);
bd793c65
BS
12779 /* Note the arg order is different from the operand order. */
12780 arg1 = TREE_VALUE (arglist);
12781 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
12782 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
12783 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12784 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12785 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
12786 mode0 = insn_data[icode].operand[0].mode;
12787 mode1 = insn_data[icode].operand[1].mode;
12788 mode2 = insn_data[icode].operand[2].mode;
12789
5c464583 12790 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
bd793c65
BS
12791 op0 = copy_to_mode_reg (mode0, op0);
12792 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
12793 op1 = copy_to_mode_reg (mode1, op1);
12794 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
12795 op2 = copy_to_mode_reg (mode2, op2);
12796 pat = GEN_FCN (icode) (op0, op1, op2);
12797 if (! pat)
12798 return 0;
12799 emit_insn (pat);
12800 return 0;
12801
12802 case IX86_BUILTIN_SQRTSS:
12803 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
12804 case IX86_BUILTIN_RSQRTSS:
12805 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
12806 case IX86_BUILTIN_RCPSS:
12807 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
12808
e37af218
RH
12809 case IX86_BUILTIN_ANDPS:
12810 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_andti3,
12811 arglist, target);
12812 case IX86_BUILTIN_ANDNPS:
12813 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_nandti3,
12814 arglist, target);
12815 case IX86_BUILTIN_ORPS:
12816 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_iorti3,
12817 arglist, target);
12818 case IX86_BUILTIN_XORPS:
12819 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_xorti3,
12820 arglist, target);
12821
bd793c65
BS
12822 case IX86_BUILTIN_LOADAPS:
12823 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
12824
12825 case IX86_BUILTIN_LOADUPS:
12826 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
12827
12828 case IX86_BUILTIN_STOREAPS:
e37af218 12829 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
bd793c65 12830 case IX86_BUILTIN_STOREUPS:
e37af218 12831 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
bd793c65
BS
12832
12833 case IX86_BUILTIN_LOADSS:
12834 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
12835
12836 case IX86_BUILTIN_STORESS:
e37af218 12837 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
bd793c65 12838
0f290768 12839 case IX86_BUILTIN_LOADHPS:
bd793c65 12840 case IX86_BUILTIN_LOADLPS:
fbe5eb6d
BS
12841 case IX86_BUILTIN_LOADHPD:
12842 case IX86_BUILTIN_LOADLPD:
12843 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
12844 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
12845 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
12846 : CODE_FOR_sse2_movlpd);
bd793c65
BS
12847 arg0 = TREE_VALUE (arglist);
12848 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12849 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12850 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12851 tmode = insn_data[icode].operand[0].mode;
12852 mode0 = insn_data[icode].operand[1].mode;
12853 mode1 = insn_data[icode].operand[2].mode;
12854
12855 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12856 op0 = copy_to_mode_reg (mode0, op0);
12857 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
12858 if (target == 0
12859 || GET_MODE (target) != tmode
12860 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12861 target = gen_reg_rtx (tmode);
12862 pat = GEN_FCN (icode) (target, op0, op1);
12863 if (! pat)
12864 return 0;
12865 emit_insn (pat);
12866 return target;
0f290768 12867
bd793c65
BS
12868 case IX86_BUILTIN_STOREHPS:
12869 case IX86_BUILTIN_STORELPS:
fbe5eb6d
BS
12870 case IX86_BUILTIN_STOREHPD:
12871 case IX86_BUILTIN_STORELPD:
12872 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
12873 : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
12874 : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
12875 : CODE_FOR_sse2_movlpd);
bd793c65
BS
12876 arg0 = TREE_VALUE (arglist);
12877 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12878 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12879 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12880 mode0 = insn_data[icode].operand[1].mode;
12881 mode1 = insn_data[icode].operand[2].mode;
12882
12883 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
12884 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12885 op1 = copy_to_mode_reg (mode1, op1);
12886
12887 pat = GEN_FCN (icode) (op0, op0, op1);
12888 if (! pat)
12889 return 0;
12890 emit_insn (pat);
12891 return 0;
12892
12893 case IX86_BUILTIN_MOVNTPS:
e37af218 12894 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
bd793c65 12895 case IX86_BUILTIN_MOVNTQ:
e37af218 12896 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
bd793c65
BS
12897
12898 case IX86_BUILTIN_LDMXCSR:
12899 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
12900 target = assign_386_stack_local (SImode, 0);
12901 emit_move_insn (target, op0);
12902 emit_insn (gen_ldmxcsr (target));
12903 return 0;
12904
12905 case IX86_BUILTIN_STMXCSR:
12906 target = assign_386_stack_local (SImode, 0);
12907 emit_insn (gen_stmxcsr (target));
12908 return copy_to_mode_reg (SImode, target);
12909
bd793c65 12910 case IX86_BUILTIN_SHUFPS:
fbe5eb6d
BS
12911 case IX86_BUILTIN_SHUFPD:
12912 icode = (fcode == IX86_BUILTIN_SHUFPS
12913 ? CODE_FOR_sse_shufps
12914 : CODE_FOR_sse2_shufpd);
bd793c65
BS
12915 arg0 = TREE_VALUE (arglist);
12916 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12917 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
12918 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12919 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12920 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
12921 tmode = insn_data[icode].operand[0].mode;
12922 mode0 = insn_data[icode].operand[1].mode;
12923 mode1 = insn_data[icode].operand[2].mode;
12924 mode2 = insn_data[icode].operand[3].mode;
12925
12926 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12927 op0 = copy_to_mode_reg (mode0, op0);
12928 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12929 op1 = copy_to_mode_reg (mode1, op1);
12930 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
12931 {
12932 /* @@@ better error message */
12933 error ("mask must be an immediate");
6f1a6c5b 12934 return gen_reg_rtx (tmode);
bd793c65
BS
12935 }
12936 if (target == 0
12937 || GET_MODE (target) != tmode
12938 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12939 target = gen_reg_rtx (tmode);
12940 pat = GEN_FCN (icode) (target, op0, op1, op2);
12941 if (! pat)
12942 return 0;
12943 emit_insn (pat);
12944 return target;
12945
12946 case IX86_BUILTIN_PSHUFW:
fbe5eb6d
BS
12947 case IX86_BUILTIN_PSHUFD:
12948 case IX86_BUILTIN_PSHUFHW:
12949 case IX86_BUILTIN_PSHUFLW:
12950 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
12951 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
12952 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
12953 : CODE_FOR_mmx_pshufw);
bd793c65
BS
12954 arg0 = TREE_VALUE (arglist);
12955 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12956 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12957 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12958 tmode = insn_data[icode].operand[0].mode;
29628f27
BS
12959 mode1 = insn_data[icode].operand[1].mode;
12960 mode2 = insn_data[icode].operand[2].mode;
bd793c65 12961
29628f27
BS
12962 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
12963 op0 = copy_to_mode_reg (mode1, op0);
12964 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
bd793c65
BS
12965 {
12966 /* @@@ better error message */
12967 error ("mask must be an immediate");
12968 return const0_rtx;
12969 }
12970 if (target == 0
12971 || GET_MODE (target) != tmode
12972 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12973 target = gen_reg_rtx (tmode);
29628f27 12974 pat = GEN_FCN (icode) (target, op0, op1);
bd793c65
BS
12975 if (! pat)
12976 return 0;
12977 emit_insn (pat);
12978 return target;
12979
47f339cf
BS
12980 case IX86_BUILTIN_FEMMS:
12981 emit_insn (gen_femms ());
12982 return NULL_RTX;
12983
12984 case IX86_BUILTIN_PAVGUSB:
12985 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
12986
12987 case IX86_BUILTIN_PF2ID:
12988 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
12989
12990 case IX86_BUILTIN_PFACC:
12991 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
12992
12993 case IX86_BUILTIN_PFADD:
12994 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
12995
12996 case IX86_BUILTIN_PFCMPEQ:
12997 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
12998
12999 case IX86_BUILTIN_PFCMPGE:
13000 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
13001
13002 case IX86_BUILTIN_PFCMPGT:
13003 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
13004
13005 case IX86_BUILTIN_PFMAX:
13006 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
13007
13008 case IX86_BUILTIN_PFMIN:
13009 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
13010
13011 case IX86_BUILTIN_PFMUL:
13012 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
13013
13014 case IX86_BUILTIN_PFRCP:
13015 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
13016
13017 case IX86_BUILTIN_PFRCPIT1:
13018 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
13019
13020 case IX86_BUILTIN_PFRCPIT2:
13021 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
13022
13023 case IX86_BUILTIN_PFRSQIT1:
13024 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
13025
13026 case IX86_BUILTIN_PFRSQRT:
13027 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
13028
13029 case IX86_BUILTIN_PFSUB:
13030 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
13031
13032 case IX86_BUILTIN_PFSUBR:
13033 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
13034
13035 case IX86_BUILTIN_PI2FD:
13036 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
13037
13038 case IX86_BUILTIN_PMULHRW:
13039 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
13040
47f339cf
BS
13041 case IX86_BUILTIN_PF2IW:
13042 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
13043
13044 case IX86_BUILTIN_PFNACC:
13045 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
13046
13047 case IX86_BUILTIN_PFPNACC:
13048 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
13049
13050 case IX86_BUILTIN_PI2FW:
13051 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
13052
13053 case IX86_BUILTIN_PSWAPDSI:
13054 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
13055
13056 case IX86_BUILTIN_PSWAPDSF:
13057 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
13058
e37af218
RH
13059 case IX86_BUILTIN_SSE_ZERO:
13060 target = gen_reg_rtx (V4SFmode);
13061 emit_insn (gen_sse_clrv4sf (target));
bd793c65
BS
13062 return target;
13063
bd793c65
BS
13064 case IX86_BUILTIN_MMX_ZERO:
13065 target = gen_reg_rtx (DImode);
13066 emit_insn (gen_mmx_clrdi (target));
13067 return target;
13068
fbe5eb6d
BS
13069 case IX86_BUILTIN_SQRTSD:
13070 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
13071 case IX86_BUILTIN_LOADAPD:
13072 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
13073 case IX86_BUILTIN_LOADUPD:
13074 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
13075
13076 case IX86_BUILTIN_STOREAPD:
13077 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13078 case IX86_BUILTIN_STOREUPD:
13079 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
13080
13081 case IX86_BUILTIN_LOADSD:
13082 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
13083
13084 case IX86_BUILTIN_STORESD:
13085 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
13086
13087 case IX86_BUILTIN_SETPD1:
13088 target = assign_386_stack_local (DFmode, 0);
13089 arg0 = TREE_VALUE (arglist);
13090 emit_move_insn (adjust_address (target, DFmode, 0),
13091 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
13092 op0 = gen_reg_rtx (V2DFmode);
13093 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
13094 emit_insn (gen_sse2_shufpd (op0, op0, op0, GEN_INT (0)));
13095 return op0;
13096
13097 case IX86_BUILTIN_SETPD:
13098 target = assign_386_stack_local (V2DFmode, 0);
13099 arg0 = TREE_VALUE (arglist);
13100 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13101 emit_move_insn (adjust_address (target, DFmode, 0),
13102 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
13103 emit_move_insn (adjust_address (target, DFmode, 8),
13104 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
13105 op0 = gen_reg_rtx (V2DFmode);
13106 emit_insn (gen_sse2_movapd (op0, target));
13107 return op0;
13108
13109 case IX86_BUILTIN_LOADRPD:
13110 target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
13111 gen_reg_rtx (V2DFmode), 1);
13112 emit_insn (gen_sse2_shufpd (target, target, target, GEN_INT (1)));
13113 return target;
13114
13115 case IX86_BUILTIN_LOADPD1:
13116 target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
13117 gen_reg_rtx (V2DFmode), 1);
13118 emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
13119 return target;
13120
13121 case IX86_BUILTIN_STOREPD1:
13122 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13123 case IX86_BUILTIN_STORERPD:
13124 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13125
13126 case IX86_BUILTIN_MFENCE:
13127 emit_insn (gen_sse2_mfence ());
13128 return 0;
13129 case IX86_BUILTIN_LFENCE:
13130 emit_insn (gen_sse2_lfence ());
13131 return 0;
13132
13133 case IX86_BUILTIN_CLFLUSH:
13134 arg0 = TREE_VALUE (arglist);
13135 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13136 icode = CODE_FOR_sse2_clflush;
13137 mode0 = insn_data[icode].operand[0].mode;
13138 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13139 op0 = copy_to_mode_reg (mode0, op0);
13140
13141 emit_insn (gen_sse2_clflush (op0));
13142 return 0;
13143
13144 case IX86_BUILTIN_MOVNTPD:
13145 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
13146 case IX86_BUILTIN_MOVNTDQ:
916b60b7 13147 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
fbe5eb6d
BS
13148 case IX86_BUILTIN_MOVNTI:
13149 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
13150
bd793c65
BS
13151 default:
13152 break;
13153 }
13154
ca7558fc 13155 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
bd793c65
BS
13156 if (d->code == fcode)
13157 {
13158 /* Compares are treated specially. */
13159 if (d->icode == CODE_FOR_maskcmpv4sf3
13160 || d->icode == CODE_FOR_vmmaskcmpv4sf3
13161 || d->icode == CODE_FOR_maskncmpv4sf3
fbe5eb6d
BS
13162 || d->icode == CODE_FOR_vmmaskncmpv4sf3
13163 || d->icode == CODE_FOR_maskcmpv2df3
13164 || d->icode == CODE_FOR_vmmaskcmpv2df3
13165 || d->icode == CODE_FOR_maskncmpv2df3
13166 || d->icode == CODE_FOR_vmmaskncmpv2df3)
bd793c65
BS
13167 return ix86_expand_sse_compare (d, arglist, target);
13168
13169 return ix86_expand_binop_builtin (d->icode, arglist, target);
13170 }
13171
ca7558fc 13172 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
bd793c65
BS
13173 if (d->code == fcode)
13174 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
0f290768 13175
ca7558fc 13176 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
bd793c65
BS
13177 if (d->code == fcode)
13178 return ix86_expand_sse_comi (d, arglist, target);
0f290768 13179
bd793c65
BS
13180 /* @@@ Should really do something sensible here. */
13181 return 0;
bd793c65 13182}
4211a8fb
JH
13183
/* Store OPERAND to the memory after reload is completed.  This means
   that we can't easily use assign_stack_local.  */
rtx
ix86_force_to_memory (mode, operand)
     enum machine_mode mode;
     rtx operand;
{
  rtx result;
  /* Only valid after reload: we push below the stack pointer (or into
     the red zone), which reload-time frame layout would not tolerate.  */
  if (!reload_completed)
    abort ();
  if (TARGET_64BIT && TARGET_RED_ZONE)
    {
      /* With a red zone, scratch memory below the stack pointer is
	 guaranteed safe; no stack adjustment is needed.  */
      result = gen_rtx_MEM (mode,
			    gen_rtx_PLUS (Pmode,
					  stack_pointer_rtx,
					  GEN_INT (-RED_ZONE_SIZE)));
      emit_move_insn (result, operand);
    }
  else if (TARGET_64BIT && !TARGET_RED_ZONE)
    {
      /* No red zone: materialize the slot with a 64-bit push.
	 Narrower modes are widened to DImode first.  */
      switch (mode)
	{
	case HImode:
	case SImode:
	  operand = gen_lowpart (DImode, operand);
	  /* FALLTHRU */
	case DImode:
	  emit_insn (
	    gen_rtx_SET (VOIDmode,
			 gen_rtx_MEM (DImode,
				      gen_rtx_PRE_DEC (DImode,
						       stack_pointer_rtx)),
			 operand));
	  break;
	default:
	  abort ();
	}
      /* The pushed value now lives at the new top of stack.  */
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  else
    {
      /* 32-bit: push the operand with one or two 32-bit pushes.  */
      switch (mode)
	{
	case DImode:
	  {
	    rtx operands[2];
	    /* Split into low/high halves; push high word first so the
	       value sits in memory in little-endian order.  */
	    split_di (&operand, 1, operands, operands + 1);
	    emit_insn (
	      gen_rtx_SET (VOIDmode,
			   gen_rtx_MEM (SImode,
					gen_rtx_PRE_DEC (Pmode,
							 stack_pointer_rtx)),
			   operands[1]));
	    emit_insn (
	      gen_rtx_SET (VOIDmode,
			   gen_rtx_MEM (SImode,
					gen_rtx_PRE_DEC (Pmode,
							 stack_pointer_rtx)),
			   operands[0]));
	  }
	  break;
	case HImode:
	  /* It is better to store HImodes as SImodes.  */
	  if (!TARGET_PARTIAL_REG_STALL)
	    operand = gen_lowpart (SImode, operand);
	  /* FALLTHRU */
	case SImode:
	  emit_insn (
	    gen_rtx_SET (VOIDmode,
			 gen_rtx_MEM (GET_MODE (operand),
				      gen_rtx_PRE_DEC (SImode,
						       stack_pointer_rtx)),
			 operand));
	  break;
	default:
	  abort ();
	}
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  /* Caller must release the slot with ix86_free_from_memory, which
     mirrors the size logic used here.  */
  return result;
}
13265
13266/* Free operand from the memory. */
13267void
13268ix86_free_from_memory (mode)
13269 enum machine_mode mode;
13270{
898d374d
JH
13271 if (!TARGET_64BIT || !TARGET_RED_ZONE)
13272 {
13273 int size;
13274
13275 if (mode == DImode || TARGET_64BIT)
13276 size = 8;
13277 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
13278 size = 2;
13279 else
13280 size = 4;
13281 /* Use LEA to deallocate stack space. In peephole2 it will be converted
13282 to pop or add instruction if registers are available. */
13283 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
13284 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
13285 GEN_INT (size))));
13286 }
4211a8fb 13287}
a946dd00 13288
f84aa48a
JH
13289/* Put float CONST_DOUBLE in the constant pool instead of fp regs.
13290 QImode must go into class Q_REGS.
13291 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
892a2d68 13292 movdf to do mem-to-mem moves through integer regs. */
f84aa48a
JH
13293enum reg_class
13294ix86_preferred_reload_class (x, class)
13295 rtx x;
13296 enum reg_class class;
13297{
13298 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
13299 {
13300 /* SSE can't load any constant directly yet. */
13301 if (SSE_CLASS_P (class))
13302 return NO_REGS;
13303 /* Floats can load 0 and 1. */
13304 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
13305 {
13306 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
13307 if (MAYBE_SSE_CLASS_P (class))
13308 return (reg_class_subset_p (class, GENERAL_REGS)
13309 ? GENERAL_REGS : FLOAT_REGS);
13310 else
13311 return class;
13312 }
13313 /* General regs can load everything. */
13314 if (reg_class_subset_p (class, GENERAL_REGS))
13315 return GENERAL_REGS;
13316 /* In case we haven't resolved FLOAT or SSE yet, give up. */
13317 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
13318 return NO_REGS;
13319 }
13320 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
13321 return NO_REGS;
13322 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
13323 return Q_REGS;
13324 return class;
13325}
13326
13327/* If we are copying between general and FP registers, we need a memory
13328 location. The same is true for SSE and MMX registers.
13329
13330 The macro can't work reliably when one of the CLASSES is class containing
13331 registers from multiple units (SSE, MMX, integer). We avoid this by never
13332 combining those units in single alternative in the machine description.
13333 Ensure that this constraint holds to avoid unexpected surprises.
13334
13335 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
13336 enforce these sanity checks. */
13337int
13338ix86_secondary_memory_needed (class1, class2, mode, strict)
13339 enum reg_class class1, class2;
13340 enum machine_mode mode;
13341 int strict;
13342{
13343 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
13344 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
13345 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
13346 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
13347 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
13348 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
13349 {
13350 if (strict)
13351 abort ();
13352 else
13353 return 1;
13354 }
13355 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
13356 || (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
13357 && (mode) != SImode)
13358 || (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
13359 && (mode) != SImode));
13360}
13361/* Return the cost of moving data from a register in class CLASS1 to
a4f31c00 13362 one in class CLASS2.
f84aa48a
JH
13363
13364 It is not required that the cost always equal 2 when FROM is the same as TO;
13365 on some machines it is expensive to move between registers if they are not
13366 general registers. */
13367int
13368ix86_register_move_cost (mode, class1, class2)
13369 enum machine_mode mode;
13370 enum reg_class class1, class2;
13371{
13372 /* In case we require secondary memory, compute cost of the store followed
13373 by load. In case of copying from general_purpose_register we may emit
13374 multiple stores followed by single load causing memory size mismatch
13375 stall. Count this as arbitarily high cost of 20. */
13376 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
13377 {
92d0fb09 13378 int add_cost = 0;
62415523 13379 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
92d0fb09 13380 add_cost = 20;
62415523 13381 return (MEMORY_MOVE_COST (mode, class1, 0)
92d0fb09 13382 + MEMORY_MOVE_COST (mode, class2, 1) + add_cost);
f84aa48a 13383 }
92d0fb09 13384 /* Moves between SSE/MMX and integer unit are expensive. */
62415523
JH
13385 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
13386 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
fa79946e
JH
13387 return ix86_cost->mmxsse_to_integer;
13388 if (MAYBE_FLOAT_CLASS_P (class1))
13389 return ix86_cost->fp_move;
13390 if (MAYBE_SSE_CLASS_P (class1))
13391 return ix86_cost->sse_move;
13392 if (MAYBE_MMX_CLASS_P (class1))
13393 return ix86_cost->mmx_move;
f84aa48a
JH
13394 return 2;
13395}
13396
a946dd00
JH
13397/* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
13398int
13399ix86_hard_regno_mode_ok (regno, mode)
13400 int regno;
13401 enum machine_mode mode;
13402{
13403 /* Flags and only flags can only hold CCmode values. */
13404 if (CC_REGNO_P (regno))
13405 return GET_MODE_CLASS (mode) == MODE_CC;
13406 if (GET_MODE_CLASS (mode) == MODE_CC
13407 || GET_MODE_CLASS (mode) == MODE_RANDOM
13408 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
13409 return 0;
13410 if (FP_REGNO_P (regno))
13411 return VALID_FP_MODE_P (mode);
13412 if (SSE_REGNO_P (regno))
13413 return VALID_SSE_REG_MODE (mode);
13414 if (MMX_REGNO_P (regno))
47f339cf 13415 return VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode);
a946dd00
JH
13416 /* We handle both integer and floats in the general purpose registers.
13417 In future we should be able to handle vector modes as well. */
13418 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
13419 return 0;
13420 /* Take care for QImode values - they can be in non-QI regs, but then
13421 they do cause partial register stalls. */
d2836273 13422 if (regno < 4 || mode != QImode || TARGET_64BIT)
a946dd00
JH
13423 return 1;
13424 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
13425}
fa79946e
JH
13426
13427/* Return the cost of moving data of mode M between a
13428 register and memory. A value of 2 is the default; this cost is
13429 relative to those in `REGISTER_MOVE_COST'.
13430
13431 If moving between registers and memory is more expensive than
13432 between two registers, you should define this macro to express the
a4f31c00
AJ
13433 relative cost.
13434
fa79946e
JH
13435 Model also increased moving costs of QImode registers in non
13436 Q_REGS classes.
13437 */
13438int
13439ix86_memory_move_cost (mode, class, in)
13440 enum machine_mode mode;
13441 enum reg_class class;
13442 int in;
13443{
13444 if (FLOAT_CLASS_P (class))
13445 {
13446 int index;
13447 switch (mode)
13448 {
13449 case SFmode:
13450 index = 0;
13451 break;
13452 case DFmode:
13453 index = 1;
13454 break;
13455 case XFmode:
13456 case TFmode:
13457 index = 2;
13458 break;
13459 default:
13460 return 100;
13461 }
13462 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
13463 }
13464 if (SSE_CLASS_P (class))
13465 {
13466 int index;
13467 switch (GET_MODE_SIZE (mode))
13468 {
13469 case 4:
13470 index = 0;
13471 break;
13472 case 8:
13473 index = 1;
13474 break;
13475 case 16:
13476 index = 2;
13477 break;
13478 default:
13479 return 100;
13480 }
13481 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
13482 }
13483 if (MMX_CLASS_P (class))
13484 {
13485 int index;
13486 switch (GET_MODE_SIZE (mode))
13487 {
13488 case 4:
13489 index = 0;
13490 break;
13491 case 8:
13492 index = 1;
13493 break;
13494 default:
13495 return 100;
13496 }
13497 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
13498 }
13499 switch (GET_MODE_SIZE (mode))
13500 {
13501 case 1:
13502 if (in)
13503 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
13504 : ix86_cost->movzbl_load);
13505 else
13506 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
13507 : ix86_cost->int_store[0] + 4);
13508 break;
13509 case 2:
13510 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
13511 default:
13512 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
13513 if (mode == TFmode)
13514 mode = XFmode;
3bb7e126 13515 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
fa79946e
JH
13516 * (int) GET_MODE_SIZE (mode) / 4);
13517 }
13518}
0ecf09f9 13519
#if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
/* Emit an SVR3-style constructor record: switch to the init section
   and push the address of SYMBOL.  PRIORITY is not supported here.  */
static void
ix86_svr3_asm_out_constructor (symbol, priority)
     rtx symbol;
     int priority ATTRIBUTE_UNUSED;
{
  init_section ();
  fprintf (asm_out_file, "\tpushl $");
  assemble_name (asm_out_file, XSTR (symbol, 0));
  fprintf (asm_out_file, "\n");
}
#endif
162f023b
JH
13532
13533/* Order the registers for register allocator. */
13534
13535void
13536x86_order_regs_for_local_alloc ()
13537{
13538 int pos = 0;
13539 int i;
13540
13541 /* First allocate the local general purpose registers. */
13542 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
13543 if (GENERAL_REGNO_P (i) && call_used_regs[i])
13544 reg_alloc_order [pos++] = i;
13545
13546 /* Global general purpose registers. */
13547 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
13548 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
13549 reg_alloc_order [pos++] = i;
13550
13551 /* x87 registers come first in case we are doing FP math
13552 using them. */
13553 if (!TARGET_SSE_MATH)
13554 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
13555 reg_alloc_order [pos++] = i;
fce5a9f2 13556
162f023b
JH
13557 /* SSE registers. */
13558 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
13559 reg_alloc_order [pos++] = i;
13560 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
13561 reg_alloc_order [pos++] = i;
13562
13563 /* x87 registerts. */
13564 if (TARGET_SSE_MATH)
13565 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
13566 reg_alloc_order [pos++] = i;
13567
13568 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
13569 reg_alloc_order [pos++] = i;
13570
13571 /* Initialize the rest of array as we do not allocate some registers
13572 at all. */
13573 while (pos < FIRST_PSEUDO_REGISTER)
13574 reg_alloc_order [pos++] = 0;
13575}
194734e9
JH
13576
/* Output assembly for an MI thunk for FUNCTION: add DELTA to the `this'
   pointer and tail-jump to FUNCTION.  FILE is the assembly output stream.  */
void
x86_output_mi_thunk (file, delta, function)
     FILE *file;
     int delta;
     tree function;
{
  tree parm;
  rtx xops[3];

  /* With -mregparm, scan the prototype to see whether any arguments are
     passed in registers; PARM ends non-NULL iff a void terminator was
     found (i.e. the prototype is complete).  */
  if (ix86_regparm > 0)
    parm = TYPE_ARG_TYPES (TREE_TYPE (function));
  else
    parm = NULL_TREE;
  for (; parm; parm = TREE_CHAIN (parm))
    if (TREE_VALUE (parm) == void_type_node)
      break;

  xops[0] = GEN_INT (delta);
  if (TARGET_64BIT)
    {
      /* If the function returns an aggregate, `this' is in the second
	 integer parameter register (the first carries the return slot).  */
      int n = aggregate_value_p (TREE_TYPE (TREE_TYPE (function))) != 0;
      xops[1] = gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
      output_asm_insn ("add{q} {%0, %1|%1, %0}", xops);
      if (flag_pic)
	{
	  /* PIC: indirect jump through the GOT entry.  */
	  fprintf (file, "\tjmp *");
	  assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
	  fprintf (file, "@GOTPCREL(%%rip)\n");
	}
      else
	{
	  fprintf (file, "\tjmp ");
	  assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
	  fprintf (file, "\n");
	}
    }
  else
    {
      /* 32-bit: `this' is either in %eax (regparm) or on the stack,
	 past the return address and, for aggregate returns, the
	 return slot pointer.  */
      if (parm)
	xops[1] = gen_rtx_REG (SImode, 0);
      else if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function))))
	xops[1] = gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
      else
	xops[1] = gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
      output_asm_insn ("add{l} {%0, %1|%1, %0}", xops);

      if (flag_pic)
	{
	  /* PIC: materialize the GOT pointer with the call/pop idiom,
	     load the function address from the GOT into %ecx (clobbering
	     it — safe since at most 2 regparm args are allowed here),
	     and jump through it.  */
	  xops[0] = pic_offset_table_rtx;
	  xops[1] = gen_label_rtx ();
	  xops[2] = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");

	  if (ix86_regparm > 2)
	    abort ();
	  output_asm_insn ("push{l}\t%0", xops);
	  output_asm_insn ("call\t%P1", xops);
	  ASM_OUTPUT_INTERNAL_LABEL (file, "L", CODE_LABEL_NUMBER (xops[1]));
	  output_asm_insn ("pop{l}\t%0", xops);
	  output_asm_insn
	    ("add{l}\t{%2+[.-%P1], %0|%0, OFFSET FLAT: %2+[.-%P1]}", xops);
	  xops[0] = gen_rtx_MEM (SImode, XEXP (DECL_RTL (function), 0));
	  output_asm_insn
	    ("mov{l}\t{%0@GOT(%%ebx), %%ecx|%%ecx, %0@GOT[%%ebx]}", xops);
	  asm_fprintf (file, "\tpop{l\t%%ebx|\t%%ebx}\n");
	  asm_fprintf (file, "\tjmp\t{*%%ecx|%%ecx}\n");
	}
      else
	{
	  fprintf (file, "\tjmp ");
	  assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
	  fprintf (file, "\n");
	}
    }
}
e2500fed 13651
e932b21b
JH
13652int
13653x86_field_alignment (field, computed)
13654 tree field;
13655 int computed;
13656{
13657 enum machine_mode mode;
13658 if (TARGET_64BIT || DECL_USER_ALIGN (field) || TARGET_ALIGN_DOUBLE)
13659 return computed;
13660 mode = TYPE_MODE (TREE_CODE (TREE_TYPE (field)) == ARRAY_TYPE
13661 ? get_inner_array_type (field) : TREE_TYPE (field));
13662 if ((mode == DFmode || mode == DCmode
13663 || mode == DImode || mode == CDImode)
13664 && !TARGET_ALIGN_DOUBLE)
13665 return MIN (32, computed);
13666 return computed;
13667}
13668
e2500fed 13669#include "gt-i386.h"
This page took 3.206628 seconds and 5 git commands to generate.