/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "except.h"
#include "function.h"
#include "recog.h"
#include "expr.h"
#include "optabs.h"
#include "toplev.h"
#include "basic-block.h"
#include "ggc.h"
#include "target.h"
#include "target-def.h"
#include "langhooks.h"
#include "cgraph.h"

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif

/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)

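/* Illustrative example, not part of the original file: MODE_INDEX (HImode)
   evaluates to 1, so a per-mode cost lookup such as

     ix86_cost->mult_init[MODE_INDEX (HImode)]

   picks the HImode entry of the five-element arrays in the tables below
   (the field name mult_init is assumed from the processor_costs
   definition in i386.h).  */
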
/* Processor costs (relative to an add) */
static const
struct processor_costs size_cost = {  /* costs for tuning for size */
  2,                         /* cost of an add instruction */
  3,                         /* cost of a lea instruction */
  2,                         /* variable shift costs */
  3,                         /* constant shift costs */
  {3, 3, 3, 3, 5},           /* cost of starting a multiply */
  0,                         /* cost of multiply per each bit set */
  {3, 3, 3, 3, 5},           /* cost of a divide/mod */
  3,                         /* cost of movsx */
  3,                         /* cost of movzx */
  0,                         /* "large" insn */
  2,                         /* MOVE_RATIO */
  2,                         /* cost for loading QImode using movzbl */
  {2, 2, 2},                 /* cost of loading integer registers
                                in QImode, HImode and SImode.
                                Relative to reg-reg move (2).  */
  {2, 2, 2},                 /* cost of storing integer registers */
  2,                         /* cost of reg,reg fld/fst */
  {2, 2, 2},                 /* cost of loading fp registers
                                in SFmode, DFmode and XFmode */
  {2, 2, 2},                 /* cost of storing fp registers
                                in SFmode, DFmode and XFmode */
  3,                         /* cost of moving MMX register */
  {3, 3},                    /* cost of loading MMX registers
                                in SImode and DImode */
  {3, 3},                    /* cost of storing MMX registers
                                in SImode and DImode */
  3,                         /* cost of moving SSE register */
  {3, 3, 3},                 /* cost of loading SSE registers
                                in SImode, DImode and TImode */
  {3, 3, 3},                 /* cost of storing SSE registers
                                in SImode, DImode and TImode */
  3,                         /* MMX or SSE register to integer */
  0,                         /* size of prefetch block */
  0,                         /* number of parallel prefetches */
  1,                         /* Branch cost */
  2,                         /* cost of FADD and FSUB insns.  */
  2,                         /* cost of FMUL instruction.  */
  2,                         /* cost of FDIV instruction.  */
  2,                         /* cost of FABS instruction.  */
  2,                         /* cost of FCHS instruction.  */
  2,                         /* cost of FSQRT instruction.  */
};

/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = {  /* 386 specific costs */
  1,                         /* cost of an add instruction */
  1,                         /* cost of a lea instruction */
  3,                         /* variable shift costs */
  2,                         /* constant shift costs */
  {6, 6, 6, 6, 6},           /* cost of starting a multiply */
  1,                         /* cost of multiply per each bit set */
  {23, 23, 23, 23, 23},      /* cost of a divide/mod */
  3,                         /* cost of movsx */
  2,                         /* cost of movzx */
  15,                        /* "large" insn */
  3,                         /* MOVE_RATIO */
  4,                         /* cost for loading QImode using movzbl */
  {2, 4, 2},                 /* cost of loading integer registers
                                in QImode, HImode and SImode.
                                Relative to reg-reg move (2).  */
  {2, 4, 2},                 /* cost of storing integer registers */
  2,                         /* cost of reg,reg fld/fst */
  {8, 8, 8},                 /* cost of loading fp registers
                                in SFmode, DFmode and XFmode */
  {8, 8, 8},                 /* cost of storing fp registers
                                in SFmode, DFmode and XFmode */
  2,                         /* cost of moving MMX register */
  {4, 8},                    /* cost of loading MMX registers
                                in SImode and DImode */
  {4, 8},                    /* cost of storing MMX registers
                                in SImode and DImode */
  2,                         /* cost of moving SSE register */
  {4, 8, 16},                /* cost of loading SSE registers
                                in SImode, DImode and TImode */
  {4, 8, 16},                /* cost of storing SSE registers
                                in SImode, DImode and TImode */
  3,                         /* MMX or SSE register to integer */
  0,                         /* size of prefetch block */
  0,                         /* number of parallel prefetches */
  1,                         /* Branch cost */
  23,                        /* cost of FADD and FSUB insns.  */
  27,                        /* cost of FMUL instruction.  */
  88,                        /* cost of FDIV instruction.  */
  22,                        /* cost of FABS instruction.  */
  24,                        /* cost of FCHS instruction.  */
  122,                       /* cost of FSQRT instruction.  */
};

static const
struct processor_costs i486_cost = {  /* 486 specific costs */
  1,                         /* cost of an add instruction */
  1,                         /* cost of a lea instruction */
  3,                         /* variable shift costs */
  2,                         /* constant shift costs */
  {12, 12, 12, 12, 12},      /* cost of starting a multiply */
  1,                         /* cost of multiply per each bit set */
  {40, 40, 40, 40, 40},      /* cost of a divide/mod */
  3,                         /* cost of movsx */
  2,                         /* cost of movzx */
  15,                        /* "large" insn */
  3,                         /* MOVE_RATIO */
  4,                         /* cost for loading QImode using movzbl */
  {2, 4, 2},                 /* cost of loading integer registers
                                in QImode, HImode and SImode.
                                Relative to reg-reg move (2).  */
  {2, 4, 2},                 /* cost of storing integer registers */
  2,                         /* cost of reg,reg fld/fst */
  {8, 8, 8},                 /* cost of loading fp registers
                                in SFmode, DFmode and XFmode */
  {8, 8, 8},                 /* cost of storing fp registers
                                in SFmode, DFmode and XFmode */
  2,                         /* cost of moving MMX register */
  {4, 8},                    /* cost of loading MMX registers
                                in SImode and DImode */
  {4, 8},                    /* cost of storing MMX registers
                                in SImode and DImode */
  2,                         /* cost of moving SSE register */
  {4, 8, 16},                /* cost of loading SSE registers
                                in SImode, DImode and TImode */
  {4, 8, 16},                /* cost of storing SSE registers
                                in SImode, DImode and TImode */
  3,                         /* MMX or SSE register to integer */
  0,                         /* size of prefetch block */
  0,                         /* number of parallel prefetches */
  1,                         /* Branch cost */
  8,                         /* cost of FADD and FSUB insns.  */
  16,                        /* cost of FMUL instruction.  */
  73,                        /* cost of FDIV instruction.  */
  3,                         /* cost of FABS instruction.  */
  3,                         /* cost of FCHS instruction.  */
  83,                        /* cost of FSQRT instruction.  */
};

static const
struct processor_costs pentium_cost = {
  1,                         /* cost of an add instruction */
  1,                         /* cost of a lea instruction */
  4,                         /* variable shift costs */
  1,                         /* constant shift costs */
  {11, 11, 11, 11, 11},      /* cost of starting a multiply */
  0,                         /* cost of multiply per each bit set */
  {25, 25, 25, 25, 25},      /* cost of a divide/mod */
  3,                         /* cost of movsx */
  2,                         /* cost of movzx */
  8,                         /* "large" insn */
  6,                         /* MOVE_RATIO */
  6,                         /* cost for loading QImode using movzbl */
  {2, 4, 2},                 /* cost of loading integer registers
                                in QImode, HImode and SImode.
                                Relative to reg-reg move (2).  */
  {2, 4, 2},                 /* cost of storing integer registers */
  2,                         /* cost of reg,reg fld/fst */
  {2, 2, 6},                 /* cost of loading fp registers
                                in SFmode, DFmode and XFmode */
  {4, 4, 6},                 /* cost of storing fp registers
                                in SFmode, DFmode and XFmode */
  8,                         /* cost of moving MMX register */
  {8, 8},                    /* cost of loading MMX registers
                                in SImode and DImode */
  {8, 8},                    /* cost of storing MMX registers
                                in SImode and DImode */
  2,                         /* cost of moving SSE register */
  {4, 8, 16},                /* cost of loading SSE registers
                                in SImode, DImode and TImode */
  {4, 8, 16},                /* cost of storing SSE registers
                                in SImode, DImode and TImode */
  3,                         /* MMX or SSE register to integer */
  0,                         /* size of prefetch block */
  0,                         /* number of parallel prefetches */
  2,                         /* Branch cost */
  3,                         /* cost of FADD and FSUB insns.  */
  3,                         /* cost of FMUL instruction.  */
  39,                        /* cost of FDIV instruction.  */
  1,                         /* cost of FABS instruction.  */
  1,                         /* cost of FCHS instruction.  */
  70,                        /* cost of FSQRT instruction.  */
};

static const
struct processor_costs pentiumpro_cost = {
  1,                         /* cost of an add instruction */
  1,                         /* cost of a lea instruction */
  1,                         /* variable shift costs */
  1,                         /* constant shift costs */
  {4, 4, 4, 4, 4},           /* cost of starting a multiply */
  0,                         /* cost of multiply per each bit set */
  {17, 17, 17, 17, 17},      /* cost of a divide/mod */
  1,                         /* cost of movsx */
  1,                         /* cost of movzx */
  8,                         /* "large" insn */
  6,                         /* MOVE_RATIO */
  2,                         /* cost for loading QImode using movzbl */
  {4, 4, 4},                 /* cost of loading integer registers
                                in QImode, HImode and SImode.
                                Relative to reg-reg move (2).  */
  {2, 2, 2},                 /* cost of storing integer registers */
  2,                         /* cost of reg,reg fld/fst */
  {2, 2, 6},                 /* cost of loading fp registers
                                in SFmode, DFmode and XFmode */
  {4, 4, 6},                 /* cost of storing fp registers
                                in SFmode, DFmode and XFmode */
  2,                         /* cost of moving MMX register */
  {2, 2},                    /* cost of loading MMX registers
                                in SImode and DImode */
  {2, 2},                    /* cost of storing MMX registers
                                in SImode and DImode */
  2,                         /* cost of moving SSE register */
  {2, 2, 8},                 /* cost of loading SSE registers
                                in SImode, DImode and TImode */
  {2, 2, 8},                 /* cost of storing SSE registers
                                in SImode, DImode and TImode */
  3,                         /* MMX or SSE register to integer */
  32,                        /* size of prefetch block */
  6,                         /* number of parallel prefetches */
  2,                         /* Branch cost */
  3,                         /* cost of FADD and FSUB insns.  */
  5,                         /* cost of FMUL instruction.  */
  56,                        /* cost of FDIV instruction.  */
  2,                         /* cost of FABS instruction.  */
  2,                         /* cost of FCHS instruction.  */
  56,                        /* cost of FSQRT instruction.  */
};

static const
struct processor_costs k6_cost = {
  1,                         /* cost of an add instruction */
  2,                         /* cost of a lea instruction */
  1,                         /* variable shift costs */
  1,                         /* constant shift costs */
  {3, 3, 3, 3, 3},           /* cost of starting a multiply */
  0,                         /* cost of multiply per each bit set */
  {18, 18, 18, 18, 18},      /* cost of a divide/mod */
  2,                         /* cost of movsx */
  2,                         /* cost of movzx */
  8,                         /* "large" insn */
  4,                         /* MOVE_RATIO */
  3,                         /* cost for loading QImode using movzbl */
  {4, 5, 4},                 /* cost of loading integer registers
                                in QImode, HImode and SImode.
                                Relative to reg-reg move (2).  */
  {2, 3, 2},                 /* cost of storing integer registers */
  4,                         /* cost of reg,reg fld/fst */
  {6, 6, 6},                 /* cost of loading fp registers
                                in SFmode, DFmode and XFmode */
  {4, 4, 4},                 /* cost of storing fp registers
                                in SFmode, DFmode and XFmode */
  2,                         /* cost of moving MMX register */
  {2, 2},                    /* cost of loading MMX registers
                                in SImode and DImode */
  {2, 2},                    /* cost of storing MMX registers
                                in SImode and DImode */
  2,                         /* cost of moving SSE register */
  {2, 2, 8},                 /* cost of loading SSE registers
                                in SImode, DImode and TImode */
  {2, 2, 8},                 /* cost of storing SSE registers
                                in SImode, DImode and TImode */
  6,                         /* MMX or SSE register to integer */
  32,                        /* size of prefetch block */
  1,                         /* number of parallel prefetches */
  1,                         /* Branch cost */
  2,                         /* cost of FADD and FSUB insns.  */
  2,                         /* cost of FMUL instruction.  */
  56,                        /* cost of FDIV instruction.  */
  2,                         /* cost of FABS instruction.  */
  2,                         /* cost of FCHS instruction.  */
  56,                        /* cost of FSQRT instruction.  */
};

static const
struct processor_costs athlon_cost = {
  1,                         /* cost of an add instruction */
  2,                         /* cost of a lea instruction */
  1,                         /* variable shift costs */
  1,                         /* constant shift costs */
  {5, 5, 5, 5, 5},           /* cost of starting a multiply */
  0,                         /* cost of multiply per each bit set */
  {18, 26, 42, 74, 74},      /* cost of a divide/mod */
  1,                         /* cost of movsx */
  1,                         /* cost of movzx */
  8,                         /* "large" insn */
  9,                         /* MOVE_RATIO */
  4,                         /* cost for loading QImode using movzbl */
  {3, 4, 3},                 /* cost of loading integer registers
                                in QImode, HImode and SImode.
                                Relative to reg-reg move (2).  */
  {3, 4, 3},                 /* cost of storing integer registers */
  4,                         /* cost of reg,reg fld/fst */
  {4, 4, 12},                /* cost of loading fp registers
                                in SFmode, DFmode and XFmode */
  {6, 6, 8},                 /* cost of storing fp registers
                                in SFmode, DFmode and XFmode */
  2,                         /* cost of moving MMX register */
  {4, 4},                    /* cost of loading MMX registers
                                in SImode and DImode */
  {4, 4},                    /* cost of storing MMX registers
                                in SImode and DImode */
  2,                         /* cost of moving SSE register */
  {4, 4, 6},                 /* cost of loading SSE registers
                                in SImode, DImode and TImode */
  {4, 4, 5},                 /* cost of storing SSE registers
                                in SImode, DImode and TImode */
  5,                         /* MMX or SSE register to integer */
  64,                        /* size of prefetch block */
  6,                         /* number of parallel prefetches */
  2,                         /* Branch cost */
  4,                         /* cost of FADD and FSUB insns.  */
  4,                         /* cost of FMUL instruction.  */
  24,                        /* cost of FDIV instruction.  */
  2,                         /* cost of FABS instruction.  */
  2,                         /* cost of FCHS instruction.  */
  35,                        /* cost of FSQRT instruction.  */
};

static const
struct processor_costs k8_cost = {
  1,                         /* cost of an add instruction */
  2,                         /* cost of a lea instruction */
  1,                         /* variable shift costs */
  1,                         /* constant shift costs */
  {3, 4, 3, 4, 5},           /* cost of starting a multiply */
  0,                         /* cost of multiply per each bit set */
  {18, 26, 42, 74, 74},      /* cost of a divide/mod */
  1,                         /* cost of movsx */
  1,                         /* cost of movzx */
  8,                         /* "large" insn */
  9,                         /* MOVE_RATIO */
  4,                         /* cost for loading QImode using movzbl */
  {3, 4, 3},                 /* cost of loading integer registers
                                in QImode, HImode and SImode.
                                Relative to reg-reg move (2).  */
  {3, 4, 3},                 /* cost of storing integer registers */
  4,                         /* cost of reg,reg fld/fst */
  {4, 4, 12},                /* cost of loading fp registers
                                in SFmode, DFmode and XFmode */
  {6, 6, 8},                 /* cost of storing fp registers
                                in SFmode, DFmode and XFmode */
  2,                         /* cost of moving MMX register */
  {3, 3},                    /* cost of loading MMX registers
                                in SImode and DImode */
  {4, 4},                    /* cost of storing MMX registers
                                in SImode and DImode */
  2,                         /* cost of moving SSE register */
  {4, 3, 6},                 /* cost of loading SSE registers
                                in SImode, DImode and TImode */
  {4, 4, 5},                 /* cost of storing SSE registers
                                in SImode, DImode and TImode */
  5,                         /* MMX or SSE register to integer */
  64,                        /* size of prefetch block */
  6,                         /* number of parallel prefetches */
  2,                         /* Branch cost */
  4,                         /* cost of FADD and FSUB insns.  */
  4,                         /* cost of FMUL instruction.  */
  19,                        /* cost of FDIV instruction.  */
  2,                         /* cost of FABS instruction.  */
  2,                         /* cost of FCHS instruction.  */
  35,                        /* cost of FSQRT instruction.  */
};

static const
struct processor_costs pentium4_cost = {
  1,                         /* cost of an add instruction */
  1,                         /* cost of a lea instruction */
  4,                         /* variable shift costs */
  4,                         /* constant shift costs */
  {15, 15, 15, 15, 15},      /* cost of starting a multiply */
  0,                         /* cost of multiply per each bit set */
  {56, 56, 56, 56, 56},      /* cost of a divide/mod */
  1,                         /* cost of movsx */
  1,                         /* cost of movzx */
  16,                        /* "large" insn */
  6,                         /* MOVE_RATIO */
  2,                         /* cost for loading QImode using movzbl */
  {4, 5, 4},                 /* cost of loading integer registers
                                in QImode, HImode and SImode.
                                Relative to reg-reg move (2).  */
  {2, 3, 2},                 /* cost of storing integer registers */
  2,                         /* cost of reg,reg fld/fst */
  {2, 2, 6},                 /* cost of loading fp registers
                                in SFmode, DFmode and XFmode */
  {4, 4, 6},                 /* cost of storing fp registers
                                in SFmode, DFmode and XFmode */
  2,                         /* cost of moving MMX register */
  {2, 2},                    /* cost of loading MMX registers
                                in SImode and DImode */
  {2, 2},                    /* cost of storing MMX registers
                                in SImode and DImode */
  12,                        /* cost of moving SSE register */
  {12, 12, 12},              /* cost of loading SSE registers
                                in SImode, DImode and TImode */
  {2, 2, 8},                 /* cost of storing SSE registers
                                in SImode, DImode and TImode */
  10,                        /* MMX or SSE register to integer */
  64,                        /* size of prefetch block */
  6,                         /* number of parallel prefetches */
  2,                         /* Branch cost */
  5,                         /* cost of FADD and FSUB insns.  */
  7,                         /* cost of FMUL instruction.  */
  43,                        /* cost of FDIV instruction.  */
  2,                         /* cost of FABS instruction.  */
  2,                         /* cost of FCHS instruction.  */
  43,                        /* cost of FSQRT instruction.  */
};

const struct processor_costs *ix86_cost = &pentium_cost;

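/* Illustrative example, not part of the original file: override_options
   repoints ix86_cost at the table for the CPU selected by -mtune, and
   cost queries then read fields from it, e.g.

     int cost = ix86_cost->lea;   (field name assumed from i386.h)

   so the same code generation logic adapts its cost model per CPU.  */
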
/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_K6 (1<<PROCESSOR_K6)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)

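/* Illustrative sketch, not part of the original file: i386.h tests these
   bitmasks against the bit of the CPU being tuned for, roughly

     #define TARGET_USE_LEAVE (x86_use_leave & (1 << ix86_tune))

   (the exact macro spelling in i386.h may differ); each constant below is
   thus the set of CPUs for which the corresponding tuning pays off.  */
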
const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4;
const int x86_zero_extend_with_and = m_486 | m_PENT;
const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
const int x86_double_with_add = ~m_386;
const int x86_use_bit_test = m_386;
const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4;
const int x86_3dnow_a = m_ATHLON_K8;
const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4;
const int x86_branch_hints = m_PENT4;
const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
const int x86_partial_reg_stall = m_PPRO;
const int x86_use_loop = m_K6;
const int x86_use_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
const int x86_use_mov0 = m_K6;
const int x86_use_cltd = ~(m_PENT | m_K6);
const int x86_read_modify_write = ~m_PENT;
const int x86_read_modify = ~(m_PENT | m_PPRO);
const int x86_split_long_moves = m_PPRO;
const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
const int x86_single_stringop = m_386 | m_PENT4;
const int x86_qimode_math = ~(0);
const int x86_promote_qi_regs = 0;
const int x86_himode_math = ~(m_PPRO);
const int x86_promote_hi_regs = m_PPRO;
const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4;
const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4;
const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4;
const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_PPRO);
const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4;
const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4;
const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_PPRO;
const int x86_prologue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
const int x86_epilogue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
const int x86_decompose_lea = m_PENT4;
const int x86_shift1 = ~m_486;
const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4;
const int x86_sse_partial_reg_dependency = m_PENT4 | m_PPRO;
/* Set for machines where the type and dependencies are resolved on SSE
   register parts instead of whole registers, so we may maintain just the
   lower part of scalar values in proper format, leaving the upper part
   undefined.  */
const int x86_sse_partial_regs = m_ATHLON_K8;
/* Athlon optimizes the partial-register FPS special case, thus avoiding
   the need for extra instructions beforehand.  */
const int x86_sse_partial_regs_for_cvtsd2ss = 0;
const int x86_sse_typeless_stores = m_ATHLON_K8;
const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4;
const int x86_use_ffreep = m_ATHLON_K8;
const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
const int x86_inter_unit_moves = ~(m_ATHLON_K8);
const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_PPRO;

/* In case the average insn count for single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 20

/* Names for 8 (low), 8 (high), and 16-bit registers, respectively.  */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
static const char *const hi_reg_name[] = HI_REGISTER_NAMES;

/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */

enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  /* ax, dx, cx, bx */
  AREG, DREG, CREG, BREG,
  /* si, di, bp, sp */
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  /* FP registers */
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* arg pointer */
  NON_Q_REGS,
  /* flags, fpsr, dirflag, frame */
  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  MMX_REGS, MMX_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
};

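/* Illustrative example, not part of the original file: regno 0 is %eax,
   so REGNO_REG_CLASS (0) yields AREG, while %esp (regno 7 in the order
   above) maps to NON_Q_REGS because it has no addressable 8-bit part.  */
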
/* The "default" register map used in 32bit mode.  */

int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 4, 5,           /* general regs */
  12, 13, 14, 15, 16, 17, 18, 19,   /* fp regs */
  -1, -1, -1, -1, -1,               /* arg, flags, fpsr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,   /* SSE */
  29, 30, 31, 32, 33, 34, 35, 36,   /* MMX */
  -1, -1, -1, -1, -1, -1, -1, -1,   /* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,   /* extended SSE registers */
};

static int const x86_64_int_parameter_registers[6] =
{
  5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
  FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
};

static int const x86_64_int_return_registers[4] =
{
  0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
};

/* The "default" register map used in 64bit mode.  */
int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 1, 2, 3, 4, 5, 6, 7,           /* general regs */
  33, 34, 35, 36, 37, 38, 39, 40,   /* fp regs */
  -1, -1, -1, -1, -1,               /* arg, flags, fpsr, dir, frame */
  17, 18, 19, 20, 21, 22, 23, 24,   /* SSE */
  41, 42, 43, 44, 45, 46, 47, 48,   /* MMX */
  8, 9, 10, 11, 12, 13, 14, 15,     /* extended integer registers */
  25, 26, 27, 28, 29, 30, 31, 32,   /* extended SSE registers */
};

/* Define the register numbers to be used in Dwarf debugging information.
   The SVR4 reference port C compiler uses the following register numbers
   in its Dwarf output code:
	0 for %eax (gcc regno = 0)
	1 for %ecx (gcc regno = 2)
	2 for %edx (gcc regno = 1)
	3 for %ebx (gcc regno = 3)
	4 for %esp (gcc regno = 7)
	5 for %ebp (gcc regno = 6)
	6 for %esi (gcc regno = 4)
	7 for %edi (gcc regno = 5)
   The following three DWARF register numbers are never generated by
   the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
   believes these numbers have these meanings.
	8 for %eip (no gcc equivalent)
	9 for %eflags (gcc regno = 17)
	10 for %trapno (no gcc equivalent)
   It is not at all clear how we should number the FP stack registers
   for the x86 architecture.  If the version of SDB on x86/svr4 were
   a bit less brain dead with respect to floating-point then we would
   have a precedent to follow with respect to DWARF register numbers
   for x86 FP registers, but the SDB on x86/svr4 is so completely
   broken with respect to FP registers that it is hardly worth thinking
   of it as something to strive for compatibility with.
   The version of x86/svr4 SDB I have at the moment does (partially)
   seem to believe that DWARF register number 11 is associated with
   the x86 register %st(0), but that's about all.  Higher DWARF
   register numbers don't seem to be associated with anything in
   particular, and even for DWARF regno 11, SDB only seems to under-
   stand that it should say that a variable lives in %st(0) (when
   asked via an `=' command) if we said it was in DWARF regno 11,
   but SDB still prints garbage when asked for the value of the
   variable in question (via a `/' command).
   (Also note that the labels SDB prints for various FP stack regs
   when doing an `x' command are all wrong.)
   Note that these problems generally don't affect the native SVR4
   C compiler because it doesn't allow the use of -O with -g and
   because when it is *not* optimizing, it allocates a memory
   location for each floating-point variable, and the memory
   location is what gets described in the DWARF AT_location
   attribute for the variable in question.
   Regardless of the severe mental illness of the x86/svr4 SDB, we
   do something sensible here and we use the following DWARF
   register numbers.  Note that these are all stack-top-relative
   numbers.
	11 for %st(0) (gcc regno = 8)
	12 for %st(1) (gcc regno = 9)
	13 for %st(2) (gcc regno = 10)
	14 for %st(3) (gcc regno = 11)
	15 for %st(4) (gcc regno = 12)
	16 for %st(5) (gcc regno = 13)
	17 for %st(6) (gcc regno = 14)
	18 for %st(7) (gcc regno = 15)
*/
int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 5, 4,           /* general regs */
  11, 12, 13, 14, 15, 16, 17, 18,   /* fp regs */
  -1, 9, -1, -1, -1,                /* arg, flags, fpsr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,   /* SSE registers */
  29, 30, 31, 32, 33, 34, 35, 36,   /* MMX registers */
  -1, -1, -1, -1, -1, -1, -1, -1,   /* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,   /* extended SSE registers */
};

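/* Illustrative example, not part of the original file: gcc regno 2 is
   %ecx and svr4_dbx_register_map[2] == 1, matching the list above
   (1 for %ecx); likewise %esp (gcc regno 7) maps to DWARF register 4.  */
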
/* Test and compare insns in i386.md store the information needed to
   generate branch and scc insns here.  */

rtx ix86_compare_op0 = NULL_RTX;
rtx ix86_compare_op1 = NULL_RTX;

#define MAX_386_STACK_LOCALS 3
/* Size of the register save area.  */
#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
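/* Illustrative arithmetic, not part of the original file: with the usual
   x86-64 values REGPARM_MAX == 6, UNITS_PER_WORD == 8 and
   SSE_REGPARM_MAX == 8, the save area is 6*8 + 8*16 = 176 bytes.  */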

/* Define the structure for the machine field in struct function.  */

struct stack_local_entry GTY(())
{
  unsigned short mode;
  unsigned short n;
  rtx rtl;
  struct stack_local_entry *next;
};

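/* Illustrative sketch, not part of the original file: these entries form
   a linked list hanging off struct machine_function, searched before a
   new stack slot is created, roughly

     struct stack_local_entry *s;
     for (s = cfun->machine->stack_locals; s; s = s->next)
       if (s->mode == mode && s->n == n)
         return s->rtl;

   (the field name stack_locals is assumed from the machine_function
   definition; see assign_386_stack_local later in this file).  */
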
/* Structure describing stack frame layout.
   Stack grows downward:

   [arguments]
					<- ARG_POINTER
   saved pc

   saved frame pointer if frame_pointer_needed
					<- HARD_FRAME_POINTER
   [saved regs]

   [padding1]          \
			)
   [va_arg registers]  (
			> to_allocate	<- FRAME_POINTER
   [frame]	       (
			)
   [padding2]	       /
  */
struct ix86_frame
{
  int nregs;
  int padding1;
  int va_arg_size;
  HOST_WIDE_INT frame;
  int padding2;
  int outgoing_arguments_size;
  int red_zone_size;

  HOST_WIDE_INT to_allocate;
  /* The offsets relative to ARG_POINTER.  */
  HOST_WIDE_INT frame_pointer_offset;
  HOST_WIDE_INT hard_frame_pointer_offset;
  HOST_WIDE_INT stack_pointer_offset;

  /* When save_regs_using_mov is set, emit prologue using
     move instead of push instructions.  */
  bool save_regs_using_mov;
};

/* Used to enable/disable debugging features.  */
const char *ix86_debug_arg_string, *ix86_debug_addr_string;
/* Code model option as passed by user.  */
const char *ix86_cmodel_string;
/* Parsed value.  */
enum cmodel ix86_cmodel;
/* Asm dialect.  */
const char *ix86_asm_string;
enum asm_dialect ix86_asm_dialect = ASM_ATT;
/* TLS dialect.  */
const char *ix86_tls_dialect_string;
enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;

/* Which unit we are generating floating point math for.  */
enum fpmath_unit ix86_fpmath;

/* Which cpu are we scheduling for.  */
enum processor_type ix86_tune;
/* Which instruction set architecture to use.  */
enum processor_type ix86_arch;

/* Strings to hold which cpu and instruction set architecture to use.  */
const char *ix86_tune_string;           /* for -mtune=<xxx> */
const char *ix86_arch_string;           /* for -march=<xxx> */
const char *ix86_fpmath_string;         /* for -mfpmath=<xxx> */

/* # of registers to use to pass arguments.  */
const char *ix86_regparm_string;

/* true if sse prefetch instruction is not NOOP.  */
int x86_prefetch_sse;

/* ix86_regparm_string as a number */
int ix86_regparm;

/* Alignment to use for loops and jumps:  */

/* Power of two alignment for loops.  */
const char *ix86_align_loops_string;

/* Power of two alignment for non-loop jumps.  */
const char *ix86_align_jumps_string;

/* Power of two alignment for stack boundary in bytes.  */
const char *ix86_preferred_stack_boundary_string;

/* Preferred alignment for stack boundary in bits.  */
int ix86_preferred_stack_boundary;

/* Values 1-5: see jump.c */
int ix86_branch_cost;
const char *ix86_branch_cost_string;

/* Power of two alignment for functions.  */
const char *ix86_align_funcs_string;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
static char internal_label_prefix[16];
static int internal_label_prefix_len;
\f
static int local_symbolic_operand (rtx, enum machine_mode);
static int tls_symbolic_operand_1 (rtx, enum tls_model);
static void output_pic_addr_const (FILE *, rtx, int);
static void put_condition_code (enum rtx_code, enum machine_mode,
				int, int, FILE *);
static const char *get_some_local_dynamic_name (void);
static int get_some_local_dynamic_name_1 (rtx *, void *);
static rtx maybe_get_pool_constant (rtx);
static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
						   rtx *);
static rtx get_thread_pointer (int);
static rtx legitimize_tls_address (rtx, enum tls_model, int);
static void get_pc_thunk_name (char [32], unsigned int);
static rtx gen_push (rtx);
static int memory_address_length (rtx addr);
static int ix86_flags_dependant (rtx, rtx, enum attr_type);
static int ix86_agi_dependant (rtx, rtx, enum attr_type);
static enum attr_ppro_uops ix86_safe_ppro_uops (rtx);
static void ix86_dump_ppro_packet (FILE *);
static void ix86_reorder_insn (rtx *, rtx *);
static struct machine_function * ix86_init_machine_status (void);
static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
static int ix86_nsaved_regs (void);
static void ix86_emit_save_regs (void);
static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
static void ix86_emit_restore_regs_using_mov (rtx, int, int);
static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void ix86_set_move_mem_attrs_1 (rtx, rtx, rtx, rtx, rtx);
static void ix86_sched_reorder_ppro (rtx *, rtx *);
static HOST_WIDE_INT ix86_GOT_alias_set (void);
static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
static rtx ix86_expand_aligntest (rtx, int);
static void ix86_expand_strlensi_unroll_1 (rtx, rtx);
static int ix86_issue_rate (void);
static int ix86_adjust_cost (rtx, rtx, rtx, int);
static void ix86_sched_init (FILE *, int, int);
static int ix86_sched_reorder (FILE *, int, rtx *, int *, int);
static int ix86_variable_issue (FILE *, int, rtx, int);
static int ia32_use_dfa_pipeline_interface (void);
static int ia32_multipass_dfa_lookahead (void);
static void ix86_init_mmx_sse_builtins (void);
static rtx x86_this_parameter (tree);
static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
				 HOST_WIDE_INT, tree);
static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
static void x86_file_start (void);
static void ix86_reorg (void);
bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);

struct ix86_address
{
  rtx base, index, disp;
  HOST_WIDE_INT scale;
  enum ix86_address_seg { SEG_DEFAULT, SEG_FS, SEG_GS } seg;
};

static int ix86_decompose_address (rtx, struct ix86_address *);
static int ix86_address_cost (rtx);
static bool ix86_cannot_force_const_mem (rtx);
static rtx ix86_delegitimize_address (rtx);

struct builtin_description;
static rtx ix86_expand_sse_comi (const struct builtin_description *,
				 tree, rtx);
static rtx ix86_expand_sse_compare (const struct builtin_description *,
				    tree, rtx);
static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
static rtx ix86_expand_store_builtin (enum insn_code, tree);
static rtx safe_vector_operand (rtx, enum machine_mode);
static enum rtx_code ix86_fp_compare_code_to_integer (enum rtx_code);
static void ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *,
				      enum rtx_code *, enum rtx_code *);
static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
static int ix86_fp_comparison_cost (enum rtx_code code);
static unsigned int ix86_select_alt_pic_regnum (void);
static int ix86_save_reg (unsigned int, int);
static void ix86_compute_frame_layout (struct ix86_frame *);
static int ix86_comp_type_attributes (tree, tree);
static int ix86_function_regparm (tree, tree);
const struct attribute_spec ix86_attribute_table[];
static bool ix86_function_ok_for_sibcall (tree, tree);
static tree ix86_handle_cdecl_attribute (tree *, tree, tree, int, bool *);
static tree ix86_handle_regparm_attribute (tree *, tree, tree, int, bool *);
static int ix86_value_regno (enum machine_mode);
static bool contains_128bit_aligned_vector_p (tree);
static bool ix86_ms_bitfield_layout_p (tree);
static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
static int extended_reg_mentioned_1 (rtx *, void *);
static bool ix86_rtx_costs (rtx, int, int, int *);
static int min_insn_size (rtx);
static void k8_avoid_jump_misspredicts (void);

#if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
static void ix86_svr3_asm_out_constructor (rtx, int);
#endif

/* Register class used for passing given 64bit part of the argument.
   These represent classes as documented by the PS ABI, with the exception
   of the SSESF and SSEDF classes, which are basically the SSE class
   except that gcc uses SF or DFmode moves instead of DImode moves to
   avoid reformatting penalties.

   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
   whenever possible (the upper half is just padding).  */
enum x86_64_reg_class
  {
    X86_64_NO_CLASS,
    X86_64_INTEGER_CLASS,
    X86_64_INTEGERSI_CLASS,
    X86_64_SSE_CLASS,
    X86_64_SSESF_CLASS,
    X86_64_SSEDF_CLASS,
    X86_64_SSEUP_CLASS,
    X86_64_X87_CLASS,
    X86_64_X87UP_CLASS,
    X86_64_MEMORY_CLASS
  };
static const char * const x86_64_reg_class_name[] =
  {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};

#define MAX_CLASSES 4
static int classify_argument (enum machine_mode, tree,
			      enum x86_64_reg_class [MAX_CLASSES], int);
static int examine_argument (enum machine_mode, tree, int, int *, int *);
static rtx construct_container (enum machine_mode, tree, int, int, int,
				const int *, int);
static enum x86_64_reg_class merge_classes (enum x86_64_reg_class,
					    enum x86_64_reg_class);

/* Table of constants used by fldpi, fldln2, etc...  */
static REAL_VALUE_TYPE ext_80387_constants_table [5];
static bool ext_80387_constants_init = 0;
static void init_ext_80387_constants (void);
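
/* Illustrative example, not part of the original file: under this
   classification a struct { double d; int i; } argument occupies two
   eightbytes; the first is classified X86_64_SSEDF_CLASS (passed in an
   SSE register) and the second X86_64_INTEGERSI_CLASS (passed in an
   integer register using a cheap SImode move, the upper half being
   padding).  */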
\f
/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
# undef TARGET_MERGE_DECL_ATTRIBUTES
# define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue

#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
#ifdef ASM_QUAD
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
#endif

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT ix86_sched_init
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER ix86_sched_reorder
#undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
#define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
  ia32_use_dfa_pipeline_interface
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  ia32_multipass_dfa_lookahead

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif
#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START x86_file_start

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ix86_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST ix86_address_cost

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg

struct gcc_target targetm = TARGET_INITIALIZER;
\f
/* The svr4 ABI for the i386 says that records and unions are returned
   in memory.  */
#ifndef DEFAULT_PCC_STRUCT_RETURN
#define DEFAULT_PCC_STRUCT_RETURN 1
#endif

/* Sometimes certain combinations of command options do not make
   sense on a particular target machine.  You can define a macro
   `OVERRIDE_OPTIONS' to take account of this.  This macro, if
   defined, is executed once just after all the command options have
   been parsed.

   Don't use this macro to turn on various extra optimizations for
   `-O'.  That is what `OPTIMIZATION_OPTIONS' is for.  */

void
override_options (void)
{
  int i;
  /* Comes from final.c -- no real reason to change it.  */
#define MAX_CODE_ALIGN 16

  static struct ptt
    {
      const struct processor_costs *cost;   /* Processor costs */
      const int target_enable;              /* Target flags to enable.  */
      const int target_disable;             /* Target flags to disable.  */
      const int align_loop;                 /* Default alignments.  */
      const int align_loop_max_skip;
      const int align_jump;
      const int align_jump_max_skip;
      const int align_func;
    }
  const processor_target_table[PROCESSOR_max] =
    {
      {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
      {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
      {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
      {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
      {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
      {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
      {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
      {&k8_cost, 0, 0, 16, 7, 16, 7, 16}
    };

  static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
  static struct pta
    {
      const char *const name;		/* processor name or nickname.  */
      const enum processor_type processor;
      const enum pta_flags
	{
	  PTA_SSE = 1,
	  PTA_SSE2 = 2,
	  PTA_MMX = 4,
	  PTA_PREFETCH_SSE = 8,
	  PTA_3DNOW = 16,
	  PTA_3DNOW_A = 64,
	  PTA_64BIT = 128
	} flags;
    }
  const processor_alias_table[] =
    {
      {"i386", PROCESSOR_I386, 0},
      {"i486", PROCESSOR_I486, 0},
      {"i586", PROCESSOR_PENTIUM, 0},
      {"pentium", PROCESSOR_PENTIUM, 0},
      {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
      {"winchip-c6", PROCESSOR_I486, PTA_MMX},
      {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
      {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
      {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
      {"i686", PROCESSOR_PENTIUMPRO, 0},
      {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
      {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
      {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
      {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 |
				       PTA_MMX | PTA_PREFETCH_SSE},
      {"k6", PROCESSOR_K6, PTA_MMX},
      {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
      {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
      {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				   | PTA_3DNOW_A},
      {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
					 | PTA_3DNOW | PTA_3DNOW_A},
      {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				     | PTA_3DNOW_A | PTA_SSE},
      {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				      | PTA_3DNOW_A | PTA_SSE},
      {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				      | PTA_3DNOW_A | PTA_SSE},
      {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
			   | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
    };

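  /* Illustrative example, not part of the original file: -march=athlon-xp
     matches the entry above, so ix86_arch becomes PROCESSOR_ATHLON and
     MASK_MMX, MASK_3DNOW, MASK_3DNOW_A and MASK_SSE are enabled below
     (unless set explicitly by the user), along with x86_prefetch_sse.  */
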
  int const pta_size = ARRAY_SIZE (processor_alias_table);

  /* Set the default values for switches whose default depends on TARGET_64BIT
     in case they weren't overwritten by command line options.  */
  if (TARGET_64BIT)
    {
      if (flag_omit_frame_pointer == 2)
	flag_omit_frame_pointer = 1;
      if (flag_asynchronous_unwind_tables == 2)
	flag_asynchronous_unwind_tables = 1;
      if (flag_pcc_struct_return == 2)
	flag_pcc_struct_return = 0;
    }
  else
    {
      if (flag_omit_frame_pointer == 2)
	flag_omit_frame_pointer = 0;
      if (flag_asynchronous_unwind_tables == 2)
	flag_asynchronous_unwind_tables = 0;
      if (flag_pcc_struct_return == 2)
	flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
    }

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

  if (!ix86_tune_string && ix86_arch_string)
    ix86_tune_string = ix86_arch_string;
  if (!ix86_tune_string)
    ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
  if (!ix86_arch_string)
    ix86_arch_string = TARGET_64BIT ? "k8" : "i386";

  if (ix86_cmodel_string != 0)
    {
      if (!strcmp (ix86_cmodel_string, "small"))
	ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
      else if (flag_pic)
	sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
      else if (!strcmp (ix86_cmodel_string, "32"))
	ix86_cmodel = CM_32;
      else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
	ix86_cmodel = CM_KERNEL;
      else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
	ix86_cmodel = CM_MEDIUM;
      else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
	ix86_cmodel = CM_LARGE;
      else
	error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
    }
  else
    {
      ix86_cmodel = CM_32;
      if (TARGET_64BIT)
	ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
    }
  if (ix86_asm_string != 0)
    {
      if (!strcmp (ix86_asm_string, "intel"))
	ix86_asm_dialect = ASM_INTEL;
      else if (!strcmp (ix86_asm_string, "att"))
	ix86_asm_dialect = ASM_ATT;
      else
	error ("bad value (%s) for -masm= switch", ix86_asm_string);
    }
  if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
    error ("code model `%s' not supported in the %s bit mode",
	   ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
  if (ix86_cmodel == CM_LARGE)
    sorry ("code model `large' not supported yet");
  if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
    sorry ("%i-bit mode not compiled in",
	   (target_flags & MASK_64BIT) ? 64 : 32);

  for (i = 0; i < pta_size; i++)
    if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
      {
	ix86_arch = processor_alias_table[i].processor;
	/* Default cpu tuning to the architecture.  */
	ix86_tune = ix86_arch;
	if (processor_alias_table[i].flags & PTA_MMX
	    && !(target_flags_explicit & MASK_MMX))
	  target_flags |= MASK_MMX;
	if (processor_alias_table[i].flags & PTA_3DNOW
	    && !(target_flags_explicit & MASK_3DNOW))
	  target_flags |= MASK_3DNOW;
	if (processor_alias_table[i].flags & PTA_3DNOW_A
	    && !(target_flags_explicit & MASK_3DNOW_A))
	  target_flags |= MASK_3DNOW_A;
	if (processor_alias_table[i].flags & PTA_SSE
	    && !(target_flags_explicit & MASK_SSE))
	  target_flags |= MASK_SSE;
	if (processor_alias_table[i].flags & PTA_SSE2
	    && !(target_flags_explicit & MASK_SSE2))
	  target_flags |= MASK_SSE2;
	if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
	  x86_prefetch_sse = true;
	if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
	  error ("CPU you selected does not support x86-64 instruction set");
	break;
      }

  if (i == pta_size)
    error ("bad value (%s) for -march= switch", ix86_arch_string);

  for (i = 0; i < pta_size; i++)
    if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
      {
	ix86_tune = processor_alias_table[i].processor;
	if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
	  error ("CPU you selected does not support x86-64 instruction set");
	break;
      }
  if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
    x86_prefetch_sse = true;
  if (i == pta_size)
    error ("bad value (%s) for -mtune= switch", ix86_tune_string);

  if (optimize_size)
    ix86_cost = &size_cost;
  else
    ix86_cost = processor_target_table[ix86_tune].cost;
  target_flags |= processor_target_table[ix86_tune].target_enable;
  target_flags &= ~processor_target_table[ix86_tune].target_disable;

  /* Arrange to set up i386_stack_locals for all functions.  */
  init_machine_status = ix86_init_machine_status;

  /* Validate -mregparm= value.  */
  if (ix86_regparm_string)
    {
      i = atoi (ix86_regparm_string);
      if (i < 0 || i > REGPARM_MAX)
	error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
      else
	ix86_regparm = i;
    }
  else
    if (TARGET_64BIT)
      ix86_regparm = REGPARM_MAX;

  /* If the user has provided any of the -malign-* options,
     warn and use that value only if -falign-* is not set.
     Remove this code in GCC 3.2 or later.  */
  if (ix86_align_loops_string)
    {
      warning ("-malign-loops is obsolete, use -falign-loops");
      if (align_loops == 0)
	{
	  i = atoi (ix86_align_loops_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_loops = 1 << i;
	}
    }

  if (ix86_align_jumps_string)
    {
      warning ("-malign-jumps is obsolete, use -falign-jumps");
      if (align_jumps == 0)
	{
	  i = atoi (ix86_align_jumps_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_jumps = 1 << i;
	}
    }

  if (ix86_align_funcs_string)
    {
      warning ("-malign-functions is obsolete, use -falign-functions");
      if (align_functions == 0)
	{
	  i = atoi (ix86_align_funcs_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_functions = 1 << i;
	}
    }

  /* Default align_* from the processor table.  */
  if (align_loops == 0)
    {
      align_loops = processor_target_table[ix86_tune].align_loop;
      align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
    }
  if (align_jumps == 0)
    {
      align_jumps = processor_target_table[ix86_tune].align_jump;
      align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
    }
  if (align_functions == 0)
    {
      align_functions = processor_target_table[ix86_tune].align_func;
    }

  /* Validate -mpreferred-stack-boundary= value, or provide default.
     The default of 128 bits is for Pentium III's SSE __m128, but we
     don't want additional code to keep the stack aligned when
     optimizing for code size.  */
  ix86_preferred_stack_boundary = (optimize_size
				   ? TARGET_64BIT ? 128 : 32
				   : 128);
  if (ix86_preferred_stack_boundary_string)
    {
      i = atoi (ix86_preferred_stack_boundary_string);
      if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
	error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
	       TARGET_64BIT ? 4 : 2);
      else
	ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
    }

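  /* Illustrative arithmetic, not part of the original file: with
     BITS_PER_UNIT == 8, -mpreferred-stack-boundary=4 yields
     (1 << 4) * 8 = 128 bits, i.e. 16-byte stack alignment.  */
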
  /* Validate -mbranch-cost= value, or provide default.  */
  ix86_branch_cost = processor_target_table[ix86_tune].cost->branch_cost;
  if (ix86_branch_cost_string)
    {
      i = atoi (ix86_branch_cost_string);
      if (i < 0 || i > 5)
	error ("-mbranch-cost=%d is not between 0 and 5", i);
      else
	ix86_branch_cost = i;
    }

f996902d
RH
1340 if (ix86_tls_dialect_string)
1341 {
1342 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1343 ix86_tls_dialect = TLS_DIALECT_GNU;
1344 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1345 ix86_tls_dialect = TLS_DIALECT_SUN;
1346 else
1347 error ("bad value (%s) for -mtls-dialect= switch",
1348 ix86_tls_dialect_string);
1349 }
1350
e9a25f70
JL
1351 /* Keep nonleaf frame pointers. */
1352 if (TARGET_OMIT_LEAF_FRAME_POINTER)
77a989d1 1353 flag_omit_frame_pointer = 1;
e075ae69
RH
1354
1355 /* If we're doing fast math, we don't care about comparison order
1356 wrt NaNs. This lets us use a shorter comparison sequence. */
de6c5979 1357 if (flag_unsafe_math_optimizations)
e075ae69
RH
1358 target_flags &= ~MASK_IEEE_FP;
1359
30c99a84
RH
1360 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1361 since the insns won't need emulation. */
1362 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1363 target_flags &= ~MASK_NO_FANCY_MATH_387;
1364
22c7c85e
L
1365 /* Turn on SSE2 builtins for -mpni. */
1366 if (TARGET_PNI)
1367 target_flags |= MASK_SSE2;
1368
1369 /* Turn on SSE builtins for -msse2. */
1370 if (TARGET_SSE2)
1371 target_flags |= MASK_SSE;
1372
14f73b5a
JH
1373 if (TARGET_64BIT)
1374 {
1375 if (TARGET_ALIGN_DOUBLE)
c725bd79 1376 error ("-malign-double makes no sense in the 64bit mode");
14f73b5a 1377 if (TARGET_RTD)
c725bd79 1378 error ("-mrtd calling convention not supported in the 64bit mode");
14f73b5a 1379 /* Enable by default the SSE and MMX builtins. */
965f5423
JH
1380 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1381 ix86_fpmath = FPMATH_SSE;
14f73b5a 1382 }
965f5423 1383 else
a5b378d6
JH
1384 {
1385 ix86_fpmath = FPMATH_387;
1386 /* The i386 ABI does not specify a red zone. It still makes sense to use
1387 one when the programmer takes care to keep the stack from being destroyed. */
1388 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
1389 target_flags |= MASK_NO_RED_ZONE;
1390 }
965f5423
JH
1391
1392 if (ix86_fpmath_string != 0)
1393 {
1394 if (! strcmp (ix86_fpmath_string, "387"))
1395 ix86_fpmath = FPMATH_387;
1396 else if (! strcmp (ix86_fpmath_string, "sse"))
1397 {
1398 if (!TARGET_SSE)
1399 {
1400 warning ("SSE instruction set disabled, using 387 arithmetics");
1401 ix86_fpmath = FPMATH_387;
1402 }
1403 else
1404 ix86_fpmath = FPMATH_SSE;
1405 }
1406 else if (! strcmp (ix86_fpmath_string, "387,sse")
1407 || ! strcmp (ix86_fpmath_string, "sse,387"))
1408 {
1409 if (!TARGET_SSE)
1410 {
1411 warning ("SSE instruction set disabled, using 387 arithmetics");
1412 ix86_fpmath = FPMATH_387;
1413 }
1414 else if (!TARGET_80387)
1415 {
1416 warning ("387 instruction set disabled, using SSE arithmetics");
1417 ix86_fpmath = FPMATH_SSE;
1418 }
1419 else
1420 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1421 }
fce5a9f2 1422 else
965f5423
JH
1423 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1424 }
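   By way of illustration, the spellings parsed above behave as follows
   (hypothetical command lines): -msse2 -mfpmath=sse selects SSE scalar
   arithmetic; -mfpmath=sse without any SSE-enabling switch warns and falls
   back to 387; and -mfpmath=sse,387 (equivalently 387,sse) uses both
   register files when both units are available.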
14f73b5a 1425
a7180f70
BS
1426 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1427 on by -msse. */
1428 if (TARGET_SSE)
e37af218
RH
1429 {
1430 target_flags |= MASK_MMX;
1431 x86_prefetch_sse = true;
1432 }
c6036a37 1433
47f339cf
BS
1434 /* If it has 3DNow!, it also has MMX, so MMX is also turned on by -m3dnow. */
1435 if (TARGET_3DNOW)
1436 {
1437 target_flags |= MASK_MMX;
d1f87653 1438 /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
47f339cf
BS
1439 extensions it adds. */
1440 if (x86_3dnow_a & (1 << ix86_arch))
1441 target_flags |= MASK_3DNOW_A;
1442 }
9e555526 1443 if ((x86_accumulate_outgoing_args & TUNEMASK)
9ef1b13a 1444 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
c6036a37
JH
1445 && !optimize_size)
1446 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
623fe810
RH
1447
1448 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1449 {
1450 char *p;
1451 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1452 p = strchr (internal_label_prefix, 'X');
1453 internal_label_prefix_len = p - internal_label_prefix;
1454 *p = '\0';
1455 }
f5316dfe
MM
1456}
1457\f
32b5b1aa 1458void
b96a374d 1459optimization_options (int level, int size ATTRIBUTE_UNUSED)
32b5b1aa 1460{
e9a25f70
JL
1461 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1462 make the problem of too few registers even worse. */
32b5b1aa
SC
1463#ifdef INSN_SCHEDULING
1464 if (level > 1)
1465 flag_schedule_insns = 0;
1466#endif
55ba61f3
JH
1467
1468 /* The default values of these switches depend on TARGET_64BIT,
1469 which is not known at this moment. Mark these values with 2 and
1470 let the user override them. In case there is no command line option
1471 specifying them, we will set the defaults in override_options. */
1472 if (optimize >= 1)
1473 flag_omit_frame_pointer = 2;
1474 flag_pcc_struct_return = 2;
1475 flag_asynchronous_unwind_tables = 2;
32b5b1aa 1476}
b08de47e 1477\f
91d231cb
JM
1478/* Table of valid machine attributes. */
1479const struct attribute_spec ix86_attribute_table[] =
b08de47e 1480{
91d231cb 1481 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
b08de47e
MM
1482 /* Stdcall attribute says callee is responsible for popping arguments
1483 if they are not variable. */
91d231cb 1484 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
e91f04de
CH
1485 /* Fastcall attribute says callee is responsible for popping arguments
1486 if they are not variable. */
1487 { "fastcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
91d231cb
JM
1488 /* Cdecl attribute says the callee is a normal C declaration */
1489 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
b08de47e 1490 /* Regparm attribute specifies how many integer arguments are to be
0f290768 1491 passed in registers. */
91d231cb
JM
1492 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1493#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
3da1eb0b
DS
1494 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1495 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1496 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
91d231cb 1497#endif
fe77449a
DR
1498 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1499 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
91d231cb
JM
1500 { NULL, 0, 0, false, false, false, NULL }
1501};
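   As an illustration of the table above, the calling-convention attributes
   as a user would write them (hypothetical declarations, not from this file):

	int __attribute__ ((cdecl))    c_fn (int a, int b);  /* caller pops args  */
	int __attribute__ ((stdcall))  s_fn (int a, int b);  /* callee pops args  */
	int __attribute__ ((fastcall)) f_fn (int a, int b);  /* a in ECX, b in EDX */
	int __attribute__ ((regparm (3))) r_fn (int a, int b, int c);
						      /* args in EAX, EDX, ECX */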
1502
5fbf0217
EB
1503/* Decide whether we can make a sibling call to a function. DECL is the
1504 declaration of the function being targeted by the call and EXP is the
1505 CALL_EXPR representing the call. */
4977bab6
ZW
1506
1507static bool
b96a374d 1508ix86_function_ok_for_sibcall (tree decl, tree exp)
4977bab6
ZW
1509{
1510 /* If we are generating position-independent code, we cannot sibcall
1511 optimize any indirect call, or a direct call to a global function,
1512 as the PLT requires %ebx be live. */
1513 if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
1514 return false;
1515
1516 /* If we are returning floats on the 80387 register stack, we cannot
1517 make a sibcall from a function that doesn't return a float to a
5fbf0217
EB
1518 function that does or, conversely, from a function that does return
1519 a float to a function that doesn't; the necessary stack adjustment
1520 would not be executed. */
4977bab6 1521 if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp)))
5fbf0217 1522 != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)))))
4977bab6
ZW
1523 return false;
1524
1525 /* If this call is indirect, we'll need to be able to use a call-clobbered
b96a374d 1526 register for the address of the target function. Make sure that all
4977bab6
ZW
1527 such registers are not used for passing parameters. */
1528 if (!decl && !TARGET_64BIT)
1529 {
e767b5be 1530 tree type;
4977bab6
ZW
1531
1532 /* We're looking at the CALL_EXPR, we need the type of the function. */
1533 type = TREE_OPERAND (exp, 0); /* pointer expression */
1534 type = TREE_TYPE (type); /* pointer type */
1535 type = TREE_TYPE (type); /* function type */
1536
e767b5be 1537 if (ix86_function_regparm (type, NULL) >= 3)
4977bab6
ZW
1538 {
1539 /* ??? Need to count the actual number of registers to be used,
1540 not the possible number of registers. Fix later. */
1541 return false;
1542 }
1543 }
1544
1545 /* Otherwise okay. That also includes certain types of indirect calls. */
1546 return true;
1547}
1548
e91f04de 1549/* Handle a "cdecl", "stdcall", or "fastcall" attribute;
91d231cb
JM
1550 arguments as in struct attribute_spec.handler. */
1551static tree
b96a374d
AJ
1552ix86_handle_cdecl_attribute (tree *node, tree name,
1553 tree args ATTRIBUTE_UNUSED,
1554 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
91d231cb
JM
1555{
1556 if (TREE_CODE (*node) != FUNCTION_TYPE
1557 && TREE_CODE (*node) != METHOD_TYPE
1558 && TREE_CODE (*node) != FIELD_DECL
1559 && TREE_CODE (*node) != TYPE_DECL)
b08de47e 1560 {
91d231cb
JM
1561 warning ("`%s' attribute only applies to functions",
1562 IDENTIFIER_POINTER (name));
1563 *no_add_attrs = true;
1564 }
e91f04de
CH
1565 else
1566 {
1567 if (is_attribute_p ("fastcall", name))
1568 {
1569 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
1570 {
1571 error ("fastcall and stdcall attributes are not compatible");
1572 }
1573 else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
1574 {
1575 error ("fastcall and regparm attributes are not compatible");
1576 }
1577 }
1578 else if (is_attribute_p ("stdcall", name))
1579 {
1580 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1581 {
1582 error ("fastcall and stdcall attributes are not compatible");
1583 }
1584 }
1585 }
b08de47e 1586
91d231cb
JM
1587 if (TARGET_64BIT)
1588 {
1589 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1590 *no_add_attrs = true;
1591 }
b08de47e 1592
91d231cb
JM
1593 return NULL_TREE;
1594}
b08de47e 1595
91d231cb
JM
1596/* Handle a "regparm" attribute;
1597 arguments as in struct attribute_spec.handler. */
1598static tree
b96a374d
AJ
1599ix86_handle_regparm_attribute (tree *node, tree name, tree args,
1600 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
91d231cb
JM
1601{
1602 if (TREE_CODE (*node) != FUNCTION_TYPE
1603 && TREE_CODE (*node) != METHOD_TYPE
1604 && TREE_CODE (*node) != FIELD_DECL
1605 && TREE_CODE (*node) != TYPE_DECL)
1606 {
1607 warning ("`%s' attribute only applies to functions",
1608 IDENTIFIER_POINTER (name));
1609 *no_add_attrs = true;
1610 }
1611 else
1612 {
1613 tree cst;
b08de47e 1614
91d231cb
JM
1615 cst = TREE_VALUE (args);
1616 if (TREE_CODE (cst) != INTEGER_CST)
1617 {
1618 warning ("`%s' attribute requires an integer constant argument",
1619 IDENTIFIER_POINTER (name));
1620 *no_add_attrs = true;
1621 }
1622 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1623 {
1624 warning ("argument to `%s' attribute larger than %d",
1625 IDENTIFIER_POINTER (name), REGPARM_MAX);
1626 *no_add_attrs = true;
1627 }
e91f04de
CH
1628
1629 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
e767b5be
JH
1630 {
1631 error ("fastcall and regparm attributes are not compatible");
1632 }
b08de47e
MM
1633 }
1634
91d231cb 1635 return NULL_TREE;
b08de47e
MM
1636}
1637
1638/* Return 0 if the attributes for two types are incompatible, 1 if they
1639 are compatible, and 2 if they are nearly compatible (which causes a
1640 warning to be generated). */
1641
8d8e52be 1642static int
b96a374d 1643ix86_comp_type_attributes (tree type1, tree type2)
b08de47e 1644{
0f290768 1645 /* Check for mismatch of non-default calling convention. */
27c38fbe 1646 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
afcfe58c
MM
1647
1648 if (TREE_CODE (type1) != FUNCTION_TYPE)
1649 return 1;
1650
b96a374d 1651 /* Check for mismatched fastcall types */
e91f04de
CH
1652 if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
1653 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
b96a374d 1654 return 0;
e91f04de 1655
afcfe58c 1656 /* Check for mismatched return types (cdecl vs stdcall). */
6093f019
RH
1657 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1658 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
afcfe58c 1659 return 0;
b08de47e
MM
1660 return 1;
1661}
b08de47e 1662\f
e767b5be
JH
1663/* Return the regparm value for a function with the indicated TYPE and DECL.
1664 DECL may be NULL when calling the function indirectly
1665 or considering a libcall. */
483ab821
MM
1666
1667static int
e767b5be 1668ix86_function_regparm (tree type, tree decl)
483ab821
MM
1669{
1670 tree attr;
e767b5be
JH
1671 int regparm = ix86_regparm;
1672 bool user_convention = false;
483ab821 1673
e767b5be
JH
1674 if (!TARGET_64BIT)
1675 {
1676 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1677 if (attr)
1678 {
1679 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1680 user_convention = true;
1681 }
1682
1683 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
1684 {
1685 regparm = 2;
1686 user_convention = true;
1687 }
1688
1689 /* Use register calling convention for local functions when possible. */
1690 if (!TARGET_64BIT && !user_convention && decl
1691 && flag_unit_at_a_time)
1692 {
1693 struct cgraph_local_info *i = cgraph_local_info (decl);
1694 if (i && i->local)
1695 {
1696 /* We can't use regparm(3) for nested functions as these use the
1697 static chain pointer in the third argument. */
1698 if (DECL_CONTEXT (decl) && !DECL_NO_STATIC_CHAIN (decl))
1699 regparm = 2;
1700 else
1701 regparm = 3;
1702 }
1703 }
1704 }
1705 return regparm;
483ab821
MM
1706}
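   A sketch of the local-function case handled above (hypothetical code;
   the actual promotion depends on -funit-at-a-time and the cgraph analysis):

	static int
	sum3 (int a, int b, int c)  /* static, address never taken: cgraph marks
				       it local, so it may get regparm 3 above */
	{
	  return a + b + c;
	}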
1707
b08de47e
MM
1708/* Value is the number of bytes of arguments automatically
1709 popped when returning from a subroutine call.
1710 FUNDECL is the declaration node of the function (as a tree),
1711 FUNTYPE is the data type of the function (as a tree),
1712 or for a library call it is an identifier node for the subroutine name.
1713 SIZE is the number of bytes of arguments passed on the stack.
1714
1715 On the 80386, the RTD insn may be used to pop them if the number
1716 of args is fixed, but if the number is variable then the caller
1717 must pop them all. RTD can't be used for library calls now
1718 because the library is compiled with the Unix compiler.
1719 Use of RTD is a selectable option, since it is incompatible with
1720 standard Unix calling sequences. If the option is not selected,
1721 the caller must always pop the args.
1722
1723 The attribute stdcall is equivalent to RTD on a per module basis. */
1724
1725int
b96a374d 1726ix86_return_pops_args (tree fundecl, tree funtype, int size)
79325812 1727{
3345ee7d 1728 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
b08de47e 1729
0f290768 1730 /* Cdecl functions override -mrtd, and never pop the stack. */
e9a25f70 1731 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
79325812 1732
e91f04de
CH
1733 /* Stdcall and fastcall functions will pop the stack if not variable args. */
1734 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
1735 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
698cdd84 1736 rtd = 1;
79325812 1737
698cdd84
SC
1738 if (rtd
1739 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
e9a25f70
JL
1740 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1741 == void_type_node)))
698cdd84
SC
1742 return size;
1743 }
79325812 1744
232b8f52 1745 /* Lose any fake structure return argument if it is passed on the stack. */
61f71b34 1746 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
0d7d98ee 1747 && !TARGET_64BIT)
232b8f52 1748 {
e767b5be 1749 int nregs = ix86_function_regparm (funtype, fundecl);
232b8f52
JJ
1750
1751 if (!nregs)
1752 return GET_MODE_SIZE (Pmode);
1753 }
1754
1755 return 0;
b08de47e 1756}
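   Illustrative prototypes (hypothetical) for the cases above:

	int __attribute__ ((stdcall)) fixed (int a, int b); /* pops 8: "ret $8"  */
	int __attribute__ ((stdcall)) varia (int a, ...);   /* varargs: caller pops */
	int plain (int a, int b);  /* cdecl default: caller pops, returns 0      */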
b08de47e
MM
1757\f
1758/* Argument support functions. */
1759
53c17031
JH
1760/* Return true when register may be used to pass function parameters. */
1761bool
b96a374d 1762ix86_function_arg_regno_p (int regno)
53c17031
JH
1763{
1764 int i;
1765 if (!TARGET_64BIT)
0333394e
JJ
1766 return (regno < REGPARM_MAX
1767 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
53c17031
JH
1768 if (SSE_REGNO_P (regno) && TARGET_SSE)
1769 return true;
1770 /* RAX is used as hidden argument to va_arg functions. */
1771 if (!regno)
1772 return true;
1773 for (i = 0; i < REGPARM_MAX; i++)
1774 if (regno == x86_64_int_parameter_registers[i])
1775 return true;
1776 return false;
1777}
1778
b08de47e
MM
1779/* Initialize a variable CUM of type CUMULATIVE_ARGS
1780 for a call to a function whose data type is FNTYPE.
1781 For a library call, FNTYPE is 0. */
1782
1783void
b96a374d
AJ
1784init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
1785 tree fntype, /* tree ptr for function decl */
1786 rtx libname, /* SYMBOL_REF of library name or 0 */
1787 tree fndecl)
b08de47e
MM
1788{
1789 static CUMULATIVE_ARGS zero_cum;
1790 tree param, next_param;
1791
1792 if (TARGET_DEBUG_ARG)
1793 {
1794 fprintf (stderr, "\ninit_cumulative_args (");
1795 if (fntype)
e9a25f70
JL
1796 fprintf (stderr, "fntype code = %s, ret code = %s",
1797 tree_code_name[(int) TREE_CODE (fntype)],
1798 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
b08de47e
MM
1799 else
1800 fprintf (stderr, "no fntype");
1801
1802 if (libname)
1803 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1804 }
1805
1806 *cum = zero_cum;
1807
1808 /* Set up the number of registers to use for passing arguments. */
e767b5be
JH
1809 if (fntype)
1810 cum->nregs = ix86_function_regparm (fntype, fndecl);
1811 else
1812 cum->nregs = ix86_regparm;
53c17031 1813 cum->sse_nregs = SSE_REGPARM_MAX;
53c17031 1814 cum->maybe_vaarg = false;
b08de47e 1815
e91f04de
CH
1816 /* Use ecx and edx registers if function has fastcall attribute */
1817 if (fntype && !TARGET_64BIT)
1818 {
1819 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
1820 {
1821 cum->nregs = 2;
1822 cum->fastcall = 1;
1823 }
1824 }
1825
1826
b08de47e
MM
1827 /* Determine if this function has variable arguments. This is
1828 indicated by the last argument being 'void_type_node' if there
1829 are no variable arguments. If there are variable arguments, then
1830 we won't pass anything in registers. */
1831
1832 if (cum->nregs)
1833 {
1834 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
e9a25f70 1835 param != 0; param = next_param)
b08de47e
MM
1836 {
1837 next_param = TREE_CHAIN (param);
e9a25f70 1838 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
53c17031
JH
1839 {
1840 if (!TARGET_64BIT)
e91f04de
CH
1841 {
1842 cum->nregs = 0;
1843 cum->fastcall = 0;
1844 }
53c17031
JH
1845 cum->maybe_vaarg = true;
1846 }
b08de47e
MM
1847 }
1848 }
53c17031
JH
1849 if ((!fntype && !libname)
1850 || (fntype && !TYPE_ARG_TYPES (fntype)))
1851 cum->maybe_vaarg = 1;
b08de47e
MM
1852
1853 if (TARGET_DEBUG_ARG)
1854 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1855
1856 return;
1857}
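   The prototype shapes distinguished by the setup above (hypothetical
   declarations):

	void reg_ok (int a, int b); /* arg list ends in void: registers usable */
	void on_stack (int a, ...); /* varargs: nregs cleared, args on stack   */
	void unknown ();            /* unprototyped: maybe_vaarg is set        */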
1858
d1f87653 1859/* x86-64 register passing implementation. See x86-64 ABI for details. Goal
f710504c 1860 of this code is to classify each 8-byte chunk of the incoming argument by the register
53c17031
JH
1861 class and assign registers accordingly. */
1862
1863/* Return the union class of CLASS1 and CLASS2.
1864 See the x86-64 PS ABI for details. */
1865
1866static enum x86_64_reg_class
b96a374d 1867merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
53c17031
JH
1868{
1869 /* Rule #1: If both classes are equal, this is the resulting class. */
1870 if (class1 == class2)
1871 return class1;
1872
1873 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1874 the other class. */
1875 if (class1 == X86_64_NO_CLASS)
1876 return class2;
1877 if (class2 == X86_64_NO_CLASS)
1878 return class1;
1879
1880 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1881 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1882 return X86_64_MEMORY_CLASS;
1883
1884 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1885 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1886 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1887 return X86_64_INTEGERSI_CLASS;
1888 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1889 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1890 return X86_64_INTEGER_CLASS;
1891
1892 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1893 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1894 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1895 return X86_64_MEMORY_CLASS;
1896
1897 /* Rule #6: Otherwise class SSE is used. */
1898 return X86_64_SSE_CLASS;
1899}
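   A worked application of the merging rules above, on a hypothetical
   one-word aggregate:

	struct s { int i; float f; };  /* 8 bytes -> a single 8-byte word */
	/* 'i' classifies as INTEGERSI and 'f' as SSESF; Rule #4 merges
	   (INTEGERSI, SSESF) to INTEGERSI, so the whole struct is passed
	   in one general-purpose register.  */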
1900
1901/* Classify the argument of type TYPE and mode MODE.
1902 CLASSES will be filled by the register class used to pass each word
1903 of the operand. The number of words is returned. In case the parameter
1904 should be passed in memory, 0 is returned. As a special case for zero
1905 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1906
1907 BIT_OFFSET is used internally for handling records and specifies the
1908 offset in bits modulo 256 to avoid overflow cases.
1909
1910 See the x86-64 PS ABI for details.
1911*/
1912
1913static int
b96a374d
AJ
1914classify_argument (enum machine_mode mode, tree type,
1915 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
53c17031
JH
1916{
1917 int bytes =
1918 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
23327dae 1919 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
53c17031 1920
c60ee6f5
JH
1921 /* Variable sized entities are always passed/returned in memory. */
1922 if (bytes < 0)
1923 return 0;
1924
dafc5b82
JH
1925 if (mode != VOIDmode
1926 && MUST_PASS_IN_STACK (mode, type))
1927 return 0;
1928
53c17031
JH
1929 if (type && AGGREGATE_TYPE_P (type))
1930 {
1931 int i;
1932 tree field;
1933 enum x86_64_reg_class subclasses[MAX_CLASSES];
1934
1935 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
1936 if (bytes > 16)
1937 return 0;
1938
1939 for (i = 0; i < words; i++)
1940 classes[i] = X86_64_NO_CLASS;
1941
1942 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
1943 signal the memory class, so handle it as a special case. */
1944 if (!words)
1945 {
1946 classes[0] = X86_64_NO_CLASS;
1947 return 1;
1948 }
1949
1950 /* Classify each field of record and merge classes. */
1951 if (TREE_CODE (type) == RECORD_TYPE)
1952 {
91ea38f9
JH
1953 /* For classes, first merge in the fields of the base classes. */
1954 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1955 {
1956 tree bases = TYPE_BINFO_BASETYPES (type);
1957 int n_bases = TREE_VEC_LENGTH (bases);
1958 int i;
1959
1960 for (i = 0; i < n_bases; ++i)
1961 {
1962 tree binfo = TREE_VEC_ELT (bases, i);
1963 int num;
1964 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1965 tree type = BINFO_TYPE (binfo);
1966
1967 num = classify_argument (TYPE_MODE (type),
1968 type, subclasses,
1969 (offset + bit_offset) % 256);
1970 if (!num)
1971 return 0;
1972 for (i = 0; i < num; i++)
1973 {
db01f480 1974 int pos = (offset + (bit_offset % 64)) / 8 / 8;
91ea38f9
JH
1975 classes[i + pos] =
1976 merge_classes (subclasses[i], classes[i + pos]);
1977 }
1978 }
1979 }
1980 /* And now merge the fields of the structure. */
53c17031
JH
1981 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1982 {
1983 if (TREE_CODE (field) == FIELD_DECL)
1984 {
1985 int num;
1986
1987 /* Bitfields are always classified as integer. Handle them
1988 early, since later code would consider them to be
1989 misaligned integers. */
1990 if (DECL_BIT_FIELD (field))
1991 {
1992 for (i = int_bit_position (field) / 8 / 8;
1993 i < (int_bit_position (field)
1994 + tree_low_cst (DECL_SIZE (field), 0)
b96a374d 1995 + 63) / 8 / 8; i++)
53c17031
JH
1996 classes[i] =
1997 merge_classes (X86_64_INTEGER_CLASS,
1998 classes[i]);
1999 }
2000 else
2001 {
2002 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2003 TREE_TYPE (field), subclasses,
2004 (int_bit_position (field)
2005 + bit_offset) % 256);
2006 if (!num)
2007 return 0;
2008 for (i = 0; i < num; i++)
2009 {
2010 int pos =
db01f480 2011 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
53c17031
JH
2012 classes[i + pos] =
2013 merge_classes (subclasses[i], classes[i + pos]);
2014 }
2015 }
2016 }
2017 }
2018 }
2019 /* Arrays are handled as small records. */
2020 else if (TREE_CODE (type) == ARRAY_TYPE)
2021 {
2022 int num;
2023 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2024 TREE_TYPE (type), subclasses, bit_offset);
2025 if (!num)
2026 return 0;
2027
2028 /* The partial classes are now full classes. */
2029 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2030 subclasses[0] = X86_64_SSE_CLASS;
2031 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
2032 subclasses[0] = X86_64_INTEGER_CLASS;
2033
2034 for (i = 0; i < words; i++)
2035 classes[i] = subclasses[i % num];
2036 }
2037 /* Unions are similar to RECORD_TYPE but offset is always 0. */
e4dbaed5
AS
2038 else if (TREE_CODE (type) == UNION_TYPE
2039 || TREE_CODE (type) == QUAL_UNION_TYPE)
53c17031 2040 {
91ea38f9
JH
2041 /* For classes, first merge in the fields of the base classes. */
2042 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
2043 {
2044 tree bases = TYPE_BINFO_BASETYPES (type);
2045 int n_bases = TREE_VEC_LENGTH (bases);
2046 int i;
2047
2048 for (i = 0; i < n_bases; ++i)
2049 {
2050 tree binfo = TREE_VEC_ELT (bases, i);
2051 int num;
2052 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
2053 tree type = BINFO_TYPE (binfo);
2054
2055 num = classify_argument (TYPE_MODE (type),
2056 type, subclasses,
db01f480 2057 (offset + (bit_offset % 64)) % 256);
91ea38f9
JH
2058 if (!num)
2059 return 0;
2060 for (i = 0; i < num; i++)
2061 {
c16576e6 2062 int pos = (offset + (bit_offset % 64)) / 8 / 8;
91ea38f9
JH
2063 classes[i + pos] =
2064 merge_classes (subclasses[i], classes[i + pos]);
2065 }
2066 }
2067 }
53c17031
JH
2068 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2069 {
2070 if (TREE_CODE (field) == FIELD_DECL)
2071 {
2072 int num;
2073 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2074 TREE_TYPE (field), subclasses,
2075 bit_offset);
2076 if (!num)
2077 return 0;
2078 for (i = 0; i < num; i++)
2079 classes[i] = merge_classes (subclasses[i], classes[i]);
2080 }
2081 }
2082 }
2083 else
2084 abort ();
2085
2086 /* Final merger cleanup. */
2087 for (i = 0; i < words; i++)
2088 {
2089 /* If one class is MEMORY, everything should be passed in
2090 memory. */
2091 if (classes[i] == X86_64_MEMORY_CLASS)
2092 return 0;
2093
d6a7951f 2094 /* The X86_64_SSEUP_CLASS should always be preceded by
53c17031
JH
2095 X86_64_SSE_CLASS. */
2096 if (classes[i] == X86_64_SSEUP_CLASS
2097 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
2098 classes[i] = X86_64_SSE_CLASS;
2099
d6a7951f 2100 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
53c17031
JH
2101 if (classes[i] == X86_64_X87UP_CLASS
2102 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
2103 classes[i] = X86_64_SSE_CLASS;
2104 }
2105 return words;
2106 }
2107
2108 /* Compute the alignment needed. We align all types to natural boundaries,
2109 with the exception of XFmode, which is aligned to 64 bits. */
2110 if (mode != VOIDmode && mode != BLKmode)
2111 {
2112 int mode_alignment = GET_MODE_BITSIZE (mode);
2113
2114 if (mode == XFmode)
2115 mode_alignment = 128;
2116 else if (mode == XCmode)
2117 mode_alignment = 256;
f5143c46 2118 /* Misaligned fields are always returned in memory. */
53c17031
JH
2119 if (bit_offset % mode_alignment)
2120 return 0;
2121 }
2122
2123 /* Classification of atomic types. */
2124 switch (mode)
2125 {
2126 case DImode:
2127 case SImode:
2128 case HImode:
2129 case QImode:
2130 case CSImode:
2131 case CHImode:
2132 case CQImode:
2133 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2134 classes[0] = X86_64_INTEGERSI_CLASS;
2135 else
2136 classes[0] = X86_64_INTEGER_CLASS;
2137 return 1;
2138 case CDImode:
2139 case TImode:
2140 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2141 return 2;
2142 case CTImode:
2143 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2144 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
2145 return 4;
2146 case SFmode:
2147 if (!(bit_offset % 64))
2148 classes[0] = X86_64_SSESF_CLASS;
2149 else
2150 classes[0] = X86_64_SSE_CLASS;
2151 return 1;
2152 case DFmode:
2153 classes[0] = X86_64_SSEDF_CLASS;
2154 return 1;
2155 case TFmode:
2156 classes[0] = X86_64_X87_CLASS;
2157 classes[1] = X86_64_X87UP_CLASS;
2158 return 2;
2159 case TCmode:
2160 classes[0] = X86_64_X87_CLASS;
2161 classes[1] = X86_64_X87UP_CLASS;
2162 classes[2] = X86_64_X87_CLASS;
2163 classes[3] = X86_64_X87UP_CLASS;
2164 return 4;
2165 case DCmode:
2166 classes[0] = X86_64_SSEDF_CLASS;
2167 classes[1] = X86_64_SSEDF_CLASS;
2168 return 2;
2169 case SCmode:
2170 classes[0] = X86_64_SSE_CLASS;
2171 return 1;
e95d6b23
JH
2172 case V4SFmode:
2173 case V4SImode:
495333a6
JH
2174 case V16QImode:
2175 case V8HImode:
2176 case V2DFmode:
2177 case V2DImode:
e95d6b23
JH
2178 classes[0] = X86_64_SSE_CLASS;
2179 classes[1] = X86_64_SSEUP_CLASS;
2180 return 2;
2181 case V2SFmode:
2182 case V2SImode:
2183 case V4HImode:
2184 case V8QImode:
1194ca05 2185 return 0;
53c17031 2186 case BLKmode:
e95d6b23 2187 case VOIDmode:
53c17031
JH
2188 return 0;
2189 default:
2190 abort ();
2191 }
2192}
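   For example, the atomic cases above classify common C types as follows
   (a summary under this vintage of the code, where long double is TFmode):

	/* int         -> INTEGERSI (fits in the low 32 bits of a word)
	   long        -> INTEGER
	   float       -> SSESF when 64-bit aligned, SSE otherwise
	   double      -> SSEDF
	   long double -> X87 + X87UP (two words)
	   __m128      -> SSE + SSEUP (two words)  */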
2193
2194/* Examine the argument and return the number of registers required in each
f5143c46 2195 class. Return 0 iff parameter should be passed in memory. */
53c17031 2196static int
b96a374d
AJ
2197examine_argument (enum machine_mode mode, tree type, int in_return,
2198 int *int_nregs, int *sse_nregs)
53c17031
JH
2199{
2200 enum x86_64_reg_class class[MAX_CLASSES];
2201 int n = classify_argument (mode, type, class, 0);
2202
2203 *int_nregs = 0;
2204 *sse_nregs = 0;
2205 if (!n)
2206 return 0;
2207 for (n--; n >= 0; n--)
2208 switch (class[n])
2209 {
2210 case X86_64_INTEGER_CLASS:
2211 case X86_64_INTEGERSI_CLASS:
2212 (*int_nregs)++;
2213 break;
2214 case X86_64_SSE_CLASS:
2215 case X86_64_SSESF_CLASS:
2216 case X86_64_SSEDF_CLASS:
2217 (*sse_nregs)++;
2218 break;
2219 case X86_64_NO_CLASS:
2220 case X86_64_SSEUP_CLASS:
2221 break;
2222 case X86_64_X87_CLASS:
2223 case X86_64_X87UP_CLASS:
2224 if (!in_return)
2225 return 0;
2226 break;
2227 case X86_64_MEMORY_CLASS:
2228 abort ();
2229 }
2230 return 1;
2231}
2232/* Construct container for the argument used by GCC interface. See
2233 FUNCTION_ARG for the detailed description. */
2234static rtx
b96a374d
AJ
2235construct_container (enum machine_mode mode, tree type, int in_return,
2236 int nintregs, int nsseregs, const int * intreg,
2237 int sse_regno)
53c17031
JH
2238{
2239 enum machine_mode tmpmode;
2240 int bytes =
2241 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2242 enum x86_64_reg_class class[MAX_CLASSES];
2243 int n;
2244 int i;
2245 int nexps = 0;
2246 int needed_sseregs, needed_intregs;
2247 rtx exp[MAX_CLASSES];
2248 rtx ret;
2249
2250 n = classify_argument (mode, type, class, 0);
2251 if (TARGET_DEBUG_ARG)
2252 {
2253 if (!n)
2254 fprintf (stderr, "Memory class\n");
2255 else
2256 {
2257 fprintf (stderr, "Classes:");
2258 for (i = 0; i < n; i++)
2259 {
2260 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2261 }
2262 fprintf (stderr, "\n");
2263 }
2264 }
2265 if (!n)
2266 return NULL;
2267 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
2268 return NULL;
2269 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2270 return NULL;
2271
2272 /* First construct simple cases. Avoid SCmode, since we want to use
2273 a single register to pass this type. */
2274 if (n == 1 && mode != SCmode)
2275 switch (class[0])
2276 {
2277 case X86_64_INTEGER_CLASS:
2278 case X86_64_INTEGERSI_CLASS:
2279 return gen_rtx_REG (mode, intreg[0]);
2280 case X86_64_SSE_CLASS:
2281 case X86_64_SSESF_CLASS:
2282 case X86_64_SSEDF_CLASS:
2283 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2284 case X86_64_X87_CLASS:
2285 return gen_rtx_REG (mode, FIRST_STACK_REG);
2286 case X86_64_NO_CLASS:
2287 /* Zero sized array, struct or class. */
2288 return NULL;
2289 default:
2290 abort ();
2291 }
2292 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
e95d6b23 2293 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
53c17031
JH
2294 if (n == 2
2295 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2296 return gen_rtx_REG (TFmode, FIRST_STACK_REG);
2297 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2298 && class[1] == X86_64_INTEGER_CLASS
2299 && (mode == CDImode || mode == TImode)
2300 && intreg[0] + 1 == intreg[1])
2301 return gen_rtx_REG (mode, intreg[0]);
2302 if (n == 4
2303 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
2304 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
2305 return gen_rtx_REG (TCmode, FIRST_STACK_REG);
2306
2307 /* Otherwise figure out the entries of the PARALLEL. */
2308 for (i = 0; i < n; i++)
2309 {
2310 switch (class[i])
2311 {
2312 case X86_64_NO_CLASS:
2313 break;
2314 case X86_64_INTEGER_CLASS:
2315 case X86_64_INTEGERSI_CLASS:
d1f87653 2316 /* Merge TImodes on aligned occasions here too. */
53c17031
JH
2317 if (i * 8 + 8 > bytes)
2318 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2319 else if (class[i] == X86_64_INTEGERSI_CLASS)
2320 tmpmode = SImode;
2321 else
2322 tmpmode = DImode;
2323 /* We've requested 24 bytes for which we don't have a mode. Use DImode. */
2324 if (tmpmode == BLKmode)
2325 tmpmode = DImode;
2326 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2327 gen_rtx_REG (tmpmode, *intreg),
2328 GEN_INT (i*8));
2329 intreg++;
2330 break;
2331 case X86_64_SSESF_CLASS:
2332 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2333 gen_rtx_REG (SFmode,
2334 SSE_REGNO (sse_regno)),
2335 GEN_INT (i*8));
2336 sse_regno++;
2337 break;
2338 case X86_64_SSEDF_CLASS:
2339 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2340 gen_rtx_REG (DFmode,
2341 SSE_REGNO (sse_regno)),
2342 GEN_INT (i*8));
2343 sse_regno++;
2344 break;
2345 case X86_64_SSE_CLASS:
12f5c45e
JH
2346 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2347 tmpmode = TImode;
53c17031
JH
2348 else
2349 tmpmode = DImode;
2350 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2351 gen_rtx_REG (tmpmode,
2352 SSE_REGNO (sse_regno)),
2353 GEN_INT (i*8));
12f5c45e
JH
2354 if (tmpmode == TImode)
2355 i++;
53c17031
JH
2356 sse_regno++;
2357 break;
2358 default:
2359 abort ();
2360 }
2361 }
2362 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2363 for (i = 0; i < nexps; i++)
2364 XVECEXP (ret, 0, i) = exp [i];
2365 return ret;
2366}
2367
b08de47e
MM
2368/* Update the data in CUM to advance over an argument
2369 of mode MODE and data type TYPE.
2370 (TYPE is null for libcalls where that information may not be available.) */
2371
2372void
b96a374d
AJ
2373function_arg_advance (CUMULATIVE_ARGS *cum, /* current arg information */
2374 enum machine_mode mode, /* current arg mode */
2375 tree type, /* type of the argument or 0 if lib support */
2376 int named) /* whether or not the argument was named */
b08de47e 2377{
5ac9118e
KG
2378 int bytes =
2379 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
b08de47e
MM
2380 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2381
2382 if (TARGET_DEBUG_ARG)
2383 fprintf (stderr,
e9a25f70 2384 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
b08de47e 2385 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
53c17031 2386 if (TARGET_64BIT)
b08de47e 2387 {
53c17031
JH
2388 int int_nregs, sse_nregs;
2389 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2390 cum->words += words;
2391 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
82a127a9 2392 {
53c17031
JH
2393 cum->nregs -= int_nregs;
2394 cum->sse_nregs -= sse_nregs;
2395 cum->regno += int_nregs;
2396 cum->sse_regno += sse_nregs;
82a127a9 2397 }
53c17031
JH
2398 else
2399 cum->words += words;
b08de47e 2400 }
a4f31c00 2401 else
82a127a9 2402 {
53c17031
JH
2403 if (TARGET_SSE && mode == TImode)
2404 {
2405 cum->sse_words += words;
2406 cum->sse_nregs -= 1;
2407 cum->sse_regno += 1;
2408 if (cum->sse_nregs <= 0)
2409 {
2410 cum->sse_nregs = 0;
2411 cum->sse_regno = 0;
2412 }
2413 }
2414 else
82a127a9 2415 {
53c17031
JH
2416 cum->words += words;
2417 cum->nregs -= words;
2418 cum->regno += words;
2419
2420 if (cum->nregs <= 0)
2421 {
2422 cum->nregs = 0;
2423 cum->regno = 0;
2424 }
82a127a9
CM
2425 }
2426 }
b08de47e
MM
2427 return;
2428}
2429
2430/* Define where to put the arguments to a function.
2431 Value is zero to push the argument on the stack,
2432 or a hard register in which to store the argument.
2433
2434 MODE is the argument's machine mode.
2435 TYPE is the data type of the argument (as a tree).
2436 This is null for libcalls where that information may
2437 not be available.
2438 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2439 the preceding args and about the function being called.
2440 NAMED is nonzero if this argument is a named parameter
2441 (otherwise it is an extra parameter matching an ellipsis). */
2442
07933f72 2443rtx
b96a374d
AJ
2444function_arg (CUMULATIVE_ARGS *cum, /* current arg information */
2445 enum machine_mode mode, /* current arg mode */
2446 tree type, /* type of the argument or 0 if lib support */
2447 int named) /* != 0 for normal args, == 0 for ... args */
b08de47e
MM
2448{
2449 rtx ret = NULL_RTX;
5ac9118e
KG
2450 int bytes =
2451 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
b08de47e
MM
2452 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2453
5bdc5878 2454 /* Handle a hidden AL argument containing the number of registers for varargs
53c17031
JH
2455 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
2456 any AL settings. */
32ee7d1d 2457 if (mode == VOIDmode)
b08de47e 2458 {
53c17031
JH
2459 if (TARGET_64BIT)
2460 return GEN_INT (cum->maybe_vaarg
2461 ? (cum->sse_nregs < 0
2462 ? SSE_REGPARM_MAX
2463 : cum->sse_regno)
2464 : -1);
2465 else
2466 return constm1_rtx;
b08de47e 2467 }
53c17031
JH
2468 if (TARGET_64BIT)
2469 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2470 &x86_64_int_parameter_registers [cum->regno],
2471 cum->sse_regno);
2472 else
2473 switch (mode)
2474 {
2475 /* For now, pass fp/complex values on the stack. */
2476 default:
2477 break;
2478
2479 case BLKmode:
8d454008
RH
2480 if (bytes < 0)
2481 break;
2482 /* FALLTHRU */
53c17031
JH
2483 case DImode:
2484 case SImode:
2485 case HImode:
2486 case QImode:
2487 if (words <= cum->nregs)
b96a374d
AJ
2488 {
2489 int regno = cum->regno;
2490
2491 /* Fastcall allocates the first two DWORD (SImode) or
2492 smaller arguments to ECX and EDX. */
2493 if (cum->fastcall)
2494 {
2495 if (mode == BLKmode || mode == DImode)
2496 break;
2497
2498 /* ECX, not EAX, is the first allocated register. */
2499 if (regno == 0)
e767b5be 2500 regno = 2;
b96a374d
AJ
2501 }
2502 ret = gen_rtx_REG (mode, regno);
2503 }
53c17031
JH
2504 break;
2505 case TImode:
2506 if (cum->sse_nregs)
2507 ret = gen_rtx_REG (mode, cum->sse_regno);
2508 break;
2509 }
b08de47e
MM
2510
2511 if (TARGET_DEBUG_ARG)
2512 {
2513 fprintf (stderr,
91ea38f9 2514 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
b08de47e
MM
2515 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2516
2517 if (ret)
91ea38f9 2518 print_simple_rtl (stderr, ret);
b08de47e
MM
2519 else
2520 fprintf (stderr, ", stack");
2521
2522 fprintf (stderr, " )\n");
2523 }
2524
2525 return ret;
2526}
53c17031 2527
09b2e78d
ZD
2528/* A C expression that indicates when an argument must be passed by
2529 reference. If nonzero for an argument, a copy of that argument is
2530 made in memory and a pointer to the argument is passed instead of
2531 the argument itself. The pointer is passed in whatever way is
2532 appropriate for passing a pointer to that type. */
2533
2534int
b96a374d
AJ
2535function_arg_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
2536 enum machine_mode mode ATTRIBUTE_UNUSED,
2537 tree type, int named ATTRIBUTE_UNUSED)
09b2e78d
ZD
2538{
2539 if (!TARGET_64BIT)
2540 return 0;
2541
2542 if (type && int_size_in_bytes (type) == -1)
2543 {
2544 if (TARGET_DEBUG_ARG)
2545 fprintf (stderr, "function_arg_pass_by_reference\n");
2546 return 1;
2547 }
2548
2549 return 0;
2550}
2551
8b978a57
JH
2552/* Return true when TYPE should be 128-bit aligned for the 32-bit argument
2553 passing ABI. */
2554static bool
b96a374d 2555contains_128bit_aligned_vector_p (tree type)
8b978a57
JH
2556{
2557 enum machine_mode mode = TYPE_MODE (type);
2558 if (SSE_REG_MODE_P (mode)
2559 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
2560 return true;
2561 if (TYPE_ALIGN (type) < 128)
2562 return false;
2563
2564 if (AGGREGATE_TYPE_P (type))
2565 {
2a43945f 2566 /* Walk the aggregates recursively. */
8b978a57
JH
2567 if (TREE_CODE (type) == RECORD_TYPE
2568 || TREE_CODE (type) == UNION_TYPE
2569 || TREE_CODE (type) == QUAL_UNION_TYPE)
2570 {
2571 tree field;
2572
2573 if (TYPE_BINFO (type) != NULL
2574 && TYPE_BINFO_BASETYPES (type) != NULL)
2575 {
2576 tree bases = TYPE_BINFO_BASETYPES (type);
2577 int n_bases = TREE_VEC_LENGTH (bases);
2578 int i;
2579
2580 for (i = 0; i < n_bases; ++i)
2581 {
2582 tree binfo = TREE_VEC_ELT (bases, i);
2583 tree type = BINFO_TYPE (binfo);
2584
2585 if (contains_128bit_aligned_vector_p (type))
2586 return true;
2587 }
2588 }
2589 /* And now merge the fields of the structure. */
2590 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2591 {
2592 if (TREE_CODE (field) == FIELD_DECL
2593 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
2594 return true;
2595 }
2596 }
2597 /* Just for use if some language passes arrays by value. */
2598 else if (TREE_CODE (type) == ARRAY_TYPE)
2599 {
2600 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
2601 return true;
2602 }
2603 else
2604 abort ();
2605 }
2606 return false;
2607}
2608
bb498ea3
AH
2609/* Gives the alignment boundary, in bits, of an argument with the
2610 specified mode and type. */
53c17031
JH
2611
2612int
b96a374d 2613ix86_function_arg_boundary (enum machine_mode mode, tree type)
53c17031
JH
2614{
2615 int align;
53c17031
JH
2616 if (type)
2617 align = TYPE_ALIGN (type);
2618 else
2619 align = GET_MODE_ALIGNMENT (mode);
2620 if (align < PARM_BOUNDARY)
2621 align = PARM_BOUNDARY;
8b978a57
JH
2622 if (!TARGET_64BIT)
2623 {
2624 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
2625 make an exception for SSE modes since these require 128bit
b96a374d 2626 alignment.
8b978a57
JH
2627
2628 The handling here differs from field_alignment. ICC aligns MMX
2629 arguments to 4 byte boundaries, while structure fields are aligned
2630 to 8 byte boundaries. */
2631 if (!type)
2632 {
2633 if (!SSE_REG_MODE_P (mode))
2634 align = PARM_BOUNDARY;
2635 }
2636 else
2637 {
2638 if (!contains_128bit_aligned_vector_p (type))
2639 align = PARM_BOUNDARY;
2640 }
8b978a57 2641 }
53c17031
JH
2642 if (align > 128)
2643 align = 128;
2644 return align;
2645}
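   Illustrative types (hypothetical) for the ia32 boundary logic above;
   compiling the vector case assumes an SSE-enabled target:

	#include <xmmintrin.h>

	struct no_vec { double d; int i; }; /* no vector member: 32-bit boundary */
	struct has_vec { __m128 v; };       /* contains __m128: 128-bit boundary */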
2646
2647/* Return true if N is a possible register number of function value. */
2648bool
b96a374d 2649ix86_function_value_regno_p (int regno)
53c17031
JH
2650{
2651 if (!TARGET_64BIT)
2652 {
2653 return ((regno) == 0
2654 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2655 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2656 }
2657 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2658 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2659 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2660}
2661
2662/* Define how to find the value returned by a function.
2663 VALTYPE is the data type of the value (as a tree).
2664 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2665 otherwise, FUNC is 0. */
2666rtx
b96a374d 2667ix86_function_value (tree valtype)
53c17031
JH
2668{
2669 if (TARGET_64BIT)
2670 {
2671 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2672 REGPARM_MAX, SSE_REGPARM_MAX,
2673 x86_64_int_return_registers, 0);
d1f87653
KH
2674 /* For zero sized structures, construct_container returns NULL, but we need
2675 to keep the rest of the compiler happy by returning a meaningful value. */
53c17031
JH
2676 if (!ret)
2677 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2678 return ret;
2679 }
2680 else
b069de3b
SS
2681 return gen_rtx_REG (TYPE_MODE (valtype),
2682 ix86_value_regno (TYPE_MODE (valtype)));
53c17031
JH
2683}
2684
f5143c46 2685/* Return nonzero iff type is returned in memory. */
53c17031 2686int
b96a374d 2687ix86_return_in_memory (tree type)
53c17031 2688{
a30b6839
RH
2689 int needed_intregs, needed_sseregs, size;
2690 enum machine_mode mode = TYPE_MODE (type);
2691
53c17031 2692 if (TARGET_64BIT)
a30b6839
RH
2693 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
2694
2695 if (mode == BLKmode)
2696 return 1;
2697
2698 size = int_size_in_bytes (type);
2699
2700 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
2701 return 0;
2702
2703 if (VECTOR_MODE_P (mode) || mode == TImode)
53c17031 2704 {
a30b6839
RH
2705 /* User-created vectors small enough to fit in EAX. */
2706 if (size < 8)
5e062767 2707 return 0;
a30b6839
RH
2708
2709 /* MMX/3dNow values are returned on the stack, since we've
2710 got to EMMS/FEMMS before returning. */
2711 if (size == 8)
53c17031 2712 return 1;
a30b6839
RH
2713
2714 /* SSE values are returned in XMM0. */
2715 /* ??? Except when it doesn't exist? We have a choice of
2716 either (1) being abi incompatible with a -march switch,
2717 or (2) generating an error here. Given no good solution,
2718 I think the safest thing is one warning. The user won't
2719 be able to use -Werror, but... */
2720 if (size == 16)
2721 {
2722 static bool warned;
2723
2724 if (TARGET_SSE)
2725 return 0;
2726
2727 if (!warned)
2728 {
2729 warned = true;
2730 warning ("SSE vector return without SSE enabled "
2731 "changes the ABI");
2732 }
2733 return 1;
2734 }
53c17031 2735 }
a30b6839
RH
2736
2737 if (mode == TFmode)
2738 return 0;
2739 if (size > 12)
2740 return 1;
2741 return 0;
53c17031
JH
2742}
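   Illustrative ia32 return types (hypothetical) for the checks above:

	struct pair { int a, b; };  /* BLKmode: in memory, unless the 8-byte
				       MS aggregate case applies             */
	typedef int mmxv __attribute__ ((vector_size (8)));
				    /* 8 bytes: returned on the stack        */
	typedef int ssev __attribute__ ((vector_size (16)));
				    /* 16 bytes: XMM0 when SSE is enabled    */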
2743
2744/* Define how to find the value returned by a library function
2745 assuming the value has mode MODE. */
2746rtx
b96a374d 2747ix86_libcall_value (enum machine_mode mode)
53c17031
JH
2748{
2749 if (TARGET_64BIT)
2750 {
2751 switch (mode)
2752 {
2753 case SFmode:
2754 case SCmode:
2755 case DFmode:
2756 case DCmode:
2757 return gen_rtx_REG (mode, FIRST_SSE_REG);
2758 case TFmode:
2759 case TCmode:
2760 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2761 default:
2762 return gen_rtx_REG (mode, 0);
2763 }
2764 }
2765 else
b069de3b
SS
2766 return gen_rtx_REG (mode, ix86_value_regno (mode));
2767}
2768
2769/* Given a mode, return the register to use for a return value. */
2770
2771static int
b96a374d 2772ix86_value_regno (enum machine_mode mode)
b069de3b 2773{
a30b6839 2774 /* Floating point return values in %st(0). */
b069de3b
SS
2775 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
2776 return FIRST_FLOAT_REG;
a30b6839
RH
2777 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
2778 we prevent this case when sse is not available. */
2779 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
b069de3b 2780 return FIRST_SSE_REG;
a30b6839 2781 /* Everything else in %eax. */
b069de3b 2782 return 0;
53c17031 2783}
ad919812
JH
2784\f
2785/* Create the va_list data type. */
53c17031 2786
ad919812 2787tree
b96a374d 2788ix86_build_va_list (void)
ad919812
JH
2789{
2790 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
53c17031 2791
ad919812
JH
2792 /* For i386 we use plain pointer to argument area. */
2793 if (!TARGET_64BIT)
2794 return build_pointer_type (char_type_node);
2795
f1e639b1 2796 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
ad919812
JH
2797 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2798
fce5a9f2 2799 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
ad919812 2800 unsigned_type_node);
fce5a9f2 2801 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
ad919812
JH
2802 unsigned_type_node);
2803 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2804 ptr_type_node);
2805 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2806 ptr_type_node);
2807
2808 DECL_FIELD_CONTEXT (f_gpr) = record;
2809 DECL_FIELD_CONTEXT (f_fpr) = record;
2810 DECL_FIELD_CONTEXT (f_ovf) = record;
2811 DECL_FIELD_CONTEXT (f_sav) = record;
2812
2813 TREE_CHAIN (record) = type_decl;
2814 TYPE_NAME (record) = type_decl;
2815 TYPE_FIELDS (record) = f_gpr;
2816 TREE_CHAIN (f_gpr) = f_fpr;
2817 TREE_CHAIN (f_fpr) = f_ovf;
2818 TREE_CHAIN (f_ovf) = f_sav;
2819
2820 layout_type (record);
2821
2822 /* The correct type is an array type of one element. */
2823 return build_array_type (record, build_index_type (size_zero_node));
2824}
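   A C-level sketch (per the x86-64 psABI) of the record assembled above;
   the typedef name at the end is illustrative, not from this file:

	typedef struct __va_list_tag
	{
	  unsigned int gp_offset;  /* byte offset of the next GP register
				      in the register save area             */
	  unsigned int fp_offset;  /* byte offset of the next SSE register  */
	  void *overflow_arg_area; /* next argument passed on the stack     */
	  void *reg_save_area;     /* start of the register save area       */
	} __va_list_tag;

	typedef __va_list_tag example_va_list[1]; /* array type of one element */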
2825
2826/* Perform any actions needed for a function that is receiving a
fce5a9f2 2827 variable number of arguments.
ad919812
JH
2828
2829 CUM is as above.
2830
2831 MODE and TYPE are the mode and type of the current parameter.
2832
2833 PRETEND_SIZE is a variable that should be set to the amount of stack
2834 that must be pushed by the prolog to pretend that our caller pushed
2835 it.
2836
2837 Normally, this macro will push all remaining incoming registers on the
2838 stack and set PRETEND_SIZE to the length of the registers pushed. */
2839
2840void
b96a374d
AJ
2841ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
2842 tree type, int *pretend_size ATTRIBUTE_UNUSED,
2843 int no_rtl)
ad919812
JH
2844{
2845 CUMULATIVE_ARGS next_cum;
2846 rtx save_area = NULL_RTX, mem;
2847 rtx label;
2848 rtx label_ref;
2849 rtx tmp_reg;
2850 rtx nsse_reg;
2851 int set;
2852 tree fntype;
2853 int stdarg_p;
2854 int i;
2855
2856 if (!TARGET_64BIT)
2857 return;
2858
2859 /* Indicate to allocate space on the stack for varargs save area. */
2860 ix86_save_varrargs_registers = 1;
2861
5474eed5
JH
2862 cfun->stack_alignment_needed = 128;
2863
ad919812
JH
2864 fntype = TREE_TYPE (current_function_decl);
2865 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2866 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2867 != void_type_node));
2868
2869 /* For varargs, we do not want to skip the dummy va_dcl argument.
2870 For stdargs, we do want to skip the last named argument. */
2871 next_cum = *cum;
2872 if (stdarg_p)
2873 function_arg_advance (&next_cum, mode, type, 1);
2874
2875 if (!no_rtl)
2876 save_area = frame_pointer_rtx;
2877
2878 set = get_varargs_alias_set ();
2879
2880 for (i = next_cum.regno; i < ix86_regparm; i++)
2881 {
2882 mem = gen_rtx_MEM (Pmode,
2883 plus_constant (save_area, i * UNITS_PER_WORD));
0692acba 2884 set_mem_alias_set (mem, set);
ad919812
JH
2885 emit_move_insn (mem, gen_rtx_REG (Pmode,
2886 x86_64_int_parameter_registers[i]));
2887 }
2888
2889 if (next_cum.sse_nregs)
2890 {
2891 /* Now emit code to save SSE registers. The AX parameter contains the
d1f87653 2892 number of SSE parameter registers used to call this function. We use the
ad919812
JH
2893 sse_prologue_save insn template, which produces a computed jump across
2894 the SSE saves. We need some preparation work to get this working. */
2895
2896 label = gen_label_rtx ();
2897 label_ref = gen_rtx_LABEL_REF (Pmode, label);
2898
2899 /* Compute address to jump to :
2900 label - 4*eax + nnamed_sse_arguments*4 (each save insn is 4 bytes) */
2901 tmp_reg = gen_reg_rtx (Pmode);
2902 nsse_reg = gen_reg_rtx (Pmode);
2903 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
2904 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
44cf5b6a 2905 gen_rtx_MULT (Pmode, nsse_reg,
ad919812
JH
2906 GEN_INT (4))));
2907 if (next_cum.sse_regno)
2908 emit_move_insn
2909 (nsse_reg,
2910 gen_rtx_CONST (DImode,
2911 gen_rtx_PLUS (DImode,
2912 label_ref,
2913 GEN_INT (next_cum.sse_regno * 4))));
2914 else
2915 emit_move_insn (nsse_reg, label_ref);
2916 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
2917
2918 /* Compute the address of the memory block we save into. We always use a
2919 pointer pointing 127 bytes past the first byte to store - this is needed
2920 to keep the instruction size limited to 4 bytes. */
2921 tmp_reg = gen_reg_rtx (Pmode);
8ac61af7
RK
2922 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2923 plus_constant (save_area,
2924 8 * REGPARM_MAX + 127)));
ad919812 2925 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
14f73b5a 2926 set_mem_alias_set (mem, set);
8ac61af7 2927 set_mem_align (mem, BITS_PER_WORD);
ad919812
JH
2928
2929 /* And finally do the dirty job! */
8ac61af7
RK
2930 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
2931 GEN_INT (next_cum.sse_regno), label));
ad919812
JH
2932 }
2933
2934}
2935
2936/* Implement va_start. */
2937
2938void
b96a374d 2939ix86_va_start (tree valist, rtx nextarg)
ad919812
JH
2940{
2941 HOST_WIDE_INT words, n_gpr, n_fpr;
2942 tree f_gpr, f_fpr, f_ovf, f_sav;
2943 tree gpr, fpr, ovf, sav, t;
2944
2945 /* Only 64bit target needs something special. */
2946 if (!TARGET_64BIT)
2947 {
e5faf155 2948 std_expand_builtin_va_start (valist, nextarg);
ad919812
JH
2949 return;
2950 }
2951
2952 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2953 f_fpr = TREE_CHAIN (f_gpr);
2954 f_ovf = TREE_CHAIN (f_fpr);
2955 f_sav = TREE_CHAIN (f_ovf);
2956
2957 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2958 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2959 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2960 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2961 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2962
2963 /* Count number of gp and fp argument registers used. */
2964 words = current_function_args_info.words;
2965 n_gpr = current_function_args_info.regno;
2966 n_fpr = current_function_args_info.sse_regno;
2967
2968 if (TARGET_DEBUG_ARG)
2969 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
b531087a 2970 (int) words, (int) n_gpr, (int) n_fpr);
ad919812
JH
2971
2972 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
2973 build_int_2 (n_gpr * 8, 0));
2974 TREE_SIDE_EFFECTS (t) = 1;
2975 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2976
2977 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
2978 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
2979 TREE_SIDE_EFFECTS (t) = 1;
2980 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2981
2982 /* Find the overflow area. */
2983 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
2984 if (words != 0)
2985 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
2986 build_int_2 (words * UNITS_PER_WORD, 0));
2987 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2988 TREE_SIDE_EFFECTS (t) = 1;
2989 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2990
2991 /* Find the register save area.
2992 Prologue of the function save it right above stack frame. */
2993 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
2994 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
2995 TREE_SIDE_EFFECTS (t) = 1;
2996 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2997}
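   A worked instance of the initialization above (hypothetical prototype):

	/* For   void log_it (const char *fmt, double d, ...);
	   one GP register (fmt) and one SSE register (d) are named, so
	   va_start stores gp_offset = 1 * 8 = 8 and, with REGPARM_MAX == 6,
	   fp_offset = 1 * 16 + 8 * 6 = 64; overflow_arg_area points past any
	   named words on the stack, and reg_save_area points at the block
	   the prologue saved right above the frame.  */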
2998
2999/* Implement va_arg. */
3000rtx
b96a374d 3001ix86_va_arg (tree valist, tree type)
ad919812 3002{
0139adca 3003 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
ad919812
JH
3004 tree f_gpr, f_fpr, f_ovf, f_sav;
3005 tree gpr, fpr, ovf, sav, t;
b932f770 3006 int size, rsize;
ad919812
JH
3007 rtx lab_false, lab_over = NULL_RTX;
3008 rtx addr_rtx, r;
3009 rtx container;
09b2e78d 3010 int indirect_p = 0;
ad919812
JH
3011
3012 /* Only 64bit target needs something special. */
3013 if (!TARGET_64BIT)
3014 {
3015 return std_expand_builtin_va_arg (valist, type);
3016 }
3017
3018 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3019 f_fpr = TREE_CHAIN (f_gpr);
3020 f_ovf = TREE_CHAIN (f_fpr);
3021 f_sav = TREE_CHAIN (f_ovf);
3022
3023 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3024 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
3025 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
3026 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
3027 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
3028
3029 size = int_size_in_bytes (type);
09b2e78d
ZD
3030 if (size == -1)
3031 {
3032 /* Passed by reference. */
3033 indirect_p = 1;
3034 type = build_pointer_type (type);
3035 size = int_size_in_bytes (type);
3036 }
ad919812
JH
3037 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
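 /* (Expository example, not in the original source.) With
 UNITS_PER_WORD == 8, a 12-byte struct gives rsize == 2, i.e. it
 occupies two stack words in the overflow area. */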
3038
3039 container = construct_container (TYPE_MODE (type), type, 0,
3040 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
 3041 /* Pull the value out of the saved registers ... */
 3042
 3043
3044
3045 addr_rtx = gen_reg_rtx (Pmode);
3046
3047 if (container)
3048 {
3049 rtx int_addr_rtx, sse_addr_rtx;
3050 int needed_intregs, needed_sseregs;
3051 int need_temp;
3052
3053 lab_over = gen_label_rtx ();
3054 lab_false = gen_label_rtx ();
8bad7136 3055
ad919812
JH
3056 examine_argument (TYPE_MODE (type), type, 0,
3057 &needed_intregs, &needed_sseregs);
3058
3059
3060 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
3061 || TYPE_ALIGN (type) > 128);
3062
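 /* (Expository comment, not in the original source.) The save-area
 slots themselves are only 8-byte (GP) resp. 16-byte (SSE) aligned,
 so a type with stricter alignment cannot be addressed there
 directly and must be copied into an aligned temporary first. */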
d1f87653 3063 /* In case we are passing a structure, verify that it is a consecutive block
ad919812
JH
 3064 in the register save area. If not, we need to do moves. */
3065 if (!need_temp && !REG_P (container))
3066 {
d1f87653 3067 /* Verify that all registers are strictly consecutive. */
ad919812
JH
3068 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
3069 {
3070 int i;
3071
3072 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3073 {
3074 rtx slot = XVECEXP (container, 0, i);
b531087a 3075 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
ad919812
JH
3076 || INTVAL (XEXP (slot, 1)) != i * 16)
3077 need_temp = 1;
3078 }
3079 }
3080 else
3081 {
3082 int i;
3083
3084 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3085 {
3086 rtx slot = XVECEXP (container, 0, i);
b531087a 3087 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
ad919812
JH
3088 || INTVAL (XEXP (slot, 1)) != i * 8)
3089 need_temp = 1;
3090 }
3091 }
3092 }
3093 if (!need_temp)
3094 {
3095 int_addr_rtx = addr_rtx;
3096 sse_addr_rtx = addr_rtx;
3097 }
3098 else
3099 {
3100 int_addr_rtx = gen_reg_rtx (Pmode);
3101 sse_addr_rtx = gen_reg_rtx (Pmode);
3102 }
3103 /* First ensure that we fit completely in registers. */
3104 if (needed_intregs)
3105 {
3106 emit_cmp_and_jump_insns (expand_expr
3107 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
3108 GEN_INT ((REGPARM_MAX - needed_intregs +
3109 1) * 8), GE, const1_rtx, SImode,
d43e0b7d 3110 1, lab_false);
ad919812
JH
3111 }
3112 if (needed_sseregs)
3113 {
3114 emit_cmp_and_jump_insns (expand_expr
3115 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
3116 GEN_INT ((SSE_REGPARM_MAX -
3117 needed_sseregs + 1) * 16 +
3118 REGPARM_MAX * 8), GE, const1_rtx,
d43e0b7d 3119 SImode, 1, lab_false);
ad919812
JH
3120 }
3121
3122 /* Compute index to start of area used for integer regs. */
3123 if (needed_intregs)
3124 {
3125 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
3126 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
3127 if (r != int_addr_rtx)
3128 emit_move_insn (int_addr_rtx, r);
3129 }
3130 if (needed_sseregs)
3131 {
3132 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
3133 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
3134 if (r != sse_addr_rtx)
3135 emit_move_insn (sse_addr_rtx, r);
3136 }
3137 if (need_temp)
3138 {
3139 int i;
3140 rtx mem;
70642ee3 3141 rtx x;
ad919812 3142
b932f770 3143 /* Never use the memory itself, as it has the wrong alias set. */
70642ee3
JH
3144 x = XEXP (assign_temp (type, 0, 1, 0), 0);
3145 mem = gen_rtx_MEM (BLKmode, x);
3146 force_operand (x, addr_rtx);
0692acba 3147 set_mem_alias_set (mem, get_varargs_alias_set ());
8ac61af7 3148 set_mem_align (mem, BITS_PER_UNIT);
b932f770 3149
ad919812
JH
3150 for (i = 0; i < XVECLEN (container, 0); i++)
3151 {
3152 rtx slot = XVECEXP (container, 0, i);
3153 rtx reg = XEXP (slot, 0);
3154 enum machine_mode mode = GET_MODE (reg);
3155 rtx src_addr;
3156 rtx src_mem;
3157 int src_offset;
3158 rtx dest_mem;
3159
3160 if (SSE_REGNO_P (REGNO (reg)))
3161 {
3162 src_addr = sse_addr_rtx;
3163 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
3164 }
3165 else
3166 {
3167 src_addr = int_addr_rtx;
3168 src_offset = REGNO (reg) * 8;
3169 }
3170 src_mem = gen_rtx_MEM (mode, src_addr);
0692acba 3171 set_mem_alias_set (src_mem, get_varargs_alias_set ());
ad919812
JH
3172 src_mem = adjust_address (src_mem, mode, src_offset);
3173 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
ad919812
JH
3174 emit_move_insn (dest_mem, src_mem);
3175 }
3176 }
3177
3178 if (needed_intregs)
3179 {
3180 t =
3181 build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
3182 build_int_2 (needed_intregs * 8, 0));
3183 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
3184 TREE_SIDE_EFFECTS (t) = 1;
3185 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3186 }
3187 if (needed_sseregs)
3188 {
3189 t =
3190 build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
3191 build_int_2 (needed_sseregs * 16, 0));
3192 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
3193 TREE_SIDE_EFFECTS (t) = 1;
3194 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3195 }
3196
3197 emit_jump_insn (gen_jump (lab_over));
3198 emit_barrier ();
3199 emit_label (lab_false);
3200 }
3201
3202 /* ... otherwise out of the overflow area. */
3203
3204 /* Care for on-stack alignment if needed. */
3205 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
3206 t = ovf;
3207 else
3208 {
3209 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
3210 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
3211 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
3212 }
3213 t = save_expr (t);
3214
3215 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
3216 if (r != addr_rtx)
3217 emit_move_insn (addr_rtx, r);
3218
3219 t =
3220 build (PLUS_EXPR, TREE_TYPE (t), t,
3221 build_int_2 (rsize * UNITS_PER_WORD, 0));
3222 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3223 TREE_SIDE_EFFECTS (t) = 1;
3224 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3225
3226 if (container)
3227 emit_label (lab_over);
3228
09b2e78d
ZD
3229 if (indirect_p)
3230 {
3231 r = gen_rtx_MEM (Pmode, addr_rtx);
3232 set_mem_alias_set (r, get_varargs_alias_set ());
3233 emit_move_insn (addr_rtx, r);
3234 }
3235
ad919812
JH
3236 return addr_rtx;
3237}
3238\f
c3c637e3
GS
 3239/* Return nonzero if OP is either an i387 or an SSE fp register. */
3240int
b96a374d 3241any_fp_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
c3c637e3
GS
3242{
3243 return ANY_FP_REG_P (op);
3244}
3245
3246/* Return nonzero if OP is an i387 fp register. */
3247int
b96a374d 3248fp_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
c3c637e3
GS
3249{
3250 return FP_REG_P (op);
3251}
3252
3253/* Return nonzero if OP is a non-fp register_operand. */
3254int
b96a374d 3255register_and_not_any_fp_reg_operand (rtx op, enum machine_mode mode)
c3c637e3
GS
3256{
3257 return register_operand (op, mode) && !ANY_FP_REG_P (op);
3258}
3259
40b982a9 3260/* Return nonzero if OP is a register operand other than an
c3c637e3
GS
3261 i387 fp register. */
3262int
b96a374d 3263register_and_not_fp_reg_operand (rtx op, enum machine_mode mode)
c3c637e3
GS
3264{
3265 return register_operand (op, mode) && !FP_REG_P (op);
3266}
3267
7dd4b4a3
JH
 3268/* Return nonzero if OP is a general operand representable on x86_64. */
3269
3270int
b96a374d 3271x86_64_general_operand (rtx op, enum machine_mode mode)
7dd4b4a3
JH
3272{
3273 if (!TARGET_64BIT)
3274 return general_operand (op, mode);
3275 if (nonimmediate_operand (op, mode))
3276 return 1;
c05dbe81 3277 return x86_64_sign_extended_value (op);
7dd4b4a3
JH
3278}
3279
 3280/* Return nonzero if OP is a general operand representable on x86_64
d6a7951f 3281 as either a sign-extended or zero-extended constant. */
7dd4b4a3
JH
3282
3283int
b96a374d 3284x86_64_szext_general_operand (rtx op, enum machine_mode mode)
7dd4b4a3
JH
3285{
3286 if (!TARGET_64BIT)
3287 return general_operand (op, mode);
3288 if (nonimmediate_operand (op, mode))
3289 return 1;
c05dbe81 3290 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
7dd4b4a3
JH
3291}
3292
 3293/* Return nonzero if OP is a nonmemory operand representable on x86_64. */
3294
3295int
b96a374d 3296x86_64_nonmemory_operand (rtx op, enum machine_mode mode)
7dd4b4a3
JH
3297{
3298 if (!TARGET_64BIT)
3299 return nonmemory_operand (op, mode);
3300 if (register_operand (op, mode))
3301 return 1;
c05dbe81 3302 return x86_64_sign_extended_value (op);
7dd4b4a3
JH
3303}
3304
 3305/* Return nonzero if OP is a nonmemory operand acceptable by movabs patterns. */
3306
3307int
b96a374d 3308x86_64_movabs_operand (rtx op, enum machine_mode mode)
7dd4b4a3
JH
3309{
3310 if (!TARGET_64BIT || !flag_pic)
3311 return nonmemory_operand (op, mode);
c05dbe81 3312 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
7dd4b4a3
JH
3313 return 1;
3314 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
3315 return 1;
3316 return 0;
3317}
3318
7e6dc358
JJ
3319/* Return nonzero if OPNUM's MEM should be matched
3320 in movabs* patterns. */
3321
3322int
3323ix86_check_movabs (rtx insn, int opnum)
3324{
3325 rtx set, mem;
3326
3327 set = PATTERN (insn);
3328 if (GET_CODE (set) == PARALLEL)
3329 set = XVECEXP (set, 0, 0);
3330 if (GET_CODE (set) != SET)
3331 abort ();
3332 mem = XEXP (set, opnum);
3333 while (GET_CODE (mem) == SUBREG)
3334 mem = SUBREG_REG (mem);
3335 if (GET_CODE (mem) != MEM)
3336 abort ();
3337 return (volatile_ok || !MEM_VOLATILE_P (mem));
3338}
3339
7dd4b4a3
JH
 3340/* Return nonzero if OP is a nonmemory operand representable on x86_64. */
3341
3342int
b96a374d 3343x86_64_szext_nonmemory_operand (rtx op, enum machine_mode mode)
7dd4b4a3
JH
3344{
3345 if (!TARGET_64BIT)
3346 return nonmemory_operand (op, mode);
3347 if (register_operand (op, mode))
3348 return 1;
c05dbe81 3349 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
7dd4b4a3
JH
3350}
3351
 3352/* Return nonzero if OP is an immediate operand representable on x86_64. */
3353
3354int
b96a374d 3355x86_64_immediate_operand (rtx op, enum machine_mode mode)
7dd4b4a3
JH
3356{
3357 if (!TARGET_64BIT)
3358 return immediate_operand (op, mode);
c05dbe81 3359 return x86_64_sign_extended_value (op);
7dd4b4a3
JH
3360}
3361
 3362/* Return nonzero if OP is an immediate operand that is representable
 zero-extended on x86_64. */
3363
3364int
b96a374d 3365x86_64_zext_immediate_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7dd4b4a3
JH
3366{
3367 return x86_64_zero_extended_value (op);
3368}
3369
8bad7136
JL
3370/* Return nonzero if OP is (const_int 1), else return zero. */
3371
3372int
b96a374d 3373const_int_1_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8bad7136 3374{
dac4a0de 3375 return op == const1_rtx;
8bad7136
JL
3376}
3377
794a292d
JJ
3378/* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
3379 for shift & compare patterns, as shifting by 0 does not change flags),
3380 else return zero. */
3381
3382int
b96a374d 3383const_int_1_31_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
794a292d
JJ
3384{
3385 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31);
3386}
3387
e075ae69
RH
3388/* Returns 1 if OP is either a symbol reference or a sum of a symbol
3389 reference and a constant. */
b08de47e
MM
3390
3391int
b96a374d 3392symbolic_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
2a2ab3f9 3393{
e075ae69 3394 switch (GET_CODE (op))
2a2ab3f9 3395 {
e075ae69
RH
3396 case SYMBOL_REF:
3397 case LABEL_REF:
3398 return 1;
3399
3400 case CONST:
3401 op = XEXP (op, 0);
3402 if (GET_CODE (op) == SYMBOL_REF
3403 || GET_CODE (op) == LABEL_REF
3404 || (GET_CODE (op) == UNSPEC
8ee41eaf
RH
3405 && (XINT (op, 1) == UNSPEC_GOT
3406 || XINT (op, 1) == UNSPEC_GOTOFF
3407 || XINT (op, 1) == UNSPEC_GOTPCREL)))
e075ae69
RH
3408 return 1;
3409 if (GET_CODE (op) != PLUS
3410 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3411 return 0;
3412
3413 op = XEXP (op, 0);
3414 if (GET_CODE (op) == SYMBOL_REF
3415 || GET_CODE (op) == LABEL_REF)
3416 return 1;
3417 /* Only @GOTOFF gets offsets. */
3418 if (GET_CODE (op) != UNSPEC
8ee41eaf 3419 || XINT (op, 1) != UNSPEC_GOTOFF)
e075ae69
RH
3420 return 0;
3421
3422 op = XVECEXP (op, 0, 0);
3423 if (GET_CODE (op) == SYMBOL_REF
3424 || GET_CODE (op) == LABEL_REF)
3425 return 1;
3426 return 0;
3427
3428 default:
3429 return 0;
2a2ab3f9
JVA
3430 }
3431}
2a2ab3f9 3432
e075ae69 3433/* Return true if the operand contains a @GOT or @GOTOFF reference. */
3b3c6a3f 3434
e075ae69 3435int
b96a374d 3436pic_symbolic_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
e075ae69 3437{
6eb791fc
JH
3438 if (GET_CODE (op) != CONST)
3439 return 0;
3440 op = XEXP (op, 0);
3441 if (TARGET_64BIT)
3442 {
a0c8285b
JH
3443 if (GET_CODE (op) == UNSPEC
3444 && XINT (op, 1) == UNSPEC_GOTPCREL)
3445 return 1;
3446 if (GET_CODE (op) == PLUS
fdacb904
JH
3447 && GET_CODE (XEXP (op, 0)) == UNSPEC
3448 && XINT (XEXP (op, 0), 1) == UNSPEC_GOTPCREL)
6eb791fc
JH
3449 return 1;
3450 }
fce5a9f2 3451 else
2a2ab3f9 3452 {
e075ae69
RH
3453 if (GET_CODE (op) == UNSPEC)
3454 return 1;
3455 if (GET_CODE (op) != PLUS
3456 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3457 return 0;
3458 op = XEXP (op, 0);
3459 if (GET_CODE (op) == UNSPEC)
3460 return 1;
2a2ab3f9 3461 }
e075ae69 3462 return 0;
2a2ab3f9 3463}
2a2ab3f9 3464
623fe810
RH
3465/* Return true if OP is a symbolic operand that resolves locally. */
3466
3467static int
b96a374d 3468local_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
623fe810 3469{
623fe810
RH
3470 if (GET_CODE (op) == CONST
3471 && GET_CODE (XEXP (op, 0)) == PLUS
c05dbe81 3472 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
623fe810
RH
3473 op = XEXP (XEXP (op, 0), 0);
3474
8bfb45f8
JJ
3475 if (GET_CODE (op) == LABEL_REF)
3476 return 1;
3477
623fe810
RH
3478 if (GET_CODE (op) != SYMBOL_REF)
3479 return 0;
3480
2ae5ae57 3481 if (SYMBOL_REF_LOCAL_P (op))
623fe810
RH
3482 return 1;
3483
3484 /* There is, however, a not insubstantial body of code in the rest of
fce5a9f2 3485 the compiler that assumes it can just stick the results of
623fe810
RH
3486 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
3487 /* ??? This is a hack. Should update the body of the compiler to
fb49053f 3488 always create a DECL and invoke targetm.encode_section_info. */
623fe810
RH
3489 if (strncmp (XSTR (op, 0), internal_label_prefix,
3490 internal_label_prefix_len) == 0)
3491 return 1;
3492
3493 return 0;
3494}
3495
2ae5ae57 3496/* Test for various thread-local symbols. */
f996902d
RH
3497
3498int
b96a374d 3499tls_symbolic_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
f996902d 3500{
f996902d
RH
3501 if (GET_CODE (op) != SYMBOL_REF)
3502 return 0;
2ae5ae57 3503 return SYMBOL_REF_TLS_MODEL (op);
f996902d
RH
3504}
3505
2ae5ae57 3506static inline int
b96a374d 3507tls_symbolic_operand_1 (rtx op, enum tls_model kind)
f996902d 3508{
f996902d
RH
3509 if (GET_CODE (op) != SYMBOL_REF)
3510 return 0;
2ae5ae57 3511 return SYMBOL_REF_TLS_MODEL (op) == kind;
f996902d
RH
3512}
3513
3514int
b96a374d
AJ
3515global_dynamic_symbolic_operand (register rtx op,
3516 enum machine_mode mode ATTRIBUTE_UNUSED)
f996902d
RH
3517{
3518 return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
3519}
3520
3521int
b96a374d
AJ
3522local_dynamic_symbolic_operand (register rtx op,
3523 enum machine_mode mode ATTRIBUTE_UNUSED)
f996902d
RH
3524{
3525 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
3526}
3527
3528int
b96a374d
AJ
3529initial_exec_symbolic_operand (register rtx op,
3530 enum machine_mode mode ATTRIBUTE_UNUSED)
f996902d
RH
3531{
3532 return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
3533}
3534
3535int
b96a374d
AJ
3536local_exec_symbolic_operand (register rtx op,
3537 enum machine_mode mode ATTRIBUTE_UNUSED)
f996902d
RH
3538{
3539 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
3540}
3541
28d52ffb
RH
3542/* Test for a valid operand for a call instruction. Don't allow the
3543 arg pointer register or virtual regs since they may decay into
3544 reg + const, which the patterns can't handle. */
2a2ab3f9 3545
e075ae69 3546int
b96a374d 3547call_insn_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
e075ae69 3548{
e075ae69
RH
3549 /* Disallow indirect through a virtual register. This leads to
3550 compiler aborts when trying to eliminate them. */
3551 if (GET_CODE (op) == REG
3552 && (op == arg_pointer_rtx
564d80f4 3553 || op == frame_pointer_rtx
e075ae69
RH
3554 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3555 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3556 return 0;
2a2ab3f9 3557
28d52ffb
RH
3558 /* Disallow `call 1234'. Due to varying assembler lameness this
3559 gets either rejected or translated to `call .+1234'. */
3560 if (GET_CODE (op) == CONST_INT)
3561 return 0;
3562
cbbf65e0
RH
3563 /* Explicitly allow SYMBOL_REF even if pic. */
3564 if (GET_CODE (op) == SYMBOL_REF)
e075ae69 3565 return 1;
2a2ab3f9 3566
cbbf65e0
RH
3567 /* Otherwise we can allow any general_operand in the address. */
3568 return general_operand (op, Pmode);
e075ae69 3569}
79325812 3570
4977bab6
ZW
 3571/* Test for a valid operand for a sibcall instruction. Don't allow the
3572 arg pointer register or virtual regs since they may decay into
3573 reg + const, which the patterns can't handle. */
3574
3575int
b96a374d 3576sibcall_insn_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4977bab6
ZW
3577{
3578 /* Disallow indirect through a virtual register. This leads to
3579 compiler aborts when trying to eliminate them. */
3580 if (GET_CODE (op) == REG
3581 && (op == arg_pointer_rtx
3582 || op == frame_pointer_rtx
3583 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3584 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3585 return 0;
3586
3587 /* Explicitly allow SYMBOL_REF even if pic. */
3588 if (GET_CODE (op) == SYMBOL_REF)
3589 return 1;
3590
3591 /* Otherwise we can only allow register operands. */
3592 return register_operand (op, Pmode);
3593}
3594
e075ae69 3595int
b96a374d 3596constant_call_address_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
e075ae69 3597{
eaf19aba
JJ
3598 if (GET_CODE (op) == CONST
3599 && GET_CODE (XEXP (op, 0)) == PLUS
3600 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3601 op = XEXP (XEXP (op, 0), 0);
e1ff012c 3602 return GET_CODE (op) == SYMBOL_REF;
e075ae69 3603}
2a2ab3f9 3604
e075ae69 3605/* Match exactly zero and one. */
e9a25f70 3606
0f290768 3607int
b96a374d 3608const0_operand (register rtx op, enum machine_mode mode)
e075ae69
RH
3609{
3610 return op == CONST0_RTX (mode);
3611}
e9a25f70 3612
0f290768 3613int
b96a374d 3614const1_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
e075ae69
RH
3615{
3616 return op == const1_rtx;
3617}
2a2ab3f9 3618
e075ae69 3619/* Match 2, 4, or 8. Used for leal multiplicands. */
e9a25f70 3620
e075ae69 3621int
b96a374d 3622const248_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
e075ae69
RH
3623{
3624 return (GET_CODE (op) == CONST_INT
3625 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
3626}
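/* (Expository note, not in the original source.) 2, 4 and 8 are
 exactly the scale factors the lea addressing mode can encode, as in
 "leal (%eax,%ebx,4), %ecx"; multiplies by 3, 5 or 9 reuse the base,
 e.g. "leal (%eax,%eax,2), %eax". */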
e9a25f70 3627
ebe75517
JH
3628int
3629const_0_to_3_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3630{
3631 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 4);
3632}
3633
3634int
3635const_0_to_7_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3636{
3637 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 8);
3638}
3639
3640int
3641const_0_to_15_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3642{
3643 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 16);
3644}
3645
3646int
3647const_0_to_255_operand (register rtx op,
3648 enum machine_mode mode ATTRIBUTE_UNUSED)
3649{
3650 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 256);
3651}
3652
3653
d1f87653 3654/* True if this is a constant appropriate for an increment or decrement. */
81fd0956 3655
e075ae69 3656int
b96a374d 3657incdec_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
e075ae69 3658{
f5143c46 3659 /* On Pentium4, the inc and dec operations cause an extra dependency on the flags
b4e89e2d
JH
 3660 register, since the carry flag is not set. */
3661 if (TARGET_PENTIUM4 && !optimize_size)
3662 return 0;
2b1c08f5 3663 return op == const1_rtx || op == constm1_rtx;
e075ae69 3664}
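/* (Expository note, not in the original source.) Rejecting the
 constant here makes the patterns emit e.g. "addl $1, %eax" instead
 of "incl %eax" on the Pentium 4: inc/dec leave CF untouched, so a
 later flags consumer would have to merge stale and fresh flag bits,
 causing a partial-flags stall. */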
2a2ab3f9 3665
371bc54b
JH
3666/* Return nonzero if OP is acceptable as operand of DImode shift
3667 expander. */
3668
3669int
b96a374d 3670shiftdi_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
371bc54b
JH
3671{
3672 if (TARGET_64BIT)
3673 return nonimmediate_operand (op, mode);
3674 else
3675 return register_operand (op, mode);
3676}
3677
0f290768 3678/* Return false if this is the stack pointer, or any other fake
e075ae69
RH
3679 register eliminable to the stack pointer. Otherwise, this is
3680 a register operand.
2a2ab3f9 3681
e075ae69
RH
 3682 This is used to prevent esp from being used as an index reg,
 3683 which would only happen in pathological cases. */
5f1ec3e6 3684
e075ae69 3685int
b96a374d 3686reg_no_sp_operand (register rtx op, enum machine_mode mode)
e075ae69
RH
3687{
3688 rtx t = op;
3689 if (GET_CODE (t) == SUBREG)
3690 t = SUBREG_REG (t);
564d80f4 3691 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
e075ae69 3692 return 0;
2a2ab3f9 3693
e075ae69 3694 return register_operand (op, mode);
2a2ab3f9 3695}
b840bfb0 3696
915119a5 3697int
b96a374d 3698mmx_reg_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
915119a5
BS
3699{
3700 return MMX_REG_P (op);
3701}
3702
2c5a510c
RH
3703/* Return false if this is any eliminable register. Otherwise
3704 general_operand. */
3705
3706int
b96a374d 3707general_no_elim_operand (register rtx op, enum machine_mode mode)
2c5a510c
RH
3708{
3709 rtx t = op;
3710 if (GET_CODE (t) == SUBREG)
3711 t = SUBREG_REG (t);
3712 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3713 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3714 || t == virtual_stack_dynamic_rtx)
3715 return 0;
1020a5ab
RH
3716 if (REG_P (t)
3717 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3718 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3719 return 0;
2c5a510c
RH
3720
3721 return general_operand (op, mode);
3722}
3723
3724/* Return false if this is any eliminable register. Otherwise
3725 register_operand or const_int. */
3726
3727int
b96a374d 3728nonmemory_no_elim_operand (register rtx op, enum machine_mode mode)
2c5a510c
RH
3729{
3730 rtx t = op;
3731 if (GET_CODE (t) == SUBREG)
3732 t = SUBREG_REG (t);
3733 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3734 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3735 || t == virtual_stack_dynamic_rtx)
3736 return 0;
3737
3738 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3739}
3740
7ec70495
JH
3741/* Return false if this is any eliminable register or stack register,
3742 otherwise work like register_operand. */
3743
3744int
b96a374d 3745index_register_operand (register rtx op, enum machine_mode mode)
7ec70495
JH
3746{
3747 rtx t = op;
3748 if (GET_CODE (t) == SUBREG)
3749 t = SUBREG_REG (t);
3750 if (!REG_P (t))
3751 return 0;
3752 if (t == arg_pointer_rtx
3753 || t == frame_pointer_rtx
3754 || t == virtual_incoming_args_rtx
3755 || t == virtual_stack_vars_rtx
3756 || t == virtual_stack_dynamic_rtx
3757 || REGNO (t) == STACK_POINTER_REGNUM)
3758 return 0;
3759
3760 return general_operand (op, mode);
3761}
3762
e075ae69 3763/* Return true if op is a Q_REGS class register. */
b840bfb0 3764
e075ae69 3765int
b96a374d 3766q_regs_operand (register rtx op, enum machine_mode mode)
b840bfb0 3767{
e075ae69
RH
3768 if (mode != VOIDmode && GET_MODE (op) != mode)
3769 return 0;
3770 if (GET_CODE (op) == SUBREG)
3771 op = SUBREG_REG (op);
7799175f 3772 return ANY_QI_REG_P (op);
0f290768 3773}
b840bfb0 3774
4977bab6
ZW
 3775/* Return true if op is a flags register. */
3776
3777int
b96a374d 3778flags_reg_operand (register rtx op, enum machine_mode mode)
4977bab6
ZW
3779{
3780 if (mode != VOIDmode && GET_MODE (op) != mode)
3781 return 0;
3782 return REG_P (op) && REGNO (op) == FLAGS_REG && GET_MODE (op) != VOIDmode;
3783}
3784
e075ae69 3785/* Return true if op is a NON_Q_REGS class register. */
b840bfb0 3786
e075ae69 3787int
b96a374d 3788non_q_regs_operand (register rtx op, enum machine_mode mode)
e075ae69
RH
3789{
3790 if (mode != VOIDmode && GET_MODE (op) != mode)
3791 return 0;
3792 if (GET_CODE (op) == SUBREG)
3793 op = SUBREG_REG (op);
3794 return NON_QI_REG_P (op);
0f290768 3795}
b840bfb0 3796
4977bab6 3797int
b96a374d
AJ
3798zero_extended_scalar_load_operand (rtx op,
3799 enum machine_mode mode ATTRIBUTE_UNUSED)
4977bab6
ZW
3800{
3801 unsigned n_elts;
3802 if (GET_CODE (op) != MEM)
3803 return 0;
3804 op = maybe_get_pool_constant (op);
3805 if (!op)
3806 return 0;
3807 if (GET_CODE (op) != CONST_VECTOR)
3808 return 0;
3809 n_elts =
3810 (GET_MODE_SIZE (GET_MODE (op)) /
3811 GET_MODE_SIZE (GET_MODE_INNER (GET_MODE (op))));
3812 for (n_elts--; n_elts > 0; n_elts--)
3813 {
3814 rtx elt = CONST_VECTOR_ELT (op, n_elts);
3815 if (elt != CONST0_RTX (GET_MODE_INNER (GET_MODE (op))))
3816 return 0;
3817 }
3818 return 1;
3819}
3820
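/* (Expository example, not in the original source.) A constant-pool
 V4SFmode vector { x, 0, 0, 0 } passes the test above: every element
 but the first is zero, so the whole vector can be loaded with one
 zero-extending movss. */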
fdc4b40b
JH
 3821/* Return 1 when OP is an operand acceptable for a standard SSE move. */
3822int
b96a374d 3823vector_move_operand (rtx op, enum machine_mode mode)
fdc4b40b
JH
3824{
3825 if (nonimmediate_operand (op, mode))
3826 return 1;
3827 if (GET_MODE (op) != mode && mode != VOIDmode)
3828 return 0;
3829 return (op == CONST0_RTX (GET_MODE (op)));
3830}
3831
74dc3e94
RH
 3832/* Return true if op is a valid address that does not contain
 3833 a segment override. */
3834
3835int
b96a374d 3836no_seg_address_operand (register rtx op, enum machine_mode mode)
74dc3e94
RH
3837{
3838 struct ix86_address parts;
3839
3840 if (! address_operand (op, mode))
3841 return 0;
3842
3843 if (! ix86_decompose_address (op, &parts))
3844 abort ();
3845
3846 return parts.seg == SEG_DEFAULT;
3847}
3848
915119a5
BS
3849/* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
3850 insns. */
3851int
b96a374d 3852sse_comparison_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
915119a5
BS
3853{
3854 enum rtx_code code = GET_CODE (op);
a46d1d38
JH
3855 switch (code)
3856 {
3857 /* Operations supported directly. */
3858 case EQ:
3859 case LT:
3860 case LE:
3861 case UNORDERED:
3862 case NE:
3863 case UNGE:
3864 case UNGT:
3865 case ORDERED:
3866 return 1;
3867 /* These are equivalent to ones above in non-IEEE comparisons. */
3868 case UNEQ:
3869 case UNLT:
3870 case UNLE:
3871 case LTGT:
3872 case GE:
3873 case GT:
3874 return !TARGET_IEEE_FP;
3875 default:
3876 return 0;
3877 }
915119a5 3878}
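/* (Expository note, not in the original source.) The first group
 above corresponds one-to-one to the eight cmpps/cmpss immediate
 predicates (eq, lt, le, unord, neq, nlt, nle, ord); the second group
 differs from the first only in its treatment of NaNs, hence is
 usable only without TARGET_IEEE_FP. */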
9076b9c1 3879/* Return 1 if OP is a valid comparison operator in a valid mode. */
e075ae69 3880int
b96a374d 3881ix86_comparison_operator (register rtx op, enum machine_mode mode)
e075ae69 3882{
9076b9c1 3883 enum machine_mode inmode;
9a915772 3884 enum rtx_code code = GET_CODE (op);
3a3677ff
RH
3885 if (mode != VOIDmode && GET_MODE (op) != mode)
3886 return 0;
9a915772
JH
3887 if (GET_RTX_CLASS (code) != '<')
3888 return 0;
3889 inmode = GET_MODE (XEXP (op, 0));
3890
3891 if (inmode == CCFPmode || inmode == CCFPUmode)
3892 {
3893 enum rtx_code second_code, bypass_code;
3894 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3895 return (bypass_code == NIL && second_code == NIL);
3896 }
3897 switch (code)
3a3677ff
RH
3898 {
3899 case EQ: case NE:
3a3677ff 3900 return 1;
9076b9c1 3901 case LT: case GE:
7e08e190 3902 if (inmode == CCmode || inmode == CCGCmode
9076b9c1
JH
3903 || inmode == CCGOCmode || inmode == CCNOmode)
3904 return 1;
3905 return 0;
7e08e190 3906 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
7e08e190 3907 if (inmode == CCmode)
9076b9c1
JH
3908 return 1;
3909 return 0;
3910 case GT: case LE:
7e08e190 3911 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
9076b9c1
JH
3912 return 1;
3913 return 0;
3a3677ff
RH
3914 default:
3915 return 0;
3916 }
3917}
3918
e6e81735
JH
 3919/* Return 1 if OP is a valid comparison operator testing whether the
 3920 carry flag is set. */
3921int
b96a374d 3922ix86_carry_flag_operator (register rtx op, enum machine_mode mode)
e6e81735
JH
3923{
3924 enum machine_mode inmode;
3925 enum rtx_code code = GET_CODE (op);
3926
3927 if (mode != VOIDmode && GET_MODE (op) != mode)
3928 return 0;
3929 if (GET_RTX_CLASS (code) != '<')
3930 return 0;
3931 inmode = GET_MODE (XEXP (op, 0));
3932 if (GET_CODE (XEXP (op, 0)) != REG
3933 || REGNO (XEXP (op, 0)) != 17
3934 || XEXP (op, 1) != const0_rtx)
3935 return 0;
3936
3937 if (inmode == CCFPmode || inmode == CCFPUmode)
3938 {
3939 enum rtx_code second_code, bypass_code;
3940
3941 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3942 if (bypass_code != NIL || second_code != NIL)
3943 return 0;
3944 code = ix86_fp_compare_code_to_integer (code);
3945 }
3946 else if (inmode != CCmode)
3947 return 0;
3948 return code == LTU;
3949}
3950
9076b9c1 3951/* Return 1 if OP is a comparison operator that can be issued by fcmov. */
3a3677ff 3952
9076b9c1 3953int
b96a374d 3954fcmov_comparison_operator (register rtx op, enum machine_mode mode)
3a3677ff 3955{
b62d22a2 3956 enum machine_mode inmode;
9a915772 3957 enum rtx_code code = GET_CODE (op);
e6e81735 3958
3a3677ff
RH
3959 if (mode != VOIDmode && GET_MODE (op) != mode)
3960 return 0;
9a915772
JH
3961 if (GET_RTX_CLASS (code) != '<')
3962 return 0;
3963 inmode = GET_MODE (XEXP (op, 0));
3964 if (inmode == CCFPmode || inmode == CCFPUmode)
3a3677ff 3965 {
9a915772 3966 enum rtx_code second_code, bypass_code;
e6e81735 3967
9a915772
JH
3968 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3969 if (bypass_code != NIL || second_code != NIL)
3970 return 0;
3971 code = ix86_fp_compare_code_to_integer (code);
3972 }
 3973 /* The i387 supports only a limited set of condition codes. */
3974 switch (code)
3975 {
3976 case LTU: case GTU: case LEU: case GEU:
3977 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
9076b9c1
JH
3978 return 1;
3979 return 0;
9a915772
JH
3980 case ORDERED: case UNORDERED:
3981 case EQ: case NE:
3982 return 1;
3a3677ff
RH
3983 default:
3984 return 0;
3985 }
e075ae69 3986}
b840bfb0 3987
e9e80858
JH
 3988/* Return 1 if OP is a binary operator that can be promoted to a wider mode. */
3989
3990int
b96a374d
AJ
3991promotable_binary_operator (register rtx op,
3992 enum machine_mode mode ATTRIBUTE_UNUSED)
e9e80858
JH
3993{
3994 switch (GET_CODE (op))
3995 {
3996 case MULT:
 3997 /* Modern CPUs have the same latency for HImode and SImode multiply,
 3998 but the 386 and 486 do HImode multiply faster. */
9e555526 3999 return ix86_tune > PROCESSOR_I486;
e9e80858
JH
4000 case PLUS:
4001 case AND:
4002 case IOR:
4003 case XOR:
4004 case ASHIFT:
4005 return 1;
4006 default:
4007 return 0;
4008 }
4009}
4010
e075ae69
RH
4011/* Nearly general operand, but accept any const_double, since we wish
4012 to be able to drop them into memory rather than have them get pulled
4013 into registers. */
b840bfb0 4014
2a2ab3f9 4015int
b96a374d 4016cmp_fp_expander_operand (register rtx op, enum machine_mode mode)
2a2ab3f9 4017{
e075ae69 4018 if (mode != VOIDmode && mode != GET_MODE (op))
0b6b2900 4019 return 0;
e075ae69 4020 if (GET_CODE (op) == CONST_DOUBLE)
2a2ab3f9 4021 return 1;
e075ae69 4022 return general_operand (op, mode);
2a2ab3f9
JVA
4023}
4024
e075ae69 4025/* Match an SImode or HImode register for a zero_extract. */
2a2ab3f9
JVA
4026
4027int
b96a374d 4028ext_register_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
2a2ab3f9 4029{
3522082b 4030 int regno;
0d7d98ee
JH
4031 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
4032 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
e075ae69 4033 return 0;
3522082b
JH
4034
4035 if (!register_operand (op, VOIDmode))
4036 return 0;
4037
d1f87653 4038 /* Be careful to accept only registers having upper parts. */
3522082b
JH
4039 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
4040 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
e075ae69
RH
4041}
4042
4043/* Return 1 if this is a valid binary floating-point operation.
0f290768 4044 OP is the expression matched, and MODE is its mode. */
e075ae69
RH
4045
4046int
b96a374d 4047binary_fp_operator (register rtx op, enum machine_mode mode)
e075ae69
RH
4048{
4049 if (mode != VOIDmode && mode != GET_MODE (op))
4050 return 0;
4051
2a2ab3f9
JVA
4052 switch (GET_CODE (op))
4053 {
e075ae69
RH
4054 case PLUS:
4055 case MINUS:
4056 case MULT:
4057 case DIV:
4058 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
e9a25f70 4059
2a2ab3f9
JVA
4060 default:
4061 return 0;
4062 }
4063}
fee2770d 4064
e075ae69 4065int
b96a374d 4066mult_operator (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
e075ae69
RH
4067{
4068 return GET_CODE (op) == MULT;
4069}
4070
4071int
b96a374d 4072div_operator (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
e075ae69
RH
4073{
4074 return GET_CODE (op) == DIV;
4075}
0a726ef1
JL
4076
4077int
b96a374d 4078arith_or_logical_operator (rtx op, enum machine_mode mode)
0a726ef1 4079{
e075ae69
RH
4080 return ((mode == VOIDmode || GET_MODE (op) == mode)
4081 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
4082 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
0a726ef1
JL
4083}
4084
e075ae69 4085/* Returns 1 if OP is a memory operand with a displacement. */
fee2770d
RS
4086
4087int
b96a374d 4088memory_displacement_operand (register rtx op, enum machine_mode mode)
4f2c8ebb 4089{
e075ae69 4090 struct ix86_address parts;
e9a25f70 4091
e075ae69
RH
4092 if (! memory_operand (op, mode))
4093 return 0;
4094
4095 if (! ix86_decompose_address (XEXP (op, 0), &parts))
4096 abort ();
4097
4098 return parts.disp != NULL_RTX;
4f2c8ebb
RS
4099}
4100
16189740 4101/* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
e075ae69
RH
4102 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
4103
4104 ??? It seems likely that this will only work because cmpsi is an
4105 expander, and no actual insns use this. */
4f2c8ebb
RS
4106
4107int
b96a374d 4108cmpsi_operand (rtx op, enum machine_mode mode)
fee2770d 4109{
b9b2c339 4110 if (nonimmediate_operand (op, mode))
e075ae69
RH
4111 return 1;
4112
4113 if (GET_CODE (op) == AND
4114 && GET_MODE (op) == SImode
4115 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
4116 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
4117 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
4118 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
4119 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
4120 && GET_CODE (XEXP (op, 1)) == CONST_INT)
fee2770d 4121 return 1;
e9a25f70 4122
fee2770d
RS
4123 return 0;
4124}
d784886d 4125
e075ae69
RH
 4126/* Returns 1 if OP is a memory operand that cannot be represented by the
 4127 modRM array. */
d784886d
RK
4128
4129int
b96a374d 4130long_memory_operand (register rtx op, enum machine_mode mode)
d784886d 4131{
e075ae69 4132 if (! memory_operand (op, mode))
d784886d
RK
4133 return 0;
4134
e075ae69 4135 return memory_address_length (op) != 0;
d784886d 4136}
2247f6ed
JH
4137
4138/* Return nonzero if the rtx is known aligned. */
4139
4140int
b96a374d 4141aligned_operand (rtx op, enum machine_mode mode)
2247f6ed
JH
4142{
4143 struct ix86_address parts;
4144
4145 if (!general_operand (op, mode))
4146 return 0;
4147
0f290768 4148 /* Registers and immediate operands are always "aligned". */
2247f6ed
JH
4149 if (GET_CODE (op) != MEM)
4150 return 1;
4151
0f290768 4152 /* Don't even try to do any aligned optimizations with volatiles. */
2247f6ed
JH
4153 if (MEM_VOLATILE_P (op))
4154 return 0;
4155
4156 op = XEXP (op, 0);
4157
4158 /* Pushes and pops are only valid on the stack pointer. */
4159 if (GET_CODE (op) == PRE_DEC
4160 || GET_CODE (op) == POST_INC)
4161 return 1;
4162
4163 /* Decode the address. */
4164 if (! ix86_decompose_address (op, &parts))
4165 abort ();
4166
1540f9eb
JH
4167 if (parts.base && GET_CODE (parts.base) == SUBREG)
4168 parts.base = SUBREG_REG (parts.base);
4169 if (parts.index && GET_CODE (parts.index) == SUBREG)
4170 parts.index = SUBREG_REG (parts.index);
4171
2247f6ed
JH
4172 /* Look for some component that isn't known to be aligned. */
4173 if (parts.index)
4174 {
4175 if (parts.scale < 4
bdb429a5 4176 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
2247f6ed
JH
4177 return 0;
4178 }
4179 if (parts.base)
4180 {
bdb429a5 4181 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
2247f6ed
JH
4182 return 0;
4183 }
4184 if (parts.disp)
4185 {
4186 if (GET_CODE (parts.disp) != CONST_INT
4187 || (INTVAL (parts.disp) & 3) != 0)
4188 return 0;
4189 }
4190
4191 /* Didn't find one -- this must be an aligned address. */
4192 return 1;
4193}
e075ae69 4194\f
881b2a96
RS
4195/* Initialize the table of extra 80387 mathematical constants. */
4196
4197static void
b96a374d 4198init_ext_80387_constants (void)
881b2a96
RS
4199{
4200 static const char * cst[5] =
4201 {
4202 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4203 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4204 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4205 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4206 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4207 };
4208 int i;
4209
4210 for (i = 0; i < 5; i++)
4211 {
4212 real_from_string (&ext_80387_constants_table[i], cst[i]);
4213 /* Ensure each constant is rounded to XFmode precision. */
1f48e56d
JH
4214 real_convert (&ext_80387_constants_table[i],
4215 TARGET_128BIT_LONG_DOUBLE ? TFmode : XFmode,
881b2a96
RS
4216 &ext_80387_constants_table[i]);
4217 }
4218
4219 ext_80387_constants_init = 1;
4220}
4221
e075ae69 4222/* Return true if the constant is something that can be loaded with
881b2a96 4223 a special instruction. */
57dbca5e
BS
4224
4225int
b96a374d 4226standard_80387_constant_p (rtx x)
57dbca5e 4227{
2b04e52b 4228 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
e075ae69 4229 return -1;
881b2a96 4230
2b04e52b
JH
4231 if (x == CONST0_RTX (GET_MODE (x)))
4232 return 1;
4233 if (x == CONST1_RTX (GET_MODE (x)))
4234 return 2;
881b2a96
RS
4235
4236 /* For XFmode constants, try to find a special 80387 instruction on
4237 those CPUs that benefit from them. */
1f48e56d 4238 if ((GET_MODE (x) == XFmode || GET_MODE (x) == TFmode)
9e555526 4239 && x86_ext_80387_constants & TUNEMASK)
881b2a96
RS
4240 {
4241 REAL_VALUE_TYPE r;
4242 int i;
4243
4244 if (! ext_80387_constants_init)
4245 init_ext_80387_constants ();
4246
4247 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4248 for (i = 0; i < 5; i++)
4249 if (real_identical (&r, &ext_80387_constants_table[i]))
4250 return i + 3;
4251 }
4252
e075ae69 4253 return 0;
57dbca5e
BS
4254}
4255
881b2a96
RS
4256/* Return the opcode of the special instruction to be used to load
4257 the constant X. */
4258
4259const char *
b96a374d 4260standard_80387_constant_opcode (rtx x)
881b2a96
RS
4261{
4262 switch (standard_80387_constant_p (x))
4263 {
b96a374d 4264 case 1:
881b2a96
RS
4265 return "fldz";
4266 case 2:
4267 return "fld1";
b96a374d 4268 case 3:
881b2a96
RS
4269 return "fldlg2";
4270 case 4:
4271 return "fldln2";
b96a374d 4272 case 5:
881b2a96
RS
4273 return "fldl2e";
4274 case 6:
4275 return "fldl2t";
b96a374d 4276 case 7:
881b2a96
RS
4277 return "fldpi";
4278 }
4279 abort ();
4280}
4281
4282/* Return the CONST_DOUBLE representing the 80387 constant that is
4283 loaded by the specified special instruction. The argument IDX
4284 matches the return value from standard_80387_constant_p. */
4285
4286rtx
b96a374d 4287standard_80387_constant_rtx (int idx)
881b2a96
RS
4288{
4289 int i;
4290
4291 if (! ext_80387_constants_init)
4292 init_ext_80387_constants ();
4293
4294 switch (idx)
4295 {
4296 case 3:
4297 case 4:
4298 case 5:
4299 case 6:
4300 case 7:
4301 i = idx - 3;
4302 break;
4303
4304 default:
4305 abort ();
4306 }
4307
1f48e56d
JH
4308 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
4309 TARGET_128BIT_LONG_DOUBLE ? TFmode : XFmode);
881b2a96
RS
4310}
4311
2b04e52b
JH
 4312/* Return 1 if X is an FP constant we can load into an SSE register
 4313 without using memory. */
4314int
b96a374d 4315standard_sse_constant_p (rtx x)
2b04e52b 4316{
0e67d460
JH
4317 if (x == const0_rtx)
4318 return 1;
2b04e52b
JH
4319 return (x == CONST0_RTX (GET_MODE (x)));
4320}
4321
2a2ab3f9
JVA
 4322/* Returns 1 if OP contains a symbol reference. */
4323
4324int
b96a374d 4325symbolic_reference_mentioned_p (rtx op)
2a2ab3f9 4326{
6f7d635c 4327 register const char *fmt;
2a2ab3f9
JVA
4328 register int i;
4329
4330 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4331 return 1;
4332
4333 fmt = GET_RTX_FORMAT (GET_CODE (op));
4334 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4335 {
4336 if (fmt[i] == 'E')
4337 {
4338 register int j;
4339
4340 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4341 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4342 return 1;
4343 }
e9a25f70 4344
2a2ab3f9
JVA
4345 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4346 return 1;
4347 }
4348
4349 return 0;
4350}
e075ae69
RH
4351
4352/* Return 1 if it is appropriate to emit `ret' instructions in the
4353 body of a function. Do this only if the epilogue is simple, needing a
4354 couple of insns. Prior to reloading, we can't tell how many registers
4355 must be saved, so return 0 then. Return 0 if there is no frame
4356 marker to de-allocate.
4357
4358 If NON_SAVING_SETJMP is defined and true, then it is not possible
4359 for the epilogue to be simple, so return 0. This is a special case
4360 since NON_SAVING_SETJMP will not cause regs_ever_live to change
4361 until final, but jump_optimize may need to know sooner if a
4362 `return' is OK. */
32b5b1aa
SC
4363
4364int
b96a374d 4365ix86_can_use_return_insn_p (void)
32b5b1aa 4366{
4dd2ac2c 4367 struct ix86_frame frame;
9a7372d6 4368
e075ae69
RH
4369#ifdef NON_SAVING_SETJMP
4370 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
4371 return 0;
4372#endif
9a7372d6
RH
4373
4374 if (! reload_completed || frame_pointer_needed)
4375 return 0;
32b5b1aa 4376
9a7372d6
RH
 4377 /* Don't allow more than 32k bytes of popped arguments, since that's
 4378 all we can do with one instruction. */
4379 if (current_function_pops_args
4380 && current_function_args_size >= 32768)
e075ae69 4381 return 0;
32b5b1aa 4382
4dd2ac2c
JH
4383 ix86_compute_frame_layout (&frame);
4384 return frame.to_allocate == 0 && frame.nregs == 0;
e075ae69 4385}
6189a572
JH
4386\f
4387/* Return 1 if VALUE can be stored in the sign extended immediate field. */
4388int
b96a374d 4389x86_64_sign_extended_value (rtx value)
6189a572
JH
4390{
4391 switch (GET_CODE (value))
4392 {
4393 /* CONST_DOUBLES never match, since HOST_BITS_PER_WIDE_INT is known
 4394 to be at least 32, and thus all acceptable constants are
4395 represented as CONST_INT. */
4396 case CONST_INT:
4397 if (HOST_BITS_PER_WIDE_INT == 32)
4398 return 1;
4399 else
4400 {
4401 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
fa9f36a1 4402 return trunc_int_for_mode (val, SImode) == val;
6189a572
JH
4403 }
4404 break;
4405
75d38379
JJ
 4406 /* For certain code models, the symbolic references are known to fit.
 4407 In the CM_SMALL_PIC model we know they fit if local to the shared
 4408 library. Don't count TLS SYMBOL_REFs here, since they should fit
 4409 only inside an UNSPEC, handled below. */
6189a572 4410 case SYMBOL_REF:
c05dbe81 4411 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL);
6189a572
JH
4412
4413 /* For certain code models, the code is near as well. */
4414 case LABEL_REF:
c05dbe81
JH
4415 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM
4416 || ix86_cmodel == CM_KERNEL);
6189a572
JH
4417
4418 /* We also may accept the offsetted memory references in certain special
4419 cases. */
4420 case CONST:
75d38379
JJ
4421 if (GET_CODE (XEXP (value, 0)) == UNSPEC)
4422 switch (XINT (XEXP (value, 0), 1))
4423 {
4424 case UNSPEC_GOTPCREL:
4425 case UNSPEC_DTPOFF:
4426 case UNSPEC_GOTNTPOFF:
4427 case UNSPEC_NTPOFF:
4428 return 1;
4429 default:
4430 break;
4431 }
4432 if (GET_CODE (XEXP (value, 0)) == PLUS)
6189a572
JH
4433 {
4434 rtx op1 = XEXP (XEXP (value, 0), 0);
4435 rtx op2 = XEXP (XEXP (value, 0), 1);
4436 HOST_WIDE_INT offset;
4437
4438 if (ix86_cmodel == CM_LARGE)
4439 return 0;
4440 if (GET_CODE (op2) != CONST_INT)
4441 return 0;
4442 offset = trunc_int_for_mode (INTVAL (op2), DImode);
4443 switch (GET_CODE (op1))
4444 {
4445 case SYMBOL_REF:
75d38379 4446 /* For CM_SMALL assume that the last object is 16MB below the
6189a572
JH
 4447 end of the 31-bit boundary. We may also accept pretty
 4448 large negative constants, knowing that all objects are
 4449 in the positive half of the address space. */
4450 if (ix86_cmodel == CM_SMALL
75d38379 4451 && offset < 16*1024*1024
6189a572
JH
4452 && trunc_int_for_mode (offset, SImode) == offset)
4453 return 1;
 4454 /* For CM_KERNEL we know that all objects reside in the
 4455 negative half of the 32-bit address space. We may not
 4456 accept negative offsets, since they may be just off,
d6a7951f 4457 but we may accept pretty large positive ones. */
6189a572
JH
4458 if (ix86_cmodel == CM_KERNEL
4459 && offset > 0
4460 && trunc_int_for_mode (offset, SImode) == offset)
4461 return 1;
4462 break;
4463 case LABEL_REF:
4464 /* These conditions are similar to SYMBOL_REF ones, just the
4465 constraints for code models differ. */
c05dbe81 4466 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
75d38379 4467 && offset < 16*1024*1024
6189a572
JH
4468 && trunc_int_for_mode (offset, SImode) == offset)
4469 return 1;
4470 if (ix86_cmodel == CM_KERNEL
4471 && offset > 0
4472 && trunc_int_for_mode (offset, SImode) == offset)
4473 return 1;
4474 break;
75d38379
JJ
4475 case UNSPEC:
4476 switch (XINT (op1, 1))
4477 {
4478 case UNSPEC_DTPOFF:
4479 case UNSPEC_NTPOFF:
4480 if (offset > 0
4481 && trunc_int_for_mode (offset, SImode) == offset)
4482 return 1;
4483 }
4484 break;
6189a572
JH
4485 default:
4486 return 0;
4487 }
4488 }
4489 return 0;
4490 default:
4491 return 0;
4492 }
4493}
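/* (Expository examples, not in the original source.) For CONST_INTs
 the test above accepts exactly what a sign-extended 32-bit immediate
 can encode: 0x7fffffff and -0x80000000 pass, while 0x80000000 fails
 and must be loaded via the zero-extended predicate below or a
 movabs. */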
4494
4495/* Return 1 if VALUE can be stored in the zero extended immediate field. */
4496int
b96a374d 4497x86_64_zero_extended_value (rtx value)
6189a572
JH
4498{
4499 switch (GET_CODE (value))
4500 {
4501 case CONST_DOUBLE:
4502 if (HOST_BITS_PER_WIDE_INT == 32)
4503 return (GET_MODE (value) == VOIDmode
4504 && !CONST_DOUBLE_HIGH (value));
4505 else
4506 return 0;
4507 case CONST_INT:
4508 if (HOST_BITS_PER_WIDE_INT == 32)
4509 return INTVAL (value) >= 0;
4510 else
b531087a 4511 return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
6189a572
JH
4512 break;
4513
4514 /* For certain code models, the symbolic references are known to fit. */
4515 case SYMBOL_REF:
4516 return ix86_cmodel == CM_SMALL;
4517
4518 /* For certain code models, the code is near as well. */
4519 case LABEL_REF:
4520 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
4521
4522 /* We also may accept the offsetted memory references in certain special
4523 cases. */
4524 case CONST:
4525 if (GET_CODE (XEXP (value, 0)) == PLUS)
4526 {
4527 rtx op1 = XEXP (XEXP (value, 0), 0);
4528 rtx op2 = XEXP (XEXP (value, 0), 1);
4529
4530 if (ix86_cmodel == CM_LARGE)
4531 return 0;
4532 switch (GET_CODE (op1))
4533 {
4534 case SYMBOL_REF:
4535 return 0;
d6a7951f 4536 /* For the small code model we may accept pretty large positive
6189a572
JH
4537 offsets, since one bit is available for free. Negative
 4538 offsets are limited by the size of the NULL pointer area
4539 specified by the ABI. */
4540 if (ix86_cmodel == CM_SMALL
4541 && GET_CODE (op2) == CONST_INT
4542 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4543 && (trunc_int_for_mode (INTVAL (op2), SImode)
4544 == INTVAL (op2)))
4545 return 1;
4546 /* ??? For the kernel, we may accept adjustment of
4547 -0x10000000, since we know that it will just convert
d6a7951f 4548 negative address space to positive, but perhaps this
6189a572
JH
4549 is not worthwhile. */
4550 break;
4551 case LABEL_REF:
4552 /* These conditions are similar to SYMBOL_REF ones, just the
4553 constraints for code models differ. */
4554 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4555 && GET_CODE (op2) == CONST_INT
4556 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4557 && (trunc_int_for_mode (INTVAL (op2), SImode)
4558 == INTVAL (op2)))
4559 return 1;
4560 break;
4561 default:
4562 return 0;
4563 }
4564 }
4565 return 0;
4566 default:
4567 return 0;
4568 }
4569}
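/* (Expository examples, not in the original source.) Here 0xffffffff
 passes, since a plain movl zero-extends it for free, while -1 fails
 and is instead handled by the sign-extended predicate above. */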
6fca22eb
RH
4570
4571/* Value should be nonzero if functions must have frame pointers.
4572 Zero means the frame pointer need not be set up (and parms may
4573 be accessed via the stack pointer) in functions that seem suitable. */
4574
4575int
b96a374d 4576ix86_frame_pointer_required (void)
6fca22eb
RH
4577{
4578 /* If we accessed previous frames, then the generated code expects
4579 to be able to access the saved ebp value in our frame. */
4580 if (cfun->machine->accesses_prev_frame)
4581 return 1;
a4f31c00 4582
6fca22eb
RH
4583 /* Several x86 os'es need a frame pointer for other reasons,
4584 usually pertaining to setjmp. */
4585 if (SUBTARGET_FRAME_POINTER_REQUIRED)
4586 return 1;
4587
4588 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4589 the frame pointer by default. Turn it back on now if we've not
4590 got a leaf function. */
a7943381 4591 if (TARGET_OMIT_LEAF_FRAME_POINTER
55ba61f3
JH
4592 && (!current_function_is_leaf))
4593 return 1;
4594
4595 if (current_function_profile)
6fca22eb
RH
4596 return 1;
4597
4598 return 0;
4599}
4600
4601/* Record that the current function accesses previous call frames. */
4602
4603void
b96a374d 4604ix86_setup_frame_addresses (void)
6fca22eb
RH
4605{
4606 cfun->machine->accesses_prev_frame = 1;
4607}
e075ae69 4608\f
145aacc2
RH
4609#if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
4610# define USE_HIDDEN_LINKONCE 1
4611#else
4612# define USE_HIDDEN_LINKONCE 0
4613#endif
4614
bd09bdeb 4615static int pic_labels_used;
e9a25f70 4616
145aacc2
RH
4617/* Fills in the label name that should be used for a pc thunk for
4618 the given register. */
4619
4620static void
b96a374d 4621get_pc_thunk_name (char name[32], unsigned int regno)
145aacc2
RH
4622{
4623 if (USE_HIDDEN_LINKONCE)
4624 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4625 else
4626 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
4627}
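/* (Expository example, not in the original source.) For %ebx with
 USE_HIDDEN_LINKONCE this yields the well-known symbol
 "__i686.get_pc_thunk.bx"; otherwise an internal label of the form
 "LPR<regno>" is generated. */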
4628
4629
e075ae69
RH
 4630/* This function emits the pc thunks used for -fpic; each one loads its
 4631 register with the return address of the caller and then returns. */
4632
4633void
b96a374d 4634ix86_file_end (void)
e075ae69
RH
4635{
4636 rtx xops[2];
bd09bdeb 4637 int regno;
32b5b1aa 4638
bd09bdeb 4639 for (regno = 0; regno < 8; ++regno)
7c262518 4640 {
145aacc2
RH
4641 char name[32];
4642
bd09bdeb
RH
4643 if (! ((pic_labels_used >> regno) & 1))
4644 continue;
4645
145aacc2 4646 get_pc_thunk_name (name, regno);
bd09bdeb 4647
145aacc2
RH
4648 if (USE_HIDDEN_LINKONCE)
4649 {
4650 tree decl;
4651
4652 decl = build_decl (FUNCTION_DECL, get_identifier (name),
4653 error_mark_node);
4654 TREE_PUBLIC (decl) = 1;
4655 TREE_STATIC (decl) = 1;
4656 DECL_ONE_ONLY (decl) = 1;
4657
4658 (*targetm.asm_out.unique_section) (decl, 0);
4659 named_section (decl, NULL, 0);
4660
a5fe455b
ZW
4661 (*targetm.asm_out.globalize_label) (asm_out_file, name);
4662 fputs ("\t.hidden\t", asm_out_file);
4663 assemble_name (asm_out_file, name);
4664 fputc ('\n', asm_out_file);
4665 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
145aacc2
RH
4666 }
4667 else
4668 {
4669 text_section ();
a5fe455b 4670 ASM_OUTPUT_LABEL (asm_out_file, name);
145aacc2 4671 }
bd09bdeb
RH
4672
4673 xops[0] = gen_rtx_REG (SImode, regno);
4674 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4675 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4676 output_asm_insn ("ret", xops);
7c262518 4677 }
3edc56a9 4678
a5fe455b
ZW
4679 if (NEED_INDICATE_EXEC_STACK)
4680 file_end_indicate_exec_stack ();
32b5b1aa 4681}
32b5b1aa 4682
c8c03509 4683/* Emit code for the SET_GOT patterns. */
32b5b1aa 4684
c8c03509 4685const char *
b96a374d 4686output_set_got (rtx dest)
c8c03509
RH
4687{
4688 rtx xops[3];
0d7d98ee 4689
c8c03509 4690 xops[0] = dest;
5fc0e5df 4691 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
32b5b1aa 4692
c8c03509 4693 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
32b5b1aa 4694 {
c8c03509
RH
4695 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
4696
4697 if (!flag_pic)
4698 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4699 else
4700 output_asm_insn ("call\t%a2", xops);
4701
b069de3b
SS
4702#if TARGET_MACHO
4703 /* Output the "canonical" label name ("Lxx$pb") here too. This
4704 is what will be referred to by the Mach-O PIC subsystem. */
4705 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4706#endif
4977bab6 4707 (*targetm.asm_out.internal_label) (asm_out_file, "L",
c8c03509
RH
4708 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4709
4710 if (flag_pic)
4711 output_asm_insn ("pop{l}\t%0", xops);
32b5b1aa 4712 }
e075ae69 4713 else
e5cb57e8 4714 {
145aacc2
RH
4715 char name[32];
4716 get_pc_thunk_name (name, REGNO (dest));
bd09bdeb 4717 pic_labels_used |= 1 << REGNO (dest);
f996902d 4718
145aacc2 4719 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
c8c03509
RH
4720 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4721 output_asm_insn ("call\t%X2", xops);
e5cb57e8 4722 }
e5cb57e8 4723
c8c03509
RH
4724 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4725 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
b069de3b 4726 else if (!TARGET_MACHO)
8e9fadc3 4727 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
79325812 4728
c8c03509 4729 return "";
e9a25f70 4730}
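/* (Expository illustration, not in the original source; label name
 chosen for exposition.) Without deep branch prediction the code
 emitted above for %ebx is roughly

	call	.L2
   .L2:	popl	%ebx
	addl	$_GLOBAL_OFFSET_TABLE_+[.-.L2], %ebx

 with it, the call targets the shared pc thunk instead, keeping the
 processor's return-address predictor balanced. */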
8dfe5673 4731
0d7d98ee 4732/* Generate a "push" pattern for input ARG. */
e9a25f70 4733
e075ae69 4734static rtx
b96a374d 4735gen_push (rtx arg)
e9a25f70 4736{
c5c76735 4737 return gen_rtx_SET (VOIDmode,
0d7d98ee
JH
4738 gen_rtx_MEM (Pmode,
4739 gen_rtx_PRE_DEC (Pmode,
c5c76735
JL
4740 stack_pointer_rtx)),
4741 arg);
e9a25f70
JL
4742}
4743
bd09bdeb
RH
4744/* Return >= 0 if there is an unused call-clobbered register available
4745 for the entire function. */
4746
4747static unsigned int
b96a374d 4748ix86_select_alt_pic_regnum (void)
bd09bdeb
RH
4749{
4750 if (current_function_is_leaf && !current_function_profile)
4751 {
4752 int i;
4753 for (i = 2; i >= 0; --i)
4754 if (!regs_ever_live[i])
4755 return i;
4756 }
4757
4758 return INVALID_REGNUM;
4759}
fce5a9f2 4760
4dd2ac2c
JH
4761/* Return 1 if we need to save REGNO. */
4762static int
b96a374d 4763ix86_save_reg (unsigned int regno, int maybe_eh_return)
1020a5ab 4764{
bd09bdeb
RH
4765 if (pic_offset_table_rtx
4766 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4767 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
66edd3b4 4768 || current_function_profile
8c38a24f
MM
4769 || current_function_calls_eh_return
4770 || current_function_uses_const_pool))
bd09bdeb
RH
4771 {
4772 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4773 return 0;
4774 return 1;
4775 }
1020a5ab
RH
4776
4777 if (current_function_calls_eh_return && maybe_eh_return)
4778 {
4779 unsigned i;
4780 for (i = 0; ; i++)
4781 {
b531087a 4782 unsigned test = EH_RETURN_DATA_REGNO (i);
1020a5ab
RH
4783 if (test == INVALID_REGNUM)
4784 break;
9b690711 4785 if (test == regno)
1020a5ab
RH
4786 return 1;
4787 }
4788 }
4dd2ac2c 4789
1020a5ab
RH
4790 return (regs_ever_live[regno]
4791 && !call_used_regs[regno]
4792 && !fixed_regs[regno]
4793 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4dd2ac2c
JH
4794}
4795
0903fcab
JH
4796/* Return number of registers to be saved on the stack. */
4797
4798static int
b96a374d 4799ix86_nsaved_regs (void)
0903fcab
JH
4800{
4801 int nregs = 0;
0903fcab
JH
4802 int regno;
4803
4dd2ac2c 4804 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
1020a5ab 4805 if (ix86_save_reg (regno, true))
4dd2ac2c 4806 nregs++;
0903fcab
JH
4807 return nregs;
4808}
4809
4810/* Return the offset between two registers, one to be eliminated, and the other
4811 its replacement, at the start of a routine. */
4812
4813HOST_WIDE_INT
b96a374d 4814ix86_initial_elimination_offset (int from, int to)
0903fcab 4815{
4dd2ac2c
JH
4816 struct ix86_frame frame;
4817 ix86_compute_frame_layout (&frame);
564d80f4
JH
4818
4819 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4dd2ac2c 4820 return frame.hard_frame_pointer_offset;
564d80f4
JH
4821 else if (from == FRAME_POINTER_REGNUM
4822 && to == HARD_FRAME_POINTER_REGNUM)
4dd2ac2c 4823 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
0903fcab
JH
4824 else
4825 {
564d80f4
JH
4826 if (to != STACK_POINTER_REGNUM)
4827 abort ();
4828 else if (from == ARG_POINTER_REGNUM)
4dd2ac2c 4829 return frame.stack_pointer_offset;
564d80f4
JH
4830 else if (from != FRAME_POINTER_REGNUM)
4831 abort ();
0903fcab 4832 else
4dd2ac2c 4833 return frame.stack_pointer_offset - frame.frame_pointer_offset;
0903fcab
JH
4834 }
4835}
4836
4dd2ac2c 4837/* Fill the structure ix86_frame describing the frame of the currently compiled function. */
0f290768 4838
4dd2ac2c 4839static void
b96a374d 4840ix86_compute_frame_layout (struct ix86_frame *frame)
65954bd8 4841{
65954bd8 4842 HOST_WIDE_INT total_size;
564d80f4 4843 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
44affdae
JH
4844 int offset;
4845 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4dd2ac2c 4846 HOST_WIDE_INT size = get_frame_size ();
65954bd8 4847
4dd2ac2c 4848 frame->nregs = ix86_nsaved_regs ();
564d80f4 4849 total_size = size;
65954bd8 4850
d7394366
JH
4851 /* During reload iteration the amount of registers saved can change.
4852 Recompute the value as needed. Do not recompute when amount of registers
4853 didn't change as reload does mutiple calls to the function and does not
4854 expect the decision to change within single iteration. */
4855 if (!optimize_size
4856 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
d9b40e8d
JH
4857 {
4858 int count = frame->nregs;
4859
d7394366 4860 cfun->machine->use_fast_prologue_epilogue_nregs = count;
d9b40e8d
JH
4861 /* The fast prologue uses move instead of push to save registers. This
4862 is significantly longer, but also executes faster as modern hardware
4863 can execute the moves in parallel, but can't do that for push/pop.
b96a374d 4864
d9b40e8d
JH
4865 Be careful about choosing what prologue to emit: When function takes
4866 many instructions to execute we may use slow version as well as in
4867 case function is known to be outside hot spot (this is known with
4868 feedback only). Weight the size of function by number of registers
4869 to save as it is cheap to use one or two push instructions but very
4870 slow to use many of them. */
4871 if (count)
4872 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
4873 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
4874 || (flag_branch_probabilities
4875 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
4876 cfun->machine->use_fast_prologue_epilogue = false;
4877 else
4878 cfun->machine->use_fast_prologue_epilogue
4879 = !expensive_function_p (count);
4880 }
4881 if (TARGET_PROLOGUE_USING_MOVE
4882 && cfun->machine->use_fast_prologue_epilogue)
4883 frame->save_regs_using_mov = true;
4884 else
4885 frame->save_regs_using_mov = false;
4886
4887
9ba81eaa 4888 /* Skip return address and saved base pointer. */
4dd2ac2c
JH
4889 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4890
4891 frame->hard_frame_pointer_offset = offset;
564d80f4 4892
fcbfaa65
RK
4893 /* Do some sanity checking of stack_alignment_needed and
4894 preferred_alignment, since i386 port is the only using those features
f710504c 4895 that may break easily. */
564d80f4 4896
44affdae
JH
4897 if (size && !stack_alignment_needed)
4898 abort ();
44affdae
JH
4899 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
4900 abort ();
4901 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4902 abort ();
4903 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4904 abort ();
564d80f4 4905
4dd2ac2c
JH
4906 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
4907 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
564d80f4 4908
4dd2ac2c
JH
4909 /* Register save area */
4910 offset += frame->nregs * UNITS_PER_WORD;
65954bd8 4911
8362f420
JH
4912 /* Va-arg area */
4913 if (ix86_save_varrargs_registers)
4914 {
4915 offset += X86_64_VARARGS_SIZE;
4916 frame->va_arg_size = X86_64_VARARGS_SIZE;
4917 }
4918 else
4919 frame->va_arg_size = 0;
4920
4dd2ac2c
JH
4921 /* Align start of frame for local function. */
4922 frame->padding1 = ((offset + stack_alignment_needed - 1)
4923 & -stack_alignment_needed) - offset;
f73ad30e 4924
4dd2ac2c 4925 offset += frame->padding1;
65954bd8 4926
4dd2ac2c
JH
4927 /* Frame pointer points here. */
4928 frame->frame_pointer_offset = offset;
54ff41b7 4929
4dd2ac2c 4930 offset += size;
65954bd8 4931
0b7ae565
RH
4932 /* Add outgoing arguments area. Can be skipped if we eliminated
4933 all the function calls as dead code. */
4934 if (ACCUMULATE_OUTGOING_ARGS && !current_function_is_leaf)
4dd2ac2c
JH
4935 {
4936 offset += current_function_outgoing_args_size;
4937 frame->outgoing_arguments_size = current_function_outgoing_args_size;
4938 }
4939 else
4940 frame->outgoing_arguments_size = 0;
564d80f4 4941
002ff5bc
RH
4942 /* Align stack boundary. Only needed if we're calling another function
4943 or using alloca. */
4944 if (!current_function_is_leaf || current_function_calls_alloca)
0b7ae565
RH
4945 frame->padding2 = ((offset + preferred_alignment - 1)
4946 & -preferred_alignment) - offset;
4947 else
4948 frame->padding2 = 0;
4dd2ac2c
JH
4949
4950 offset += frame->padding2;
4951
4952 /* We've reached end of stack frame. */
4953 frame->stack_pointer_offset = offset;
4954
4955 /* Size prologue needs to allocate. */
4956 frame->to_allocate =
4957 (size + frame->padding1 + frame->padding2
8362f420 4958 + frame->outgoing_arguments_size + frame->va_arg_size);
4dd2ac2c 4959
d9b40e8d
JH
4960 if (!frame->to_allocate && frame->nregs <= 1)
4961 frame->save_regs_using_mov = false;
4962
a5b378d6 4963 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
8362f420
JH
4964 && current_function_is_leaf)
4965 {
4966 frame->red_zone_size = frame->to_allocate;
d9b40e8d
JH
4967 if (frame->save_regs_using_mov)
4968 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
8362f420
JH
4969 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
4970 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
4971 }
4972 else
4973 frame->red_zone_size = 0;
4974 frame->to_allocate -= frame->red_zone_size;
4975 frame->stack_pointer_offset -= frame->red_zone_size;
4dd2ac2c
JH
4976#if 0
4977 fprintf (stderr, "nregs: %i\n", frame->nregs);
4978 fprintf (stderr, "size: %i\n", size);
4979 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
4980 fprintf (stderr, "padding1: %i\n", frame->padding1);
8362f420 4981 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
4dd2ac2c
JH
4982 fprintf (stderr, "padding2: %i\n", frame->padding2);
4983 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
8362f420 4984 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
4dd2ac2c
JH
4985 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
4986 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
4987 frame->hard_frame_pointer_offset);
4988 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
4989#endif
65954bd8
JL
4990}
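/* Illustrative layout (a sketch of the computation above; the offsets
   grow downward from the slot above the return address and the sizes
   are examples, not authoritative):

	return address			<- offset 0
	saved %ebp (if fp needed)	<- hard_frame_pointer_offset
	register save area (nregs words)
	va-arg save area
	padding1
					<- frame_pointer_offset
	local variables (size bytes)
	outgoing arguments area
	padding2
					<- stack_pointer_offset
					   (less red_zone_size when the
					    red zone applies)  */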

/* Emit code to save registers in the prologue.  */

static void
ix86_emit_save_regs (void)
{
  register int regno;
  rtx insn;

  for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
    if (ix86_save_reg (regno, true))
      {
	insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
	RTX_FRAME_RELATED_P (insn) = 1;
      }
}

/* Emit code to save registers using MOV insns.  First register
   is stored at POINTER + OFFSET.  */
static void
ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
{
  int regno;
  rtx insn;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (ix86_save_reg (regno, true))
      {
	insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
					       Pmode, offset),
			       gen_rtx_REG (Pmode, regno));
	RTX_FRAME_RELATED_P (insn) = 1;
	offset += UNITS_PER_WORD;
      }
}
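/* Illustrative output (assumed register set, not from the original
   source): with %ebx and %esi to be saved and OFFSET = -8 relative to
   %ebp, the loop above emits the equivalent of

	movl %ebx, -8(%ebp)
	movl %esi, -4(%ebp)

   which, unlike a pair of pushes, leaves %esp untouched and lets the
   two stores execute in parallel on hardware that supports it.  */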

/* Expand the prologue into a bunch of separate insns.  */

void
ix86_expand_prologue (void)
{
  rtx insn;
  bool pic_reg_used;
  struct ix86_frame frame;
  HOST_WIDE_INT allocate;

  ix86_compute_frame_layout (&frame);

  /* Note: AT&T enter does NOT have reversed args.  Enter is probably
     slower on all targets.  Also sdb doesn't like it.  */

  if (frame_pointer_needed)
    {
      insn = emit_insn (gen_push (hard_frame_pointer_rtx));
      RTX_FRAME_RELATED_P (insn) = 1;

      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  allocate = frame.to_allocate;

  if (!frame.save_regs_using_mov)
    ix86_emit_save_regs ();
  else
    allocate += frame.nregs * UNITS_PER_WORD;

  /* When using the red zone we may start register saving before allocating
     the stack frame, saving one cycle of the prologue.  */
  if (TARGET_RED_ZONE && frame.save_regs_using_mov)
    ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
				   : stack_pointer_rtx,
				   -frame.nregs * UNITS_PER_WORD);

  if (allocate == 0)
    ;
  else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
    {
      insn = emit_insn (gen_pro_epilogue_adjust_stack
			(stack_pointer_rtx, stack_pointer_rtx,
			 GEN_INT (-allocate)));
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  else
    {
      /* Only valid for Win32.  */

      const rtx eax = gen_rtx_REG (SImode, 0);
      rtx rtx_allocate = GEN_INT (allocate);

      if (TARGET_64BIT)
	abort ();

      insn = emit_move_insn (eax, rtx_allocate);
      RTX_FRAME_RELATED_P (insn) = 1;

      insn = emit_insn (gen_allocate_stack_worker (eax));
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
    {
      if (!frame_pointer_needed || !frame.to_allocate)
	ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
      else
	ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
				       -frame.nregs * UNITS_PER_WORD);
    }

  pic_reg_used = false;
  if (pic_offset_table_rtx
      && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
	  || current_function_profile))
    {
      unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();

      if (alt_pic_reg_used != INVALID_REGNUM)
	REGNO (pic_offset_table_rtx) = alt_pic_reg_used;

      pic_reg_used = true;
    }

  if (pic_reg_used)
    {
      insn = emit_insn (gen_set_got (pic_offset_table_rtx));

      /* Even with accurate pre-reload life analysis, we can wind up
	 deleting all references to the pic register after reload.
	 Consider if cross-jumping unifies two sides of a branch
	 controlled by a comparison vs the only read from a global.
	 In which case, allow the set_got to be deleted, though we're
	 too late to do anything about the ebx save in the prologue.  */
      REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
    }

  /* Prevent function calls from being scheduled before the call to mcount.
     In the pic_reg_used case, make sure that the got load isn't deleted.  */
  if (current_function_profile)
    emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
}
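/* Illustrative result (assumed numbers, not from the original source):
   for a function that needs a frame pointer, saves %ebx with a push,
   and allocates 24 bytes of locals, the expansion above amounts to

	pushl %ebp
	movl  %esp, %ebp
	pushl %ebx
	subl  $24, %esp

   With the move-based fast prologue the push becomes a plain store
   once the subtraction has made room for it.  */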

/* Emit code to restore saved registers using MOV insns.  First register
   is restored from POINTER + OFFSET.  */
static void
ix86_emit_restore_regs_using_mov (rtx pointer, int offset, int maybe_eh_return)
{
  int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (ix86_save_reg (regno, maybe_eh_return))
      {
	emit_move_insn (gen_rtx_REG (Pmode, regno),
			adjust_address (gen_rtx_MEM (Pmode, pointer),
					Pmode, offset));
	offset += UNITS_PER_WORD;
      }
}

/* Restore function stack, frame, and registers.  */

void
ix86_expand_epilogue (int style)
{
  int regno;
  int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
  struct ix86_frame frame;
  HOST_WIDE_INT offset;

  ix86_compute_frame_layout (&frame);

  /* Calculate start of saved registers relative to ebp.  Special care
     must be taken for the normal return case of a function using
     eh_return: the eax and edx registers are marked as saved, but not
     restored along this path.  */
  offset = frame.nregs;
  if (current_function_calls_eh_return && style != 2)
    offset -= 2;
  offset *= -UNITS_PER_WORD;

  /* If we're only restoring one register and sp is not valid then
     use a move instruction to restore the register, since it's
     less work than reloading sp and popping the register.

     The default code results in a stack adjustment using an add/lea
     instruction, while this code results in a LEAVE instruction (or
     discrete equivalent), so it is profitable in some other cases as
     well.  Especially when there are no registers to restore.  We also
     use this code when TARGET_USE_LEAVE and there is exactly one
     register to pop.  This heuristic may need some tuning in the
     future.  */
  if ((!sp_valid && frame.nregs <= 1)
      || (TARGET_EPILOGUE_USING_MOVE
	  && cfun->machine->use_fast_prologue_epilogue
	  && (frame.nregs > 1 || frame.to_allocate))
      || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
      || (frame_pointer_needed && TARGET_USE_LEAVE
	  && cfun->machine->use_fast_prologue_epilogue
	  && frame.nregs == 1)
      || current_function_calls_eh_return)
    {
      /* Restore registers.  We can use ebp or esp to address the memory
	 locations.  If both are available, default to ebp, since offsets
	 are known to be small.  Only exception is esp pointing directly to the
	 end of block of saved registers, where we may simplify addressing
	 mode.  */

      if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
	ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
					  frame.to_allocate, style == 2);
      else
	ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
					  offset, style == 2);

      /* eh_return epilogues need %ecx added to the stack pointer.  */
      if (style == 2)
	{
	  rtx tmp, sa = EH_RETURN_STACKADJ_RTX;

	  if (frame_pointer_needed)
	    {
	      tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
	      tmp = plus_constant (tmp, UNITS_PER_WORD);
	      emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));

	      tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
	      emit_move_insn (hard_frame_pointer_rtx, tmp);

	      emit_insn (gen_pro_epilogue_adjust_stack
			 (stack_pointer_rtx, sa, const0_rtx));
	    }
	  else
	    {
	      tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
	      tmp = plus_constant (tmp, (frame.to_allocate
					 + frame.nregs * UNITS_PER_WORD));
	      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
	    }
	}
      else if (!frame_pointer_needed)
	emit_insn (gen_pro_epilogue_adjust_stack
		   (stack_pointer_rtx, stack_pointer_rtx,
		    GEN_INT (frame.to_allocate
			     + frame.nregs * UNITS_PER_WORD)));
      /* If not an i386, mov & pop is faster than "leave".  */
      else if (TARGET_USE_LEAVE || optimize_size
	       || !cfun->machine->use_fast_prologue_epilogue)
	emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
      else
	{
	  emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
						    hard_frame_pointer_rtx,
						    const0_rtx));
	  if (TARGET_64BIT)
	    emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
	  else
	    emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
	}
    }
  else
    {
      /* First step is to deallocate the stack frame so that we can
	 pop the registers.  */
      if (!sp_valid)
	{
	  if (!frame_pointer_needed)
	    abort ();
	  emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
						    hard_frame_pointer_rtx,
						    GEN_INT (offset)));
	}
      else if (frame.to_allocate)
	emit_insn (gen_pro_epilogue_adjust_stack
		   (stack_pointer_rtx, stack_pointer_rtx,
		    GEN_INT (frame.to_allocate)));

      for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
	if (ix86_save_reg (regno, false))
	  {
	    if (TARGET_64BIT)
	      emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
	    else
	      emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
	  }
      if (frame_pointer_needed)
	{
	  /* Leave results in shorter dependency chains on CPUs that are
	     able to grok it fast.  */
	  if (TARGET_USE_LEAVE)
	    emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
	  else if (TARGET_64BIT)
	    emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
	  else
	    emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
	}
    }

  /* Sibcall epilogues don't want a return instruction.  */
  if (style == 0)
    return;

  if (current_function_pops_args && current_function_args_size)
    {
      rtx popc = GEN_INT (current_function_pops_args);

      /* i386 can only pop 64K bytes.  If asked to pop more, pop
	 return address, do explicit add, and jump indirectly to the
	 caller.  */

      if (current_function_pops_args >= 65536)
	{
	  rtx ecx = gen_rtx_REG (SImode, 2);

	  /* There is no "pascal" calling convention in the 64bit ABI.  */
	  if (TARGET_64BIT)
	    abort ();

	  emit_insn (gen_popsi1 (ecx));
	  emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
	  emit_jump_insn (gen_return_indirect_internal (ecx));
	}
      else
	emit_jump_insn (gen_return_pop_internal (popc));
    }
  else
    emit_jump_insn (gen_return_internal ());
}
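/* Illustrative epilogues (assumed numbers, mirroring the prologue
   example earlier): the pop-based path emits roughly

	addl $24, %esp
	popl %ebx
	popl %ebp
	ret

   while the move/leave path restores %ebx with a load from the frame
   and collapses the frame teardown into a single "leave".  */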

/* Reset from the function's potential modifications.  */

static void
ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
			       HOST_WIDE_INT size ATTRIBUTE_UNUSED)
{
  if (pic_offset_table_rtx)
    REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
}
\f
/* Extract the parts of an RTL expression that is a valid memory address
   for an instruction.  Return 0 if the structure of the address is
   grossly off.  Return -1 if the address contains ASHIFT, so it is not
   strictly valid, but still used for computing length of lea instruction.  */

static int
ix86_decompose_address (register rtx addr, struct ix86_address *out)
{
  rtx base = NULL_RTX;
  rtx index = NULL_RTX;
  rtx disp = NULL_RTX;
  HOST_WIDE_INT scale = 1;
  rtx scale_rtx = NULL_RTX;
  int retval = 1;
  enum ix86_address_seg seg = SEG_DEFAULT;

  if (REG_P (addr) || GET_CODE (addr) == SUBREG)
    base = addr;
  else if (GET_CODE (addr) == PLUS)
    {
      rtx addends[4], op;
      int n = 0, i;

      op = addr;
      do
	{
	  if (n >= 4)
	    return 0;
	  addends[n++] = XEXP (op, 1);
	  op = XEXP (op, 0);
	}
      while (GET_CODE (op) == PLUS);
      if (n >= 4)
	return 0;
      addends[n] = op;

      for (i = n; i >= 0; --i)
	{
	  op = addends[i];
	  switch (GET_CODE (op))
	    {
	    case MULT:
	      if (index)
		return 0;
	      index = XEXP (op, 0);
	      scale_rtx = XEXP (op, 1);
	      break;

	    case UNSPEC:
	      if (XINT (op, 1) == UNSPEC_TP
		  && TARGET_TLS_DIRECT_SEG_REFS
		  && seg == SEG_DEFAULT)
		seg = TARGET_64BIT ? SEG_FS : SEG_GS;
	      else
		return 0;
	      break;

	    case REG:
	    case SUBREG:
	      if (!base)
		base = op;
	      else if (!index)
		index = op;
	      else
		return 0;
	      break;

	    case CONST:
	    case CONST_INT:
	    case SYMBOL_REF:
	    case LABEL_REF:
	      if (disp)
		return 0;
	      disp = op;
	      break;

	    default:
	      return 0;
	    }
	}
    }
  else if (GET_CODE (addr) == MULT)
    {
      index = XEXP (addr, 0);		/* index*scale */
      scale_rtx = XEXP (addr, 1);
    }
  else if (GET_CODE (addr) == ASHIFT)
    {
      rtx tmp;

      /* We're called for lea too, which implements ashift on occasion.  */
      index = XEXP (addr, 0);
      tmp = XEXP (addr, 1);
      if (GET_CODE (tmp) != CONST_INT)
	return 0;
      scale = INTVAL (tmp);
      if ((unsigned HOST_WIDE_INT) scale > 3)
	return 0;
      scale = 1 << scale;
      retval = -1;
    }
  else
    disp = addr;			/* displacement */

  /* Extract the integral value of scale.  */
  if (scale_rtx)
    {
      if (GET_CODE (scale_rtx) != CONST_INT)
	return 0;
      scale = INTVAL (scale_rtx);
    }

  /* Allow arg pointer and stack pointer as index if there is no scaling.  */
  if (base && index && scale == 1
      && (index == arg_pointer_rtx
	  || index == frame_pointer_rtx
	  || (REG_P (index) && REGNO (index) == STACK_POINTER_REGNUM)))
    {
      rtx tmp = base;
      base = index;
      index = tmp;
    }

  /* Special case: %ebp cannot be encoded as a base without a displacement.  */
  if ((base == hard_frame_pointer_rtx
       || base == frame_pointer_rtx
       || base == arg_pointer_rtx) && !disp)
    disp = const0_rtx;

  /* Special case: on K6, [%esi] makes the instruction vector decoded.
     Avoid this by transforming to [%esi+0].  */
  if (ix86_tune == PROCESSOR_K6 && !optimize_size
      && base && !index && !disp
      && REG_P (base)
      && REGNO_REG_CLASS (REGNO (base)) == SIREG)
    disp = const0_rtx;

  /* Special case: encode reg+reg instead of reg*2.  */
  if (!base && index && scale && scale == 2)
    base = index, scale = 1;

  /* Special case: scaling cannot be encoded without base or displacement.  */
  if (!base && !disp && index && scale != 1)
    disp = const0_rtx;

  out->base = base;
  out->index = index;
  out->disp = disp;
  out->scale = scale;
  out->seg = seg;

  return retval;
}
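/* Illustrative decomposition (not from the original source): for the
   memory operand of "movl foo+8(%ebx,%ecx,4), %eax" the incoming RTL
   is roughly

	(plus (plus (mult (reg ecx) (const_int 4)) (reg ebx))
	      (const (plus (symbol_ref "foo") (const_int 8))))

   and the function above fills OUT with base = %ebx, index = %ecx,
   scale = 4 and disp = foo+8, returning 1.  */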
\f
/* Return cost of the memory address x.
   For i386, it is better to use a complex address than let gcc copy
   the address into a reg and make a new pseudo.  But not if the address
   requires two regs - that would mean more pseudos with longer
   lifetimes.  */
static int
ix86_address_cost (rtx x)
{
  struct ix86_address parts;
  int cost = 1;

  if (!ix86_decompose_address (x, &parts))
    abort ();

  if (parts.base && GET_CODE (parts.base) == SUBREG)
    parts.base = SUBREG_REG (parts.base);
  if (parts.index && GET_CODE (parts.index) == SUBREG)
    parts.index = SUBREG_REG (parts.index);

  /* More complex memory references are better.  */
  if (parts.disp && parts.disp != const0_rtx)
    cost--;
  if (parts.seg != SEG_DEFAULT)
    cost--;

  /* Attempt to minimize number of registers in the address.  */
  if ((parts.base
       && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
      || (parts.index
	  && (!REG_P (parts.index)
	      || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
    cost++;

  if (parts.base
      && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
      && parts.index
      && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
      && parts.base != parts.index)
    cost++;

  /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
     since its predecode logic can't detect the length of instructions
     and it degenerates to vector decoding.  Increase the cost of such
     addresses here.  The penalty is minimally 2 cycles.  It may be worthwhile
     to split such addresses or even refuse such addresses at all.

     The following addressing modes are affected:
	[base+scale*index]
	[scale*index+disp]
	[base+index]

     The first and last case may be avoidable by explicitly coding the zero in
     memory address, but I don't have an AMD-K6 machine handy to check this
     theory.  */

  if (TARGET_K6
      && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
	  || (parts.disp && !parts.base && parts.index && parts.scale != 1)
	  || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
    cost += 10;

  return cost;
}
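/* Illustrative costs (worked from the rules above, not measured): a
   plain hard-register address such as (%ebx) costs 1; 4(%ebx) costs 0,
   since a non-zero displacement is credited; an address combining two
   distinct not-yet-allocated pseudos collects both register penalties
   and costs 3.  */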
\f
/* If X is a machine specific address (i.e. a symbol or label being
   referenced as a displacement from the GOT implemented using an
   UNSPEC), then return the base term.  Otherwise return X.  */

rtx
ix86_find_base_term (rtx x)
{
  rtx term;

  if (TARGET_64BIT)
    {
      if (GET_CODE (x) != CONST)
	return x;
      term = XEXP (x, 0);
      if (GET_CODE (term) == PLUS
	  && (GET_CODE (XEXP (term, 1)) == CONST_INT
	      || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
	term = XEXP (term, 0);
      if (GET_CODE (term) != UNSPEC
	  || XINT (term, 1) != UNSPEC_GOTPCREL)
	return x;

      term = XVECEXP (term, 0, 0);

      if (GET_CODE (term) != SYMBOL_REF
	  && GET_CODE (term) != LABEL_REF)
	return x;

      return term;
    }

  term = ix86_delegitimize_address (x);

  if (GET_CODE (term) != SYMBOL_REF
      && GET_CODE (term) != LABEL_REF)
    return x;

  return term;
}
\f
/* Determine if a given RTX is a valid constant.  We already know this
   satisfies CONSTANT_P.  */

bool
legitimate_constant_p (rtx x)
{
  rtx inner;

  switch (GET_CODE (x))
    {
    case SYMBOL_REF:
      /* TLS symbols are not constant.  */
      if (tls_symbolic_operand (x, Pmode))
	return false;
      break;

    case CONST:
      inner = XEXP (x, 0);

      /* Offsets of TLS symbols are never valid.
	 Discourage CSE from creating them.  */
      if (GET_CODE (inner) == PLUS
	  && tls_symbolic_operand (XEXP (inner, 0), Pmode))
	return false;

      if (GET_CODE (inner) == PLUS)
	{
	  if (GET_CODE (XEXP (inner, 1)) != CONST_INT)
	    return false;
	  inner = XEXP (inner, 0);
	}

      /* Only some unspecs are valid as "constants".  */
      if (GET_CODE (inner) == UNSPEC)
	switch (XINT (inner, 1))
	  {
	  case UNSPEC_TPOFF:
	  case UNSPEC_NTPOFF:
	    return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
	  case UNSPEC_DTPOFF:
	    return local_dynamic_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
	  default:
	    return false;
	  }
      break;

    default:
      break;
    }

  /* Otherwise we handle everything else in the move patterns.  */
  return true;
}
5637
3a04ff64
RH
5638/* Determine if it's legal to put X into the constant pool. This
5639 is not possible for the address of thread-local symbols, which
5640 is checked above. */
5641
5642static bool
b96a374d 5643ix86_cannot_force_const_mem (rtx x)
3a04ff64
RH
5644{
5645 return !legitimate_constant_p (x);
5646}
5647
f996902d
RH
5648/* Determine if a given RTX is a valid constant address. */
5649
5650bool
b96a374d 5651constant_address_p (rtx x)
f996902d 5652{
a94f136b 5653 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
f996902d
RH
5654}

/* Nonzero if the constant value X is a legitimate general operand
   when generating PIC code.  It is given that flag_pic is on and
   that X satisfies CONSTANT_P or is a CONST_DOUBLE.  */

bool
legitimate_pic_operand_p (rtx x)
{
  rtx inner;

  switch (GET_CODE (x))
    {
    case CONST:
      inner = XEXP (x, 0);

      /* Only some unspecs are valid as "constants".  */
      if (GET_CODE (inner) == UNSPEC)
	switch (XINT (inner, 1))
	  {
	  case UNSPEC_TPOFF:
	    return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
	  default:
	    return false;
	  }
      /* FALLTHRU */

    case SYMBOL_REF:
    case LABEL_REF:
      return legitimate_pic_address_disp_p (x);

    default:
      return true;
    }
}

/* Determine if a given CONST RTX is a valid memory displacement
   in PIC mode.  */

int
legitimate_pic_address_disp_p (register rtx disp)
{
  bool saw_plus;

  /* In 64bit mode we can allow direct addresses of symbols and labels
     when they are not dynamic symbols.  */
  if (TARGET_64BIT)
    {
      /* TLS references should always be enclosed in UNSPEC.  */
      if (tls_symbolic_operand (disp, GET_MODE (disp)))
	return 0;
      if (GET_CODE (disp) == SYMBOL_REF
	  && ix86_cmodel == CM_SMALL_PIC
	  && SYMBOL_REF_LOCAL_P (disp))
	return 1;
      if (GET_CODE (disp) == LABEL_REF)
	return 1;
      if (GET_CODE (disp) == CONST
	  && GET_CODE (XEXP (disp, 0)) == PLUS)
	{
	  rtx op0 = XEXP (XEXP (disp, 0), 0);
	  rtx op1 = XEXP (XEXP (disp, 0), 1);

	  /* TLS references should always be enclosed in UNSPEC.  */
	  if (tls_symbolic_operand (op0, GET_MODE (op0)))
	    return 0;
	  if (((GET_CODE (op0) == SYMBOL_REF
		&& ix86_cmodel == CM_SMALL_PIC
		&& SYMBOL_REF_LOCAL_P (op0))
	       || GET_CODE (op0) == LABEL_REF)
	      && GET_CODE (op1) == CONST_INT
	      && INTVAL (op1) < 16*1024*1024
	      && INTVAL (op1) >= -16*1024*1024)
	    return 1;
	}
    }
  if (GET_CODE (disp) != CONST)
    return 0;
  disp = XEXP (disp, 0);

  if (TARGET_64BIT)
    {
      /* Allowing PLUS expressions here would be unsafe, given the
	 limited distance allowed for GOT references.  We should not
	 need these anyway.  */
      if (GET_CODE (disp) != UNSPEC
	  || XINT (disp, 1) != UNSPEC_GOTPCREL)
	return 0;

      if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
	  && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
	return 0;
      return 1;
    }

  saw_plus = false;
  if (GET_CODE (disp) == PLUS)
    {
      if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
	return 0;
      disp = XEXP (disp, 0);
      saw_plus = true;
    }

  /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O.  */
  if (TARGET_MACHO && GET_CODE (disp) == MINUS)
    {
      if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
	  || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
	if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
	  {
	    const char *sym_name = XSTR (XEXP (disp, 1), 0);
	    if (! strcmp (sym_name, "<pic base>"))
	      return 1;
	  }
    }

  if (GET_CODE (disp) != UNSPEC)
    return 0;

  switch (XINT (disp, 1))
    {
    case UNSPEC_GOT:
      if (saw_plus)
	return false;
      return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
    case UNSPEC_GOTOFF:
      if (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
	  || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
	return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
      return false;
    case UNSPEC_GOTTPOFF:
    case UNSPEC_GOTNTPOFF:
    case UNSPEC_INDNTPOFF:
      if (saw_plus)
	return false;
      return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
    case UNSPEC_NTPOFF:
      return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
    case UNSPEC_DTPOFF:
      return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
    }

  return 0;
}
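/* Illustrative accepted forms (a sketch, not an exhaustive list): on
   32-bit targets the displacement
   (const (unspec [(symbol_ref "x")] UNSPEC_GOTOFF)), i.e. x@GOTOFF,
   is valid for a locally-defined x, while x@GOT is rejected once an
   extra PLUS offset appears, matching the saw_plus checks above.  */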

/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
   memory address for an instruction.  The MODE argument is the machine mode
   for the MEM expression that wants to use this address.

   It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS should
   convert common non-canonical forms to canonical form so that they will
   be recognized.  */

int
legitimate_address_p (enum machine_mode mode, register rtx addr, int strict)
{
  struct ix86_address parts;
  rtx base, index, disp;
  HOST_WIDE_INT scale;
  const char *reason = NULL;
  rtx reason_rtx = NULL_RTX;

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr,
	       "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
	       GET_MODE_NAME (mode), strict);
      debug_rtx (addr);
    }

  if (ix86_decompose_address (addr, &parts) <= 0)
    {
      reason = "decomposition failed";
      goto report_error;
    }

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;

  /* Validate base register.

     Don't allow SUBREGs here; it can lead to spill failures when the base
     is one word out of a two word structure, which is represented internally
     as a DImode int.  */

  if (base)
    {
      rtx reg;
      reason_rtx = base;

      if (GET_CODE (base) == SUBREG)
	reg = SUBREG_REG (base);
      else
	reg = base;

      if (GET_CODE (reg) != REG)
	{
	  reason = "base is not a register";
	  goto report_error;
	}

      if (GET_MODE (base) != Pmode)
	{
	  reason = "base is not in Pmode";
	  goto report_error;
	}

      if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
	  || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
	{
	  reason = "base is not valid";
	  goto report_error;
	}
    }

  /* Validate index register.

     Don't allow SUBREGs here; it can lead to spill failures when the index
     is one word out of a two word structure, which is represented internally
     as a DImode int.  */

  if (index)
    {
      rtx reg;
      reason_rtx = index;

      if (GET_CODE (index) == SUBREG)
	reg = SUBREG_REG (index);
      else
	reg = index;

      if (GET_CODE (reg) != REG)
	{
	  reason = "index is not a register";
	  goto report_error;
	}

      if (GET_MODE (index) != Pmode)
	{
	  reason = "index is not in Pmode";
	  goto report_error;
	}

      if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
	  || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
	{
	  reason = "index is not valid";
	  goto report_error;
	}
    }

  /* Validate scale factor.  */
  if (scale != 1)
    {
      reason_rtx = GEN_INT (scale);
      if (!index)
	{
	  reason = "scale without index";
	  goto report_error;
	}

      if (scale != 2 && scale != 4 && scale != 8)
	{
	  reason = "scale is not a valid multiplier";
	  goto report_error;
	}
    }

  /* Validate displacement.  */
  if (disp)
    {
      reason_rtx = disp;

      if (GET_CODE (disp) == CONST
	  && GET_CODE (XEXP (disp, 0)) == UNSPEC)
	switch (XINT (XEXP (disp, 0), 1))
	  {
	  case UNSPEC_GOT:
	  case UNSPEC_GOTOFF:
	  case UNSPEC_GOTPCREL:
	    if (!flag_pic)
	      abort ();
	    goto is_legitimate_pic;

	  case UNSPEC_GOTTPOFF:
	  case UNSPEC_GOTNTPOFF:
	  case UNSPEC_INDNTPOFF:
	  case UNSPEC_NTPOFF:
	  case UNSPEC_DTPOFF:
	    break;

	  default:
	    reason = "invalid address unspec";
	    goto report_error;
	  }

      else if (flag_pic && (SYMBOLIC_CONST (disp)
#if TARGET_MACHO
			    && !machopic_operand_p (disp)
#endif
			    ))
	{
	is_legitimate_pic:
	  if (TARGET_64BIT && (index || base))
	    {
	      /* foo@dtpoff(%rX) is ok.  */
	      if (GET_CODE (disp) != CONST
		  || GET_CODE (XEXP (disp, 0)) != PLUS
		  || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
		  || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
		  || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
		      && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
		{
		  reason = "non-constant pic memory reference";
		  goto report_error;
		}
	    }
	  else if (! legitimate_pic_address_disp_p (disp))
	    {
	      reason = "displacement is an invalid pic construct";
	      goto report_error;
	    }

	  /* This code used to verify that a symbolic pic displacement
	     includes the pic_offset_table_rtx register.

	     While this is a good idea, unfortunately these constructs may
	     be created by "adds using lea" optimization for incorrect
	     code like:

	     int a;
	     int foo(int i)
	       {
	         return *(&a+i);
	       }

	     This code is nonsensical, but results in addressing the
	     GOT table with pic_offset_table_rtx base.  We can't
	     just refuse it easily, since it gets matched by the
	     "addsi3" pattern, which later gets split to lea when the
	     output register differs from the input.  While this
	     can be handled by a separate addsi pattern for this case
	     that never results in lea, disabling this test seems to be
	     the easier and correct fix for the crash.  */
	}
      else if (GET_CODE (disp) != LABEL_REF
	       && GET_CODE (disp) != CONST_INT
	       && (GET_CODE (disp) != CONST
		   || !legitimate_constant_p (disp))
	       && (GET_CODE (disp) != SYMBOL_REF
		   || !legitimate_constant_p (disp)))
	{
	  reason = "displacement is not constant";
	  goto report_error;
	}
      else if (TARGET_64BIT && !x86_64_sign_extended_value (disp))
	{
	  reason = "displacement is out of range";
	  goto report_error;
	}
    }

  /* Everything looks valid.  */
  if (TARGET_DEBUG_ADDR)
    fprintf (stderr, "Success.\n");
  return TRUE;

 report_error:
  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr, "Error: %s\n", reason);
      debug_rtx (reason_rtx);
    }
  return FALSE;
}
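/* Illustrative outcomes (assumed, for orientation): 8(%ebx,%ecx,4)
   passes every check above; (%ebx,%ecx,3) fails with "scale is not a
   valid multiplier"; and with strict checking a base that is still an
   unallocated pseudo register fails with "base is not valid".  */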
\f
/* Return a unique alias set for the GOT.  */

static HOST_WIDE_INT
ix86_GOT_alias_set (void)
{
  static HOST_WIDE_INT set = -1;
  if (set == -1)
    set = new_alias_set ();
  return set;
}

/* Return a legitimate reference for ORIG (an address) using the
   register REG.  If REG is 0, a new pseudo is generated.

   There are two types of references that must be handled:

   1. Global data references must load the address from the GOT, via
      the PIC reg.  An insn is emitted to do this load, and the reg is
      returned.

   2. Static data references, constant pool addresses, and code labels
      compute the address as an offset from the GOT, whose base is in
      the PIC reg.  Static data objects have SYMBOL_FLAG_LOCAL set to
      differentiate them from global data objects.  The returned
      address is the PIC reg + an unspec constant.

   GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
   reg also appears in the address.  */

rtx
legitimize_pic_address (rtx orig, rtx reg)
{
  rtx addr = orig;
  rtx new = orig;
  rtx base;

#if TARGET_MACHO
  if (reg == 0)
    reg = gen_reg_rtx (Pmode);
  /* Use the generic Mach-O PIC machinery.  */
  return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
#endif

  if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
    new = addr;
  else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
    {
      /* This symbol may be referenced via a displacement from the PIC
	 base address (@GOTOFF).  */

      if (reload_in_progress)
	regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
      if (GET_CODE (addr) == CONST)
	addr = XEXP (addr, 0);
      if (GET_CODE (addr) == PLUS)
	{
	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
	  new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
	}
      else
	new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
      new = gen_rtx_CONST (Pmode, new);
      new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);

      if (reg != 0)
	{
	  emit_move_insn (reg, new);
	  new = reg;
	}
    }
  else if (GET_CODE (addr) == SYMBOL_REF)
    {
      if (TARGET_64BIT)
	{
	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
	  new = gen_rtx_CONST (Pmode, new);
	  new = gen_rtx_MEM (Pmode, new);
	  RTX_UNCHANGING_P (new) = 1;
	  set_mem_alias_set (new, ix86_GOT_alias_set ());

	  if (reg == 0)
	    reg = gen_reg_rtx (Pmode);
	  /* Use gen_movsi directly; otherwise the address would be
	     loaded into a register for CSE.  We don't want to CSE
	     these addresses; instead we CSE addresses from the GOT
	     table, so skip this.  */
	  emit_insn (gen_movsi (reg, new));
	  new = reg;
	}
      else
	{
	  /* This symbol must be referenced via a load from the
	     Global Offset Table (@GOT).  */

	  if (reload_in_progress)
	    regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
	  new = gen_rtx_CONST (Pmode, new);
	  new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
	  new = gen_rtx_MEM (Pmode, new);
	  RTX_UNCHANGING_P (new) = 1;
	  set_mem_alias_set (new, ix86_GOT_alias_set ());

	  if (reg == 0)
	    reg = gen_reg_rtx (Pmode);
	  emit_move_insn (reg, new);
	  new = reg;
	}
    }
  else
    {
      if (GET_CODE (addr) == CONST)
	{
	  addr = XEXP (addr, 0);

	  /* We must match stuff we generate before.  Assume the only
	     unspecs that can get here are ours.  Not that we could do
	     anything with them anyway...  */
	  if (GET_CODE (addr) == UNSPEC
	      || (GET_CODE (addr) == PLUS
		  && GET_CODE (XEXP (addr, 0)) == UNSPEC))
	    return orig;
	  if (GET_CODE (addr) != PLUS)
	    abort ();
	}
      if (GET_CODE (addr) == PLUS)
	{
	  rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);

	  /* Check first to see if this is a constant offset from a @GOTOFF
	     symbol reference.  */
	  if (local_symbolic_operand (op0, Pmode)
	      && GET_CODE (op1) == CONST_INT)
	    {
	      if (!TARGET_64BIT)
		{
		  if (reload_in_progress)
		    regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
		  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
					UNSPEC_GOTOFF);
		  new = gen_rtx_PLUS (Pmode, new, op1);
		  new = gen_rtx_CONST (Pmode, new);
		  new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);

		  if (reg != 0)
		    {
		      emit_move_insn (reg, new);
		      new = reg;
		    }
		}
	      else
		{
		  if (INTVAL (op1) < -16*1024*1024
		      || INTVAL (op1) >= 16*1024*1024)
		    new = gen_rtx_PLUS (Pmode, op0, force_reg (Pmode, op1));
		}
	    }
	  else
	    {
	      base = legitimize_pic_address (XEXP (addr, 0), reg);
	      new = legitimize_pic_address (XEXP (addr, 1),
					    base == reg ? NULL_RTX : reg);

	      if (GET_CODE (new) == CONST_INT)
		new = plus_constant (base, INTVAL (new));
	      else
		{
		  if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
		    {
		      base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
		      new = XEXP (new, 1);
		    }
		  new = gen_rtx_PLUS (Pmode, base, new);
		}
	    }
	}
    }
  return new;
}
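/* Illustrative transformations (a sketch, not from the original
   source): on 32-bit targets a local symbol "lvar" becomes

	(plus (reg pic) (const (unspec [(symbol_ref "lvar")] UNSPEC_GOTOFF)))

   i.e. lvar@GOTOFF(%ebx), while a global symbol "gvar" becomes a load
   from gvar@GOT(%ebx), the GOT slot holding the actual address.  */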
\f
/* Load the thread pointer.  If TO_REG is true, force it into a register.  */

static rtx
get_thread_pointer (int to_reg)
{
  rtx tp, reg, insn;

  tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
  if (!to_reg)
    return tp;

  reg = gen_reg_rtx (Pmode);
  insn = gen_rtx_SET (VOIDmode, reg, tp);
  insn = emit_insn (insn);

  return reg;
}

/* A subroutine of legitimize_address and ix86_expand_move.  FOR_MOV is
   false if we expect this to be used for a memory address and true if
   we expect to load the address into a register.  */

static rtx
legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
{
  rtx dest, base, off, pic;
  int type;

  switch (model)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      dest = gen_reg_rtx (Pmode);
      if (TARGET_64BIT)
	{
	  rtx rax = gen_rtx_REG (Pmode, 0), insns;

	  start_sequence ();
	  emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
	  insns = get_insns ();
	  end_sequence ();

	  emit_libcall_block (insns, dest, rax, x);
	}
      else
	emit_insn (gen_tls_global_dynamic_32 (dest, x));
      break;

    case TLS_MODEL_LOCAL_DYNAMIC:
      base = gen_reg_rtx (Pmode);
      if (TARGET_64BIT)
	{
	  rtx rax = gen_rtx_REG (Pmode, 0), insns, note;

	  start_sequence ();
	  emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
	  insns = get_insns ();
	  end_sequence ();

	  note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
	  note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
	  emit_libcall_block (insns, base, rax, note);
	}
      else
	emit_insn (gen_tls_local_dynamic_base_32 (base));

      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
      off = gen_rtx_CONST (Pmode, off);

      return gen_rtx_PLUS (Pmode, base, off);

    case TLS_MODEL_INITIAL_EXEC:
      if (TARGET_64BIT)
	{
	  pic = NULL;
	  type = UNSPEC_GOTNTPOFF;
	}
      else if (flag_pic)
	{
	  if (reload_in_progress)
	    regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
	  pic = pic_offset_table_rtx;
	  type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
	}
      else if (!TARGET_GNU_TLS)
	{
	  pic = gen_reg_rtx (Pmode);
	  emit_insn (gen_set_got (pic));
	  type = UNSPEC_GOTTPOFF;
	}
      else
	{
	  pic = NULL;
	  type = UNSPEC_INDNTPOFF;
	}

      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
      off = gen_rtx_CONST (Pmode, off);
      if (pic)
	off = gen_rtx_PLUS (Pmode, pic, off);
      off = gen_rtx_MEM (Pmode, off);
      RTX_UNCHANGING_P (off) = 1;
      set_mem_alias_set (off, ix86_GOT_alias_set ());

      if (TARGET_64BIT || TARGET_GNU_TLS)
	{
	  base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
	  off = force_reg (Pmode, off);
	  return gen_rtx_PLUS (Pmode, base, off);
	}
      else
	{
	  base = get_thread_pointer (true);
	  dest = gen_reg_rtx (Pmode);
	  emit_insn (gen_subsi3 (dest, base, off));
	}
      break;

    case TLS_MODEL_LOCAL_EXEC:
      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
			    (TARGET_64BIT || TARGET_GNU_TLS)
			    ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
      off = gen_rtx_CONST (Pmode, off);

      if (TARGET_64BIT || TARGET_GNU_TLS)
	{
	  base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
	  return gen_rtx_PLUS (Pmode, base, off);
	}
      else
	{
	  base = get_thread_pointer (true);
	  dest = gen_reg_rtx (Pmode);
	  emit_insn (gen_subsi3 (dest, base, off));
	}
      break;

    default:
      abort ();
    }

  return dest;
}
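/* Illustrative local-exec result (a sketch; the exact asm depends on
   TARGET_GNU_TLS and TARGET_TLS_DIRECT_SEG_REFS): for a thread-local
   variable "tv" the GNU-TLS path above produces thread-pointer plus
   tv@NTPOFF, i.e. something like

	movl %gs:0, %eax
	movl tv@NTPOFF(%eax), %eax

   or, with direct segment references, a single movl %gs:tv@NTPOFF, %eax.  */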

/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   MODE and WIN are passed so that this macro can use
   GO_IF_LEGITIMATE_ADDRESS.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the 80386, we handle X+REG by loading X into a register R and
   using R+REG.  R will go in a general reg and indexing will be used.
   However, if REG is a broken-out memory address or multiplication,
   nothing needs to be done because REG can certainly go in a general reg.

   When -fpic is used, special handling is needed for symbolic references.
   See comments by legitimize_pic_address in i386.c for details.  */

rtx
legitimize_address (register rtx x, register rtx oldx ATTRIBUTE_UNUSED,
		    enum machine_mode mode)
{
  int changed = 0;
  unsigned log;

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
	       GET_MODE_NAME (mode));
      debug_rtx (x);
    }

  log = tls_symbolic_operand (x, mode);
  if (log)
    return legitimize_tls_address (x, log, false);

  if (flag_pic && SYMBOLIC_CONST (x))
    return legitimize_pic_address (x, 0);

  /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
  if (GET_CODE (x) == ASHIFT
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
    {
      changed = 1;
      x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
			GEN_INT (1 << log));
    }

  if (GET_CODE (x) == PLUS)
    {
      /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */

      if (GET_CODE (XEXP (x, 0)) == ASHIFT
	  && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
	  && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
	{
	  changed = 1;
	  XEXP (x, 0) = gen_rtx_MULT (Pmode,
				      force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
				      GEN_INT (1 << log));
	}

      if (GET_CODE (XEXP (x, 1)) == ASHIFT
	  && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
	  && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
	{
	  changed = 1;
	  XEXP (x, 1) = gen_rtx_MULT (Pmode,
				      force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
				      GEN_INT (1 << log));
	}

      /* Put multiply first if it isn't already.  */
      if (GET_CODE (XEXP (x, 1)) == MULT)
	{
	  rtx tmp = XEXP (x, 0);
	  XEXP (x, 0) = XEXP (x, 1);
	  XEXP (x, 1) = tmp;
	  changed = 1;
	}

      /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can be
	 created by virtual register instantiation, register elimination, and
	 similar optimizations.  */
      if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
	{
	  changed = 1;
	  x = gen_rtx_PLUS (Pmode,
			    gen_rtx_PLUS (Pmode, XEXP (x, 0),
					  XEXP (XEXP (x, 1), 0)),
			    XEXP (XEXP (x, 1), 1));
	}

      /* Canonicalize
	 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
      else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
	       && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	       && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
	       && CONSTANT_P (XEXP (x, 1)))
	{
	  rtx constant;
	  rtx other = NULL_RTX;

	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
	    {
	      constant = XEXP (x, 1);
	      other = XEXP (XEXP (XEXP (x, 0), 1), 1);
	    }
	  else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
	    {
	      constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
	      other = XEXP (x, 1);
	    }
	  else
	    constant = 0;

	  if (constant)
	    {
	      changed = 1;
	      x = gen_rtx_PLUS (Pmode,
				gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
					      XEXP (XEXP (XEXP (x, 0), 1), 0)),
				plus_constant (other, INTVAL (constant)));
	    }
	}

      if (changed && legitimate_address_p (mode, x, FALSE))
	return x;

      if (GET_CODE (XEXP (x, 0)) == MULT)
	{
	  changed = 1;
	  XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
	}

      if (GET_CODE (XEXP (x, 1)) == MULT)
	{
	  changed = 1;
	  XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
	}

      if (changed
	  && GET_CODE (XEXP (x, 1)) == REG
	  && GET_CODE (XEXP (x, 0)) == REG)
	return x;

      if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
	{
	  changed = 1;
	  x = legitimize_pic_address (x, 0);
	}

      if (changed && legitimate_address_p (mode, x, FALSE))
	return x;

      if (GET_CODE (XEXP (x, 0)) == REG)
	{
	  register rtx temp = gen_reg_rtx (Pmode);
	  register rtx val = force_operand (XEXP (x, 1), temp);
	  if (val != temp)
	    emit_move_insn (temp, val);

	  XEXP (x, 1) = temp;
	  return x;
	}

      else if (GET_CODE (XEXP (x, 1)) == REG)
	{
	  register rtx temp = gen_reg_rtx (Pmode);
	  register rtx val = force_operand (XEXP (x, 0), temp);
	  if (val != temp)
	    emit_move_insn (temp, val);

	  XEXP (x, 0) = temp;
	  return x;
	}
    }

  return x;
}
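/* Illustrative canonicalization (not from the original source): the
   address (plus (reg A) (ashift (reg B) (const_int 2))) is first
   rewritten to use (mult (reg B) (const_int 4)), then the multiply is
   swapped to the front, giving (plus (mult (reg B) (const_int 4))
   (reg A)) - the base+index*scale form that ix86_decompose_address
   and legitimate_address_p accept.  */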
2a2ab3f9
JVA
6540\f
6541/* Print an integer constant expression in assembler syntax. Addition
6542 and subtraction are the only arithmetic that may appear in these
6543 expressions. FILE is the stdio stream to write to, X is the rtx, and
6544 CODE is the operand print code from the output string. */
6545
6546static void
b96a374d 6547output_pic_addr_const (FILE *file, rtx x, int code)
2a2ab3f9
JVA
6548{
6549 char buf[256];
6550
6551 switch (GET_CODE (x))
6552 {
6553 case PC:
6554 if (flag_pic)
6555 putc ('.', file);
6556 else
6557 abort ();
6558 break;
6559
6560 case SYMBOL_REF:
91bb873f 6561 assemble_name (file, XSTR (x, 0));
12969f45 6562 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
91bb873f 6563 fputs ("@PLT", file);
6564 break;
6565
6566 case LABEL_REF:
6567 x = XEXP (x, 0);
6568 /* FALLTHRU */
6569 case CODE_LABEL:
6570 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
6571 assemble_name (asm_out_file, buf);
6572 break;
6573
6574 case CONST_INT:
f64cecad 6575 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6576 break;
6577
6578 case CONST:
6579 /* This used to output parentheses around the expression,
6580 but that does not work on the 386 (either ATT or BSD assembler). */
6581 output_pic_addr_const (file, XEXP (x, 0), code);
6582 break;
6583
6584 case CONST_DOUBLE:
6585 if (GET_MODE (x) == VOIDmode)
6586 {
6587 /* We can use %d if the number is <32 bits and positive. */
6588 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
6589 fprintf (file, "0x%lx%08lx",
6590 (unsigned long) CONST_DOUBLE_HIGH (x),
6591 (unsigned long) CONST_DOUBLE_LOW (x));
2a2ab3f9 6592 else
f64cecad 6593 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
6594 }
6595 else
6596 /* We can't handle floating point constants;
6597 PRINT_OPERAND must handle them. */
6598 output_operand_lossage ("floating constant misused");
6599 break;
6600
6601 case PLUS:
e9a25f70 6602 /* Some assemblers need integer constants to appear first. */
6603 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
6604 {
2a2ab3f9 6605 output_pic_addr_const (file, XEXP (x, 0), code);
e075ae69 6606 putc ('+', file);
e9a25f70 6607 output_pic_addr_const (file, XEXP (x, 1), code);
2a2ab3f9 6608 }
91bb873f 6609 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
2a2ab3f9 6610 {
2a2ab3f9 6611 output_pic_addr_const (file, XEXP (x, 1), code);
e075ae69 6612 putc ('+', file);
e9a25f70 6613 output_pic_addr_const (file, XEXP (x, 0), code);
2a2ab3f9 6614 }
6615 else
6616 abort ();
6617 break;
6618
6619 case MINUS:
6620 if (!TARGET_MACHO)
6621 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
2a2ab3f9 6622 output_pic_addr_const (file, XEXP (x, 0), code);
e075ae69 6623 putc ('-', file);
2a2ab3f9 6624 output_pic_addr_const (file, XEXP (x, 1), code);
6625 if (!TARGET_MACHO)
6626 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
6627 break;
6628
6629 case UNSPEC:
6630 if (XVECLEN (x, 0) != 1)
5bf0ebab 6631 abort ();
6632 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
6633 switch (XINT (x, 1))
77ebd435 6634 {
8ee41eaf 6635 case UNSPEC_GOT:
6636 fputs ("@GOT", file);
6637 break;
8ee41eaf 6638 case UNSPEC_GOTOFF:
6639 fputs ("@GOTOFF", file);
6640 break;
8ee41eaf 6641 case UNSPEC_GOTPCREL:
edfe8595 6642 fputs ("@GOTPCREL(%rip)", file);
6eb791fc 6643 break;
f996902d 6644 case UNSPEC_GOTTPOFF:
dea73790 6645 /* FIXME: This might be @TPOFF in Sun ld too. */
6646 fputs ("@GOTTPOFF", file);
6647 break;
6648 case UNSPEC_TPOFF:
6649 fputs ("@TPOFF", file);
6650 break;
6651 case UNSPEC_NTPOFF:
6652 if (TARGET_64BIT)
6653 fputs ("@TPOFF", file);
6654 else
6655 fputs ("@NTPOFF", file);
6656 break;
6657 case UNSPEC_DTPOFF:
6658 fputs ("@DTPOFF", file);
6659 break;
dea73790 6660 case UNSPEC_GOTNTPOFF:
6661 if (TARGET_64BIT)
6662 fputs ("@GOTTPOFF(%rip)", file);
6663 else
6664 fputs ("@GOTNTPOFF", file);
6665 break;
6666 case UNSPEC_INDNTPOFF:
6667 fputs ("@INDNTPOFF", file);
6668 break;
6669 default:
6670 output_operand_lossage ("invalid UNSPEC as operand");
6671 break;
6672 }
6673 break;
6674
6675 default:
6676 output_operand_lossage ("invalid expression as operand");
6677 }
6678}
1865dbb5 6679
0f290768 6680/* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
6681 We need to handle our special PIC relocations. */
6682
0f290768 6683void
b96a374d 6684i386_dwarf_output_addr_const (FILE *file, rtx x)
1865dbb5 6685{
14f73b5a 6686#ifdef ASM_QUAD
18b5b8d6 6687 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
6688#else
6689 if (TARGET_64BIT)
6690 abort ();
18b5b8d6 6691 fprintf (file, "%s", ASM_LONG);
14f73b5a 6692#endif
6693 if (flag_pic)
6694 output_pic_addr_const (file, x, '\0');
6695 else
6696 output_addr_const (file, x);
6697 fputc ('\n', file);
6698}
6699
6700/* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
6701 We need to emit DTP-relative relocations. */
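/* Concretely, for SIZE == 4 this emits e.g. ".long sym@DTPOFF", and
   for SIZE == 8 it emits ".long sym@DTPOFF, 0", since the relocated
   value itself is only 32 bits wide.  ("sym" is a placeholder
   name.)  */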
6702
6703void
b96a374d 6704i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
b9203463 6705{
6706 fputs (ASM_LONG, file);
6707 output_addr_const (file, x);
6708 fputs ("@DTPOFF", file);
6709 switch (size)
6710 {
6711 case 4:
6712 break;
6713 case 8:
75d38379 6714 fputs (", 0", file);
b9203463 6715 break;
6716 default:
6717 abort ();
6718 }
6719}
6720
6721/* In the name of slightly smaller debug output, and to cater to
6722 general assembler lossage, recognize PIC+GOTOFF and turn it back
6723 into a direct symbol reference. */
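/* E.g. a (mem (plus (reg pic) (const (unspec [sym] UNSPEC_GOT))))
   load collapses back to plain "sym", and the GOTOFF forms similarly
   lose their PIC-register addend.  ("sym" is a placeholder.)  */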
6724
69bd9368 6725static rtx
b96a374d 6726ix86_delegitimize_address (rtx orig_x)
1865dbb5 6727{
ec65b2e3 6728 rtx x = orig_x, y;
1865dbb5 6729
6730 if (GET_CODE (x) == MEM)
6731 x = XEXP (x, 0);
6732
6733 if (TARGET_64BIT)
6734 {
6735 if (GET_CODE (x) != CONST
6736 || GET_CODE (XEXP (x, 0)) != UNSPEC
8ee41eaf 6737 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
4c8c0dec 6738 || GET_CODE (orig_x) != MEM)
6739 return orig_x;
6740 return XVECEXP (XEXP (x, 0), 0, 0);
6741 }
6742
1865dbb5 6743 if (GET_CODE (x) != PLUS
6744 || GET_CODE (XEXP (x, 1)) != CONST)
6745 return orig_x;
6746
6747 if (GET_CODE (XEXP (x, 0)) == REG
6748 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6749 /* %ebx + GOT/GOTOFF */
6750 y = NULL;
6751 else if (GET_CODE (XEXP (x, 0)) == PLUS)
6752 {
6753 /* %ebx + %reg * scale + GOT/GOTOFF */
6754 y = XEXP (x, 0);
6755 if (GET_CODE (XEXP (y, 0)) == REG
6756 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
6757 y = XEXP (y, 1);
6758 else if (GET_CODE (XEXP (y, 1)) == REG
6759 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
6760 y = XEXP (y, 0);
6761 else
6762 return orig_x;
6763 if (GET_CODE (y) != REG
6764 && GET_CODE (y) != MULT
6765 && GET_CODE (y) != ASHIFT)
6766 return orig_x;
6767 }
6768 else
6769 return orig_x;
6770
6771 x = XEXP (XEXP (x, 1), 0);
6772 if (GET_CODE (x) == UNSPEC
6773 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6774 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
6775 {
6776 if (y)
6777 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
6778 return XVECEXP (x, 0, 0);
6779 }
6780
6781 if (GET_CODE (x) == PLUS
6782 && GET_CODE (XEXP (x, 0)) == UNSPEC
6783 && GET_CODE (XEXP (x, 1)) == CONST_INT
6784 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6785 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
6786 && GET_CODE (orig_x) != MEM)))
6787 {
6788 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
6789 if (y)
6790 return gen_rtx_PLUS (Pmode, y, x);
6791 return x;
6792 }
6793
6794 return orig_x;
6795}
2a2ab3f9 6796\f
a269a03c 6797static void
6798put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
6799 int fp, FILE *file)
a269a03c 6800{
6801 const char *suffix;
6802
6803 if (mode == CCFPmode || mode == CCFPUmode)
6804 {
6805 enum rtx_code second_code, bypass_code;
6806 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
6807 if (bypass_code != NIL || second_code != NIL)
b531087a 6808 abort ();
6809 code = ix86_fp_compare_code_to_integer (code);
6810 mode = CCmode;
6811 }
6812 if (reverse)
6813 code = reverse_condition (code);
e075ae69 6814
6815 switch (code)
6816 {
6817 case EQ:
6818 suffix = "e";
6819 break;
6820 case NE:
6821 suffix = "ne";
6822 break;
a269a03c 6823 case GT:
7e08e190 6824 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
6825 abort ();
6826 suffix = "g";
a269a03c 6827 break;
a269a03c 6828 case GTU:
6829 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
6830 Those same assemblers have the same but opposite lossage on cmov. */
7e08e190 6831 if (mode != CCmode)
0f290768 6832 abort ();
e075ae69 6833 suffix = fp ? "nbe" : "a";
a269a03c 6834 break;
a269a03c 6835 case LT:
9076b9c1 6836 if (mode == CCNOmode || mode == CCGOCmode)
a269a03c 6837 suffix = "s";
7e08e190 6838 else if (mode == CCmode || mode == CCGCmode)
e075ae69 6839 suffix = "l";
9076b9c1 6840 else
0f290768 6841 abort ();
a269a03c 6842 break;
a269a03c 6843 case LTU:
9076b9c1 6844 if (mode != CCmode)
0f290768 6845 abort ();
6846 suffix = "b";
6847 break;
a269a03c 6848 case GE:
9076b9c1 6849 if (mode == CCNOmode || mode == CCGOCmode)
a269a03c 6850 suffix = "ns";
7e08e190 6851 else if (mode == CCmode || mode == CCGCmode)
e075ae69 6852 suffix = "ge";
9076b9c1 6853 else
0f290768 6854 abort ();
a269a03c 6855 break;
a269a03c 6856 case GEU:
e075ae69 6857 /* ??? As above. */
7e08e190 6858 if (mode != CCmode)
0f290768 6859 abort ();
7e08e190 6860 suffix = fp ? "nb" : "ae";
a269a03c 6861 break;
a269a03c 6862 case LE:
7e08e190 6863 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
e075ae69
RH
6864 abort ();
6865 suffix = "le";
a269a03c 6866 break;
a269a03c 6867 case LEU:
6868 if (mode != CCmode)
6869 abort ();
7e08e190 6870 suffix = "be";
a269a03c 6871 break;
3a3677ff 6872 case UNORDERED:
9e7adcb3 6873 suffix = fp ? "u" : "p";
3a3677ff
RH
6874 break;
6875 case ORDERED:
9e7adcb3 6876 suffix = fp ? "nu" : "np";
3a3677ff 6877 break;
6878 default:
6879 abort ();
6880 }
6881 fputs (suffix, file);
6882}
6883
e075ae69 6884void
b96a374d 6885print_reg (rtx x, int code, FILE *file)
6886{
e075ae69 6887 if (REGNO (x) == ARG_POINTER_REGNUM
564d80f4 6888 || REGNO (x) == FRAME_POINTER_REGNUM
6889 || REGNO (x) == FLAGS_REG
6890 || REGNO (x) == FPSR_REG)
6891 abort ();
e9a25f70 6892
5bf0ebab 6893 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
6894 putc ('%', file);
6895
ef6257cd 6896 if (code == 'w' || MMX_REG_P (x))
6897 code = 2;
6898 else if (code == 'b')
6899 code = 1;
6900 else if (code == 'k')
6901 code = 4;
6902 else if (code == 'q')
6903 code = 8;
6904 else if (code == 'y')
6905 code = 3;
6906 else if (code == 'h')
6907 code = 0;
6908 else
6909 code = GET_MODE_SIZE (GET_MODE (x));
e9a25f70 6910
6911 /* Irritatingly, AMD extended registers use a different naming convention
6912 from the normal registers. */
6913 if (REX_INT_REG_P (x))
6914 {
6915 if (!TARGET_64BIT)
6916 abort ();
6917 switch (code)
6918 {
ef6257cd 6919 case 0:
c725bd79 6920 error ("extended registers have no high halves");
6921 break;
6922 case 1:
6923 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
6924 break;
6925 case 2:
6926 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
6927 break;
6928 case 4:
6929 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
6930 break;
6931 case 8:
6932 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
6933 break;
6934 default:
c725bd79 6935 error ("unsupported operand size for extended register");
6936 break;
6937 }
6938 return;
6939 }
6940 switch (code)
6941 {
6942 case 3:
6943 if (STACK_TOP_P (x))
6944 {
6945 fputs ("st(0)", file);
6946 break;
6947 }
6948 /* FALLTHRU */
e075ae69 6949 case 8:
3f3f2124 6950 case 4:
e075ae69 6951 case 12:
446988df 6952 if (! ANY_FP_REG_P (x))
885a70fd 6953 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
e075ae69 6954 /* FALLTHRU */
a7180f70 6955 case 16:
6956 case 2:
6957 fputs (hi_reg_name[REGNO (x)], file);
6958 break;
6959 case 1:
6960 fputs (qi_reg_name[REGNO (x)], file);
6961 break;
6962 case 0:
6963 fputs (qi_high_reg_name[REGNO (x)], file);
6964 break;
6965 default:
6966 abort ();
fe25fea3 6967 }
6968}
6969
6970/* Locate some local-dynamic symbol still in use by this function
6971 so that we can print its name in some tls_local_dynamic_base
6972 pattern. */
6973
6974static const char *
b96a374d 6975get_some_local_dynamic_name (void)
6976{
6977 rtx insn;
6978
6979 if (cfun->machine->some_ld_name)
6980 return cfun->machine->some_ld_name;
6981
6982 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
6983 if (INSN_P (insn)
6984 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
6985 return cfun->machine->some_ld_name;
6986
6987 abort ();
6988}
6989
6990static int
b96a374d 6991get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
6992{
6993 rtx x = *px;
6994
6995 if (GET_CODE (x) == SYMBOL_REF
6996 && local_dynamic_symbolic_operand (x, Pmode))
6997 {
6998 cfun->machine->some_ld_name = XSTR (x, 0);
6999 return 1;
7000 }
7001
7002 return 0;
7003}
7004
2a2ab3f9 7005/* Meaning of CODE:
fe25fea3 7006 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
e5cb57e8 7007 C -- print opcode suffix for set/cmov insn.
fe25fea3 7008 c -- like C, but print reversed condition
ef6257cd 7009 F,f -- likewise, but for floating-point.
7010 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
7011 otherwise nothing
7012 R -- print the prefix for register names.
7013 z -- print the opcode suffix for the size of the current operand.
7014 * -- print a star (in certain assembler syntax)
fb204271 7015 A -- print an absolute memory reference.
2a2ab3f9 7016 w -- print the operand as if it's a "word" (HImode) even if it isn't.
7017 s -- print a shift double count, followed by the assembler's argument
7018 delimiter.
7019 b -- print the QImode name of the register for the indicated operand.
7020 %b0 would print %al if operands[0] is reg 0.
7021 w -- likewise, print the HImode name of the register.
7022 k -- likewise, print the SImode name of the register.
3f3f2124 7023 q -- likewise, print the DImode name of the register.
7024 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
7025 y -- print "st(0)" instead of "st" as a register.
a46d1d38 7026 D -- print condition for SSE cmp instruction.
7027 P -- if PIC, print an @PLT suffix.
7028 X -- don't print any sort of PIC '@' suffix for a symbol.
f996902d 7029 & -- print some in-use local-dynamic symbol name.
a46d1d38 7030 */
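/* As a concrete example of the size codes above: if operands[0] is
   register 0, then "%b0", "%w0", "%k0" and "%q0" print "%al", "%ax",
   "%eax" and "%rax" respectively in AT&T output.  */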
7031
7032void
b96a374d 7033print_operand (FILE *file, rtx x, int code)
7034{
7035 if (code)
7036 {
7037 switch (code)
7038 {
7039 case '*':
80f33d06 7040 if (ASSEMBLER_DIALECT == ASM_ATT)
7041 putc ('*', file);
7042 return;
7043
7044 case '&':
7045 assemble_name (file, get_some_local_dynamic_name ());
7046 return;
7047
fb204271 7048 case 'A':
80f33d06 7049 if (ASSEMBLER_DIALECT == ASM_ATT)
fb204271 7050 putc ('*', file);
80f33d06 7051 else if (ASSEMBLER_DIALECT == ASM_INTEL)
7052 {
7053 /* Intel syntax. For absolute addresses, registers should not
7054 be surrounded by braces. */
7055 if (GET_CODE (x) != REG)
7056 {
7057 putc ('[', file);
7058 PRINT_OPERAND (file, x, 0);
7059 putc (']', file);
7060 return;
7061 }
7062 }
7063 else
7064 abort ();
7065
7066 PRINT_OPERAND (file, x, 0);
7067 return;
7068
7069
2a2ab3f9 7070 case 'L':
80f33d06 7071 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 7072 putc ('l', file);
7073 return;
7074
7075 case 'W':
80f33d06 7076 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 7077 putc ('w', file);
7078 return;
7079
7080 case 'B':
80f33d06 7081 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 7082 putc ('b', file);
7083 return;
7084
7085 case 'Q':
80f33d06 7086 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 7087 putc ('l', file);
7088 return;
7089
7090 case 'S':
80f33d06 7091 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 7092 putc ('s', file);
7093 return;
7094
5f1ec3e6 7095 case 'T':
80f33d06 7096 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 7097 putc ('t', file);
7098 return;
7099
7100 case 'z':
7101 /* 387 opcodes don't get size suffixes if the operands are
0f290768 7102 registers. */
7103 if (STACK_REG_P (x))
7104 return;
7105
7106 /* Likewise if using Intel opcodes. */
7107 if (ASSEMBLER_DIALECT == ASM_INTEL)
7108 return;
7109
7110 /* Derive the opcode suffix from the size of the operand. */
7111 switch (GET_MODE_SIZE (GET_MODE (x)))
7112 {
2a2ab3f9 7113 case 2:
7114#ifdef HAVE_GAS_FILDS_FISTS
7115 putc ('s', file);
7116#endif
7117 return;
7118
7119 case 4:
7120 if (GET_MODE (x) == SFmode)
7121 {
e075ae69 7122 putc ('s', file);
7123 return;
7124 }
7125 else
e075ae69 7126 putc ('l', file);
7127 return;
7128
5f1ec3e6 7129 case 12:
2b589241 7130 case 16:
7131 putc ('t', file);
7132 return;
5f1ec3e6 7133
7134 case 8:
7135 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
7136 {
7137#ifdef GAS_MNEMONICS
e075ae69 7138 putc ('q', file);
56c0e8fa 7139#else
7140 putc ('l', file);
7141 putc ('l', file);
7142#endif
7143 }
7144 else
7145 putc ('l', file);
2a2ab3f9 7146 return;
7147
7148 default:
7149 abort ();
2a2ab3f9 7150 }
7151
7152 case 'b':
7153 case 'w':
7154 case 'k':
3f3f2124 7155 case 'q':
7156 case 'h':
7157 case 'y':
5cb6195d 7158 case 'X':
e075ae69 7159 case 'P':
4af3895e
JVA
7160 break;
7161
7162 case 's':
7163 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
7164 {
7165 PRINT_OPERAND (file, x, 0);
e075ae69 7166 putc (',', file);
2d49677f 7167 }
7168 return;
7169
7170 case 'D':
7171 /* Little bit of braindamage here. The SSE compare instructions
7172 use completely different names for the comparisons than the
7173 fp conditional moves do. */
7174 switch (GET_CODE (x))
7175 {
7176 case EQ:
7177 case UNEQ:
7178 fputs ("eq", file);
7179 break;
7180 case LT:
7181 case UNLT:
7182 fputs ("lt", file);
7183 break;
7184 case LE:
7185 case UNLE:
7186 fputs ("le", file);
7187 break;
7188 case UNORDERED:
7189 fputs ("unord", file);
7190 break;
7191 case NE:
7192 case LTGT:
7193 fputs ("neq", file);
7194 break;
7195 case UNGE:
7196 case GE:
7197 fputs ("nlt", file);
7198 break;
7199 case UNGT:
7200 case GT:
7201 fputs ("nle", file);
7202 break;
7203 case ORDERED:
7204 fputs ("ord", file);
7205 break;
7206 default:
7207 abort ();
7208 break;
7209 }
7210 return;
048b1c95 7211 case 'O':
f6f5dff2 7212#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7213 if (ASSEMBLER_DIALECT == ASM_ATT)
7214 {
7215 switch (GET_MODE (x))
7216 {
7217 case HImode: putc ('w', file); break;
7218 case SImode:
7219 case SFmode: putc ('l', file); break;
7220 case DImode:
7221 case DFmode: putc ('q', file); break;
7222 default: abort ();
7223 }
7224 putc ('.', file);
7225 }
7226#endif
7227 return;
1853aadd 7228 case 'C':
e075ae69 7229 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
1853aadd 7230 return;
fe25fea3 7231 case 'F':
f6f5dff2 7232#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7233 if (ASSEMBLER_DIALECT == ASM_ATT)
7234 putc ('.', file);
7235#endif
e075ae69 7236 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
7237 return;
7238
e9a25f70 7239 /* Like above, but reverse condition */
e075ae69 7240 case 'c':
fce5a9f2 7241 /* Check to see if argument to %c is really a constant
7242 and not a condition code which needs to be reversed. */
7243 if (GET_RTX_CLASS (GET_CODE (x)) != '<')
7244 {
7245 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
7246 return;
7247 }
7248 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
7249 return;
fe25fea3 7250 case 'f':
f6f5dff2 7251#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7252 if (ASSEMBLER_DIALECT == ASM_ATT)
7253 putc ('.', file);
7254#endif
e075ae69 7255 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
1853aadd 7256 return;
7257 case '+':
7258 {
7259 rtx x;
e5cb57e8 7260
7261 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
7262 return;
a4f31c00 7263
7264 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
7265 if (x)
7266 {
7267 int pred_val = INTVAL (XEXP (x, 0));
7268
7269 if (pred_val < REG_BR_PROB_BASE * 45 / 100
7270 || pred_val > REG_BR_PROB_BASE * 55 / 100)
7271 {
7272 int taken = pred_val > REG_BR_PROB_BASE / 2;
7273 int cputaken = final_forward_branch_p (current_output_insn) == 0;
7274
7275 /* Emit hints only in cases where the default branch prediction
d1f87653 7276 heuristics would fail. */
7277 if (taken != cputaken)
7278 {
7279 /* We use 3e (DS) prefix for taken branches and
7280 2e (CS) prefix for not taken branches. */
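	  /* (DS is prefix byte 0x3e, CS is 0x2e; some processors read
	     these prefixes on a conditional branch as static
	     taken/not-taken hints.)  */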
7281 if (taken)
7282 fputs ("ds ; ", file);
7283 else
7284 fputs ("cs ; ", file);
7285 }
7286 }
7287 }
7288 return;
7289 }
4af3895e 7290 default:
a52453cc 7291 output_operand_lossage ("invalid operand code `%c'", code);
7292 }
7293 }
e9a25f70 7294
7295 if (GET_CODE (x) == REG)
7296 {
7297 PRINT_REG (x, code, file);
7298 }
e9a25f70 7299
7300 else if (GET_CODE (x) == MEM)
7301 {
e075ae69 7302 /* No `byte ptr' prefix for call instructions. */
80f33d06 7303 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
2a2ab3f9 7304 {
69ddee61 7305 const char * size;
7306 switch (GET_MODE_SIZE (GET_MODE (x)))
7307 {
7308 case 1: size = "BYTE"; break;
7309 case 2: size = "WORD"; break;
7310 case 4: size = "DWORD"; break;
7311 case 8: size = "QWORD"; break;
7312 case 12: size = "XWORD"; break;
a7180f70 7313 case 16: size = "XMMWORD"; break;
e075ae69 7314 default:
564d80f4 7315 abort ();
e075ae69 7316 }
7317
7318 /* Check for explicit size override (codes 'b', 'w' and 'k') */
7319 if (code == 'b')
7320 size = "BYTE";
7321 else if (code == 'w')
7322 size = "WORD";
7323 else if (code == 'k')
7324 size = "DWORD";
7325
7326 fputs (size, file);
7327 fputs (" PTR ", file);
2a2ab3f9 7328 }
7329
7330 x = XEXP (x, 0);
0d7d98ee 7331 /* Avoid (%rip) for call operands. */
d10f5ecf 7332 if (CONSTANT_ADDRESS_P (x) && code == 'P'
7333 && GET_CODE (x) != CONST_INT)
7334 output_addr_const (file, x);
7335 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
7336 output_operand_lossage ("invalid constraints for operand");
2a2ab3f9 7337 else
e075ae69 7338 output_address (x);
2a2ab3f9 7339 }
e9a25f70 7340
2a2ab3f9
JVA
7341 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
7342 {
7343 REAL_VALUE_TYPE r;
7344 long l;
7345
7346 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7347 REAL_VALUE_TO_TARGET_SINGLE (r, l);
e075ae69 7348
80f33d06 7349 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 7350 putc ('$', file);
52267fcb 7351 fprintf (file, "0x%lx", l);
5f1ec3e6 7352 }
e9a25f70 7353
7354 /* These float cases don't actually occur as immediate operands. */
7355 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
5f1ec3e6 7356 {
7357 char dstr[30];
7358
da6eec72 7359 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
5f1ec3e6 7360 fprintf (file, "%s", dstr);
2a2ab3f9 7361 }
e9a25f70 7362
7363 else if (GET_CODE (x) == CONST_DOUBLE
7364 && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
2a2ab3f9 7365 {
7366 char dstr[30];
7367
da6eec72 7368 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
5f1ec3e6 7369 fprintf (file, "%s", dstr);
2a2ab3f9 7370 }
f996902d 7371
79325812 7372 else
2a2ab3f9 7373 {
4af3895e 7374 if (code != 'P')
2a2ab3f9 7375 {
695dac07 7376 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
e075ae69 7377 {
80f33d06 7378 if (ASSEMBLER_DIALECT == ASM_ATT)
7379 putc ('$', file);
7380 }
7381 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
7382 || GET_CODE (x) == LABEL_REF)
e075ae69 7383 {
80f33d06 7384 if (ASSEMBLER_DIALECT == ASM_ATT)
7385 putc ('$', file);
7386 else
7387 fputs ("OFFSET FLAT:", file);
7388 }
2a2ab3f9 7389 }
7390 if (GET_CODE (x) == CONST_INT)
7391 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7392 else if (flag_pic)
7393 output_pic_addr_const (file, x, code);
7394 else
7395 output_addr_const (file, x);
7396 }
7397}
7398\f
7399/* Print a memory operand whose address is ADDR. */
7400
7401void
b96a374d 7402print_operand_address (FILE *file, register rtx addr)
2a2ab3f9 7403{
7404 struct ix86_address parts;
7405 rtx base, index, disp;
7406 int scale;
e9a25f70 7407
7408 if (! ix86_decompose_address (addr, &parts))
7409 abort ();
e9a25f70 7410
7411 base = parts.base;
7412 index = parts.index;
7413 disp = parts.disp;
7414 scale = parts.scale;
e9a25f70 7415
7416 switch (parts.seg)
7417 {
7418 case SEG_DEFAULT:
7419 break;
7420 case SEG_FS:
7421 case SEG_GS:
7422 if (USER_LABEL_PREFIX[0] == 0)
7423 putc ('%', file);
7424 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
7425 break;
7426 default:
7427 abort ();
7428 }
7429
7430 if (!base && !index)
7431 {
7432 /* Displacement only requires special attention. */
e9a25f70 7433
e075ae69 7434 if (GET_CODE (disp) == CONST_INT)
2a2ab3f9 7435 {
74dc3e94 7436 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
7437 {
7438 if (USER_LABEL_PREFIX[0] == 0)
7439 putc ('%', file);
7440 fputs ("ds:", file);
7441 }
74dc3e94 7442 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
2a2ab3f9 7443 }
e075ae69 7444 else if (flag_pic)
74dc3e94 7445 output_pic_addr_const (file, disp, 0);
e075ae69 7446 else
74dc3e94 7447 output_addr_const (file, disp);
7448
7449 /* Use one byte shorter RIP relative addressing for 64bit mode. */
edfe8595 7450 if (TARGET_64BIT
7451 && ((GET_CODE (disp) == SYMBOL_REF
7452 && ! tls_symbolic_operand (disp, GET_MODE (disp)))
7453 || GET_CODE (disp) == LABEL_REF
7454 || (GET_CODE (disp) == CONST
7455 && GET_CODE (XEXP (disp, 0)) == PLUS
7456 && (GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
7457 || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
7458 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)))
0d7d98ee 7459 fputs ("(%rip)", file);
7460 }
7461 else
7462 {
80f33d06 7463 if (ASSEMBLER_DIALECT == ASM_ATT)
2a2ab3f9 7464 {
e075ae69 7465 if (disp)
2a2ab3f9 7466 {
c399861d 7467 if (flag_pic)
7468 output_pic_addr_const (file, disp, 0);
7469 else if (GET_CODE (disp) == LABEL_REF)
7470 output_asm_label (disp);
2a2ab3f9 7471 else
e075ae69 7472 output_addr_const (file, disp);
7473 }
7474
7475 putc ('(', file);
7476 if (base)
7477 PRINT_REG (base, 0, file);
7478 if (index)
2a2ab3f9 7479 {
7480 putc (',', file);
7481 PRINT_REG (index, 0, file);
7482 if (scale != 1)
7483 fprintf (file, ",%d", scale);
2a2ab3f9 7484 }
e075ae69 7485 putc (')', file);
2a2ab3f9 7486 }
7487 else
7488 {
e075ae69 7489 rtx offset = NULL_RTX;
e9a25f70 7490
7491 if (disp)
7492 {
7493 /* Pull out the offset of a symbol; print any symbol itself. */
7494 if (GET_CODE (disp) == CONST
7495 && GET_CODE (XEXP (disp, 0)) == PLUS
7496 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
7497 {
7498 offset = XEXP (XEXP (disp, 0), 1);
7499 disp = gen_rtx_CONST (VOIDmode,
7500 XEXP (XEXP (disp, 0), 0));
7501 }
ce193852 7502
7503 if (flag_pic)
7504 output_pic_addr_const (file, disp, 0);
7505 else if (GET_CODE (disp) == LABEL_REF)
7506 output_asm_label (disp);
7507 else if (GET_CODE (disp) == CONST_INT)
7508 offset = disp;
7509 else
7510 output_addr_const (file, disp);
7511 }
e9a25f70 7512
7513 putc ('[', file);
7514 if (base)
a8620236 7515 {
7516 PRINT_REG (base, 0, file);
7517 if (offset)
7518 {
7519 if (INTVAL (offset) >= 0)
7520 putc ('+', file);
7521 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7522 }
a8620236 7523 }
7524 else if (offset)
7525 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
2247a58c 7526 else
e075ae69 7527 putc ('0', file);
e9a25f70 7528
7529 if (index)
7530 {
7531 putc ('+', file);
7532 PRINT_REG (index, 0, file);
7533 if (scale != 1)
7534 fprintf (file, "*%d", scale);
7535 }
7536 putc (']', file);
7537 }
7538 }
7539}
7540
7541bool
b96a374d 7542output_addr_const_extra (FILE *file, rtx x)
7543{
7544 rtx op;
7545
7546 if (GET_CODE (x) != UNSPEC)
7547 return false;
7548
7549 op = XVECEXP (x, 0, 0);
7550 switch (XINT (x, 1))
7551 {
7552 case UNSPEC_GOTTPOFF:
7553 output_addr_const (file, op);
dea73790 7554 /* FIXME: This might be @TPOFF in Sun ld. */
7555 fputs ("@GOTTPOFF", file);
7556 break;
7557 case UNSPEC_TPOFF:
7558 output_addr_const (file, op);
7559 fputs ("@TPOFF", file);
7560 break;
7561 case UNSPEC_NTPOFF:
7562 output_addr_const (file, op);
7563 if (TARGET_64BIT)
7564 fputs ("@TPOFF", file);
7565 else
7566 fputs ("@NTPOFF", file);
7567 break;
7568 case UNSPEC_DTPOFF:
7569 output_addr_const (file, op);
7570 fputs ("@DTPOFF", file);
7571 break;
7572 case UNSPEC_GOTNTPOFF:
7573 output_addr_const (file, op);
7574 if (TARGET_64BIT)
7575 fputs ("@GOTTPOFF(%rip)", file);
7576 else
7577 fputs ("@GOTNTPOFF", file);
7578 break;
7579 case UNSPEC_INDNTPOFF:
7580 output_addr_const (file, op);
7581 fputs ("@INDNTPOFF", file);
7582 break;
7583
7584 default:
7585 return false;
7586 }
7587
7588 return true;
7589}
7590\f
7591/* Split one or more DImode RTL references into pairs of SImode
7592 references. The RTL can be REG, offsettable MEM, integer constant, or
7593 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7594 split and "num" is its length. lo_half and hi_half are output arrays
0f290768 7595 that parallel "operands". */
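/* For instance, a (mem:DI addr) operand splits into lo_half
   (mem:SI addr) and hi_half (mem:SI addr+4), low word first as on any
   little-endian target.  */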
7596
7597void
b96a374d 7598split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
7599{
7600 while (num--)
7601 {
57dbca5e 7602 rtx op = operands[num];
7603
7604 /* simplify_subreg refuses to split volatile memory addresses,
7605 but we still have to handle them. */
7606 if (GET_CODE (op) == MEM)
2a2ab3f9 7607 {
f4ef873c 7608 lo_half[num] = adjust_address (op, SImode, 0);
b72f00af 7609 hi_half[num] = adjust_address (op, SImode, 4);
7610 }
7611 else
b932f770 7612 {
7613 lo_half[num] = simplify_gen_subreg (SImode, op,
7614 GET_MODE (op) == VOIDmode
7615 ? DImode : GET_MODE (op), 0);
7616 hi_half[num] = simplify_gen_subreg (SImode, op,
7617 GET_MODE (op) == VOIDmode
7618 ? DImode : GET_MODE (op), 4);
b932f770 7619 }
7620 }
7621}
7622/* Split one or more TImode RTL references into pairs of DImode
7623 references. The RTL can be REG, offsettable MEM, integer constant, or
7624 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
7625 split and "num" is its length. lo_half and hi_half are output arrays
7626 that parallel "operands". */
7627
7628void
b96a374d 7629split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
7630{
7631 while (num--)
7632 {
7633 rtx op = operands[num];
7634
7635 /* simplify_subreg refuses to split volatile memory addresses, but we
7636 still have to handle them. */
7637 if (GET_CODE (op) == MEM)
7638 {
7639 lo_half[num] = adjust_address (op, DImode, 0);
7640 hi_half[num] = adjust_address (op, DImode, 8);
7641 }
7642 else
7643 {
7644 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
7645 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
7646 }
7647 }
7648}
2a2ab3f9 7649\f
7650/* Output code to perform a 387 binary operation in INSN, one of PLUS,
7651 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
7652 is the expression of the binary operation. The output may either be
7653 emitted here, or returned to the caller, like all output_* functions.
7654
7655 There is no guarantee that the operands are the same mode, as they
0f290768 7656 might be within FLOAT or FLOAT_EXTEND expressions. */
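/* Two illustrative results: an SFmode SSE add returns
   "addss\t{%2, %0|%0, %2}", while a 387 add of st(0) with a memory
   operand returns "fadd%z2\t%2" (%z supplying the operand-size
   suffix).  See the code below for the exact cases.  */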
2a2ab3f9 7657
7658#ifndef SYSV386_COMPAT
7659/* Set to 1 for compatibility with brain-damaged assemblers. No-one
7660 wants to fix the assemblers because that causes incompatibility
7661 with gcc. No-one wants to fix gcc because that causes
7662 incompatibility with assemblers... You can use the option of
7663 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
7664#define SYSV386_COMPAT 1
7665#endif
7666
69ddee61 7667const char *
b96a374d 7668output_387_binary_op (rtx insn, rtx *operands)
2a2ab3f9 7669{
e3c2afab 7670 static char buf[30];
69ddee61 7671 const char *p;
7672 const char *ssep;
7673 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
2a2ab3f9 7674
7675#ifdef ENABLE_CHECKING
7676 /* Even if we do not want to check the inputs, this documents input
7677 constraints, which helps in understanding the following code. */
7678 if (STACK_REG_P (operands[0])
7679 && ((REG_P (operands[1])
7680 && REGNO (operands[0]) == REGNO (operands[1])
7681 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
7682 || (REG_P (operands[2])
7683 && REGNO (operands[0]) == REGNO (operands[2])
7684 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
7685 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
7686 ; /* ok */
1deaa899 7687 else if (!is_sse)
7688 abort ();
7689#endif
7690
7691 switch (GET_CODE (operands[3]))
7692 {
7693 case PLUS:
7694 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7695 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7696 p = "fiadd";
7697 else
7698 p = "fadd";
1deaa899 7699 ssep = "add";
7700 break;
7701
7702 case MINUS:
7703 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7704 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7705 p = "fisub";
7706 else
7707 p = "fsub";
1deaa899 7708 ssep = "sub";
7709 break;
7710
7711 case MULT:
7712 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7713 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7714 p = "fimul";
7715 else
7716 p = "fmul";
1deaa899 7717 ssep = "mul";
7718 break;
7719
7720 case DIV:
7721 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7722 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7723 p = "fidiv";
7724 else
7725 p = "fdiv";
1deaa899 7726 ssep = "div";
7727 break;
7728
7729 default:
7730 abort ();
7731 }
7732
7733 if (is_sse)
7734 {
7735 strcpy (buf, ssep);
7736 if (GET_MODE (operands[0]) == SFmode)
7737 strcat (buf, "ss\t{%2, %0|%0, %2}");
7738 else
7739 strcat (buf, "sd\t{%2, %0|%0, %2}");
7740 return buf;
7741 }
e075ae69 7742 strcpy (buf, p);
7743
7744 switch (GET_CODE (operands[3]))
7745 {
7746 case MULT:
7747 case PLUS:
7748 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
7749 {
e3c2afab 7750 rtx temp = operands[2];
7751 operands[2] = operands[1];
7752 operands[1] = temp;
7753 }
7754
7755 /* We now know operands[0] == operands[1]. */
7756
2a2ab3f9 7757 if (GET_CODE (operands[2]) == MEM)
7758 {
7759 p = "%z2\t%2";
7760 break;
7761 }
7762
7763 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7764 {
7765 if (STACK_TOP_P (operands[0]))
7766 /* How is it that we are storing to a dead operand[2]?
7767 Well, presumably operands[1] is dead too. We can't
7768 store the result to st(0) as st(0) gets popped on this
7769 instruction. Instead store to operands[2] (which I
7770 think has to be st(1)). st(1) will be popped later.
7771 gcc <= 2.8.1 didn't have this check and generated
7772 assembly code that the Unixware assembler rejected. */
7773 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6b28fd63 7774 else
e3c2afab 7775 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
e075ae69 7776 break;
6b28fd63 7777 }
7778
7779 if (STACK_TOP_P (operands[0]))
e3c2afab 7780 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
2a2ab3f9 7781 else
e3c2afab 7782 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
e075ae69 7783 break;
7784
7785 case MINUS:
7786 case DIV:
7787 if (GET_CODE (operands[1]) == MEM)
7788 {
7789 p = "r%z1\t%1";
7790 break;
7791 }
7792
7793 if (GET_CODE (operands[2]) == MEM)
7794 {
7795 p = "%z2\t%2";
7796 break;
7797 }
2a2ab3f9 7798
2a2ab3f9 7799 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6b28fd63 7800 {
7801#if SYSV386_COMPAT
7802 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7803 derived assemblers, confusingly reverse the direction of
7804 the operation for fsub{r} and fdiv{r} when the
7805 destination register is not st(0). The Intel assembler
7806 doesn't have this brain damage. Read !SYSV386_COMPAT to
7807 figure out what the hardware really does. */
7808 if (STACK_TOP_P (operands[0]))
7809 p = "{p\t%0, %2|rp\t%2, %0}";
7810 else
7811 p = "{rp\t%2, %0|p\t%0, %2}";
7812#else
6b28fd63 7813 if (STACK_TOP_P (operands[0]))
7814 /* As above for fmul/fadd, we can't store to st(0). */
7815 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6b28fd63 7816 else
7817 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7818#endif
e075ae69 7819 break;
6b28fd63 7820 }
7821
7822 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
6b28fd63 7823 {
e3c2afab 7824#if SYSV386_COMPAT
6b28fd63 7825 if (STACK_TOP_P (operands[0]))
e3c2afab 7826 p = "{rp\t%0, %1|p\t%1, %0}";
6b28fd63 7827 else
7828 p = "{p\t%1, %0|rp\t%0, %1}";
7829#else
7830 if (STACK_TOP_P (operands[0]))
7831 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7832 else
7833 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7834#endif
e075ae69 7835 break;
6b28fd63 7836 }
7837
7838 if (STACK_TOP_P (operands[0]))
7839 {
7840 if (STACK_TOP_P (operands[1]))
e3c2afab 7841 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
2a2ab3f9 7842 else
e3c2afab 7843 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
e075ae69 7844 break;
7845 }
7846 else if (STACK_TOP_P (operands[1]))
7847 {
7848#if SYSV386_COMPAT
7849 p = "{\t%1, %0|r\t%0, %1}";
7850#else
7851 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
7852#endif
7853 }
2a2ab3f9 7854 else
7855 {
7856#if SYSV386_COMPAT
7857 p = "{r\t%2, %0|\t%0, %2}";
7858#else
7859 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7860#endif
7861 }
e075ae69 7862 break;
7863
7864 default:
7865 abort ();
7866 }
7867
7868 strcat (buf, p);
7869 return buf;
2a2ab3f9 7870}
e075ae69 7871
a4f31c00 7872/* Output code to initialize control word copies used by
7873 trunc?f?i patterns. NORMAL is set to current control word, while ROUND_DOWN
7874 is set to control word rounding downwards. */
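/* (0xc00 sets both rounding-control bits, bits 10 and 11, of the 387
   control word; RC = 11b rounds toward zero, i.e. truncates, which is
   what C float-to-integer conversion requires.  The movsi_insv_1 path
   below inserts the equivalent 0xc value into that bit-field.)  */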
7875void
b96a374d 7876emit_i387_cw_initialization (rtx normal, rtx round_down)
7877{
7878 rtx reg = gen_reg_rtx (HImode);
7879
7880 emit_insn (gen_x86_fnstcw_1 (normal));
7881 emit_move_insn (reg, normal);
7882 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
7883 && !TARGET_64BIT)
7884 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
7885 else
7886 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
7887 emit_move_insn (round_down, reg);
7888}
7889
2a2ab3f9 7890/* Output code for INSN to convert a float to a signed int. OPERANDS
46d21d2c 7891 are the insn operands. The output may be [HSD]Imode and the input
e075ae69 7892 operand may be [SDX]Fmode. */
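/* The sequence emitted below is essentially:
	fldcw	%3	 (switch to the truncating control word)
	fistp%z0 %0	 (or fist%z0 if the value stays live)
	fldcw	%2	 (restore the original control word)
   with %2 and %3 being the control-word copies prepared by
   emit_i387_cw_initialization above.  */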
2a2ab3f9 7893
69ddee61 7894const char *
b96a374d 7895output_fix_trunc (rtx insn, rtx *operands)
7896{
7897 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
e075ae69 7898 int dimode_p = GET_MODE (operands[0]) == DImode;
2a2ab3f9 7899
7900 /* Jump through a hoop or two for DImode, since the hardware has no
7901 non-popping instruction. We used to do this a different way, but
7902 that was somewhat fragile and broke with post-reload splitters. */
7903 if (dimode_p && !stack_top_dies)
7904 output_asm_insn ("fld\t%y1", operands);
e075ae69 7905
7a2e09f4 7906 if (!STACK_TOP_P (operands[1]))
7907 abort ();
7908
e075ae69 7909 if (GET_CODE (operands[0]) != MEM)
7a2e09f4 7910 abort ();
e9a25f70 7911
7a2e09f4 7912 output_asm_insn ("fldcw\t%3", operands);
e075ae69 7913 if (stack_top_dies || dimode_p)
7a2e09f4 7914 output_asm_insn ("fistp%z0\t%0", operands);
10195bd8 7915 else
7a2e09f4 7916 output_asm_insn ("fist%z0\t%0", operands);
e075ae69 7917 output_asm_insn ("fldcw\t%2", operands);
10195bd8 7918
e075ae69 7919 return "";
2a2ab3f9 7920}
cda749b1 7921
7922/* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
7923 should be used and 2 when fnstsw should be used. UNORDERED_P is true
7924 when fucom should be used. */
7925
69ddee61 7926const char *
b96a374d 7927output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
cda749b1 7928{
7929 int stack_top_dies;
7930 rtx cmp_op0 = operands[0];
7931 rtx cmp_op1 = operands[1];
0644b628 7932 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
7933
7934 if (eflags_p == 2)
7935 {
7936 cmp_op0 = cmp_op1;
7937 cmp_op1 = operands[2];
7938 }
7939 if (is_sse)
7940 {
7941 if (GET_MODE (operands[0]) == SFmode)
7942 if (unordered_p)
7943 return "ucomiss\t{%1, %0|%0, %1}";
7944 else
a5cf80f0 7945 return "comiss\t{%1, %0|%0, %1}";
7946 else
7947 if (unordered_p)
7948 return "ucomisd\t{%1, %0|%0, %1}";
7949 else
a5cf80f0 7950 return "comisd\t{%1, %0|%0, %1}";
0644b628 7951 }
cda749b1 7952
e075ae69 7953 if (! STACK_TOP_P (cmp_op0))
7954 abort ();
7955
e075ae69 7956 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
cda749b1 7957
7958 if (STACK_REG_P (cmp_op1)
7959 && stack_top_dies
7960 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
7961 && REGNO (cmp_op1) != FIRST_STACK_REG)
cda749b1 7962 {
7963 /* If the top of the 387 stack dies and the other operand
7964 is also a dying stack register, then this must be an
7965 `fcompp' float compare. */
7966
7967 if (eflags_p == 1)
7968 {
7969 /* There is no double popping fcomi variant. Fortunately,
7970 eflags is immune from the fstp's cc clobbering. */
7971 if (unordered_p)
7972 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
7973 else
7974 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
7975 return "fstp\t%y0";
7976 }
7977 else
cda749b1 7978 {
7979 if (eflags_p == 2)
7980 {
7981 if (unordered_p)
7982 return "fucompp\n\tfnstsw\t%0";
7983 else
7984 return "fcompp\n\tfnstsw\t%0";
7985 }
7986 else
7987 {
7988 if (unordered_p)
7989 return "fucompp";
7990 else
7991 return "fcompp";
7992 }
7993 }
7994 }
7995 else
7996 {
e075ae69 7997 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
cda749b1 7998
0f290768 7999 static const char * const alt[24] =
8000 {
8000 {
8001 "fcom%z1\t%y1",
8002 "fcomp%z1\t%y1",
8003 "fucom%z1\t%y1",
8004 "fucomp%z1\t%y1",
0f290768 8005
8006 "ficom%z1\t%y1",
8007 "ficomp%z1\t%y1",
8008 NULL,
8009 NULL,
8010
8011 "fcomi\t{%y1, %0|%0, %y1}",
8012 "fcomip\t{%y1, %0|%0, %y1}",
8013 "fucomi\t{%y1, %0|%0, %y1}",
8014 "fucomip\t{%y1, %0|%0, %y1}",
8015
8016 NULL,
8017 NULL,
8018 NULL,
8019 NULL,
8020
8021 "fcom%z2\t%y2\n\tfnstsw\t%0",
8022 "fcomp%z2\t%y2\n\tfnstsw\t%0",
8023 "fucom%z2\t%y2\n\tfnstsw\t%0",
8024 "fucomp%z2\t%y2\n\tfnstsw\t%0",
0f290768 8025
8026 "ficom%z2\t%y2\n\tfnstsw\t%0",
8027 "ficomp%z2\t%y2\n\tfnstsw\t%0",
8028 NULL,
8029 NULL
8030 };
8031
8032 int mask;
69ddee61 8033 const char *ret;
8034
8035 mask = eflags_p << 3;
8036 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
8037 mask |= unordered_p << 1;
8038 mask |= stack_top_dies;
8039
8040 if (mask >= 24)
8041 abort ();
8042 ret = alt[mask];
8043 if (ret == NULL)
8044 abort ();
cda749b1 8045
e075ae69 8046 return ret;
8047 }
8048}
2a2ab3f9 8049
f88c65f7 8050void
b96a374d 8051ix86_output_addr_vec_elt (FILE *file, int value)
8052{
8053 const char *directive = ASM_LONG;
8054
8055 if (TARGET_64BIT)
8056 {
8057#ifdef ASM_QUAD
8058 directive = ASM_QUAD;
8059#else
8060 abort ();
8061#endif
8062 }
8063
8064 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
8065}
8066
8067void
b96a374d 8068ix86_output_addr_diff_elt (FILE *file, int value, int rel)
8069{
8070 if (TARGET_64BIT)
74411039 8071 fprintf (file, "%s%s%d-%s%d\n",
8072 ASM_LONG, LPREFIX, value, LPREFIX, rel);
8073 else if (HAVE_AS_GOTOFF_IN_DATA)
8074 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
8075#if TARGET_MACHO
8076 else if (TARGET_MACHO)
8077 {
8078 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
8079 machopic_output_function_base_name (file);
8080 fprintf(file, "\n");
8081 }
b069de3b 8082#endif
f88c65f7 8083 else
8084 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
8085 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
f88c65f7 8086}
32b5b1aa 8087\f
8088/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
8089 for the target. */
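/* When TARGET_USE_MOV0 is clear or we optimize for size, the PARALLEL
   built below adds the flags clobber that lets this match the
   movsi_xor/movdi_xor_rex64 "xor reg, reg" patterns; otherwise a
   plain "mov $0" move is emitted.  */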
8090
8091void
b96a374d 8092ix86_expand_clear (rtx dest)
8093{
8094 rtx tmp;
8095
8096 /* We play register width games, which are only valid after reload. */
8097 if (!reload_completed)
8098 abort ();
8099
8100 /* Avoid HImode and its attendant prefix byte. */
8101 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
8102 dest = gen_rtx_REG (SImode, REGNO (dest));
8103
8104 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
8105
8106 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
8107 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
8108 {
8109 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
8110 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8111 }
8112
8113 emit_insn (tmp);
8114}
8115
8116/* X is an unchanging MEM. If it is a constant pool reference, return
8117 the constant pool rtx, else NULL. */
8118
8119static rtx
b96a374d 8120maybe_get_pool_constant (rtx x)
f996902d 8121{
69bd9368 8122 x = ix86_delegitimize_address (XEXP (x, 0));
8123
8124 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8125 return get_pool_constant (x);
8126
8127 return NULL_RTX;
8128}
8129
79325812 8130void
b96a374d 8131ix86_expand_move (enum machine_mode mode, rtx operands[])
32b5b1aa 8132{
e075ae69 8133 int strict = (reload_in_progress || reload_completed);
8134 rtx op0, op1;
8135 enum tls_model model;
8136
8137 op0 = operands[0];
8138 op1 = operands[1];
8139
8140 model = tls_symbolic_operand (op1, Pmode);
8141 if (model)
f996902d 8142 {
8143 op1 = legitimize_tls_address (op1, model, true);
8144 op1 = force_operand (op1, op0);
8145 if (op1 == op0)
8146 return;
f996902d 8147 }
8148
8149 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
f996902d 8150 {
8151#if TARGET_MACHO
8152 if (MACHOPIC_PURE)
8153 {
8154 rtx temp = ((reload_in_progress
8155 || ((op0 && GET_CODE (op0) == REG)
8156 && mode == Pmode))
8157 ? op0 : gen_reg_rtx (Pmode));
8158 op1 = machopic_indirect_data_reference (op1, temp);
8159 op1 = machopic_legitimize_pic_address (op1, mode,
8160 temp == op1 ? 0 : temp);
8161 }
8162 else if (MACHOPIC_INDIRECT)
8163 op1 = machopic_indirect_data_reference (op1, 0);
8164 if (op0 == op1)
8165 return;
8166#else
8167 if (GET_CODE (op0) == MEM)
8168 op1 = force_reg (Pmode, op1);
e075ae69 8169 else
32b5b1aa 8170 {
f996902d 8171 rtx temp = op0;
8172 if (GET_CODE (temp) != REG)
8173 temp = gen_reg_rtx (Pmode);
8174 temp = legitimize_pic_address (op1, temp);
8175 if (temp == op0)
e075ae69 8176 return;
f996902d 8177 op1 = temp;
32b5b1aa 8178 }
74dc3e94 8179#endif /* TARGET_MACHO */
8180 }
8181 else
8182 {
f996902d 8183 if (GET_CODE (op0) == MEM
44cf5b6a 8184 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
8185 || !push_operand (op0, mode))
8186 && GET_CODE (op1) == MEM)
8187 op1 = force_reg (mode, op1);
e9a25f70 8188
8189 if (push_operand (op0, mode)
8190 && ! general_no_elim_operand (op1, mode))
8191 op1 = copy_to_mode_reg (mode, op1);
2c5a510c 8192
8193 /* Force large constants in 64bit compilation into register
8194 to get them CSEed. */
8195 if (TARGET_64BIT && mode == DImode
8196 && immediate_operand (op1, mode)
8197 && !x86_64_zero_extended_value (op1)
8198 && !register_operand (op0, mode)
44cf5b6a 8199 && optimize && !reload_completed && !reload_in_progress)
f996902d 8200 op1 = copy_to_mode_reg (mode, op1);
44cf5b6a 8201
e075ae69 8202 if (FLOAT_MODE_P (mode))
32b5b1aa 8203 {
8204 /* If we are loading a floating point constant to a register,
8205 force the value to memory now, since we'll get better code
8206 out the back end. */
8207
8208 if (strict)
8209 ;
8210 else if (GET_CODE (op1) == CONST_DOUBLE)
8211 {
8212 op1 = validize_mem (force_const_mem (mode, op1));
8213 if (!register_operand (op0, mode))
8214 {
8215 rtx temp = gen_reg_rtx (mode);
8216 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
8217 emit_move_insn (op0, temp);
8218 return;
8219 }
8220 }
32b5b1aa 8221 }
32b5b1aa 8222 }
e9a25f70 8223
74dc3e94 8224 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
e075ae69 8225}
e9a25f70 8226
e37af218 8227void
b96a374d 8228ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
8229{
8230 /* Force constants other than zero into memory. We do not know how
8231 the instructions used to build constants modify the upper 64 bits
8232 of the register; once we have that information we may be able
8233 to handle some of them more efficiently. */
8234 if ((reload_in_progress | reload_completed) == 0
8235 && register_operand (operands[0], mode)
fdc4b40b 8236 && CONSTANT_P (operands[1]) && operands[1] != CONST0_RTX (mode))
2b28d405 8237 operands[1] = validize_mem (force_const_mem (mode, operands[1]));
e37af218
RH
8238
8239 /* Make operand1 a register if it isn't already. */
f8ca7923 8240 if (!no_new_pseudos
e37af218 8241 && !register_operand (operands[0], mode)
b105d6da 8242 && !register_operand (operands[1], mode))
e37af218 8243 {
59bef189 8244 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
e37af218
RH
8245 emit_move_insn (operands[0], temp);
8246 return;
8247 }
8248
8249 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
fce5a9f2 8250}
e37af218 8251
8252/* Attempt to expand a binary operator. Make the expansion closer to the
8253 actual machine, than just general_operand, which will allow 3 separate
9d81fc27 8254 memory references (one output, two input) in a single insn. */
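/* E.g. for (set (mem A) (plus (mem A) (mem B))) the code below keeps
   the matching memory destination but forces the second source into a
   register, since at most one non-matching memory operand can survive
   into the final insn.  */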
e9a25f70 8255
e075ae69 8256void
8257ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
8258 rtx operands[])
8259{
8260 int matching_memory;
8261 rtx src1, src2, dst, op, clob;
8262
8263 dst = operands[0];
8264 src1 = operands[1];
8265 src2 = operands[2];
8266
8267 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
8268 if (GET_RTX_CLASS (code) == 'c'
8269 && (rtx_equal_p (dst, src2)
8270 || immediate_operand (src1, mode)))
8271 {
8272 rtx temp = src1;
8273 src1 = src2;
8274 src2 = temp;
32b5b1aa 8275 }
e9a25f70 8276
8277 /* If the destination is memory, and we do not have matching source
8278 operands, do things in registers. */
8279 matching_memory = 0;
8280 if (GET_CODE (dst) == MEM)
32b5b1aa 8281 {
8282 if (rtx_equal_p (dst, src1))
8283 matching_memory = 1;
8284 else if (GET_RTX_CLASS (code) == 'c'
8285 && rtx_equal_p (dst, src2))
8286 matching_memory = 2;
8287 else
8288 dst = gen_reg_rtx (mode);
8289 }
0f290768 8290
8291 /* Both source operands cannot be in memory. */
8292 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
8293 {
8294 if (matching_memory != 2)
8295 src2 = force_reg (mode, src2);
8296 else
8297 src1 = force_reg (mode, src1);
32b5b1aa 8298 }
e9a25f70 8299
8300 /* If the operation is not commutative, source 1 cannot be a constant
8301 or non-matching memory. */
0f290768 8302 if ((CONSTANT_P (src1)
8303 || (!matching_memory && GET_CODE (src1) == MEM))
8304 && GET_RTX_CLASS (code) != 'c')
e075ae69 8305 src1 = force_reg (mode, src1);
0f290768 8306
e075ae69 8307 /* If optimizing, copy to regs to improve CSE */
fe577e58 8308 if (optimize && ! no_new_pseudos)
32b5b1aa 8309 {
8310 if (GET_CODE (dst) == MEM)
8311 dst = gen_reg_rtx (mode);
8312 if (GET_CODE (src1) == MEM)
8313 src1 = force_reg (mode, src1);
8314 if (GET_CODE (src2) == MEM)
8315 src2 = force_reg (mode, src2);
32b5b1aa 8316 }
e9a25f70 8317
8318 /* Emit the instruction. */
8319
8320 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
8321 if (reload_in_progress)
8322 {
8323 /* Reload doesn't know about the flags register, and doesn't know that
8324 it doesn't want to clobber it. We can only do this with PLUS. */
8325 if (code != PLUS)
8326 abort ();
8327 emit_insn (op);
8328 }
8329 else
32b5b1aa 8330 {
8331 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8332 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
32b5b1aa 8333 }
e9a25f70 8334
8335 /* Fix up the destination if needed. */
8336 if (dst != operands[0])
8337 emit_move_insn (operands[0], dst);
8338}
8339
8340/* Return TRUE or FALSE depending on whether the binary operator meets the
8341 appropriate constraints. */
8342
8343int
8344ix86_binary_operator_ok (enum rtx_code code,
8345 enum machine_mode mode ATTRIBUTE_UNUSED,
8346 rtx operands[3])
8347{
8348 /* Both source operands cannot be in memory. */
8349 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
8350 return 0;
8351 /* If the operation is not commutative, source 1 cannot be a constant. */
8352 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
8353 return 0;
8354 /* If the destination is memory, we must have a matching source operand. */
8355 if (GET_CODE (operands[0]) == MEM
8356 && ! (rtx_equal_p (operands[0], operands[1])
8357 || (GET_RTX_CLASS (code) == 'c'
8358 && rtx_equal_p (operands[0], operands[2]))))
8359 return 0;
06a964de 8360 /* If the operation is not commutative and source 1 is memory, we must
d6a7951f 8361 have a matching destination. */
8362 if (GET_CODE (operands[1]) == MEM
8363 && GET_RTX_CLASS (code) != 'c'
8364 && ! rtx_equal_p (operands[0], operands[1]))
8365 return 0;
8366 return 1;
8367}
8368
8369/* Attempt to expand a unary operator. Make the expansion closer to the
8370 actual machine, than just general_operand, which will allow 2 separate
9d81fc27 8371 memory references (one output, one input) in a single insn. */
e075ae69 8372
9d81fc27 8373void
8374ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
8375 rtx operands[])
e075ae69 8376{
8377 int matching_memory;
8378 rtx src, dst, op, clob;
8379
8380 dst = operands[0];
8381 src = operands[1];
e075ae69 8382
8383 /* If the destination is memory, and we do not have matching source
8384 operands, do things in registers. */
8385 matching_memory = 0;
8386 if (GET_CODE (dst) == MEM)
32b5b1aa 8387 {
8388 if (rtx_equal_p (dst, src))
8389 matching_memory = 1;
e075ae69 8390 else
06a964de 8391 dst = gen_reg_rtx (mode);
32b5b1aa 8392 }
e9a25f70 8393
8394 /* When source operand is memory, destination must match. */
8395 if (!matching_memory && GET_CODE (src) == MEM)
8396 src = force_reg (mode, src);
0f290768 8397
06a964de 8398 /* If optimizing, copy to regs to improve CSE */
fe577e58 8399 if (optimize && ! no_new_pseudos)
06a964de
JH
8400 {
8401 if (GET_CODE (dst) == MEM)
8402 dst = gen_reg_rtx (mode);
8403 if (GET_CODE (src) == MEM)
8404 src = force_reg (mode, src);
8405 }
8406
8407 /* Emit the instruction. */
8408
8409 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
8410 if (reload_in_progress || code == NOT)
8411 {
8412 /* Reload doesn't know about the flags register, and doesn't know that
8413 it doesn't want to clobber it. */
8414 if (code != NOT)
8415 abort ();
8416 emit_insn (op);
8417 }
8418 else
8419 {
8420 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8421 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8422 }
8423
8424 /* Fix up the destination if needed. */
8425 if (dst != operands[0])
8426 emit_move_insn (operands[0], dst);
e075ae69
RH
8427}
8428
8429/* Return TRUE or FALSE depending on whether the unary operator meets the
8430 appropriate constraints. */
8431
8432int
b96a374d
AJ
8433ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
8434 enum machine_mode mode ATTRIBUTE_UNUSED,
8435 rtx operands[2] ATTRIBUTE_UNUSED)
e075ae69 8436{
06a964de
JH
8437 /* If one of operands is memory, source and destination must match. */
8438 if ((GET_CODE (operands[0]) == MEM
8439 || GET_CODE (operands[1]) == MEM)
8440 && ! rtx_equal_p (operands[0], operands[1]))
8441 return FALSE;
e075ae69
RH
8442 return TRUE;
8443}
8444
16189740
RH
/* Return TRUE or FALSE depending on whether the first SET in INSN
   has source and destination with matching CC modes and whether the
   CC mode is at least as constrained as REQ_MODE.  */

int
ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
{
  rtx set;
  enum machine_mode set_mode;

  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  if (GET_CODE (set) != SET)
    abort ();
  if (GET_CODE (SET_SRC (set)) != COMPARE)
    abort ();

  set_mode = GET_MODE (SET_DEST (set));
  switch (set_mode)
    {
    case CCNOmode:
      if (req_mode != CCNOmode
          && (req_mode != CCmode
              || XEXP (SET_SRC (set), 1) != const0_rtx))
        return 0;
      break;
    case CCmode:
      if (req_mode == CCGCmode)
        return 0;
      /* FALLTHRU */
    case CCGCmode:
      if (req_mode == CCGOCmode || req_mode == CCNOmode)
        return 0;
      /* FALLTHRU */
    case CCGOCmode:
      if (req_mode == CCZmode)
        return 0;
      /* FALLTHRU */
    case CCZmode:
      break;

    default:
      abort ();
    }

  return (GET_MODE (SET_SRC (set)) == set_mode);
}
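
/* A reading of the switch above (added commentary, not from the original
   sources): a SET destination in CCZmode matches any REQ_MODE, since the
   ZF-only mode is the most constrained, while a CCmode destination matches
   only REQ_MODE == CCmode; a CCNOmode destination additionally satisfies a
   CCmode request when the comparison's second operand is known zero.  */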

/* Generate insn patterns to do an integer compare of OPERANDS.  */

static rtx
ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
{
  enum machine_mode cmpmode;
  rtx tmp, flags;

  cmpmode = SELECT_CC_MODE (code, op0, op1);
  flags = gen_rtx_REG (cmpmode, FLAGS_REG);

  /* This is very simple, but making the interface the same as in the
     FP case makes the rest of the code easier.  */
  tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
  emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
}

/* Figure out whether to use ordered or unordered fp comparisons.
   Return the appropriate mode to use.  */

enum machine_mode
ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
{
  /* ??? In order to make all comparisons reversible, we do all comparisons
     non-trapping when compiling for IEEE.  Once gcc is able to distinguish
     all forms of trapping and nontrapping comparisons, we can make inequality
     comparisons trapping again, since it results in better code when using
     FCOM based compares.  */
  return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
}

enum machine_mode
ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
{
  if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
    return ix86_fp_compare_mode (code);
  switch (code)
    {
      /* Only the zero flag is needed.  */
    case EQ:			/* ZF=1 */
    case NE:			/* ZF=0 */
      return CCZmode;
      /* Codes needing the carry flag.  */
    case GEU:			/* CF=0 */
    case GTU:			/* CF=0 & ZF=0 */
    case LTU:			/* CF=1 */
    case LEU:			/* CF=1 | ZF=1 */
      return CCmode;
      /* Codes possibly doable only with the sign flag when
         comparing against zero.  */
    case GE:			/* SF=OF   or   SF=0 */
    case LT:			/* SF<>OF  or   SF=1 */
      if (op1 == const0_rtx)
        return CCGOCmode;
      else
        /* For other cases the carry flag is not required.  */
        return CCGCmode;
      /* Codes doable only with the sign flag when comparing
         against zero, but we miss the jump instruction for it,
         so we need to use relational tests against the overflow
         flag, which thus needs to be zero.  */
    case GT:			/* ZF=0 & SF=OF */
    case LE:			/* ZF=1 | SF<>OF */
      if (op1 == const0_rtx)
        return CCNOmode;
      else
        return CCGCmode;
      /* The strcmp pattern does (use flags), and combine may ask us
         for a proper mode.  */
    case USE:
      return CCmode;
    default:
      abort ();
    }
}
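
/* Worked example (added commentary, not from the original sources): for
   (GT reg const0) we return CCNOmode -- GT wants ZF=0 & SF=OF, and against
   a known zero the overflow flag may be treated as clear, so a mode without
   a valid OF suffices.  (GTU a b) instead returns CCmode, since the carry
   flag is only guaranteed there.  */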

/* Return true if we should use an FCOMI instruction for this fp comparison.  */

int
ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
{
  enum rtx_code swapped_code = swap_condition (code);
  return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
          || (ix86_fp_comparison_cost (swapped_code)
              == ix86_fp_comparison_fcomi_cost (swapped_code)));
}

/* Swap, force into registers, or otherwise massage the two operands
   to an fp comparison.  The operands are updated in place; the new
   comparison code is returned.  */

static enum rtx_code
ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
{
  enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
  rtx op0 = *pop0, op1 = *pop1;
  enum machine_mode op_mode = GET_MODE (op0);
  int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);

  /* All of the unordered compare instructions only work on registers.
     The same is true of the XFmode compare instructions.  The same is
     true of the fcomi compare instructions.  */

  if (!is_sse
      && (fpcmp_mode == CCFPUmode
          || op_mode == XFmode
          || op_mode == TFmode
          || ix86_use_fcomi_compare (code)))
    {
      op0 = force_reg (op_mode, op0);
      op1 = force_reg (op_mode, op1);
    }
  else
    {
      /* %%% We only allow op1 in memory; op0 must be st(0).  So swap
         things around if they appear profitable, otherwise force op0
         into a register.  */

      if (standard_80387_constant_p (op0) == 0
          || (GET_CODE (op0) == MEM
              && ! (standard_80387_constant_p (op1) == 0
                    || GET_CODE (op1) == MEM)))
        {
          rtx tmp;
          tmp = op0, op0 = op1, op1 = tmp;
          code = swap_condition (code);
        }

      if (GET_CODE (op0) != REG)
        op0 = force_reg (op_mode, op0);

      if (CONSTANT_P (op1))
        {
          if (standard_80387_constant_p (op1))
            op1 = force_reg (op_mode, op1);
          else
            op1 = validize_mem (force_const_mem (op_mode, op1));
        }
    }

  /* Try to rearrange the comparison to make it cheaper.  */
  if (ix86_fp_comparison_cost (code)
      > ix86_fp_comparison_cost (swap_condition (code))
      && (GET_CODE (op1) == REG || !no_new_pseudos))
    {
      rtx tmp;
      tmp = op0, op0 = op1, op1 = tmp;
      code = swap_condition (code);
      if (GET_CODE (op0) != REG)
        op0 = force_reg (op_mode, op0);
    }

  *pop0 = op0;
  *pop1 = op1;
  return code;
}

/* Convert the comparison codes we use to represent an FP comparison to
   the integer code that will result in a proper branch.  Return UNKNOWN
   if no such code is available.  */
static enum rtx_code
ix86_fp_compare_code_to_integer (enum rtx_code code)
{
  switch (code)
    {
    case GT:
      return GTU;
    case GE:
      return GEU;
    case ORDERED:
    case UNORDERED:
      return code;
    case UNEQ:
      return EQ;
    case UNLT:
      return LTU;
    case UNLE:
      return LEU;
    case LTGT:
      return NE;
    default:
      return UNKNOWN;
    }
}

/* Split comparison code CODE into comparisons we can do using branch
   instructions.  BYPASS_CODE is the comparison code for the branch that
   will branch around FIRST_CODE and SECOND_CODE.  If one of the branches
   is not required, set the corresponding value to NIL.
   We never require more than two branches.  */
static void
ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
                          enum rtx_code *first_code,
                          enum rtx_code *second_code)
{
  *first_code = code;
  *bypass_code = NIL;
  *second_code = NIL;

  /* The fcomi comparison sets flags as follows:

        cmp    ZF  PF  CF
        >      0   0   0
        <      0   0   1
        =      1   0   0
        un     1   1   1  */

  switch (code)
    {
    case GT:			/* GTU - CF=0 & ZF=0 */
    case GE:			/* GEU - CF=0 */
    case ORDERED:		/* PF=0 */
    case UNORDERED:		/* PF=1 */
    case UNEQ:			/* EQ - ZF=1 */
    case UNLT:			/* LTU - CF=1 */
    case UNLE:			/* LEU - CF=1 | ZF=1 */
    case LTGT:			/* NE - ZF=0 */
      break;
    case LT:			/* LTU - CF=1 - fails on unordered */
      *first_code = UNLT;
      *bypass_code = UNORDERED;
      break;
    case LE:			/* LEU - CF=1 | ZF=1 - fails on unordered */
      *first_code = UNLE;
      *bypass_code = UNORDERED;
      break;
    case EQ:			/* EQ - ZF=1 - fails on unordered */
      *first_code = UNEQ;
      *bypass_code = UNORDERED;
      break;
    case NE:			/* NE - ZF=0 - fails on unordered */
      *first_code = LTGT;
      *second_code = UNORDERED;
      break;
    case UNGE:			/* GEU - CF=0 - fails on unordered */
      *first_code = GE;
      *second_code = UNORDERED;
      break;
    case UNGT:			/* GTU - CF=0 & ZF=0 - fails on unordered */
      *first_code = GT;
      *second_code = UNORDERED;
      break;
    default:
      abort ();
    }
  if (!TARGET_IEEE_FP)
    {
      *second_code = NIL;
      *bypass_code = NIL;
    }
}
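
/* Example of the split (added commentary, not from the original sources):
   with TARGET_IEEE_FP, LT becomes FIRST_CODE == UNLT guarded by
   BYPASS_CODE == UNORDERED, i.e. roughly

        jp      1f              ; bypass taken for unordered operands
        jb      target          ; UNLT: CF=1
   1:

   while NE becomes FIRST_CODE == LTGT plus SECOND_CODE == UNORDERED: two
   branches to the target, one on ZF=0 (jne) and one on PF=1 (jp).  */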

/* Return the cost of a comparison done with fcom + arithmetic operations
   on AX.  All the following functions use the number of instructions as a
   cost metric.  In the future this should be tweaked to compute bytes for
   optimize_size and take into account the performance of various
   instructions on various CPUs.  */
static int
ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
{
  if (!TARGET_IEEE_FP)
    return 4;
  /* The cost of code output by ix86_expand_fp_compare.  */
  switch (code)
    {
    case UNLE:
    case UNLT:
    case LTGT:
    case GT:
    case GE:
    case UNORDERED:
    case ORDERED:
    case UNEQ:
      return 4;
    case LT:
    case NE:
    case EQ:
    case UNGE:
      return 5;
    case LE:
    case UNGT:
      return 6;
    default:
      abort ();
    }
}

/* Return the cost of a comparison done using the fcomi operation.
   See ix86_fp_comparison_arithmetics_cost for the metrics.  */
static int
ix86_fp_comparison_fcomi_cost (enum rtx_code code)
{
  enum rtx_code bypass_code, first_code, second_code;
  /* Return an arbitrarily high cost when the instruction is not supported -
     this prevents gcc from using it.  */
  if (!TARGET_CMOVE)
    return 1024;
  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
  return (bypass_code != NIL || second_code != NIL) + 2;
}

/* Return the cost of a comparison done using the sahf operation.
   See ix86_fp_comparison_arithmetics_cost for the metrics.  */
static int
ix86_fp_comparison_sahf_cost (enum rtx_code code)
{
  enum rtx_code bypass_code, first_code, second_code;
  /* Return an arbitrarily high cost when the instruction is not preferred -
     this prevents gcc from using it.  */
  if (!TARGET_USE_SAHF && !optimize_size)
    return 1024;
  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
  return (bypass_code != NIL || second_code != NIL) + 3;
}

/* Compute the cost of the comparison done using any method.
   See ix86_fp_comparison_arithmetics_cost for the metrics.  */
static int
ix86_fp_comparison_cost (enum rtx_code code)
{
  int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
  int min;

  fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
  sahf_cost = ix86_fp_comparison_sahf_cost (code);

  min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
  if (min > sahf_cost)
    min = sahf_cost;
  if (min > fcomi_cost)
    min = fcomi_cost;
  return min;
}
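
/* Worked cost example (added commentary, not from the original sources):
   for EQ with TARGET_IEEE_FP and TARGET_CMOVE, the split is UNEQ plus an
   UNORDERED bypass, so the fcomi cost is 1 + 2 = 3, the sahf cost is
   1 + 3 = 4 (assuming TARGET_USE_SAHF) and the arithmetic cost is 5; the
   minimum of 3 selects the fcomi sequence.  */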

/* Generate insn patterns to do a floating point compare of OPERANDS.  */

static rtx
ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
                        rtx *second_test, rtx *bypass_test)
{
  enum machine_mode fpcmp_mode, intcmp_mode;
  rtx tmp, tmp2;
  int cost = ix86_fp_comparison_cost (code);
  enum rtx_code bypass_code, first_code, second_code;

  fpcmp_mode = ix86_fp_compare_mode (code);
  code = ix86_prepare_fp_compare_args (code, &op0, &op1);

  if (second_test)
    *second_test = NULL_RTX;
  if (bypass_test)
    *bypass_test = NULL_RTX;

  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);

  /* Do the fcomi/sahf based test when profitable.  */
  if ((bypass_code == NIL || bypass_test)
      && (second_code == NIL || second_test)
      && ix86_fp_comparison_arithmetics_cost (code) > cost)
    {
      if (TARGET_CMOVE)
        {
          tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
          tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
                             tmp);
          emit_insn (tmp);
        }
      else
        {
          tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
          tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
          if (!scratch)
            scratch = gen_reg_rtx (HImode);
          emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
          emit_insn (gen_x86_sahf_1 (scratch));
        }

      /* The FP codes work out to act like unsigned.  */
      intcmp_mode = fpcmp_mode;
      code = first_code;
      if (bypass_code != NIL)
        *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
                                       gen_rtx_REG (intcmp_mode, FLAGS_REG),
                                       const0_rtx);
      if (second_code != NIL)
        *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
                                       gen_rtx_REG (intcmp_mode, FLAGS_REG),
                                       const0_rtx);
    }
  else
    {
      /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first.  */
      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
      tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
      if (!scratch)
        scratch = gen_reg_rtx (HImode);
      emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));

      /* In the unordered case, we have to check C2 for NaN's, which
         doesn't happen to work out to anything nice combination-wise.
         So do some bit twiddling on the value we've got in AH to come
         up with an appropriate set of condition codes.  */

      intcmp_mode = CCNOmode;
      switch (code)
        {
        case GT:
        case UNGT:
          if (code == GT || !TARGET_IEEE_FP)
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
              code = EQ;
            }
          else
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
              emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
              intcmp_mode = CCmode;
              code = GEU;
            }
          break;
        case LT:
        case UNLT:
          if (code == LT && TARGET_IEEE_FP)
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
              intcmp_mode = CCmode;
              code = EQ;
            }
          else
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
              code = NE;
            }
          break;
        case GE:
        case UNGE:
          if (code == GE || !TARGET_IEEE_FP)
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
              code = EQ;
            }
          else
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
                                             GEN_INT (0x01)));
              code = NE;
            }
          break;
        case LE:
        case UNLE:
          if (code == LE && TARGET_IEEE_FP)
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
              emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
              intcmp_mode = CCmode;
              code = LTU;
            }
          else
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
              code = NE;
            }
          break;
        case EQ:
        case UNEQ:
          if (code == EQ && TARGET_IEEE_FP)
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
              intcmp_mode = CCmode;
              code = EQ;
            }
          else
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
              code = NE;
            }
          break;
        case NE:
        case LTGT:
          if (code == NE && TARGET_IEEE_FP)
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
                                             GEN_INT (0x40)));
              code = NE;
            }
          else
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
              code = EQ;
            }
          break;

        case UNORDERED:
          emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
          code = NE;
          break;
        case ORDERED:
          emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
          code = EQ;
          break;

        default:
          abort ();
        }
    }

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode,
                         gen_rtx_REG (intcmp_mode, FLAGS_REG),
                         const0_rtx);
}
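
/* A note on the magic constants above (added commentary, not from the
   original sources): after fnstsw the FPU condition bits land in AH as
   C0 = 0x01, C2 = 0x04 and C3 = 0x40, so 0x45 masks C3|C2|C0.  For
   instance the GE test of 0x05 checks C0|C2; both clear (hence code = EQ,
   testing ZF=1) means neither "below" nor "unordered" is set.  */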

rtx
ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
{
  rtx op0, op1, ret;
  op0 = ix86_compare_op0;
  op1 = ix86_compare_op1;

  if (second_test)
    *second_test = NULL_RTX;
  if (bypass_test)
    *bypass_test = NULL_RTX;

  if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
    ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
                                  second_test, bypass_test);
  else
    ret = ix86_expand_int_compare (code, op0, op1);

  return ret;
}

/* Return true if the CODE will result in a nontrivial jump sequence.  */
bool
ix86_fp_jump_nontrivial_p (enum rtx_code code)
{
  enum rtx_code bypass_code, first_code, second_code;
  if (!TARGET_CMOVE)
    return true;
  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
  return bypass_code != NIL || second_code != NIL;
}

void
ix86_expand_branch (enum rtx_code code, rtx label)
{
  rtx tmp;

  switch (GET_MODE (ix86_compare_op0))
    {
    case QImode:
    case HImode:
    case SImode:
      simple:
      tmp = ix86_expand_compare (code, NULL, NULL);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
                                  gen_rtx_LABEL_REF (VOIDmode, label),
                                  pc_rtx);
      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
      return;

    case SFmode:
    case DFmode:
    case XFmode:
    case TFmode:
      {
        rtvec vec;
        int use_fcomi;
        enum rtx_code bypass_code, first_code, second_code;

        code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
                                             &ix86_compare_op1);

        ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);

        /* Check whether we will use the natural sequence with one jump.  If
           so, we can expand the jump early.  Otherwise delay expansion by
           creating a compound insn to not confuse optimizers.  */
        if (bypass_code == NIL && second_code == NIL
            && TARGET_CMOVE)
          {
            ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
                                  gen_rtx_LABEL_REF (VOIDmode, label),
                                  pc_rtx, NULL_RTX);
          }
        else
          {
            tmp = gen_rtx_fmt_ee (code, VOIDmode,
                                  ix86_compare_op0, ix86_compare_op1);
            tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
                                        gen_rtx_LABEL_REF (VOIDmode, label),
                                        pc_rtx);
            tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);

            use_fcomi = ix86_use_fcomi_compare (code);
            vec = rtvec_alloc (3 + !use_fcomi);
            RTVEC_ELT (vec, 0) = tmp;
            RTVEC_ELT (vec, 1)
              = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
            RTVEC_ELT (vec, 2)
              = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
            if (! use_fcomi)
              RTVEC_ELT (vec, 3)
                = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));

            emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
          }
        return;
      }

    case DImode:
      if (TARGET_64BIT)
        goto simple;
      /* Expand a DImode branch into multiple compare+branch.  */
      {
        rtx lo[2], hi[2], label2;
        enum rtx_code code1, code2, code3;

        if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
          {
            tmp = ix86_compare_op0;
            ix86_compare_op0 = ix86_compare_op1;
            ix86_compare_op1 = tmp;
            code = swap_condition (code);
          }
        split_di (&ix86_compare_op0, 1, lo+0, hi+0);
        split_di (&ix86_compare_op1, 1, lo+1, hi+1);

        /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
           avoid two branches.  This costs one extra insn, so disable when
           optimizing for size.  */

        if ((code == EQ || code == NE)
            && (!optimize_size
                || hi[1] == const0_rtx || lo[1] == const0_rtx))
          {
            rtx xor0, xor1;

            xor1 = hi[0];
            if (hi[1] != const0_rtx)
              xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
                                   NULL_RTX, 0, OPTAB_WIDEN);

            xor0 = lo[0];
            if (lo[1] != const0_rtx)
              xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
                                   NULL_RTX, 0, OPTAB_WIDEN);

            tmp = expand_binop (SImode, ior_optab, xor1, xor0,
                                NULL_RTX, 0, OPTAB_WIDEN);

            ix86_compare_op0 = tmp;
            ix86_compare_op1 = const0_rtx;
            ix86_expand_branch (code, label);
            return;
          }
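
        /* Illustration (added commentary, not from the original sources):
           comparing two DImode values for equality on a 32-bit target thus
           becomes roughly

                movl    hi0, %eax
                xorl    hi1, %eax
                movl    lo0, %edx
                xorl    lo1, %edx
                orl     %edx, %eax
                jz      target

           i.e. a single branch instead of the two or three needed when
           comparing word by word.  */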

        /* Otherwise, if we are doing a less-than or greater-or-equal
           comparison, op1 is a constant and the low word is zero, then we
           can just examine the high word.  */

        if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
          switch (code)
            {
            case LT: case LTU: case GE: case GEU:
              ix86_compare_op0 = hi[0];
              ix86_compare_op1 = hi[1];
              ix86_expand_branch (code, label);
              return;
            default:
              break;
            }

        /* Otherwise, we need two or three jumps.  */

        label2 = gen_label_rtx ();

        code1 = code;
        code2 = swap_condition (code);
        code3 = unsigned_condition (code);

        switch (code)
          {
          case LT: case GT: case LTU: case GTU:
            break;

          case LE:   code1 = LT;  code2 = GT;  break;
          case GE:   code1 = GT;  code2 = LT;  break;
          case LEU:  code1 = LTU; code2 = GTU; break;
          case GEU:  code1 = GTU; code2 = LTU; break;

          case EQ:   code1 = NIL; code2 = NE;  break;
          case NE:   code2 = NIL; break;

          default:
            abort ();
          }

        /*
         * a < b =>
         *    if (hi(a) < hi(b)) goto true;
         *    if (hi(a) > hi(b)) goto false;
         *    if (lo(a) < lo(b)) goto true;
         *  false:
         */

        ix86_compare_op0 = hi[0];
        ix86_compare_op1 = hi[1];

        if (code1 != NIL)
          ix86_expand_branch (code1, label);
        if (code2 != NIL)
          ix86_expand_branch (code2, label2);

        ix86_compare_op0 = lo[0];
        ix86_compare_op1 = lo[1];
        ix86_expand_branch (code3, label);

        if (code2 != NIL)
          emit_label (label2);
        return;
      }

    default:
      abort ();
    }
}

/* Split a branch based on a floating point condition.  */
void
ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
                      rtx target1, rtx target2, rtx tmp)
{
  rtx second, bypass;
  rtx label = NULL_RTX;
  rtx condition;
  int bypass_probability = -1, second_probability = -1, probability = -1;
  rtx i;

  if (target2 != pc_rtx)
    {
      rtx tmp = target2;
      code = reverse_condition_maybe_unordered (code);
      target2 = target1;
      target1 = tmp;
    }

  condition = ix86_expand_fp_compare (code, op1, op2,
                                      tmp, &second, &bypass);

  if (split_branch_probability >= 0)
    {
      /* Distribute the probabilities across the jumps.
         Assume the BYPASS and SECOND branches are always a test
         for UNORDERED.  */
      probability = split_branch_probability;

      /* A value of 1 is low enough that there is no need for the
         probability to be updated.  Later we may run some experiments and
         see if unordered values are more frequent in practice.  */
      if (bypass)
        bypass_probability = 1;
      if (second)
        second_probability = 1;
    }
  if (bypass != NULL_RTX)
    {
      label = gen_label_rtx ();
      i = emit_jump_insn (gen_rtx_SET
                          (VOIDmode, pc_rtx,
                           gen_rtx_IF_THEN_ELSE (VOIDmode,
                                                 bypass,
                                                 gen_rtx_LABEL_REF (VOIDmode,
                                                                    label),
                                                 pc_rtx)));
      if (bypass_probability >= 0)
        REG_NOTES (i)
          = gen_rtx_EXPR_LIST (REG_BR_PROB,
                               GEN_INT (bypass_probability),
                               REG_NOTES (i));
    }
  i = emit_jump_insn (gen_rtx_SET
                      (VOIDmode, pc_rtx,
                       gen_rtx_IF_THEN_ELSE (VOIDmode,
                                             condition, target1, target2)));
  if (probability >= 0)
    REG_NOTES (i)
      = gen_rtx_EXPR_LIST (REG_BR_PROB,
                           GEN_INT (probability),
                           REG_NOTES (i));
  if (second != NULL_RTX)
    {
      i = emit_jump_insn (gen_rtx_SET
                          (VOIDmode, pc_rtx,
                           gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
                                                 target2)));
      if (second_probability >= 0)
        REG_NOTES (i)
          = gen_rtx_EXPR_LIST (REG_BR_PROB,
                               GEN_INT (second_probability),
                               REG_NOTES (i));
    }
  if (label != NULL_RTX)
    emit_label (label);
}

int
ix86_expand_setcc (enum rtx_code code, rtx dest)
{
  rtx ret, tmp, tmpreg, equiv;
  rtx second_test, bypass_test;

  if (GET_MODE (ix86_compare_op0) == DImode
      && !TARGET_64BIT)
    return 0; /* FAIL */

  if (GET_MODE (dest) != QImode)
    abort ();

  ret = ix86_expand_compare (code, &second_test, &bypass_test);
  PUT_MODE (ret, QImode);

  tmp = dest;
  tmpreg = dest;

  emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
  if (bypass_test || second_test)
    {
      rtx test = second_test;
      int bypass = 0;
      rtx tmp2 = gen_reg_rtx (QImode);
      if (bypass_test)
        {
          if (second_test)
            abort ();
          test = bypass_test;
          bypass = 1;
          PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
        }
      PUT_MODE (test, QImode);
      emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));

      if (bypass)
        emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
      else
        emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
    }

  /* Attach a REG_EQUAL note describing the comparison result.  */
  equiv = simplify_gen_relational (code, QImode,
                                   GET_MODE (ix86_compare_op0),
                                   ix86_compare_op0, ix86_compare_op1);
  set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);

  return 1; /* DONE */
}
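
/* Example (added commentary, not from the original sources): an IEEE EQ
   comparison splits into UNEQ with an UNORDERED bypass, so the code above
   emits roughly

        fucomip %st(1), %st
        sete    %al             ; UNEQ: ZF=1
        setnp   %dl             ; reversed bypass test: ORDERED, PF=0
        andb    %dl, %al        ; both must hold

   while NE uses a SECOND test (UNORDERED) combined with an or instead.  */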

/* Expand a comparison setting or clearing the carry flag.  Return true
   when successful and set POP to the operation.  */
bool
ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
{
  enum machine_mode mode =
    GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);

  /* Do not handle DImode compares that go through the special path.  Also
     we can't deal with FP compares yet.  This is possible to add.  */
  if (mode == DImode && !TARGET_64BIT)
    return false;
  if (FLOAT_MODE_P (mode))
    {
      rtx second_test = NULL, bypass_test = NULL;
      rtx compare_op, compare_seq;

      /* Shortcut: the following common codes never translate into carry
         flag compares.  */
      if (code == EQ || code == NE || code == UNEQ || code == LTGT
          || code == ORDERED || code == UNORDERED)
        return false;

      /* These comparisons require the zero flag; swap operands so they
         won't need it.  */
      if ((code == GT || code == UNLE || code == LE || code == UNGT)
          && !TARGET_IEEE_FP)
        {
          rtx tmp = op0;
          op0 = op1;
          op1 = tmp;
          code = swap_condition (code);
        }

      /* Try to expand the comparison and verify that we end up with a
         carry flag based comparison.  This fails to be true only when we
         decide to expand the comparison using arithmetic, which is not a
         common scenario.  */
      start_sequence ();
      compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
                                           &second_test, &bypass_test);
      compare_seq = get_insns ();
      end_sequence ();

      if (second_test || bypass_test)
        return false;
      if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
          || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
        code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
      else
        code = GET_CODE (compare_op);
      if (code != LTU && code != GEU)
        return false;
      emit_insn (compare_seq);
      *pop = compare_op;
      return true;
    }
  if (!INTEGRAL_MODE_P (mode))
    return false;
  switch (code)
    {
    case LTU:
    case GEU:
      break;

      /* Convert a==0 into (unsigned)a<1.  */
    case EQ:
    case NE:
      if (op1 != const0_rtx)
        return false;
      op1 = const1_rtx;
      code = (code == EQ ? LTU : GEU);
      break;

      /* Convert a>b into b<a or a>=b+1.  */
    case GTU:
    case LEU:
      if (GET_CODE (op1) == CONST_INT)
        {
          op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
          /* Bail out on overflow.  We still can swap operands but that
             would force loading of the constant into a register.  */
          if (op1 == const0_rtx
              || !x86_64_immediate_operand (op1, GET_MODE (op1)))
            return false;
          code = (code == GTU ? GEU : LTU);
        }
      else
        {
          rtx tmp = op1;
          op1 = op0;
          op0 = tmp;
          code = (code == GTU ? LTU : GEU);
        }
      break;

      /* Convert a>=0 into (unsigned)a<0x80000000.  */
    case LT:
    case GE:
      if (mode == DImode || op1 != const0_rtx)
        return false;
      op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
      code = (code == LT ? GEU : LTU);
      break;
      /* Similarly convert a<=-1 into (unsigned)a>=0x80000000.  */
    case LE:
    case GT:
      if (mode == DImode || op1 != constm1_rtx)
        return false;
      op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
      code = (code == LE ? GEU : LTU);
      break;

    default:
      return false;
    }
  /* Swapping operands may cause a constant to appear as the first
     operand.  */
  if (!nonimmediate_operand (op0, VOIDmode))
    {
      if (no_new_pseudos)
        return false;
      op0 = force_reg (mode, op0);
    }
  ix86_compare_op0 = op0;
  ix86_compare_op1 = op1;
  *pop = ix86_expand_compare (code, NULL, NULL);
  if (GET_CODE (*pop) != LTU && GET_CODE (*pop) != GEU)
    abort ();
  return true;
}
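
/* Worked examples (added commentary, not from the original sources):
   (unsigned) a <= 42 enters the GTU/LEU case, op1 becomes 43 and the code
   becomes LTU, so the caller can test CF alone after "cmpl $43, %eax";
   and a >= 0 in SImode becomes (unsigned) a < 0x80000000, a pure carry
   test on the sign bit.  */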

int
ix86_expand_int_movcc (rtx operands[])
{
  enum rtx_code code = GET_CODE (operands[1]), compare_code;
  rtx compare_seq, compare_op;
  rtx second_test, bypass_test;
  enum machine_mode mode = GET_MODE (operands[0]);
  bool sign_bit_compare_p = false;

  start_sequence ();
  compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
  compare_seq = get_insns ();
  end_sequence ();

  compare_code = GET_CODE (compare_op);

  if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
      || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
    sign_bit_compare_p = true;

  /* Don't attempt mode expansion here -- if we had to expand 5 or 6
     HImode insns, we'd be swallowed in word prefix ops.  */

  if ((mode != HImode || TARGET_FAST_PREFIX)
      && (mode != DImode || TARGET_64BIT)
      && GET_CODE (operands[2]) == CONST_INT
      && GET_CODE (operands[3]) == CONST_INT)
    {
      rtx out = operands[0];
      HOST_WIDE_INT ct = INTVAL (operands[2]);
      HOST_WIDE_INT cf = INTVAL (operands[3]);
      HOST_WIDE_INT diff;

      diff = ct - cf;
      /* Sign bit compares are better done using shifts than using sbb.  */
      if (sign_bit_compare_p
          || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
                                             ix86_compare_op1, &compare_op))
        {
          /* Detect overlap between destination and compare sources.  */
          rtx tmp = out;

          if (!sign_bit_compare_p)
            {
              bool fpcmp = false;

              compare_code = GET_CODE (compare_op);

              if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
                  || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
                {
                  fpcmp = true;
                  compare_code = ix86_fp_compare_code_to_integer (compare_code);
                }

              /* To simplify the rest of the code, restrict to the GEU case.  */
              if (compare_code == LTU)
                {
                  HOST_WIDE_INT tmp = ct;
                  ct = cf;
                  cf = tmp;
                  compare_code = reverse_condition (compare_code);
                  code = reverse_condition (code);
                }
              else
                {
                  if (fpcmp)
                    PUT_CODE (compare_op,
                              reverse_condition_maybe_unordered
                              (GET_CODE (compare_op)));
                  else
                    PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
                }
              diff = ct - cf;

              if (reg_overlap_mentioned_p (out, ix86_compare_op0)
                  || reg_overlap_mentioned_p (out, ix86_compare_op1))
                tmp = gen_reg_rtx (mode);

              if (mode == DImode)
                emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
              else
                emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
            }
          else
            {
              if (code == GT || code == GE)
                code = reverse_condition (code);
              else
                {
                  HOST_WIDE_INT tmp = ct;
                  ct = cf;
                  cf = tmp;
                  diff = ct - cf;
                }
              tmp = emit_store_flag (tmp, code, ix86_compare_op0,
                                     ix86_compare_op1, VOIDmode, 0, -1);
            }

          if (diff == 1)
            {
              /*
               * cmpl op0,op1
               * sbbl dest,dest
               * [addl dest, ct]
               *
               * Size 5 - 8.
               */
              if (ct)
                tmp = expand_simple_binop (mode, PLUS,
                                           tmp, GEN_INT (ct),
                                           copy_rtx (tmp), 1, OPTAB_DIRECT);
            }
          else if (cf == -1)
            {
              /*
               * cmpl op0,op1
               * sbbl dest,dest
               * orl $ct, dest
               *
               * Size 8.
               */
              tmp = expand_simple_binop (mode, IOR,
                                         tmp, GEN_INT (ct),
                                         copy_rtx (tmp), 1, OPTAB_DIRECT);
            }
          else if (diff == -1 && ct)
            {
              /*
               * cmpl op0,op1
               * sbbl dest,dest
               * notl dest
               * [addl dest, cf]
               *
               * Size 8 - 11.
               */
              tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
              if (cf)
                tmp = expand_simple_binop (mode, PLUS,
                                           copy_rtx (tmp), GEN_INT (cf),
                                           copy_rtx (tmp), 1, OPTAB_DIRECT);
            }
          else
            {
              /*
               * cmpl op0,op1
               * sbbl dest,dest
               * [notl dest]
               * andl cf - ct, dest
               * [addl dest, ct]
               *
               * Size 8 - 11.
               */

              if (cf == 0)
                {
                  cf = ct;
                  ct = 0;
                  tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
                }

              tmp = expand_simple_binop (mode, AND,
                                         copy_rtx (tmp),
                                         gen_int_mode (cf - ct, mode),
                                         copy_rtx (tmp), 1, OPTAB_DIRECT);
              if (ct)
                tmp = expand_simple_binop (mode, PLUS,
                                           copy_rtx (tmp), GEN_INT (ct),
                                           copy_rtx (tmp), 1, OPTAB_DIRECT);
            }

          if (!rtx_equal_p (tmp, out))
            emit_move_insn (copy_rtx (out), copy_rtx (tmp));

          return 1; /* DONE */
        }

      if (diff < 0)
        {
          HOST_WIDE_INT tmp;
          tmp = ct, ct = cf, cf = tmp;
          diff = -diff;
          if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
            {
              /* We may be reversing an unordered compare to a normal
                 compare; that is not valid in general (we may convert a
                 non-trapping condition to a trapping one), however on i386
                 we currently emit all comparisons unordered.  */
              compare_code = reverse_condition_maybe_unordered (compare_code);
              code = reverse_condition_maybe_unordered (code);
            }
          else
            {
              compare_code = reverse_condition (compare_code);
              code = reverse_condition (code);
            }
        }

      compare_code = NIL;
      if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
          && GET_CODE (ix86_compare_op1) == CONST_INT)
        {
          if (ix86_compare_op1 == const0_rtx
              && (code == LT || code == GE))
            compare_code = code;
          else if (ix86_compare_op1 == constm1_rtx)
            {
              if (code == LE)
                compare_code = LT;
              else if (code == GT)
                compare_code = GE;
            }
        }

      /* Optimize dest = (op0 < 0) ? -1 : cf.  */
      if (compare_code != NIL
          && GET_MODE (ix86_compare_op0) == GET_MODE (out)
          && (cf == -1 || ct == -1))
        {
          /* If the lea code below could be used, only optimize
             if it results in a 2 insn sequence.  */

          if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
                 || diff == 3 || diff == 5 || diff == 9)
              || (compare_code == LT && ct == -1)
              || (compare_code == GE && cf == -1))
            {
              /*
               * notl op1 (if necessary)
               * sarl $31, op1
               * orl cf, op1
               */
              if (ct != -1)
                {
                  cf = ct;
                  ct = -1;
                  code = reverse_condition (code);
                }

              out = emit_store_flag (out, code, ix86_compare_op0,
                                     ix86_compare_op1, VOIDmode, 0, -1);

              out = expand_simple_binop (mode, IOR,
                                         out, GEN_INT (cf),
                                         out, 1, OPTAB_DIRECT);
              if (out != operands[0])
                emit_move_insn (operands[0], out);

              return 1; /* DONE */
            }
        }


      if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
           || diff == 3 || diff == 5 || diff == 9)
          && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
          && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
        {
          /*
           * xorl dest,dest
           * cmpl op1,op2
           * setcc dest
           * lea cf(dest*(ct-cf)),dest
           *
           * Size 14.
           *
           * This also catches the degenerate setcc-only case.
           */

          rtx tmp;
          int nops;

          out = emit_store_flag (out, code, ix86_compare_op0,
                                 ix86_compare_op1, VOIDmode, 0, 1);

          nops = 0;
          /* On x86_64 the lea instruction operates on Pmode, so we need
             to get the arithmetic done in the proper mode to match.  */
          if (diff == 1)
            tmp = copy_rtx (out);
          else
            {
              rtx out1;
              out1 = copy_rtx (out);
              tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
              nops++;
              if (diff & 1)
                {
                  tmp = gen_rtx_PLUS (mode, tmp, out1);
                  nops++;
                }
            }
          if (cf != 0)
            {
              tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
              nops++;
            }
          if (!rtx_equal_p (tmp, out))
            {
              if (nops == 1)
                out = force_operand (tmp, copy_rtx (out));
              else
                emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
            }
          if (!rtx_equal_p (out, operands[0]))
            emit_move_insn (operands[0], copy_rtx (out));

          return 1; /* DONE */
        }

      /*
       * General case:                  Jumpful:
       *   xorl dest,dest               cmpl op1, op2
       *   cmpl op1, op2                movl ct, dest
       *   setcc dest                   jcc 1f
       *   decl dest                    movl cf, dest
       *   andl (cf-ct),dest            1:
       *   addl ct,dest
       *
       * Size 20.                       Size 14.
       *
       * This is reasonably steep, but branch mispredict costs are
       * high on modern cpus, so consider failing only if optimizing
       * for space.
       */

      if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
          && BRANCH_COST >= 2)
        {
          if (cf == 0)
            {
              cf = ct;
              ct = 0;
              if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
                /* We may be reversing an unordered compare to a normal
                   compare; that is not valid in general (we may convert a
                   non-trapping condition to a trapping one), however on
                   i386 we currently emit all comparisons unordered.  */
                code = reverse_condition_maybe_unordered (code);
              else
                {
                  code = reverse_condition (code);
                  if (compare_code != NIL)
                    compare_code = reverse_condition (compare_code);
                }
            }

          if (compare_code != NIL)
            {
              /* notl op1       (if needed)
                 sarl $31, op1
                 andl (cf-ct), op1
                 addl ct, op1

                 For x < 0 (resp. x <= -1) there will be no notl,
                 so if possible swap the constants to get rid of the
                 complement.
                 True/false will be -1/0 while the code below (store flag
                 followed by decrement) is 0/-1, so the constants need
                 to be exchanged once more.  */

              if (compare_code == GE || !cf)
                {
                  code = reverse_condition (code);
                  compare_code = LT;
                }
              else
                {
                  HOST_WIDE_INT tmp = cf;
                  cf = ct;
                  ct = tmp;
                }

              out = emit_store_flag (out, code, ix86_compare_op0,
                                     ix86_compare_op1, VOIDmode, 0, -1);
            }
          else
            {
              out = emit_store_flag (out, code, ix86_compare_op0,
                                     ix86_compare_op1, VOIDmode, 0, 1);

              out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
                                         copy_rtx (out), 1, OPTAB_DIRECT);
            }

          out = expand_simple_binop (mode, AND, copy_rtx (out),
                                     gen_int_mode (cf - ct, mode),
                                     copy_rtx (out), 1, OPTAB_DIRECT);
          if (ct)
            out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
                                       copy_rtx (out), 1, OPTAB_DIRECT);
          if (!rtx_equal_p (out, operands[0]))
            emit_move_insn (operands[0], copy_rtx (out));

          return 1; /* DONE */
        }
    }

  if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
    {
      /* Try a few things more with specific constants and a variable.  */

      optab op;
      rtx var, orig_out, out, tmp;

      if (BRANCH_COST <= 2)
        return 0; /* FAIL */

      /* If one of the two operands is an interesting constant, load a
         constant with the above and mask it in with a logical operation.  */

      if (GET_CODE (operands[2]) == CONST_INT)
        {
          var = operands[3];
          if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
            operands[3] = constm1_rtx, op = and_optab;
          else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
            operands[3] = const0_rtx, op = ior_optab;
          else
            return 0; /* FAIL */
        }
      else if (GET_CODE (operands[3]) == CONST_INT)
        {
          var = operands[2];
          if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
            operands[2] = constm1_rtx, op = and_optab;
          else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
            operands[2] = const0_rtx, op = ior_optab;
          else
            return 0; /* FAIL */
        }
      else
        return 0; /* FAIL */

      orig_out = operands[0];
      tmp = gen_reg_rtx (mode);
      operands[0] = tmp;

      /* Recurse to get the constant loaded.  */
      if (ix86_expand_int_movcc (operands) == 0)
        return 0; /* FAIL */

      /* Mask in the interesting variable.  */
      out = expand_binop (mode, op, var, tmp, orig_out, 0,
                          OPTAB_WIDEN);
      if (!rtx_equal_p (out, orig_out))
        emit_move_insn (copy_rtx (orig_out), copy_rtx (out));

      return 1; /* DONE */
    }

  /*
   * For comparison with above,
   *
   * movl cf,dest
   * movl ct,tmp
   * cmpl op1,op2
   * cmovcc tmp,dest
   *
   * Size 15.
   */

  if (! nonimmediate_operand (operands[2], mode))
    operands[2] = force_reg (mode, operands[2]);
  if (! nonimmediate_operand (operands[3], mode))
    operands[3] = force_reg (mode, operands[3]);

  if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
    {
      rtx tmp = gen_reg_rtx (mode);
      emit_move_insn (tmp, operands[3]);
      operands[3] = tmp;
    }
  if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
    {
      rtx tmp = gen_reg_rtx (mode);
      emit_move_insn (tmp, operands[2]);
      operands[2] = tmp;
    }

  if (! register_operand (operands[2], VOIDmode)
      && (mode == QImode
          || ! register_operand (operands[3], VOIDmode)))
    operands[2] = force_reg (mode, operands[2]);

  if (mode == QImode
      && ! register_operand (operands[3], VOIDmode))
    operands[3] = force_reg (mode, operands[3]);

  emit_insn (compare_seq);
  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
                          gen_rtx_IF_THEN_ELSE (mode,
                                                compare_op, operands[2],
                                                operands[3])));
  if (bypass_test)
    emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
                            gen_rtx_IF_THEN_ELSE (mode,
                                                  bypass_test,
                                                  copy_rtx (operands[3]),
                                                  copy_rtx (operands[0]))));
  if (second_test)
    emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
                            gen_rtx_IF_THEN_ELSE (mode,
                                                  second_test,
                                                  copy_rtx (operands[2]),
                                                  copy_rtx (operands[0]))));

  return 1; /* DONE */
}
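
/* End-to-end example (added commentary, not from the original sources):
   for x = (a < b ? -1 : 0) with unsigned operands the carry-flag path
   above emits just

        cmpl    %ebx, %eax
        sbbl    %eax, %eax

   and constant pairs with ct - cf == 1 append a single addl; no branch
   or cmov is needed for these shapes.  */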

int
ix86_expand_fp_movcc (rtx operands[])
{
  enum rtx_code code;
  rtx tmp;
  rtx compare_op, second_test, bypass_test;

  /* For SF/DFmode conditional moves based on comparisons
     in the same mode, we may want to use SSE min/max instructions.  */
  if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
       || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
      && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
      /* The SSE comparisons do not support the LTGT/UNEQ pair.  */
      && (!TARGET_IEEE_FP
          || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
      /* We may be called from the post-reload splitter.  */
      && (!REG_P (operands[0])
          || SSE_REG_P (operands[0])
          || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
    {
      rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
      code = GET_CODE (operands[1]);

      /* See if we have a (cross) match between the comparison operands and
         the conditional move operands.  */
      if (rtx_equal_p (operands[2], op1))
        {
          rtx tmp = op0;
          op0 = op1;
          op1 = tmp;
          code = reverse_condition_maybe_unordered (code);
        }
      if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
        {
          /* Check for min operation.  */
          if (code == LT || code == UNLE)
            {
              if (code == UNLE)
                {
                  rtx tmp = op0;
                  op0 = op1;
                  op1 = tmp;
                }
              operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
              if (memory_operand (op0, VOIDmode))
                op0 = force_reg (GET_MODE (operands[0]), op0);
              if (GET_MODE (operands[0]) == SFmode)
                emit_insn (gen_minsf3 (operands[0], op0, op1));
              else
                emit_insn (gen_mindf3 (operands[0], op0, op1));
              return 1;
            }
          /* Check for max operation.  */
          if (code == GT || code == UNGE)
            {
              if (code == UNGE)
                {
                  rtx tmp = op0;
                  op0 = op1;
                  op1 = tmp;
                }
              operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
              if (memory_operand (op0, VOIDmode))
                op0 = force_reg (GET_MODE (operands[0]), op0);
              if (GET_MODE (operands[0]) == SFmode)
                emit_insn (gen_maxsf3 (operands[0], op0, op1));
              else
                emit_insn (gen_maxdf3 (operands[0], op0, op1));
              return 1;
            }
        }
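
      /* For example (added commentary, not from the original sources):
         with SSE math enabled, the C expression (a < b ? a : b) in SFmode
         matches the min check above and is emitted as a single minss
         instruction rather than a compare followed by a conditional
         move.  */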
      /* Manage the condition to be sse_comparison_operator.  In case we
         are in non-ieee mode, try to canonicalize the destination operand
         to be first in the comparison - this helps reload to avoid extra
         moves.  */
      if (!sse_comparison_operator (operands[1], VOIDmode)
          || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
        {
          rtx tmp = ix86_compare_op0;
          ix86_compare_op0 = ix86_compare_op1;
          ix86_compare_op1 = tmp;
          operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
                                        VOIDmode, ix86_compare_op0,
                                        ix86_compare_op1);
        }
      /* Similarly try to manage the result to be the first operand of the
         conditional move.  We also don't support the NE comparison on SSE,
         so try to avoid it.  */
      if ((rtx_equal_p (operands[0], operands[3])
           && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
          || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
        {
          rtx tmp = operands[2];
          operands[2] = operands[3];
          operands[3] = tmp;
          operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
                                        (GET_CODE (operands[1])),
                                        VOIDmode, ix86_compare_op0,
                                        ix86_compare_op1);
        }
      if (GET_MODE (operands[0]) == SFmode)
        emit_insn (gen_sse_movsfcc (operands[0], operands[1],
                                    operands[2], operands[3],
                                    ix86_compare_op0, ix86_compare_op1));
      else
        emit_insn (gen_sse_movdfcc (operands[0], operands[1],
                                    operands[2], operands[3],
                                    ix86_compare_op0, ix86_compare_op1));
      return 1;
    }

  /* The floating point conditional move instructions don't directly
     support conditions resulting from a signed integer comparison.  */

  code = GET_CODE (operands[1]);
  compare_op = ix86_expand_compare (code, &second_test, &bypass_test);

  if (!fcmov_comparison_operator (compare_op, VOIDmode))
    {
      if (second_test != NULL || bypass_test != NULL)
        abort ();
      tmp = gen_reg_rtx (QImode);
      ix86_expand_setcc (code, tmp);
      code = NE;
      ix86_compare_op0 = tmp;
      ix86_compare_op1 = const0_rtx;
      compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
    }
  if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
    {
      tmp = gen_reg_rtx (GET_MODE (operands[0]));
      emit_move_insn (tmp, operands[3]);
      operands[3] = tmp;
    }
  if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
    {
      tmp = gen_reg_rtx (GET_MODE (operands[0]));
      emit_move_insn (tmp, operands[2]);
      operands[2] = tmp;
    }

  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
                          gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
                                                compare_op,
                                                operands[2],
                                                operands[3])));
  if (bypass_test)
    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
                            gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
                                                  bypass_test,
                                                  operands[3],
                                                  operands[0])));
  if (second_test)
    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
                            gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
                                                  second_test,
                                                  operands[2],
                                                  operands[0])));

  return 1;
}

/* Expand conditional increment or decrement using adc/sbb instructions.
   The default case using setcc followed by the conditional move can be
   done by generic code.  */
int
ix86_expand_int_addcc (rtx operands[])
{
  enum rtx_code code = GET_CODE (operands[1]);
  rtx compare_op;
  rtx val = const0_rtx;
  bool fpcmp = false;
  enum machine_mode mode = GET_MODE (operands[0]);

  if (operands[3] != const1_rtx
      && operands[3] != constm1_rtx)
    return 0;
  if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
                                       ix86_compare_op1, &compare_op))
    return 0;
  code = GET_CODE (compare_op);

  if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
      || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
    {
      fpcmp = true;
      code = ix86_fp_compare_code_to_integer (code);
    }

  if (code != LTU)
    {
      val = constm1_rtx;
      if (fpcmp)
        PUT_CODE (compare_op,
                  reverse_condition_maybe_unordered
                  (GET_CODE (compare_op)));
      else
        PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
    }
  PUT_MODE (compare_op, mode);

  /* Construct either an adc or an sbb insn.  */
  if ((code == LTU) == (operands[3] == constm1_rtx))
    {
      switch (GET_MODE (operands[0]))
        {
        case QImode:
          emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
          break;
        case HImode:
          emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
          break;
        case SImode:
          emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
          break;
        case DImode:
          emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
          break;
        default:
          abort ();
        }
    }
  else
    {
      switch (GET_MODE (operands[0]))
        {
        case QImode:
          emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
          break;
        case HImode:
          emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
          break;
        case SImode:
          emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
          break;
        case DImode:
          emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
          break;
        default:
          abort ();
        }
    }
  return 1; /* DONE */
}
10257
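/* Editor's note: the following is an illustrative sketch, not part of the
   original source.  It models in plain C what the adc/sbb expansion above
   computes for the SImode case: an unsigned (LTU) compare sets the carry
   flag, and "adc dst, 0" / "sbb dst, 0" then folds the condition into the
   result without a branch.  The helper name is hypothetical.  */

static unsigned int
addcc_model (unsigned int op2, unsigned int x, unsigned int y, int decrement)
{
  unsigned int carry = x < y;		/* the LTU comparison sets CF */
  return decrement ? op2 - carry	/* sbb op2, 0 */
		   : op2 + carry;	/* adc op2, 0 */
}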
10258
2450a057
JH
10259/* Split operands 0 and 1 into SImode parts. Similar to split_di, but
10260 works for floating point parameters and non-offsettable memories.
10261 For pushes, it returns just stack offsets; the values will be saved
10262 in the right order. At most three parts are generated. */
10263
2b589241 10264static int
b96a374d 10265ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
32b5b1aa 10266{
26e5b205
JH
10267 int size;
10268
10269 if (!TARGET_64BIT)
10270 size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
10271 else
10272 size = (GET_MODE_SIZE (mode) + 4) / 8;
2450a057 10273
a7180f70
BS
10274 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
10275 abort ();
2450a057
JH
10276 if (size < 2 || size > 3)
10277 abort ();
10278
f996902d
RH
10279 /* Optimize constant pool reference to immediates. This is used by fp
10280 moves, that force all constants to memory to allow combining. */
10281 if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
10282 {
10283 rtx tmp = maybe_get_pool_constant (operand);
10284 if (tmp)
10285 operand = tmp;
10286 }
d7a29404 10287
2450a057 10288 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
e075ae69 10289 {
2450a057
JH
10290 /* The only non-offsettable memories we handle are pushes. */
10291 if (! push_operand (operand, VOIDmode))
10292 abort ();
10293
26e5b205
JH
10294 operand = copy_rtx (operand);
10295 PUT_MODE (operand, Pmode);
2450a057
JH
10296 parts[0] = parts[1] = parts[2] = operand;
10297 }
26e5b205 10298 else if (!TARGET_64BIT)
2450a057
JH
10299 {
10300 if (mode == DImode)
10301 split_di (&operand, 1, &parts[0], &parts[1]);
10302 else
e075ae69 10303 {
2450a057
JH
10304 if (REG_P (operand))
10305 {
10306 if (!reload_completed)
10307 abort ();
10308 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
10309 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
10310 if (size == 3)
10311 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
10312 }
10313 else if (offsettable_memref_p (operand))
10314 {
f4ef873c 10315 operand = adjust_address (operand, SImode, 0);
2450a057 10316 parts[0] = operand;
b72f00af 10317 parts[1] = adjust_address (operand, SImode, 4);
2450a057 10318 if (size == 3)
b72f00af 10319 parts[2] = adjust_address (operand, SImode, 8);
2450a057
JH
10320 }
10321 else if (GET_CODE (operand) == CONST_DOUBLE)
10322 {
10323 REAL_VALUE_TYPE r;
2b589241 10324 long l[4];
2450a057
JH
10325
10326 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10327 switch (mode)
10328 {
10329 case XFmode:
2b589241 10330 case TFmode:
2450a057 10331 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
d8bf17f9 10332 parts[2] = gen_int_mode (l[2], SImode);
2450a057
JH
10333 break;
10334 case DFmode:
10335 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
10336 break;
10337 default:
10338 abort ();
10339 }
d8bf17f9
LB
10340 parts[1] = gen_int_mode (l[1], SImode);
10341 parts[0] = gen_int_mode (l[0], SImode);
2450a057
JH
10342 }
10343 else
10344 abort ();
e075ae69 10345 }
2450a057 10346 }
26e5b205
JH
10347 else
10348 {
44cf5b6a
JH
10349 if (mode == TImode)
10350 split_ti (&operand, 1, &parts[0], &parts[1]);
26e5b205
JH
10351 if (mode == XFmode || mode == TFmode)
10352 {
10353 if (REG_P (operand))
10354 {
10355 if (!reload_completed)
10356 abort ();
10357 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
10358 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
10359 }
10360 else if (offsettable_memref_p (operand))
10361 {
b72f00af 10362 operand = adjust_address (operand, DImode, 0);
26e5b205 10363 parts[0] = operand;
b72f00af 10364 parts[1] = adjust_address (operand, SImode, 8);
26e5b205
JH
10365 }
10366 else if (GET_CODE (operand) == CONST_DOUBLE)
10367 {
10368 REAL_VALUE_TYPE r;
10369 long l[3];
10370
10371 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10372 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10373 /* Do not use shift by 32 to avoid warning on 32bit systems. */
10374 if (HOST_BITS_PER_WIDE_INT >= 64)
523fbd9d 10375 parts[0]
d8bf17f9 10376 = gen_int_mode
44cf5b6a 10377 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
b531087a 10378 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
d8bf17f9 10379 DImode);
26e5b205
JH
10380 else
10381 parts[0] = immed_double_const (l[0], l[1], DImode);
d8bf17f9 10382 parts[1] = gen_int_mode (l[2], SImode);
26e5b205
JH
10383 }
10384 else
10385 abort ();
10386 }
10387 }
2450a057 10388
2b589241 10389 return size;
2450a057
JH
10390}
10391
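/* Editor's sketch (hypothetical helper, not in the original source): on a
   32-bit target, splitting a DImode value as done above amounts to slicing
   it into two SImode words, least significant first.  */

static void
split_di_model (unsigned long long v, unsigned int part[2])
{
  part[0] = (unsigned int) (v & 0xffffffffu);	/* low SImode part */
  part[1] = (unsigned int) (v >> 32);		/* high SImode part */
}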
10392/* Emit insns to perform a move or push of DI, DF, and XF values.
10393 All the required insns are emitted by this function; the caller
10394 needs to emit nothing further. Operands 2-4 contain the input values
10395 in the correct order; operands 5-7 contain the output values. */
10396
26e5b205 10397void
b96a374d 10398ix86_split_long_move (rtx operands[])
2450a057
JH
10399{
10400 rtx part[2][3];
26e5b205 10401 int nparts;
2450a057
JH
10402 int push = 0;
10403 int collisions = 0;
26e5b205
JH
10404 enum machine_mode mode = GET_MODE (operands[0]);
10405
10406 /* The DFmode expanders may ask us to move a double.
10407 For a 64-bit target this is a single move. Hiding that fact
10408 here simplifies the i386.md splitters. */
10409 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
10410 {
8cdfa312
RH
10411 /* Optimize constant pool reference to immediates. This is used by
10412 fp moves, that force all constants to memory to allow combining. */
26e5b205
JH
10413
10414 if (GET_CODE (operands[1]) == MEM
10415 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
10416 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
10417 operands[1] = get_pool_constant (XEXP (operands[1], 0));
10418 if (push_operand (operands[0], VOIDmode))
b47b4f21
JH
10419 {
10420 operands[0] = copy_rtx (operands[0]);
10421 PUT_MODE (operands[0], Pmode);
10422 }
26e5b205
JH
10423 else
10424 operands[0] = gen_lowpart (DImode, operands[0]);
10425 operands[1] = gen_lowpart (DImode, operands[1]);
10426 emit_move_insn (operands[0], operands[1]);
10427 return;
10428 }
2450a057 10429
2450a057
JH
10430 /* The only non-offsettable memory we handle is a push. */
10431 if (push_operand (operands[0], VOIDmode))
10432 push = 1;
10433 else if (GET_CODE (operands[0]) == MEM
10434 && ! offsettable_memref_p (operands[0]))
10435 abort ();
10436
26e5b205
JH
10437 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
10438 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
2450a057
JH
10439
10440 /* When emitting a push, take care with source operands on the stack. */
10441 if (push && GET_CODE (operands[1]) == MEM
10442 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
10443 {
26e5b205 10444 if (nparts == 3)
886cbb88
JH
10445 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
10446 XEXP (part[1][2], 0));
10447 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
10448 XEXP (part[1][1], 0));
2450a057
JH
10449 }
10450
0f290768 10451 /* We need to do the copy in the right order in case an address register
2450a057
JH
10452 of the source overlaps the destination. */
10453 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
10454 {
10455 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
10456 collisions++;
10457 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10458 collisions++;
26e5b205 10459 if (nparts == 3
2450a057
JH
10460 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
10461 collisions++;
10462
10463 /* Collision in the middle part can be handled by reordering. */
26e5b205 10464 if (collisions == 1 && nparts == 3
2450a057 10465 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
e075ae69 10466 {
2450a057
JH
10467 rtx tmp;
10468 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
10469 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
10470 }
e075ae69 10471
2450a057
JH
10472 /* If there are more collisions, we can't handle it by reordering.
10473 Do an lea to the last part and use only one colliding move. */
10474 else if (collisions > 1)
10475 {
8231b3f9
RH
10476 rtx base;
10477
2450a057 10478 collisions = 1;
8231b3f9
RH
10479
10480 base = part[0][nparts - 1];
10481
10482 /* Handle the case when the last part isn't valid for lea.
10483 Happens in 64-bit mode storing the 12-byte XFmode. */
10484 if (GET_MODE (base) != Pmode)
10485 base = gen_rtx_REG (Pmode, REGNO (base));
10486
10487 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
10488 part[1][0] = replace_equiv_address (part[1][0], base);
10489 part[1][1] = replace_equiv_address (part[1][1],
10490 plus_constant (base, UNITS_PER_WORD));
26e5b205 10491 if (nparts == 3)
8231b3f9
RH
10492 part[1][2] = replace_equiv_address (part[1][2],
10493 plus_constant (base, 8));
2450a057
JH
10494 }
10495 }
10496
10497 if (push)
10498 {
26e5b205 10499 if (!TARGET_64BIT)
2b589241 10500 {
26e5b205
JH
10501 if (nparts == 3)
10502 {
10503 /* We use only the first 12 bytes of a TFmode value, but for pushing
10504 we must adjust the stack as if we were pushing a real 16-byte
10505 value. */
10506 if (mode == TFmode && !TARGET_64BIT)
10507 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
10508 GEN_INT (-4)));
10509 emit_move_insn (part[0][2], part[1][2]);
10510 }
2b589241 10511 }
26e5b205
JH
10512 else
10513 {
10514 /* In 64-bit mode there is no 32-bit push available. If the operand is
10515 a register, that is OK - we just use the larger counterpart. We also
10516 retype memories - these come from the attempt to avoid a REX prefix
10517 when moving the second half of a TFmode value. */
10518 if (GET_MODE (part[1][1]) == SImode)
10519 {
10520 if (GET_CODE (part[1][1]) == MEM)
f4ef873c 10521 part[1][1] = adjust_address (part[1][1], DImode, 0);
26e5b205
JH
10522 else if (REG_P (part[1][1]))
10523 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
10524 else
b531087a 10525 abort ();
886cbb88
JH
10526 if (GET_MODE (part[1][0]) == SImode)
10527 part[1][0] = part[1][1];
26e5b205
JH
10528 }
10529 }
10530 emit_move_insn (part[0][1], part[1][1]);
10531 emit_move_insn (part[0][0], part[1][0]);
10532 return;
2450a057
JH
10533 }
10534
10535 /* Choose the correct order so that the source is not overwritten before it is copied. */
10536 if ((REG_P (part[0][0])
10537 && REG_P (part[1][1])
10538 && (REGNO (part[0][0]) == REGNO (part[1][1])
26e5b205 10539 || (nparts == 3
2450a057
JH
10540 && REGNO (part[0][0]) == REGNO (part[1][2]))))
10541 || (collisions > 0
10542 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
10543 {
26e5b205 10544 if (nparts == 3)
2450a057 10545 {
26e5b205
JH
10546 operands[2] = part[0][2];
10547 operands[3] = part[0][1];
10548 operands[4] = part[0][0];
10549 operands[5] = part[1][2];
10550 operands[6] = part[1][1];
10551 operands[7] = part[1][0];
2450a057
JH
10552 }
10553 else
10554 {
26e5b205
JH
10555 operands[2] = part[0][1];
10556 operands[3] = part[0][0];
10557 operands[5] = part[1][1];
10558 operands[6] = part[1][0];
2450a057
JH
10559 }
10560 }
10561 else
10562 {
26e5b205 10563 if (nparts == 3)
2450a057 10564 {
26e5b205
JH
10565 operands[2] = part[0][0];
10566 operands[3] = part[0][1];
10567 operands[4] = part[0][2];
10568 operands[5] = part[1][0];
10569 operands[6] = part[1][1];
10570 operands[7] = part[1][2];
2450a057
JH
10571 }
10572 else
10573 {
26e5b205
JH
10574 operands[2] = part[0][0];
10575 operands[3] = part[0][1];
10576 operands[5] = part[1][0];
10577 operands[6] = part[1][1];
e075ae69
RH
10578 }
10579 }
26e5b205
JH
10580 emit_move_insn (operands[2], operands[5]);
10581 emit_move_insn (operands[3], operands[6]);
10582 if (nparts == 3)
10583 emit_move_insn (operands[4], operands[7]);
32b5b1aa 10584
26e5b205 10585 return;
32b5b1aa 10586}
32b5b1aa 10587
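/* Editor's illustration (hypothetical helper, not in the original source):
   the operand-ordering logic above is the register-pair analogue of memmove.
   If writing the low destination word would clobber a source word that is
   still needed, the moves are emitted high-to-low instead of low-to-high.  */

static void
long_move_model (unsigned int *d0, unsigned int *d1,
		 const unsigned int *s0, const unsigned int *s1)
{
  if (d0 == s1)		/* the low destination overlaps the high source */
    {
      *d1 = *s1;
      *d0 = *s0;
    }
  else			/* the usual low-to-high order is safe */
    {
      *d0 = *s0;
      *d1 = *s1;
    }
}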
e075ae69 10588void
b96a374d 10589ix86_split_ashldi (rtx *operands, rtx scratch)
32b5b1aa 10590{
e075ae69
RH
10591 rtx low[2], high[2];
10592 int count;
b985a30f 10593
e075ae69
RH
10594 if (GET_CODE (operands[2]) == CONST_INT)
10595 {
10596 split_di (operands, 2, low, high);
10597 count = INTVAL (operands[2]) & 63;
32b5b1aa 10598
e075ae69
RH
10599 if (count >= 32)
10600 {
10601 emit_move_insn (high[0], low[1]);
10602 emit_move_insn (low[0], const0_rtx);
b985a30f 10603
e075ae69
RH
10604 if (count > 32)
10605 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
10606 }
10607 else
10608 {
10609 if (!rtx_equal_p (operands[0], operands[1]))
10610 emit_move_insn (operands[0], operands[1]);
10611 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
10612 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
10613 }
10614 }
10615 else
10616 {
10617 if (!rtx_equal_p (operands[0], operands[1]))
10618 emit_move_insn (operands[0], operands[1]);
b985a30f 10619
e075ae69 10620 split_di (operands, 1, low, high);
b985a30f 10621
e075ae69
RH
10622 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
10623 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
32b5b1aa 10624
fe577e58 10625 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
e075ae69 10626 {
fe577e58 10627 if (! no_new_pseudos)
e075ae69
RH
10628 scratch = force_reg (SImode, const0_rtx);
10629 else
10630 emit_move_insn (scratch, const0_rtx);
10631
10632 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
10633 scratch));
10634 }
10635 else
10636 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
10637 }
e9a25f70 10638}
32b5b1aa 10639
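/* Editor's sketch (hypothetical, not part of the original source): in C
   terms, the constant-count path of the DImode left-shift split above acts
   on a 32-bit register pair as follows.  */

static void
ashl64_model (unsigned int *lo, unsigned int *hi, int count)
{
  count &= 63;
  if (count >= 32)
    {
      *hi = *lo << (count - 32);	/* high = low, shifted by the rest */
      *lo = 0;
    }
  else if (count > 0)
    {
      *hi = (*hi << count) | (*lo >> (32 - count));	/* shld */
      *lo <<= count;
    }
}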
e075ae69 10640void
b96a374d 10641ix86_split_ashrdi (rtx *operands, rtx scratch)
32b5b1aa 10642{
e075ae69
RH
10643 rtx low[2], high[2];
10644 int count;
32b5b1aa 10645
e075ae69
RH
10646 if (GET_CODE (operands[2]) == CONST_INT)
10647 {
10648 split_di (operands, 2, low, high);
10649 count = INTVAL (operands[2]) & 63;
32b5b1aa 10650
e075ae69
RH
10651 if (count >= 32)
10652 {
10653 emit_move_insn (low[0], high[1]);
32b5b1aa 10654
e075ae69
RH
10655 if (! reload_completed)
10656 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
10657 else
10658 {
10659 emit_move_insn (high[0], low[0]);
10660 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10661 }
10662
10663 if (count > 32)
10664 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
10665 }
10666 else
10667 {
10668 if (!rtx_equal_p (operands[0], operands[1]))
10669 emit_move_insn (operands[0], operands[1]);
10670 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10671 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
10672 }
10673 }
10674 else
32b5b1aa 10675 {
e075ae69
RH
10676 if (!rtx_equal_p (operands[0], operands[1]))
10677 emit_move_insn (operands[0], operands[1]);
10678
10679 split_di (operands, 1, low, high);
10680
10681 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10682 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
10683
fe577e58 10684 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
e075ae69 10685 {
fe577e58 10686 if (! no_new_pseudos)
e075ae69
RH
10687 scratch = gen_reg_rtx (SImode);
10688 emit_move_insn (scratch, high[0]);
10689 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
10690 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10691 scratch));
10692 }
10693 else
10694 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
32b5b1aa 10695 }
e075ae69 10696}
32b5b1aa 10697
e075ae69 10698void
b96a374d 10699ix86_split_lshrdi (rtx *operands, rtx scratch)
e075ae69
RH
10700{
10701 rtx low[2], high[2];
10702 int count;
32b5b1aa 10703
e075ae69 10704 if (GET_CODE (operands[2]) == CONST_INT)
32b5b1aa 10705 {
e075ae69
RH
10706 split_di (operands, 2, low, high);
10707 count = INTVAL (operands[2]) & 63;
10708
10709 if (count >= 32)
c7271385 10710 {
e075ae69
RH
10711 emit_move_insn (low[0], high[1]);
10712 emit_move_insn (high[0], const0_rtx);
32b5b1aa 10713
e075ae69
RH
10714 if (count > 32)
10715 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
10716 }
10717 else
10718 {
10719 if (!rtx_equal_p (operands[0], operands[1]))
10720 emit_move_insn (operands[0], operands[1]);
10721 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10722 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
10723 }
32b5b1aa 10724 }
e075ae69
RH
10725 else
10726 {
10727 if (!rtx_equal_p (operands[0], operands[1]))
10728 emit_move_insn (operands[0], operands[1]);
32b5b1aa 10729
e075ae69
RH
10730 split_di (operands, 1, low, high);
10731
10732 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10733 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
10734
10735 /* Heh. By reversing the arguments, we can reuse this pattern. */
fe577e58 10736 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
e075ae69 10737 {
fe577e58 10738 if (! no_new_pseudos)
e075ae69
RH
10739 scratch = force_reg (SImode, const0_rtx);
10740 else
10741 emit_move_insn (scratch, const0_rtx);
10742
10743 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10744 scratch));
10745 }
10746 else
10747 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
10748 }
32b5b1aa 10749}
3f803cd9 10750
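/* Editor's sketch (hypothetical, not part of the original source): the
   matching C-level picture for the logical right-shift split above; the
   arithmetic variant differs only in filling the high word with copies of
   the sign bit instead of zeros.  */

static void
lshr64_model (unsigned int *lo, unsigned int *hi, int count)
{
  count &= 63;
  if (count >= 32)
    {
      *lo = *hi >> (count - 32);	/* low = high, shifted by the rest */
      *hi = 0;
    }
  else if (count > 0)
    {
      *lo = (*lo >> count) | (*hi << (32 - count));	/* shrd */
      *hi >>= count;
    }
}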
0407c02b 10751/* Helper function for the string operations below. Test whether VARIABLE
0945b39d
JH
10752 has the bits of VALUE clear; if so, jump to the label that is returned. */
10753static rtx
b96a374d 10754ix86_expand_aligntest (rtx variable, int value)
0945b39d
JH
10755{
10756 rtx label = gen_label_rtx ();
10757 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
10758 if (GET_MODE (variable) == DImode)
10759 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
10760 else
10761 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
10762 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
d43e0b7d 10763 1, label);
0945b39d
JH
10764 return label;
10765}
10766
10767/* Adjust COUNTER by the VALUE. */
10768static void
b96a374d 10769ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
0945b39d
JH
10770{
10771 if (GET_MODE (countreg) == DImode)
10772 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
10773 else
10774 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
10775}
10776
10777/* Zero extend possibly SImode EXP to Pmode register. */
d24b3457 10778rtx
b96a374d 10779ix86_zero_extend_to_Pmode (rtx exp)
0945b39d
JH
10780{
10781 rtx r;
10782 if (GET_MODE (exp) == VOIDmode)
10783 return force_reg (Pmode, exp);
10784 if (GET_MODE (exp) == Pmode)
10785 return copy_to_mode_reg (Pmode, exp);
10786 r = gen_reg_rtx (Pmode);
10787 emit_insn (gen_zero_extendsidi2 (r, exp));
10788 return r;
10789}
10790
10791/* Expand string move (memcpy) operation. Use i386 string operations when
10792 profitable. expand_clrstr contains similar code. */
10793int
b96a374d 10794ix86_expand_movstr (rtx dst, rtx src, rtx count_exp, rtx align_exp)
0945b39d
JH
10795{
10796 rtx srcreg, destreg, countreg;
10797 enum machine_mode counter_mode;
10798 HOST_WIDE_INT align = 0;
10799 unsigned HOST_WIDE_INT count = 0;
10800 rtx insns;
10801
0945b39d
JH
10802 if (GET_CODE (align_exp) == CONST_INT)
10803 align = INTVAL (align_exp);
10804
d0a5295a
RH
10805 /* Can't use any of this if the user has appropriated esi or edi. */
10806 if (global_regs[4] || global_regs[5])
10807 return 0;
10808
5519a4f9 10809 /* This simple hack avoids all inlining code and simplifies code below. */
0945b39d
JH
10810 if (!TARGET_ALIGN_STRINGOPS)
10811 align = 64;
10812
10813 if (GET_CODE (count_exp) == CONST_INT)
26771da7
JH
10814 {
10815 count = INTVAL (count_exp);
10816 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
10817 return 0;
10818 }
0945b39d
JH
10819
10820 /* Figure out proper mode for counter. For 32bits it is always SImode,
10821 for 64bits use SImode when possible, otherwise DImode.
10822 Set count to number of bytes copied when known at compile time. */
10823 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
10824 || x86_64_zero_extended_value (count_exp))
10825 counter_mode = SImode;
10826 else
10827 counter_mode = DImode;
10828
26771da7
JH
10829 start_sequence ();
10830
0945b39d
JH
10831 if (counter_mode != SImode && counter_mode != DImode)
10832 abort ();
10833
10834 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
10835 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
10836
10837 emit_insn (gen_cld ());
10838
10839 /* When optimizing for size, emit a simple "rep ; movsb" instruction for
10840 counts not divisible by 4. */
10841
10842 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
10843 {
10844 countreg = ix86_zero_extend_to_Pmode (count_exp);
10845 if (TARGET_64BIT)
10846 emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
10847 destreg, srcreg, countreg));
10848 else
10849 emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
10850 destreg, srcreg, countreg));
10851 }
10852
10853 /* For constant aligned (or small unaligned) copies use rep movsl
10854 followed by code copying the rest. For PentiumPro ensure 8 byte
10855 alignment to allow rep movsl acceleration. */
10856
10857 else if (count != 0
10858 && (align >= 8
10859 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
b531087a 10860 || optimize_size || count < (unsigned int) 64))
0945b39d
JH
10861 {
10862 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10863 if (count & ~(size - 1))
10864 {
10865 countreg = copy_to_mode_reg (counter_mode,
10866 GEN_INT ((count >> (size == 4 ? 2 : 3))
10867 & (TARGET_64BIT ? -1 : 0x3fffffff)));
10868 countreg = ix86_zero_extend_to_Pmode (countreg);
10869 if (size == 4)
10870 {
10871 if (TARGET_64BIT)
10872 emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
10873 destreg, srcreg, countreg));
10874 else
10875 emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
10876 destreg, srcreg, countreg));
10877 }
10878 else
10879 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
10880 destreg, srcreg, countreg));
10881 }
10882 if (size == 8 && (count & 0x04))
10883 emit_insn (gen_strmovsi (destreg, srcreg));
10884 if (count & 0x02)
10885 emit_insn (gen_strmovhi (destreg, srcreg));
10886 if (count & 0x01)
10887 emit_insn (gen_strmovqi (destreg, srcreg));
10888 }
10889 /* The generic code based on the glibc implementation:
10890 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
10891 allowing accelerated copying there)
10892 - copy the data using rep movsl
10893 - copy the rest. */
10894 else
10895 {
10896 rtx countreg2;
10897 rtx label = NULL;
37ad04a5
JH
10898 int desired_alignment = (TARGET_PENTIUMPRO
10899 && (count == 0 || count >= (unsigned int) 260)
10900 ? 8 : UNITS_PER_WORD);
0945b39d
JH
10901
10902 /* In case we don't know anything about the alignment, default to
10903 the library version, since it is usually equally fast and results in
b96a374d 10904 shorter code.
4977bab6
ZW
10905
10906 Also emit a call when we know that the count is large and call overhead
10907 will not be important. */
10908 if (!TARGET_INLINE_ALL_STRINGOPS
10909 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
0945b39d
JH
10910 {
10911 end_sequence ();
10912 return 0;
10913 }
10914
10915 if (TARGET_SINGLE_STRINGOP)
10916 emit_insn (gen_cld ());
10917
10918 countreg2 = gen_reg_rtx (Pmode);
10919 countreg = copy_to_mode_reg (counter_mode, count_exp);
10920
10921 /* We don't use loops to align the destination or to copy parts smaller
10922 than 4 bytes, because gcc is able to optimize such code better (in
10923 the case the destination or the count really is aligned, gcc is often
10924 able to predict the branches) and also it is friendlier to the
a4f31c00 10925 hardware branch prediction.
0945b39d 10926
d1f87653 10927 Using loops is beneficial for the generic case, because we can
0945b39d
JH
10928 handle small counts using the loops. Many CPUs (such as Athlon)
10929 have large REP prefix setup costs.
10930
4aae8a9a 10931 This is quite costly. Maybe we can revisit this decision later or
0945b39d
JH
10932 add some customizability to this code. */
10933
37ad04a5 10934 if (count == 0 && align < desired_alignment)
0945b39d
JH
10935 {
10936 label = gen_label_rtx ();
aaae0bb9 10937 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
d43e0b7d 10938 LEU, 0, counter_mode, 1, label);
0945b39d
JH
10939 }
10940 if (align <= 1)
10941 {
10942 rtx label = ix86_expand_aligntest (destreg, 1);
10943 emit_insn (gen_strmovqi (destreg, srcreg));
10944 ix86_adjust_counter (countreg, 1);
10945 emit_label (label);
10946 LABEL_NUSES (label) = 1;
10947 }
10948 if (align <= 2)
10949 {
10950 rtx label = ix86_expand_aligntest (destreg, 2);
10951 emit_insn (gen_strmovhi (destreg, srcreg));
10952 ix86_adjust_counter (countreg, 2);
10953 emit_label (label);
10954 LABEL_NUSES (label) = 1;
10955 }
37ad04a5 10956 if (align <= 4 && desired_alignment > 4)
0945b39d
JH
10957 {
10958 rtx label = ix86_expand_aligntest (destreg, 4);
10959 emit_insn (gen_strmovsi (destreg, srcreg));
10960 ix86_adjust_counter (countreg, 4);
10961 emit_label (label);
10962 LABEL_NUSES (label) = 1;
10963 }
10964
37ad04a5
JH
10965 if (label && desired_alignment > 4 && !TARGET_64BIT)
10966 {
10967 emit_label (label);
10968 LABEL_NUSES (label) = 1;
10969 label = NULL_RTX;
10970 }
0945b39d
JH
10971 if (!TARGET_SINGLE_STRINGOP)
10972 emit_insn (gen_cld ());
10973 if (TARGET_64BIT)
10974 {
10975 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10976 GEN_INT (3)));
10977 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
10978 destreg, srcreg, countreg2));
10979 }
10980 else
10981 {
10982 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
10983 emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
10984 destreg, srcreg, countreg2));
10985 }
10986
10987 if (label)
10988 {
10989 emit_label (label);
10990 LABEL_NUSES (label) = 1;
10991 }
10992 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10993 emit_insn (gen_strmovsi (destreg, srcreg));
10994 if ((align <= 4 || count == 0) && TARGET_64BIT)
10995 {
10996 rtx label = ix86_expand_aligntest (countreg, 4);
10997 emit_insn (gen_strmovsi (destreg, srcreg));
10998 emit_label (label);
10999 LABEL_NUSES (label) = 1;
11000 }
11001 if (align > 2 && count != 0 && (count & 2))
11002 emit_insn (gen_strmovhi (destreg, srcreg));
11003 if (align <= 2 || count == 0)
11004 {
11005 rtx label = ix86_expand_aligntest (countreg, 2);
11006 emit_insn (gen_strmovhi (destreg, srcreg));
11007 emit_label (label);
11008 LABEL_NUSES (label) = 1;
11009 }
11010 if (align > 1 && count != 0 && (count & 1))
11011 emit_insn (gen_strmovqi (destreg, srcreg));
11012 if (align <= 1 || count == 0)
11013 {
11014 rtx label = ix86_expand_aligntest (countreg, 1);
11015 emit_insn (gen_strmovqi (destreg, srcreg));
11016 emit_label (label);
11017 LABEL_NUSES (label) = 1;
11018 }
11019 }
11020
11021 insns = get_insns ();
11022 end_sequence ();
11023
11024 ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
2f937369 11025 emit_insn (insns);
0945b39d
JH
11026 return 1;
11027}
11028
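/* Editor's illustration (hypothetical helper, not in the original source):
   a C-level model of the inline memcpy strategy emitted above -- align the
   destination byte by byte, bulk-copy words (the "rep movsl" step), then
   mop up the 0-3 byte tail.  It assumes a 4-byte unsigned int and x86's
   tolerance of unaligned word accesses.  */

static void
movstr_model (unsigned char *dst, const unsigned char *src, unsigned long n)
{
  /* Align the destination to 4 bytes (the aligntest labels above).  */
  while (n && ((unsigned long) dst & 3))
    {
      *dst++ = *src++;
      n--;
    }
  /* The word loop stands in for "rep movsl" with count = n >> 2.  */
  for (; n >= 4; n -= 4, dst += 4, src += 4)
    *(unsigned int *) dst = *(const unsigned int *) src;
  /* The tail copies correspond to strmovhi / strmovqi.  */
  if (n & 2)
    {
      *(unsigned short *) dst = *(const unsigned short *) src;
      dst += 2;
      src += 2;
    }
  if (n & 1)
    *dst = *src;
}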
11029/* Expand string clear operation (bzero). Use i386 string operations when
11030 profitable. expand_movstr contains similar code. */
11031int
b96a374d 11032ix86_expand_clrstr (rtx src, rtx count_exp, rtx align_exp)
0945b39d
JH
11033{
11034 rtx destreg, zeroreg, countreg;
11035 enum machine_mode counter_mode;
11036 HOST_WIDE_INT align = 0;
11037 unsigned HOST_WIDE_INT count = 0;
11038
11039 if (GET_CODE (align_exp) == CONST_INT)
11040 align = INTVAL (align_exp);
11041
d0a5295a
RH
11042 /* Can't use any of this if the user has appropriated esi. */
11043 if (global_regs[4])
11044 return 0;
11045
5519a4f9 11046 /* This simple hack avoids all inlining code and simplifies code below. */
0945b39d
JH
11047 if (!TARGET_ALIGN_STRINGOPS)
11048 align = 32;
11049
11050 if (GET_CODE (count_exp) == CONST_INT)
26771da7
JH
11051 {
11052 count = INTVAL (count_exp);
11053 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
11054 return 0;
11055 }
0945b39d
JH
11056 /* Figure out proper mode for counter. For 32bits it is always SImode,
11057 for 64bits use SImode when possible, otherwise DImode.
11058 Set count to number of bytes copied when known at compile time. */
11059 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
11060 || x86_64_zero_extended_value (count_exp))
11061 counter_mode = SImode;
11062 else
11063 counter_mode = DImode;
11064
11065 destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
11066
11067 emit_insn (gen_cld ());
11068
11069 /* When optimizing for size, emit a simple "rep ; stosb" instruction for
11070 counts not divisible by 4. */
11071
11072 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
11073 {
11074 countreg = ix86_zero_extend_to_Pmode (count_exp);
11075 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
11076 if (TARGET_64BIT)
11077 emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
11078 destreg, countreg));
11079 else
11080 emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
11081 destreg, countreg));
11082 }
11083 else if (count != 0
11084 && (align >= 8
11085 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
b531087a 11086 || optimize_size || count < (unsigned int) 64))
0945b39d
JH
11087 {
11088 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
11089 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
11090 if (count & ~(size - 1))
11091 {
11092 countreg = copy_to_mode_reg (counter_mode,
11093 GEN_INT ((count >> (size == 4 ? 2 : 3))
11094 & (TARGET_64BIT ? -1 : 0x3fffffff)));
11095 countreg = ix86_zero_extend_to_Pmode (countreg);
11096 if (size == 4)
11097 {
11098 if (TARGET_64BIT)
11099 emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
11100 destreg, countreg));
11101 else
11102 emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
11103 destreg, countreg));
11104 }
11105 else
11106 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
11107 destreg, countreg));
11108 }
11109 if (size == 8 && (count & 0x04))
11110 emit_insn (gen_strsetsi (destreg,
11111 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11112 if (count & 0x02)
11113 emit_insn (gen_strsethi (destreg,
11114 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11115 if (count & 0x01)
11116 emit_insn (gen_strsetqi (destreg,
11117 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11118 }
11119 else
11120 {
11121 rtx countreg2;
11122 rtx label = NULL;
37ad04a5
JH
11123 /* Compute desired alignment of the string operation. */
11124 int desired_alignment = (TARGET_PENTIUMPRO
11125 && (count == 0 || count >= (unsigned int) 260)
11126 ? 8 : UNITS_PER_WORD);
0945b39d
JH
11127
11128 /* In case we don't know anything about the alignment, default to
11129 the library version, since it is usually equally fast and results in
4977bab6
ZW
11130 shorter code.
11131
11132 Also emit a call when we know that the count is large and call overhead
11133 will not be important. */
11134 if (!TARGET_INLINE_ALL_STRINGOPS
11135 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
0945b39d
JH
11136 return 0;
11137
11138 if (TARGET_SINGLE_STRINGOP)
11139 emit_insn (gen_cld ());
11140
11141 countreg2 = gen_reg_rtx (Pmode);
11142 countreg = copy_to_mode_reg (counter_mode, count_exp);
11143 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
11144
37ad04a5 11145 if (count == 0 && align < desired_alignment)
0945b39d
JH
11146 {
11147 label = gen_label_rtx ();
37ad04a5 11148 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
d43e0b7d 11149 LEU, 0, counter_mode, 1, label);
0945b39d
JH
11150 }
11151 if (align <= 1)
11152 {
11153 rtx label = ix86_expand_aligntest (destreg, 1);
11154 emit_insn (gen_strsetqi (destreg,
11155 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11156 ix86_adjust_counter (countreg, 1);
11157 emit_label (label);
11158 LABEL_NUSES (label) = 1;
11159 }
11160 if (align <= 2)
11161 {
11162 rtx label = ix86_expand_aligntest (destreg, 2);
11163 emit_insn (gen_strsethi (destreg,
11164 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11165 ix86_adjust_counter (countreg, 2);
11166 emit_label (label);
11167 LABEL_NUSES (label) = 1;
11168 }
37ad04a5 11169 if (align <= 4 && desired_alignment > 4)
0945b39d
JH
11170 {
11171 rtx label = ix86_expand_aligntest (destreg, 4);
11172 emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
11173 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
11174 : zeroreg)));
11175 ix86_adjust_counter (countreg, 4);
11176 emit_label (label);
11177 LABEL_NUSES (label) = 1;
11178 }
11179
37ad04a5
JH
11180 if (label && desired_alignment > 4 && !TARGET_64BIT)
11181 {
11182 emit_label (label);
11183 LABEL_NUSES (label) = 1;
11184 label = NULL_RTX;
11185 }
11186
0945b39d
JH
11187 if (!TARGET_SINGLE_STRINGOP)
11188 emit_insn (gen_cld ());
11189 if (TARGET_64BIT)
11190 {
11191 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11192 GEN_INT (3)));
11193 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
11194 destreg, countreg2));
11195 }
11196 else
11197 {
11198 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
11199 emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
11200 destreg, countreg2));
11201 }
0945b39d
JH
11202 if (label)
11203 {
11204 emit_label (label);
11205 LABEL_NUSES (label) = 1;
11206 }
37ad04a5 11207
0945b39d
JH
11208 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11209 emit_insn (gen_strsetsi (destreg,
11210 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11211 if (TARGET_64BIT && (align <= 4 || count == 0))
11212 {
79258dce 11213 rtx label = ix86_expand_aligntest (countreg, 4);
0945b39d
JH
11214 emit_insn (gen_strsetsi (destreg,
11215 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11216 emit_label (label);
11217 LABEL_NUSES (label) = 1;
11218 }
11219 if (align > 2 && count != 0 && (count & 2))
11220 emit_insn (gen_strsethi (destreg,
11221 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11222 if (align <= 2 || count == 0)
11223 {
74411039 11224 rtx label = ix86_expand_aligntest (countreg, 2);
0945b39d
JH
11225 emit_insn (gen_strsethi (destreg,
11226 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11227 emit_label (label);
11228 LABEL_NUSES (label) = 1;
11229 }
11230 if (align > 1 && count != 0 && (count & 1))
11231 emit_insn (gen_strsetqi (destreg,
11232 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11233 if (align <= 1 || count == 0)
11234 {
74411039 11235 rtx label = ix86_expand_aligntest (countreg, 1);
0945b39d
JH
11236 emit_insn (gen_strsetqi (destreg,
11237 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11238 emit_label (label);
11239 LABEL_NUSES (label) = 1;
11240 }
11241 }
11242 return 1;
11243}
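/* Editor's note (illustrative, not in the original source): the clearing
   expansion above mirrors the memcpy model after ix86_expand_movstr, with
   zero stores instead of loads; the word loop becomes

	for (; n >= 4; n -= 4, dst += 4)
	  *(unsigned int *) dst = 0;		-- the "rep stosl" step

   so only the data source differs.  */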
11244/* Expand strlen. */
11245int
b96a374d 11246ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
0945b39d
JH
11247{
11248 rtx addr, scratch1, scratch2, scratch3, scratch4;
11249
11250 /* The generic case of the strlen expander is long. Avoid its
11251 expansion unless TARGET_INLINE_ALL_STRINGOPS. */
11252
11253 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11254 && !TARGET_INLINE_ALL_STRINGOPS
11255 && !optimize_size
11256 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
11257 return 0;
11258
11259 addr = force_reg (Pmode, XEXP (src, 0));
11260 scratch1 = gen_reg_rtx (Pmode);
11261
11262 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11263 && !optimize_size)
11264 {
11265 /* Well, it seems that some optimizers do not combine a call like
11266 foo(strlen(bar), strlen(bar));
11267 when the move and the subtraction are done here. The length is
11268 calculated just once when these instructions are done inside
11269 output_strlen_unroll(). But since &bar[strlen(bar)] is often
11270 used, and this uses one fewer register for the lifetime of
11271 output_strlen_unroll(), this is better. */
11272
11273 emit_move_insn (out, addr);
11274
11275 ix86_expand_strlensi_unroll_1 (out, align);
11276
11277 /* strlensi_unroll_1 returns the address of the zero at the end of
11278 the string, like memchr(), so compute the length by subtracting
11279 the start address. */
11280 if (TARGET_64BIT)
11281 emit_insn (gen_subdi3 (out, out, addr));
11282 else
11283 emit_insn (gen_subsi3 (out, out, addr));
11284 }
11285 else
11286 {
11287 scratch2 = gen_reg_rtx (Pmode);
11288 scratch3 = gen_reg_rtx (Pmode);
11289 scratch4 = force_reg (Pmode, constm1_rtx);
11290
11291 emit_move_insn (scratch3, addr);
11292 eoschar = force_reg (QImode, eoschar);
11293
11294 emit_insn (gen_cld ());
11295 if (TARGET_64BIT)
11296 {
11297 emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
11298 align, scratch4, scratch3));
11299 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
11300 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
11301 }
11302 else
11303 {
11304 emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
11305 align, scratch4, scratch3));
11306 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
11307 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
11308 }
11309 }
11310 return 1;
11311}
11312
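/* Editor's model (hypothetical, not part of the original source) of the
   "repnz scasb" arithmetic used above: with the count register started at
   -1, the hardware decrements it once per scanned byte, terminator
   included, so the length comes out as ~count - 1.  */

static unsigned long
scasb_strlen_model (const char *s)
{
  unsigned long count = (unsigned long) -1;
  const char *p = s;
  do
    count--;			/* repnz scasb decrements the counter */
  while (*p++ != 0);
  return ~count - 1;		/* the one_cmpl + add -1 emitted above */
}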
e075ae69
RH
11313/* Expand the appropriate insns for doing strlen if not just doing
11314 repnz; scasb
11315
11316 out = result, initialized with the start address
11317 align_rtx = alignment of the address.
11318 scratch = scratch register, initialized with the start address when
77ebd435 11319 not aligned, otherwise undefined
3f803cd9 11320
39e3f58c 11321 This is just the body. It needs the initializations mentioned above and
3f803cd9
SC
11322 some address computation at the end. These things are done in i386.md. */
11323
0945b39d 11324static void
b96a374d 11325ix86_expand_strlensi_unroll_1 (rtx out, rtx align_rtx)
3f803cd9 11326{
e075ae69
RH
11327 int align;
11328 rtx tmp;
11329 rtx align_2_label = NULL_RTX;
11330 rtx align_3_label = NULL_RTX;
11331 rtx align_4_label = gen_label_rtx ();
11332 rtx end_0_label = gen_label_rtx ();
e075ae69 11333 rtx mem;
e2e52e1b 11334 rtx tmpreg = gen_reg_rtx (SImode);
0945b39d 11335 rtx scratch = gen_reg_rtx (SImode);
e6e81735 11336 rtx cmp;
e075ae69
RH
11337
11338 align = 0;
11339 if (GET_CODE (align_rtx) == CONST_INT)
11340 align = INTVAL (align_rtx);
3f803cd9 11341
e9a25f70 11342 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
3f803cd9 11343
e9a25f70 11344 /* Is there a known alignment and is it less than 4? */
e075ae69 11345 if (align < 4)
3f803cd9 11346 {
0945b39d
JH
11347 rtx scratch1 = gen_reg_rtx (Pmode);
11348 emit_move_insn (scratch1, out);
e9a25f70 11349 /* Is there a known alignment and is it not 2? */
e075ae69 11350 if (align != 2)
3f803cd9 11351 {
e075ae69
RH
11352 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
11353 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
11354
11355 /* Leave just the 3 lower bits. */
0945b39d 11356 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
e075ae69
RH
11357 NULL_RTX, 0, OPTAB_WIDEN);
11358
9076b9c1 11359 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
d43e0b7d 11360 Pmode, 1, align_4_label);
9076b9c1 11361 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
d43e0b7d 11362 Pmode, 1, align_2_label);
9076b9c1 11363 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
d43e0b7d 11364 Pmode, 1, align_3_label);
3f803cd9
SC
11365 }
11366 else
11367 {
e9a25f70
JL
11368 /* Since the alignment is 2, we have to check 2 or 0 bytes;
11369 check whether it is aligned to 4 bytes. */
e9a25f70 11370
0945b39d 11371 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
e075ae69
RH
11372 NULL_RTX, 0, OPTAB_WIDEN);
11373
9076b9c1 11374 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
d43e0b7d 11375 Pmode, 1, align_4_label);
3f803cd9
SC
11376 }
11377
e075ae69 11378 mem = gen_rtx_MEM (QImode, out);
e9a25f70 11379
e075ae69 11380 /* Now compare the bytes. */
e9a25f70 11381
0f290768 11382 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
9076b9c1 11383 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
d43e0b7d 11384 QImode, 1, end_0_label);
3f803cd9 11385
0f290768 11386 /* Increment the address. */
0945b39d
JH
11387 if (TARGET_64BIT)
11388 emit_insn (gen_adddi3 (out, out, const1_rtx));
11389 else
11390 emit_insn (gen_addsi3 (out, out, const1_rtx));
e9a25f70 11391
e075ae69
RH
11392 /* Not needed with an alignment of 2 */
11393 if (align != 2)
11394 {
11395 emit_label (align_2_label);
3f803cd9 11396
d43e0b7d
RK
11397 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11398 end_0_label);
e075ae69 11399
0945b39d
JH
11400 if (TARGET_64BIT)
11401 emit_insn (gen_adddi3 (out, out, const1_rtx));
11402 else
11403 emit_insn (gen_addsi3 (out, out, const1_rtx));
e075ae69
RH
11404
11405 emit_label (align_3_label);
11406 }
11407
d43e0b7d
RK
11408 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11409 end_0_label);
e075ae69 11410
0945b39d
JH
11411 if (TARGET_64BIT)
11412 emit_insn (gen_adddi3 (out, out, const1_rtx));
11413 else
11414 emit_insn (gen_addsi3 (out, out, const1_rtx));
3f803cd9
SC
11415 }
11416
e075ae69
RH
11417 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
11418 align this loop; doing so only bloats the code and does not help to
11419 speed it up. */
11420 emit_label (align_4_label);
3f803cd9 11421
e075ae69
RH
11422 mem = gen_rtx_MEM (SImode, out);
11423 emit_move_insn (scratch, mem);
0945b39d
JH
11424 if (TARGET_64BIT)
11425 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
11426 else
11427 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
e075ae69 11428
e2e52e1b
JH
11429 /* This formula yields a nonzero result iff one of the bytes is zero.
11430 This saves three branches inside the loop and many cycles. */
11431
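  /* Editor's note: in plain C the test constructed below is

	((x - 0x01010101) & ~x & 0x80808080) != 0

     which is nonzero exactly when some byte of x is zero.  For example,
     with x = 0x12003456: x - 0x01010101 = 0x10FF3355, ~x = 0xEDFFCBA9,
     and ANDing the two and masking with 0x80808080 leaves 0x00800000,
     flagging the zero byte.  */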
11432 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
11433 emit_insn (gen_one_cmplsi2 (scratch, scratch));
11434 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
7471a1f0 11435 emit_insn (gen_andsi3 (tmpreg, tmpreg,
d8bf17f9 11436 gen_int_mode (0x80808080, SImode)));
d43e0b7d
RK
11437 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
11438 align_4_label);
e2e52e1b
JH
11439
11440 if (TARGET_CMOVE)
11441 {
11442 rtx reg = gen_reg_rtx (SImode);
0945b39d 11443 rtx reg2 = gen_reg_rtx (Pmode);
e2e52e1b
JH
11444 emit_move_insn (reg, tmpreg);
11445 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
11446
0f290768 11447 /* If zero is not in the first two bytes, move two bytes forward. */
16189740 11448 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
e2e52e1b
JH
11449 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11450 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11451 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
11452 gen_rtx_IF_THEN_ELSE (SImode, tmp,
77ebd435
AJ
11453 reg,
11454 tmpreg)));
e2e52e1b 11455 /* Emit lea manually to avoid clobbering of flags. */
0945b39d
JH
11456 emit_insn (gen_rtx_SET (SImode, reg2,
11457 gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
e2e52e1b
JH
11458
11459 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11460 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11461 emit_insn (gen_rtx_SET (VOIDmode, out,
0945b39d 11462 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
a4f31c00
AJ
11463 reg2,
11464 out)));
e2e52e1b
JH
11465
11466 }
11467 else
11468 {
11469 rtx end_2_label = gen_label_rtx ();
11470 /* Is zero in the first two bytes? */
11471
16189740 11472 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
e2e52e1b
JH
11473 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11474 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
11475 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11476 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
11477 pc_rtx);
11478 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11479 JUMP_LABEL (tmp) = end_2_label;
11480
0f290768 11481 /* Not in the first two. Move two bytes forward. */
e2e52e1b 11482 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
0945b39d
JH
11483 if (TARGET_64BIT)
11484 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
11485 else
11486 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
e2e52e1b
JH
11487
11488 emit_label (end_2_label);
11489
11490 }
11491
0f290768 11492 /* Avoid branch in fixing the byte. */
e2e52e1b 11493 tmpreg = gen_lowpart (QImode, tmpreg);
7e08e190 11494 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
e6e81735 11495 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
0945b39d 11496 if (TARGET_64BIT)
e6e81735 11497 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
0945b39d 11498 else
e6e81735 11499 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
e075ae69
RH
11500
11501 emit_label (end_0_label);
11502}
0e07aff3
RH
11503
11504void
b96a374d
AJ
11505ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1, rtx callarg2,
11506 rtx pop, int sibcall)
0e07aff3
RH
11507{
11508 rtx use = NULL, call;
11509
11510 if (pop == const0_rtx)
11511 pop = NULL;
11512 if (TARGET_64BIT && pop)
11513 abort ();
11514
b069de3b
SS
11515#if TARGET_MACHO
11516 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
11517 fnaddr = machopic_indirect_call_target (fnaddr);
11518#else
0e07aff3
RH
11519 /* Static functions and indirect calls don't need the pic register. */
11520 if (! TARGET_64BIT && flag_pic
11521 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
12969f45 11522 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
66edd3b4 11523 use_reg (&use, pic_offset_table_rtx);
0e07aff3
RH
11524
11525 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
11526 {
11527 rtx al = gen_rtx_REG (QImode, 0);
11528 emit_move_insn (al, callarg2);
11529 use_reg (&use, al);
11530 }
b069de3b 11531#endif /* TARGET_MACHO */
0e07aff3
RH
11532
11533 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
11534 {
11535 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11536 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11537 }
4977bab6
ZW
11538 if (sibcall && TARGET_64BIT
11539 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
11540 {
11541 rtx addr;
11542 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11543 fnaddr = gen_rtx_REG (Pmode, 40);
11544 emit_move_insn (fnaddr, addr);
11545 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11546 }
0e07aff3
RH
11547
11548 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
11549 if (retval)
11550 call = gen_rtx_SET (VOIDmode, retval, call);
11551 if (pop)
11552 {
11553 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
11554 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
11555 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
11556 }
11557
11558 call = emit_call_insn (call);
11559 if (use)
11560 CALL_INSN_FUNCTION_USAGE (call) = use;
11561}
fce5a9f2 11562
e075ae69 11563\f
e075ae69
RH
11564/* Clear stack slot assignments remembered from previous functions.
11565 This is called from INIT_EXPANDERS once before RTL is emitted for each
11566 function. */
11567
e2500fed 11568static struct machine_function *
b96a374d 11569ix86_init_machine_status (void)
37b15744 11570{
d7394366
JH
11571 struct machine_function *f;
11572
11573 f = ggc_alloc_cleared (sizeof (struct machine_function));
11574 f->use_fast_prologue_epilogue_nregs = -1;
8330e2c6
AJ
11575
11576 return f;
1526a060
BS
11577}
11578
e075ae69
RH
11579/* Return a MEM corresponding to a stack slot with mode MODE.
11580 Allocate a new slot if necessary.
11581
11582 The RTL for a function can have several slots available: N is
11583 which slot to use. */
11584
11585rtx
b96a374d 11586assign_386_stack_local (enum machine_mode mode, int n)
e075ae69 11587{
ddb0ae00
ZW
11588 struct stack_local_entry *s;
11589
e075ae69
RH
11590 if (n < 0 || n >= MAX_386_STACK_LOCALS)
11591 abort ();
11592
ddb0ae00
ZW
11593 for (s = ix86_stack_locals; s; s = s->next)
11594 if (s->mode == mode && s->n == n)
11595 return s->rtl;
11596
11597 s = (struct stack_local_entry *)
11598 ggc_alloc (sizeof (struct stack_local_entry));
11599 s->n = n;
11600 s->mode = mode;
11601 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
e075ae69 11602
ddb0ae00
ZW
11603 s->next = ix86_stack_locals;
11604 ix86_stack_locals = s;
11605 return s->rtl;
e075ae69 11606}
f996902d
RH
11607
11608/* Construct the SYMBOL_REF for the tls_get_addr function. */
11609
e2500fed 11610static GTY(()) rtx ix86_tls_symbol;
f996902d 11611rtx
b96a374d 11612ix86_tls_get_addr (void)
f996902d 11613{
f996902d 11614
e2500fed 11615 if (!ix86_tls_symbol)
f996902d 11616 {
75d38379
JJ
11617 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
11618 (TARGET_GNU_TLS && !TARGET_64BIT)
11619 ? "___tls_get_addr"
11620 : "__tls_get_addr");
f996902d
RH
11621 }
11622
e2500fed 11623 return ix86_tls_symbol;
f996902d 11624}
e075ae69
RH
11625\f
11626/* Calculate the length of the memory address in the instruction
11627 encoding. Does not include the one-byte modrm, opcode, or prefix. */
11628
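/* Editor's examples (informal, assuming the usual IA-32 encodings; each
   number counts the bytes this function reports beyond the one-byte modrm):

	(%eax)			-> 0
	(%esp)			-> 1   (SIB byte required)
	(%ebp)			-> 1   (disp8 required)
	4(%eax)			-> 1   (disp8)
	0x12345678		-> 4   (disp32, no base)
	12(%eax,%ebx,4)		-> 2   (SIB + disp8)  */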
11629static int
b96a374d 11630memory_address_length (rtx addr)
e075ae69
RH
11631{
11632 struct ix86_address parts;
11633 rtx base, index, disp;
11634 int len;
11635
11636 if (GET_CODE (addr) == PRE_DEC
0d7d98ee
JH
11637 || GET_CODE (addr) == POST_INC
11638 || GET_CODE (addr) == PRE_MODIFY
11639 || GET_CODE (addr) == POST_MODIFY)
e075ae69 11640 return 0;
3f803cd9 11641
e075ae69
RH
11642 if (! ix86_decompose_address (addr, &parts))
11643 abort ();
3f803cd9 11644
e075ae69
RH
11645 base = parts.base;
11646 index = parts.index;
11647 disp = parts.disp;
11648 len = 0;
3f803cd9 11649
7b65ed54
EB
11650 /* Rule of thumb:
11651 - esp as the base always wants an index,
11652 - ebp as the base always wants a displacement. */
11653
e075ae69
RH
11654 /* Register Indirect. */
11655 if (base && !index && !disp)
11656 {
7b65ed54
EB
11657 /* esp (for its index) and ebp (for its displacement) need
11658 the two-byte modrm form. */
e075ae69
RH
11659 if (addr == stack_pointer_rtx
11660 || addr == arg_pointer_rtx
564d80f4
JH
11661 || addr == frame_pointer_rtx
11662 || addr == hard_frame_pointer_rtx)
e075ae69 11663 len = 1;
3f803cd9 11664 }
e9a25f70 11665
e075ae69
RH
11666 /* Direct Addressing. */
11667 else if (disp && !base && !index)
11668 len = 4;
11669
3f803cd9
SC
11670 else
11671 {
e075ae69
RH
11672 /* Find the length of the displacement constant. */
11673 if (disp)
11674 {
11675 if (GET_CODE (disp) == CONST_INT
9b73c90a
EB
11676 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K')
11677 && base)
e075ae69
RH
11678 len = 1;
11679 else
11680 len = 4;
11681 }
7b65ed54
EB
11682 /* ebp always wants a displacement. */
11683 else if (base == hard_frame_pointer_rtx)
11684 len = 1;
3f803cd9 11685
7b65ed54
EB
11686 /* An index requires the two-byte modrm form... */
11687 if (index
11688 /* ...like esp, which always wants an index. */
11689 || base == stack_pointer_rtx
11690 || base == arg_pointer_rtx
11691 || base == frame_pointer_rtx)
e075ae69 11692 len += 1;
3f803cd9
SC
11693 }
11694
e075ae69
RH
11695 return len;
11696}
79325812 11697
5bf0ebab
RH
11698/* Compute default value for "length_immediate" attribute. When SHORTFORM
11699 is set, expect that the insn has an 8-bit immediate alternative. */
e075ae69 11700int
b96a374d 11701ix86_attr_length_immediate_default (rtx insn, int shortform)
e075ae69 11702{
6ef67412
JH
11703 int len = 0;
11704 int i;
6c698a6d 11705 extract_insn_cached (insn);
6ef67412
JH
11706 for (i = recog_data.n_operands - 1; i >= 0; --i)
11707 if (CONSTANT_P (recog_data.operand[i]))
3071fab5 11708 {
6ef67412 11709 if (len)
3071fab5 11710 abort ();
6ef67412
JH
11711 if (shortform
11712 && GET_CODE (recog_data.operand[i]) == CONST_INT
11713 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
11714 len = 1;
11715 else
11716 {
11717 switch (get_attr_mode (insn))
11718 {
11719 case MODE_QI:
11720 len+=1;
11721 break;
11722 case MODE_HI:
11723 len+=2;
11724 break;
11725 case MODE_SI:
11726 len+=4;
11727 break;
14f73b5a
JH
11728 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
11729 case MODE_DI:
11730 len+=4;
11731 break;
6ef67412 11732 default:
c725bd79 11733 fatal_insn ("unknown insn mode", insn);
6ef67412
JH
11734 }
11735 }
3071fab5 11736 }
6ef67412
JH
11737 return len;
11738}
11739/* Compute default value for "length_address" attribute. */
11740int
b96a374d 11741ix86_attr_length_address_default (rtx insn)
6ef67412
JH
11742{
11743 int i;
9b73c90a
EB
11744
11745 if (get_attr_type (insn) == TYPE_LEA)
11746 {
11747 rtx set = PATTERN (insn);
11748 if (GET_CODE (set) == SET)
11749 ;
11750 else if (GET_CODE (set) == PARALLEL
11751 && GET_CODE (XVECEXP (set, 0, 0)) == SET)
11752 set = XVECEXP (set, 0, 0);
11753 else
11754 {
11755#ifdef ENABLE_CHECKING
11756 abort ();
11757#endif
11758 return 0;
11759 }
11760
11761 return memory_address_length (SET_SRC (set));
11762 }
11763
6c698a6d 11764 extract_insn_cached (insn);
1ccbefce
RH
11765 for (i = recog_data.n_operands - 1; i >= 0; --i)
11766 if (GET_CODE (recog_data.operand[i]) == MEM)
e075ae69 11767 {
6ef67412 11768 return memory_address_length (XEXP (recog_data.operand[i], 0));
e075ae69
RH
11770 }
6ef67412 11771 return 0;
3f803cd9 11772}
e075ae69
RH
11773\f
11774/* Return the maximum number of instructions a cpu can issue. */
b657fc39 11775
c237e94a 11776static int
b96a374d 11777ix86_issue_rate (void)
b657fc39 11778{
9e555526 11779 switch (ix86_tune)
b657fc39 11780 {
e075ae69
RH
11781 case PROCESSOR_PENTIUM:
11782 case PROCESSOR_K6:
11783 return 2;
79325812 11784
e075ae69 11785 case PROCESSOR_PENTIUMPRO:
b4e89e2d
JH
11786 case PROCESSOR_PENTIUM4:
11787 case PROCESSOR_ATHLON:
4977bab6 11788 case PROCESSOR_K8:
e075ae69 11789 return 3;
b657fc39 11790
b657fc39 11791 default:
e075ae69 11792 return 1;
b657fc39 11793 }
b657fc39
L
11794}
11795
e075ae69
RH
11796/* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
11797 by DEP_INSN and nothing else set by DEP_INSN. */
b657fc39 11798
e075ae69 11799static int
b96a374d 11800ix86_flags_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
e075ae69
RH
11801{
11802 rtx set, set2;
b657fc39 11803
e075ae69
RH
11804 /* Simplify the test for uninteresting insns. */
11805 if (insn_type != TYPE_SETCC
11806 && insn_type != TYPE_ICMOV
11807 && insn_type != TYPE_FCMOV
11808 && insn_type != TYPE_IBR)
11809 return 0;
b657fc39 11810
e075ae69
RH
11811 if ((set = single_set (dep_insn)) != 0)
11812 {
11813 set = SET_DEST (set);
11814 set2 = NULL_RTX;
11815 }
11816 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
11817 && XVECLEN (PATTERN (dep_insn), 0) == 2
11818 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
11819 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
11820 {
11821 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
11822 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
11823 }
78a0d70c
ZW
11824 else
11825 return 0;
b657fc39 11826
78a0d70c
ZW
11827 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
11828 return 0;
b657fc39 11829
f5143c46 11830 /* This test is true if the dependent insn reads the flags but
78a0d70c
ZW
11831 not any other potentially set register. */
11832 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
11833 return 0;
11834
11835 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
11836 return 0;
11837
11838 return 1;
e075ae69 11839}

/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
   address with operands set by DEP_INSN.  */

static int
ix86_agi_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
{
  rtx addr;

  if (insn_type == TYPE_LEA
      && TARGET_PENTIUM)
    {
      addr = PATTERN (insn);
      if (GET_CODE (addr) == SET)
	;
      else if (GET_CODE (addr) == PARALLEL
	       && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
	addr = XVECEXP (addr, 0, 0);
      else
	abort ();
      addr = SET_SRC (addr);
    }
  else
    {
      int i;
      extract_insn_cached (insn);
      for (i = recog_data.n_operands - 1; i >= 0; --i)
	if (GET_CODE (recog_data.operand[i]) == MEM)
	  {
	    addr = XEXP (recog_data.operand[i], 0);
	    goto found;
	  }
      return 0;
    found:;
    }

  return modified_in_p (addr, dep_insn);
}

static int
ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
{
  enum attr_type insn_type, dep_insn_type;
  enum attr_memory memory, dep_memory;
  rtx set, set2;
  int dep_insn_code_number;

  /* Anti and output dependencies have zero cost on all CPUs.  */
  if (REG_NOTE_KIND (link) != 0)
    return 0;

  dep_insn_code_number = recog_memoized (dep_insn);

  /* If we can't recognize the insns, we can't really do anything.  */
  if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
    return cost;

  insn_type = get_attr_type (insn);
  dep_insn_type = get_attr_type (dep_insn);

  switch (ix86_tune)
    {
    case PROCESSOR_PENTIUM:
      /* Address Generation Interlock adds a cycle of latency.  */
      if (ix86_agi_dependant (insn, dep_insn, insn_type))
	cost += 1;

      /* ??? Compares pair with jump/setcc.  */
      if (ix86_flags_dependant (insn, dep_insn, insn_type))
	cost = 0;

      /* Floating point stores require value to be ready one cycle earlier.  */
      if (insn_type == TYPE_FMOV
	  && get_attr_memory (insn) == MEMORY_STORE
	  && !ix86_agi_dependant (insn, dep_insn, insn_type))
	cost += 1;
      break;

    case PROCESSOR_PENTIUMPRO:
      memory = get_attr_memory (insn);
      dep_memory = get_attr_memory (dep_insn);

      /* Since we can't represent delayed latencies of load+operation,
	 increase the cost here for non-imov insns.  */
      if (dep_insn_type != TYPE_IMOV
	  && dep_insn_type != TYPE_FMOV
	  && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
	cost += 1;

      /* INT->FP conversion is expensive.  */
      if (get_attr_fp_int_src (dep_insn))
	cost += 5;

      /* There is one cycle extra latency between an FP op and a store.  */
      if (insn_type == TYPE_FMOV
	  && (set = single_set (dep_insn)) != NULL_RTX
	  && (set2 = single_set (insn)) != NULL_RTX
	  && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
	  && GET_CODE (SET_DEST (set2)) == MEM)
	cost += 1;

      /* Show ability of reorder buffer to hide latency of load by executing
	 in parallel with previous instruction in case
	 previous instruction is not needed to compute the address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
	  && !ix86_agi_dependant (insn, dep_insn, insn_type))
	{
	  /* Claim moves to take one cycle, as the core can issue one load
	     at a time and the next load can start a cycle later.  */
	  if (dep_insn_type == TYPE_IMOV
	      || dep_insn_type == TYPE_FMOV)
	    cost = 1;
	  else if (cost > 1)
	    cost--;
	}
      break;

    case PROCESSOR_K6:
      memory = get_attr_memory (insn);
      dep_memory = get_attr_memory (dep_insn);
      /* The esp dependency is resolved before the instruction is really
	 finished.  */
      if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
	  && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
	return 1;

      /* Since we can't represent delayed latencies of load+operation,
	 increase the cost here for non-imov insns.  */
      if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
	cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;

      /* INT->FP conversion is expensive.  */
      if (get_attr_fp_int_src (dep_insn))
	cost += 5;

      /* Show ability of reorder buffer to hide latency of load by executing
	 in parallel with previous instruction in case
	 previous instruction is not needed to compute the address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
	  && !ix86_agi_dependant (insn, dep_insn, insn_type))
	{
	  /* Claim moves to take one cycle, as the core can issue one load
	     at a time and the next load can start a cycle later.  */
	  if (dep_insn_type == TYPE_IMOV
	      || dep_insn_type == TYPE_FMOV)
	    cost = 1;
	  else if (cost > 2)
	    cost -= 2;
	  else
	    cost = 1;
	}
      break;

    case PROCESSOR_ATHLON:
    case PROCESSOR_K8:
      memory = get_attr_memory (insn);
      dep_memory = get_attr_memory (dep_insn);

      /* Show ability of reorder buffer to hide latency of load by executing
	 in parallel with previous instruction in case
	 previous instruction is not needed to compute the address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
	  && !ix86_agi_dependant (insn, dep_insn, insn_type))
	{
	  enum attr_unit unit = get_attr_unit (insn);
	  int loadcost = 3;

	  /* Because of the difference between the length of integer and
	     floating unit pipeline preparation stages, the memory operands
	     for floating point are cheaper.

	     ??? For Athlon the difference is most probably 2.  */
	  if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
	    loadcost = 3;
	  else
	    loadcost = TARGET_ATHLON ? 2 : 0;

	  if (cost >= loadcost)
	    cost -= loadcost;
	  else
	    cost = 0;
	}

    default:
      break;
    }

  return cost;
}
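
/* A worked example of the adjustment (illustrative): on PROCESSOR_K6 a
   producer that loads from memory adds 2 to the consumer's cost (1 if
   the producer is a plain integer move), back-to-back push/pop pairs
   collapse to cost 1 because the %esp dependence resolves early, and a
   producer doing an int->fp conversion adds 5.  */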

static union
{
  struct ppro_sched_data
  {
    rtx decode[3];
    int issued_this_cycle;
  } ppro;
} ix86_sched_data;

static enum attr_ppro_uops
ix86_safe_ppro_uops (rtx insn)
{
  if (recog_memoized (insn) >= 0)
    return get_attr_ppro_uops (insn);
  else
    return PPRO_UOPS_MANY;
}

static void
ix86_dump_ppro_packet (FILE *dump)
{
  if (ix86_sched_data.ppro.decode[0])
    {
      fprintf (dump, "PPRO packet: %d",
	       INSN_UID (ix86_sched_data.ppro.decode[0]));
      if (ix86_sched_data.ppro.decode[1])
	fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
      if (ix86_sched_data.ppro.decode[2])
	fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
      fputc ('\n', dump);
    }
}

/* We're beginning a new block.  Initialize data structures as necessary.  */

static void
ix86_sched_init (FILE *dump ATTRIBUTE_UNUSED,
		 int sched_verbose ATTRIBUTE_UNUSED,
		 int veclen ATTRIBUTE_UNUSED)
{
  memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
}

/* Shift INSN to SLOT, and shift everything else down.  */

static void
ix86_reorder_insn (rtx *insnp, rtx *slot)
{
  if (insnp != slot)
    {
      rtx insn = *insnp;
      do
	insnp[0] = insnp[1];
      while (++insnp != slot);
      *insnp = insn;
    }
}
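
/* Illustration (a hypothetical ready queue; the highest priority insn
   is last): with ready = { A, B, C, D }, calling
   ix86_reorder_insn (&ready[1], &ready[3]) rotates B into the slot,
   giving { A, C, D, B }.  */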

static void
ix86_sched_reorder_ppro (rtx *ready, rtx *e_ready)
{
  rtx decode[3];
  enum attr_ppro_uops cur_uops;
  int issued_this_cycle;
  rtx *insnp;
  int i;

  /* At this point .ppro.decode contains the state of the three
     decoders from last "cycle".  That is, those insns that were
     actually independent.  But here we're scheduling for the
     decoder, and we may find things that are decodable in the
     same cycle.  */

  memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
  issued_this_cycle = 0;

  insnp = e_ready;
  cur_uops = ix86_safe_ppro_uops (*insnp);

  /* If the decoders are empty, and we've a complex insn at the
     head of the priority queue, let it issue without complaint.  */
  if (decode[0] == NULL)
    {
      if (cur_uops == PPRO_UOPS_MANY)
	{
	  decode[0] = *insnp;
	  goto ppro_done;
	}

      /* Otherwise, search for a 2-4 uop insn to issue.  */
      while (cur_uops != PPRO_UOPS_FEW)
	{
	  if (insnp == ready)
	    break;
	  cur_uops = ix86_safe_ppro_uops (*--insnp);
	}

      /* If so, move it to the head of the line.  */
      if (cur_uops == PPRO_UOPS_FEW)
	ix86_reorder_insn (insnp, e_ready);

      /* Issue the head of the queue.  */
      issued_this_cycle = 1;
      decode[0] = *e_ready--;
    }

  /* Look for simple insns to fill in the other two slots.  */
  for (i = 1; i < 3; ++i)
    if (decode[i] == NULL)
      {
	if (ready > e_ready)
	  goto ppro_done;

	insnp = e_ready;
	cur_uops = ix86_safe_ppro_uops (*insnp);
	while (cur_uops != PPRO_UOPS_ONE)
	  {
	    if (insnp == ready)
	      break;
	    cur_uops = ix86_safe_ppro_uops (*--insnp);
	  }

	/* Found one.  Move it to the head of the queue and issue it.  */
	if (cur_uops == PPRO_UOPS_ONE)
	  {
	    ix86_reorder_insn (insnp, e_ready);
	    decode[i] = *e_ready--;
	    issued_this_cycle++;
	    continue;
	  }

	/* ??? Didn't find one.  Ideally, here we would do a lazy split
	   of 2-uop insns, issue one and queue the other.  */
      }

 ppro_done:
  if (issued_this_cycle == 0)
    issued_this_cycle = 1;
  ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
}

/* We are about to begin issuing insns for this clock cycle.
   Override the default sort algorithm to better slot instructions.  */
static int
ix86_sched_reorder (FILE *dump ATTRIBUTE_UNUSED,
		    int sched_verbose ATTRIBUTE_UNUSED, rtx *ready,
		    int *n_readyp, int clock_var ATTRIBUTE_UNUSED)
{
  int n_ready = *n_readyp;
  rtx *e_ready = ready + n_ready - 1;

  /* Make sure to go ahead and initialize key items in
     ix86_sched_data if we are not going to bother trying to
     reorder the ready queue.  */
  if (n_ready < 2)
    {
      ix86_sched_data.ppro.issued_this_cycle = 1;
      goto out;
    }

  switch (ix86_tune)
    {
    default:
      break;

    case PROCESSOR_PENTIUMPRO:
      ix86_sched_reorder_ppro (ready, e_ready);
      break;
    }

out:
  return ix86_issue_rate ();
}

/* We are about to issue INSN.  Return the number of insns left on the
   ready queue that can be issued this cycle.  */

static int
ix86_variable_issue (FILE *dump, int sched_verbose, rtx insn,
		     int can_issue_more)
{
  int i;
  switch (ix86_tune)
    {
    default:
      return can_issue_more - 1;

    case PROCESSOR_PENTIUMPRO:
      {
	enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);

	if (uops == PPRO_UOPS_MANY)
	  {
	    if (sched_verbose)
	      ix86_dump_ppro_packet (dump);
	    ix86_sched_data.ppro.decode[0] = insn;
	    ix86_sched_data.ppro.decode[1] = NULL;
	    ix86_sched_data.ppro.decode[2] = NULL;
	    if (sched_verbose)
	      ix86_dump_ppro_packet (dump);
	    ix86_sched_data.ppro.decode[0] = NULL;
	  }
	else if (uops == PPRO_UOPS_FEW)
	  {
	    if (sched_verbose)
	      ix86_dump_ppro_packet (dump);
	    ix86_sched_data.ppro.decode[0] = insn;
	    ix86_sched_data.ppro.decode[1] = NULL;
	    ix86_sched_data.ppro.decode[2] = NULL;
	  }
	else
	  {
	    for (i = 0; i < 3; ++i)
	      if (ix86_sched_data.ppro.decode[i] == NULL)
		{
		  ix86_sched_data.ppro.decode[i] = insn;
		  break;
		}
	    if (i == 3)
	      abort ();
	    if (i == 2)
	      {
		if (sched_verbose)
		  ix86_dump_ppro_packet (dump);
		ix86_sched_data.ppro.decode[0] = NULL;
		ix86_sched_data.ppro.decode[1] = NULL;
		ix86_sched_data.ppro.decode[2] = NULL;
	      }
	  }
      }
      return --ix86_sched_data.ppro.issued_this_cycle;
    }
}

static int
ia32_use_dfa_pipeline_interface (void)
{
  if (TARGET_PENTIUM || TARGET_ATHLON_K8)
    return 1;
  return 0;
}

/* How many alternative schedules to try.  This should be as wide as the
   scheduling freedom in the DFA, but no wider.  Making this value too
   large results in extra work for the scheduler.  */

static int
ia32_multipass_dfa_lookahead (void)
{
  if (ix86_tune == PROCESSOR_PENTIUM)
    return 2;
  else
    return 0;
}

\f
/* Walk through INSNS and look for MEM references whose address is DSTREG or
   SRCREG and set the memory attribute to those of DSTREF and SRCREF, as
   appropriate.  */

void
ix86_set_move_mem_attrs (rtx insns, rtx dstref, rtx srcref, rtx dstreg,
			 rtx srcreg)
{
  rtx insn;

  for (insn = insns; insn != 0; insn = NEXT_INSN (insn))
    if (INSN_P (insn))
      ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
				 dstreg, srcreg);
}

/* Subroutine of above to actually do the updating by recursively walking
   the rtx.  */

static void
ix86_set_move_mem_attrs_1 (rtx x, rtx dstref, rtx srcref, rtx dstreg,
			   rtx srcreg)
{
  enum rtx_code code = GET_CODE (x);
  const char *format_ptr = GET_RTX_FORMAT (code);
  int i, j;

  if (code == MEM && XEXP (x, 0) == dstreg)
    MEM_COPY_ATTRIBUTES (x, dstref);
  else if (code == MEM && XEXP (x, 0) == srcreg)
    MEM_COPY_ATTRIBUTES (x, srcref);

  for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
    {
      if (*format_ptr == 'e')
	ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
				   dstreg, srcreg);
      else if (*format_ptr == 'E')
	for (j = XVECLEN (x, i) - 1; j >= 0; j--)
	  ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
				     dstreg, srcreg);
    }
}
\f
/* Compute the alignment given to a constant that is being placed in memory.
   EXP is the constant and ALIGN is the alignment that the object would
   ordinarily have.
   The value of this function is used instead of that alignment to align
   the object.  */

int
ix86_constant_alignment (tree exp, int align)
{
  if (TREE_CODE (exp) == REAL_CST)
    {
      if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
	return 64;
      else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
	   && align < 256)
    return 256;

  return align;
}
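
/* For example (illustrative): a DFmode constant that would ordinarily
   get 32-bit alignment is bumped to 64 bits, and a string constant of
   31 or more bytes is given 256-bit alignment, presumably so inlined
   block and string operations can use wide aligned accesses.  */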

/* Compute the alignment for a static variable.
   TYPE is the data type, and ALIGN is the alignment that
   the object would ordinarily have.  The value of this function is used
   instead of that alignment to align the object.  */

int
ix86_data_alignment (tree type, int align)
{
  if (AGGREGATE_TYPE_P (type)
      && TYPE_SIZE (type)
      && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
      && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
	  || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
    return 256;

  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to a 16-byte boundary.  */
  if (TARGET_64BIT)
    {
      if (AGGREGATE_TYPE_P (type)
	  && TYPE_SIZE (type)
	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	  && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
	      || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
	return 128;
    }

  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      if (TYPE_MODE (type) == DCmode && align < 64)
	return 64;
      if (TYPE_MODE (type) == XCmode && align < 128)
	return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
	    || TREE_CODE (type) == UNION_TYPE
	    || TREE_CODE (type) == QUAL_UNION_TYPE)
	   && TYPE_FIELDS (type))
    {
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
	   || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
	return 128;
    }

  return align;
}
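
/* A worked example (illustrative): TYPE_SIZE is measured in bits, so
   on x86-64 a "static char buf[16];" has size 128 and takes the
   128-bit branch above, landing the array on a 16-byte boundary;
   anything of 32 bytes or more was already raised to 256-bit alignment
   by the first test.  */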

/* Compute the alignment for a local variable.
   TYPE is the data type, and ALIGN is the alignment that
   the object would ordinarily have.  The value of this macro is used
   instead of that alignment to align the object.  */

int
ix86_local_alignment (tree type, int align)
{
  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to a 16-byte boundary.  */
  if (TARGET_64BIT)
    {
      if (AGGREGATE_TYPE_P (type)
	  && TYPE_SIZE (type)
	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	  && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
	      || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
	return 128;
    }
  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      if (TYPE_MODE (type) == DCmode && align < 64)
	return 64;
      if (TYPE_MODE (type) == XCmode && align < 128)
	return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
	    || TREE_CODE (type) == UNION_TYPE
	    || TREE_CODE (type) == QUAL_UNION_TYPE)
	   && TYPE_FIELDS (type))
    {
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
	   || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
	return 128;
    }
  return align;
}
\f
/* Emit RTL insns to initialize the variable parts of a trampoline.
   FNADDR is an RTX for the address of the function's pure code.
   CXT is an RTX for the static chain value for the function.  */
void
x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
{
  if (!TARGET_64BIT)
    {
      /* Compute offset from the end of the jmp to the target function.  */
      rtx disp = expand_binop (SImode, sub_optab, fnaddr,
			       plus_constant (tramp, 10),
			       NULL_RTX, 1, OPTAB_DIRECT);
      emit_move_insn (gen_rtx_MEM (QImode, tramp),
		      gen_int_mode (0xb9, QImode));
      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
		      gen_int_mode (0xe9, QImode));
      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
    }
  else
    {
      int offset = 0;
      /* Try to load address using shorter movl instead of movabs.
	 We may want to support movq for kernel mode, but kernel does not use
	 trampolines at the moment.  */
      if (x86_64_zero_extended_value (fnaddr))
	{
	  fnaddr = copy_to_mode_reg (DImode, fnaddr);
	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
			  gen_int_mode (0xbb41, HImode));
	  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
			  gen_lowpart (SImode, fnaddr));
	  offset += 6;
	}
      else
	{
	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
			  gen_int_mode (0xbb49, HImode));
	  emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
			  fnaddr);
	  offset += 10;
	}
      /* Load static chain using movabs to r10.  */
      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
		      gen_int_mode (0xba49, HImode));
      emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
		      cxt);
      offset += 10;
      /* Jump to r11.  */
      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
		      gen_int_mode (0xff49, HImode));
      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset + 2)),
		      gen_int_mode (0xe3, QImode));
      offset += 3;
      if (offset > TRAMPOLINE_SIZE)
	abort ();
    }

#ifdef TRANSFER_FROM_TRAMPOLINE
  emit_library_call (gen_rtx (SYMBOL_REF, Pmode, "__enable_execute_stack"),
		     LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
#endif
}
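
/* For reference, a sketch of the code bytes emitted above (assuming the
   usual IA-32/AMD64 encodings; HImode constants such as 0xbb41 are
   stored little-endian, so they appear in memory as 41 bb):

     32-bit:  b9 <cxt:4>         movl   $cxt, %ecx
              e9 <disp:4>        jmp    fnaddr           (pc-relative)

     64-bit:  41 bb <fnaddr:4>   movl   $fnaddr, %r11d   (zero-extended case)
         or   49 bb <fnaddr:8>   movabs $fnaddr, %r11
              49 ba <cxt:8>      movabs $cxt, %r10
              49 ff e3           jmp    *%r11  */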
\f
#define def_builtin(MASK, NAME, TYPE, CODE)			\
do {								\
  if ((MASK) & target_flags					\
      && (!((MASK) & MASK_64BIT) || TARGET_64BIT))		\
    builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD,	\
		      NULL, NULL_TREE);				\
} while (0)
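
/* Typical use (a sketch; the real registrations are driven from
   ix86_init_mmx_sse_builtins below):

     def_builtin (MASK_SSE, "__builtin_ia32_addps",
		  v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ADDPS);

   Note that an entry carrying MASK_64BIT is registered only when
   TARGET_64BIT is set, regardless of the other bits in target_flags.  */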

struct builtin_description
{
  const unsigned int mask;
  const enum insn_code icode;
  const char *const name;
  const enum ix86_builtins code;
  const enum rtx_code comparison;
  const unsigned int flag;
};

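/* COMPARISON and FLAG matter only for the comparison tables below: a
   nonzero FLAG marks an entry whose operands are swapped when the
   builtin is expanded.  That is how __builtin_ia32_cmpgtps can appear
   below as an LT comparison with FLAG 1 -- SSE provides the "less
   than" predicate directly, and "greater than" is derived by
   swapping.  */
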
static const struct builtin_description bdesc_comi[] =
{
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
};

static const struct builtin_description bdesc_2arg[] =
{
  /* SSE */
  { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
  { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
  { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
  { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
  { MASK_SSE, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
  { MASK_SSE, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
  { MASK_SSE, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
  { MASK_SSE, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },

  { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
  { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
  { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
  { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
  { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
  { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
  { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
  { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
  { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
  { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
  { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
  { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
  { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
  { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
  { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
  { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
  { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
  { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
  { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
  { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },

  { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
  { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
  { MASK_SSE, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
  { MASK_SSE, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },

  { MASK_SSE, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },

  { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },

  /* MMX */
  { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
  { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
  { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
  { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
  { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
  { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },

  { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
  { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
  { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
  { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
  { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
  { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
  { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
  { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },

  { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
  { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },

  { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },

  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },

  { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
  { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
  { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
  { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
  { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
  { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },

  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },

  { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },

  /* Special.  */
  { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },

  { MASK_SSE, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
  { MASK_SSE, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
  { MASK_SSE | MASK_64BIT, CODE_FOR_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },

  { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
  { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
  { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
  { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },

  { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
  { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
  { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
  { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },

  { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
  { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
  { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
  { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },

  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },

  /* SSE2 */
  { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
  { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
  { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
  { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },

  { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
  { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
  { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
  { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
  { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
  { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
  { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
  { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
  { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
  { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
  { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
  { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
  { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
  { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
  { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
  { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
  { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
  { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
  { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
  { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },

  { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
  { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },

  /* SSE2 MMX */
  { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
  { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },

  { MASK_MMX, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
  { MASK_MMX, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
  { MASK_MMX, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
  { MASK_MMX, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
  { MASK_MMX, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
  { MASK_MMX, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
  { MASK_MMX, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
  { MASK_MMX, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },

  { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },

  { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },

  { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },

  { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },

  { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },

  { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
  { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },

  { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },

  { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
  { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },

  /* PNI MMX */
  { MASK_PNI, CODE_FOR_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
  { MASK_PNI, CODE_FOR_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
  { MASK_PNI, CODE_FOR_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
  { MASK_PNI, CODE_FOR_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
  { MASK_PNI, CODE_FOR_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
  { MASK_PNI, CODE_FOR_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
};

static const struct builtin_description bdesc_1arg[] =
{
  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },

  { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
  { MASK_SSE, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
  { MASK_SSE, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },

  { MASK_SSE, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
  { MASK_SSE, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
  { MASK_SSE | MASK_64BIT, CODE_FOR_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
  { MASK_SSE, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
  { MASK_SSE, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
  { MASK_SSE | MASK_64BIT, CODE_FOR_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_movdq2q, 0, IX86_BUILTIN_MOVDQ2Q, 0, 0 },

  { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },

  { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },

  { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },

  { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },

  { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
  { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
  { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },

  { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 },

  /* PNI */
  { MASK_PNI, CODE_FOR_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
  { MASK_PNI, CODE_FOR_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
  { MASK_PNI, CODE_FOR_movddup, 0, IX86_BUILTIN_MOVDDUP, 0, 0 }
};

void
ix86_init_builtins (void)
{
  if (TARGET_MMX)
    ix86_init_mmx_sse_builtins ();
}

/* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
   is zero.  Otherwise, if TARGET_SSE is not set, only expand the MMX
   builtins.  */
static void
ix86_init_mmx_sse_builtins (void)
{
  const struct builtin_description * d;
  size_t i;

  tree pchar_type_node = build_pointer_type (char_type_node);
  tree pcchar_type_node = build_pointer_type (
				build_type_variant (char_type_node, 1, 0));
  tree pfloat_type_node = build_pointer_type (float_type_node);
  tree pcfloat_type_node = build_pointer_type (
				build_type_variant (float_type_node, 1, 0));
  tree pv2si_type_node = build_pointer_type (V2SI_type_node);
  tree pv2di_type_node = build_pointer_type (V2DI_type_node);
  tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);

  /* Comparisons.  */
  tree int_ftype_v4sf_v4sf
    = build_function_type_list (integer_type_node,
				V4SF_type_node, V4SF_type_node, NULL_TREE);
  tree v4si_ftype_v4sf_v4sf
    = build_function_type_list (V4SI_type_node,
				V4SF_type_node, V4SF_type_node, NULL_TREE);
  /* MMX/SSE/integer conversions.  */
  tree int_ftype_v4sf
    = build_function_type_list (integer_type_node,
				V4SF_type_node, NULL_TREE);
  tree int64_ftype_v4sf
    = build_function_type_list (long_long_integer_type_node,
				V4SF_type_node, NULL_TREE);
  tree int_ftype_v8qi
    = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
  tree v4sf_ftype_v4sf_int
    = build_function_type_list (V4SF_type_node,
				V4SF_type_node, integer_type_node, NULL_TREE);
  tree v4sf_ftype_v4sf_int64
    = build_function_type_list (V4SF_type_node,
				V4SF_type_node, long_long_integer_type_node,
				NULL_TREE);
  tree v4sf_ftype_v4sf_v2si
    = build_function_type_list (V4SF_type_node,
				V4SF_type_node, V2SI_type_node, NULL_TREE);
  tree int_ftype_v4hi_int
    = build_function_type_list (integer_type_node,
				V4HI_type_node, integer_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_int_int
    = build_function_type_list (V4HI_type_node, V4HI_type_node,
				integer_type_node, integer_type_node,
				NULL_TREE);
  /* Miscellaneous.  */
  tree v8qi_ftype_v4hi_v4hi
    = build_function_type_list (V8QI_type_node,
				V4HI_type_node, V4HI_type_node, NULL_TREE);
  tree v4hi_ftype_v2si_v2si
    = build_function_type_list (V4HI_type_node,
				V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree v4sf_ftype_v4sf_v4sf_int
    = build_function_type_list (V4SF_type_node,
				V4SF_type_node, V4SF_type_node,
				integer_type_node, NULL_TREE);
  tree v2si_ftype_v4hi_v4hi
    = build_function_type_list (V2SI_type_node,
				V4HI_type_node, V4HI_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_int
    = build_function_type_list (V4HI_type_node,
				V4HI_type_node, integer_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_di
    = build_function_type_list (V4HI_type_node,
				V4HI_type_node, long_long_unsigned_type_node,
				NULL_TREE);
  tree v2si_ftype_v2si_di
    = build_function_type_list (V2SI_type_node,
				V2SI_type_node, long_long_unsigned_type_node,
				NULL_TREE);
  tree void_ftype_void
    = build_function_type (void_type_node, void_list_node);
  tree void_ftype_unsigned
    = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
  tree void_ftype_unsigned_unsigned
    = build_function_type_list (void_type_node, unsigned_type_node,
				unsigned_type_node, NULL_TREE);
  tree void_ftype_pcvoid_unsigned_unsigned
    = build_function_type_list (void_type_node, const_ptr_type_node,
				unsigned_type_node, unsigned_type_node,
				NULL_TREE);
  tree unsigned_ftype_void
    = build_function_type (unsigned_type_node, void_list_node);
  tree di_ftype_void
    = build_function_type (long_long_unsigned_type_node, void_list_node);
  tree v4sf_ftype_void
    = build_function_type (V4SF_type_node, void_list_node);
  tree v2si_ftype_v4sf
    = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
  /* Loads/stores.  */
  tree void_ftype_v8qi_v8qi_pchar
    = build_function_type_list (void_type_node,
				V8QI_type_node, V8QI_type_node,
				pchar_type_node, NULL_TREE);
  tree v4sf_ftype_pcfloat
    = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
  /* @@@ the type is bogus */
  tree v4sf_ftype_v4sf_pv2si
    = build_function_type_list (V4SF_type_node,
				V4SF_type_node, pv2si_type_node, NULL_TREE);
  tree void_ftype_pv2si_v4sf
    = build_function_type_list (void_type_node,
				pv2si_type_node, V4SF_type_node, NULL_TREE);
  tree void_ftype_pfloat_v4sf
    = build_function_type_list (void_type_node,
				pfloat_type_node, V4SF_type_node, NULL_TREE);
  tree void_ftype_pdi_di
    = build_function_type_list (void_type_node,
				pdi_type_node, long_long_unsigned_type_node,
				NULL_TREE);
  tree void_ftype_pv2di_v2di
    = build_function_type_list (void_type_node,
				pv2di_type_node, V2DI_type_node, NULL_TREE);
  /* Normal vector unops.  */
  tree v4sf_ftype_v4sf
    = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);

  /* Normal vector binops.  */
  tree v4sf_ftype_v4sf_v4sf
    = build_function_type_list (V4SF_type_node,
				V4SF_type_node, V4SF_type_node, NULL_TREE);
  tree v8qi_ftype_v8qi_v8qi
    = build_function_type_list (V8QI_type_node,
				V8QI_type_node, V8QI_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_v4hi
    = build_function_type_list (V4HI_type_node,
				V4HI_type_node, V4HI_type_node, NULL_TREE);
  tree v2si_ftype_v2si_v2si
    = build_function_type_list (V2SI_type_node,
				V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree di_ftype_di_di
    = build_function_type_list (long_long_unsigned_type_node,
				long_long_unsigned_type_node,
				long_long_unsigned_type_node, NULL_TREE);

  tree v2si_ftype_v2sf
    = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
  tree v2sf_ftype_v2si
    = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
  tree v2si_ftype_v2si
    = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree v2sf_ftype_v2sf
    = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
  tree v2sf_ftype_v2sf_v2sf
    = build_function_type_list (V2SF_type_node,
				V2SF_type_node, V2SF_type_node, NULL_TREE);
  tree v2si_ftype_v2sf_v2sf
    = build_function_type_list (V2SI_type_node,
				V2SF_type_node, V2SF_type_node, NULL_TREE);
  tree pint_type_node = build_pointer_type (integer_type_node);
  tree pcint_type_node = build_pointer_type (
				build_type_variant (integer_type_node, 1, 0));
  tree pdouble_type_node = build_pointer_type (double_type_node);
  tree pcdouble_type_node = build_pointer_type (
				build_type_variant (double_type_node, 1, 0));
  tree int_ftype_v2df_v2df
    = build_function_type_list (integer_type_node,
				V2DF_type_node, V2DF_type_node, NULL_TREE);

  tree ti_ftype_void
    = build_function_type (intTI_type_node, void_list_node);
  tree v2di_ftype_void
    = build_function_type (V2DI_type_node, void_list_node);
  tree ti_ftype_ti_ti
    = build_function_type_list (intTI_type_node,
				intTI_type_node, intTI_type_node, NULL_TREE);
  tree void_ftype_pcvoid
    = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
  tree v2di_ftype_di
    = build_function_type_list (V2DI_type_node,
				long_long_unsigned_type_node, NULL_TREE);
JH
13088 tree di_ftype_v2di
13089 = build_function_type_list (long_long_unsigned_type_node,
13090 V2DI_type_node, NULL_TREE);
fbe5eb6d 13091 tree v4sf_ftype_v4si
b4de2f7d 13092 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
fbe5eb6d 13093 tree v4si_ftype_v4sf
b4de2f7d 13094 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
fbe5eb6d 13095 tree v2df_ftype_v4si
b4de2f7d 13096 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
fbe5eb6d 13097 tree v4si_ftype_v2df
b4de2f7d 13098 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 13099 tree v2si_ftype_v2df
b4de2f7d 13100 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 13101 tree v4sf_ftype_v2df
b4de2f7d 13102 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 13103 tree v2df_ftype_v2si
b4de2f7d 13104 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
fbe5eb6d 13105 tree v2df_ftype_v4sf
b4de2f7d 13106 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
fbe5eb6d 13107 tree int_ftype_v2df
b4de2f7d 13108 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
453ee231
JH
13109 tree int64_ftype_v2df
13110 = build_function_type_list (long_long_integer_type_node,
b96a374d 13111 V2DF_type_node, NULL_TREE);
fbe5eb6d 13112 tree v2df_ftype_v2df_int
b4de2f7d
AH
13113 = build_function_type_list (V2DF_type_node,
13114 V2DF_type_node, integer_type_node, NULL_TREE);
453ee231
JH
13115 tree v2df_ftype_v2df_int64
13116 = build_function_type_list (V2DF_type_node,
13117 V2DF_type_node, long_long_integer_type_node,
13118 NULL_TREE);
fbe5eb6d 13119 tree v4sf_ftype_v4sf_v2df
b4de2f7d
AH
13120 = build_function_type_list (V4SF_type_node,
13121 V4SF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 13122 tree v2df_ftype_v2df_v4sf
b4de2f7d
AH
13123 = build_function_type_list (V2DF_type_node,
13124 V2DF_type_node, V4SF_type_node, NULL_TREE);
fbe5eb6d 13125 tree v2df_ftype_v2df_v2df_int
b4de2f7d
AH
13126 = build_function_type_list (V2DF_type_node,
13127 V2DF_type_node, V2DF_type_node,
13128 integer_type_node,
13129 NULL_TREE);
fbe5eb6d 13130 tree v2df_ftype_v2df_pv2si
b4de2f7d
AH
13131 = build_function_type_list (V2DF_type_node,
13132 V2DF_type_node, pv2si_type_node, NULL_TREE);
fbe5eb6d 13133 tree void_ftype_pv2si_v2df
b4de2f7d
AH
13134 = build_function_type_list (void_type_node,
13135 pv2si_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 13136 tree void_ftype_pdouble_v2df
b4de2f7d
AH
13137 = build_function_type_list (void_type_node,
13138 pdouble_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 13139 tree void_ftype_pint_int
b4de2f7d
AH
13140 = build_function_type_list (void_type_node,
13141 pint_type_node, integer_type_node, NULL_TREE);
fbe5eb6d 13142 tree void_ftype_v16qi_v16qi_pchar
b4de2f7d
AH
13143 = build_function_type_list (void_type_node,
13144 V16QI_type_node, V16QI_type_node,
13145 pchar_type_node, NULL_TREE);
068f5dea
JH
13146 tree v2df_ftype_pcdouble
13147 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
fbe5eb6d 13148 tree v2df_ftype_v2df_v2df
b4de2f7d
AH
13149 = build_function_type_list (V2DF_type_node,
13150 V2DF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 13151 tree v16qi_ftype_v16qi_v16qi
b4de2f7d
AH
13152 = build_function_type_list (V16QI_type_node,
13153 V16QI_type_node, V16QI_type_node, NULL_TREE);
fbe5eb6d 13154 tree v8hi_ftype_v8hi_v8hi
b4de2f7d
AH
13155 = build_function_type_list (V8HI_type_node,
13156 V8HI_type_node, V8HI_type_node, NULL_TREE);
fbe5eb6d 13157 tree v4si_ftype_v4si_v4si
b4de2f7d
AH
13158 = build_function_type_list (V4SI_type_node,
13159 V4SI_type_node, V4SI_type_node, NULL_TREE);
fbe5eb6d 13160 tree v2di_ftype_v2di_v2di
b4de2f7d
AH
13161 = build_function_type_list (V2DI_type_node,
13162 V2DI_type_node, V2DI_type_node, NULL_TREE);
fbe5eb6d 13163 tree v2di_ftype_v2df_v2df
b4de2f7d
AH
13164 = build_function_type_list (V2DI_type_node,
13165 V2DF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 13166 tree v2df_ftype_v2df
b4de2f7d 13167 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 13168 tree v2df_ftype_double
b4de2f7d 13169 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
fbe5eb6d 13170 tree v2df_ftype_double_double
b4de2f7d
AH
13171 = build_function_type_list (V2DF_type_node,
13172 double_type_node, double_type_node, NULL_TREE);
fbe5eb6d 13173 tree int_ftype_v8hi_int
b4de2f7d
AH
13174 = build_function_type_list (integer_type_node,
13175 V8HI_type_node, integer_type_node, NULL_TREE);
fbe5eb6d 13176 tree v8hi_ftype_v8hi_int_int
b4de2f7d
AH
13177 = build_function_type_list (V8HI_type_node,
13178 V8HI_type_node, integer_type_node,
13179 integer_type_node, NULL_TREE);
916b60b7 13180 tree v2di_ftype_v2di_int
b4de2f7d
AH
13181 = build_function_type_list (V2DI_type_node,
13182 V2DI_type_node, integer_type_node, NULL_TREE);
fbe5eb6d 13183 tree v4si_ftype_v4si_int
b4de2f7d
AH
13184 = build_function_type_list (V4SI_type_node,
13185 V4SI_type_node, integer_type_node, NULL_TREE);
fbe5eb6d 13186 tree v8hi_ftype_v8hi_int
b4de2f7d
AH
13187 = build_function_type_list (V8HI_type_node,
13188 V8HI_type_node, integer_type_node, NULL_TREE);
916b60b7 13189 tree v8hi_ftype_v8hi_v2di
b4de2f7d
AH
13190 = build_function_type_list (V8HI_type_node,
13191 V8HI_type_node, V2DI_type_node, NULL_TREE);
916b60b7 13192 tree v4si_ftype_v4si_v2di
b4de2f7d
AH
13193 = build_function_type_list (V4SI_type_node,
13194 V4SI_type_node, V2DI_type_node, NULL_TREE);
916b60b7 13195 tree v4si_ftype_v8hi_v8hi
b4de2f7d
AH
13196 = build_function_type_list (V4SI_type_node,
13197 V8HI_type_node, V8HI_type_node, NULL_TREE);
916b60b7 13198 tree di_ftype_v8qi_v8qi
b4de2f7d
AH
13199 = build_function_type_list (long_long_unsigned_type_node,
13200 V8QI_type_node, V8QI_type_node, NULL_TREE);
916b60b7 13201 tree v2di_ftype_v16qi_v16qi
b4de2f7d
AH
13202 = build_function_type_list (V2DI_type_node,
13203 V16QI_type_node, V16QI_type_node, NULL_TREE);
916b60b7 13204 tree int_ftype_v16qi
b4de2f7d 13205 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
068f5dea
JH
13206 tree v16qi_ftype_pcchar
13207 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
f02e1358
JH
13208 tree void_ftype_pchar_v16qi
13209 = build_function_type_list (void_type_node,
13210 pchar_type_node, V16QI_type_node, NULL_TREE);
068f5dea
JH
13211 tree v4si_ftype_pcint
13212 = build_function_type_list (V4SI_type_node, pcint_type_node, NULL_TREE);
13213 tree void_ftype_pcint_v4si
f02e1358 13214 = build_function_type_list (void_type_node,
068f5dea 13215 pcint_type_node, V4SI_type_node, NULL_TREE);
f02e1358
JH
13216 tree v2di_ftype_v2di
13217 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
47f339cf 13218
  /* Add all builtins that are more or less simple operations on two
     operands.  */
  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    {
      /* Use one of the operands; the target can have a different mode for
         mask-generating compares.  */
      enum machine_mode mode;
      tree type;

      if (d->name == 0)
        continue;
      mode = insn_data[d->icode].operand[1].mode;

      switch (mode)
        {
        case V16QImode:
          type = v16qi_ftype_v16qi_v16qi;
          break;
        case V8HImode:
          type = v8hi_ftype_v8hi_v8hi;
          break;
        case V4SImode:
          type = v4si_ftype_v4si_v4si;
          break;
        case V2DImode:
          type = v2di_ftype_v2di_v2di;
          break;
        case V2DFmode:
          type = v2df_ftype_v2df_v2df;
          break;
        case TImode:
          type = ti_ftype_ti_ti;
          break;
        case V4SFmode:
          type = v4sf_ftype_v4sf_v4sf;
          break;
        case V8QImode:
          type = v8qi_ftype_v8qi_v8qi;
          break;
        case V4HImode:
          type = v4hi_ftype_v4hi_v4hi;
          break;
        case V2SImode:
          type = v2si_ftype_v2si_v2si;
          break;
        case DImode:
          type = di_ftype_di_di;
          break;

        default:
          abort ();
        }

      /* Override for comparisons.  */
      if (d->icode == CODE_FOR_maskcmpv4sf3
          || d->icode == CODE_FOR_maskncmpv4sf3
          || d->icode == CODE_FOR_vmmaskcmpv4sf3
          || d->icode == CODE_FOR_vmmaskncmpv4sf3)
        type = v4si_ftype_v4sf_v4sf;

      if (d->icode == CODE_FOR_maskcmpv2df3
          || d->icode == CODE_FOR_maskncmpv2df3
          || d->icode == CODE_FOR_vmmaskcmpv2df3
          || d->icode == CODE_FOR_vmmaskncmpv2df3)
        type = v2di_ftype_v2df_v2df;

      def_builtin (d->mask, d->name, type, d->code);
    }

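  /* Reference sketch (illustrative): each bdesc_2arg entry, defined earlier
     in this file, pairs an ISA mask and an insn code with the builtin's
     name and code, roughly

       { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb",
         IX86_BUILTIN_PADDB, 0, 0 },

     so the loop above only has to derive the function type from the mode
     of operand 1.  */
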
  /* Add the remaining MMX insns with somewhat more complicated types.  */
  def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
  def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
  def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
  def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
  def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);

  def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
  def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
  def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);

  def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
  def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);

  def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
  def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);

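  /* Usage sketch (illustrative): the MMX shift counts are DImode values,
     so a call looks like

       __v4hi r = __builtin_ia32_psllw (v, 3);

     with the count promoted to long long (__v4hi as in <mmintrin.h>).  */
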
  /* comi/ucomi insns.  */
  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    if (d->mask == MASK_SSE2)
      def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
    else
      def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);

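  /* Usage sketch (illustrative): a comi builtin compares the low elements
     of its two vector operands and yields an int, e.g.

       int lt = __builtin_ia32_comilt (a, b);

     which is what wrappers such as _mm_comilt_ss expand to.  */
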
  def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
  def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
  def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);

  def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
  def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
  def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
  def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
  def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
  def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
  def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
  def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
  def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
  def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
  def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);

  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);

  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);

  def_builtin (MASK_SSE, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADAPS);
  def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
  def_builtin (MASK_SSE, "__builtin_ia32_loadss", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADSS);
  def_builtin (MASK_SSE, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
  def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
  def_builtin (MASK_SSE, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);

  def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
  def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
  def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
  def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);

  def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
  def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);

  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);

  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);

  def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
  def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
  def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
  def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
  def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
  def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);

  def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);

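  /* Usage sketch (illustrative): the shufps selector must be a literal,
     e.g.

       __v4sf r = __builtin_ia32_shufps (a, b, 0x1B);

     a non-constant selector is rejected during expansion below with
     "mask must be an immediate".  */
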
  /* Original 3DNow!  */
  def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);

  /* 3DNow! extension as used in the Athlon CPU.  */
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);

  def_builtin (MASK_SSE, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);

  /* SSE2 */
  def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
  def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);

  def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
  def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
  def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q);

  def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADAPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADSD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);

  def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);

  def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
  def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
  def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);

  def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
  def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
  def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
  def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);

  def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);

  def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
  def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
  def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
  def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);

  def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
  def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pcdouble, IX86_BUILTIN_LOADPD1);
  def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADRPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
  def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);

  def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
  def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
  def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);

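  /* Usage sketch (illustrative): the cache-control builtins just defined
     match void_ftype_pcvoid and void_ftype_void, e.g.

       __builtin_ia32_clflush (p);
       __builtin_ia32_mfence ();  */
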
  def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQA);
  def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
  def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pcint, IX86_BUILTIN_LOADD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA);
  def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
  def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pcint_v4si, IX86_BUILTIN_STORED);
  def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ);

  def_builtin (MASK_SSE, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI);

  def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
  def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);

  def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);

  def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);

  /* Prescott New Instructions.  */
  def_builtin (MASK_PNI, "__builtin_ia32_monitor",
               void_ftype_pcvoid_unsigned_unsigned,
               IX86_BUILTIN_MONITOR);
  def_builtin (MASK_PNI, "__builtin_ia32_mwait",
               void_ftype_unsigned_unsigned,
               IX86_BUILTIN_MWAIT);
  def_builtin (MASK_PNI, "__builtin_ia32_movshdup",
               v4sf_ftype_v4sf,
               IX86_BUILTIN_MOVSHDUP);
  def_builtin (MASK_PNI, "__builtin_ia32_movsldup",
               v4sf_ftype_v4sf,
               IX86_BUILTIN_MOVSLDUP);
  def_builtin (MASK_PNI, "__builtin_ia32_lddqu",
               v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
  def_builtin (MASK_PNI, "__builtin_ia32_loadddup",
               v2df_ftype_pcdouble, IX86_BUILTIN_LOADDDUP);
  def_builtin (MASK_PNI, "__builtin_ia32_movddup",
               v2df_ftype_v2df, IX86_BUILTIN_MOVDDUP);
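
  /* Usage sketch (illustrative): monitor/mwait take a const pointer plus
     two unsigned hint arguments, e.g.

       __builtin_ia32_monitor (addr, 0, 0);
       __builtin_ia32_mwait (0, 0);  */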
}

/* Errors in the source file can cause expand_expr to return const0_rtx
   where we expect a vector.  To avoid crashing, use one of the vector
   clear instructions.  */
static rtx
safe_vector_operand (rtx x, enum machine_mode mode)
{
  if (x != const0_rtx)
    return x;
  x = gen_reg_rtx (mode);

  if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
    emit_insn (gen_mmx_clrdi (mode == DImode ? x
                              : gen_rtx_SUBREG (DImode, x, 0)));
  else
    emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
                                : gen_rtx_SUBREG (V4SFmode, x, 0),
                                CONST0_RTX (V4SFmode)));
  return x;
}

/* Subroutine of ix86_expand_builtin to take care of binop insns.  */

static rtx
ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (GET_MODE (op1) == SImode && mode1 == TImode)
    {
      rtx x = gen_reg_rtx (V4SImode);
      emit_insn (gen_sse2_loadd (x, op1));
      op1 = gen_lowpart (TImode, x);
    }

  /* In case the insn wants input operands in modes different from
     the result, abort.  */
  if ((GET_MODE (op0) != mode0 && GET_MODE (op0) != VOIDmode)
      || (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode))
    abort ();

  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
     yet one of the two must not be a memory.  This is normally enforced
     by expanders, but we didn't bother to create one here.  */
  if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
    op0 = copy_to_mode_reg (mode0, op0);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

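/* Expansion sketch (illustrative): a simple two-operand builtin such as
   __builtin_ia32_addps arrives here from the bdesc_2arg loop with its
   insn code (e.g. CODE_FOR_addv4sf3); the helper above forces both
   arguments into acceptable operands and emits that named pattern.  */
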
/* Subroutine of ix86_expand_builtin to take care of stores.  */

static rtx
ix86_expand_store_builtin (enum insn_code icode, tree arglist)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  enum machine_mode mode0 = insn_data[icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[icode].operand[1].mode;

  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (op0, op1);
  if (pat)
    emit_insn (pat);
  return 0;
}

/* Subroutine of ix86_expand_builtin to take care of unop insns.  */

static rtx
ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
                          rtx target, int do_load)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);
  if (do_load)
    op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  else
    {
      if (VECTOR_MODE_P (mode0))
        op0 = safe_vector_operand (op0, mode0);

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
    }

  pat = GEN_FCN (icode) (target, op0);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

/* Subroutine of ix86_expand_builtin to take care of three special unop insns:
   sqrtss, rsqrtss, rcpss.  */

static rtx
ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);

  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);

  op1 = op0;
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
    op1 = copy_to_mode_reg (mode0, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

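/* Usage sketch (illustrative): the three insns handled above modify only
   element 0 and copy the upper elements from the source, e.g.

     __v4sf r = __builtin_ia32_sqrtss (v);

   leaves r[1..3] equal to v[1..3].  */
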
/* Subroutine of ix86_expand_builtin to take care of comparison insns.  */

static rtx
ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
                         rtx target)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  rtx op2;
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag)
    {
      rtx tmp = gen_reg_rtx (mode1);
      emit_move_insn (tmp, op1);
      op1 = op0;
      op0 = tmp;
    }

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

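/* Sketch (illustrative): d->flag marks predicates with no direct hardware
   form, e.g. a "gt" compare is emitted as "lt" with the operands
   exchanged; the fresh register copied above keeps op1 safe across the
   swap.  */
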
/* Subroutine of ix86_expand_builtin to take care of comi insns.  */

static rtx
ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
                      rtx target)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  rtx op2;
  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag)
    {
      rtx tmp = op1;
      op1 = op0;
      op0 = tmp;
    }

  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  emit_insn (gen_rtx_SET (VOIDmode,
                          gen_rtx_STRICT_LOW_PART (VOIDmode, target),
                          gen_rtx_fmt_ee (comparison, QImode,
                                          SET_DEST (pat),
                                          const0_rtx)));

  return SUBREG_REG (target);
}

/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.  */

rtx
ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
                     enum machine_mode mode ATTRIBUTE_UNUSED,
                     int ignore ATTRIBUTE_UNUSED)
{
  const struct builtin_description *d;
  size_t i;
  enum insn_code icode;
  tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
  tree arglist = TREE_OPERAND (exp, 1);
  tree arg0, arg1, arg2;
  rtx op0, op1, op2, pat;
  enum machine_mode tmode, mode0, mode1, mode2;
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);

  switch (fcode)
    {
    case IX86_BUILTIN_EMMS:
      emit_insn (gen_emms ());
      return 0;

    case IX86_BUILTIN_SFENCE:
      emit_insn (gen_sfence ());
      return 0;

    case IX86_BUILTIN_PEXTRW:
    case IX86_BUILTIN_PEXTRW128:
      icode = (fcode == IX86_BUILTIN_PEXTRW
               ? CODE_FOR_mmx_pextrw
               : CODE_FOR_sse2_pextrw);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
        {
          error ("selector must be an integer constant in the range 0..%i",
                 fcode == IX86_BUILTIN_PEXTRW ? 3 : 7);
          return gen_reg_rtx (tmode);
        }
      if (target == 0
          || GET_MODE (target) != tmode
          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_PINSRW:
    case IX86_BUILTIN_PINSRW128:
      icode = (fcode == IX86_BUILTIN_PINSRW
               ? CODE_FOR_mmx_pinsrw
               : CODE_FOR_sse2_pinsrw);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
        op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
        {
          error ("selector must be an integer constant in the range 0..%i",
                 fcode == IX86_BUILTIN_PINSRW ? 15 : 255);
          return const0_rtx;
        }
      if (target == 0
          || GET_MODE (target) != tmode
          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_MASKMOVQ:
    case IX86_BUILTIN_MASKMOVDQU:
      icode = (fcode == IX86_BUILTIN_MASKMOVQ
               ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
               : (TARGET_64BIT ? CODE_FOR_sse2_maskmovdqu_rex64
                  : CODE_FOR_sse2_maskmovdqu));
      /* Note the arg order is different from the operand order.  */
      arg1 = TREE_VALUE (arglist);
      arg2 = TREE_VALUE (TREE_CHAIN (arglist));
      arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      mode0 = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
        op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
        op2 = copy_to_mode_reg (mode2, op2);
      pat = GEN_FCN (icode) (op0, op1, op2);
      if (! pat)
        return 0;
      emit_insn (pat);
      return 0;

    case IX86_BUILTIN_SQRTSS:
      return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
    case IX86_BUILTIN_RSQRTSS:
      return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
    case IX86_BUILTIN_RCPSS:
      return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);

    case IX86_BUILTIN_LOADAPS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);

    case IX86_BUILTIN_LOADUPS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);

    case IX86_BUILTIN_STOREAPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);

    case IX86_BUILTIN_STOREUPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);

    case IX86_BUILTIN_LOADSS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);

    case IX86_BUILTIN_STORESS:
      return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);

    case IX86_BUILTIN_LOADHPS:
    case IX86_BUILTIN_LOADLPS:
    case IX86_BUILTIN_LOADHPD:
    case IX86_BUILTIN_LOADLPD:
      icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
               : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
               : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
               : CODE_FOR_sse2_movlpd);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
      op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
      if (target == 0
          || GET_MODE (target) != tmode
          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_STOREHPS:
    case IX86_BUILTIN_STORELPS:
    case IX86_BUILTIN_STOREHPD:
    case IX86_BUILTIN_STORELPD:
      icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
               : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
               : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
               : CODE_FOR_sse2_movlpd);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
        op1 = copy_to_mode_reg (mode1, op1);

      pat = GEN_FCN (icode) (op0, op0, op1);
      if (! pat)
        return 0;
      emit_insn (pat);
      return 0;

    case IX86_BUILTIN_MOVNTPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
    case IX86_BUILTIN_MOVNTQ:
      return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);

    case IX86_BUILTIN_LDMXCSR:
      op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
      target = assign_386_stack_local (SImode, 0);
      emit_move_insn (target, op0);
      emit_insn (gen_ldmxcsr (target));
      return 0;

    case IX86_BUILTIN_STMXCSR:
      target = assign_386_stack_local (SImode, 0);
      emit_insn (gen_stmxcsr (target));
      return copy_to_mode_reg (SImode, target);

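    /* Usage sketch (illustrative): these two cases round-trip MXCSR
       through a stack slot, e.g.

         unsigned saved = __builtin_ia32_stmxcsr ();
         __builtin_ia32_ldmxcsr (saved);

       which is what _mm_getcsr/_mm_setcsr expand to.  */
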
    case IX86_BUILTIN_SHUFPS:
    case IX86_BUILTIN_SHUFPD:
      icode = (fcode == IX86_BUILTIN_SHUFPS
               ? CODE_FOR_sse_shufps
               : CODE_FOR_sse2_shufpd);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
        op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
        {
          /* @@@ better error message */
          error ("mask must be an immediate");
          return gen_reg_rtx (tmode);
        }
      if (target == 0
          || GET_MODE (target) != tmode
          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_PSHUFW:
    case IX86_BUILTIN_PSHUFD:
    case IX86_BUILTIN_PSHUFHW:
    case IX86_BUILTIN_PSHUFLW:
      icode = (fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
               : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
               : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
               : CODE_FOR_mmx_pshufw);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
        op0 = copy_to_mode_reg (mode1, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
        {
          /* @@@ better error message */
          error ("mask must be an immediate");
          return const0_rtx;
        }
      if (target == 0
          || GET_MODE (target) != tmode
          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_PSLLDQI128:
    case IX86_BUILTIN_PSRLDQI128:
      icode = (fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
               : CODE_FOR_sse2_lshrti3);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
        {
          op0 = copy_to_reg (op0);
          op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
        }
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
        {
          error ("shift must be an immediate");
          return const0_rtx;
        }
      target = gen_reg_rtx (V2DImode);
      pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;

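    /* Note (illustrative): since these expand to TImode shifts, the count
       operand is in bits; byte-granular wrappers such as _mm_slli_si128
       are expected to scale their byte count by 8 before calling the
       builtin.  */
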
    case IX86_BUILTIN_FEMMS:
      emit_insn (gen_femms ());
      return NULL_RTX;

    case IX86_BUILTIN_PAVGUSB:
      return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);

    case IX86_BUILTIN_PF2ID:
      return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);

    case IX86_BUILTIN_PFACC:
      return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);

    case IX86_BUILTIN_PFADD:
      return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPEQ:
      return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPGE:
      return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPGT:
      return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);

    case IX86_BUILTIN_PFMAX:
      return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);

    case IX86_BUILTIN_PFMIN:
      return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);

    case IX86_BUILTIN_PFMUL:
      return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);

    case IX86_BUILTIN_PFRCP:
      return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);

    case IX86_BUILTIN_PFRCPIT1:
      return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);

    case IX86_BUILTIN_PFRCPIT2:
      return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);

    case IX86_BUILTIN_PFRSQIT1:
      return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);

    case IX86_BUILTIN_PFRSQRT:
      return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);

    case IX86_BUILTIN_PFSUB:
      return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);

    case IX86_BUILTIN_PFSUBR:
      return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);

    case IX86_BUILTIN_PI2FD:
      return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);

    case IX86_BUILTIN_PMULHRW:
      return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);

    case IX86_BUILTIN_PF2IW:
      return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);

    case IX86_BUILTIN_PFNACC:
      return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);

    case IX86_BUILTIN_PFPNACC:
      return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);

    case IX86_BUILTIN_PI2FW:
      return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);

    case IX86_BUILTIN_PSWAPDSI:
      return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);

    case IX86_BUILTIN_PSWAPDSF:
      return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);

    case IX86_BUILTIN_SSE_ZERO:
      target = gen_reg_rtx (V4SFmode);
      emit_insn (gen_sse_clrv4sf (target, CONST0_RTX (V4SFmode)));
      return target;

    case IX86_BUILTIN_MMX_ZERO:
      target = gen_reg_rtx (DImode);
      emit_insn (gen_mmx_clrdi (target));
      return target;

    case IX86_BUILTIN_CLRTI:
      target = gen_reg_rtx (V2DImode);
      emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode, target, V2DImode, 0)));
      return target;

    case IX86_BUILTIN_SQRTSD:
      return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
    case IX86_BUILTIN_LOADAPD:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
    case IX86_BUILTIN_LOADUPD:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);

    case IX86_BUILTIN_STOREAPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
    case IX86_BUILTIN_STOREUPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);

    case IX86_BUILTIN_LOADSD:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);

    case IX86_BUILTIN_STORESD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);

    case IX86_BUILTIN_SETPD1:
      target = assign_386_stack_local (DFmode, 0);
      arg0 = TREE_VALUE (arglist);
      emit_move_insn (adjust_address (target, DFmode, 0),
                      expand_expr (arg0, NULL_RTX, VOIDmode, 0));
      op0 = gen_reg_rtx (V2DFmode);
      emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
      emit_insn (gen_sse2_shufpd (op0, op0, op0, GEN_INT (0)));
      return op0;

    case IX86_BUILTIN_SETPD:
      target = assign_386_stack_local (V2DFmode, 0);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      emit_move_insn (adjust_address (target, DFmode, 0),
                      expand_expr (arg0, NULL_RTX, VOIDmode, 0));
      emit_move_insn (adjust_address (target, DFmode, 8),
                      expand_expr (arg1, NULL_RTX, VOIDmode, 0));
      op0 = gen_reg_rtx (V2DFmode);
      emit_insn (gen_sse2_movapd (op0, target));
      return op0;

    case IX86_BUILTIN_LOADRPD:
      target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
                                         gen_reg_rtx (V2DFmode), 1);
      emit_insn (gen_sse2_shufpd (target, target, target, GEN_INT (1)));
      return target;

    case IX86_BUILTIN_LOADPD1:
      target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
                                         gen_reg_rtx (V2DFmode), 1);
      emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
      return target;

    case IX86_BUILTIN_STOREPD1:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
    case IX86_BUILTIN_STORERPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);

    case IX86_BUILTIN_CLRPD:
      target = gen_reg_rtx (V2DFmode);
      emit_insn (gen_sse_clrv2df (target));
      return target;

    case IX86_BUILTIN_MFENCE:
      emit_insn (gen_sse2_mfence ());
      return 0;
    case IX86_BUILTIN_LFENCE:
      emit_insn (gen_sse2_lfence ());
      return 0;

    case IX86_BUILTIN_CLFLUSH:
      arg0 = TREE_VALUE (arglist);
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      icode = CODE_FOR_sse2_clflush;
      if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
        op0 = copy_to_mode_reg (Pmode, op0);

      emit_insn (gen_sse2_clflush (op0));
      return 0;

    case IX86_BUILTIN_MOVNTPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
    case IX86_BUILTIN_MOVNTDQ:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
fbe5eb6d
BS
14296 case IX86_BUILTIN_MOVNTI:
14297 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
14298
f02e1358
JH
14299 case IX86_BUILTIN_LOADDQA:
14300 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa, arglist, target, 1);
14301 case IX86_BUILTIN_LOADDQU:
14302 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
14303 case IX86_BUILTIN_LOADD:
14304 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd, arglist, target, 1);
14305
14306 case IX86_BUILTIN_STOREDQA:
14307 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa, arglist);
14308 case IX86_BUILTIN_STOREDQU:
14309 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
14310 case IX86_BUILTIN_STORED:
14311 return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist);
14312
22c7c85e
L
14313 case IX86_BUILTIN_MONITOR:
14314 arg0 = TREE_VALUE (arglist);
14315 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14316 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14317 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14318 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14319 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14320 if (!REG_P (op0))
14321 op0 = copy_to_mode_reg (SImode, op0);
14322 if (!REG_P (op1))
14323 op1 = copy_to_mode_reg (SImode, op1);
14324 if (!REG_P (op2))
14325 op2 = copy_to_mode_reg (SImode, op2);
14326 emit_insn (gen_monitor (op0, op1, op2));
14327 return 0;
14328
14329 case IX86_BUILTIN_MWAIT:
14330 arg0 = TREE_VALUE (arglist);
14331 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14332 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14333 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14334 if (!REG_P (op0))
14335 op0 = copy_to_mode_reg (SImode, op0);
14336 if (!REG_P (op1))
14337 op1 = copy_to_mode_reg (SImode, op1);
14338 emit_insn (gen_mwait (op0, op1));
14339 return 0;
14340
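    /* Illustrative sketch, not compiled here: at the source level the two
       cases above form the SSE3 monitor/mwait idiom.  Assuming the builtin
       names registered elsewhere in this file, a caller would write
       something like

           while (!flag)
             {
               __builtin_ia32_monitor ((void *) &flag, 0, 0);
               if (!flag)
                 __builtin_ia32_mwait (0, 0);
             }

       MONITOR arms the address-range monitor on &flag; MWAIT then idles the
       core until a store into the monitored range (or an interrupt) wakes
       it.  */
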
    case IX86_BUILTIN_LOADDDUP:
      return ix86_expand_unop_builtin (CODE_FOR_loadddup, arglist, target, 1);

    case IX86_BUILTIN_LDDQU:
      return ix86_expand_unop_builtin (CODE_FOR_lddqu, arglist, target, 1);

    default:
      break;
    }

  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    if (d->code == fcode)
      {
        /* Compares are treated specially.  */
        if (d->icode == CODE_FOR_maskcmpv4sf3
            || d->icode == CODE_FOR_vmmaskcmpv4sf3
            || d->icode == CODE_FOR_maskncmpv4sf3
            || d->icode == CODE_FOR_vmmaskncmpv4sf3
            || d->icode == CODE_FOR_maskcmpv2df3
            || d->icode == CODE_FOR_vmmaskcmpv2df3
            || d->icode == CODE_FOR_maskncmpv2df3
            || d->icode == CODE_FOR_vmmaskncmpv2df3)
          return ix86_expand_sse_compare (d, arglist, target);

        return ix86_expand_binop_builtin (d->icode, arglist, target);
      }

  for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
    if (d->code == fcode)
      return ix86_expand_unop_builtin (d->icode, arglist, target, 0);

  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_comi (d, arglist, target);

  /* @@@ Should really do something sensible here.  */
  return 0;
}

/* Store OPERAND to the memory after reload is completed.  This means
   that we can't easily use assign_stack_local.  */
rtx
ix86_force_to_memory (enum machine_mode mode, rtx operand)
{
  rtx result;

  if (!reload_completed)
    abort ();
  if (TARGET_RED_ZONE)
    {
      result = gen_rtx_MEM (mode,
                            gen_rtx_PLUS (Pmode,
                                          stack_pointer_rtx,
                                          GEN_INT (-RED_ZONE_SIZE)));
      emit_move_insn (result, operand);
    }
  else if (!TARGET_RED_ZONE && TARGET_64BIT)
    {
      switch (mode)
        {
        case HImode:
        case SImode:
          operand = gen_lowpart (DImode, operand);
          /* FALLTHRU */
        case DImode:
          emit_insn (gen_rtx_SET (VOIDmode,
                                  gen_rtx_MEM (DImode,
                                               gen_rtx_PRE_DEC (DImode,
                                                                stack_pointer_rtx)),
                                  operand));
          break;
        default:
          abort ();
        }
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  else
    {
      switch (mode)
        {
        case DImode:
          {
            rtx operands[2];
            split_di (&operand, 1, operands, operands + 1);
            emit_insn (gen_rtx_SET (VOIDmode,
                                    gen_rtx_MEM (SImode,
                                                 gen_rtx_PRE_DEC (Pmode,
                                                                  stack_pointer_rtx)),
                                    operands[1]));
            emit_insn (gen_rtx_SET (VOIDmode,
                                    gen_rtx_MEM (SImode,
                                                 gen_rtx_PRE_DEC (Pmode,
                                                                  stack_pointer_rtx)),
                                    operands[0]));
          }
          break;
        case HImode:
          /* It is better to store HImodes as SImodes.  */
          if (!TARGET_PARTIAL_REG_STALL)
            operand = gen_lowpart (SImode, operand);
          /* FALLTHRU */
        case SImode:
          emit_insn (gen_rtx_SET (VOIDmode,
                                  gen_rtx_MEM (GET_MODE (operand),
                                               gen_rtx_PRE_DEC (SImode,
                                                                stack_pointer_rtx)),
                                  operand));
          break;
        default:
          abort ();
        }
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  return result;
}

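/* A minimal usage sketch (assumed caller, not from this file): the function
   above pairs with ix86_free_from_memory below.  A post-reload splitter that
   needs an operand in memory would do roughly

       rtx mem = ix86_force_to_memory (DImode, operands[1]);
       ... emit insns that read MEM ...
       ix86_free_from_memory (DImode);

   On TARGET_RED_ZONE the slot lives below the stack pointer and nothing is
   deallocated; otherwise the slot is created with pushes and must be
   released again with ix86_free_from_memory to keep the stack balanced.  */
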
/* Free the stack slot used by ix86_force_to_memory above.  */
void
ix86_free_from_memory (enum machine_mode mode)
{
  if (!TARGET_RED_ZONE)
    {
      int size;

      if (mode == DImode || TARGET_64BIT)
        size = 8;
      else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
        size = 2;
      else
        size = 4;
      /* Use LEA to deallocate stack space.  In peephole2 it will be converted
         to a pop or add instruction if registers are available.  */
      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                              gen_rtx_PLUS (Pmode, stack_pointer_rtx,
                                            GEN_INT (size))));
    }
}

/* Put float CONST_DOUBLE in the constant pool instead of fp regs.
   QImode must go into class Q_REGS.
   Narrow ALL_REGS to GENERAL_REGS.  This supports allowing movsf and
   movdf to do mem-to-mem moves through integer regs.  */
enum reg_class
ix86_preferred_reload_class (rtx x, enum reg_class class)
{
  if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x)))
    return NO_REGS;
  if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
    {
      /* SSE can't load any constant directly yet.  */
      if (SSE_CLASS_P (class))
        return NO_REGS;
      /* Floats can load 0 and 1.  */
      if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
        {
          /* Limit class to non-SSE.  Use GENERAL_REGS if possible.  */
          if (MAYBE_SSE_CLASS_P (class))
            return (reg_class_subset_p (class, GENERAL_REGS)
                    ? GENERAL_REGS : FLOAT_REGS);
          else
            return class;
        }
      /* General regs can load everything.  */
      if (reg_class_subset_p (class, GENERAL_REGS))
        return GENERAL_REGS;
      /* In case we haven't resolved FLOAT or SSE yet, give up.  */
      if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
        return NO_REGS;
    }
  if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
    return NO_REGS;
  if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
    return Q_REGS;
  return class;
}

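/* Example of the effect (an illustrative sketch of the rules above):
   reloading the constant 1.0 into a class that may contain x87 registers
   yields a float class, since standard_80387_constant_p recognizes it;
   reloading the same constant into SSE_REGS yields NO_REGS, which forces
   the value to the constant pool and a memory load instead.  */
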
/* If we are copying between general and FP registers, we need a memory
   location.  The same is true for SSE and MMX registers.

   The macro can't work reliably when one of the CLASSES is a class containing
   registers from multiple units (SSE, MMX, integer).  We avoid this by never
   combining those units in a single alternative in the machine description.
   Ensure that this constraint holds to avoid unexpected surprises.

   When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
   enforce these sanity checks.  */
int
ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
                              enum machine_mode mode, int strict)
{
  if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
      || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
      || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
      || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
      || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
      || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
    {
      if (strict)
        abort ();
      else
        return 1;
    }
  return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
          || ((SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
               || MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
              && ((mode != SImode && (mode != DImode || !TARGET_64BIT))
                  || (!TARGET_INTER_UNIT_MOVES && !optimize_size))));
}

/* Return the cost of moving data from a register in class CLASS1 to
   one in class CLASS2.

   It is not required that the cost always equal 2 when FROM is the same as TO;
   on some machines it is expensive to move between registers if they are not
   general registers.  */
int
ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
                         enum reg_class class2)
{
  /* In case we require secondary memory, compute the cost of the store
     followed by a load.  In order to avoid bad register allocation choices,
     we need this to be *at least* as high as the symmetric
     MEMORY_MOVE_COST.  */
  if (ix86_secondary_memory_needed (class1, class2, mode, 0))
    {
      int cost = 1;

      cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
                   MEMORY_MOVE_COST (mode, class1, 1));
      cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
                   MEMORY_MOVE_COST (mode, class2, 1));

      /* In case of copying from a general purpose register we may emit
         multiple stores followed by a single load, causing a memory size
         mismatch stall.  Count this as an arbitrarily high cost of 20.  */
      if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
        cost += 20;

      /* In the case of FP/MMX moves, the registers actually overlap, and we
         have to switch modes in order to treat them differently.  */
      if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
          || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
        cost += 20;

      return cost;
    }

  /* Moves between SSE/MMX and integer unit are expensive.  */
  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
      || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
    return ix86_cost->mmxsse_to_integer;
  if (MAYBE_FLOAT_CLASS_P (class1))
    return ix86_cost->fp_move;
  if (MAYBE_SSE_CLASS_P (class1))
    return ix86_cost->sse_move;
  if (MAYBE_MMX_CLASS_P (class1))
    return ix86_cost->mmx_move;
  return 2;
}

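/* Worked example (illustrative, using the rules above): copying DImode from
   GENERAL_REGS to SSE_REGS on a 32-bit target needs secondary memory, so the
   result is 1 plus the larger of each class's load/store memory cost, plus
   20 because two SImode stores feed one 8-byte load (a store-forwarding size
   mismatch).  A plain GENERAL_REGS to GENERAL_REGS copy stays at the default
   of 2.  */
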
/* Return 1 if hard register REGNO can hold a value of machine-mode MODE.  */
int
ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
{
  /* Flags, and only flags, can hold CCmode values.  */
  if (CC_REGNO_P (regno))
    return GET_MODE_CLASS (mode) == MODE_CC;
  if (GET_MODE_CLASS (mode) == MODE_CC
      || GET_MODE_CLASS (mode) == MODE_RANDOM
      || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
    return 0;
  if (FP_REGNO_P (regno))
    return VALID_FP_MODE_P (mode);
  if (SSE_REGNO_P (regno))
    return (TARGET_SSE ? VALID_SSE_REG_MODE (mode) : 0);
  if (MMX_REGNO_P (regno))
    return (TARGET_MMX
            ? VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode) : 0);
  /* We handle both integer and floats in the general purpose registers.
     In future we should be able to handle vector modes as well.  */
  if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
    return 0;
  /* Take care with QImode values - they can be in non-QI regs, but then
     they do cause partial register stalls.  */
  if (regno < 4 || mode != QImode || TARGET_64BIT)
    return 1;
  return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
}

/* Return the cost of moving data of mode M between a
   register and memory.  A value of 2 is the default; this cost is
   relative to those in `REGISTER_MOVE_COST'.

   If moving between registers and memory is more expensive than
   between two registers, you should define this macro to express the
   relative cost.

   Also model the increased cost of moving QImode registers in classes
   other than Q_REGS.  */
int
ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
{
  if (FLOAT_CLASS_P (class))
    {
      int index;
      switch (mode)
        {
        case SFmode:
          index = 0;
          break;
        case DFmode:
          index = 1;
          break;
        case XFmode:
        case TFmode:
          index = 2;
          break;
        default:
          return 100;
        }
      return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
    }
  if (SSE_CLASS_P (class))
    {
      int index;
      switch (GET_MODE_SIZE (mode))
        {
        case 4:
          index = 0;
          break;
        case 8:
          index = 1;
          break;
        case 16:
          index = 2;
          break;
        default:
          return 100;
        }
      return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
    }
  if (MMX_CLASS_P (class))
    {
      int index;
      switch (GET_MODE_SIZE (mode))
        {
        case 4:
          index = 0;
          break;
        case 8:
          index = 1;
          break;
        default:
          return 100;
        }
      return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
    }
  switch (GET_MODE_SIZE (mode))
    {
    case 1:
      if (in)
        return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
                : ix86_cost->movzbl_load);
      else
        return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
                : ix86_cost->int_store[0] + 4);
      break;
    case 2:
      return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
    default:
      /* Compute the number of 32bit moves needed.  TFmode is moved as
         XFmode.  */
      if (mode == TFmode)
        mode = XFmode;
      return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
              * (((int) GET_MODE_SIZE (mode)
                  + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
    }
}
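
/* Worked example (illustrative): an 8-byte integer load into GENERAL_REGS on
   a 32-bit target falls through to the default case above and costs
   int_load[2] * 2, i.e. two SImode moves; the corresponding store is
   int_store[2] * 2 likewise.  */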

/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.  */

static bool
ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
{
  enum machine_mode mode = GET_MODE (x);

  switch (code)
    {
    case CONST_INT:
    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      if (TARGET_64BIT && !x86_64_sign_extended_value (x))
        *total = 3;
      else if (TARGET_64BIT && !x86_64_zero_extended_value (x))
        *total = 2;
      else if (flag_pic && SYMBOLIC_CONST (x)
               && (!TARGET_64BIT
                   || (GET_CODE (x) != LABEL_REF
                       && (GET_CODE (x) != SYMBOL_REF
                           || !SYMBOL_REF_LOCAL_P (x)))))
        *total = 1;
      else
        *total = 0;
      return true;

    case CONST_DOUBLE:
      if (mode == VOIDmode)
        *total = 0;
      else
        switch (standard_80387_constant_p (x))
          {
          case 1: /* 0.0 */
            *total = 1;
            break;
          default: /* Other constants */
            *total = 2;
            break;
          case 0:
          case -1:
            /* Start with (MEM (SYMBOL_REF)), since that's where
               it'll probably end up.  Add a penalty for size.  */
            *total = (COSTS_N_INSNS (1)
                      + (flag_pic != 0 && !TARGET_64BIT)
                      + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
            break;
          }
      return true;

    case ZERO_EXTEND:
      /* The zero extension is often completely free on x86_64, so make
         it as cheap as possible.  */
      if (TARGET_64BIT && mode == DImode
          && GET_MODE (XEXP (x, 0)) == SImode)
        *total = 1;
      else if (TARGET_ZERO_EXTEND_WITH_AND)
        *total = COSTS_N_INSNS (ix86_cost->add);
      else
        *total = COSTS_N_INSNS (ix86_cost->movzx);
      return false;

    case SIGN_EXTEND:
      *total = COSTS_N_INSNS (ix86_cost->movsx);
      return false;

    case ASHIFT:
      if (GET_CODE (XEXP (x, 1)) == CONST_INT
          && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
        {
          HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
          if (value == 1)
            {
              *total = COSTS_N_INSNS (ix86_cost->add);
              return false;
            }
          if ((value == 2 || value == 3)
              && !TARGET_DECOMPOSE_LEA
              && ix86_cost->lea <= ix86_cost->shift_const)
            {
              *total = COSTS_N_INSNS (ix86_cost->lea);
              return false;
            }
        }
      /* FALLTHRU */

    case ROTATE:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
        {
          if (GET_CODE (XEXP (x, 1)) == CONST_INT)
            {
              if (INTVAL (XEXP (x, 1)) > 32)
                *total = COSTS_N_INSNS (ix86_cost->shift_const + 2);
              else
                *total = COSTS_N_INSNS (ix86_cost->shift_const * 2);
            }
          else
            {
              if (GET_CODE (XEXP (x, 1)) == AND)
                *total = COSTS_N_INSNS (ix86_cost->shift_var * 2);
              else
                *total = COSTS_N_INSNS (ix86_cost->shift_var * 6 + 2);
            }
        }
      else
        {
          if (GET_CODE (XEXP (x, 1)) == CONST_INT)
            *total = COSTS_N_INSNS (ix86_cost->shift_const);
          else
            *total = COSTS_N_INSNS (ix86_cost->shift_var);
        }
      return false;

    case MULT:
      if (FLOAT_MODE_P (mode))
        *total = COSTS_N_INSNS (ix86_cost->fmul);
      else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
        {
          unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
          int nbits;

          for (nbits = 0; value != 0; value >>= 1)
            nbits++;

          *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
                                  + nbits * ix86_cost->mult_bit);
        }
      else
        {
          /* This is arbitrary.  */
          *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
                                  + 7 * ix86_cost->mult_bit);
        }
      return false;

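      /* Worked example for the CONST_INT branch of the MULT case above
         (illustrative): an SImode multiply by 10 (binary 1010) runs the
         nbits loop four times, so the reported cost is
         mult_init[MODE_INDEX (SImode)] + 4 * mult_bit, in COSTS_N_INSNS
         units.  */
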
    case DIV:
    case UDIV:
    case MOD:
    case UMOD:
      if (FLOAT_MODE_P (mode))
        *total = COSTS_N_INSNS (ix86_cost->fdiv);
      else
        *total = COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)]);
      return false;

    case PLUS:
      if (FLOAT_MODE_P (mode))
        *total = COSTS_N_INSNS (ix86_cost->fadd);
      else if (!TARGET_DECOMPOSE_LEA
               && GET_MODE_CLASS (mode) == MODE_INT
               && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
        {
          if (GET_CODE (XEXP (x, 0)) == PLUS
              && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
              && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
              && CONSTANT_P (XEXP (x, 1)))
            {
              HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
              if (val == 2 || val == 4 || val == 8)
                {
                  *total = COSTS_N_INSNS (ix86_cost->lea);
                  *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
                  *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
                                      outer_code);
                  *total += rtx_cost (XEXP (x, 1), outer_code);
                  return true;
                }
            }
          else if (GET_CODE (XEXP (x, 0)) == MULT
                   && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
            {
              HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
              if (val == 2 || val == 4 || val == 8)
                {
                  *total = COSTS_N_INSNS (ix86_cost->lea);
                  *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
                  *total += rtx_cost (XEXP (x, 1), outer_code);
                  return true;
                }
            }
          else if (GET_CODE (XEXP (x, 0)) == PLUS)
            {
              *total = COSTS_N_INSNS (ix86_cost->lea);
              *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
              *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
              *total += rtx_cost (XEXP (x, 1), outer_code);
              return true;
            }
        }
      /* FALLTHRU */

    case MINUS:
      if (FLOAT_MODE_P (mode))
        {
          *total = COSTS_N_INSNS (ix86_cost->fadd);
          return false;
        }
      /* FALLTHRU */

    case AND:
    case IOR:
    case XOR:
      if (!TARGET_64BIT && mode == DImode)
        {
          *total = (COSTS_N_INSNS (ix86_cost->add) * 2
                    + (rtx_cost (XEXP (x, 0), outer_code)
                       << (GET_MODE (XEXP (x, 0)) != DImode))
                    + (rtx_cost (XEXP (x, 1), outer_code)
                       << (GET_MODE (XEXP (x, 1)) != DImode)));
          return true;
        }
      /* FALLTHRU */

    case NEG:
      if (FLOAT_MODE_P (mode))
        {
          *total = COSTS_N_INSNS (ix86_cost->fchs);
          return false;
        }
      /* FALLTHRU */

    case NOT:
      if (!TARGET_64BIT && mode == DImode)
        *total = COSTS_N_INSNS (ix86_cost->add * 2);
      else
        *total = COSTS_N_INSNS (ix86_cost->add);
      return false;

    case FLOAT_EXTEND:
      if (!TARGET_SSE_MATH || !VALID_SSE_REG_MODE (mode))
        *total = 0;
      return false;

    case ABS:
      if (FLOAT_MODE_P (mode))
        *total = COSTS_N_INSNS (ix86_cost->fabs);
      return false;

    case SQRT:
      if (FLOAT_MODE_P (mode))
        *total = COSTS_N_INSNS (ix86_cost->fsqrt);
      return false;

    case UNSPEC:
      if (XINT (x, 1) == UNSPEC_TP)
        *total = 0;
      return false;

    default:
      return false;
    }
}

#if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
static void
ix86_svr3_asm_out_constructor (rtx symbol, int priority ATTRIBUTE_UNUSED)
{
  init_section ();
  fputs ("\tpushl $", asm_out_file);
  assemble_name (asm_out_file, XSTR (symbol, 0));
  fputc ('\n', asm_out_file);
}
#endif

#if TARGET_MACHO

static int current_machopic_label_num;

/* Given a symbol name and its associated stub, write out the
   definition of the stub.  */

void
machopic_output_stub (FILE *file, const char *symb, const char *stub)
{
  unsigned int length;
  char *binder_name, *symbol_name, lazy_ptr_name[32];
  int label = ++current_machopic_label_num;

  /* Lose our funky encoding stuff so it doesn't contaminate the stub.  */
  symb = (*targetm.strip_name_encoding) (symb);

  length = strlen (stub);
  binder_name = alloca (length + 32);
  GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);

  length = strlen (symb);
  symbol_name = alloca (length + 32);
  GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);

  sprintf (lazy_ptr_name, "L%d$lz", label);

  if (MACHOPIC_PURE)
    machopic_picsymbol_stub_section ();
  else
    machopic_symbol_stub_section ();

  fprintf (file, "%s:\n", stub);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);

  if (MACHOPIC_PURE)
    {
      fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
      fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
      fprintf (file, "\tjmp %%edx\n");
    }
  else
    fprintf (file, "\tjmp *%s\n", lazy_ptr_name);

  fprintf (file, "%s:\n", binder_name);

  if (MACHOPIC_PURE)
    {
      fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
      fprintf (file, "\tpushl %%eax\n");
    }
  else
    fprintf (file, "\t pushl $%s\n", lazy_ptr_name);

  fprintf (file, "\tjmp dyld_stub_binding_helper\n");

  machopic_lazy_symbol_ptr_section ();
  fprintf (file, "%s:\n", lazy_ptr_name);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
  fprintf (file, "\t.long %s\n", binder_name);
}
#endif /* TARGET_MACHO */

/* Order the registers for register allocator.  */

void
x86_order_regs_for_local_alloc (void)
{
  int pos = 0;
  int i;

  /* First allocate the local general purpose registers.  */
  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    if (GENERAL_REGNO_P (i) && call_used_regs[i])
      reg_alloc_order [pos++] = i;

  /* Global general purpose registers.  */
  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    if (GENERAL_REGNO_P (i) && !call_used_regs[i])
      reg_alloc_order [pos++] = i;

  /* x87 registers come first in case we are doing FP math
     using them.  */
  if (!TARGET_SSE_MATH)
    for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
      reg_alloc_order [pos++] = i;

  /* SSE registers.  */
  for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
    reg_alloc_order [pos++] = i;
  for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
    reg_alloc_order [pos++] = i;

  /* x87 registers.  */
  if (TARGET_SSE_MATH)
    for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
      reg_alloc_order [pos++] = i;

  for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
    reg_alloc_order [pos++] = i;

  /* Initialize the rest of array as we do not allocate some registers
     at all.  */
  while (pos < FIRST_PSEUDO_REGISTER)
    reg_alloc_order [pos++] = 0;
}

#ifndef TARGET_USE_MS_BITFIELD_LAYOUT
#define TARGET_USE_MS_BITFIELD_LAYOUT 0
#endif

/* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
   struct attribute_spec.handler.  */
static tree
ix86_handle_struct_attribute (tree *node, tree name,
                              tree args ATTRIBUTE_UNUSED,
                              int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  tree *type = NULL;
  if (DECL_P (*node))
    {
      if (TREE_CODE (*node) == TYPE_DECL)
        type = &TREE_TYPE (*node);
    }
  else
    type = node;

  if (!(type && (TREE_CODE (*type) == RECORD_TYPE
                 || TREE_CODE (*type) == UNION_TYPE)))
    {
      warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }
  else if ((is_attribute_p ("ms_struct", name)
            && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
           || ((is_attribute_p ("gcc_struct", name)
                && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
    {
      warning ("`%s' incompatible attribute ignored",
               IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }

  return NULL_TREE;
}

static bool
ix86_ms_bitfield_layout_p (tree record_type)
{
  return ((TARGET_USE_MS_BITFIELD_LAYOUT
           && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
          || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
}

/* Returns an expression indicating where the this parameter is
   located on entry to the FUNCTION.  */

static rtx
x86_this_parameter (tree function)
{
  tree type = TREE_TYPE (function);

  if (TARGET_64BIT)
    {
      int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
      return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
    }

  if (ix86_function_regparm (type, function) > 0)
    {
      tree parm;

      parm = TYPE_ARG_TYPES (type);
      /* Figure out whether or not the function has a variable number of
         arguments.  */
      for (; parm; parm = TREE_CHAIN (parm))
        if (TREE_VALUE (parm) == void_type_node)
          break;
      /* If not, the this parameter is in the first argument.  */
      if (parm)
        {
          int regno = 0;
          if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
            regno = 2;
          return gen_rtx_REG (SImode, regno);
        }
    }

  if (aggregate_value_p (TREE_TYPE (type), type))
    return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
  else
    return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
}

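/* Illustrative summary of the cases above: on 64-bit targets `this' is in
   %rdi, or in %rsi when a hidden aggregate-return pointer occupies the first
   register; for ia32 regparm and fastcall functions it arrives in %eax or
   %ecx respectively; and under the plain stack convention it sits at
   4(%esp), shifted to 8(%esp) when an aggregate return pointer takes the
   first stack slot.  */
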
/* Determine whether x86_output_mi_thunk can succeed.  */

static bool
x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
                         HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
                         HOST_WIDE_INT vcall_offset, tree function)
{
  /* 64-bit can handle anything.  */
  if (TARGET_64BIT)
    return true;

  /* For 32-bit, everything's fine if we have one free register.  */
  if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
    return true;

  /* Need a free register for vcall_offset.  */
  if (vcall_offset)
    return false;

  /* Need a free register for GOT references.  */
  if (flag_pic && !(*targetm.binds_local_p) (function))
    return false;

  /* Otherwise ok.  */
  return true;
}

/* Output the assembler code for a thunk function.  THUNK_DECL is the
   declaration for the thunk function itself, FUNCTION is the decl for
   the target function.  DELTA is an immediate constant offset to be
   added to THIS.  If VCALL_OFFSET is nonzero, the word at
   *(*this + vcall_offset) should be added to THIS.  */

static void
x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
                     tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
                     HOST_WIDE_INT vcall_offset, tree function)
{
  rtx xops[3];
  rtx this = x86_this_parameter (function);
  rtx this_reg, tmp;

  /* If VCALL_OFFSET, we'll need THIS in a register.  Might as well
     pull it in now and let DELTA benefit.  */
  if (REG_P (this))
    this_reg = this;
  else if (vcall_offset)
    {
      /* Put the this parameter into %eax.  */
      xops[0] = this;
      xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
      output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
    }
  else
    this_reg = NULL_RTX;

  /* Adjust the this parameter by a fixed constant.  */
  if (delta)
    {
      xops[0] = GEN_INT (delta);
      xops[1] = this_reg ? this_reg : this;
      if (TARGET_64BIT)
        {
          if (!x86_64_general_operand (xops[0], DImode))
            {
              tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
              xops[1] = tmp;
              output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
              xops[0] = tmp;
              xops[1] = this;
            }
          output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
        }
      else
        output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
    }

  /* Adjust the this parameter by a value stored in the vtable.  */
  if (vcall_offset)
    {
      if (TARGET_64BIT)
        tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
      else
        {
          int tmp_regno = 2 /* ECX */;
          if (lookup_attribute ("fastcall",
                                TYPE_ATTRIBUTES (TREE_TYPE (function))))
            tmp_regno = 0 /* EAX */;
          tmp = gen_rtx_REG (SImode, tmp_regno);
        }

      xops[0] = gen_rtx_MEM (Pmode, this_reg);
      xops[1] = tmp;
      if (TARGET_64BIT)
        output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
      else
        output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);

      /* Adjust the this parameter.  */
      xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
      if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
        {
          rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
          xops[0] = GEN_INT (vcall_offset);
          xops[1] = tmp2;
          output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
          xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
        }
      xops[1] = this_reg;
      if (TARGET_64BIT)
        output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
      else
        output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
    }

  /* If necessary, drop THIS back to its stack slot.  */
  if (this_reg && this_reg != this)
    {
      xops[0] = this_reg;
      xops[1] = this;
      output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
    }

  xops[0] = XEXP (DECL_RTL (function), 0);
  if (TARGET_64BIT)
    {
      if (!flag_pic || (*targetm.binds_local_p) (function))
        output_asm_insn ("jmp\t%P0", xops);
      else
        {
          tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
          tmp = gen_rtx_CONST (Pmode, tmp);
          tmp = gen_rtx_MEM (QImode, tmp);
          xops[0] = tmp;
          output_asm_insn ("jmp\t%A0", xops);
        }
    }
  else
    {
      if (!flag_pic || (*targetm.binds_local_p) (function))
        output_asm_insn ("jmp\t%P0", xops);
      else
#if TARGET_MACHO
        if (TARGET_MACHO)
          {
            char *ip = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (function));
            tmp = gen_rtx_SYMBOL_REF (Pmode, machopic_stub_name (ip));
            tmp = gen_rtx_MEM (QImode, tmp);
            xops[0] = tmp;
            output_asm_insn ("jmp\t%0", xops);
          }
        else
#endif /* TARGET_MACHO */
        {
          tmp = gen_rtx_REG (SImode, 2 /* ECX */);
          output_set_got (tmp);

          xops[1] = tmp;
          output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
          output_asm_insn ("jmp\t{*}%1", xops);
        }
    }
}

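/* Example of the output (a sketch for ia32, non-PIC, DELTA == 4 and no
   vcall offset): `this' lives at 4(%esp), so the whole thunk body is

        addl    $4, 4(%esp)
        jmp     target_function

   i.e. adjust the this pointer in place and tail-jump to the real method.  */
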
static void
x86_file_start (void)
{
  default_file_start ();
  if (X86_FILE_START_VERSION_DIRECTIVE)
    fputs ("\t.version\t\"01.01\"\n", asm_out_file);
  if (X86_FILE_START_FLTUSED)
    fputs ("\t.global\t__fltused\n", asm_out_file);
  if (ix86_asm_dialect == ASM_INTEL)
    fputs ("\t.intel_syntax\n", asm_out_file);
}

int
x86_field_alignment (tree field, int computed)
{
  enum machine_mode mode;
  tree type = TREE_TYPE (field);

  if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
    return computed;
  mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
                    ? get_inner_array_type (type) : type);
  if (mode == DFmode || mode == DCmode
      || GET_MODE_CLASS (mode) == MODE_INT
      || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
    return MIN (32, computed);
  return computed;
}

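/* Example of the effect (illustrative): on ia32 without -malign-double, a
   structure member of type double is capped at 32-bit alignment, so in
   struct { char c; double d; } the member d sits at offset 4; on 64-bit
   targets or with TARGET_ALIGN_DOUBLE the natural 64-bit alignment is kept
   and d moves to offset 8.  */
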
/* Output assembler code to FILE to increment profiler label # LABELNO
   for profiling a function entry.  */
void
x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
{
  if (TARGET_64BIT)
    if (flag_pic)
      {
#ifndef NO_PROFILE_COUNTERS
        fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
#endif
        fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
      }
    else
      {
#ifndef NO_PROFILE_COUNTERS
        fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
#endif
        fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
      }
  else if (flag_pic)
    {
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
               LPREFIX, labelno, PROFILE_COUNT_REGISTER);
#endif
      fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
    }
  else
    {
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
               PROFILE_COUNT_REGISTER);
#endif
      fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
    }
}

/* We don't have exact information about the insn sizes, but we may assume
   quite safely that we are informed about all 1 byte insns and memory
   address sizes.  This is enough to eliminate unnecessary padding in
   99% of cases.  */

static int
min_insn_size (rtx insn)
{
  int l = 0;

  if (!INSN_P (insn) || !active_insn_p (insn))
    return 0;

  /* Discard alignments we've emitted, and jump instructions.  */
  if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
      && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
    return 0;
  if (GET_CODE (insn) == JUMP_INSN
      && (GET_CODE (PATTERN (insn)) == ADDR_VEC
          || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
    return 0;

  /* Important case - calls are always 5 bytes.
     It is common to have many calls in a row.  */
  if (GET_CODE (insn) == CALL_INSN
      && symbolic_reference_mentioned_p (PATTERN (insn))
      && !SIBLING_CALL_P (insn))
    return 5;
  if (get_attr_length (insn) <= 1)
    return 1;

  /* For normal instructions we may rely on the sizes of addresses
     and the presence of a symbol to require 4 bytes of encoding.
     This is not the case for jumps where references are PC relative.  */
  if (GET_CODE (insn) != JUMP_INSN)
    {
      l = get_attr_length_address (insn);
      if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
        l = 4;
    }
  if (l)
    return 1 + l;
  else
    return 2;
}

/* AMD K8 core mispredicts jumps when there are more than 3 jumps in a 16 byte
   window.  */

static void
k8_avoid_jump_misspredicts (void)
{
  rtx insn, start = get_insns ();
  int nbytes = 0, njumps = 0;
  int isjump = 0;

  /* Look for all minimal intervals of instructions containing 4 jumps.
     The intervals are bounded by START and INSN.  NBYTES is the total
     size of instructions in the interval including INSN and not including
     START.  When NBYTES is smaller than 16 bytes, it is possible
     that the end of START and INSN ends up in the same 16 byte page.

     The smallest offset in the page INSN can start at is the case where
     START ends on offset 0.  The offset of INSN is then
     NBYTES - sizeof (INSN).  We add a p2align to the 16 byte window with
     maxskip 17 - NBYTES + sizeof (INSN).  */
  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      nbytes += min_insn_size (insn);
      if (rtl_dump_file)
        fprintf (rtl_dump_file, "Insn %i estimated to %i bytes\n",
                 INSN_UID (insn), min_insn_size (insn));
      if ((GET_CODE (insn) == JUMP_INSN
           && GET_CODE (PATTERN (insn)) != ADDR_VEC
           && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
          || GET_CODE (insn) == CALL_INSN)
        njumps++;
      else
        continue;

      while (njumps > 3)
        {
          start = NEXT_INSN (start);
          if ((GET_CODE (start) == JUMP_INSN
               && GET_CODE (PATTERN (start)) != ADDR_VEC
               && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
              || GET_CODE (start) == CALL_INSN)
            njumps--, isjump = 1;
          else
            isjump = 0;
          nbytes -= min_insn_size (start);
        }
      if (njumps < 0)
        abort ();
      if (rtl_dump_file)
        fprintf (rtl_dump_file, "Interval %i to %i has %i bytes\n",
                 INSN_UID (start), INSN_UID (insn), nbytes);

      if (njumps == 3 && isjump && nbytes < 16)
        {
          int padsize = 15 - nbytes + min_insn_size (insn);

          if (rtl_dump_file)
            fprintf (rtl_dump_file, "Padding insn %i by %i bytes!\n",
                     INSN_UID (insn), padsize);
          emit_insn_before (gen_align (GEN_INT (padsize)), insn);
        }
    }
}

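/* Worked example of the window arithmetic above (illustrative): suppose the
   scan already holds three jumps, the current insn is a fourth, NBYTES is 12
   and min_insn_size (insn) is 2.  The padding emitted is 15 - 12 + 2 = 5
   bytes, enough to push the last jump into the next aligned 16 byte window
   so that no window holds more than three branches.  */
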
/* Implement machine specific optimizations.
   At the moment we implement a single transformation: AMD Athlon works faster
   when RET is not the destination of a conditional jump or directly preceded
   by another jump instruction.  We avoid the penalty by inserting a NOP just
   before the RET instructions in such cases.  */
static void
ix86_reorg (void)
{
  edge e;

  if (!TARGET_ATHLON_K8 || !optimize || optimize_size)
    return;
  for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
    {
      basic_block bb = e->src;
      rtx ret = bb->end;
      rtx prev;
      bool replace = false;

      if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
          || !maybe_hot_bb_p (bb))
        continue;
      for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
        if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
          break;
      if (prev && GET_CODE (prev) == CODE_LABEL)
        {
          edge e;
          for (e = bb->pred; e; e = e->pred_next)
            if (EDGE_FREQUENCY (e) && e->src->index >= 0
                && !(e->flags & EDGE_FALLTHRU))
              replace = true;
        }
      if (!replace)
        {
          prev = prev_active_insn (ret);
          if (prev
              && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
                  || GET_CODE (prev) == CALL_INSN))
            replace = true;
          /* Empty functions get a branch mispredict even when the jump
             destination is not visible to us.  */
          if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
            replace = true;
        }
      if (replace)
        {
          emit_insn_before (gen_return_internal_long (), ret);
          delete_insn (ret);
        }
    }
  k8_avoid_jump_misspredicts ();
}

/* Return nonzero when a QImode register that must be represented via a REX
   prefix is used.  */
bool
x86_extended_QIreg_mentioned_p (rtx insn)
{
  int i;
  extract_insn_cached (insn);
  for (i = 0; i < recog_data.n_operands; i++)
    if (REG_P (recog_data.operand[i])
        && REGNO (recog_data.operand[i]) >= 4)
      return true;
  return false;
}

/* Return nonzero when P points to a register encoded via a REX prefix.
   Called via for_each_rtx.  */
static int
extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
{
  unsigned int regno;
  if (!REG_P (*p))
    return 0;
  regno = REGNO (*p);
  return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
}

/* Return true when INSN mentions a register that must be encoded using a REX
   prefix.  */
bool
x86_extended_reg_mentioned_p (rtx insn)
{
  return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
}

/* Generate an unsigned DImode/SImode to FP conversion.  This is the same code
   optabs would emit if we didn't have TFmode patterns.  */

void
x86_emit_floatuns (rtx operands[2])
{
  rtx neglab, donelab, i0, i1, f0, in, out;
  enum machine_mode mode, inmode;

  inmode = GET_MODE (operands[1]);
  if (inmode != SImode
      && inmode != DImode)
    abort ();

  out = operands[0];
  in = force_reg (inmode, operands[1]);
  mode = GET_MODE (out);
  neglab = gen_label_rtx ();
  donelab = gen_label_rtx ();
  i1 = gen_reg_rtx (Pmode);
  f0 = gen_reg_rtx (mode);

  emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);

  emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
  emit_jump_insn (gen_jump (donelab));
  emit_barrier ();

  emit_label (neglab);

  i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
  i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
  i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
  expand_float (f0, i0, 0);
  emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));

  emit_label (donelab);
}

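/* The halving trick above in scalar terms (an illustrative sketch): for an
   input X with its top bit set, the signed FLOAT path would produce a
   negative value, so the code instead computes

       i0 = (X >> 1) | (X & 1);   halve, folding bit 0 back in
       f0 = (floating) i0;        now within signed range
       result = f0 + f0;          double back to the true magnitude

   Or-ing the low bit into the shifted value makes the final rounding match
   what a direct unsigned conversion would have produced.  */
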
/* Return true if we do not know how to pass TYPE solely in registers.  */
bool
ix86_must_pass_in_stack (enum machine_mode mode, tree type)
{
  if (default_must_pass_in_stack (mode, type))
    return true;
  return (!TARGET_64BIT && type && mode == TImode);
}

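/* Example (illustrative): a TImode argument - a 128-bit integer value with a
   type - on a 32-bit target is the case caught by the second test; it is
   always passed on the stack rather than split across registers.  */
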
#include "gt-i386.h"