/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003 Free Software Foundation, Inc.

This file is part of GNU CC.

GNU CC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU CC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "except.h"
#include "function.h"
#include "recog.h"
#include "expr.h"
#include "optabs.h"
#include "toplev.h"
#include "basic-block.h"
#include "ggc.h"
#include "target.h"
#include "target-def.h"
#include "langhooks.h"
#include "cgraph.h"

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif

/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
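
/* The five-entry multiply and divide arrays in the cost tables below are
   indexed with this macro (QImode, HImode, SImode, DImode, wider), as in
   ix86_cost->divide[MODE_INDEX (mode)].  */
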
/* Processor costs (relative to an add) */
static const
struct processor_costs size_cost = {	/* costs for tuning for size */
  2,					/* cost of an add instruction */
  3,					/* cost of a lea instruction */
  2,					/* variable shift costs */
  3,					/* constant shift costs */
  {3, 3, 3, 3, 5},			/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  {3, 3, 3, 3, 5},			/* cost of a divide/mod */
  3,					/* cost of movsx */
  3,					/* cost of movzx */
  0,					/* "large" insn */
  2,					/* MOVE_RATIO */
  2,					/* cost for loading QImode using movzbl */
  {2, 2, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 2, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 2},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {2, 2, 2},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  3,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {3, 3},				/* cost of storing MMX registers
					   in SImode and DImode */
  3,					/* cost of moving SSE register */
  {3, 3, 3},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {3, 3, 3},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  1,					/* Branch cost */
  2,					/* cost of FADD and FSUB insns.  */
  2,					/* cost of FMUL instruction.  */
  2,					/* cost of FDIV instruction.  */
  2,					/* cost of FABS instruction.  */
  2,					/* cost of FCHS instruction.  */
  2,					/* cost of FSQRT instruction.  */
};

/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = {	/* 386 specific costs */
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  3,					/* variable shift costs */
  2,					/* constant shift costs */
  {6, 6, 6, 6, 6},			/* cost of starting a multiply */
  1,					/* cost of multiply per each bit set */
  {23, 23, 23, 23, 23},			/* cost of a divide/mod */
  3,					/* cost of movsx */
  2,					/* cost of movzx */
  15,					/* "large" insn */
  3,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {8, 8, 8},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {8, 8, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  1,					/* Branch cost */
  23,					/* cost of FADD and FSUB insns.  */
  27,					/* cost of FMUL instruction.  */
  88,					/* cost of FDIV instruction.  */
  22,					/* cost of FABS instruction.  */
  24,					/* cost of FCHS instruction.  */
  122,					/* cost of FSQRT instruction.  */
};

static const
struct processor_costs i486_cost = {	/* 486 specific costs */
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  3,					/* variable shift costs */
  2,					/* constant shift costs */
  {12, 12, 12, 12, 12},			/* cost of starting a multiply */
  1,					/* cost of multiply per each bit set */
  {40, 40, 40, 40, 40},			/* cost of a divide/mod */
  3,					/* cost of movsx */
  2,					/* cost of movzx */
  15,					/* "large" insn */
  3,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {8, 8, 8},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {8, 8, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  1,					/* Branch cost */
  8,					/* cost of FADD and FSUB insns.  */
  16,					/* cost of FMUL instruction.  */
  73,					/* cost of FDIV instruction.  */
  3,					/* cost of FABS instruction.  */
  3,					/* cost of FCHS instruction.  */
  83,					/* cost of FSQRT instruction.  */
};

static const
struct processor_costs pentium_cost = {
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  4,					/* variable shift costs */
  1,					/* constant shift costs */
  {11, 11, 11, 11, 11},			/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  {25, 25, 25, 25, 25},			/* cost of a divide/mod */
  3,					/* cost of movsx */
  2,					/* cost of movzx */
  8,					/* "large" insn */
  6,					/* MOVE_RATIO */
  6,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  8,					/* cost of moving MMX register */
  {8, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {8, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  2,					/* Branch cost */
  3,					/* cost of FADD and FSUB insns.  */
  3,					/* cost of FMUL instruction.  */
  39,					/* cost of FDIV instruction.  */
  1,					/* cost of FABS instruction.  */
  1,					/* cost of FCHS instruction.  */
  70,					/* cost of FSQRT instruction.  */
};

static const
struct processor_costs pentiumpro_cost = {
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  1,					/* variable shift costs */
  1,					/* constant shift costs */
  {4, 4, 4, 4, 4},			/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  {17, 17, 17, 17, 17},			/* cost of a divide/mod */
  1,					/* cost of movsx */
  1,					/* cost of movzx */
  8,					/* "large" insn */
  6,					/* MOVE_RATIO */
  2,					/* cost for loading QImode using movzbl */
  {4, 4, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 2, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {2, 2, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  32,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  2,					/* Branch cost */
  3,					/* cost of FADD and FSUB insns.  */
  5,					/* cost of FMUL instruction.  */
  56,					/* cost of FDIV instruction.  */
  2,					/* cost of FABS instruction.  */
  2,					/* cost of FCHS instruction.  */
  56,					/* cost of FSQRT instruction.  */
};

static const
struct processor_costs k6_cost = {
  1,					/* cost of an add instruction */
  2,					/* cost of a lea instruction */
  1,					/* variable shift costs */
  1,					/* constant shift costs */
  {3, 3, 3, 3, 3},			/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  {18, 18, 18, 18, 18},			/* cost of a divide/mod */
  2,					/* cost of movsx */
  2,					/* cost of movzx */
  8,					/* "large" insn */
  4,					/* MOVE_RATIO */
  3,					/* cost for loading QImode using movzbl */
  {4, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 3, 2},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {6, 6, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 4},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {2, 2, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  6,					/* MMX or SSE register to integer */
  32,					/* size of prefetch block */
  1,					/* number of parallel prefetches */
  1,					/* Branch cost */
  2,					/* cost of FADD and FSUB insns.  */
  2,					/* cost of FMUL instruction.  */
  56,					/* cost of FDIV instruction.  */
  2,					/* cost of FABS instruction.  */
  2,					/* cost of FCHS instruction.  */
  56,					/* cost of FSQRT instruction.  */
};

static const
struct processor_costs athlon_cost = {
  1,					/* cost of an add instruction */
  2,					/* cost of a lea instruction */
  1,					/* variable shift costs */
  1,					/* constant shift costs */
  {5, 5, 5, 5, 5},			/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  {18, 26, 42, 74, 74},			/* cost of a divide/mod */
  1,					/* cost of movsx */
  1,					/* cost of movzx */
  8,					/* "large" insn */
  9,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 4},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 4, 6},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  2,					/* Branch cost */
  4,					/* cost of FADD and FSUB insns.  */
  4,					/* cost of FMUL instruction.  */
  24,					/* cost of FDIV instruction.  */
  2,					/* cost of FABS instruction.  */
  2,					/* cost of FCHS instruction.  */
  35,					/* cost of FSQRT instruction.  */
};

static const
struct processor_costs k8_cost = {
  1,					/* cost of an add instruction */
  2,					/* cost of a lea instruction */
  1,					/* variable shift costs */
  1,					/* constant shift costs */
  {3, 4, 3, 4, 5},			/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  {18, 26, 42, 74, 74},			/* cost of a divide/mod */
  1,					/* cost of movsx */
  1,					/* cost of movzx */
  8,					/* "large" insn */
  9,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 3, 6},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  2,					/* Branch cost */
  4,					/* cost of FADD and FSUB insns.  */
  4,					/* cost of FMUL instruction.  */
  19,					/* cost of FDIV instruction.  */
  2,					/* cost of FABS instruction.  */
  2,					/* cost of FCHS instruction.  */
  35,					/* cost of FSQRT instruction.  */
};

static const
struct processor_costs pentium4_cost = {
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  4,					/* variable shift costs */
  4,					/* constant shift costs */
  {15, 15, 15, 15, 15},			/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  {56, 56, 56, 56, 56},			/* cost of a divide/mod */
  1,					/* cost of movsx */
  1,					/* cost of movzx */
  16,					/* "large" insn */
  6,					/* MOVE_RATIO */
  2,					/* cost for loading QImode using movzbl */
  {4, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 3, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  12,					/* cost of moving SSE register */
  {12, 12, 12},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  10,					/* MMX or SSE register to integer */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  2,					/* Branch cost */
  5,					/* cost of FADD and FSUB insns.  */
  7,					/* cost of FMUL instruction.  */
  43,					/* cost of FDIV instruction.  */
  2,					/* cost of FABS instruction.  */
  2,					/* cost of FCHS instruction.  */
  43,					/* cost of FSQRT instruction.  */
};

const struct processor_costs *ix86_cost = &pentium_cost;
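/* ix86_cost is repointed in override_options below: to &size_cost when
   optimizing for size, otherwise to the cost table selected by -mtune.  */
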
/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_K6 (1<<PROCESSOR_K6)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
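
/* The x86_* tuning bitmaps below are tested in i386.h against the bit of
   the active -mtune processor; e.g. TARGET_USE_LEAVE amounts to
   (x86_use_leave & (1 << ix86_tune)) there (modulo the exact macro
   spelling).  */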
const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4;
const int x86_zero_extend_with_and = m_486 | m_PENT;
const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
const int x86_double_with_add = ~m_386;
const int x86_use_bit_test = m_386;
const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4;
const int x86_3dnow_a = m_ATHLON_K8;
const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4;
const int x86_branch_hints = m_PENT4;
const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
const int x86_partial_reg_stall = m_PPRO;
const int x86_use_loop = m_K6;
const int x86_use_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
const int x86_use_mov0 = m_K6;
const int x86_use_cltd = ~(m_PENT | m_K6);
const int x86_read_modify_write = ~m_PENT;
const int x86_read_modify = ~(m_PENT | m_PPRO);
const int x86_split_long_moves = m_PPRO;
const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
const int x86_single_stringop = m_386 | m_PENT4;
const int x86_qimode_math = ~(0);
const int x86_promote_qi_regs = 0;
const int x86_himode_math = ~(m_PPRO);
const int x86_promote_hi_regs = m_PPRO;
const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4;
const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4;
const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4;
const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_PPRO);
const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4;
const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4;
const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_PPRO;
const int x86_prologue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
const int x86_epilogue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
const int x86_decompose_lea = m_PENT4;
const int x86_shift1 = ~m_486;
const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4;
const int x86_sse_partial_reg_dependency = m_PENT4 | m_PPRO;
/* Set for machines where the type and dependencies are resolved on SSE
   register parts instead of whole registers, so we may maintain just the
   lower part of scalar values in proper format, leaving the upper part
   undefined.  */
const int x86_sse_partial_regs = m_ATHLON_K8;
/* Athlon optimizes the partial-register FPS special case, thus avoiding
   the need for extra instructions beforehand.  */
const int x86_sse_partial_regs_for_cvtsd2ss = 0;
const int x86_sse_typeless_stores = m_ATHLON_K8;
const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4;
const int x86_use_ffreep = m_ATHLON_K8;
const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
const int x86_inter_unit_moves = ~(m_ATHLON_K8);
const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_PPRO;

/* In case the average insn count for single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 20

/* Names for 8 (low), 8 (high), and 16-bit registers, respectively.  */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
static const char *const hi_reg_name[] = HI_REGISTER_NAMES;

/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */

enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  /* ax, dx, cx, bx */
  AREG, DREG, CREG, BREG,
  /* si, di, bp, sp */
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  /* FP registers */
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* arg pointer */
  NON_Q_REGS,
  /* flags, fpsr, dirflag, frame */
  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  MMX_REGS, MMX_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
};
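
/* For example, REGNO_REG_CLASS (0) is AREG (%eax), regno 8 (%st(0)) is
   FP_TOP_REG, and the extended x86-64 integer registers map to
   NON_Q_REGS.  */
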
/* The "default" register map used in 32bit mode.  */

int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 4, 5,		/* general regs */
  12, 13, 14, 15, 16, 17, 18, 19,	/* fp regs */
  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
};

static int const x86_64_int_parameter_registers[6] =
{
  5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
  FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
};

static int const x86_64_int_return_registers[4] =
{
  0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
};

/* The "default" register map used in 64bit mode.  */
int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 1, 2, 3, 4, 5, 6, 7,		/* general regs */
  33, 34, 35, 36, 37, 38, 39, 40,	/* fp regs */
  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  17, 18, 19, 20, 21, 22, 23, 24,	/* SSE */
  41, 42, 43, 44, 45, 46, 47, 48,	/* MMX */
  8, 9, 10, 11, 12, 13, 14, 15,		/* extended integer registers */
  25, 26, 27, 28, 29, 30, 31, 32,	/* extended SSE registers */
};

/* Define the register numbers to be used in Dwarf debugging information.
   The SVR4 reference port C compiler uses the following register numbers
   in its Dwarf output code:
	0 for %eax (gcc regno = 0)
	1 for %ecx (gcc regno = 2)
	2 for %edx (gcc regno = 1)
	3 for %ebx (gcc regno = 3)
	4 for %esp (gcc regno = 7)
	5 for %ebp (gcc regno = 6)
	6 for %esi (gcc regno = 4)
	7 for %edi (gcc regno = 5)
   The following three DWARF register numbers are never generated by
   the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
   believes these numbers have these meanings.
	8  for %eip    (no gcc equivalent)
	9  for %eflags (gcc regno = 17)
	10 for %trapno (no gcc equivalent)
   It is not at all clear how we should number the FP stack registers
   for the x86 architecture.  If the version of SDB on x86/svr4 were
   a bit less brain dead with respect to floating-point then we would
   have a precedent to follow with respect to DWARF register numbers
   for x86 FP registers, but the SDB on x86/svr4 is so completely
   broken with respect to FP registers that it is hardly worth thinking
   of it as something to strive for compatibility with.
   The version of x86/svr4 SDB I have at the moment does (partially)
   seem to believe that DWARF register number 11 is associated with
   the x86 register %st(0), but that's about all.  Higher DWARF
   register numbers don't seem to be associated with anything in
   particular, and even for DWARF regno 11, SDB only seems to
   understand that it should say that a variable lives in %st(0) (when
   asked via an `=' command) if we said it was in DWARF regno 11,
   but SDB still prints garbage when asked for the value of the
   variable in question (via a `/' command).
   (Also note that the labels SDB prints for various FP stack regs
   when doing an `x' command are all wrong.)
   Note that these problems generally don't affect the native SVR4
   C compiler because it doesn't allow the use of -O with -g and
   because when it is *not* optimizing, it allocates a memory
   location for each floating-point variable, and the memory
   location is what gets described in the DWARF AT_location
   attribute for the variable in question.
   Regardless of the severe mental illness of the x86/svr4 SDB, we
   do something sensible here and we use the following DWARF
   register numbers.  Note that these are all stack-top-relative
   numbers.
	11 for %st(0) (gcc regno = 8)
	12 for %st(1) (gcc regno = 9)
	13 for %st(2) (gcc regno = 10)
	14 for %st(3) (gcc regno = 11)
	15 for %st(4) (gcc regno = 12)
	16 for %st(5) (gcc regno = 13)
	17 for %st(6) (gcc regno = 14)
	18 for %st(7) (gcc regno = 15)
*/
int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 5, 4,		/* general regs */
  11, 12, 13, 14, 15, 16, 17, 18,	/* fp regs */
  -1, 9, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE registers */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
};
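
/* Example: gcc regno 7 is %esp, and svr4_dbx_register_map[7] == 4, the
   SVR4 DWARF number listed for %esp above.  */
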
/* Test and compare insns in i386.md store the information needed to
   generate branch and scc insns here.  */

rtx ix86_compare_op0 = NULL_RTX;
rtx ix86_compare_op1 = NULL_RTX;

#define MAX_386_STACK_LOCALS 3
/* Size of the register save area.  */
#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)

/* Define the structure for the machine field in struct function.  */

struct stack_local_entry GTY(())
{
  unsigned short mode;
  unsigned short n;
  rtx rtl;
  struct stack_local_entry *next;
};
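
/* Each entry records one slot handed out by assign_386_stack_local
   (defined later in this file): MODE and N identify the request, RTL is
   the slot's MEM, and entries are chained per function through NEXT.  */
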
/* Structure describing stack frame layout.
   Stack grows downward:

   [arguments]
					      <- ARG_POINTER
   saved pc

   saved frame pointer if frame_pointer_needed
					      <- HARD_FRAME_POINTER
   [saved regs]

   [padding1]          \
			)
   [va_arg registers]  (
			> to_allocate	      <- FRAME_POINTER
   [frame]	       (
			)
   [padding2]	       /
  */
struct ix86_frame
{
  int nregs;
  int padding1;
  int va_arg_size;
  HOST_WIDE_INT frame;
  int padding2;
  int outgoing_arguments_size;
  int red_zone_size;

  HOST_WIDE_INT to_allocate;
  /* The offsets relative to ARG_POINTER.  */
  HOST_WIDE_INT frame_pointer_offset;
  HOST_WIDE_INT hard_frame_pointer_offset;
  HOST_WIDE_INT stack_pointer_offset;

  /* When save_regs_using_mov is set, emit prologue using
     move instead of push instructions.  */
  bool save_regs_using_mov;
};
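
/* An ix86_frame is filled in on demand by ix86_compute_frame_layout
   (declared below) and consulted by the prologue/epilogue emitters
   instead of recomputing offsets.  */
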
/* Used to enable/disable debugging features.  */
const char *ix86_debug_arg_string, *ix86_debug_addr_string;
/* Code model option as passed by user.  */
const char *ix86_cmodel_string;
/* Parsed value.  */
enum cmodel ix86_cmodel;
/* Asm dialect.  */
const char *ix86_asm_string;
enum asm_dialect ix86_asm_dialect = ASM_ATT;
/* TLS dialect.  */
const char *ix86_tls_dialect_string;
enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;

/* Which unit we are generating floating point math for.  */
enum fpmath_unit ix86_fpmath;

/* Which CPU we are scheduling for.  */
enum processor_type ix86_tune;
/* Which instruction set architecture to use.  */
enum processor_type ix86_arch;

/* Strings to hold which cpu and instruction set architecture to use.  */
const char *ix86_tune_string;		/* for -mtune=<xxx> */
const char *ix86_arch_string;		/* for -march=<xxx> */
const char *ix86_fpmath_string;		/* for -mfpmath=<xxx> */

/* # of registers to use to pass arguments.  */
const char *ix86_regparm_string;

/* True if the SSE prefetch instruction is not a NOOP.  */
int x86_prefetch_sse;

/* ix86_regparm_string as a number */
int ix86_regparm;

/* Alignment to use for loops and jumps:  */

/* Power of two alignment for loops.  */
const char *ix86_align_loops_string;

/* Power of two alignment for non-loop jumps.  */
const char *ix86_align_jumps_string;

/* Power of two alignment for stack boundary in bytes.  */
const char *ix86_preferred_stack_boundary_string;

/* Preferred alignment for stack boundary in bits.  */
int ix86_preferred_stack_boundary;

/* Values 1-5: see jump.c */
int ix86_branch_cost;
const char *ix86_branch_cost_string;

/* Power of two alignment for functions.  */
const char *ix86_align_funcs_string;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
static char internal_label_prefix[16];
static int internal_label_prefix_len;

static int local_symbolic_operand (rtx, enum machine_mode);
static int tls_symbolic_operand_1 (rtx, enum tls_model);
static void output_pic_addr_const (FILE *, rtx, int);
static void put_condition_code (enum rtx_code, enum machine_mode,
				int, int, FILE *);
static const char *get_some_local_dynamic_name (void);
static int get_some_local_dynamic_name_1 (rtx *, void *);
static rtx maybe_get_pool_constant (rtx);
static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
						   rtx *);
static rtx get_thread_pointer (int);
static rtx legitimize_tls_address (rtx, enum tls_model, int);
static void get_pc_thunk_name (char [32], unsigned int);
static rtx gen_push (rtx);
static int memory_address_length (rtx addr);
static int ix86_flags_dependant (rtx, rtx, enum attr_type);
static int ix86_agi_dependant (rtx, rtx, enum attr_type);
static enum attr_ppro_uops ix86_safe_ppro_uops (rtx);
static void ix86_dump_ppro_packet (FILE *);
static void ix86_reorder_insn (rtx *, rtx *);
static struct machine_function * ix86_init_machine_status (void);
static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
static int ix86_nsaved_regs (void);
static void ix86_emit_save_regs (void);
static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
static void ix86_emit_restore_regs_using_mov (rtx, int, int);
static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void ix86_set_move_mem_attrs_1 (rtx, rtx, rtx, rtx, rtx);
static void ix86_sched_reorder_ppro (rtx *, rtx *);
static HOST_WIDE_INT ix86_GOT_alias_set (void);
static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
static rtx ix86_expand_aligntest (rtx, int);
static void ix86_expand_strlensi_unroll_1 (rtx, rtx);
static int ix86_issue_rate (void);
static int ix86_adjust_cost (rtx, rtx, rtx, int);
static void ix86_sched_init (FILE *, int, int);
static int ix86_sched_reorder (FILE *, int, rtx *, int *, int);
static int ix86_variable_issue (FILE *, int, rtx, int);
static int ia32_use_dfa_pipeline_interface (void);
static int ia32_multipass_dfa_lookahead (void);
static void ix86_init_mmx_sse_builtins (void);
static rtx x86_this_parameter (tree);
static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
				 HOST_WIDE_INT, tree);
static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
static void x86_file_start (void);
static void ix86_reorg (void);
bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);

struct ix86_address
{
  rtx base, index, disp;
  HOST_WIDE_INT scale;
  enum ix86_address_seg { SEG_DEFAULT, SEG_FS, SEG_GS } seg;
};

static int ix86_decompose_address (rtx, struct ix86_address *);
static int ix86_address_cost (rtx);
static bool ix86_cannot_force_const_mem (rtx);
static rtx ix86_delegitimize_address (rtx);

struct builtin_description;
static rtx ix86_expand_sse_comi (const struct builtin_description *,
				 tree, rtx);
static rtx ix86_expand_sse_compare (const struct builtin_description *,
				    tree, rtx);
static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
static rtx ix86_expand_store_builtin (enum insn_code, tree);
static rtx safe_vector_operand (rtx, enum machine_mode);
static enum rtx_code ix86_fp_compare_code_to_integer (enum rtx_code);
static void ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *,
				      enum rtx_code *, enum rtx_code *);
static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
static int ix86_fp_comparison_cost (enum rtx_code code);
static unsigned int ix86_select_alt_pic_regnum (void);
static int ix86_save_reg (unsigned int, int);
static void ix86_compute_frame_layout (struct ix86_frame *);
static int ix86_comp_type_attributes (tree, tree);
static int ix86_fntype_regparm (tree);
const struct attribute_spec ix86_attribute_table[];
static bool ix86_function_ok_for_sibcall (tree, tree);
static tree ix86_handle_cdecl_attribute (tree *, tree, tree, int, bool *);
static tree ix86_handle_regparm_attribute (tree *, tree, tree, int, bool *);
static int ix86_value_regno (enum machine_mode);
static bool contains_128bit_aligned_vector_p (tree);
static bool ix86_ms_bitfield_layout_p (tree);
static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
static int extended_reg_mentioned_1 (rtx *, void *);
static bool ix86_rtx_costs (rtx, int, int, int *);
static int min_insn_size (rtx);
static void k8_avoid_jump_misspredicts (void);

#if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
static void ix86_svr3_asm_out_constructor (rtx, int);
#endif

/* Register class used for passing given 64bit part of the argument.
   These represent classes as documented by the PS ABI, with the exception
   of SSESF, SSEDF classes, that are basically SSE class, just gcc will
   use SF or DFmode move instead of DImode to avoid reformatting penalties.

   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
   whenever possible (upper half does contain padding).  */
enum x86_64_reg_class
  {
    X86_64_NO_CLASS,
    X86_64_INTEGER_CLASS,
    X86_64_INTEGERSI_CLASS,
    X86_64_SSE_CLASS,
    X86_64_SSESF_CLASS,
    X86_64_SSEDF_CLASS,
    X86_64_SSEUP_CLASS,
    X86_64_X87_CLASS,
    X86_64_X87UP_CLASS,
    X86_64_MEMORY_CLASS
  };
static const char * const x86_64_reg_class_name[] =
  {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};

#define MAX_CLASSES 4
static int classify_argument (enum machine_mode, tree,
			      enum x86_64_reg_class [MAX_CLASSES], int);
static int examine_argument (enum machine_mode, tree, int, int *, int *);
static rtx construct_container (enum machine_mode, tree, int, int, int,
				const int *, int);
static enum x86_64_reg_class merge_classes (enum x86_64_reg_class,
					    enum x86_64_reg_class);

/* Table of constants used by fldpi, fldln2, etc...  */
static REAL_VALUE_TYPE ext_80387_constants_table [5];
static bool ext_80387_constants_init = 0;
static void init_ext_80387_constants (void);

/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
#  undef TARGET_MERGE_DECL_ATTRIBUTES
#  define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue

#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
#ifdef ASM_QUAD
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
#endif

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT ix86_sched_init
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER ix86_sched_reorder
#undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
#define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
  ia32_use_dfa_pipeline_interface
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  ia32_multipass_dfa_lookahead

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif
#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START x86_file_start

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ix86_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST ix86_address_cost

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg

struct gcc_target targetm = TARGET_INITIALIZER;

/* The svr4 ABI for the i386 says that records and unions are returned
   in memory.  */
#ifndef DEFAULT_PCC_STRUCT_RETURN
#define DEFAULT_PCC_STRUCT_RETURN 1
#endif

/* Sometimes certain combinations of command options do not make
   sense on a particular target machine.  You can define a macro
   `OVERRIDE_OPTIONS' to take account of this.  This macro, if
   defined, is executed once just after all the command options have
   been parsed.

   Don't use this macro to turn on various extra optimizations for
   `-O'.  That is what `OPTIMIZATION_OPTIONS' is for.  */

void
override_options (void)
{
  int i;
  /* Comes from final.c -- no real reason to change it.  */
#define MAX_CODE_ALIGN 16

  static struct ptt
    {
      const struct processor_costs *cost;	/* Processor costs */
      const int target_enable;			/* Target flags to enable.  */
      const int target_disable;			/* Target flags to disable.  */
      const int align_loop;			/* Default alignments.  */
      const int align_loop_max_skip;
      const int align_jump;
      const int align_jump_max_skip;
      const int align_func;
    }
  const processor_target_table[PROCESSOR_max] =
    {
      {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
      {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
      {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
      {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
      {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
      {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
      {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
      {&k8_cost, 0, 0, 16, 7, 16, 7, 16}
    };

  static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
  static struct pta
    {
      const char *const name;		/* processor name or nickname.  */
      const enum processor_type processor;
      const enum pta_flags
	{
	  PTA_SSE = 1,
	  PTA_SSE2 = 2,
	  PTA_MMX = 4,
	  PTA_PREFETCH_SSE = 8,
	  PTA_3DNOW = 16,
	  PTA_3DNOW_A = 64,
	  PTA_64BIT = 128
	} flags;
    }
  const processor_alias_table[] =
    {
      {"i386", PROCESSOR_I386, 0},
      {"i486", PROCESSOR_I486, 0},
      {"i586", PROCESSOR_PENTIUM, 0},
      {"pentium", PROCESSOR_PENTIUM, 0},
      {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
      {"winchip-c6", PROCESSOR_I486, PTA_MMX},
      {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
      {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
      {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
      {"i686", PROCESSOR_PENTIUMPRO, 0},
      {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
      {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
      {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
      {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 |
				       PTA_MMX | PTA_PREFETCH_SSE},
      {"k6", PROCESSOR_K6, PTA_MMX},
      {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
      {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
      {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				   | PTA_3DNOW_A},
      {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
					 | PTA_3DNOW | PTA_3DNOW_A},
      {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				     | PTA_3DNOW_A | PTA_SSE},
      {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				      | PTA_3DNOW_A | PTA_SSE},
      {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				      | PTA_3DNOW_A | PTA_SSE},
      {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
			   | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
    };

  int const pta_size = ARRAY_SIZE (processor_alias_table);

  /* By default our XFmode is the 80-bit extended format.  If we use
     TFmode instead, it's also the 80-bit format, but with padding.  */
  real_format_for_mode[XFmode - QFmode] = &ieee_extended_intel_96_format;
  real_format_for_mode[TFmode - QFmode] = &ieee_extended_intel_128_format;

  /* Set the default values for switches whose default depends on TARGET_64BIT
     in case they weren't overwritten by command line options.  */
  if (TARGET_64BIT)
    {
      if (flag_omit_frame_pointer == 2)
	flag_omit_frame_pointer = 1;
      if (flag_asynchronous_unwind_tables == 2)
	flag_asynchronous_unwind_tables = 1;
      if (flag_pcc_struct_return == 2)
	flag_pcc_struct_return = 0;
    }
  else
    {
      if (flag_omit_frame_pointer == 2)
	flag_omit_frame_pointer = 0;
      if (flag_asynchronous_unwind_tables == 2)
	flag_asynchronous_unwind_tables = 0;
      if (flag_pcc_struct_return == 2)
	flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
    }

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

  if (!ix86_tune_string && ix86_arch_string)
    ix86_tune_string = ix86_arch_string;
  if (!ix86_tune_string)
    ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
  if (!ix86_arch_string)
    ix86_arch_string = TARGET_64BIT ? "k8" : "i386";

  if (ix86_cmodel_string != 0)
    {
      if (!strcmp (ix86_cmodel_string, "small"))
	ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
      else if (flag_pic)
	sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
      else if (!strcmp (ix86_cmodel_string, "32"))
	ix86_cmodel = CM_32;
      else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
	ix86_cmodel = CM_KERNEL;
      else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
	ix86_cmodel = CM_MEDIUM;
      else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
	ix86_cmodel = CM_LARGE;
      else
	error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
    }
  else
    {
      ix86_cmodel = CM_32;
      if (TARGET_64BIT)
	ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
    }
  if (ix86_asm_string != 0)
    {
      if (!strcmp (ix86_asm_string, "intel"))
	ix86_asm_dialect = ASM_INTEL;
      else if (!strcmp (ix86_asm_string, "att"))
	ix86_asm_dialect = ASM_ATT;
      else
	error ("bad value (%s) for -masm= switch", ix86_asm_string);
    }
  if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
    error ("code model `%s' not supported in the %s bit mode",
	   ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
  if (ix86_cmodel == CM_LARGE)
    sorry ("code model `large' not supported yet");
  if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
    sorry ("%i-bit mode not compiled in",
	   (target_flags & MASK_64BIT) ? 64 : 32);

  for (i = 0; i < pta_size; i++)
    if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
      {
	ix86_arch = processor_alias_table[i].processor;
	/* Default cpu tuning to the architecture.  */
	ix86_tune = ix86_arch;
	if (processor_alias_table[i].flags & PTA_MMX
	    && !(target_flags_explicit & MASK_MMX))
	  target_flags |= MASK_MMX;
	if (processor_alias_table[i].flags & PTA_3DNOW
	    && !(target_flags_explicit & MASK_3DNOW))
	  target_flags |= MASK_3DNOW;
	if (processor_alias_table[i].flags & PTA_3DNOW_A
	    && !(target_flags_explicit & MASK_3DNOW_A))
	  target_flags |= MASK_3DNOW_A;
	if (processor_alias_table[i].flags & PTA_SSE
	    && !(target_flags_explicit & MASK_SSE))
	  target_flags |= MASK_SSE;
	if (processor_alias_table[i].flags & PTA_SSE2
	    && !(target_flags_explicit & MASK_SSE2))
	  target_flags |= MASK_SSE2;
	if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
	  x86_prefetch_sse = true;
	if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
	  error ("CPU you selected does not support x86-64 instruction set");
	break;
      }

  if (i == pta_size)
    error ("bad value (%s) for -march= switch", ix86_arch_string);
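
  /* For example, -march=pentium3 (PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE)
     turns on MASK_MMX and MASK_SSE unless the user explicitly disabled
     them, and marks the SSE prefetch instructions as usable.  */
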
  for (i = 0; i < pta_size; i++)
    if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
      {
	ix86_tune = processor_alias_table[i].processor;
	if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
	  error ("CPU you selected does not support x86-64 instruction set");
	break;
      }
  /* Guard the flags check so an unrecognized -mtune= string does not
     index one past the end of the alias table.  */
  if (i != pta_size
      && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
    x86_prefetch_sse = true;
  if (i == pta_size)
    error ("bad value (%s) for -mtune= switch", ix86_tune_string);

  if (optimize_size)
    ix86_cost = &size_cost;
  else
    ix86_cost = processor_target_table[ix86_tune].cost;
  target_flags |= processor_target_table[ix86_tune].target_enable;
  target_flags &= ~processor_target_table[ix86_tune].target_disable;

  /* Arrange to set up i386_stack_locals for all functions.  */
  init_machine_status = ix86_init_machine_status;

  /* Validate -mregparm= value.  */
  if (ix86_regparm_string)
    {
      i = atoi (ix86_regparm_string);
      if (i < 0 || i > REGPARM_MAX)
	error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
      else
	ix86_regparm = i;
    }
  else if (TARGET_64BIT)
    ix86_regparm = REGPARM_MAX;

  /* If the user has provided any of the -malign-* options,
     warn and use that value only if -falign-* is not set.
     Remove this code in GCC 3.2 or later.  */
  if (ix86_align_loops_string)
    {
      warning ("-malign-loops is obsolete, use -falign-loops");
      if (align_loops == 0)
	{
	  i = atoi (ix86_align_loops_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_loops = 1 << i;
	}
    }

  if (ix86_align_jumps_string)
    {
      warning ("-malign-jumps is obsolete, use -falign-jumps");
      if (align_jumps == 0)
	{
	  i = atoi (ix86_align_jumps_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_jumps = 1 << i;
	}
    }

  if (ix86_align_funcs_string)
    {
      warning ("-malign-functions is obsolete, use -falign-functions");
      if (align_functions == 0)
	{
	  i = atoi (ix86_align_funcs_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_functions = 1 << i;
	}
    }

  /* Default align_* from the processor table.  */
  if (align_loops == 0)
    {
      align_loops = processor_target_table[ix86_tune].align_loop;
      align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
    }
  if (align_jumps == 0)
    {
      align_jumps = processor_target_table[ix86_tune].align_jump;
      align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
    }
  if (align_functions == 0)
    {
      align_functions = processor_target_table[ix86_tune].align_func;
    }

  /* Validate -mpreferred-stack-boundary= value, or provide default.
     The default of 128 bits is for Pentium III's SSE __m128, but we
     don't want additional code to keep the stack aligned when
     optimizing for code size.  */
  ix86_preferred_stack_boundary = (optimize_size
				   ? TARGET_64BIT ? 128 : 32
				   : 128);
  if (ix86_preferred_stack_boundary_string)
    {
      i = atoi (ix86_preferred_stack_boundary_string);
      if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
	error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
	       TARGET_64BIT ? 4 : 2);
      else
	ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
    }
0f290768 1334 /* Validate -mbranch-cost= value, or provide default. */
9e555526 1335 ix86_branch_cost = processor_target_table[ix86_tune].cost->branch_cost;
e075ae69 1336 if (ix86_branch_cost_string)
804a8ee0 1337 {
400500c4
RK
1338 i = atoi (ix86_branch_cost_string);
1339 if (i < 0 || i > 5)
1340 error ("-mbranch-cost=%d is not between 0 and 5", i);
1341 else
1342 ix86_branch_cost = i;
804a8ee0 1343 }
804a8ee0 1344
f996902d
RH
1345 if (ix86_tls_dialect_string)
1346 {
1347 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1348 ix86_tls_dialect = TLS_DIALECT_GNU;
1349 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1350 ix86_tls_dialect = TLS_DIALECT_SUN;
1351 else
1352 error ("bad value (%s) for -mtls-dialect= switch",
1353 ix86_tls_dialect_string);
1354 }
1355
e9a25f70
JL
1356 /* Keep nonleaf frame pointers. */
1357 if (TARGET_OMIT_LEAF_FRAME_POINTER)
77a989d1 1358 flag_omit_frame_pointer = 1;
e075ae69
RH
1359
1360 /* If we're doing fast math, we don't care about comparison order
1361 wrt NaNs. This lets us use a shorter comparison sequence. */
de6c5979 1362 if (flag_unsafe_math_optimizations)
e075ae69
RH
1363 target_flags &= ~MASK_IEEE_FP;
1364
30c99a84
RH
1365 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1366 since the insns won't need emulation. */
1367 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1368 target_flags &= ~MASK_NO_FANCY_MATH_387;
1369
22c7c85e
L
1370 /* Turn on SSE2 builtins for -mpni. */
1371 if (TARGET_PNI)
1372 target_flags |= MASK_SSE2;
1373
1374 /* Turn on SSE builtins for -msse2. */
1375 if (TARGET_SSE2)
1376 target_flags |= MASK_SSE;
1377
14f73b5a
JH
1378 if (TARGET_64BIT)
1379 {
1380 if (TARGET_ALIGN_DOUBLE)
c725bd79 1381 error ("-malign-double makes no sense in 64-bit mode");
14f73b5a 1382 if (TARGET_RTD)
c725bd79 1383 error ("-mrtd calling convention not supported in 64-bit mode");
14f73b5a 1384 /* Enable by default the SSE and MMX builtins. */
965f5423
JH
1385 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1386 ix86_fpmath = FPMATH_SSE;
14f73b5a 1387 }
965f5423 1388 else
a5b378d6
JH
1389 {
1390 ix86_fpmath = FPMATH_387;
 1391 /* The i386 ABI does not specify a red zone. It still makes sense to use one
 1392 when the programmer takes care to keep the stack from being destroyed. */
1393 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
1394 target_flags |= MASK_NO_RED_ZONE;
1395 }
965f5423
JH
1396
1397 if (ix86_fpmath_string != 0)
1398 {
1399 if (! strcmp (ix86_fpmath_string, "387"))
1400 ix86_fpmath = FPMATH_387;
1401 else if (! strcmp (ix86_fpmath_string, "sse"))
1402 {
1403 if (!TARGET_SSE)
1404 {
1405 warning ("SSE instruction set disabled, using 387 arithmetics");
1406 ix86_fpmath = FPMATH_387;
1407 }
1408 else
1409 ix86_fpmath = FPMATH_SSE;
1410 }
1411 else if (! strcmp (ix86_fpmath_string, "387,sse")
1412 || ! strcmp (ix86_fpmath_string, "sse,387"))
1413 {
1414 if (!TARGET_SSE)
1415 {
1416 warning ("SSE instruction set disabled, using 387 arithmetics");
1417 ix86_fpmath = FPMATH_387;
1418 }
1419 else if (!TARGET_80387)
1420 {
1421 warning ("387 instruction set disabled, using SSE arithmetics");
1422 ix86_fpmath = FPMATH_SSE;
1423 }
1424 else
1425 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1426 }
fce5a9f2 1427 else
965f5423
JH
1428 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1429 }
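/* Example (illustrative): -msse2 -mfpmath=sse selects FPMATH_SSE, while
   -mfpmath=sse,387 sets both FPMATH_SSE and FPMATH_387 provided both
   instruction sets are available, as checked above.  */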
14f73b5a 1430
a7180f70
BS
1431 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1432 on by -msse. */
1433 if (TARGET_SSE)
e37af218
RH
1434 {
1435 target_flags |= MASK_MMX;
1436 x86_prefetch_sse = true;
1437 }
c6036a37 1438
47f339cf
BS
 1439 /* If it has 3DNow!, it also has MMX, so MMX is also turned on by -m3dnow. */
1440 if (TARGET_3DNOW)
1441 {
1442 target_flags |= MASK_MMX;
d1f87653 1443 /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
47f339cf
BS
1444 extensions it adds. */
1445 if (x86_3dnow_a & (1 << ix86_arch))
1446 target_flags |= MASK_3DNOW_A;
1447 }
9e555526 1448 if ((x86_accumulate_outgoing_args & TUNEMASK)
9ef1b13a 1449 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
c6036a37
JH
1450 && !optimize_size)
1451 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
623fe810
RH
1452
1453 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1454 {
1455 char *p;
1456 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1457 p = strchr (internal_label_prefix, 'X');
1458 internal_label_prefix_len = p - internal_label_prefix;
1459 *p = '\0';
1460 }
f5316dfe
MM
1461}
1462\f
32b5b1aa 1463void
b96a374d 1464optimization_options (int level, int size ATTRIBUTE_UNUSED)
32b5b1aa 1465{
e9a25f70
JL
1466 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1467 make the problem with not enough registers even worse. */
32b5b1aa
SC
1468#ifdef INSN_SCHEDULING
1469 if (level > 1)
1470 flag_schedule_insns = 0;
1471#endif
55ba61f3
JH
1472
 1473 /* The default values of these switches depend on TARGET_64BIT,
 1474 which is not known at this moment. Mark these values with 2 and
 1475 let the user override them. If no command line option specifies
 1476 them, we will set the defaults in override_options. */
1477 if (optimize >= 1)
1478 flag_omit_frame_pointer = 2;
1479 flag_pcc_struct_return = 2;
1480 flag_asynchronous_unwind_tables = 2;
32b5b1aa 1481}
b08de47e 1482\f
91d231cb
JM
1483/* Table of valid machine attributes. */
1484const struct attribute_spec ix86_attribute_table[] =
b08de47e 1485{
91d231cb 1486 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
b08de47e
MM
1487 /* Stdcall attribute says callee is responsible for popping arguments
1488 if they are not variable. */
91d231cb 1489 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
e91f04de
CH
1490 /* Fastcall attribute says callee is responsible for popping arguments
1491 if they are not variable. */
1492 { "fastcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
91d231cb
JM
1493 /* Cdecl attribute says the callee is a normal C declaration */
1494 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
b08de47e 1495 /* Regparm attribute specifies how many integer arguments are to be
0f290768 1496 passed in registers. */
91d231cb
JM
1497 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1498#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
3da1eb0b
DS
1499 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1500 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1501 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
91d231cb 1502#endif
fe77449a
DR
1503 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1504 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
91d231cb
JM
1505 { NULL, 0, 0, false, false, false, NULL }
1506};
1507
5fbf0217
EB
1508/* Decide whether we can make a sibling call to a function. DECL is the
1509 declaration of the function being targeted by the call and EXP is the
1510 CALL_EXPR representing the call. */
4977bab6
ZW
1511
1512static bool
b96a374d 1513ix86_function_ok_for_sibcall (tree decl, tree exp)
4977bab6
ZW
1514{
1515 /* If we are generating position-independent code, we cannot sibcall
1516 optimize any indirect call, or a direct call to a global function,
1517 as the PLT requires %ebx be live. */
1518 if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
1519 return false;
1520
1521 /* If we are returning floats on the 80387 register stack, we cannot
1522 make a sibcall from a function that doesn't return a float to a
5fbf0217
EB
1523 function that does or, conversely, from a function that does return
1524 a float to a function that doesn't; the necessary stack adjustment
1525 would not be executed. */
4977bab6 1526 if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp)))
5fbf0217 1527 != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)))))
4977bab6
ZW
1528 return false;
1529
1530 /* If this call is indirect, we'll need to be able to use a call-clobbered
b96a374d 1531 register for the address of the target function. Make sure that all
4977bab6
ZW
1532 such registers are not used for passing parameters. */
1533 if (!decl && !TARGET_64BIT)
1534 {
1535 int regparm = ix86_regparm;
1536 tree attr, type;
1537
1538 /* We're looking at the CALL_EXPR, we need the type of the function. */
1539 type = TREE_OPERAND (exp, 0); /* pointer expression */
1540 type = TREE_TYPE (type); /* pointer type */
1541 type = TREE_TYPE (type); /* function type */
1542
1543 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1544 if (attr)
1545 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1546
1547 if (regparm >= 3)
1548 {
1549 /* ??? Need to count the actual number of registers to be used,
1550 not the possible number of registers. Fix later. */
1551 return false;
1552 }
1553 }
1554
1555 /* Otherwise okay. That also includes certain types of indirect calls. */
1556 return true;
1557}
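/* Example (illustrative): when compiling ia32 code with -fpic, a tail
   call to a global function is not turned into a sibcall, because the
   call goes through the PLT and the PLT entry requires %ebx to hold
   the GOT pointer, as the first test above enforces.  */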
1558
e91f04de 1559/* Handle a "cdecl", "stdcall", or "fastcall" attribute;
91d231cb
JM
1560 arguments as in struct attribute_spec.handler. */
1561static tree
b96a374d
AJ
1562ix86_handle_cdecl_attribute (tree *node, tree name,
1563 tree args ATTRIBUTE_UNUSED,
1564 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
91d231cb
JM
1565{
1566 if (TREE_CODE (*node) != FUNCTION_TYPE
1567 && TREE_CODE (*node) != METHOD_TYPE
1568 && TREE_CODE (*node) != FIELD_DECL
1569 && TREE_CODE (*node) != TYPE_DECL)
b08de47e 1570 {
91d231cb
JM
1571 warning ("`%s' attribute only applies to functions",
1572 IDENTIFIER_POINTER (name));
1573 *no_add_attrs = true;
1574 }
e91f04de
CH
1575 else
1576 {
1577 if (is_attribute_p ("fastcall", name))
1578 {
1579 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
1580 {
1581 error ("fastcall and stdcall attributes are not compatible");
1582 }
1583 else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
1584 {
1585 error ("fastcall and regparm attributes are not compatible");
1586 }
1587 }
1588 else if (is_attribute_p ("stdcall", name))
1589 {
1590 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1591 {
1592 error ("fastcall and stdcall attributes are not compatible");
1593 }
1594 }
1595 }
b08de47e 1596
91d231cb
JM
1597 if (TARGET_64BIT)
1598 {
1599 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1600 *no_add_attrs = true;
1601 }
b08de47e 1602
91d231cb
JM
1603 return NULL_TREE;
1604}
b08de47e 1605
91d231cb
JM
1606/* Handle a "regparm" attribute;
1607 arguments as in struct attribute_spec.handler. */
1608static tree
b96a374d
AJ
1609ix86_handle_regparm_attribute (tree *node, tree name, tree args,
1610 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
91d231cb
JM
1611{
1612 if (TREE_CODE (*node) != FUNCTION_TYPE
1613 && TREE_CODE (*node) != METHOD_TYPE
1614 && TREE_CODE (*node) != FIELD_DECL
1615 && TREE_CODE (*node) != TYPE_DECL)
1616 {
1617 warning ("`%s' attribute only applies to functions",
1618 IDENTIFIER_POINTER (name));
1619 *no_add_attrs = true;
1620 }
1621 else
1622 {
1623 tree cst;
b08de47e 1624
91d231cb
JM
1625 cst = TREE_VALUE (args);
1626 if (TREE_CODE (cst) != INTEGER_CST)
1627 {
1628 warning ("`%s' attribute requires an integer constant argument",
1629 IDENTIFIER_POINTER (name));
1630 *no_add_attrs = true;
1631 }
1632 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1633 {
1634 warning ("argument to `%s' attribute larger than %d",
1635 IDENTIFIER_POINTER (name), REGPARM_MAX);
1636 *no_add_attrs = true;
1637 }
e91f04de
CH
1638
1639 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1640 {
1641 error ("fastcall and regparm attributes are not compatible");
1642 }
b08de47e
MM
1643 }
1644
91d231cb 1645 return NULL_TREE;
b08de47e
MM
1646}
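/* Usage sketch (illustrative, not part of this file): the handlers
   above accept declarations such as

     int __attribute__ ((regparm (3))) f (int a, int b, int c);
     int __attribute__ ((fastcall)) g (int a, int b);

   and reject invalid combinations such as fastcall together with
   stdcall or regparm.  */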
1647
1648/* Return 0 if the attributes for two types are incompatible, 1 if they
1649 are compatible, and 2 if they are nearly compatible (which causes a
1650 warning to be generated). */
1651
8d8e52be 1652static int
b96a374d 1653ix86_comp_type_attributes (tree type1, tree type2)
b08de47e 1654{
0f290768 1655 /* Check for mismatch of non-default calling convention. */
27c38fbe 1656 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
afcfe58c
MM
1657
1658 if (TREE_CODE (type1) != FUNCTION_TYPE)
1659 return 1;
1660
b96a374d 1661 /* Check for mismatched fastcall types. */
e91f04de
CH
1662 if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
1663 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
b96a374d 1664 return 0;
e91f04de 1665
afcfe58c 1666 /* Check for mismatched return types (cdecl vs stdcall). */
6093f019
RH
1667 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1668 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
afcfe58c 1669 return 0;
b08de47e
MM
1670 return 1;
1671}
b08de47e 1672\f
483ab821
MM
 1673/* Return the regparm value for a function with the indicated TYPE. */
1674
1675static int
b96a374d 1676ix86_fntype_regparm (tree type)
483ab821
MM
1677{
1678 tree attr;
1679
1680 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1681 if (attr)
1682 return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1683 else
1684 return ix86_regparm;
1685}
1686
b08de47e
MM
1687/* Value is the number of bytes of arguments automatically
1688 popped when returning from a subroutine call.
1689 FUNDECL is the declaration node of the function (as a tree),
1690 FUNTYPE is the data type of the function (as a tree),
1691 or for a library call it is an identifier node for the subroutine name.
1692 SIZE is the number of bytes of arguments passed on the stack.
1693
1694 On the 80386, the RTD insn may be used to pop them if the number
1695 of args is fixed, but if the number is variable then the caller
1696 must pop them all. RTD can't be used for library calls now
1697 because the library is compiled with the Unix compiler.
1698 Use of RTD is a selectable option, since it is incompatible with
1699 standard Unix calling sequences. If the option is not selected,
1700 the caller must always pop the args.
1701
1702 The attribute stdcall is equivalent to RTD on a per module basis. */
1703
1704int
b96a374d 1705ix86_return_pops_args (tree fundecl, tree funtype, int size)
79325812 1706{
3345ee7d 1707 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
b08de47e 1708
0f290768 1709 /* Cdecl functions override -mrtd, and never pop the stack. */
e9a25f70 1710 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
79325812 1711
e91f04de
CH
 1712 /* Stdcall and fastcall functions will pop the stack unless they take variable arguments. */
1713 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
1714 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
698cdd84 1715 rtd = 1;
79325812 1716
698cdd84
SC
1717 if (rtd
1718 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
e9a25f70
JL
1719 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1720 == void_type_node)))
698cdd84
SC
1721 return size;
1722 }
79325812 1723
232b8f52 1724 /* Lose any fake structure return argument if it is passed on the stack. */
0d7d98ee
JH
1725 if (aggregate_value_p (TREE_TYPE (funtype))
1726 && !TARGET_64BIT)
232b8f52 1727 {
483ab821 1728 int nregs = ix86_fntype_regparm (funtype);
232b8f52
JJ
1729
1730 if (!nregs)
1731 return GET_MODE_SIZE (Pmode);
1732 }
1733
1734 return 0;
b08de47e 1735}
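/* Worked example (illustrative): for

     void __attribute__ ((stdcall)) f (int a, int b, int c);

   SIZE is 12 and ix86_return_pops_args returns 12, so the callee pops
   its own arguments (`ret $12').  A cdecl or varargs function yields 0
   and leaves the popping to the caller.  */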
b08de47e
MM
1736\f
1737/* Argument support functions. */
1738
53c17031
JH
1739/* Return true when register may be used to pass function parameters. */
1740bool
b96a374d 1741ix86_function_arg_regno_p (int regno)
53c17031
JH
1742{
1743 int i;
1744 if (!TARGET_64BIT)
0333394e
JJ
1745 return (regno < REGPARM_MAX
1746 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
53c17031
JH
1747 if (SSE_REGNO_P (regno) && TARGET_SSE)
1748 return true;
1749 /* RAX is used as hidden argument to va_arg functions. */
1750 if (!regno)
1751 return true;
1752 for (i = 0; i < REGPARM_MAX; i++)
1753 if (regno == x86_64_int_parameter_registers[i])
1754 return true;
1755 return false;
1756}
1757
b08de47e
MM
1758/* Initialize a variable CUM of type CUMULATIVE_ARGS
1759 for a call to a function whose data type is FNTYPE.
1760 For a library call, FNTYPE is 0. */
1761
1762void
b96a374d
AJ
1763init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
1764 tree fntype, /* tree ptr for function decl */
1765 rtx libname, /* SYMBOL_REF of library name or 0 */
1766 tree fndecl)
b08de47e
MM
1767{
1768 static CUMULATIVE_ARGS zero_cum;
1769 tree param, next_param;
dafc5b82 1770 bool user_convention = false;
b08de47e
MM
1771
1772 if (TARGET_DEBUG_ARG)
1773 {
1774 fprintf (stderr, "\ninit_cumulative_args (");
1775 if (fntype)
e9a25f70
JL
1776 fprintf (stderr, "fntype code = %s, ret code = %s",
1777 tree_code_name[(int) TREE_CODE (fntype)],
1778 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
b08de47e
MM
1779 else
1780 fprintf (stderr, "no fntype");
1781
1782 if (libname)
1783 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1784 }
1785
1786 *cum = zero_cum;
1787
1788 /* Set up the number of registers to use for passing arguments. */
e075ae69 1789 cum->nregs = ix86_regparm;
53c17031
JH
1790 cum->sse_nregs = SSE_REGPARM_MAX;
1791 if (fntype && !TARGET_64BIT)
b08de47e
MM
1792 {
1793 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));
e9a25f70 1794
b08de47e 1795 if (attr)
dafc5b82
JH
1796 {
1797 cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1798 user_convention = true;
1799 }
b08de47e 1800 }
53c17031 1801 cum->maybe_vaarg = false;
b08de47e 1802
e91f04de
CH
 1803 /* Use ecx and edx registers if the function has the fastcall attribute. */
1804 if (fntype && !TARGET_64BIT)
1805 {
1806 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
1807 {
1808 cum->nregs = 2;
1809 cum->fastcall = 1;
dafc5b82
JH
1810 user_convention = true;
1811 }
1812 }
1813
1814 /* Use register calling convention for local functions when possible. */
1815 if (!TARGET_64BIT && !user_convention && fndecl
1816 && flag_unit_at_a_time)
1817 {
1818 struct cgraph_local_info *i = cgraph_local_info (fndecl);
1819 if (i && i->local)
1820 {
 1821 /* We can't use regparm(3) for nested functions as these use the
 1822 static chain pointer in the third argument. */
1823 if (DECL_CONTEXT (fndecl) && !DECL_NO_STATIC_CHAIN (fndecl))
1824 cum->nregs = 2;
1825 else
1826 cum->nregs = 3;
e91f04de
CH
1827 }
1828 }
1829
1830
b08de47e
MM
1831 /* Determine if this function has variable arguments. This is
 1832 indicated by the last argument being 'void_type_node' if there
 1833 are no variable arguments. If there are variable arguments, then
 1834 we won't pass anything in registers. */
1835
1836 if (cum->nregs)
1837 {
1838 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
e9a25f70 1839 param != 0; param = next_param)
b08de47e
MM
1840 {
1841 next_param = TREE_CHAIN (param);
e9a25f70 1842 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
53c17031
JH
1843 {
1844 if (!TARGET_64BIT)
e91f04de
CH
1845 {
1846 cum->nregs = 0;
1847 cum->fastcall = 0;
1848 }
53c17031
JH
1849 cum->maybe_vaarg = true;
1850 }
b08de47e
MM
1851 }
1852 }
53c17031
JH
1853 if ((!fntype && !libname)
1854 || (fntype && !TYPE_ARG_TYPES (fntype)))
1855 cum->maybe_vaarg = 1;
b08de47e
MM
1856
1857 if (TARGET_DEBUG_ARG)
1858 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1859
1860 return;
1861}
1862
d1f87653 1863/* x86-64 register passing implementation. See x86-64 ABI for details. Goal
f710504c 1864 of this code is to classify each 8bytes of incoming argument by the register
53c17031
JH
1865 class and assign registers accordingly. */
1866
1867/* Return the union class of CLASS1 and CLASS2.
1868 See the x86-64 PS ABI for details. */
1869
1870static enum x86_64_reg_class
b96a374d 1871merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
53c17031
JH
1872{
1873 /* Rule #1: If both classes are equal, this is the resulting class. */
1874 if (class1 == class2)
1875 return class1;
1876
1877 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1878 the other class. */
1879 if (class1 == X86_64_NO_CLASS)
1880 return class2;
1881 if (class2 == X86_64_NO_CLASS)
1882 return class1;
1883
1884 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1885 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1886 return X86_64_MEMORY_CLASS;
1887
1888 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1889 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1890 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1891 return X86_64_INTEGERSI_CLASS;
1892 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1893 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1894 return X86_64_INTEGER_CLASS;
1895
1896 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1897 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1898 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1899 return X86_64_MEMORY_CLASS;
1900
1901 /* Rule #6: Otherwise class SSE is used. */
1902 return X86_64_SSE_CLASS;
1903}
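/* Example (illustrative): merging X86_64_INTEGERSI_CLASS with
   X86_64_SSESF_CLASS yields X86_64_INTEGERSI_CLASS (rule #4), while
   merging X86_64_X87_CLASS with X86_64_SSE_CLASS yields
   X86_64_MEMORY_CLASS (rule #5).  */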
1904
1905/* Classify the argument of type TYPE and mode MODE.
1906 CLASSES will be filled by the register class used to pass each word
1907 of the operand. The number of words is returned. In case the parameter
1908 should be passed in memory, 0 is returned. As a special case for zero
1909 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1910
 1911 BIT_OFFSET is used internally for handling records and specifies the
 1912 offset, in bits modulo 256, to avoid overflow cases.
1913
1914 See the x86-64 PS ABI for details.
1915*/
1916
1917static int
b96a374d
AJ
1918classify_argument (enum machine_mode mode, tree type,
1919 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
53c17031
JH
1920{
1921 int bytes =
1922 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
23327dae 1923 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
53c17031 1924
c60ee6f5
JH
1925 /* Variable sized entities are always passed/returned in memory. */
1926 if (bytes < 0)
1927 return 0;
1928
dafc5b82
JH
1929 if (mode != VOIDmode
1930 && MUST_PASS_IN_STACK (mode, type))
1931 return 0;
1932
53c17031
JH
1933 if (type && AGGREGATE_TYPE_P (type))
1934 {
1935 int i;
1936 tree field;
1937 enum x86_64_reg_class subclasses[MAX_CLASSES];
1938
1939 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
1940 if (bytes > 16)
1941 return 0;
1942
1943 for (i = 0; i < words; i++)
1944 classes[i] = X86_64_NO_CLASS;
1945
1946 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
 1947 signal the memory class, so handle this as a special case. */
1948 if (!words)
1949 {
1950 classes[0] = X86_64_NO_CLASS;
1951 return 1;
1952 }
1953
1954 /* Classify each field of record and merge classes. */
1955 if (TREE_CODE (type) == RECORD_TYPE)
1956 {
91ea38f9
JH
 1957 /* For classes, first merge in the fields of the base classes. */
1958 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1959 {
1960 tree bases = TYPE_BINFO_BASETYPES (type);
1961 int n_bases = TREE_VEC_LENGTH (bases);
1962 int i;
1963
1964 for (i = 0; i < n_bases; ++i)
1965 {
1966 tree binfo = TREE_VEC_ELT (bases, i);
1967 int num;
1968 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1969 tree type = BINFO_TYPE (binfo);
1970
1971 num = classify_argument (TYPE_MODE (type),
1972 type, subclasses,
1973 (offset + bit_offset) % 256);
1974 if (!num)
1975 return 0;
1976 for (i = 0; i < num; i++)
1977 {
db01f480 1978 int pos = (offset + (bit_offset % 64)) / 8 / 8;
91ea38f9
JH
1979 classes[i + pos] =
1980 merge_classes (subclasses[i], classes[i + pos]);
1981 }
1982 }
1983 }
 1984 /* And now merge in the fields of the structure. */
53c17031
JH
1985 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1986 {
1987 if (TREE_CODE (field) == FIELD_DECL)
1988 {
1989 int num;
1990
1991 /* Bitfields are always classified as integer. Handle them
1992 early, since later code would consider them to be
1993 misaligned integers. */
1994 if (DECL_BIT_FIELD (field))
1995 {
1996 for (i = int_bit_position (field) / 8 / 8;
1997 i < (int_bit_position (field)
1998 + tree_low_cst (DECL_SIZE (field), 0)
b96a374d 1999 + 63) / 8 / 8; i++)
53c17031
JH
2000 classes[i] =
2001 merge_classes (X86_64_INTEGER_CLASS,
2002 classes[i]);
2003 }
2004 else
2005 {
2006 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2007 TREE_TYPE (field), subclasses,
2008 (int_bit_position (field)
2009 + bit_offset) % 256);
2010 if (!num)
2011 return 0;
2012 for (i = 0; i < num; i++)
2013 {
2014 int pos =
db01f480 2015 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
53c17031
JH
2016 classes[i + pos] =
2017 merge_classes (subclasses[i], classes[i + pos]);
2018 }
2019 }
2020 }
2021 }
2022 }
2023 /* Arrays are handled as small records. */
2024 else if (TREE_CODE (type) == ARRAY_TYPE)
2025 {
2026 int num;
2027 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2028 TREE_TYPE (type), subclasses, bit_offset);
2029 if (!num)
2030 return 0;
2031
2032 /* The partial classes are now full classes. */
2033 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2034 subclasses[0] = X86_64_SSE_CLASS;
2035 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
2036 subclasses[0] = X86_64_INTEGER_CLASS;
2037
2038 for (i = 0; i < words; i++)
2039 classes[i] = subclasses[i % num];
2040 }
2041 /* Unions are similar to RECORD_TYPE but offset is always 0. */
e4dbaed5
AS
2042 else if (TREE_CODE (type) == UNION_TYPE
2043 || TREE_CODE (type) == QUAL_UNION_TYPE)
53c17031 2044 {
91ea38f9
JH
 2045 /* For classes, first merge in the fields of the base classes. */
2046 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
2047 {
2048 tree bases = TYPE_BINFO_BASETYPES (type);
2049 int n_bases = TREE_VEC_LENGTH (bases);
2050 int i;
2051
2052 for (i = 0; i < n_bases; ++i)
2053 {
2054 tree binfo = TREE_VEC_ELT (bases, i);
2055 int num;
2056 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
2057 tree type = BINFO_TYPE (binfo);
2058
2059 num = classify_argument (TYPE_MODE (type),
2060 type, subclasses,
db01f480 2061 (offset + (bit_offset % 64)) % 256);
91ea38f9
JH
2062 if (!num)
2063 return 0;
2064 for (i = 0; i < num; i++)
2065 {
c16576e6 2066 int pos = (offset + (bit_offset % 64)) / 8 / 8;
91ea38f9
JH
2067 classes[i + pos] =
2068 merge_classes (subclasses[i], classes[i + pos]);
2069 }
2070 }
2071 }
53c17031
JH
2072 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2073 {
2074 if (TREE_CODE (field) == FIELD_DECL)
2075 {
2076 int num;
2077 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2078 TREE_TYPE (field), subclasses,
2079 bit_offset);
2080 if (!num)
2081 return 0;
2082 for (i = 0; i < num; i++)
2083 classes[i] = merge_classes (subclasses[i], classes[i]);
2084 }
2085 }
2086 }
2087 else
2088 abort ();
2089
2090 /* Final merger cleanup. */
2091 for (i = 0; i < words; i++)
2092 {
2093 /* If one class is MEMORY, everything should be passed in
2094 memory. */
2095 if (classes[i] == X86_64_MEMORY_CLASS)
2096 return 0;
2097
d6a7951f 2098 /* The X86_64_SSEUP_CLASS should always be preceded by
53c17031
JH
2099 X86_64_SSE_CLASS. */
2100 if (classes[i] == X86_64_SSEUP_CLASS
2101 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
2102 classes[i] = X86_64_SSE_CLASS;
2103
d6a7951f 2104 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
53c17031
JH
2105 if (classes[i] == X86_64_X87UP_CLASS
2106 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
2107 classes[i] = X86_64_SSE_CLASS;
2108 }
2109 return words;
2110 }
2111
2112 /* Compute alignment needed. We align all types to natural boundaries with
2113 exception of XFmode that is aligned to 64bits. */
2114 if (mode != VOIDmode && mode != BLKmode)
2115 {
2116 int mode_alignment = GET_MODE_BITSIZE (mode);
2117
2118 if (mode == XFmode)
2119 mode_alignment = 128;
2120 else if (mode == XCmode)
2121 mode_alignment = 256;
f5143c46 2122 /* Misaligned fields are always returned in memory. */
53c17031
JH
2123 if (bit_offset % mode_alignment)
2124 return 0;
2125 }
2126
2127 /* Classification of atomic types. */
2128 switch (mode)
2129 {
2130 case DImode:
2131 case SImode:
2132 case HImode:
2133 case QImode:
2134 case CSImode:
2135 case CHImode:
2136 case CQImode:
2137 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2138 classes[0] = X86_64_INTEGERSI_CLASS;
2139 else
2140 classes[0] = X86_64_INTEGER_CLASS;
2141 return 1;
2142 case CDImode:
2143 case TImode:
2144 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2145 return 2;
2146 case CTImode:
2147 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2148 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
2149 return 4;
2150 case SFmode:
2151 if (!(bit_offset % 64))
2152 classes[0] = X86_64_SSESF_CLASS;
2153 else
2154 classes[0] = X86_64_SSE_CLASS;
2155 return 1;
2156 case DFmode:
2157 classes[0] = X86_64_SSEDF_CLASS;
2158 return 1;
2159 case TFmode:
2160 classes[0] = X86_64_X87_CLASS;
2161 classes[1] = X86_64_X87UP_CLASS;
2162 return 2;
2163 case TCmode:
2164 classes[0] = X86_64_X87_CLASS;
2165 classes[1] = X86_64_X87UP_CLASS;
2166 classes[2] = X86_64_X87_CLASS;
2167 classes[3] = X86_64_X87UP_CLASS;
2168 return 4;
2169 case DCmode:
2170 classes[0] = X86_64_SSEDF_CLASS;
2171 classes[1] = X86_64_SSEDF_CLASS;
2172 return 2;
2173 case SCmode:
2174 classes[0] = X86_64_SSE_CLASS;
2175 return 1;
e95d6b23
JH
2176 case V4SFmode:
2177 case V4SImode:
495333a6
JH
2178 case V16QImode:
2179 case V8HImode:
2180 case V2DFmode:
2181 case V2DImode:
e95d6b23
JH
2182 classes[0] = X86_64_SSE_CLASS;
2183 classes[1] = X86_64_SSEUP_CLASS;
2184 return 2;
2185 case V2SFmode:
2186 case V2SImode:
2187 case V4HImode:
2188 case V8QImode:
1194ca05 2189 return 0;
53c17031 2190 case BLKmode:
e95d6b23 2191 case VOIDmode:
53c17031
JH
2192 return 0;
2193 default:
2194 abort ();
2195 }
2196}
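/* Worked example (illustrative, assuming the usual type sizes):

     struct s { double d; int i; };

   occupies 16 bytes, i.e. two eightbytes.  classify_argument returns 2
   with classes[0] = X86_64_SSEDF_CLASS (the double) and
   classes[1] = X86_64_INTEGER_CLASS (the int), so the structure is
   passed in one SSE and one integer register.  */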
2197
 2198/* Examine the argument and return the number of registers required in each
f5143c46 2199 class. Return 0 iff the parameter should be passed in memory. */
53c17031 2200static int
b96a374d
AJ
2201examine_argument (enum machine_mode mode, tree type, int in_return,
2202 int *int_nregs, int *sse_nregs)
53c17031
JH
2203{
2204 enum x86_64_reg_class class[MAX_CLASSES];
2205 int n = classify_argument (mode, type, class, 0);
2206
2207 *int_nregs = 0;
2208 *sse_nregs = 0;
2209 if (!n)
2210 return 0;
2211 for (n--; n >= 0; n--)
2212 switch (class[n])
2213 {
2214 case X86_64_INTEGER_CLASS:
2215 case X86_64_INTEGERSI_CLASS:
2216 (*int_nregs)++;
2217 break;
2218 case X86_64_SSE_CLASS:
2219 case X86_64_SSESF_CLASS:
2220 case X86_64_SSEDF_CLASS:
2221 (*sse_nregs)++;
2222 break;
2223 case X86_64_NO_CLASS:
2224 case X86_64_SSEUP_CLASS:
2225 break;
2226 case X86_64_X87_CLASS:
2227 case X86_64_X87UP_CLASS:
2228 if (!in_return)
2229 return 0;
2230 break;
2231 case X86_64_MEMORY_CLASS:
2232 abort ();
2233 }
2234 return 1;
2235}
2236/* Construct container for the argument used by GCC interface. See
2237 FUNCTION_ARG for the detailed description. */
2238static rtx
b96a374d
AJ
2239construct_container (enum machine_mode mode, tree type, int in_return,
2240 int nintregs, int nsseregs, const int * intreg,
2241 int sse_regno)
53c17031
JH
2242{
2243 enum machine_mode tmpmode;
2244 int bytes =
2245 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2246 enum x86_64_reg_class class[MAX_CLASSES];
2247 int n;
2248 int i;
2249 int nexps = 0;
2250 int needed_sseregs, needed_intregs;
2251 rtx exp[MAX_CLASSES];
2252 rtx ret;
2253
2254 n = classify_argument (mode, type, class, 0);
2255 if (TARGET_DEBUG_ARG)
2256 {
2257 if (!n)
2258 fprintf (stderr, "Memory class\n");
2259 else
2260 {
2261 fprintf (stderr, "Classes:");
2262 for (i = 0; i < n; i++)
2263 {
2264 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2265 }
2266 fprintf (stderr, "\n");
2267 }
2268 }
2269 if (!n)
2270 return NULL;
2271 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
2272 return NULL;
2273 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2274 return NULL;
2275
2276 /* First construct simple cases. Avoid SCmode, since we want to use
2277 single register to pass this type. */
2278 if (n == 1 && mode != SCmode)
2279 switch (class[0])
2280 {
2281 case X86_64_INTEGER_CLASS:
2282 case X86_64_INTEGERSI_CLASS:
2283 return gen_rtx_REG (mode, intreg[0]);
2284 case X86_64_SSE_CLASS:
2285 case X86_64_SSESF_CLASS:
2286 case X86_64_SSEDF_CLASS:
2287 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2288 case X86_64_X87_CLASS:
2289 return gen_rtx_REG (mode, FIRST_STACK_REG);
2290 case X86_64_NO_CLASS:
2291 /* Zero sized array, struct or class. */
2292 return NULL;
2293 default:
2294 abort ();
2295 }
2296 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
e95d6b23 2297 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
53c17031
JH
2298 if (n == 2
2299 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2300 return gen_rtx_REG (TFmode, FIRST_STACK_REG);
2301 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2302 && class[1] == X86_64_INTEGER_CLASS
2303 && (mode == CDImode || mode == TImode)
2304 && intreg[0] + 1 == intreg[1])
2305 return gen_rtx_REG (mode, intreg[0]);
2306 if (n == 4
2307 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
2308 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
2309 return gen_rtx_REG (TCmode, FIRST_STACK_REG);
2310
2311 /* Otherwise figure out the entries of the PARALLEL. */
2312 for (i = 0; i < n; i++)
2313 {
2314 switch (class[i])
2315 {
2316 case X86_64_NO_CLASS:
2317 break;
2318 case X86_64_INTEGER_CLASS:
2319 case X86_64_INTEGERSI_CLASS:
d1f87653 2320 /* Merge TImodes on aligned occasions here too. */
53c17031
JH
2321 if (i * 8 + 8 > bytes)
2322 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2323 else if (class[i] == X86_64_INTEGERSI_CLASS)
2324 tmpmode = SImode;
2325 else
2326 tmpmode = DImode;
2327 /* We've requested 24 bytes we don't have mode for. Use DImode. */
2328 if (tmpmode == BLKmode)
2329 tmpmode = DImode;
2330 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2331 gen_rtx_REG (tmpmode, *intreg),
2332 GEN_INT (i*8));
2333 intreg++;
2334 break;
2335 case X86_64_SSESF_CLASS:
2336 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2337 gen_rtx_REG (SFmode,
2338 SSE_REGNO (sse_regno)),
2339 GEN_INT (i*8));
2340 sse_regno++;
2341 break;
2342 case X86_64_SSEDF_CLASS:
2343 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2344 gen_rtx_REG (DFmode,
2345 SSE_REGNO (sse_regno)),
2346 GEN_INT (i*8));
2347 sse_regno++;
2348 break;
2349 case X86_64_SSE_CLASS:
12f5c45e
JH
2350 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2351 tmpmode = TImode;
53c17031
JH
2352 else
2353 tmpmode = DImode;
2354 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2355 gen_rtx_REG (tmpmode,
2356 SSE_REGNO (sse_regno)),
2357 GEN_INT (i*8));
12f5c45e
JH
2358 if (tmpmode == TImode)
2359 i++;
53c17031
JH
2360 sse_regno++;
2361 break;
2362 default:
2363 abort ();
2364 }
2365 }
2366 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2367 for (i = 0; i < nexps; i++)
2368 XVECEXP (ret, 0, i) = exp [i];
2369 return ret;
2370}
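/* Example (illustrative): for the struct { double d; int i; } case
   above, construct_container builds a PARALLEL holding a DFmode piece
   in the first free SSE register at byte offset 0 and an integer piece
   in the first free integer register at byte offset 8.  */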
2371
b08de47e
MM
2372/* Update the data in CUM to advance over an argument
2373 of mode MODE and data type TYPE.
2374 (TYPE is null for libcalls where that information may not be available.) */
2375
2376void
b96a374d
AJ
2377function_arg_advance (CUMULATIVE_ARGS *cum, /* current arg information */
2378 enum machine_mode mode, /* current arg mode */
2379 tree type, /* type of the argument or 0 if lib support */
2380 int named) /* whether or not the argument was named */
b08de47e 2381{
5ac9118e
KG
2382 int bytes =
2383 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
b08de47e
MM
2384 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2385
2386 if (TARGET_DEBUG_ARG)
2387 fprintf (stderr,
e9a25f70 2388 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
b08de47e 2389 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
53c17031 2390 if (TARGET_64BIT)
b08de47e 2391 {
53c17031
JH
2392 int int_nregs, sse_nregs;
2393 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2394 cum->words += words;
2395 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
82a127a9 2396 {
53c17031
JH
2397 cum->nregs -= int_nregs;
2398 cum->sse_nregs -= sse_nregs;
2399 cum->regno += int_nregs;
2400 cum->sse_regno += sse_nregs;
82a127a9 2401 }
53c17031
JH
2402 else
2403 cum->words += words;
b08de47e 2404 }
a4f31c00 2405 else
82a127a9 2406 {
53c17031
JH
2407 if (TARGET_SSE && mode == TImode)
2408 {
2409 cum->sse_words += words;
2410 cum->sse_nregs -= 1;
2411 cum->sse_regno += 1;
2412 if (cum->sse_nregs <= 0)
2413 {
2414 cum->sse_nregs = 0;
2415 cum->sse_regno = 0;
2416 }
2417 }
2418 else
82a127a9 2419 {
53c17031
JH
2420 cum->words += words;
2421 cum->nregs -= words;
2422 cum->regno += words;
2423
2424 if (cum->nregs <= 0)
2425 {
2426 cum->nregs = 0;
2427 cum->regno = 0;
2428 }
82a127a9
CM
2429 }
2430 }
b08de47e
MM
2431 return;
2432}
2433
2434/* Define where to put the arguments to a function.
2435 Value is zero to push the argument on the stack,
2436 or a hard register in which to store the argument.
2437
2438 MODE is the argument's machine mode.
2439 TYPE is the data type of the argument (as a tree).
2440 This is null for libcalls where that information may
2441 not be available.
2442 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2443 the preceding args and about the function being called.
2444 NAMED is nonzero if this argument is a named parameter
2445 (otherwise it is an extra parameter matching an ellipsis). */
2446
07933f72 2447rtx
b96a374d
AJ
2448function_arg (CUMULATIVE_ARGS *cum, /* current arg information */
2449 enum machine_mode mode, /* current arg mode */
2450 tree type, /* type of the argument or 0 if lib support */
2451 int named) /* != 0 for normal args, == 0 for ... args */
b08de47e
MM
2452{
2453 rtx ret = NULL_RTX;
5ac9118e
KG
2454 int bytes =
2455 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
b08de47e
MM
2456 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2457
5bdc5878 2458 /* Handle a hidden AL argument containing the number of registers for varargs
53c17031
JH
2459 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
2460 any AL settings. */
32ee7d1d 2461 if (mode == VOIDmode)
b08de47e 2462 {
53c17031
JH
2463 if (TARGET_64BIT)
2464 return GEN_INT (cum->maybe_vaarg
2465 ? (cum->sse_nregs < 0
2466 ? SSE_REGPARM_MAX
2467 : cum->sse_regno)
2468 : -1);
2469 else
2470 return constm1_rtx;
b08de47e 2471 }
53c17031
JH
2472 if (TARGET_64BIT)
2473 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2474 &x86_64_int_parameter_registers [cum->regno],
2475 cum->sse_regno);
2476 else
2477 switch (mode)
2478 {
2479 /* For now, pass fp/complex values on the stack. */
2480 default:
2481 break;
2482
2483 case BLKmode:
8d454008
RH
2484 if (bytes < 0)
2485 break;
2486 /* FALLTHRU */
53c17031
JH
2487 case DImode:
2488 case SImode:
2489 case HImode:
2490 case QImode:
2491 if (words <= cum->nregs)
b96a374d
AJ
2492 {
2493 int regno = cum->regno;
2494
2495 /* Fastcall allocates the first two DWORD (SImode) or
2496 smaller arguments to ECX and EDX. */
2497 if (cum->fastcall)
2498 {
2499 if (mode == BLKmode || mode == DImode)
2500 break;
2501
2502 /* ECX not EAX is the first allocated register. */
2503 if (regno == 0)
2504 regno = 2;
2505 }
2506 ret = gen_rtx_REG (mode, regno);
2507 }
53c17031
JH
2508 break;
2509 case TImode:
2510 if (cum->sse_nregs)
2511 ret = gen_rtx_REG (mode, cum->sse_regno);
2512 break;
2513 }
b08de47e
MM
2514
2515 if (TARGET_DEBUG_ARG)
2516 {
2517 fprintf (stderr,
91ea38f9 2518 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
b08de47e
MM
2519 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2520
2521 if (ret)
91ea38f9 2522 print_simple_rtl (stderr, ret);
b08de47e
MM
2523 else
2524 fprintf (stderr, ", stack");
2525
2526 fprintf (stderr, " )\n");
2527 }
2528
2529 return ret;
2530}
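/* Example (illustrative): for an ia32 fastcall function taking two int
   arguments, the first argument lands in %ecx (regno 2, per the
   adjustment above) and the second in %edx; once cum->nregs is
   exhausted, further arguments go on the stack.  */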
53c17031 2531
09b2e78d
ZD
2532/* A C expression that indicates when an argument must be passed by
2533 reference. If nonzero for an argument, a copy of that argument is
2534 made in memory and a pointer to the argument is passed instead of
2535 the argument itself. The pointer is passed in whatever way is
2536 appropriate for passing a pointer to that type. */
2537
2538int
b96a374d
AJ
2539function_arg_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
2540 enum machine_mode mode ATTRIBUTE_UNUSED,
2541 tree type, int named ATTRIBUTE_UNUSED)
09b2e78d
ZD
2542{
2543 if (!TARGET_64BIT)
2544 return 0;
2545
2546 if (type && int_size_in_bytes (type) == -1)
2547 {
2548 if (TARGET_DEBUG_ARG)
2549 fprintf (stderr, "function_arg_pass_by_reference\n");
2550 return 1;
2551 }
2552
2553 return 0;
2554}
2555
8b978a57
JH
2556/* Return true when TYPE should be 128bit aligned for 32bit argument passing
2557 ABI */
2558static bool
b96a374d 2559contains_128bit_aligned_vector_p (tree type)
8b978a57
JH
2560{
2561 enum machine_mode mode = TYPE_MODE (type);
2562 if (SSE_REG_MODE_P (mode)
2563 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
2564 return true;
2565 if (TYPE_ALIGN (type) < 128)
2566 return false;
2567
2568 if (AGGREGATE_TYPE_P (type))
2569 {
2a43945f 2570 /* Walk the aggregates recursively. */
8b978a57
JH
2571 if (TREE_CODE (type) == RECORD_TYPE
2572 || TREE_CODE (type) == UNION_TYPE
2573 || TREE_CODE (type) == QUAL_UNION_TYPE)
2574 {
2575 tree field;
2576
2577 if (TYPE_BINFO (type) != NULL
2578 && TYPE_BINFO_BASETYPES (type) != NULL)
2579 {
2580 tree bases = TYPE_BINFO_BASETYPES (type);
2581 int n_bases = TREE_VEC_LENGTH (bases);
2582 int i;
2583
2584 for (i = 0; i < n_bases; ++i)
2585 {
2586 tree binfo = TREE_VEC_ELT (bases, i);
2587 tree type = BINFO_TYPE (binfo);
2588
2589 if (contains_128bit_aligned_vector_p (type))
2590 return true;
2591 }
2592 }
 2593 /* And now walk the fields of the structure. */
2594 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2595 {
2596 if (TREE_CODE (field) == FIELD_DECL
2597 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
2598 return true;
2599 }
2600 }
 2601 /* Just for use in case some language passes arrays by value. */
2602 else if (TREE_CODE (type) == ARRAY_TYPE)
2603 {
2604 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
2605 return true;
2606 }
2607 else
2608 abort ();
2609 }
2610 return false;
2611}
2612
53c17031
JH
2613/* Gives the alignment boundary, in bits, of an argument with the specified mode
2614 and type. */
2615
2616int
b96a374d 2617ix86_function_arg_boundary (enum machine_mode mode, tree type)
53c17031
JH
2618{
2619 int align;
53c17031
JH
2620 if (type)
2621 align = TYPE_ALIGN (type);
2622 else
2623 align = GET_MODE_ALIGNMENT (mode);
2624 if (align < PARM_BOUNDARY)
2625 align = PARM_BOUNDARY;
8b978a57
JH
2626 if (!TARGET_64BIT)
2627 {
2628 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
2629 make an exception for SSE modes since these require 128bit
b96a374d 2630 alignment.
8b978a57
JH
2631
2632 The handling here differs from field_alignment. ICC aligns MMX
2633 arguments to 4 byte boundaries, while structure fields are aligned
2634 to 8 byte boundaries. */
2635 if (!type)
2636 {
2637 if (!SSE_REG_MODE_P (mode))
2638 align = PARM_BOUNDARY;
2639 }
2640 else
2641 {
2642 if (!contains_128bit_aligned_vector_p (type))
2643 align = PARM_BOUNDARY;
2644 }
2645 if (align != PARM_BOUNDARY && !TARGET_SSE)
2646 abort();
2647 }
53c17031
JH
2648 if (align > 128)
2649 align = 128;
2650 return align;
2651}
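/* Example (illustrative): on ia32 a plain double argument gets
   PARM_BOUNDARY (32-bit) alignment, while an __m128 argument, or a
   structure containing one, keeps the 128-bit alignment the SSE modes
   require.  */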
2652
2653/* Return true if N is a possible register number of function value. */
2654bool
b96a374d 2655ix86_function_value_regno_p (int regno)
53c17031
JH
2656{
2657 if (!TARGET_64BIT)
2658 {
2659 return ((regno) == 0
2660 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2661 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2662 }
2663 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2664 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2665 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2666}
2667
2668/* Define how to find the value returned by a function.
2669 VALTYPE is the data type of the value (as a tree).
2670 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2671 otherwise, FUNC is 0. */
2672rtx
b96a374d 2673ix86_function_value (tree valtype)
53c17031
JH
2674{
2675 if (TARGET_64BIT)
2676 {
2677 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2678 REGPARM_MAX, SSE_REGPARM_MAX,
2679 x86_64_int_return_registers, 0);
d1f87653
KH
 2680 /* For zero sized structures, construct_container returns NULL, but we need
 2681 to keep the rest of the compiler happy by returning a meaningful value. */
53c17031
JH
2682 if (!ret)
2683 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2684 return ret;
2685 }
2686 else
b069de3b
SS
2687 return gen_rtx_REG (TYPE_MODE (valtype),
2688 ix86_value_regno (TYPE_MODE (valtype)));
53c17031
JH
2689}
2690
f5143c46 2691/* Return nonzero iff TYPE is returned in memory. */
53c17031 2692int
b96a374d 2693ix86_return_in_memory (tree type)
53c17031
JH
2694{
2695 int needed_intregs, needed_sseregs;
2696 if (TARGET_64BIT)
2697 {
2698 return !examine_argument (TYPE_MODE (type), type, 1,
2699 &needed_intregs, &needed_sseregs);
2700 }
2701 else
2702 {
5e062767
DS
2703 if (TYPE_MODE (type) == BLKmode)
2704 return 1;
2705 else if (MS_AGGREGATE_RETURN
2706 && AGGREGATE_TYPE_P (type)
2707 && int_size_in_bytes(type) <= 8)
2708 return 0;
2709 else if ((VECTOR_MODE_P (TYPE_MODE (type))
2710 && int_size_in_bytes (type) == 8)
2711 || (int_size_in_bytes (type) > 12
2712 && TYPE_MODE (type) != TImode
2713 && TYPE_MODE (type) != TFmode
2714 && !VECTOR_MODE_P (TYPE_MODE (type))))
53c17031
JH
2715 return 1;
2716 return 0;
2717 }
2718}
2719
2720/* Define how to find the value returned by a library function
2721 assuming the value has mode MODE. */
2722rtx
b96a374d 2723ix86_libcall_value (enum machine_mode mode)
53c17031
JH
2724{
2725 if (TARGET_64BIT)
2726 {
2727 switch (mode)
2728 {
2729 case SFmode:
2730 case SCmode:
2731 case DFmode:
2732 case DCmode:
2733 return gen_rtx_REG (mode, FIRST_SSE_REG);
2734 case TFmode:
2735 case TCmode:
2736 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2737 default:
2738 return gen_rtx_REG (mode, 0);
2739 }
2740 }
2741 else
b069de3b
SS
2742 return gen_rtx_REG (mode, ix86_value_regno (mode));
2743}
2744
2745/* Given a mode, return the register to use for a return value. */
2746
2747static int
b96a374d 2748ix86_value_regno (enum machine_mode mode)
b069de3b
SS
2749{
2750 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
2751 return FIRST_FLOAT_REG;
2752 if (mode == TImode || VECTOR_MODE_P (mode))
2753 return FIRST_SSE_REG;
2754 return 0;
53c17031 2755}
ad919812
JH
2756\f
2757/* Create the va_list data type. */
53c17031 2758
ad919812 2759tree
b96a374d 2760ix86_build_va_list (void)
ad919812
JH
2761{
2762 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
53c17031 2763
ad919812
JH
2764 /* For i386 we use plain pointer to argument area. */
2765 if (!TARGET_64BIT)
2766 return build_pointer_type (char_type_node);
2767
f1e639b1 2768 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
ad919812
JH
2769 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2770
fce5a9f2 2771 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
ad919812 2772 unsigned_type_node);
fce5a9f2 2773 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
ad919812
JH
2774 unsigned_type_node);
2775 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2776 ptr_type_node);
2777 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2778 ptr_type_node);
2779
2780 DECL_FIELD_CONTEXT (f_gpr) = record;
2781 DECL_FIELD_CONTEXT (f_fpr) = record;
2782 DECL_FIELD_CONTEXT (f_ovf) = record;
2783 DECL_FIELD_CONTEXT (f_sav) = record;
2784
2785 TREE_CHAIN (record) = type_decl;
2786 TYPE_NAME (record) = type_decl;
2787 TYPE_FIELDS (record) = f_gpr;
2788 TREE_CHAIN (f_gpr) = f_fpr;
2789 TREE_CHAIN (f_fpr) = f_ovf;
2790 TREE_CHAIN (f_ovf) = f_sav;
2791
2792 layout_type (record);
2793
2794 /* The correct type is an array type of one element. */
2795 return build_array_type (record, build_index_type (size_zero_node));
2796}
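/* The record built above corresponds to the x86-64 psABI va_list
   layout; a C-level sketch of the resulting type:

     typedef struct {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } __va_list_tag;
     typedef __va_list_tag va_list[1];
*/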
2797
2798/* Perform any needed actions needed for a function that is receiving a
fce5a9f2 2799 variable number of arguments.
ad919812
JH
2800
2801 CUM is as above.
2802
2803 MODE and TYPE are the mode and type of the current parameter.
2804
2805 PRETEND_SIZE is a variable that should be set to the amount of stack
2806 that must be pushed by the prolog to pretend that our caller pushed
2807 it.
2808
2809 Normally, this macro will push all remaining incoming registers on the
2810 stack and set PRETEND_SIZE to the length of the registers pushed. */
2811
2812void
b96a374d
AJ
2813ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
2814 tree type, int *pretend_size ATTRIBUTE_UNUSED,
2815 int no_rtl)
ad919812
JH
2816{
2817 CUMULATIVE_ARGS next_cum;
2818 rtx save_area = NULL_RTX, mem;
2819 rtx label;
2820 rtx label_ref;
2821 rtx tmp_reg;
2822 rtx nsse_reg;
2823 int set;
2824 tree fntype;
2825 int stdarg_p;
2826 int i;
2827
2828 if (!TARGET_64BIT)
2829 return;
2830
2831 /* Indicate to allocate space on the stack for varargs save area. */
2832 ix86_save_varrargs_registers = 1;
2833
5474eed5
JH
2834 cfun->stack_alignment_needed = 128;
2835
ad919812
JH
2836 fntype = TREE_TYPE (current_function_decl);
2837 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2838 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2839 != void_type_node));
2840
2841 /* For varargs, we do not want to skip the dummy va_dcl argument.
2842 For stdargs, we do want to skip the last named argument. */
2843 next_cum = *cum;
2844 if (stdarg_p)
2845 function_arg_advance (&next_cum, mode, type, 1);
2846
2847 if (!no_rtl)
2848 save_area = frame_pointer_rtx;
2849
2850 set = get_varargs_alias_set ();
2851
2852 for (i = next_cum.regno; i < ix86_regparm; i++)
2853 {
2854 mem = gen_rtx_MEM (Pmode,
2855 plus_constant (save_area, i * UNITS_PER_WORD));
0692acba 2856 set_mem_alias_set (mem, set);
ad919812
JH
2857 emit_move_insn (mem, gen_rtx_REG (Pmode,
2858 x86_64_int_parameter_registers[i]));
2859 }
2860
2861 if (next_cum.sse_nregs)
2862 {
 2863 /* Now emit code to save SSE registers. The AX parameter contains the number
d1f87653 2864 of SSE parameter registers used to call this function. We use
ad919812
JH
2865 sse_prologue_save insn template that produces computed jump across
2866 SSE saves. We need some preparation work to get this working. */
2867
2868 label = gen_label_rtx ();
2869 label_ref = gen_rtx_LABEL_REF (Pmode, label);
2870
2871 /* Compute address to jump to :
 2872 label - eax*4 + nnamed_sse_arguments*4 */
2873 tmp_reg = gen_reg_rtx (Pmode);
2874 nsse_reg = gen_reg_rtx (Pmode);
2875 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
2876 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
44cf5b6a 2877 gen_rtx_MULT (Pmode, nsse_reg,
ad919812
JH
2878 GEN_INT (4))));
2879 if (next_cum.sse_regno)
2880 emit_move_insn
2881 (nsse_reg,
2882 gen_rtx_CONST (DImode,
2883 gen_rtx_PLUS (DImode,
2884 label_ref,
2885 GEN_INT (next_cum.sse_regno * 4))));
2886 else
2887 emit_move_insn (nsse_reg, label_ref);
2888 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
2889
 2890 /* Compute the address of the memory block we save into. We always use a
 2891 pointer pointing 127 bytes after the first byte to store - this is needed
 2892 to keep the instruction size limited to 4 bytes. */
2893 tmp_reg = gen_reg_rtx (Pmode);
8ac61af7
RK
2894 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2895 plus_constant (save_area,
2896 8 * REGPARM_MAX + 127)));
ad919812 2897 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
14f73b5a 2898 set_mem_alias_set (mem, set);
8ac61af7 2899 set_mem_align (mem, BITS_PER_WORD);
ad919812
JH
2900
2901 /* And finally do the dirty job! */
8ac61af7
RK
2902 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
2903 GEN_INT (next_cum.sse_regno), label));
ad919812
JH
2904 }
2905
2906}
2907
2908/* Implement va_start. */
2909
2910void
b96a374d 2911ix86_va_start (tree valist, rtx nextarg)
ad919812
JH
2912{
2913 HOST_WIDE_INT words, n_gpr, n_fpr;
2914 tree f_gpr, f_fpr, f_ovf, f_sav;
2915 tree gpr, fpr, ovf, sav, t;
2916
2917 /* Only 64bit target needs something special. */
2918 if (!TARGET_64BIT)
2919 {
e5faf155 2920 std_expand_builtin_va_start (valist, nextarg);
ad919812
JH
2921 return;
2922 }
2923
2924 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2925 f_fpr = TREE_CHAIN (f_gpr);
2926 f_ovf = TREE_CHAIN (f_fpr);
2927 f_sav = TREE_CHAIN (f_ovf);
2928
2929 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2930 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2931 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2932 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2933 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2934
2935 /* Count number of gp and fp argument registers used. */
2936 words = current_function_args_info.words;
2937 n_gpr = current_function_args_info.regno;
2938 n_fpr = current_function_args_info.sse_regno;
2939
2940 if (TARGET_DEBUG_ARG)
2941 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
b531087a 2942 (int) words, (int) n_gpr, (int) n_fpr);
ad919812
JH
2943
2944 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
2945 build_int_2 (n_gpr * 8, 0));
2946 TREE_SIDE_EFFECTS (t) = 1;
2947 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2948
2949 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
2950 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
2951 TREE_SIDE_EFFECTS (t) = 1;
2952 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2953
2954 /* Find the overflow area. */
2955 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
2956 if (words != 0)
2957 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
2958 build_int_2 (words * UNITS_PER_WORD, 0));
2959 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2960 TREE_SIDE_EFFECTS (t) = 1;
2961 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2962
2963 /* Find the register save area.
2964 Prologue of the function save it right above stack frame. */
2965 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
2966 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
2967 TREE_SIDE_EFFECTS (t) = 1;
2968 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2969}
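/* Worked example (illustrative): in `void f (int a, ...)', one named
   integer argument has been consumed, so va_start stores
   gp_offset = 1 * 8 = 8 and, with no named SSE arguments,
   fp_offset = 0 * 16 + 8 * REGPARM_MAX = 48 (REGPARM_MAX being 6 in
   64-bit mode).  */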
2970
2971/* Implement va_arg. */
2972rtx
b96a374d 2973ix86_va_arg (tree valist, tree type)
ad919812 2974{
0139adca 2975 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
ad919812
JH
2976 tree f_gpr, f_fpr, f_ovf, f_sav;
2977 tree gpr, fpr, ovf, sav, t;
b932f770 2978 int size, rsize;
ad919812
JH
2979 rtx lab_false, lab_over = NULL_RTX;
2980 rtx addr_rtx, r;
2981 rtx container;
09b2e78d 2982 int indirect_p = 0;
ad919812
JH
2983
2984 /* Only 64bit target needs something special. */
2985 if (!TARGET_64BIT)
2986 {
2987 return std_expand_builtin_va_arg (valist, type);
2988 }
2989
2990 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2991 f_fpr = TREE_CHAIN (f_gpr);
2992 f_ovf = TREE_CHAIN (f_fpr);
2993 f_sav = TREE_CHAIN (f_ovf);
2994
2995 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2996 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2997 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2998 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2999 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
3000
3001 size = int_size_in_bytes (type);
09b2e78d
ZD
3002 if (size == -1)
3003 {
3004 /* Passed by reference. */
3005 indirect_p = 1;
3006 type = build_pointer_type (type);
3007 size = int_size_in_bytes (type);
3008 }
ad919812
JH
3009 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3010
3011 container = construct_container (TYPE_MODE (type), type, 0,
3012 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
3013 /*
3014 * Pull the value out of the saved registers ...
3015 */
3016
3017 addr_rtx = gen_reg_rtx (Pmode);
3018
3019 if (container)
3020 {
3021 rtx int_addr_rtx, sse_addr_rtx;
3022 int needed_intregs, needed_sseregs;
3023 int need_temp;
3024
3025 lab_over = gen_label_rtx ();
3026 lab_false = gen_label_rtx ();
8bad7136 3027
ad919812
JH
3028 examine_argument (TYPE_MODE (type), type, 0,
3029 &needed_intregs, &needed_sseregs);
3030
3031
3032 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
3033 || TYPE_ALIGN (type) > 128);
3034
d1f87653 3035 /* In case we are passing a structure, verify that it is a consecutive block
ad919812
JH
 3036 in the register save area. If not, we need to do moves. */
3037 if (!need_temp && !REG_P (container))
3038 {
d1f87653 3039 /* Verify that all registers are strictly consecutive */
ad919812
JH
3040 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
3041 {
3042 int i;
3043
3044 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3045 {
3046 rtx slot = XVECEXP (container, 0, i);
b531087a 3047 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
ad919812
JH
3048 || INTVAL (XEXP (slot, 1)) != i * 16)
3049 need_temp = 1;
3050 }
3051 }
3052 else
3053 {
3054 int i;
3055
3056 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3057 {
3058 rtx slot = XVECEXP (container, 0, i);
b531087a 3059 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
ad919812
JH
3060 || INTVAL (XEXP (slot, 1)) != i * 8)
3061 need_temp = 1;
3062 }
3063 }
3064 }
3065 if (!need_temp)
3066 {
3067 int_addr_rtx = addr_rtx;
3068 sse_addr_rtx = addr_rtx;
3069 }
3070 else
3071 {
3072 int_addr_rtx = gen_reg_rtx (Pmode);
3073 sse_addr_rtx = gen_reg_rtx (Pmode);
3074 }
3075 /* First ensure that we fit completely in registers. */
3076 if (needed_intregs)
3077 {
3078 emit_cmp_and_jump_insns (expand_expr
3079 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
3080 GEN_INT ((REGPARM_MAX - needed_intregs +
3081 1) * 8), GE, const1_rtx, SImode,
d43e0b7d 3082 1, lab_false);
ad919812
JH
3083 }
3084 if (needed_sseregs)
3085 {
3086 emit_cmp_and_jump_insns (expand_expr
3087 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
3088 GEN_INT ((SSE_REGPARM_MAX -
3089 needed_sseregs + 1) * 16 +
3090 REGPARM_MAX * 8), GE, const1_rtx,
d43e0b7d 3091 SImode, 1, lab_false);
ad919812
JH
3092 }
3093
3094 /* Compute index to start of area used for integer regs. */
3095 if (needed_intregs)
3096 {
3097 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
3098 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
3099 if (r != int_addr_rtx)
3100 emit_move_insn (int_addr_rtx, r);
3101 }
3102 if (needed_sseregs)
3103 {
3104 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
3105 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
3106 if (r != sse_addr_rtx)
3107 emit_move_insn (sse_addr_rtx, r);
3108 }
3109 if (need_temp)
3110 {
3111 int i;
3112 rtx mem;
70642ee3 3113 rtx x;
ad919812 3114
b932f770 3115 /* Never use the memory itself, as it has the alias set. */
70642ee3
JH
3116 x = XEXP (assign_temp (type, 0, 1, 0), 0);
3117 mem = gen_rtx_MEM (BLKmode, x);
3118 force_operand (x, addr_rtx);
0692acba 3119 set_mem_alias_set (mem, get_varargs_alias_set ());
8ac61af7 3120 set_mem_align (mem, BITS_PER_UNIT);
b932f770 3121
ad919812
JH
3122 for (i = 0; i < XVECLEN (container, 0); i++)
3123 {
3124 rtx slot = XVECEXP (container, 0, i);
3125 rtx reg = XEXP (slot, 0);
3126 enum machine_mode mode = GET_MODE (reg);
3127 rtx src_addr;
3128 rtx src_mem;
3129 int src_offset;
3130 rtx dest_mem;
3131
3132 if (SSE_REGNO_P (REGNO (reg)))
3133 {
3134 src_addr = sse_addr_rtx;
3135 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
3136 }
3137 else
3138 {
3139 src_addr = int_addr_rtx;
3140 src_offset = REGNO (reg) * 8;
3141 }
3142 src_mem = gen_rtx_MEM (mode, src_addr);
0692acba 3143 set_mem_alias_set (src_mem, get_varargs_alias_set ());
ad919812
JH
3144 src_mem = adjust_address (src_mem, mode, src_offset);
3145 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
ad919812
JH
3146 emit_move_insn (dest_mem, src_mem);
3147 }
3148 }
3149
3150 if (needed_intregs)
3151 {
3152 t =
3153 build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
3154 build_int_2 (needed_intregs * 8, 0));
3155 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
3156 TREE_SIDE_EFFECTS (t) = 1;
3157 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3158 }
3159 if (needed_sseregs)
3160 {
3161 t =
3162 build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
3163 build_int_2 (needed_sseregs * 16, 0));
3164 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
3165 TREE_SIDE_EFFECTS (t) = 1;
3166 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3167 }
3168
3169 emit_jump_insn (gen_jump (lab_over));
3170 emit_barrier ();
3171 emit_label (lab_false);
3172 }
3173
3174 /* ... otherwise out of the overflow area. */
3175
3176 /* Care for on-stack alignment if needed. */
3177 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
3178 t = ovf;
3179 else
3180 {
3181 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
3182 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
3183 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
3184 }
3185 t = save_expr (t);
3186
3187 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
3188 if (r != addr_rtx)
3189 emit_move_insn (addr_rtx, r);
3190
3191 t =
3192 build (PLUS_EXPR, TREE_TYPE (t), t,
3193 build_int_2 (rsize * UNITS_PER_WORD, 0));
3194 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3195 TREE_SIDE_EFFECTS (t) = 1;
3196 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3197
3198 if (container)
3199 emit_label (lab_over);
3200
09b2e78d
ZD
3201 if (indirect_p)
3202 {
3203 r = gen_rtx_MEM (Pmode, addr_rtx);
3204 set_mem_alias_set (r, get_varargs_alias_set ());
3205 emit_move_insn (addr_rtx, r);
3206 }
3207
ad919812
JH
3208 return addr_rtx;
3209}
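
/* A corresponding sketch (an illustration only, not used by the compiler)
   of the control flow that the RTL emitted above implements, for a value
   needing NEEDED_INTREGS general registers and RSIZE words, using the
   hypothetical va_list layout sketched after ix86_va_start.  */

static void *
sketch_va_arg_gpr (struct sketch_x86_64_va_list *ap,
		   int needed_intregs, int rsize)
{
  void *addr;

  /* First ensure that we fit completely in registers, mirroring the
     emit_cmp_and_jump_insns test above (REGPARM_MAX being 6).  */
  if (ap->gp_offset < (unsigned int) (6 - needed_intregs + 1) * 8)
    {
      addr = (char *) ap->reg_save_area + ap->gp_offset;
      ap->gp_offset += needed_intregs * 8;
    }
  else
    {
      /* ... otherwise fetch from the overflow area and bump it.  */
      addr = ap->overflow_arg_area;
      ap->overflow_arg_area = (char *) ap->overflow_arg_area + rsize * 8;
    }
  return addr;
}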
\f
/* Return nonzero if OP is either an i387 or SSE fp register.  */
int
any_fp_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return ANY_FP_REG_P (op);
}

/* Return nonzero if OP is an i387 fp register.  */
int
fp_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return FP_REG_P (op);
}

/* Return nonzero if OP is a non-fp register_operand.  */
int
register_and_not_any_fp_reg_operand (rtx op, enum machine_mode mode)
{
  return register_operand (op, mode) && !ANY_FP_REG_P (op);
}

/* Return nonzero if OP is a register operand other than an
   i387 fp register.  */
int
register_and_not_fp_reg_operand (rtx op, enum machine_mode mode)
{
  return register_operand (op, mode) && !FP_REG_P (op);
}

/* Return nonzero if OP is a general operand representable on x86_64.  */

int
x86_64_general_operand (rtx op, enum machine_mode mode)
{
  if (!TARGET_64BIT)
    return general_operand (op, mode);
  if (nonimmediate_operand (op, mode))
    return 1;
  return x86_64_sign_extended_value (op);
}

/* Return nonzero if OP is a general operand representable on x86_64
   as either a sign extended or zero extended constant.  */

int
x86_64_szext_general_operand (rtx op, enum machine_mode mode)
{
  if (!TARGET_64BIT)
    return general_operand (op, mode);
  if (nonimmediate_operand (op, mode))
    return 1;
  return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
}

/* Return nonzero if OP is a nonmemory operand representable on x86_64.  */

int
x86_64_nonmemory_operand (rtx op, enum machine_mode mode)
{
  if (!TARGET_64BIT)
    return nonmemory_operand (op, mode);
  if (register_operand (op, mode))
    return 1;
  return x86_64_sign_extended_value (op);
}

/* Return nonzero if OP is a nonmemory operand acceptable by the movabs
   patterns.  */

int
x86_64_movabs_operand (rtx op, enum machine_mode mode)
{
  if (!TARGET_64BIT || !flag_pic)
    return nonmemory_operand (op, mode);
  if (register_operand (op, mode) || x86_64_sign_extended_value (op))
    return 1;
  if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
    return 1;
  return 0;
}

/* Return nonzero if OP is a nonmemory operand representable on x86_64
   as either a register or a sign or zero extended constant.  */

int
x86_64_szext_nonmemory_operand (rtx op, enum machine_mode mode)
{
  if (!TARGET_64BIT)
    return nonmemory_operand (op, mode);
  if (register_operand (op, mode))
    return 1;
  return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
}

/* Return nonzero if OP is an immediate operand representable on x86_64.  */

int
x86_64_immediate_operand (rtx op, enum machine_mode mode)
{
  if (!TARGET_64BIT)
    return immediate_operand (op, mode);
  return x86_64_sign_extended_value (op);
}

/* Return nonzero if OP is an immediate operand representable on x86_64
   as a zero extended value.  */

int
x86_64_zext_immediate_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return x86_64_zero_extended_value (op);
}

/* Return nonzero if OP is (const_int 1), else return zero.  */

int
const_int_1_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
}

/* Return nonzero if OP is a CONST_INT >= 1 and <= 31 (a valid operand
   for shift & compare patterns, as shifting by 0 does not change flags),
   else return zero.  */

int
const_int_1_31_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31);
}

/* Returns 1 if OP is either a symbol reference or a sum of a symbol
   reference and a constant.  */

int
symbolic_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  switch (GET_CODE (op))
    {
    case SYMBOL_REF:
    case LABEL_REF:
      return 1;

    case CONST:
      op = XEXP (op, 0);
      if (GET_CODE (op) == SYMBOL_REF
	  || GET_CODE (op) == LABEL_REF
	  || (GET_CODE (op) == UNSPEC
	      && (XINT (op, 1) == UNSPEC_GOT
		  || XINT (op, 1) == UNSPEC_GOTOFF
		  || XINT (op, 1) == UNSPEC_GOTPCREL)))
	return 1;
      if (GET_CODE (op) != PLUS
	  || GET_CODE (XEXP (op, 1)) != CONST_INT)
	return 0;

      op = XEXP (op, 0);
      if (GET_CODE (op) == SYMBOL_REF
	  || GET_CODE (op) == LABEL_REF)
	return 1;
      /* Only @GOTOFF gets offsets.  */
      if (GET_CODE (op) != UNSPEC
	  || XINT (op, 1) != UNSPEC_GOTOFF)
	return 0;

      op = XVECEXP (op, 0, 0);
      if (GET_CODE (op) == SYMBOL_REF
	  || GET_CODE (op) == LABEL_REF)
	return 1;
      return 0;

    default:
      return 0;
    }
}

/* Return true if the operand contains a @GOT or @GOTOFF reference.  */

int
pic_symbolic_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  if (GET_CODE (op) != CONST)
    return 0;
  op = XEXP (op, 0);
  if (TARGET_64BIT)
    {
      if (GET_CODE (XEXP (op, 0)) == UNSPEC)
	return 1;
    }
  else
    {
      if (GET_CODE (op) == UNSPEC)
	return 1;
      if (GET_CODE (op) != PLUS
	  || GET_CODE (XEXP (op, 1)) != CONST_INT)
	return 0;
      op = XEXP (op, 0);
      if (GET_CODE (op) == UNSPEC)
	return 1;
    }
  return 0;
}

/* Return true if OP is a symbolic operand that resolves locally.  */

static int
local_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  if (GET_CODE (op) == CONST
      && GET_CODE (XEXP (op, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
    op = XEXP (XEXP (op, 0), 0);

  if (GET_CODE (op) == LABEL_REF)
    return 1;

  if (GET_CODE (op) != SYMBOL_REF)
    return 0;

  if (SYMBOL_REF_LOCAL_P (op))
    return 1;

  /* There is, however, a not insubstantial body of code in the rest of
     the compiler that assumes it can just stick the results of
     ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done.  */
  /* ??? This is a hack.  Should update the body of the compiler to
     always create a DECL and invoke targetm.encode_section_info.  */
  if (strncmp (XSTR (op, 0), internal_label_prefix,
	       internal_label_prefix_len) == 0)
    return 1;

  return 0;
}

/* Test for various thread-local symbols.  */

int
tls_symbolic_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  if (GET_CODE (op) != SYMBOL_REF)
    return 0;
  return SYMBOL_REF_TLS_MODEL (op);
}

static inline int
tls_symbolic_operand_1 (rtx op, enum tls_model kind)
{
  if (GET_CODE (op) != SYMBOL_REF)
    return 0;
  return SYMBOL_REF_TLS_MODEL (op) == kind;
}

int
global_dynamic_symbolic_operand (register rtx op,
				 enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
}

int
local_dynamic_symbolic_operand (register rtx op,
				enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
}

int
initial_exec_symbolic_operand (register rtx op,
			       enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
}

int
local_exec_symbolic_operand (register rtx op,
			     enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
}

/* Test for a valid operand for a call instruction.  Don't allow the
   arg pointer register or virtual regs since they may decay into
   reg + const, which the patterns can't handle.  */

int
call_insn_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  /* Disallow indirect through a virtual register.  This leads to
     compiler aborts when trying to eliminate them.  */
  if (GET_CODE (op) == REG
      && (op == arg_pointer_rtx
	  || op == frame_pointer_rtx
	  || (REGNO (op) >= FIRST_PSEUDO_REGISTER
	      && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
    return 0;

  /* Disallow `call 1234'.  Due to varying assembler lameness this
     gets either rejected or translated to `call .+1234'.  */
  if (GET_CODE (op) == CONST_INT)
    return 0;

  /* Explicitly allow SYMBOL_REF even if pic.  */
  if (GET_CODE (op) == SYMBOL_REF)
    return 1;

  /* Otherwise we can allow any general_operand in the address.  */
  return general_operand (op, Pmode);
}

/* Test for a valid operand for a sibling call instruction.  Don't allow
   the arg pointer register or virtual regs since they may decay into
   reg + const, which the patterns can't handle.  */

int
sibcall_insn_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  /* Disallow indirect through a virtual register.  This leads to
     compiler aborts when trying to eliminate them.  */
  if (GET_CODE (op) == REG
      && (op == arg_pointer_rtx
	  || op == frame_pointer_rtx
	  || (REGNO (op) >= FIRST_PSEUDO_REGISTER
	      && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
    return 0;

  /* Explicitly allow SYMBOL_REF even if pic.  */
  if (GET_CODE (op) == SYMBOL_REF)
    return 1;

  /* Otherwise we can only allow register operands.  */
  return register_operand (op, Pmode);
}

int
constant_call_address_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  if (GET_CODE (op) == CONST
      && GET_CODE (XEXP (op, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
    op = XEXP (XEXP (op, 0), 0);
  return GET_CODE (op) == SYMBOL_REF;
}

/* Match exactly zero and one.  */

int
const0_operand (register rtx op, enum machine_mode mode)
{
  return op == CONST0_RTX (mode);
}

int
const1_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return op == const1_rtx;
}

/* Match 2, 4, or 8.  Used for leal multiplicands.  */

int
const248_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return (GET_CODE (op) == CONST_INT
	  && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
}

/* True if this is a constant appropriate for an increment or decrement.  */

int
incdec_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  /* On Pentium4, the inc and dec operations cause an extra dependency on
     the flags register, since the carry flag is not set.  */
  if (TARGET_PENTIUM4 && !optimize_size)
    return 0;
  return op == const1_rtx || op == constm1_rtx;
}

/* Return nonzero if OP is acceptable as an operand of the DImode shift
   expander.  */

int
shiftdi_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  if (TARGET_64BIT)
    return nonimmediate_operand (op, mode);
  else
    return register_operand (op, mode);
}

/* Return false if this is the stack pointer, or any other fake
   register eliminable to the stack pointer.  Otherwise, this is
   a register operand.

   This is used to prevent esp from being used as an index reg,
   which would only happen in pathological cases.  */

int
reg_no_sp_operand (register rtx op, enum machine_mode mode)
{
  rtx t = op;
  if (GET_CODE (t) == SUBREG)
    t = SUBREG_REG (t);
  if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
    return 0;

  return register_operand (op, mode);
}

int
mmx_reg_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return MMX_REG_P (op);
}

/* Return false if this is any eliminable register.  Otherwise
   general_operand.  */

int
general_no_elim_operand (register rtx op, enum machine_mode mode)
{
  rtx t = op;
  if (GET_CODE (t) == SUBREG)
    t = SUBREG_REG (t);
  if (t == arg_pointer_rtx || t == frame_pointer_rtx
      || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
      || t == virtual_stack_dynamic_rtx)
    return 0;
  if (REG_P (t)
      && REGNO (t) >= FIRST_VIRTUAL_REGISTER
      && REGNO (t) <= LAST_VIRTUAL_REGISTER)
    return 0;

  return general_operand (op, mode);
}

/* Return false if this is any eliminable register.  Otherwise
   register_operand or const_int.  */

int
nonmemory_no_elim_operand (register rtx op, enum machine_mode mode)
{
  rtx t = op;
  if (GET_CODE (t) == SUBREG)
    t = SUBREG_REG (t);
  if (t == arg_pointer_rtx || t == frame_pointer_rtx
      || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
      || t == virtual_stack_dynamic_rtx)
    return 0;

  return GET_CODE (op) == CONST_INT || register_operand (op, mode);
}

/* Return false if this is any eliminable register or stack register;
   otherwise work like register_operand.  */

int
index_register_operand (register rtx op, enum machine_mode mode)
{
  rtx t = op;
  if (GET_CODE (t) == SUBREG)
    t = SUBREG_REG (t);
  if (!REG_P (t))
    return 0;
  if (t == arg_pointer_rtx
      || t == frame_pointer_rtx
      || t == virtual_incoming_args_rtx
      || t == virtual_stack_vars_rtx
      || t == virtual_stack_dynamic_rtx
      || REGNO (t) == STACK_POINTER_REGNUM)
    return 0;

  return general_operand (op, mode);
}

/* Return true if op is a Q_REGS class register.  */

int
q_regs_operand (register rtx op, enum machine_mode mode)
{
  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  return ANY_QI_REG_P (op);
}

/* Return true if op is a flags register.  */

int
flags_reg_operand (register rtx op, enum machine_mode mode)
{
  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;
  return REG_P (op) && REGNO (op) == FLAGS_REG && GET_MODE (op) != VOIDmode;
}

/* Return true if op is a NON_Q_REGS class register.  */

int
non_q_regs_operand (register rtx op, enum machine_mode mode)
{
  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  return NON_QI_REG_P (op);
}

/* Return true if op is a memory reference to a constant-pool vector all
   of whose elements except the lowest are zero.  */

int
zero_extended_scalar_load_operand (rtx op,
				   enum machine_mode mode ATTRIBUTE_UNUSED)
{
  unsigned n_elts;
  if (GET_CODE (op) != MEM)
    return 0;
  op = maybe_get_pool_constant (op);
  if (!op)
    return 0;
  if (GET_CODE (op) != CONST_VECTOR)
    return 0;
  n_elts =
    (GET_MODE_SIZE (GET_MODE (op)) /
     GET_MODE_SIZE (GET_MODE_INNER (GET_MODE (op))));
  for (n_elts--; n_elts > 0; n_elts--)
    {
      rtx elt = CONST_VECTOR_ELT (op, n_elts);
      if (elt != CONST0_RTX (GET_MODE_INNER (GET_MODE (op))))
	return 0;
    }
  return 1;
}

/* Return 1 when OP is an operand acceptable for a standard SSE move.  */
int
vector_move_operand (rtx op, enum machine_mode mode)
{
  if (nonimmediate_operand (op, mode))
    return 1;
  if (GET_MODE (op) != mode && mode != VOIDmode)
    return 0;
  return (op == CONST0_RTX (GET_MODE (op)));
}

/* Return true if op is a valid address and does not contain
   a segment override.  */

int
no_seg_address_operand (register rtx op, enum machine_mode mode)
{
  struct ix86_address parts;

  if (! address_operand (op, mode))
    return 0;

  if (! ix86_decompose_address (op, &parts))
    abort ();

  return parts.seg == SEG_DEFAULT;
}

/* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
   insns.  */
int
sse_comparison_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  enum rtx_code code = GET_CODE (op);
  switch (code)
    {
    /* Operations supported directly.  */
    case EQ:
    case LT:
    case LE:
    case UNORDERED:
    case NE:
    case UNGE:
    case UNGT:
    case ORDERED:
      return 1;
    /* These are equivalent to ones above in non-IEEE comparisons.  */
    case UNEQ:
    case UNLT:
    case UNLE:
    case LTGT:
    case GE:
    case GT:
      return !TARGET_IEEE_FP;
    default:
      return 0;
    }
}
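
/* For reference (an informal note, not used by the code): the eight
   predicates encodable in the CMPSS/CMPPS immediate byte line up with
   the directly supported codes above as

	0 eq  -> EQ	1 lt  -> LT	2 le  -> LE	3 unord -> UNORDERED
	4 neq -> NE	5 nlt -> UNGE	6 nle -> UNGT	7 ord	-> ORDERED

   so e.g. UNGE is expressed as "not less than", which also holds when
   either operand is a NaN.  */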

/* Return 1 if OP is a valid comparison operator in a valid mode.  */
int
ix86_comparison_operator (register rtx op, enum machine_mode mode)
{
  enum machine_mode inmode;
  enum rtx_code code = GET_CODE (op);
  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;
  if (GET_RTX_CLASS (code) != '<')
    return 0;
  inmode = GET_MODE (XEXP (op, 0));

  if (inmode == CCFPmode || inmode == CCFPUmode)
    {
      enum rtx_code second_code, bypass_code;
      ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
      return (bypass_code == NIL && second_code == NIL);
    }
  switch (code)
    {
    case EQ: case NE:
      return 1;
    case LT: case GE:
      if (inmode == CCmode || inmode == CCGCmode
	  || inmode == CCGOCmode || inmode == CCNOmode)
	return 1;
      return 0;
    case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
      if (inmode == CCmode)
	return 1;
      return 0;
    case GT: case LE:
      if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
	return 1;
      return 0;
    default:
      return 0;
    }
}

/* Return 1 if OP is a valid comparison operator testing whether the
   carry flag is set.  */
int
ix86_carry_flag_operator (register rtx op, enum machine_mode mode)
{
  enum machine_mode inmode;
  enum rtx_code code = GET_CODE (op);

  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;
  if (GET_RTX_CLASS (code) != '<')
    return 0;
  inmode = GET_MODE (XEXP (op, 0));
  if (GET_CODE (XEXP (op, 0)) != REG
      || REGNO (XEXP (op, 0)) != FLAGS_REG
      || XEXP (op, 1) != const0_rtx)
    return 0;

  if (inmode == CCFPmode || inmode == CCFPUmode)
    {
      enum rtx_code second_code, bypass_code;

      ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
      if (bypass_code != NIL || second_code != NIL)
	return 0;
      code = ix86_fp_compare_code_to_integer (code);
    }
  else if (inmode != CCmode)
    return 0;
  return code == LTU;
}

/* Return 1 if OP is a comparison operator that can be issued by fcmov.  */

int
fcmov_comparison_operator (register rtx op, enum machine_mode mode)
{
  enum machine_mode inmode;
  enum rtx_code code = GET_CODE (op);

  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;
  if (GET_RTX_CLASS (code) != '<')
    return 0;
  inmode = GET_MODE (XEXP (op, 0));
  if (inmode == CCFPmode || inmode == CCFPUmode)
    {
      enum rtx_code second_code, bypass_code;

      ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
      if (bypass_code != NIL || second_code != NIL)
	return 0;
      code = ix86_fp_compare_code_to_integer (code);
    }
  /* The i387 supports only a limited number of condition codes.  */
  switch (code)
    {
    case LTU: case GTU: case LEU: case GEU:
      if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
	return 1;
      return 0;
    case ORDERED: case UNORDERED:
    case EQ: case NE:
      return 1;
    default:
      return 0;
    }
}

/* Return 1 if OP is a binary operator that can be promoted to a wider
   mode.  */

int
promotable_binary_operator (register rtx op,
			    enum machine_mode mode ATTRIBUTE_UNUSED)
{
  switch (GET_CODE (op))
    {
    case MULT:
      /* Modern CPUs have the same latency for HImode and SImode
	 multiplies, but the 386 and 486 do the HImode multiply faster.  */
      return ix86_tune > PROCESSOR_I486;
    case PLUS:
    case AND:
    case IOR:
    case XOR:
    case ASHIFT:
      return 1;
    default:
      return 0;
    }
}

/* Nearly general operand, but accept any const_double, since we wish
   to be able to drop them into memory rather than have them get pulled
   into registers.  */

int
cmp_fp_expander_operand (register rtx op, enum machine_mode mode)
{
  if (mode != VOIDmode && mode != GET_MODE (op))
    return 0;
  if (GET_CODE (op) == CONST_DOUBLE)
    return 1;
  return general_operand (op, mode);
}

/* Match an SI or HImode register for a zero_extract.  */

int
ext_register_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  int regno;
  if ((!TARGET_64BIT || GET_MODE (op) != DImode)
      && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
    return 0;

  if (!register_operand (op, VOIDmode))
    return 0;

  /* Be careful to accept only registers having upper parts.  */
  regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
  return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
}

/* Return 1 if this is a valid binary floating-point operation.
   OP is the expression matched, and MODE is its mode.  */

int
binary_fp_operator (register rtx op, enum machine_mode mode)
{
  if (mode != VOIDmode && mode != GET_MODE (op))
    return 0;

  switch (GET_CODE (op))
    {
    case PLUS:
    case MINUS:
    case MULT:
    case DIV:
      return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;

    default:
      return 0;
    }
}

int
mult_operator (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return GET_CODE (op) == MULT;
}

int
div_operator (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return GET_CODE (op) == DIV;
}

int
arith_or_logical_operator (rtx op, enum machine_mode mode)
{
  return ((mode == VOIDmode || GET_MODE (op) == mode)
	  && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
	      || GET_RTX_CLASS (GET_CODE (op)) == '2'));
}

/* Returns 1 if OP is a memory operand with a displacement.  */

int
memory_displacement_operand (register rtx op, enum machine_mode mode)
{
  struct ix86_address parts;

  if (! memory_operand (op, mode))
    return 0;

  if (! ix86_decompose_address (XEXP (op, 0), &parts))
    abort ();

  return parts.disp != NULL_RTX;
}

/* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
   re-recognize the operand to avoid a copy_to_mode_reg that will fail.

   ??? It seems likely that this will only work because cmpsi is an
   expander, and no actual insns use this.  */

int
cmpsi_operand (rtx op, enum machine_mode mode)
{
  if (nonimmediate_operand (op, mode))
    return 1;

  if (GET_CODE (op) == AND
      && GET_MODE (op) == SImode
      && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
      && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
      && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
      && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
      && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
      && GET_CODE (XEXP (op, 1)) == CONST_INT)
    return 1;

  return 0;
}

/* Returns 1 if OP is a memory operand that cannot be represented by the
   modRM array.  */

int
long_memory_operand (register rtx op, enum machine_mode mode)
{
  if (! memory_operand (op, mode))
    return 0;

  return memory_address_length (op) != 0;
}

/* Return nonzero if the rtx is known aligned.  */

int
aligned_operand (rtx op, enum machine_mode mode)
{
  struct ix86_address parts;

  if (!general_operand (op, mode))
    return 0;

  /* Registers and immediate operands are always "aligned".  */
  if (GET_CODE (op) != MEM)
    return 1;

  /* Don't even try to do any aligned optimizations with volatiles.  */
  if (MEM_VOLATILE_P (op))
    return 0;

  op = XEXP (op, 0);

  /* Pushes and pops are only valid on the stack pointer.  */
  if (GET_CODE (op) == PRE_DEC
      || GET_CODE (op) == POST_INC)
    return 1;

  /* Decode the address.  */
  if (! ix86_decompose_address (op, &parts))
    abort ();

  if (parts.base && GET_CODE (parts.base) == SUBREG)
    parts.base = SUBREG_REG (parts.base);
  if (parts.index && GET_CODE (parts.index) == SUBREG)
    parts.index = SUBREG_REG (parts.index);

  /* Look for some component that isn't known to be aligned.  */
  if (parts.index)
    {
      if (parts.scale < 4
	  && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
	return 0;
    }
  if (parts.base)
    {
      if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
	return 0;
    }
  if (parts.disp)
    {
      if (GET_CODE (parts.disp) != CONST_INT
	  || (INTVAL (parts.disp) & 3) != 0)
	return 0;
    }

  /* Didn't find one -- this must be an aligned address.  */
  return 1;
}
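
/* A worked example (illustrative only): the address in
   `movl 28(%ebx,%ecx,4), %eax' decomposes into parts.base = %ebx,
   parts.index = %ecx, parts.scale = 4 and parts.disp = 28.  It is
   treated as aligned above iff %ebx is known to be 32-bit aligned:
   the scaled index contributes a multiple of four whatever %ecx
   holds, and 28 & 3 == 0.  */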
\f
/* Initialize the table of extra 80387 mathematical constants.  */

static void
init_ext_80387_constants (void)
{
  static const char * cst[5] =
  {
    "0.3010299956639811952256464283594894482",  /* 0: fldlg2 */
    "0.6931471805599453094286904741849753009",  /* 1: fldln2 */
    "1.4426950408889634073876517827983434472",  /* 2: fldl2e */
    "3.3219280948873623478083405569094566090",  /* 3: fldl2t */
    "3.1415926535897932385128089594061862044",  /* 4: fldpi */
  };
  int i;

  for (i = 0; i < 5; i++)
    {
      real_from_string (&ext_80387_constants_table[i], cst[i]);
      /* Ensure each constant is rounded to XFmode precision.  */
      real_convert (&ext_80387_constants_table[i],
		    TARGET_128BIT_LONG_DOUBLE ? TFmode : XFmode,
		    &ext_80387_constants_table[i]);
    }

  ext_80387_constants_init = 1;
}

/* Return true if the constant is something that can be loaded with
   a special instruction.  */

int
standard_80387_constant_p (rtx x)
{
  if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
    return -1;

  if (x == CONST0_RTX (GET_MODE (x)))
    return 1;
  if (x == CONST1_RTX (GET_MODE (x)))
    return 2;

  /* For XFmode constants, try to find a special 80387 instruction on
     those CPUs that benefit from them.  */
  if ((GET_MODE (x) == XFmode || GET_MODE (x) == TFmode)
      && x86_ext_80387_constants & TUNEMASK)
    {
      REAL_VALUE_TYPE r;
      int i;

      if (! ext_80387_constants_init)
	init_ext_80387_constants ();

      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
      for (i = 0; i < 5; i++)
	if (real_identical (&r, &ext_80387_constants_table[i]))
	  return i + 3;
    }

  return 0;
}

/* Return the opcode of the special instruction to be used to load
   the constant X.  */

const char *
standard_80387_constant_opcode (rtx x)
{
  switch (standard_80387_constant_p (x))
    {
    case 1:
      return "fldz";
    case 2:
      return "fld1";
    case 3:
      return "fldlg2";
    case 4:
      return "fldln2";
    case 5:
      return "fldl2e";
    case 6:
      return "fldl2t";
    case 7:
      return "fldpi";
    }
  abort ();
}
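
/* Taken together, these helpers map recognized constants straight to
   single i387 instructions; an informal example, mirroring the tables
   above: standard_80387_constant_p on 0.0 returns 1 -> "fldz", on 1.0
   returns 2 -> "fld1", and on 1.44269504...  (log2 (e)) returns
   5 -> "fldl2e", while standard_80387_constant_rtx (5) rebuilds the
   CONST_DOUBLE from ext_80387_constants_table[5 - 3].  */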

/* Return the CONST_DOUBLE representing the 80387 constant that is
   loaded by the specified special instruction.  The argument IDX
   matches the return value from standard_80387_constant_p.  */

rtx
standard_80387_constant_rtx (int idx)
{
  int i;

  if (! ext_80387_constants_init)
    init_ext_80387_constants ();

  switch (idx)
    {
    case 3:
    case 4:
    case 5:
    case 6:
    case 7:
      i = idx - 3;
      break;

    default:
      abort ();
    }

  return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
				       TARGET_128BIT_LONG_DOUBLE
				       ? TFmode : XFmode);
}

/* Return 1 if X is an FP constant we can load into an SSE register
   without using memory.  */
int
standard_sse_constant_p (rtx x)
{
  if (x == const0_rtx)
    return 1;
  return (x == CONST0_RTX (GET_MODE (x)));
}

/* Returns 1 if OP contains a symbol reference.  */

int
symbolic_reference_mentioned_p (rtx op)
{
  register const char *fmt;
  register int i;

  if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
    return 1;

  fmt = GET_RTX_FORMAT (GET_CODE (op));
  for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  register int j;

	  for (j = XVECLEN (op, i) - 1; j >= 0; j--)
	    if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
	      return 1;
	}

      else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
	return 1;
    }

  return 0;
}

/* Return 1 if it is appropriate to emit `ret' instructions in the
   body of a function.  Do this only if the epilogue is simple, needing a
   couple of insns.  Prior to reloading, we can't tell how many registers
   must be saved, so return 0 then.  Return 0 if there is no frame
   marker to de-allocate.

   If NON_SAVING_SETJMP is defined and true, then it is not possible
   for the epilogue to be simple, so return 0.  This is a special case
   since NON_SAVING_SETJMP will not cause regs_ever_live to change
   until final, but jump_optimize may need to know sooner if a
   `return' is OK.  */

int
ix86_can_use_return_insn_p (void)
{
  struct ix86_frame frame;

#ifdef NON_SAVING_SETJMP
  if (NON_SAVING_SETJMP && current_function_calls_setjmp)
    return 0;
#endif

  if (! reload_completed || frame_pointer_needed)
    return 0;

  /* Don't allow more than 32k bytes of popped arguments, since that's
     all we can do with one instruction.  */
  if (current_function_pops_args
      && current_function_args_size >= 32768)
    return 0;

  ix86_compute_frame_layout (&frame);
  return frame.to_allocate == 0 && frame.nregs == 0;
}
\f
/* Return 1 if VALUE can be stored in the sign extended immediate field.  */
int
x86_64_sign_extended_value (rtx value)
{
  switch (GET_CODE (value))
    {
    /* CONST_DOUBLEs never match, since HOST_BITS_PER_WIDE_INT is known
       to be at least 32 and all acceptable constants are
       represented as CONST_INT.  */
    case CONST_INT:
      if (HOST_BITS_PER_WIDE_INT == 32)
	return 1;
      else
	{
	  HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
	  return trunc_int_for_mode (val, SImode) == val;
	}
      break;

    /* For certain code models, the symbolic references are known to fit.
       In the CM_SMALL_PIC model we know it fits if it is local to the
       shared library.  Don't count TLS SYMBOL_REFs here, since they
       should fit only if inside of an UNSPEC handled below.  */
    case SYMBOL_REF:
      return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL);

    /* For certain code models, the code is near as well.  */
    case LABEL_REF:
      return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM
	      || ix86_cmodel == CM_KERNEL);

    /* We also may accept the offsetted memory references in certain
       special cases.  */
    case CONST:
      if (GET_CODE (XEXP (value, 0)) == UNSPEC)
	switch (XINT (XEXP (value, 0), 1))
	  {
	  case UNSPEC_GOTPCREL:
	  case UNSPEC_DTPOFF:
	  case UNSPEC_GOTNTPOFF:
	  case UNSPEC_NTPOFF:
	    return 1;
	  default:
	    break;
	  }
      if (GET_CODE (XEXP (value, 0)) == PLUS)
	{
	  rtx op1 = XEXP (XEXP (value, 0), 0);
	  rtx op2 = XEXP (XEXP (value, 0), 1);
	  HOST_WIDE_INT offset;

	  if (ix86_cmodel == CM_LARGE)
	    return 0;
	  if (GET_CODE (op2) != CONST_INT)
	    return 0;
	  offset = trunc_int_for_mode (INTVAL (op2), DImode);
	  switch (GET_CODE (op1))
	    {
	    case SYMBOL_REF:
	      /* For CM_SMALL assume that the latest object is 16MB before
		 the end of the 31-bit boundary.  We may also accept pretty
		 large negative constants, knowing that all objects are in
		 the positive half of the address space.  */
	      if (ix86_cmodel == CM_SMALL
		  && offset < 16*1024*1024
		  && trunc_int_for_mode (offset, SImode) == offset)
		return 1;
	      /* For CM_KERNEL we know that all objects reside in the
		 negative half of the 32-bit address space.  We may not
		 accept negative offsets, since they may be just off,
		 and we may accept pretty large positive ones.  */
	      if (ix86_cmodel == CM_KERNEL
		  && offset > 0
		  && trunc_int_for_mode (offset, SImode) == offset)
		return 1;
	      break;
	    case LABEL_REF:
	      /* These conditions are similar to SYMBOL_REF ones, just the
		 constraints for code models differ.  */
	      if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
		  && offset < 16*1024*1024
		  && trunc_int_for_mode (offset, SImode) == offset)
		return 1;
	      if (ix86_cmodel == CM_KERNEL
		  && offset > 0
		  && trunc_int_for_mode (offset, SImode) == offset)
		return 1;
	      break;
	    case UNSPEC:
	      switch (XINT (op1, 1))
		{
		case UNSPEC_DTPOFF:
		case UNSPEC_NTPOFF:
		  if (offset > 0
		      && trunc_int_for_mode (offset, SImode) == offset)
		    return 1;
		}
	      break;
	    default:
	      return 0;
	    }
	}
      return 0;
    default:
      return 0;
    }
}

/* Return 1 if VALUE can be stored in the zero extended immediate field.  */
int
x86_64_zero_extended_value (rtx value)
{
  switch (GET_CODE (value))
    {
    case CONST_DOUBLE:
      if (HOST_BITS_PER_WIDE_INT == 32)
	return (GET_MODE (value) == VOIDmode
		&& !CONST_DOUBLE_HIGH (value));
      else
	return 0;
    case CONST_INT:
      if (HOST_BITS_PER_WIDE_INT == 32)
	return INTVAL (value) >= 0;
      else
	return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
      break;

    /* For certain code models, the symbolic references are known to fit.  */
    case SYMBOL_REF:
      return ix86_cmodel == CM_SMALL;

    /* For certain code models, the code is near as well.  */
    case LABEL_REF:
      return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;

    /* We also may accept the offsetted memory references in certain
       special cases.  */
    case CONST:
      if (GET_CODE (XEXP (value, 0)) == PLUS)
	{
	  rtx op1 = XEXP (XEXP (value, 0), 0);
	  rtx op2 = XEXP (XEXP (value, 0), 1);

	  if (ix86_cmodel == CM_LARGE)
	    return 0;
	  switch (GET_CODE (op1))
	    {
	    case SYMBOL_REF:
	      return 0;
	      /* For the small code model we may accept pretty large
		 positive offsets, since one bit is available for free.
		 Negative offsets are limited by the size of the NULL
		 pointer area specified by the ABI.  */
	      if (ix86_cmodel == CM_SMALL
		  && GET_CODE (op2) == CONST_INT
		  && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
		  && (trunc_int_for_mode (INTVAL (op2), SImode)
		      == INTVAL (op2)))
		return 1;
	      /* ??? For the kernel, we may accept adjustment of
		 -0x10000000, since we know that it will just convert
		 negative address space to positive, but perhaps this
		 is not worthwhile.  */
	      break;
	    case LABEL_REF:
	      /* These conditions are similar to SYMBOL_REF ones, just the
		 constraints for code models differ.  */
	      if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
		  && GET_CODE (op2) == CONST_INT
		  && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
		  && (trunc_int_for_mode (INTVAL (op2), SImode)
		      == INTVAL (op2)))
		return 1;
	      break;
	    default:
	      return 0;
	    }
	}
      return 0;
    default:
      return 0;
    }
}
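
/* A minimal sketch (an illustration only, not used by the compiler) of
   the two value tests above, assuming a 64-bit HOST_WIDE_INT and a
   32-bit int; the helper names are hypothetical.  */

static int
sketch_fits_sign_extended_32 (long long v)
{
  /* e.g. -1 and 0x7fffffff fit; 0xffffffffLL does not.  */
  return v == (int) v;
}

static int
sketch_fits_zero_extended_32 (unsigned long long v)
{
  /* e.g. 0xffffffffLL and 0x7fffffff fit; -1 and 0x100000000LL do not,
     so the latter must be loaded with movabs.  */
  return (v >> 32) == 0;
}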

/* Value should be nonzero if functions must have frame pointers.
   Zero means the frame pointer need not be set up (and parms may
   be accessed via the stack pointer) in functions that seem suitable.  */

int
ix86_frame_pointer_required (void)
{
  /* If we accessed previous frames, then the generated code expects
     to be able to access the saved ebp value in our frame.  */
  if (cfun->machine->accesses_prev_frame)
    return 1;

  /* Several x86 os'es need a frame pointer for other reasons,
     usually pertaining to setjmp.  */
  if (SUBTARGET_FRAME_POINTER_REQUIRED)
    return 1;

  /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
     the frame pointer by default.  Turn it back on now if we've not
     got a leaf function.  */
  if (TARGET_OMIT_LEAF_FRAME_POINTER
      && (!current_function_is_leaf))
    return 1;

  if (current_function_profile)
    return 1;

  return 0;
}

/* Record that the current function accesses previous call frames.  */

void
ix86_setup_frame_addresses (void)
{
  cfun->machine->accesses_prev_frame = 1;
}
\f
#if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
# define USE_HIDDEN_LINKONCE 1
#else
# define USE_HIDDEN_LINKONCE 0
#endif

static int pic_labels_used;

/* Fills in the label name that should be used for a pc thunk for
   the given register.  */

static void
get_pc_thunk_name (char name[32], unsigned int regno)
{
  if (USE_HIDDEN_LINKONCE)
    sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
  else
    ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
}
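
/* With USE_HIDDEN_LINKONCE, the thunk for %ebx is therefore named
   "__i686.get_pc_thunk.bx" and, as emitted by ix86_file_end below,
   its entire body is (an informal rendering):

	movl	(%esp), %ebx
	ret

   i.e. it copies its own return address into the PIC register.  */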

/* This function is called at the end of the file.  For each PIC
   register that was actually used, it emits a thunk that loads the
   register with the return address of the caller and then returns.  */

void
ix86_file_end (void)
{
  rtx xops[2];
  int regno;

  for (regno = 0; regno < 8; ++regno)
    {
      char name[32];

      if (! ((pic_labels_used >> regno) & 1))
	continue;

      get_pc_thunk_name (name, regno);

      if (USE_HIDDEN_LINKONCE)
	{
	  tree decl;

	  decl = build_decl (FUNCTION_DECL, get_identifier (name),
			     error_mark_node);
	  TREE_PUBLIC (decl) = 1;
	  TREE_STATIC (decl) = 1;
	  DECL_ONE_ONLY (decl) = 1;

	  (*targetm.asm_out.unique_section) (decl, 0);
	  named_section (decl, NULL, 0);

	  (*targetm.asm_out.globalize_label) (asm_out_file, name);
	  fputs ("\t.hidden\t", asm_out_file);
	  assemble_name (asm_out_file, name);
	  fputc ('\n', asm_out_file);
	  ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
	}
      else
	{
	  text_section ();
	  ASM_OUTPUT_LABEL (asm_out_file, name);
	}

      xops[0] = gen_rtx_REG (SImode, regno);
      xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
      output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
      output_asm_insn ("ret", xops);
    }

  if (NEED_INDICATE_EXEC_STACK)
    file_end_indicate_exec_stack ();
}

/* Emit code for the SET_GOT patterns.  */

const char *
output_set_got (rtx dest)
{
  rtx xops[3];

  xops[0] = dest;
  xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);

  if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
    {
      xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());

      if (!flag_pic)
	output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
      else
	output_asm_insn ("call\t%a2", xops);

#if TARGET_MACHO
      /* Output the "canonical" label name ("Lxx$pb") here too.  This
	 is what will be referred to by the Mach-O PIC subsystem.  */
      ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
#endif
      (*targetm.asm_out.internal_label) (asm_out_file, "L",
					 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));

      if (flag_pic)
	output_asm_insn ("pop{l}\t%0", xops);
    }
  else
    {
      char name[32];
      get_pc_thunk_name (name, REGNO (dest));
      pic_labels_used |= 1 << REGNO (dest);

      xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
      xops[2] = gen_rtx_MEM (QImode, xops[2]);
      output_asm_insn ("call\t%X2", xops);
    }

  if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
    output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
  else if (!TARGET_MACHO)
    output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);

  return "";
}
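
/* For -fpic without TARGET_DEEP_BRANCH_PREDICTION, the sequence emitted
   above for %ebx therefore looks roughly like

	call	.L2
   .L2:	pop{l}	%ebx
	add{l}	$_GLOBAL_OFFSET_TABLE_+[.-.L2], %ebx

   while with deep branch prediction the call/pop pair is replaced by a
   call to the pc thunk, keeping the CPU's return-stack predictor
   balanced.  */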

/* Generate a "push" pattern for input ARG.  */

static rtx
gen_push (rtx arg)
{
  return gen_rtx_SET (VOIDmode,
		      gen_rtx_MEM (Pmode,
				   gen_rtx_PRE_DEC (Pmode,
						    stack_pointer_rtx)),
		      arg);
}

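/* In other words, gen_push (reg) builds the RTL

     (set (mem (pre_dec (reg sp))) (reg))

   which the push patterns in the machine description match; since the
   memory reference uses Pmode, pushes are 4 bytes wide on ia32 and
   8 bytes wide on x86-64.  */
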
/* Return >= 0 if there is an unused call-clobbered register available
   for the entire function.  */

static unsigned int
ix86_select_alt_pic_regnum (void)
{
  if (current_function_is_leaf && !current_function_profile)
    {
      int i;
      for (i = 2; i >= 0; --i)
	if (!regs_ever_live[i])
	  return i;
    }

  return INVALID_REGNUM;
}
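
/* For orientation, the frame that ix86_save_reg and
   ix86_compute_frame_layout below carve out looks roughly like this
   (an informal picture; offsets grow away from the incoming return
   address):

	saved return address
	saved %ebp (if frame_pointer_needed)	<- hard_frame_pointer_offset
	saved registers (frame.nregs words)
	va-arg register save area (x86-64 only)
	padding1
	local variables				<- frame_pointer_offset
	outgoing arguments
	padding2				<- stack_pointer_offset  */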
fce5a9f2 4680
4dd2ac2c
JH
4681/* Return 1 if we need to save REGNO. */
4682static int
b96a374d 4683ix86_save_reg (unsigned int regno, int maybe_eh_return)
1020a5ab 4684{
bd09bdeb
RH
4685 if (pic_offset_table_rtx
4686 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4687 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
66edd3b4 4688 || current_function_profile
8c38a24f
MM
4689 || current_function_calls_eh_return
4690 || current_function_uses_const_pool))
bd09bdeb
RH
4691 {
4692 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4693 return 0;
4694 return 1;
4695 }
1020a5ab
RH
4696
4697 if (current_function_calls_eh_return && maybe_eh_return)
4698 {
4699 unsigned i;
4700 for (i = 0; ; i++)
4701 {
b531087a 4702 unsigned test = EH_RETURN_DATA_REGNO (i);
1020a5ab
RH
4703 if (test == INVALID_REGNUM)
4704 break;
9b690711 4705 if (test == regno)
1020a5ab
RH
4706 return 1;
4707 }
4708 }
4dd2ac2c 4709
1020a5ab
RH
4710 return (regs_ever_live[regno]
4711 && !call_used_regs[regno]
4712 && !fixed_regs[regno]
4713 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4dd2ac2c
JH
4714}
4715
0903fcab
JH
4716/* Return number of registers to be saved on the stack. */
4717
4718static int
b96a374d 4719ix86_nsaved_regs (void)
0903fcab
JH
4720{
4721 int nregs = 0;
0903fcab
JH
4722 int regno;
4723
4dd2ac2c 4724 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
1020a5ab 4725 if (ix86_save_reg (regno, true))
4dd2ac2c 4726 nregs++;
0903fcab
JH
4727 return nregs;
4728}
4729
4730/* Return the offset between two registers, one to be eliminated, and the other
4731 its replacement, at the start of a routine. */
4732
4733HOST_WIDE_INT
b96a374d 4734ix86_initial_elimination_offset (int from, int to)
0903fcab 4735{
4dd2ac2c
JH
4736 struct ix86_frame frame;
4737 ix86_compute_frame_layout (&frame);
564d80f4
JH
4738
4739 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4dd2ac2c 4740 return frame.hard_frame_pointer_offset;
564d80f4
JH
4741 else if (from == FRAME_POINTER_REGNUM
4742 && to == HARD_FRAME_POINTER_REGNUM)
4dd2ac2c 4743 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
0903fcab
JH
4744 else
4745 {
564d80f4
JH
4746 if (to != STACK_POINTER_REGNUM)
4747 abort ();
4748 else if (from == ARG_POINTER_REGNUM)
4dd2ac2c 4749 return frame.stack_pointer_offset;
564d80f4
JH
4750 else if (from != FRAME_POINTER_REGNUM)
4751 abort ();
0903fcab 4752 else
4dd2ac2c 4753 return frame.stack_pointer_offset - frame.frame_pointer_offset;
0903fcab
JH
4754 }
4755}

/* Fill the ix86_frame structure describing the frame of the currently
   compiled function.  */

static void
ix86_compute_frame_layout (struct ix86_frame *frame)
{
  HOST_WIDE_INT total_size;
  int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
  int offset;
  int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
  HOST_WIDE_INT size = get_frame_size ();

  frame->nregs = ix86_nsaved_regs ();
  total_size = size;

  /* During reload iteration the amount of registers saved can change.
     Recompute the value as needed.  Do not recompute when the amount of
     registers didn't change, as reload does multiple calls to the function
     and does not expect the decision to change within a single iteration.  */
  if (!optimize_size
      && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
    {
      int count = frame->nregs;

      cfun->machine->use_fast_prologue_epilogue_nregs = count;
      /* The fast prologue uses move instead of push to save registers.  This
	 is significantly longer, but also executes faster, as modern hardware
	 can execute the moves in parallel but cannot do so for push/pop.

	 Be careful about choosing which prologue to emit: when the function
	 takes many instructions to execute, we may as well use the slow
	 version, likewise when the function is known to be outside a hot
	 spot (this is known with feedback only).  Weight the size of the
	 function by the number of registers to save, as it is cheap to use
	 one or two push instructions but very slow to use many of them.  */
      if (count)
	count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
      if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
	  || (flag_branch_probabilities
	      && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
	cfun->machine->use_fast_prologue_epilogue = false;
      else
	cfun->machine->use_fast_prologue_epilogue
	  = !expensive_function_p (count);
    }
  if (TARGET_PROLOGUE_USING_MOVE
      && cfun->machine->use_fast_prologue_epilogue)
    frame->save_regs_using_mov = true;
  else
    frame->save_regs_using_mov = false;

  /* Skip return address and saved base pointer.  */
  offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;

  frame->hard_frame_pointer_offset = offset;

  /* Do some sanity checking of stack_alignment_needed and
     preferred_alignment, since the i386 port is the only one using these
     features and they may break easily.  */

  if (size && !stack_alignment_needed)
    abort ();
  if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
    abort ();
  if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
    abort ();
  if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
    abort ();

  if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
    stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;

  /* Register save area */
  offset += frame->nregs * UNITS_PER_WORD;

  /* Va-arg area */
  if (ix86_save_varrargs_registers)
    {
      offset += X86_64_VARARGS_SIZE;
      frame->va_arg_size = X86_64_VARARGS_SIZE;
    }
  else
    frame->va_arg_size = 0;

  /* Align start of frame for local function.  */
  frame->padding1 = ((offset + stack_alignment_needed - 1)
		     & -stack_alignment_needed) - offset;

  offset += frame->padding1;

  /* Frame pointer points here.  */
  frame->frame_pointer_offset = offset;

  offset += size;

  /* Add outgoing arguments area.  Can be skipped if we eliminated
     all the function calls as dead code.  */
  if (ACCUMULATE_OUTGOING_ARGS && !current_function_is_leaf)
    {
      offset += current_function_outgoing_args_size;
      frame->outgoing_arguments_size = current_function_outgoing_args_size;
    }
  else
    frame->outgoing_arguments_size = 0;

  /* Align stack boundary.  Only needed if we're calling another function
     or using alloca.  */
  if (!current_function_is_leaf || current_function_calls_alloca)
    frame->padding2 = ((offset + preferred_alignment - 1)
		       & -preferred_alignment) - offset;
  else
    frame->padding2 = 0;

  offset += frame->padding2;

  /* We've reached end of stack frame.  */
  frame->stack_pointer_offset = offset;

  /* Size prologue needs to allocate.  */
  frame->to_allocate =
    (size + frame->padding1 + frame->padding2
     + frame->outgoing_arguments_size + frame->va_arg_size);

  if (!frame->to_allocate && frame->nregs <= 1)
    frame->save_regs_using_mov = false;

  if (TARGET_RED_ZONE && current_function_sp_is_unchanging
      && current_function_is_leaf)
    {
      frame->red_zone_size = frame->to_allocate;
      if (frame->save_regs_using_mov)
	frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
      if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
	frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
    }
  else
    frame->red_zone_size = 0;
  frame->to_allocate -= frame->red_zone_size;
  frame->stack_pointer_offset -= frame->red_zone_size;
#if 0
  fprintf (stderr, "nregs: %i\n", frame->nregs);
  fprintf (stderr, "size: %i\n", size);
  fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
  fprintf (stderr, "padding1: %i\n", frame->padding1);
  fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
  fprintf (stderr, "padding2: %i\n", frame->padding2);
  fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
  fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
  fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
  fprintf (stderr, "hard_frame_pointer_offset: %i\n",
	   frame->hard_frame_pointer_offset);
  fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
#endif
}
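
/* Editorial sketch (not from the original source): the offsets computed
   above describe a frame laid out roughly as follows, from higher to
   lower addresses:

	incoming arguments
	return address				<- offset 0
	saved %ebp, if frame_pointer_needed
						<- hard_frame_pointer_offset
	register save area			nregs * UNITS_PER_WORD
	va-arg register save area		va_arg_size (64-bit varargs)
	padding1				to stack_alignment_needed
						<- frame_pointer_offset
	local variables				size
	outgoing argument area			outgoing_arguments_size
	padding2				to preferred_alignment
						<- stack_pointer_offset

   to_allocate is the part below the register save area that the prologue
   subtracts from the stack pointer; for leaf functions on red-zone
   targets, up to red_zone_size bytes of it are left unallocated.  */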

/* Emit code to save registers in the prologue.  */

static void
ix86_emit_save_regs (void)
{
  register int regno;
  rtx insn;

  for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
    if (ix86_save_reg (regno, true))
      {
	insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
	RTX_FRAME_RELATED_P (insn) = 1;
      }
}

/* Emit code to save registers using MOV insns.  First register
   is saved at POINTER + OFFSET.  */
static void
ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
{
  int regno;
  rtx insn;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (ix86_save_reg (regno, true))
      {
	insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
					       Pmode, offset),
			       gen_rtx_REG (Pmode, regno));
	RTX_FRAME_RELATED_P (insn) = 1;
	offset += UNITS_PER_WORD;
      }
}

/* Expand the prologue into a bunch of separate insns.  */

void
ix86_expand_prologue (void)
{
  rtx insn;
  bool pic_reg_used;
  struct ix86_frame frame;
  HOST_WIDE_INT allocate;

  ix86_compute_frame_layout (&frame);

  /* Note: AT&T enter does NOT have reversed args.  Enter is probably
     slower on all targets.  Also sdb doesn't like it.  */

  if (frame_pointer_needed)
    {
      insn = emit_insn (gen_push (hard_frame_pointer_rtx));
      RTX_FRAME_RELATED_P (insn) = 1;

      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  allocate = frame.to_allocate;

  if (!frame.save_regs_using_mov)
    ix86_emit_save_regs ();
  else
    allocate += frame.nregs * UNITS_PER_WORD;

  /* When using the red zone we may start register saving before allocating
     the stack frame, saving one cycle of the prologue.  */
  if (TARGET_RED_ZONE && frame.save_regs_using_mov)
    ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
				   : stack_pointer_rtx,
				   -frame.nregs * UNITS_PER_WORD);

  if (allocate == 0)
    ;
  else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
    {
      insn = emit_insn (gen_pro_epilogue_adjust_stack
			(stack_pointer_rtx, stack_pointer_rtx,
			 GEN_INT (-allocate)));
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  else
    {
      /* ??? Is this only valid for Win32?  */

      rtx arg0, sym;

      if (TARGET_64BIT)
	abort ();

      arg0 = gen_rtx_REG (SImode, 0);
      emit_move_insn (arg0, GEN_INT (allocate));

      sym = gen_rtx_MEM (FUNCTION_MODE,
			 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
      insn = emit_call_insn (gen_call (sym, const0_rtx, constm1_rtx));

      CALL_INSN_FUNCTION_USAGE (insn)
	= gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
			     CALL_INSN_FUNCTION_USAGE (insn));

      /* Don't allow the scheduling pass to move insns across the
	 __alloca call.  */
      emit_insn (gen_blockage (const0_rtx));
    }
  if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
    {
      if (!frame_pointer_needed || !frame.to_allocate)
	ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
      else
	ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
				       -frame.nregs * UNITS_PER_WORD);
    }

  pic_reg_used = false;
  if (pic_offset_table_rtx
      && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
	  || current_function_profile))
    {
      unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();

      if (alt_pic_reg_used != INVALID_REGNUM)
	REGNO (pic_offset_table_rtx) = alt_pic_reg_used;

      pic_reg_used = true;
    }

  if (pic_reg_used)
    {
      insn = emit_insn (gen_set_got (pic_offset_table_rtx));

      /* Even with accurate pre-reload life analysis, we can wind up
	 deleting all references to the pic register after reload.
	 Consider if cross-jumping unifies two sides of a branch
	 controlled by a comparison vs the only read from a global.
	 In which case, allow the set_got to be deleted, though we're
	 too late to do anything about the ebx save in the prologue.  */
      REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
    }

  /* Prevent function calls from being scheduled before the call to mcount.
     In the pic_reg_used case, make sure that the got load isn't deleted.  */
  if (current_function_profile)
    emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
}
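
/* Editorial sketch of the typical 32-bit insn sequences produced above
   (illustrative assembly, not emitted verbatim):

     push-based prologue:		move-based fast prologue:
	pushl	%ebp			pushl	%ebp
	movl	%esp, %ebp		movl	%esp, %ebp
	pushl	%ebx			subl	$allocate, %esp
	subl	$allocate, %esp		movl	%ebx, ofs(%esp)

   When TARGET_STACK_PROBE is set and the frame is at least
   CHECK_STACK_LIMIT bytes, the size is instead loaded into %eax and
   _alloca is called so that each new page of stack is probed.  */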

/* Emit code to restore saved registers using MOV insns.  First register
   is restored from POINTER + OFFSET.  */
static void
ix86_emit_restore_regs_using_mov (rtx pointer, int offset, int maybe_eh_return)
{
  int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (ix86_save_reg (regno, maybe_eh_return))
      {
	emit_move_insn (gen_rtx_REG (Pmode, regno),
			adjust_address (gen_rtx_MEM (Pmode, pointer),
					Pmode, offset));
	offset += UNITS_PER_WORD;
      }
}

/* Restore function stack, frame, and registers.  */

void
ix86_expand_epilogue (int style)
{
  int regno;
  int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
  struct ix86_frame frame;
  HOST_WIDE_INT offset;

  ix86_compute_frame_layout (&frame);

  /* Calculate start of saved registers relative to ebp.  Special care
     must be taken for the normal return case of a function using
     eh_return: the eax and edx registers are marked as saved, but not
     restored along this path.  */
  offset = frame.nregs;
  if (current_function_calls_eh_return && style != 2)
    offset -= 2;
  offset *= -UNITS_PER_WORD;

  /* If we're only restoring one register and sp is not valid, then
     using a move instruction to restore the register is less work
     than reloading sp and popping the register.

     The default code results in a stack adjustment using an add/lea
     instruction, while this code results in a LEAVE instruction (or
     discrete equivalent), so it is profitable in some other cases as
     well.  Especially when there are no registers to restore.  We also
     use this code when TARGET_USE_LEAVE and there is exactly one
     register to pop.  This heuristic may need some tuning in future.  */
  if ((!sp_valid && frame.nregs <= 1)
      || (TARGET_EPILOGUE_USING_MOVE
	  && cfun->machine->use_fast_prologue_epilogue
	  && (frame.nregs > 1 || frame.to_allocate))
      || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
      || (frame_pointer_needed && TARGET_USE_LEAVE
	  && cfun->machine->use_fast_prologue_epilogue
	  && frame.nregs == 1)
      || current_function_calls_eh_return)
    {
      /* Restore registers.  We can use ebp or esp to address the memory
	 locations.  If both are available, default to ebp, since offsets
	 are known to be small.  The only exception is esp pointing directly
	 to the end of the block of saved registers, where we may simplify
	 the addressing mode.  */

      if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
	ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
					  frame.to_allocate, style == 2);
      else
	ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
					  offset, style == 2);

      /* eh_return epilogues need %ecx added to the stack pointer.  */
      if (style == 2)
	{
	  rtx tmp, sa = EH_RETURN_STACKADJ_RTX;

	  if (frame_pointer_needed)
	    {
	      tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
	      tmp = plus_constant (tmp, UNITS_PER_WORD);
	      emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));

	      tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
	      emit_move_insn (hard_frame_pointer_rtx, tmp);

	      emit_insn (gen_pro_epilogue_adjust_stack
			 (stack_pointer_rtx, sa, const0_rtx));
	    }
	  else
	    {
	      tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
	      tmp = plus_constant (tmp, (frame.to_allocate
					 + frame.nregs * UNITS_PER_WORD));
	      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
	    }
	}
      else if (!frame_pointer_needed)
	emit_insn (gen_pro_epilogue_adjust_stack
		   (stack_pointer_rtx, stack_pointer_rtx,
		    GEN_INT (frame.to_allocate
			     + frame.nregs * UNITS_PER_WORD)));
      /* If not an i386, mov & pop is faster than "leave".  */
      else if (TARGET_USE_LEAVE || optimize_size
	       || !cfun->machine->use_fast_prologue_epilogue)
	emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
      else
	{
	  emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
						    hard_frame_pointer_rtx,
						    const0_rtx));
	  if (TARGET_64BIT)
	    emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
	  else
	    emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
	}
    }
  else
    {
      /* First step is to deallocate the stack frame so that we can
	 pop the registers.  */
      if (!sp_valid)
	{
	  if (!frame_pointer_needed)
	    abort ();
	  emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
						    hard_frame_pointer_rtx,
						    GEN_INT (offset)));
	}
      else if (frame.to_allocate)
	emit_insn (gen_pro_epilogue_adjust_stack
		   (stack_pointer_rtx, stack_pointer_rtx,
		    GEN_INT (frame.to_allocate)));

      for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
	if (ix86_save_reg (regno, false))
	  {
	    if (TARGET_64BIT)
	      emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
	    else
	      emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
	  }
      if (frame_pointer_needed)
	{
	  /* Leave results in shorter dependency chains on CPUs that are
	     able to grok it fast.  */
	  if (TARGET_USE_LEAVE)
	    emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
	  else if (TARGET_64BIT)
	    emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
	  else
	    emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
	}
    }

  /* Sibcall epilogues don't want a return instruction.  */
  if (style == 0)
    return;

  if (current_function_pops_args && current_function_args_size)
    {
      rtx popc = GEN_INT (current_function_pops_args);

      /* i386 can only pop 64K bytes.  If asked to pop more, pop the
	 return address, do an explicit add, and jump indirectly to the
	 caller.  */

      if (current_function_pops_args >= 65536)
	{
	  rtx ecx = gen_rtx_REG (SImode, 2);

	  /* There is no "pascal" calling convention in the 64bit ABI.  */
	  if (TARGET_64BIT)
	    abort ();

	  emit_insn (gen_popsi1 (ecx));
	  emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
	  emit_jump_insn (gen_return_indirect_internal (ecx));
	}
      else
	emit_jump_insn (gen_return_pop_internal (popc));
    }
  else
    emit_jump_insn (gen_return_internal ());
}
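
/* Editorial sketch of the two epilogue shapes selected above
   (illustrative 32-bit assembly, not emitted verbatim):

     move-based, with leave:		pop-based:
	movl	ofs(%ebp), %ebx		addl	$to_allocate, %esp
	leave				popl	%ebx
	ret				popl	%ebp
					ret

   The move-based form is chosen when esp is not valid, for eh_return,
   or when the heuristic above finds LEAVE profitable; the pop-based
   form deallocates the frame first and then pops each saved register.  */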

/* Reset from the function's potential modifications.  */

static void
ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
			       HOST_WIDE_INT size ATTRIBUTE_UNUSED)
{
  if (pic_offset_table_rtx)
    REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
}

/* Extract the parts of an RTL expression that is a valid memory address
   for an instruction.  Return 0 if the structure of the address is
   grossly off.  Return -1 if the address contains ASHIFT, so it is not
   strictly valid, but is still used for computing the length of a lea
   instruction.  */

static int
ix86_decompose_address (register rtx addr, struct ix86_address *out)
{
  rtx base = NULL_RTX;
  rtx index = NULL_RTX;
  rtx disp = NULL_RTX;
  HOST_WIDE_INT scale = 1;
  rtx scale_rtx = NULL_RTX;
  int retval = 1;
  enum ix86_address_seg seg = SEG_DEFAULT;

  if (REG_P (addr) || GET_CODE (addr) == SUBREG)
    base = addr;
  else if (GET_CODE (addr) == PLUS)
    {
      rtx addends[4], op;
      int n = 0, i;

      op = addr;
      do
	{
	  if (n >= 4)
	    return 0;
	  addends[n++] = XEXP (op, 1);
	  op = XEXP (op, 0);
	}
      while (GET_CODE (op) == PLUS);
      if (n >= 4)
	return 0;
      addends[n] = op;

      for (i = n; i >= 0; --i)
	{
	  op = addends[i];
	  switch (GET_CODE (op))
	    {
	    case MULT:
	      if (index)
		return 0;
	      index = XEXP (op, 0);
	      scale_rtx = XEXP (op, 1);
	      break;

	    case UNSPEC:
	      if (XINT (op, 1) == UNSPEC_TP
		  && TARGET_TLS_DIRECT_SEG_REFS
		  && seg == SEG_DEFAULT)
		seg = TARGET_64BIT ? SEG_FS : SEG_GS;
	      else
		return 0;
	      break;

	    case REG:
	    case SUBREG:
	      if (!base)
		base = op;
	      else if (!index)
		index = op;
	      else
		return 0;
	      break;

	    case CONST:
	    case CONST_INT:
	    case SYMBOL_REF:
	    case LABEL_REF:
	      if (disp)
		return 0;
	      disp = op;
	      break;

	    default:
	      return 0;
	    }
	}
    }
  else if (GET_CODE (addr) == MULT)
    {
      index = XEXP (addr, 0);		/* index*scale */
      scale_rtx = XEXP (addr, 1);
    }
  else if (GET_CODE (addr) == ASHIFT)
    {
      rtx tmp;

      /* We're called for lea too, which implements ashift on occasion.  */
      index = XEXP (addr, 0);
      tmp = XEXP (addr, 1);
      if (GET_CODE (tmp) != CONST_INT)
	return 0;
      scale = INTVAL (tmp);
      if ((unsigned HOST_WIDE_INT) scale > 3)
	return 0;
      scale = 1 << scale;
      retval = -1;
    }
  else
    disp = addr;			/* displacement */

  /* Extract the integral value of scale.  */
  if (scale_rtx)
    {
      if (GET_CODE (scale_rtx) != CONST_INT)
	return 0;
      scale = INTVAL (scale_rtx);
    }

  /* Allow arg pointer and stack pointer as index if there is no scaling.  */
  if (base && index && scale == 1
      && (index == arg_pointer_rtx
	  || index == frame_pointer_rtx
	  || (REG_P (index) && REGNO (index) == STACK_POINTER_REGNUM)))
    {
      rtx tmp = base;
      base = index;
      index = tmp;
    }

  /* Special case: %ebp cannot be encoded as a base without a displacement.  */
  if ((base == hard_frame_pointer_rtx
       || base == frame_pointer_rtx
       || base == arg_pointer_rtx) && !disp)
    disp = const0_rtx;

  /* Special case: on K6, [%esi] makes the instruction vector decoded.
     Avoid this by transforming to [%esi+0].  */
  if (ix86_tune == PROCESSOR_K6 && !optimize_size
      && base && !index && !disp
      && REG_P (base)
      && REGNO_REG_CLASS (REGNO (base)) == SIREG)
    disp = const0_rtx;

  /* Special case: encode reg+reg instead of reg*2.  */
  if (!base && index && scale && scale == 2)
    base = index, scale = 1;

  /* Special case: scaling cannot be encoded without base or displacement.  */
  if (!base && !disp && index && scale != 1)
    disp = const0_rtx;

  out->base = base;
  out->index = index;
  out->disp = disp;
  out->scale = scale;
  out->seg = seg;

  return retval;
}
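
/* Editorial examples (not from the original source) of decompositions
   the function above produces:

	(reg %eax)				base %eax
	(plus (reg %eax) (const_int 12))	base %eax, disp 12
	(plus (reg %eax)
	      (mult (reg %ebx) (const_int 4)))	base %eax, index %ebx, scale 4
	(mult (reg %ebx) (const_int 2))		base %ebx, index %ebx, scale 1
						(reg+reg rather than reg*2)
	(ashift (reg %ebx) (const_int 2))	index %ebx, scale 4; returns -1
						(lea-only form)  */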

/* Return cost of the memory address x.
   For i386, it is better to use a complex address than let gcc copy
   the address into a reg and make a new pseudo.  But not if the address
   requires two regs - that would mean more pseudos with longer
   lifetimes.  */
static int
ix86_address_cost (rtx x)
{
  struct ix86_address parts;
  int cost = 1;

  if (!ix86_decompose_address (x, &parts))
    abort ();

  if (parts.base && GET_CODE (parts.base) == SUBREG)
    parts.base = SUBREG_REG (parts.base);
  if (parts.index && GET_CODE (parts.index) == SUBREG)
    parts.index = SUBREG_REG (parts.index);

  /* More complex memory references are better.  */
  if (parts.disp && parts.disp != const0_rtx)
    cost--;
  if (parts.seg != SEG_DEFAULT)
    cost--;

  /* Attempt to minimize number of registers in the address.  */
  if ((parts.base
       && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
      || (parts.index
	  && (!REG_P (parts.index)
	      || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
    cost++;

  if (parts.base
      && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
      && parts.index
      && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
      && parts.base != parts.index)
    cost++;

  /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
     since its predecode logic can't detect the length of instructions
     and it degenerates to vector decoding.  Increase the cost of such
     addresses here.  The penalty is minimally 2 cycles.  It may be
     worthwhile to split such addresses or even refuse such addresses
     at all.

     The following addressing modes are affected:
	[base+scale*index]
	[scale*index+disp]
	[base+index]

     The first and last case may be avoidable by explicitly coding the zero
     in the memory address, but I don't have an AMD-K6 machine handy to check
     this theory.  */

  if (TARGET_K6
      && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
	  || (parts.disp && !parts.base && parts.index && parts.scale != 1)
	  || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
    cost += 10;

  return cost;
}
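
/* Editorial examples of the resulting metric: "4(%eax)" costs 0 (the
   displacement makes the reference "more complex", hence cheaper here),
   "(%eax)" costs 1, and an address built from one or two pseudo
   registers costs 2 or 3, which steers gcc away from creating fresh
   pseudos for address computations.  */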

/* If X is a machine specific address (i.e. a symbol or label being
   referenced as a displacement from the GOT implemented using an
   UNSPEC), then return the base term.  Otherwise return X.  */

rtx
ix86_find_base_term (rtx x)
{
  rtx term;

  if (TARGET_64BIT)
    {
      if (GET_CODE (x) != CONST)
	return x;
      term = XEXP (x, 0);
      if (GET_CODE (term) == PLUS
	  && (GET_CODE (XEXP (term, 1)) == CONST_INT
	      || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
	term = XEXP (term, 0);
      if (GET_CODE (term) != UNSPEC
	  || XINT (term, 1) != UNSPEC_GOTPCREL)
	return x;

      term = XVECEXP (term, 0, 0);

      if (GET_CODE (term) != SYMBOL_REF
	  && GET_CODE (term) != LABEL_REF)
	return x;

      return term;
    }

  term = ix86_delegitimize_address (x);

  if (GET_CODE (term) != SYMBOL_REF
      && GET_CODE (term) != LABEL_REF)
    return x;

  return term;
}

/* Determine if a given RTX is a valid constant.  We already know this
   satisfies CONSTANT_P.  */

bool
legitimate_constant_p (rtx x)
{
  rtx inner;

  switch (GET_CODE (x))
    {
    case SYMBOL_REF:
      /* TLS symbols are not constant.  */
      if (tls_symbolic_operand (x, Pmode))
	return false;
      break;

    case CONST:
      inner = XEXP (x, 0);

      /* Offsets of TLS symbols are never valid.
	 Discourage CSE from creating them.  */
      if (GET_CODE (inner) == PLUS
	  && tls_symbolic_operand (XEXP (inner, 0), Pmode))
	return false;

      if (GET_CODE (inner) == PLUS)
	{
	  if (GET_CODE (XEXP (inner, 1)) != CONST_INT)
	    return false;
	  inner = XEXP (inner, 0);
	}

      /* Only some unspecs are valid as "constants".  */
      if (GET_CODE (inner) == UNSPEC)
	switch (XINT (inner, 1))
	  {
	  case UNSPEC_TPOFF:
	  case UNSPEC_NTPOFF:
	    return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
	  case UNSPEC_DTPOFF:
	    return local_dynamic_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
	  default:
	    return false;
	  }
      break;

    default:
      break;
    }

  /* Otherwise we handle everything else in the move patterns.  */
  return true;
}
5565
3a04ff64
RH
5566/* Determine if it's legal to put X into the constant pool. This
5567 is not possible for the address of thread-local symbols, which
5568 is checked above. */
5569
5570static bool
b96a374d 5571ix86_cannot_force_const_mem (rtx x)
3a04ff64
RH
5572{
5573 return !legitimate_constant_p (x);
5574}
5575
f996902d
RH
5576/* Determine if a given RTX is a valid constant address. */
5577
5578bool
b96a374d 5579constant_address_p (rtx x)
f996902d 5580{
a94f136b 5581 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
f996902d
RH
5582}
5583
5584/* Nonzero if the constant value X is a legitimate general operand
fce5a9f2 5585 when generating PIC code. It is given that flag_pic is on and
f996902d
RH
5586 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
5587
5588bool
b96a374d 5589legitimate_pic_operand_p (rtx x)
f996902d
RH
5590{
5591 rtx inner;
5592
5593 switch (GET_CODE (x))
5594 {
5595 case CONST:
5596 inner = XEXP (x, 0);
5597
5598 /* Only some unspecs are valid as "constants". */
5599 if (GET_CODE (inner) == UNSPEC)
5600 switch (XINT (inner, 1))
5601 {
5602 case UNSPEC_TPOFF:
5603 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
f996902d
RH
5604 default:
5605 return false;
5606 }
5607 /* FALLTHRU */
5608
5609 case SYMBOL_REF:
5610 case LABEL_REF:
5611 return legitimate_pic_address_disp_p (x);
5612
5613 default:
5614 return true;
5615 }
5616}

/* Determine if a given CONST RTX is a valid memory displacement
   in PIC mode.  */

int
legitimate_pic_address_disp_p (register rtx disp)
{
  bool saw_plus;

  /* In 64bit mode we can allow direct addresses of symbols and labels
     when they are not dynamic symbols.  */
  if (TARGET_64BIT)
    {
      /* TLS references should always be enclosed in UNSPEC.  */
      if (tls_symbolic_operand (disp, GET_MODE (disp)))
	return 0;
      if (GET_CODE (disp) == SYMBOL_REF
	  && ix86_cmodel == CM_SMALL_PIC
	  && SYMBOL_REF_LOCAL_P (disp))
	return 1;
      if (GET_CODE (disp) == LABEL_REF)
	return 1;
      if (GET_CODE (disp) == CONST
	  && GET_CODE (XEXP (disp, 0)) == PLUS
	  && ((GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
	       && ix86_cmodel == CM_SMALL_PIC
	       && SYMBOL_REF_LOCAL_P (XEXP (XEXP (disp, 0), 0)))
	      || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
	  && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT
	  && INTVAL (XEXP (XEXP (disp, 0), 1)) < 16*1024*1024
	  && INTVAL (XEXP (XEXP (disp, 0), 1)) >= -16*1024*1024)
	return 1;
    }
  if (GET_CODE (disp) != CONST)
    return 0;
  disp = XEXP (disp, 0);

  if (TARGET_64BIT)
    {
      /* It is unsafe to allow PLUS expressions.  This limits the allowed
	 distance of GOT tables.  We should not need these anyway.  */
      if (GET_CODE (disp) != UNSPEC
	  || XINT (disp, 1) != UNSPEC_GOTPCREL)
	return 0;

      if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
	  && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
	return 0;
      return 1;
    }

  saw_plus = false;
  if (GET_CODE (disp) == PLUS)
    {
      if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
	return 0;
      disp = XEXP (disp, 0);
      saw_plus = true;
    }

  /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O.  */
  if (TARGET_MACHO && GET_CODE (disp) == MINUS)
    {
      if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
	  || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
	if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
	  {
	    const char *sym_name = XSTR (XEXP (disp, 1), 0);
	    if (strstr (sym_name, "$pb") != 0)
	      return 1;
	  }
    }

  if (GET_CODE (disp) != UNSPEC)
    return 0;

  switch (XINT (disp, 1))
    {
    case UNSPEC_GOT:
      if (saw_plus)
	return false;
      return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
    case UNSPEC_GOTOFF:
      if (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
	  || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
	return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
      return false;
    case UNSPEC_GOTTPOFF:
    case UNSPEC_GOTNTPOFF:
    case UNSPEC_INDNTPOFF:
      if (saw_plus)
	return false;
      return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
    case UNSPEC_NTPOFF:
      return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
    case UNSPEC_DTPOFF:
      return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
    }

  return 0;
}
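
/* Editorial summary of the 32-bit displacements accepted above, in
   assembler syntax: sym@GOT, sym@GOTOFF for local symbols (optionally
   plus a small constant), and the TLS forms sym@GOTTPOFF, sym@GOTNTPOFF,
   sym@INDNTPOFF, sym@NTPOFF and sym@DTPOFF.  */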

/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
   memory address for an instruction.  The MODE argument is the machine mode
   for the MEM expression that wants to use this address.

   It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS should
   convert common non-canonical forms to canonical form so that they will
   be recognized.  */

int
legitimate_address_p (enum machine_mode mode, register rtx addr, int strict)
{
  struct ix86_address parts;
  rtx base, index, disp;
  HOST_WIDE_INT scale;
  const char *reason = NULL;
  rtx reason_rtx = NULL_RTX;

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr,
	       "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
	       GET_MODE_NAME (mode), strict);
      debug_rtx (addr);
    }

  if (ix86_decompose_address (addr, &parts) <= 0)
    {
      reason = "decomposition failed";
      goto report_error;
    }

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;

  /* Validate base register.

     Don't allow SUBREG's here; it can lead to spill failures when the base
     is one word out of a two word structure, which is represented internally
     as a DImode int.  */

  if (base)
    {
      rtx reg;
      reason_rtx = base;

      if (GET_CODE (base) == SUBREG)
	reg = SUBREG_REG (base);
      else
	reg = base;

      if (GET_CODE (reg) != REG)
	{
	  reason = "base is not a register";
	  goto report_error;
	}

      if (GET_MODE (base) != Pmode)
	{
	  reason = "base is not in Pmode";
	  goto report_error;
	}

      if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
	  || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
	{
	  reason = "base is not valid";
	  goto report_error;
	}
    }

  /* Validate index register.

     Don't allow SUBREG's here; it can lead to spill failures when the index
     is one word out of a two word structure, which is represented internally
     as a DImode int.  */

  if (index)
    {
      rtx reg;
      reason_rtx = index;

      if (GET_CODE (index) == SUBREG)
	reg = SUBREG_REG (index);
      else
	reg = index;

      if (GET_CODE (reg) != REG)
	{
	  reason = "index is not a register";
	  goto report_error;
	}

      if (GET_MODE (index) != Pmode)
	{
	  reason = "index is not in Pmode";
	  goto report_error;
	}

      if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
	  || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
	{
	  reason = "index is not valid";
	  goto report_error;
	}
    }

  /* Validate scale factor.  */
  if (scale != 1)
    {
      reason_rtx = GEN_INT (scale);
      if (!index)
	{
	  reason = "scale without index";
	  goto report_error;
	}

      if (scale != 2 && scale != 4 && scale != 8)
	{
	  reason = "scale is not a valid multiplier";
	  goto report_error;
	}
    }

  /* Validate displacement.  */
  if (disp)
    {
      reason_rtx = disp;

      if (GET_CODE (disp) == CONST
	  && GET_CODE (XEXP (disp, 0)) == UNSPEC)
	switch (XINT (XEXP (disp, 0), 1))
	  {
	  case UNSPEC_GOT:
	  case UNSPEC_GOTOFF:
	  case UNSPEC_GOTPCREL:
	    if (!flag_pic)
	      abort ();
	    goto is_legitimate_pic;

	  case UNSPEC_GOTTPOFF:
	  case UNSPEC_GOTNTPOFF:
	  case UNSPEC_INDNTPOFF:
	  case UNSPEC_NTPOFF:
	  case UNSPEC_DTPOFF:
	    break;

	  default:
	    reason = "invalid address unspec";
	    goto report_error;
	  }

      else if (flag_pic && (SYMBOLIC_CONST (disp)
#if TARGET_MACHO
			    && !machopic_operand_p (disp)
#endif
			    ))
	{
	is_legitimate_pic:
	  if (TARGET_64BIT && (index || base))
	    {
	      /* foo@dtpoff(%rX) is ok.  */
	      if (GET_CODE (disp) != CONST
		  || GET_CODE (XEXP (disp, 0)) != PLUS
		  || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
		  || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
		  || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
		      && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
		{
		  reason = "non-constant pic memory reference";
		  goto report_error;
		}
	    }
	  else if (! legitimate_pic_address_disp_p (disp))
	    {
	      reason = "displacement is an invalid pic construct";
	      goto report_error;
	    }

	  /* This code used to verify that a symbolic pic displacement
	     includes the pic_offset_table_rtx register.

	     While this is a good idea, unfortunately these constructs may
	     be created by the "adds using lea" optimization for incorrect
	     code like:

	     int a;
	     int foo(int i)
	       {
	         return *(&a+i);
	       }

	     This code is nonsensical, but results in addressing the
	     GOT table with a pic_offset_table_rtx base.  We can't
	     just refuse it easily, since it gets matched by the
	     "addsi3" pattern, which later gets split to lea in the
	     case the output register differs from the input.  While this
	     could be handled by a separate addsi pattern for this case
	     that never results in lea, disabling this test seems to be
	     the easier and correct fix for the crash.  */
	}
      else if (GET_CODE (disp) != LABEL_REF
	       && GET_CODE (disp) != CONST_INT
	       && (GET_CODE (disp) != CONST
		   || !legitimate_constant_p (disp))
	       && (GET_CODE (disp) != SYMBOL_REF
		   || !legitimate_constant_p (disp)))
	{
	  reason = "displacement is not constant";
	  goto report_error;
	}
      else if (TARGET_64BIT && !x86_64_sign_extended_value (disp))
	{
	  reason = "displacement is out of range";
	  goto report_error;
	}
    }

  /* Everything looks valid.  */
  if (TARGET_DEBUG_ADDR)
    fprintf (stderr, "Success.\n");
  return TRUE;

 report_error:
  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr, "Error: %s\n", reason);
      debug_rtx (reason_rtx);
    }
  return FALSE;
}

/* Return a unique alias set for the GOT.  */

static HOST_WIDE_INT
ix86_GOT_alias_set (void)
{
  static HOST_WIDE_INT set = -1;
  if (set == -1)
    set = new_alias_set ();
  return set;
}

/* Return a legitimate reference for ORIG (an address) using the
   register REG.  If REG is 0, a new pseudo is generated.

   There are two types of references that must be handled:

   1. Global data references must load the address from the GOT, via
      the PIC reg.  An insn is emitted to do this load, and the reg is
      returned.

   2. Static data references, constant pool addresses, and code labels
      compute the address as an offset from the GOT, whose base is in
      the PIC reg.  Static data objects have SYMBOL_FLAG_LOCAL set to
      differentiate them from global data objects.  The returned
      address is the PIC reg + an unspec constant.

   GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
   reg also appears in the address.  */

rtx
legitimize_pic_address (rtx orig, rtx reg)
{
  rtx addr = orig;
  rtx new = orig;
  rtx base;

#if TARGET_MACHO
  if (reg == 0)
    reg = gen_reg_rtx (Pmode);
  /* Use the generic Mach-O PIC machinery.  */
  return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
#endif

  if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
    new = addr;
  else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
    {
      /* This symbol may be referenced via a displacement from the PIC
	 base address (@GOTOFF).  */

      if (reload_in_progress)
	regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
      if (GET_CODE (addr) == CONST)
	addr = XEXP (addr, 0);
      if (GET_CODE (addr) == PLUS)
	{
	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
	  new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
	}
      else
	new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
      new = gen_rtx_CONST (Pmode, new);
      new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);

      if (reg != 0)
	{
	  emit_move_insn (reg, new);
	  new = reg;
	}
    }
  else if (GET_CODE (addr) == SYMBOL_REF)
    {
      if (TARGET_64BIT)
	{
	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
	  new = gen_rtx_CONST (Pmode, new);
	  new = gen_rtx_MEM (Pmode, new);
	  RTX_UNCHANGING_P (new) = 1;
	  set_mem_alias_set (new, ix86_GOT_alias_set ());

	  if (reg == 0)
	    reg = gen_reg_rtx (Pmode);
	  /* Use gen_movsi directly; otherwise the address is loaded
	     into a register for CSE.  We don't want to CSE these
	     addresses; instead we CSE addresses from the GOT table,
	     so skip this.  */
	  emit_insn (gen_movsi (reg, new));
	  new = reg;
	}
      else
	{
	  /* This symbol must be referenced via a load from the
	     Global Offset Table (@GOT).  */

	  if (reload_in_progress)
	    regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
	  new = gen_rtx_CONST (Pmode, new);
	  new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
	  new = gen_rtx_MEM (Pmode, new);
	  RTX_UNCHANGING_P (new) = 1;
	  set_mem_alias_set (new, ix86_GOT_alias_set ());

	  if (reg == 0)
	    reg = gen_reg_rtx (Pmode);
	  emit_move_insn (reg, new);
	  new = reg;
	}
    }
  else
    {
      if (GET_CODE (addr) == CONST)
	{
	  addr = XEXP (addr, 0);

	  /* We must match stuff we generate before.  Assume the only
	     unspecs that can get here are ours.  Not that we could do
	     anything with them anyway...  */
	  if (GET_CODE (addr) == UNSPEC
	      || (GET_CODE (addr) == PLUS
		  && GET_CODE (XEXP (addr, 0)) == UNSPEC))
	    return orig;
	  if (GET_CODE (addr) != PLUS)
	    abort ();
	}
      if (GET_CODE (addr) == PLUS)
	{
	  rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);

	  /* Check first to see if this is a constant offset from a @GOTOFF
	     symbol reference.  */
	  if (local_symbolic_operand (op0, Pmode)
	      && GET_CODE (op1) == CONST_INT)
	    {
	      if (!TARGET_64BIT)
		{
		  if (reload_in_progress)
		    regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
		  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
					UNSPEC_GOTOFF);
		  new = gen_rtx_PLUS (Pmode, new, op1);
		  new = gen_rtx_CONST (Pmode, new);
		  new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);

		  if (reg != 0)
		    {
		      emit_move_insn (reg, new);
		      new = reg;
		    }
		}
	      else
		{
		  if (INTVAL (op1) < -16*1024*1024
		      || INTVAL (op1) >= 16*1024*1024)
		    new = gen_rtx_PLUS (Pmode, op0, force_reg (Pmode, op1));
		}
	    }
	  else
	    {
	      base = legitimize_pic_address (XEXP (addr, 0), reg);
	      new = legitimize_pic_address (XEXP (addr, 1),
					    base == reg ? NULL_RTX : reg);

	      if (GET_CODE (new) == CONST_INT)
		new = plus_constant (base, INTVAL (new));
	      else
		{
		  if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
		    {
		      base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
		      new = XEXP (new, 1);
		    }
		  new = gen_rtx_PLUS (Pmode, base, new);
		}
	    }
	}
    }
  return new;
}
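
/* Editorial example of the 32-bit transformation performed above: a
   reference to a global symbol `sym' becomes a load through the GOT,

	(mem (plus (reg pic) (const (unspec [sym] UNSPEC_GOT))))

   i.e. roughly "movl sym@GOT(%ebx), %reg", while a local symbol becomes
   the direct sum (plus (reg pic) (const (unspec [sym] UNSPEC_GOTOFF))),
   which needs no memory load.  */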

/* Load the thread pointer.  If TO_REG is true, force it into a register.  */

static rtx
get_thread_pointer (int to_reg)
{
  rtx tp, reg, insn;

  tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
  if (!to_reg)
    return tp;

  reg = gen_reg_rtx (Pmode);
  insn = gen_rtx_SET (VOIDmode, reg, tp);
  insn = emit_insn (insn);

  return reg;
}

/* A subroutine of legitimize_address and ix86_expand_move.  FOR_MOV is
   false if we expect this to be used for a memory address and true if
   we expect to load the address into a register.  */

static rtx
legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
{
  rtx dest, base, off, pic;
  int type;

  switch (model)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      dest = gen_reg_rtx (Pmode);
      if (TARGET_64BIT)
	{
	  rtx rax = gen_rtx_REG (Pmode, 0), insns;

	  start_sequence ();
	  emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
	  insns = get_insns ();
	  end_sequence ();

	  emit_libcall_block (insns, dest, rax, x);
	}
      else
	emit_insn (gen_tls_global_dynamic_32 (dest, x));
      break;

    case TLS_MODEL_LOCAL_DYNAMIC:
      base = gen_reg_rtx (Pmode);
      if (TARGET_64BIT)
	{
	  rtx rax = gen_rtx_REG (Pmode, 0), insns, note;

	  start_sequence ();
	  emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
	  insns = get_insns ();
	  end_sequence ();

	  note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
	  note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
	  emit_libcall_block (insns, base, rax, note);
	}
      else
	emit_insn (gen_tls_local_dynamic_base_32 (base));

      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
      off = gen_rtx_CONST (Pmode, off);

      return gen_rtx_PLUS (Pmode, base, off);

    case TLS_MODEL_INITIAL_EXEC:
      if (TARGET_64BIT)
	{
	  pic = NULL;
	  type = UNSPEC_GOTNTPOFF;
	}
      else if (flag_pic)
	{
	  if (reload_in_progress)
	    regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
	  pic = pic_offset_table_rtx;
	  type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
	}
      else if (!TARGET_GNU_TLS)
	{
	  pic = gen_reg_rtx (Pmode);
	  emit_insn (gen_set_got (pic));
	  type = UNSPEC_GOTTPOFF;
	}
      else
	{
	  pic = NULL;
	  type = UNSPEC_INDNTPOFF;
	}

      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
      off = gen_rtx_CONST (Pmode, off);
      if (pic)
	off = gen_rtx_PLUS (Pmode, pic, off);
      off = gen_rtx_MEM (Pmode, off);
      RTX_UNCHANGING_P (off) = 1;
      set_mem_alias_set (off, ix86_GOT_alias_set ());

      if (TARGET_64BIT || TARGET_GNU_TLS)
	{
	  base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
	  off = force_reg (Pmode, off);
	  return gen_rtx_PLUS (Pmode, base, off);
	}
      else
	{
	  base = get_thread_pointer (true);
	  dest = gen_reg_rtx (Pmode);
	  emit_insn (gen_subsi3 (dest, base, off));
	}
      break;

    case TLS_MODEL_LOCAL_EXEC:
      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
			    (TARGET_64BIT || TARGET_GNU_TLS)
			    ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
      off = gen_rtx_CONST (Pmode, off);

      if (TARGET_64BIT || TARGET_GNU_TLS)
	{
	  base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
	  return gen_rtx_PLUS (Pmode, base, off);
	}
      else
	{
	  base = get_thread_pointer (true);
	  dest = gen_reg_rtx (Pmode);
	  emit_insn (gen_subsi3 (dest, base, off));
	}
      break;

    default:
      abort ();
    }

  return dest;
}
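
/* Editorial example, assuming 32-bit TARGET_GNU_TLS with
   TARGET_TLS_DIRECT_SEG_REFS: for the local-exec model the code above
   yields roughly

	(plus (unspec [0] UNSPEC_TP) (const (unspec [sym] UNSPEC_NTPOFF)))

   which can be used directly as the memory address "%gs:sym@NTPOFF";
   with FOR_MOV the thread pointer is instead loaded into a register
   first (roughly "movl %gs:0, %reg") and the @NTPOFF offset is added
   to that register.  */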

/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   MODE and WIN are passed so that this macro can use
   GO_IF_LEGITIMATE_ADDRESS.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the 80386, we handle X+REG by loading X into a register R and
   using R+REG.  R will go in a general reg and indexing will be used.
   However, if REG is a broken-out memory address or multiplication,
   nothing needs to be done because REG can certainly go in a general reg.

   When -fpic is used, special handling is needed for symbolic references.
   See comments by legitimize_pic_address in i386.c for details.  */

rtx
legitimize_address (register rtx x, register rtx oldx ATTRIBUTE_UNUSED,
		    enum machine_mode mode)
{
  int changed = 0;
  unsigned log;

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
	       GET_MODE_NAME (mode));
      debug_rtx (x);
    }

  log = tls_symbolic_operand (x, mode);
  if (log)
    return legitimize_tls_address (x, log, false);

  if (flag_pic && SYMBOLIC_CONST (x))
    return legitimize_pic_address (x, 0);

  /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
  if (GET_CODE (x) == ASHIFT
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
    {
      changed = 1;
      x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
			GEN_INT (1 << log));
    }

  if (GET_CODE (x) == PLUS)
    {
      /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */

      if (GET_CODE (XEXP (x, 0)) == ASHIFT
	  && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
	  && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
	{
	  changed = 1;
	  XEXP (x, 0) = gen_rtx_MULT (Pmode,
				      force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
				      GEN_INT (1 << log));
	}

      if (GET_CODE (XEXP (x, 1)) == ASHIFT
	  && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
	  && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
	{
	  changed = 1;
	  XEXP (x, 1) = gen_rtx_MULT (Pmode,
				      force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
				      GEN_INT (1 << log));
	}

      /* Put multiply first if it isn't already.  */
      if (GET_CODE (XEXP (x, 1)) == MULT)
	{
	  rtx tmp = XEXP (x, 0);
	  XEXP (x, 0) = XEXP (x, 1);
	  XEXP (x, 1) = tmp;
	  changed = 1;
	}

      /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can be
	 created by virtual register instantiation, register elimination, and
	 similar optimizations.  */
      if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
	{
	  changed = 1;
	  x = gen_rtx_PLUS (Pmode,
			    gen_rtx_PLUS (Pmode, XEXP (x, 0),
					  XEXP (XEXP (x, 1), 0)),
			    XEXP (XEXP (x, 1), 1));
	}

      /* Canonicalize
	 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
      else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
	       && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	       && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
	       && CONSTANT_P (XEXP (x, 1)))
	{
	  rtx constant;
	  rtx other = NULL_RTX;

	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
	    {
	      constant = XEXP (x, 1);
	      other = XEXP (XEXP (XEXP (x, 0), 1), 1);
	    }
	  else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
	    {
	      constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
	      other = XEXP (x, 1);
	    }
	  else
	    constant = 0;

	  if (constant)
	    {
	      changed = 1;
	      x = gen_rtx_PLUS (Pmode,
				gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
					      XEXP (XEXP (XEXP (x, 0), 1), 0)),
				plus_constant (other, INTVAL (constant)));
	    }
	}

      if (changed && legitimate_address_p (mode, x, FALSE))
	return x;

      if (GET_CODE (XEXP (x, 0)) == MULT)
	{
	  changed = 1;
	  XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
	}

      if (GET_CODE (XEXP (x, 1)) == MULT)
	{
	  changed = 1;
	  XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
	}

      if (changed
	  && GET_CODE (XEXP (x, 1)) == REG
	  && GET_CODE (XEXP (x, 0)) == REG)
	return x;

      if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
	{
	  changed = 1;
	  x = legitimize_pic_address (x, 0);
	}

      if (changed && legitimate_address_p (mode, x, FALSE))
	return x;

      if (GET_CODE (XEXP (x, 0)) == REG)
	{
	  register rtx temp = gen_reg_rtx (Pmode);
	  register rtx val = force_operand (XEXP (x, 1), temp);
	  if (val != temp)
	    emit_move_insn (temp, val);

	  XEXP (x, 1) = temp;
	  return x;
	}

      else if (GET_CODE (XEXP (x, 1)) == REG)
	{
	  register rtx temp = gen_reg_rtx (Pmode);
	  register rtx val = force_operand (XEXP (x, 0), temp);
	  if (val != temp)
	    emit_move_insn (temp, val);

	  XEXP (x, 0) = temp;
	  return x;
	}
    }

  return x;
}
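
/* Editorial examples of the canonicalizations performed above:

	(ashift (reg) (const_int 3))
	  -> (mult (reg) (const_int 8))
	(plus (mult (reg) (const_int 4)) (plus (reg) (const_int 8)))
	  -> (plus (plus (mult (reg) (const_int 4)) (reg)) (const_int 8))

   Both results match the scaled-index address forms recognized by
   ix86_decompose_address.  */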
2a2ab3f9
JVA
6460\f
6461/* Print an integer constant expression in assembler syntax. Addition
6462 and subtraction are the only arithmetic that may appear in these
6463 expressions. FILE is the stdio stream to write to, X is the rtx, and
6464 CODE is the operand print code from the output string. */
6465
6466static void
b96a374d 6467output_pic_addr_const (FILE *file, rtx x, int code)
2a2ab3f9
JVA
6468{
6469 char buf[256];
6470
6471 switch (GET_CODE (x))
6472 {
6473 case PC:
6474 if (flag_pic)
6475 putc ('.', file);
6476 else
6477 abort ();
6478 break;
6479
6480 case SYMBOL_REF:
91bb873f 6481 assemble_name (file, XSTR (x, 0));
12969f45 6482 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
91bb873f 6483 fputs ("@PLT", file);
2a2ab3f9
JVA
6484 break;
6485
91bb873f
RH
6486 case LABEL_REF:
6487 x = XEXP (x, 0);
6488 /* FALLTHRU */
2a2ab3f9
JVA
6489 case CODE_LABEL:
6490 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
6491 assemble_name (asm_out_file, buf);
6492 break;
6493
6494 case CONST_INT:
f64cecad 6495 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
2a2ab3f9
JVA
6496 break;
6497
6498 case CONST:
6499 /* This used to output parentheses around the expression,
6500 but that does not work on the 386 (either ATT or BSD assembler). */
6501 output_pic_addr_const (file, XEXP (x, 0), code);
6502 break;
6503
6504 case CONST_DOUBLE:
6505 if (GET_MODE (x) == VOIDmode)
6506 {
6507 /* We can use %d if the number is <32 bits and positive. */
6508 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
f64cecad
JC
6509 fprintf (file, "0x%lx%08lx",
6510 (unsigned long) CONST_DOUBLE_HIGH (x),
6511 (unsigned long) CONST_DOUBLE_LOW (x));
2a2ab3f9 6512 else
f64cecad 6513 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
2a2ab3f9
JVA
6514 }
6515 else
6516 /* We can't handle floating point constants;
6517 PRINT_OPERAND must handle them. */
6518 output_operand_lossage ("floating constant misused");
6519 break;
6520
6521 case PLUS:
e9a25f70 6522 /* Some assemblers need integer constants to appear first. */
2a2ab3f9
JVA
6523 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
6524 {
2a2ab3f9 6525 output_pic_addr_const (file, XEXP (x, 0), code);
e075ae69 6526 putc ('+', file);
e9a25f70 6527 output_pic_addr_const (file, XEXP (x, 1), code);
2a2ab3f9 6528 }
91bb873f 6529 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
2a2ab3f9 6530 {
2a2ab3f9 6531 output_pic_addr_const (file, XEXP (x, 1), code);
e075ae69 6532 putc ('+', file);
e9a25f70 6533 output_pic_addr_const (file, XEXP (x, 0), code);
2a2ab3f9 6534 }
91bb873f
RH
6535 else
6536 abort ();
2a2ab3f9
JVA
6537 break;
6538
6539 case MINUS:
b069de3b
SS
6540 if (!TARGET_MACHO)
6541 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
2a2ab3f9 6542 output_pic_addr_const (file, XEXP (x, 0), code);
e075ae69 6543 putc ('-', file);
2a2ab3f9 6544 output_pic_addr_const (file, XEXP (x, 1), code);
6545 if (!TARGET_MACHO)
6546 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
6547 break;
6548
6549 case UNSPEC:
6550 if (XVECLEN (x, 0) != 1)
5bf0ebab 6551 abort ();
6552 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
6553 switch (XINT (x, 1))
77ebd435 6554 {
8ee41eaf 6555 case UNSPEC_GOT:
6556 fputs ("@GOT", file);
6557 break;
8ee41eaf 6558 case UNSPEC_GOTOFF:
6559 fputs ("@GOTOFF", file);
6560 break;
8ee41eaf 6561 case UNSPEC_GOTPCREL:
edfe8595 6562 fputs ("@GOTPCREL(%rip)", file);
6eb791fc 6563 break;
f996902d 6564 case UNSPEC_GOTTPOFF:
dea73790 6565 /* FIXME: This might be @TPOFF in Sun ld too. */
6566 fputs ("@GOTTPOFF", file);
6567 break;
6568 case UNSPEC_TPOFF:
6569 fputs ("@TPOFF", file);
6570 break;
6571 case UNSPEC_NTPOFF:
6572 if (TARGET_64BIT)
6573 fputs ("@TPOFF", file);
6574 else
6575 fputs ("@NTPOFF", file);
6576 break;
6577 case UNSPEC_DTPOFF:
6578 fputs ("@DTPOFF", file);
6579 break;
dea73790 6580 case UNSPEC_GOTNTPOFF:
6581 if (TARGET_64BIT)
6582 fputs ("@GOTTPOFF(%rip)", file);
6583 else
6584 fputs ("@GOTNTPOFF", file);
6585 break;
6586 case UNSPEC_INDNTPOFF:
6587 fputs ("@INDNTPOFF", file);
6588 break;
6589 default:
6590 output_operand_lossage ("invalid UNSPEC as operand");
6591 break;
6592 }
6593 break;
6594
6595 default:
6596 output_operand_lossage ("invalid expression as operand");
6597 }
6598}
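/* Illustrative examples (not part of the original source): given
   (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF)), the CONST and
   UNSPEC cases above cooperate to print "foo@GOTOFF"; for a PLUS with
   a CONST_INT term such as (plus (symbol_ref "foo") (const_int 4)),
   the integer is printed first, yielding "4+foo".  */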
1865dbb5 6599
0f290768 6600/* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
6601 We need to handle our special PIC relocations. */
6602
0f290768 6603void
b96a374d 6604i386_dwarf_output_addr_const (FILE *file, rtx x)
1865dbb5 6605{
14f73b5a 6606#ifdef ASM_QUAD
18b5b8d6 6607 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
6608#else
6609 if (TARGET_64BIT)
6610 abort ();
18b5b8d6 6611 fprintf (file, "%s", ASM_LONG);
14f73b5a 6612#endif
6613 if (flag_pic)
6614 output_pic_addr_const (file, x, '\0');
6615 else
6616 output_addr_const (file, x);
6617 fputc ('\n', file);
6618}
6619
6620/* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
6621 We need to emit DTP-relative relocations. */
6622
6623void
b96a374d 6624i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
b9203463 6625{
6626 fputs (ASM_LONG, file);
6627 output_addr_const (file, x);
6628 fputs ("@DTPOFF", file);
6629 switch (size)
6630 {
6631 case 4:
6632 break;
6633 case 8:
75d38379 6634 fputs (", 0", file);
b9203463 6635 break;
6636 default:
6637 abort ();
6638 }
6639}
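/* Illustrative output (not part of the original source): for SIZE 4
   this emits ".long foo@DTPOFF"; for SIZE 8 it emits
   ".long foo@DTPOFF, 0", zero-padding the upper half, assuming
   ASM_LONG expands to ".long".  */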
6640
6641/* In the name of slightly smaller debug output, and to cater to
6642 general assembler lossage, recognize PIC+GOTOFF and turn it back
6643 into a direct symbol reference. */
6644
69bd9368 6645static rtx
b96a374d 6646ix86_delegitimize_address (rtx orig_x)
1865dbb5 6647{
ec65b2e3 6648 rtx x = orig_x, y;
1865dbb5 6649
6650 if (GET_CODE (x) == MEM)
6651 x = XEXP (x, 0);
6652
6653 if (TARGET_64BIT)
6654 {
6655 if (GET_CODE (x) != CONST
6656 || GET_CODE (XEXP (x, 0)) != UNSPEC
8ee41eaf 6657 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
4c8c0dec 6658 || GET_CODE (orig_x) != MEM)
6659 return orig_x;
6660 return XVECEXP (XEXP (x, 0), 0, 0);
6661 }
6662
1865dbb5 6663 if (GET_CODE (x) != PLUS
6664 || GET_CODE (XEXP (x, 1)) != CONST)
6665 return orig_x;
6666
6667 if (GET_CODE (XEXP (x, 0)) == REG
6668 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6669 /* %ebx + GOT/GOTOFF */
6670 y = NULL;
6671 else if (GET_CODE (XEXP (x, 0)) == PLUS)
6672 {
6673 /* %ebx + %reg * scale + GOT/GOTOFF */
6674 y = XEXP (x, 0);
6675 if (GET_CODE (XEXP (y, 0)) == REG
6676 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
6677 y = XEXP (y, 1);
6678 else if (GET_CODE (XEXP (y, 1)) == REG
6679 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
6680 y = XEXP (y, 0);
6681 else
6682 return orig_x;
6683 if (GET_CODE (y) != REG
6684 && GET_CODE (y) != MULT
6685 && GET_CODE (y) != ASHIFT)
6686 return orig_x;
6687 }
6688 else
6689 return orig_x;
6690
6691 x = XEXP (XEXP (x, 1), 0);
6692 if (GET_CODE (x) == UNSPEC
6693 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6694 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
6695 {
6696 if (y)
6697 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
6698 return XVECEXP (x, 0, 0);
6699 }
6700
6701 if (GET_CODE (x) == PLUS
6702 && GET_CODE (XEXP (x, 0)) == UNSPEC
6703 && GET_CODE (XEXP (x, 1)) == CONST_INT
6704 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6705 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
6706 && GET_CODE (orig_x) != MEM)))
6707 {
6708 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
6709 if (y)
6710 return gen_rtx_PLUS (Pmode, y, x);
6711 return x;
6712 }
6713
6714 return orig_x;
6715}
2a2ab3f9 6716\f
a269a03c 6717static void
6718put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
6719 int fp, FILE *file)
a269a03c 6720{
6721 const char *suffix;
6722
6723 if (mode == CCFPmode || mode == CCFPUmode)
6724 {
6725 enum rtx_code second_code, bypass_code;
6726 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
6727 if (bypass_code != NIL || second_code != NIL)
b531087a 6728 abort ();
6729 code = ix86_fp_compare_code_to_integer (code);
6730 mode = CCmode;
6731 }
6732 if (reverse)
6733 code = reverse_condition (code);
e075ae69 6734
6735 switch (code)
6736 {
6737 case EQ:
6738 suffix = "e";
6739 break;
6740 case NE:
6741 suffix = "ne";
6742 break;
a269a03c 6743 case GT:
7e08e190 6744 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
6745 abort ();
6746 suffix = "g";
a269a03c 6747 break;
a269a03c 6748 case GTU:
6749 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
6750 Those same assemblers have the same but opposite lossage on cmov. */
7e08e190 6751 if (mode != CCmode)
0f290768 6752 abort ();
e075ae69 6753 suffix = fp ? "nbe" : "a";
a269a03c 6754 break;
a269a03c 6755 case LT:
9076b9c1 6756 if (mode == CCNOmode || mode == CCGOCmode)
a269a03c 6757 suffix = "s";
7e08e190 6758 else if (mode == CCmode || mode == CCGCmode)
e075ae69 6759 suffix = "l";
9076b9c1 6760 else
0f290768 6761 abort ();
a269a03c 6762 break;
a269a03c 6763 case LTU:
9076b9c1 6764 if (mode != CCmode)
0f290768 6765 abort ();
6766 suffix = "b";
6767 break;
a269a03c 6768 case GE:
9076b9c1 6769 if (mode == CCNOmode || mode == CCGOCmode)
a269a03c 6770 suffix = "ns";
7e08e190 6771 else if (mode == CCmode || mode == CCGCmode)
e075ae69 6772 suffix = "ge";
9076b9c1 6773 else
0f290768 6774 abort ();
a269a03c 6775 break;
a269a03c 6776 case GEU:
e075ae69 6777 /* ??? As above. */
7e08e190 6778 if (mode != CCmode)
0f290768 6779 abort ();
7e08e190 6780 suffix = fp ? "nb" : "ae";
a269a03c 6781 break;
a269a03c 6782 case LE:
7e08e190 6783 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
6784 abort ();
6785 suffix = "le";
a269a03c 6786 break;
a269a03c 6787 case LEU:
6788 if (mode != CCmode)
6789 abort ();
7e08e190 6790 suffix = "be";
a269a03c 6791 break;
3a3677ff 6792 case UNORDERED:
9e7adcb3 6793 suffix = fp ? "u" : "p";
6794 break;
6795 case ORDERED:
9e7adcb3 6796 suffix = fp ? "nu" : "np";
3a3677ff 6797 break;
6798 default:
6799 abort ();
6800 }
6801 fputs (suffix, file);
6802}
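/* Illustrative examples (not part of the original source): for a
   signed compare in CCGCmode, GT prints "g" and, with REVERSE set,
   "le"; so an output template such as "j%C1\t%l0" would produce
   "jg" for a GT test and "jle" for its reversed form.  */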
6803
e075ae69 6804void
b96a374d 6805print_reg (rtx x, int code, FILE *file)
6806 {
e075ae69 6807 if (REGNO (x) == ARG_POINTER_REGNUM
564d80f4 6808 || REGNO (x) == FRAME_POINTER_REGNUM
6809 || REGNO (x) == FLAGS_REG
6810 || REGNO (x) == FPSR_REG)
6811 abort ();
e9a25f70 6812
5bf0ebab 6813 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
6814 putc ('%', file);
6815
ef6257cd 6816 if (code == 'w' || MMX_REG_P (x))
6817 code = 2;
6818 else if (code == 'b')
6819 code = 1;
6820 else if (code == 'k')
6821 code = 4;
6822 else if (code == 'q')
6823 code = 8;
6824 else if (code == 'y')
6825 code = 3;
6826 else if (code == 'h')
6827 code = 0;
6828 else
6829 code = GET_MODE_SIZE (GET_MODE (x));
e9a25f70 6830
6831 /* Irritatingly, the AMD extended registers use a different naming
6832 convention from the normal registers. */
6833 if (REX_INT_REG_P (x))
6834 {
6835 if (!TARGET_64BIT)
6836 abort ();
6837 switch (code)
6838 {
ef6257cd 6839 case 0:
c725bd79 6840 error ("extended registers have no high halves");
6841 break;
6842 case 1:
6843 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
6844 break;
6845 case 2:
6846 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
6847 break;
6848 case 4:
6849 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
6850 break;
6851 case 8:
6852 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
6853 break;
6854 default:
c725bd79 6855 error ("unsupported operand size for extended register");
6856 break;
6857 }
6858 return;
6859 }
6860 switch (code)
6861 {
6862 case 3:
6863 if (STACK_TOP_P (x))
6864 {
6865 fputs ("st(0)", file);
6866 break;
6867 }
6868 /* FALLTHRU */
e075ae69 6869 case 8:
3f3f2124 6870 case 4:
e075ae69 6871 case 12:
446988df 6872 if (! ANY_FP_REG_P (x))
885a70fd 6873 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
e075ae69 6874 /* FALLTHRU */
a7180f70 6875 case 16:
6876 case 2:
6877 fputs (hi_reg_name[REGNO (x)], file);
6878 break;
6879 case 1:
6880 fputs (qi_reg_name[REGNO (x)], file);
6881 break;
6882 case 0:
6883 fputs (qi_high_reg_name[REGNO (x)], file);
6884 break;
6885 default:
6886 abort ();
fe25fea3 6887 }
6888}
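/* Illustrative examples (not part of the original source): with
   x = (reg:SI 0), code 'k' prints "eax" (preceded by '%' in AT&T
   output), code 'w' prints "ax" and code 'b' prints "al"; for an
   extended register such as r8, width code 2 prints "r8w" and
   width code 8 prints "r8".  */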
6889
6890/* Locate some local-dynamic symbol still in use by this function
6891 so that we can print its name in some tls_local_dynamic_base
6892 pattern. */
6893
6894static const char *
b96a374d 6895get_some_local_dynamic_name (void)
6896{
6897 rtx insn;
6898
6899 if (cfun->machine->some_ld_name)
6900 return cfun->machine->some_ld_name;
6901
6902 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
6903 if (INSN_P (insn)
6904 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
6905 return cfun->machine->some_ld_name;
6906
6907 abort ();
6908}
6909
6910static int
b96a374d 6911get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
6912{
6913 rtx x = *px;
6914
6915 if (GET_CODE (x) == SYMBOL_REF
6916 && local_dynamic_symbolic_operand (x, Pmode))
6917 {
6918 cfun->machine->some_ld_name = XSTR (x, 0);
6919 return 1;
6920 }
6921
6922 return 0;
6923}
6924
2a2ab3f9 6925/* Meaning of CODE:
fe25fea3 6926 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
e5cb57e8 6927 C -- print opcode suffix for set/cmov insn.
fe25fea3 6928 c -- like C, but print reversed condition
ef6257cd 6929 F,f -- likewise, but for floating-point.
6930 O -- if CMOV_SUN_AS_SYNTAX, expand to "w.", "l." or "q.", otherwise
6931 nothing
6932 R -- print the prefix for register names.
6933 z -- print the opcode suffix for the size of the current operand.
6934 * -- print a star (in certain assembler syntax)
fb204271 6935 A -- print an absolute memory reference.
2a2ab3f9 6936 w -- print the operand as if it's a "word" (HImode) even if it isn't.
6937 s -- print a shift double count, followed by the assembler's argument
6938 delimiter.
6939 b -- print the QImode name of the register for the indicated operand.
6940 %b0 would print %al if operands[0] is reg 0.
6941 w -- likewise, print the HImode name of the register.
6942 k -- likewise, print the SImode name of the register.
3f3f2124 6943 q -- likewise, print the DImode name of the register.
6944 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
6945 y -- print "st(0)" instead of "st" as a register.
a46d1d38 6946 D -- print condition for SSE cmp instruction.
6947 P -- if PIC, print an @PLT suffix.
6948 X -- don't print any sort of PIC '@' suffix for a symbol.
f996902d 6949 & -- print some in-use local-dynamic symbol name.
a46d1d38 6950 */
6951
6952void
b96a374d 6953print_operand (FILE *file, rtx x, int code)
6954{
6955 if (code)
6956 {
6957 switch (code)
6958 {
6959 case '*':
80f33d06 6960 if (ASSEMBLER_DIALECT == ASM_ATT)
6961 putc ('*', file);
6962 return;
6963
6964 case '&':
6965 assemble_name (file, get_some_local_dynamic_name ());
6966 return;
6967
fb204271 6968 case 'A':
80f33d06 6969 if (ASSEMBLER_DIALECT == ASM_ATT)
fb204271 6970 putc ('*', file);
80f33d06 6971 else if (ASSEMBLER_DIALECT == ASM_INTEL)
fb204271
DN
6972 {
6973 /* Intel syntax. For absolute addresses, registers should not
6974 be surrounded by braces. */
6975 if (GET_CODE (x) != REG)
6976 {
6977 putc ('[', file);
6978 PRINT_OPERAND (file, x, 0);
6979 putc (']', file);
6980 return;
6981 }
6982 }
6983 else
6984 abort ();
6985
6986 PRINT_OPERAND (file, x, 0);
6987 return;
6988
6989
2a2ab3f9 6990 case 'L':
80f33d06 6991 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 6992 putc ('l', file);
6993 return;
6994
6995 case 'W':
80f33d06 6996 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 6997 putc ('w', file);
6998 return;
6999
7000 case 'B':
80f33d06 7001 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 7002 putc ('b', file);
7003 return;
7004
7005 case 'Q':
80f33d06 7006 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 7007 putc ('l', file);
7008 return;
7009
7010 case 'S':
80f33d06 7011 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 7012 putc ('s', file);
7013 return;
7014
5f1ec3e6 7015 case 'T':
80f33d06 7016 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 7017 putc ('t', file);
7018 return;
7019
7020 case 'z':
7021 /* 387 opcodes don't get size suffixes if the operands are
0f290768 7022 registers. */
7023 if (STACK_REG_P (x))
7024 return;
7025
7026 /* Likewise if using Intel opcodes. */
7027 if (ASSEMBLER_DIALECT == ASM_INTEL)
7028 return;
7029
7030 /* This is the size of op from size of operand. */
7031 switch (GET_MODE_SIZE (GET_MODE (x)))
7032 {
2a2ab3f9 7033 case 2:
7034#ifdef HAVE_GAS_FILDS_FISTS
7035 putc ('s', file);
7036#endif
7037 return;
7038
7039 case 4:
7040 if (GET_MODE (x) == SFmode)
7041 {
e075ae69 7042 putc ('s', file);
2a2ab3f9
JVA
7043 return;
7044 }
7045 else
e075ae69 7046 putc ('l', file);
7047 return;
7048
5f1ec3e6 7049 case 12:
2b589241 7050 case 16:
7051 putc ('t', file);
7052 return;
5f1ec3e6 7053
7054 case 8:
7055 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
7056 {
7057#ifdef GAS_MNEMONICS
e075ae69 7058 putc ('q', file);
56c0e8fa 7059#else
7060 putc ('l', file);
7061 putc ('l', file);
7062#endif
7063 }
7064 else
7065 putc ('l', file);
2a2ab3f9 7066 return;
7067
7068 default:
7069 abort ();
2a2ab3f9 7070 }
7071
7072 case 'b':
7073 case 'w':
7074 case 'k':
3f3f2124 7075 case 'q':
7076 case 'h':
7077 case 'y':
5cb6195d 7078 case 'X':
e075ae69 7079 case 'P':
7080 break;
7081
7082 case 's':
7083 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
7084 {
7085 PRINT_OPERAND (file, x, 0);
e075ae69 7086 putc (',', file);
2d49677f 7087 }
7088 return;
7089
7090 case 'D':
7091 /* A little bit of brain damage here. The SSE compare instructions
7092 use completely different names for the comparisons than the
7093 fp conditional moves do. */
7094 switch (GET_CODE (x))
7095 {
7096 case EQ:
7097 case UNEQ:
7098 fputs ("eq", file);
7099 break;
7100 case LT:
7101 case UNLT:
7102 fputs ("lt", file);
7103 break;
7104 case LE:
7105 case UNLE:
7106 fputs ("le", file);
7107 break;
7108 case UNORDERED:
7109 fputs ("unord", file);
7110 break;
7111 case NE:
7112 case LTGT:
7113 fputs ("neq", file);
7114 break;
7115 case UNGE:
7116 case GE:
7117 fputs ("nlt", file);
7118 break;
7119 case UNGT:
7120 case GT:
7121 fputs ("nle", file);
7122 break;
7123 case ORDERED:
7124 fputs ("ord", file);
7125 break;
7126 default:
7127 abort ();
7128 break;
7129 }
7130 return;
7131 case 'O':
7132#ifdef CMOV_SUN_AS_SYNTAX
7133 if (ASSEMBLER_DIALECT == ASM_ATT)
7134 {
7135 switch (GET_MODE (x))
7136 {
7137 case HImode: putc ('w', file); break;
7138 case SImode:
7139 case SFmode: putc ('l', file); break;
7140 case DImode:
7141 case DFmode: putc ('q', file); break;
7142 default: abort ();
7143 }
7144 putc ('.', file);
7145 }
7146#endif
7147 return;
1853aadd 7148 case 'C':
e075ae69 7149 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
1853aadd 7150 return;
fe25fea3 7151 case 'F':
7152#ifdef CMOV_SUN_AS_SYNTAX
7153 if (ASSEMBLER_DIALECT == ASM_ATT)
7154 putc ('.', file);
7155#endif
e075ae69 7156 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
7157 return;
7158
e9a25f70 7159 /* Like above, but reverse condition */
e075ae69 7160 case 'c':
fce5a9f2 7161 /* Check to see if argument to %c is really a constant
7162 and not a condition code which needs to be reversed. */
7163 if (GET_RTX_CLASS (GET_CODE (x)) != '<')
7164 {
7165 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
7166 return;
7167 }
7168 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
7169 return;
fe25fea3 7170 case 'f':
7171#ifdef CMOV_SUN_AS_SYNTAX
7172 if (ASSEMBLER_DIALECT == ASM_ATT)
7173 putc ('.', file);
7174#endif
e075ae69 7175 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
1853aadd 7176 return;
7177 case '+':
7178 {
7179 rtx x;
7180
7181 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
7182 return;
a4f31c00 7183
7184 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
7185 if (x)
7186 {
7187 int pred_val = INTVAL (XEXP (x, 0));
7188
7189 if (pred_val < REG_BR_PROB_BASE * 45 / 100
7190 || pred_val > REG_BR_PROB_BASE * 55 / 100)
7191 {
7192 int taken = pred_val > REG_BR_PROB_BASE / 2;
7193 int cputaken = final_forward_branch_p (current_output_insn) == 0;
7194
7195 /* Emit hints only in the cases where the default branch prediction
d1f87653 7196 heuristics would fail. */
7197 if (taken != cputaken)
7198 {
7199 /* We use 3e (DS) prefix for taken branches and
7200 2e (CS) prefix for not taken branches. */
7201 if (taken)
7202 fputs ("ds ; ", file);
7203 else
7204 fputs ("cs ; ", file);
7205 }
7206 }
7207 }
7208 return;
7209 }
4af3895e 7210 default:
a52453cc 7211 output_operand_lossage ("invalid operand code `%c'", code);
7212 }
7213 }
e9a25f70 7214
7215 if (GET_CODE (x) == REG)
7216 {
7217 PRINT_REG (x, code, file);
7218 }
e9a25f70 7219
7220 else if (GET_CODE (x) == MEM)
7221 {
e075ae69 7222 /* No `byte ptr' prefix for call instructions. */
80f33d06 7223 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
2a2ab3f9 7224 {
69ddee61 7225 const char * size;
7226 switch (GET_MODE_SIZE (GET_MODE (x)))
7227 {
7228 case 1: size = "BYTE"; break;
7229 case 2: size = "WORD"; break;
7230 case 4: size = "DWORD"; break;
7231 case 8: size = "QWORD"; break;
7232 case 12: size = "XWORD"; break;
a7180f70 7233 case 16: size = "XMMWORD"; break;
e075ae69 7234 default:
564d80f4 7235 abort ();
e075ae69 7236 }
7237
7238 /* Check for explicit size override (codes 'b', 'w' and 'k') */
7239 if (code == 'b')
7240 size = "BYTE";
7241 else if (code == 'w')
7242 size = "WORD";
7243 else if (code == 'k')
7244 size = "DWORD";
7245
7246 fputs (size, file);
7247 fputs (" PTR ", file);
2a2ab3f9 7248 }
7249
7250 x = XEXP (x, 0);
0d7d98ee 7251 /* Avoid (%rip) for call operands. */
d10f5ecf 7252 if (CONSTANT_ADDRESS_P (x) && code == 'P'
7253 && GET_CODE (x) != CONST_INT)
7254 output_addr_const (file, x);
7255 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
7256 output_operand_lossage ("invalid constraints for operand");
2a2ab3f9 7257 else
e075ae69 7258 output_address (x);
2a2ab3f9 7259 }
e9a25f70 7260
7261 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
7262 {
7263 REAL_VALUE_TYPE r;
7264 long l;
7265
7266 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7267 REAL_VALUE_TO_TARGET_SINGLE (r, l);
e075ae69 7268
80f33d06 7269 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 7270 putc ('$', file);
52267fcb 7271 fprintf (file, "0x%lx", l);
5f1ec3e6 7272 }
e9a25f70 7273
7274 /* These float cases don't actually occur as immediate operands. */
7275 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
5f1ec3e6 7276 {
7277 char dstr[30];
7278
da6eec72 7279 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
5f1ec3e6 7280 fprintf (file, "%s", dstr);
2a2ab3f9 7281 }
e9a25f70 7282
7283 else if (GET_CODE (x) == CONST_DOUBLE
7284 && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
2a2ab3f9 7285 {
7286 char dstr[30];
7287
da6eec72 7288 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
5f1ec3e6 7289 fprintf (file, "%s", dstr);
2a2ab3f9 7290 }
f996902d 7291
79325812 7292 else
2a2ab3f9 7293 {
4af3895e 7294 if (code != 'P')
2a2ab3f9 7295 {
695dac07 7296 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
e075ae69 7297 {
80f33d06 7298 if (ASSEMBLER_DIALECT == ASM_ATT)
7299 putc ('$', file);
7300 }
7301 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
7302 || GET_CODE (x) == LABEL_REF)
e075ae69 7303 {
80f33d06 7304 if (ASSEMBLER_DIALECT == ASM_ATT)
7305 putc ('$', file);
7306 else
7307 fputs ("OFFSET FLAT:", file);
7308 }
2a2ab3f9 7309 }
7310 if (GET_CODE (x) == CONST_INT)
7311 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7312 else if (flag_pic)
7313 output_pic_addr_const (file, x, code);
7314 else
7315 output_addr_const (file, x);
7316 }
7317}
7318\f
7319/* Print a memory operand whose address is ADDR. */
7320
7321void
b96a374d 7322print_operand_address (FILE *file, register rtx addr)
2a2ab3f9 7323{
7324 struct ix86_address parts;
7325 rtx base, index, disp;
7326 int scale;
e9a25f70 7327
e075ae69
RH
7328 if (! ix86_decompose_address (addr, &parts))
7329 abort ();
e9a25f70 7330
7331 base = parts.base;
7332 index = parts.index;
7333 disp = parts.disp;
7334 scale = parts.scale;
e9a25f70 7335
7336 switch (parts.seg)
7337 {
7338 case SEG_DEFAULT:
7339 break;
7340 case SEG_FS:
7341 case SEG_GS:
7342 if (USER_LABEL_PREFIX[0] == 0)
7343 putc ('%', file);
7344 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
7345 break;
7346 default:
7347 abort ();
7348 }
7349
7350 if (!base && !index)
7351 {
7352 /* A displacement-only address requires special attention. */
e9a25f70 7353
e075ae69 7354 if (GET_CODE (disp) == CONST_INT)
2a2ab3f9 7355 {
74dc3e94 7356 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
7357 {
7358 if (USER_LABEL_PREFIX[0] == 0)
7359 putc ('%', file);
7360 fputs ("ds:", file);
7361 }
74dc3e94 7362 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
2a2ab3f9 7363 }
e075ae69 7364 else if (flag_pic)
74dc3e94 7365 output_pic_addr_const (file, disp, 0);
e075ae69 7366 else
74dc3e94 7367 output_addr_const (file, disp);
7368
7369 /* Use one byte shorter RIP relative addressing for 64bit mode. */
edfe8595 7370 if (TARGET_64BIT
7371 && ((GET_CODE (disp) == SYMBOL_REF
7372 && ! tls_symbolic_operand (disp, GET_MODE (disp)))
7373 || GET_CODE (disp) == LABEL_REF
7374 || (GET_CODE (disp) == CONST
7375 && GET_CODE (XEXP (disp, 0)) == PLUS
7376 && (GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
7377 || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
7378 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)))
0d7d98ee 7379 fputs ("(%rip)", file);
7380 }
7381 else
7382 {
80f33d06 7383 if (ASSEMBLER_DIALECT == ASM_ATT)
2a2ab3f9 7384 {
e075ae69 7385 if (disp)
2a2ab3f9 7386 {
c399861d 7387 if (flag_pic)
7388 output_pic_addr_const (file, disp, 0);
7389 else if (GET_CODE (disp) == LABEL_REF)
7390 output_asm_label (disp);
2a2ab3f9 7391 else
e075ae69 7392 output_addr_const (file, disp);
7393 }
7394
7395 putc ('(', file);
7396 if (base)
7397 PRINT_REG (base, 0, file);
7398 if (index)
2a2ab3f9 7399 {
e075ae69
RH
7400 putc (',', file);
7401 PRINT_REG (index, 0, file);
7402 if (scale != 1)
7403 fprintf (file, ",%d", scale);
2a2ab3f9 7404 }
e075ae69 7405 putc (')', file);
2a2ab3f9 7406 }
7407 else
7408 {
e075ae69 7409 rtx offset = NULL_RTX;
e9a25f70 7410
e075ae69
RH
7411 if (disp)
7412 {
7413 /* Pull out the offset of a symbol; print any symbol itself. */
7414 if (GET_CODE (disp) == CONST
7415 && GET_CODE (XEXP (disp, 0)) == PLUS
7416 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
7417 {
7418 offset = XEXP (XEXP (disp, 0), 1);
7419 disp = gen_rtx_CONST (VOIDmode,
7420 XEXP (XEXP (disp, 0), 0));
7421 }
ce193852 7422
e075ae69
RH
7423 if (flag_pic)
7424 output_pic_addr_const (file, disp, 0);
7425 else if (GET_CODE (disp) == LABEL_REF)
7426 output_asm_label (disp);
7427 else if (GET_CODE (disp) == CONST_INT)
7428 offset = disp;
7429 else
7430 output_addr_const (file, disp);
7431 }
e9a25f70 7432
e075ae69
RH
7433 putc ('[', file);
7434 if (base)
a8620236 7435 {
e075ae69
RH
7436 PRINT_REG (base, 0, file);
7437 if (offset)
7438 {
7439 if (INTVAL (offset) >= 0)
7440 putc ('+', file);
7441 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7442 }
a8620236 7443 }
7444 else if (offset)
7445 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
2247a58c 7446 else
e075ae69 7447 putc ('0', file);
e9a25f70 7448
e075ae69
RH
7449 if (index)
7450 {
7451 putc ('+', file);
7452 PRINT_REG (index, 0, file);
7453 if (scale != 1)
7454 fprintf (file, "*%d", scale);
7455 }
7456 putc (']', file);
7457 }
7458 }
7459}
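/* Illustrative output (not part of the original source): an address
   with base %ebx, index %esi, scale 4 and displacement 12 prints as
   "12(%ebx,%esi,4)" in AT&T syntax and as "[ebx+12+esi*4]" in Intel
   syntax.  */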
7460
7461bool
b96a374d 7462output_addr_const_extra (FILE *file, rtx x)
7463{
7464 rtx op;
7465
7466 if (GET_CODE (x) != UNSPEC)
7467 return false;
7468
7469 op = XVECEXP (x, 0, 0);
7470 switch (XINT (x, 1))
7471 {
7472 case UNSPEC_GOTTPOFF:
7473 output_addr_const (file, op);
dea73790 7474 /* FIXME: This might be @TPOFF in Sun ld. */
f996902d
RH
7475 fputs ("@GOTTPOFF", file);
7476 break;
7477 case UNSPEC_TPOFF:
7478 output_addr_const (file, op);
7479 fputs ("@TPOFF", file);
7480 break;
7481 case UNSPEC_NTPOFF:
7482 output_addr_const (file, op);
7483 if (TARGET_64BIT)
7484 fputs ("@TPOFF", file);
7485 else
7486 fputs ("@NTPOFF", file);
7487 break;
7488 case UNSPEC_DTPOFF:
7489 output_addr_const (file, op);
7490 fputs ("@DTPOFF", file);
7491 break;
7492 case UNSPEC_GOTNTPOFF:
7493 output_addr_const (file, op);
7494 if (TARGET_64BIT)
7495 fputs ("@GOTTPOFF(%rip)", file);
7496 else
7497 fputs ("@GOTNTPOFF", file);
7498 break;
7499 case UNSPEC_INDNTPOFF:
7500 output_addr_const (file, op);
7501 fputs ("@INDNTPOFF", file);
7502 break;
7503
7504 default:
7505 return false;
7506 }
7507
7508 return true;
7509}
7510\f
7511/* Split one or more DImode RTL references into pairs of SImode
7512 references. The RTL can be REG, offsettable MEM, integer constant, or
7513 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7514 split and "num" is its length. lo_half and hi_half are output arrays
0f290768 7515 that parallel "operands". */
7516
7517void
b96a374d 7518split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
7519{
7520 while (num--)
7521 {
57dbca5e 7522 rtx op = operands[num];
7523
7524 /* simplify_subreg refuses to split volatile memory addresses,
7525 but we still have to handle them. */
7526 if (GET_CODE (op) == MEM)
2a2ab3f9 7527 {
f4ef873c 7528 lo_half[num] = adjust_address (op, SImode, 0);
b72f00af 7529 hi_half[num] = adjust_address (op, SImode, 4);
7530 }
7531 else
b932f770 7532 {
7533 lo_half[num] = simplify_gen_subreg (SImode, op,
7534 GET_MODE (op) == VOIDmode
7535 ? DImode : GET_MODE (op), 0);
7536 hi_half[num] = simplify_gen_subreg (SImode, op,
7537 GET_MODE (op) == VOIDmode
7538 ? DImode : GET_MODE (op), 4);
b932f770 7539 }
7540 }
7541}
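/* Illustrative example (not part of the original source): for
   operands[0] = (reg:DI 0), the subreg calls above produce
   lo_half[0] = (reg:SI 0) and hi_half[0] = (reg:SI 1), i.e. the low
   word in %eax and the high word in %edx, while a volatile DImode
   MEM is instead split into two SImode MEMs at offsets 0 and 4.  */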
7542/* Split one or more TImode RTL references into pairs of DImode
7543 references. The RTL can be REG, offsettable MEM, integer constant, or
7544 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
7545 split and "num" is its length. lo_half and hi_half are output arrays
7546 that parallel "operands". */
7547
7548void
b96a374d 7549split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
7550{
7551 while (num--)
7552 {
7553 rtx op = operands[num];
7554
7555 /* simplify_subreg refuses to split volatile memory addresses, but we
7556 still have to handle them. */
7557 if (GET_CODE (op) == MEM)
7558 {
7559 lo_half[num] = adjust_address (op, DImode, 0);
7560 hi_half[num] = adjust_address (op, DImode, 8);
7561 }
7562 else
7563 {
7564 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
7565 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
7566 }
7567 }
7568}
2a2ab3f9 7569\f
7570/* Output code to perform a 387 binary operation in INSN, one of PLUS,
7571 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
7572 is the expression of the binary operation. The output may either be
7573 emitted here, or returned to the caller, like all output_* functions.
7574
7575 There is no guarantee that the operands are the same mode, as they
0f290768 7576 might be within FLOAT or FLOAT_EXTEND expressions. */
2a2ab3f9 7577
7578#ifndef SYSV386_COMPAT
7579/* Set to 1 for compatibility with brain-damaged assemblers. No-one
7580 wants to fix the assemblers because that causes incompatibility
7581 with gcc. No-one wants to fix gcc because that causes
7582 incompatibility with assemblers... You can use the option of
7583 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
7584#define SYSV386_COMPAT 1
7585#endif
7586
69ddee61 7587const char *
b96a374d 7588output_387_binary_op (rtx insn, rtx *operands)
2a2ab3f9 7589{
e3c2afab 7590 static char buf[30];
69ddee61 7591 const char *p;
7592 const char *ssep;
7593 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
2a2ab3f9 7594
7595#ifdef ENABLE_CHECKING
7596 /* Even if we do not want to check the inputs, this documents the input
7597 constraints, which helps in understanding the following code. */
7598 if (STACK_REG_P (operands[0])
7599 && ((REG_P (operands[1])
7600 && REGNO (operands[0]) == REGNO (operands[1])
7601 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
7602 || (REG_P (operands[2])
7603 && REGNO (operands[0]) == REGNO (operands[2])
7604 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
7605 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
7606 ; /* ok */
1deaa899 7607 else if (!is_sse)
7608 abort ();
7609#endif
7610
7611 switch (GET_CODE (operands[3]))
7612 {
7613 case PLUS:
7614 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7615 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7616 p = "fiadd";
7617 else
7618 p = "fadd";
1deaa899 7619 ssep = "add";
7620 break;
7621
7622 case MINUS:
7623 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7624 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7625 p = "fisub";
7626 else
7627 p = "fsub";
1deaa899 7628 ssep = "sub";
7629 break;
7630
7631 case MULT:
7632 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7633 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7634 p = "fimul";
7635 else
7636 p = "fmul";
1deaa899 7637 ssep = "mul";
7638 break;
7639
7640 case DIV:
7641 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7642 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7643 p = "fidiv";
7644 else
7645 p = "fdiv";
1deaa899 7646 ssep = "div";
7647 break;
7648
7649 default:
7650 abort ();
7651 }
7652
7653 if (is_sse)
7654 {
7655 strcpy (buf, ssep);
7656 if (GET_MODE (operands[0]) == SFmode)
7657 strcat (buf, "ss\t{%2, %0|%0, %2}");
7658 else
7659 strcat (buf, "sd\t{%2, %0|%0, %2}");
7660 return buf;
7661 }
e075ae69 7662 strcpy (buf, p);
7663
7664 switch (GET_CODE (operands[3]))
7665 {
7666 case MULT:
7667 case PLUS:
7668 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
7669 {
e3c2afab 7670 rtx temp = operands[2];
7671 operands[2] = operands[1];
7672 operands[1] = temp;
7673 }
7674
7675 /* We now know operands[0] == operands[1]. */
7676
2a2ab3f9 7677 if (GET_CODE (operands[2]) == MEM)
7678 {
7679 p = "%z2\t%2";
7680 break;
7681 }
7682
7683 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7684 {
7685 if (STACK_TOP_P (operands[0]))
7686 /* How is it that we are storing to a dead operand[2]?
7687 Well, presumably operands[1] is dead too. We can't
7688 store the result to st(0) as st(0) gets popped on this
7689 instruction. Instead store to operands[2] (which I
7690 think has to be st(1)). st(1) will be popped later.
7691 gcc <= 2.8.1 didn't have this check and generated
7692 assembly code that the Unixware assembler rejected. */
7693 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6b28fd63 7694 else
e3c2afab 7695 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
e075ae69 7696 break;
6b28fd63 7697 }
7698
7699 if (STACK_TOP_P (operands[0]))
e3c2afab 7700 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
2a2ab3f9 7701 else
e3c2afab 7702 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
e075ae69 7703 break;
7704
7705 case MINUS:
7706 case DIV:
7707 if (GET_CODE (operands[1]) == MEM)
e075ae69
RH
7708 {
7709 p = "r%z1\t%1";
7710 break;
7711 }
7712
7713 if (GET_CODE (operands[2]) == MEM)
7714 {
7715 p = "%z2\t%2";
7716 break;
7717 }
2a2ab3f9 7718
2a2ab3f9 7719 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6b28fd63 7720 {
7721#if SYSV386_COMPAT
7722 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7723 derived assemblers, confusingly reverse the direction of
7724 the operation for fsub{r} and fdiv{r} when the
7725 destination register is not st(0). The Intel assembler
7726 doesn't have this brain damage. Read !SYSV386_COMPAT to
7727 figure out what the hardware really does. */
7728 if (STACK_TOP_P (operands[0]))
7729 p = "{p\t%0, %2|rp\t%2, %0}";
7730 else
7731 p = "{rp\t%2, %0|p\t%0, %2}";
7732#else
6b28fd63 7733 if (STACK_TOP_P (operands[0]))
e3c2afab
AM
7734 /* As above for fmul/fadd, we can't store to st(0). */
7735 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6b28fd63 7736 else
e3c2afab
AM
7737 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7738#endif
e075ae69 7739 break;
6b28fd63 7740 }
7741
7742 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
6b28fd63 7743 {
e3c2afab 7744#if SYSV386_COMPAT
6b28fd63 7745 if (STACK_TOP_P (operands[0]))
e3c2afab 7746 p = "{rp\t%0, %1|p\t%1, %0}";
6b28fd63 7747 else
e3c2afab
AM
7748 p = "{p\t%1, %0|rp\t%0, %1}";
7749#else
7750 if (STACK_TOP_P (operands[0]))
7751 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7752 else
7753 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7754#endif
e075ae69 7755 break;
6b28fd63 7756 }
7757
7758 if (STACK_TOP_P (operands[0]))
7759 {
7760 if (STACK_TOP_P (operands[1]))
e3c2afab 7761 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
2a2ab3f9 7762 else
e3c2afab 7763 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
e075ae69 7764 break;
7765 }
7766 else if (STACK_TOP_P (operands[1]))
7767 {
7768#if SYSV386_COMPAT
7769 p = "{\t%1, %0|r\t%0, %1}";
7770#else
7771 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
7772#endif
7773 }
2a2ab3f9 7774 else
7775 {
7776#if SYSV386_COMPAT
7777 p = "{r\t%2, %0|\t%0, %2}";
7778#else
7779 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7780#endif
7781 }
e075ae69 7782 break;
7783
7784 default:
7785 abort ();
7786 }
7787
7788 strcat (buf, p);
7789 return buf;
2a2ab3f9 7790}
e075ae69 7791
a4f31c00 7792/* Output code to initialize control word copies used by
7793 trunc?f?i patterns. NORMAL is set to current control word, while ROUND_DOWN
7794 is set to control word rounding downwards. */
7795void
b96a374d 7796emit_i387_cw_initialization (rtx normal, rtx round_down)
7797{
7798 rtx reg = gen_reg_rtx (HImode);
7799
7800 emit_insn (gen_x86_fnstcw_1 (normal));
7801 emit_move_insn (reg, normal);
7802 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
7803 && !TARGET_64BIT)
7804 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
7805 else
7806 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
7807 emit_move_insn (round_down, reg);
7808}
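/* Illustrative note (not part of the original source): bits 10-11 of
   the 387 control word form the rounding-control field, and or-ing in
   0xc00 sets both bits, selecting round-toward-zero as C's
   float-to-integer conversion requires; the movsi_insv_1 path stores
   0xc into the high byte to set the same field without a 16-bit
   partial-register operation.  */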
7809
2a2ab3f9 7810/* Output code for INSN to convert a float to a signed int. OPERANDS
46d21d2c 7811 are the insn operands. The output may be [HSD]Imode and the input
e075ae69 7812 operand may be [SDX]Fmode. */
2a2ab3f9 7813
69ddee61 7814const char *
b96a374d 7815output_fix_trunc (rtx insn, rtx *operands)
7816{
7817 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
e075ae69 7818 int dimode_p = GET_MODE (operands[0]) == DImode;
2a2ab3f9 7819
7820 /* Jump through a hoop or two for DImode, since the hardware has no
7821 non-popping instruction. We used to do this a different way, but
7822 that was somewhat fragile and broke with post-reload splitters. */
7823 if (dimode_p && !stack_top_dies)
7824 output_asm_insn ("fld\t%y1", operands);
e075ae69 7825
7a2e09f4 7826 if (!STACK_TOP_P (operands[1]))
7827 abort ();
7828
e075ae69 7829 if (GET_CODE (operands[0]) != MEM)
7a2e09f4 7830 abort ();
e9a25f70 7831
7a2e09f4 7832 output_asm_insn ("fldcw\t%3", operands);
e075ae69 7833 if (stack_top_dies || dimode_p)
7a2e09f4 7834 output_asm_insn ("fistp%z0\t%0", operands);
10195bd8 7835 else
7a2e09f4 7836 output_asm_insn ("fist%z0\t%0", operands);
e075ae69 7837 output_asm_insn ("fldcw\t%2", operands);
10195bd8 7838
e075ae69 7839 return "";
2a2ab3f9 7840}
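/* Illustrative expansion (not part of the original source): for a
   DImode destination whose x87 input does not die, the sequence
   emitted is roughly
	fld	%st(0)		; duplicate, since fistp pops
	fldcw	%3		; truncating control word
	fistpll	%0
	fldcw	%2		; restore original control word
   */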
cda749b1 7841
7842/* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
7843 should be used and 2 when fnstsw should be used. UNORDERED_P is true
7844 when fucom should be used. */
7845
69ddee61 7846const char *
b96a374d 7847output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
cda749b1 7848{
7849 int stack_top_dies;
7850 rtx cmp_op0 = operands[0];
7851 rtx cmp_op1 = operands[1];
0644b628 7852 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
7853
7854 if (eflags_p == 2)
7855 {
7856 cmp_op0 = cmp_op1;
7857 cmp_op1 = operands[2];
7858 }
7859 if (is_sse)
7860 {
7861 if (GET_MODE (operands[0]) == SFmode)
7862 if (unordered_p)
7863 return "ucomiss\t{%1, %0|%0, %1}";
7864 else
a5cf80f0 7865 return "comiss\t{%1, %0|%0, %1}";
7866 else
7867 if (unordered_p)
7868 return "ucomisd\t{%1, %0|%0, %1}";
7869 else
a5cf80f0 7870 return "comisd\t{%1, %0|%0, %1}";
0644b628 7871 }
cda749b1 7872
e075ae69 7873 if (! STACK_TOP_P (cmp_op0))
7874 abort ();
7875
e075ae69 7876 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
cda749b1 7877
7878 if (STACK_REG_P (cmp_op1)
7879 && stack_top_dies
7880 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
7881 && REGNO (cmp_op1) != FIRST_STACK_REG)
cda749b1 7882 {
7883 /* If the top of the 387 stack dies, and the other operand
7884 is also a stack register that dies, then this must be an
7885 `fcompp' float compare. */
7886
7887 if (eflags_p == 1)
7888 {
7889 /* There is no double popping fcomi variant. Fortunately,
7890 eflags is immune from the fstp's cc clobbering. */
7891 if (unordered_p)
7892 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
7893 else
7894 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
7895 return "fstp\t%y0";
7896 }
7897 else
RH
7899 if (eflags_p == 2)
7900 {
7901 if (unordered_p)
7902 return "fucompp\n\tfnstsw\t%0";
7903 else
7904 return "fcompp\n\tfnstsw\t%0";
7905 }
7906 else
7907 {
7908 if (unordered_p)
7909 return "fucompp";
7910 else
7911 return "fcompp";
7912 }
7913 }
7914 }
7915 else
7916 {
e075ae69 7917 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
cda749b1 7918
0f290768 7919 static const char * const alt[24] =
7920 {
7921 "fcom%z1\t%y1",
7922 "fcomp%z1\t%y1",
7923 "fucom%z1\t%y1",
7924 "fucomp%z1\t%y1",
0f290768 7925
7926 "ficom%z1\t%y1",
7927 "ficomp%z1\t%y1",
7928 NULL,
7929 NULL,
7930
7931 "fcomi\t{%y1, %0|%0, %y1}",
7932 "fcomip\t{%y1, %0|%0, %y1}",
7933 "fucomi\t{%y1, %0|%0, %y1}",
7934 "fucomip\t{%y1, %0|%0, %y1}",
7935
7936 NULL,
7937 NULL,
7938 NULL,
7939 NULL,
7940
7941 "fcom%z2\t%y2\n\tfnstsw\t%0",
7942 "fcomp%z2\t%y2\n\tfnstsw\t%0",
7943 "fucom%z2\t%y2\n\tfnstsw\t%0",
7944 "fucomp%z2\t%y2\n\tfnstsw\t%0",
0f290768 7945
7946 "ficom%z2\t%y2\n\tfnstsw\t%0",
7947 "ficomp%z2\t%y2\n\tfnstsw\t%0",
7948 NULL,
7949 NULL
7950 };
7951
7952 int mask;
69ddee61 7953 const char *ret;
7954
7955 mask = eflags_p << 3;
7956 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
7957 mask |= unordered_p << 1;
7958 mask |= stack_top_dies;
7959
7960 if (mask >= 24)
7961 abort ();
7962 ret = alt[mask];
7963 if (ret == NULL)
7964 abort ();
cda749b1 7965
e075ae69 7966 return ret;
7967 }
7968}
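/* Illustrative note (not part of the original source): the mask above
   packs eflags_p, integer-mode, unordered_p and stack_top_dies into
   four bits, so an fcomi-style unordered compare whose top-of-stack
   dies gives mask 8 + 2 + 1 = 11 and selects
   "fucomip\t{%y1, %0|%0, %y1}" from the table.  */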
2a2ab3f9 7969
f88c65f7 7970void
b96a374d 7971ix86_output_addr_vec_elt (FILE *file, int value)
7972{
7973 const char *directive = ASM_LONG;
7974
7975 if (TARGET_64BIT)
7976 {
7977#ifdef ASM_QUAD
7978 directive = ASM_QUAD;
7979#else
7980 abort ();
7981#endif
7982 }
7983
7984 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
7985}
7986
7987void
b96a374d 7988ix86_output_addr_diff_elt (FILE *file, int value, int rel)
7989{
7990 if (TARGET_64BIT)
74411039 7991 fprintf (file, "%s%s%d-%s%d\n",
7992 ASM_LONG, LPREFIX, value, LPREFIX, rel);
7993 else if (HAVE_AS_GOTOFF_IN_DATA)
7994 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
7995#if TARGET_MACHO
7996 else if (TARGET_MACHO)
7997 fprintf (file, "%s%s%d-%s\n", ASM_LONG, LPREFIX, value,
7998 machopic_function_base_name () + 1);
7999#endif
f88c65f7 8000 else
8001 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
8002 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
f88c65f7 8003}
32b5b1aa 8004\f
8005/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
8006 for the target. */
8007
8008void
b96a374d 8009ix86_expand_clear (rtx dest)
8010{
8011 rtx tmp;
8012
8013 /* We play register width games, which are only valid after reload. */
8014 if (!reload_completed)
8015 abort ();
8016
8017 /* Avoid HImode and its attendant prefix byte. */
8018 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
8019 dest = gen_rtx_REG (SImode, REGNO (dest));
8020
8021 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
8022
8023 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
8024 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
8025 {
8026 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
8027 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8028 }
8029
8030 emit_insn (tmp);
8031}
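/* Illustrative note (not part of the original source):
   "xorl %eax, %eax" needs two bytes against five for
   "movl $0, %eax", but it clobbers the condition codes, which is why
   the parallel above attaches an explicit CLOBBER of the flags
   register.  */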
8032
8033/* X is an unchanging MEM. If it is a constant pool reference, return
8034 the constant pool rtx, else NULL. */
8035
8036static rtx
b96a374d 8037maybe_get_pool_constant (rtx x)
f996902d 8038{
69bd9368 8039 x = ix86_delegitimize_address (XEXP (x, 0));
8040
8041 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8042 return get_pool_constant (x);
8043
8044 return NULL_RTX;
8045}
8046
79325812 8047void
b96a374d 8048ix86_expand_move (enum machine_mode mode, rtx operands[])
32b5b1aa 8049{
e075ae69 8050 int strict = (reload_in_progress || reload_completed);
8051 rtx op0, op1;
8052 enum tls_model model;
8053
8054 op0 = operands[0];
8055 op1 = operands[1];
8056
8057 model = tls_symbolic_operand (op1, Pmode);
8058 if (model)
f996902d 8059 {
8060 op1 = legitimize_tls_address (op1, model, true);
8061 op1 = force_operand (op1, op0);
8062 if (op1 == op0)
8063 return;
RH
8065
8066 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
f996902d 8067 {
8068#if TARGET_MACHO
8069 if (MACHOPIC_PURE)
8070 {
8071 rtx temp = ((reload_in_progress
8072 || ((op0 && GET_CODE (op0) == REG)
8073 && mode == Pmode))
8074 ? op0 : gen_reg_rtx (Pmode));
8075 op1 = machopic_indirect_data_reference (op1, temp);
8076 op1 = machopic_legitimize_pic_address (op1, mode,
8077 temp == op1 ? 0 : temp);
8078 }
8079 else if (MACHOPIC_INDIRECT)
8080 op1 = machopic_indirect_data_reference (op1, 0);
8081 if (op0 == op1)
8082 return;
8083#else
8084 if (GET_CODE (op0) == MEM)
8085 op1 = force_reg (Pmode, op1);
e075ae69 8086 else
32b5b1aa 8087 {
f996902d 8088 rtx temp = op0;
8089 if (GET_CODE (temp) != REG)
8090 temp = gen_reg_rtx (Pmode);
f996902d
RH
8091 temp = legitimize_pic_address (op1, temp);
8092 if (temp == op0)
e075ae69 8093 return;
f996902d 8094 op1 = temp;
32b5b1aa 8095 }
74dc3e94 8096#endif /* TARGET_MACHO */
8097 }
8098 else
8099 {
f996902d
RH
8102 || !push_operand (op0, mode))
8103 && GET_CODE (op1) == MEM)
8104 op1 = force_reg (mode, op1);
e9a25f70 8105
8106 if (push_operand (op0, mode)
8107 && ! general_no_elim_operand (op1, mode))
8108 op1 = copy_to_mode_reg (mode, op1);
2c5a510c 8109
8110 /* Force large constants in 64bit compilation into register
8111 to get them CSEed. */
8112 if (TARGET_64BIT && mode == DImode
8113 && immediate_operand (op1, mode)
8114 && !x86_64_zero_extended_value (op1)
8115 && !register_operand (op0, mode)
44cf5b6a 8116 && optimize && !reload_completed && !reload_in_progress)
f996902d 8117 op1 = copy_to_mode_reg (mode, op1);
44cf5b6a 8118
e075ae69 8119 if (FLOAT_MODE_P (mode))
32b5b1aa 8120 {
8121 /* If we are loading a floating point constant to a register,
8122 force the value to memory now, since we'll get better code
8123 out the back end. */
8124
8125 if (strict)
8126 ;
8127 else if (GET_CODE (op1) == CONST_DOUBLE)
8128 {
8129 op1 = validize_mem (force_const_mem (mode, op1));
8130 if (!register_operand (op0, mode))
8131 {
8132 rtx temp = gen_reg_rtx (mode);
8133 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
8134 emit_move_insn (op0, temp);
8135 return;
8136 }
8137 }
32b5b1aa 8138 }
32b5b1aa 8139 }
e9a25f70 8140
74dc3e94 8141 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
e075ae69 8142}
e9a25f70 8143
e37af218 8144void
b96a374d 8145ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
e37af218
RH
8146{
8147 /* Force constants other than zero into memory. We do not know how
8148 the instructions used to build constants modify the upper 64 bits
8149 of the register; once we have that information, we may be able
8150 to handle some of them more efficiently. */
8151 if ((reload_in_progress | reload_completed) == 0
8152 && register_operand (operands[0], mode)
fdc4b40b 8153 && CONSTANT_P (operands[1]) && operands[1] != CONST0_RTX (mode))
2b28d405 8154 operands[1] = validize_mem (force_const_mem (mode, operands[1]));
e37af218
RH
8155
8156 /* Make operand1 a register if it isn't already. */
f8ca7923 8157 if (!no_new_pseudos
e37af218 8158 && !register_operand (operands[0], mode)
b105d6da 8159 && !register_operand (operands[1], mode))
e37af218 8160 {
59bef189 8161 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
8162 emit_move_insn (operands[0], temp);
8163 return;
8164 }
8165
8166 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
fce5a9f2 8167}
e37af218 8168
8169/* Attempt to expand a binary operator. Make the expansion closer to the
8170 actual machine than just general_operand, which will allow 3 separate
9d81fc27 8171 memory references (one output, two input) in a single insn. */
e9a25f70 8172
e075ae69 8173void
8174ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
8175 rtx operands[])
8176{
8177 int matching_memory;
8178 rtx src1, src2, dst, op, clob;
8179
8180 dst = operands[0];
8181 src1 = operands[1];
8182 src2 = operands[2];
8183
8184 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
8185 if (GET_RTX_CLASS (code) == 'c'
8186 && (rtx_equal_p (dst, src2)
8187 || immediate_operand (src1, mode)))
8188 {
8189 rtx temp = src1;
8190 src1 = src2;
8191 src2 = temp;
32b5b1aa 8192 }
e9a25f70 8193
8194 /* If the destination is memory, and we do not have matching source
8195 operands, do things in registers. */
8196 matching_memory = 0;
8197 if (GET_CODE (dst) == MEM)
32b5b1aa 8198 {
8199 if (rtx_equal_p (dst, src1))
8200 matching_memory = 1;
8201 else if (GET_RTX_CLASS (code) == 'c'
8202 && rtx_equal_p (dst, src2))
8203 matching_memory = 2;
8204 else
8205 dst = gen_reg_rtx (mode);
8206 }
0f290768 8207
8208 /* Both source operands cannot be in memory. */
8209 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
8210 {
8211 if (matching_memory != 2)
8212 src2 = force_reg (mode, src2);
8213 else
8214 src1 = force_reg (mode, src1);
32b5b1aa 8215 }
e9a25f70 8216
8217 /* If the operation is not commutative, source 1 cannot be a constant
8218 or non-matching memory. */
0f290768 8219 if ((CONSTANT_P (src1)
8220 || (!matching_memory && GET_CODE (src1) == MEM))
8221 && GET_RTX_CLASS (code) != 'c')
e075ae69 8222 src1 = force_reg (mode, src1);
0f290768 8223
e075ae69 8224 /* If optimizing, copy to regs to improve CSE */
fe577e58 8225 if (optimize && ! no_new_pseudos)
32b5b1aa 8226 {
8227 if (GET_CODE (dst) == MEM)
8228 dst = gen_reg_rtx (mode);
8229 if (GET_CODE (src1) == MEM)
8230 src1 = force_reg (mode, src1);
8231 if (GET_CODE (src2) == MEM)
8232 src2 = force_reg (mode, src2);
32b5b1aa 8233 }
e9a25f70 8234
8235 /* Emit the instruction. */
8236
8237 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
8238 if (reload_in_progress)
8239 {
8240 /* Reload doesn't know about the flags register, and doesn't know that
8241 it doesn't want to clobber it. We can only do this with PLUS. */
8242 if (code != PLUS)
8243 abort ();
8244 emit_insn (op);
8245 }
8246 else
32b5b1aa 8247 {
8248 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8249 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
32b5b1aa 8250 }
e9a25f70 8251
e075ae69
RH
8252 /* Fix up the destination if needed. */
8253 if (dst != operands[0])
8254 emit_move_insn (operands[0], dst);
8255}
8256
8257/* Return TRUE or FALSE depending on whether the binary operator meets the
8258 appropriate constraints. */
8259
8260int
8261ix86_binary_operator_ok (enum rtx_code code,
8262 enum machine_mode mode ATTRIBUTE_UNUSED,
8263 rtx operands[3])
8264{
8265 /* Both source operands cannot be in memory. */
8266 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
8267 return 0;
8269 /* If the operation is not commutative, source 1 cannot be a constant. */
8269 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
8270 return 0;
8271 /* If the destination is memory, we must have a matching source operand. */
8272 if (GET_CODE (operands[0]) == MEM
8273 && ! (rtx_equal_p (operands[0], operands[1])
8274 || (GET_RTX_CLASS (code) == 'c'
8275 && rtx_equal_p (operands[0], operands[2]))))
8276 return 0;
06a964de 8277 /* If the operation is not commutative and source 1 is memory, we must
d6a7951f 8278 have a matching destination. */
8279 if (GET_CODE (operands[1]) == MEM
8280 && GET_RTX_CLASS (code) != 'c'
8281 && ! rtx_equal_p (operands[0], operands[1]))
8282 return 0;
e075ae69
RH
8283 return 1;
8284}
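/* Illustrative examples (not part of the original source): for PLUS,
   (set (mem:SI A) (plus:SI (mem:SI A) (reg:SI x))) is accepted
   because the memory destination matches source 1, whereas a version
   with two distinct memory sources is rejected, as is
   (set (reg) (minus (const_int 1) (reg))), since MINUS is not
   commutative.  */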
8285
8286/* Attempt to expand a unary operator. Make the expansion closer to the
8287 actual machine than just general_operand, which will allow 2 separate
9d81fc27 8288 memory references (one output, one input) in a single insn. */
e075ae69 8289
9d81fc27 8290void
8291ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
8292 rtx operands[])
e075ae69 8293{
8294 int matching_memory;
8295 rtx src, dst, op, clob;
8296
8297 dst = operands[0];
8298 src = operands[1];
e075ae69 8299
8300 /* If the destination is memory, and we do not have matching source
8301 operands, do things in registers. */
8302 matching_memory = 0;
8303 if (GET_CODE (dst) == MEM)
32b5b1aa 8304 {
8305 if (rtx_equal_p (dst, src))
8306 matching_memory = 1;
e075ae69 8307 else
06a964de 8308 dst = gen_reg_rtx (mode);
32b5b1aa 8309 }
e9a25f70 8310
8311 /* When source operand is memory, destination must match. */
8312 if (!matching_memory && GET_CODE (src) == MEM)
8313 src = force_reg (mode, src);
0f290768 8314
06a964de 8315 /* If optimizing, copy to regs to improve CSE */
fe577e58 8316 if (optimize && ! no_new_pseudos)
8317 {
8318 if (GET_CODE (dst) == MEM)
8319 dst = gen_reg_rtx (mode);
8320 if (GET_CODE (src) == MEM)
8321 src = force_reg (mode, src);
8322 }
8323
8324 /* Emit the instruction. */
8325
8326 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
8327 if (reload_in_progress || code == NOT)
8328 {
8329 /* Reload doesn't know about the flags register, and doesn't know that
8330 it doesn't want to clobber it. */
8331 if (code != NOT)
8332 abort ();
8333 emit_insn (op);
8334 }
8335 else
8336 {
8337 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8338 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8339 }
8340
8341 /* Fix up the destination if needed. */
8342 if (dst != operands[0])
8343 emit_move_insn (operands[0], dst);
8344}
8345
8346/* Return TRUE or FALSE depending on whether the unary operator meets the
8347 appropriate constraints. */
8348
8349int
8350ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
8351 enum machine_mode mode ATTRIBUTE_UNUSED,
8352 rtx operands[2] ATTRIBUTE_UNUSED)
e075ae69 8353{
8354 /* If one of operands is memory, source and destination must match. */
8355 if ((GET_CODE (operands[0]) == MEM
8356 || GET_CODE (operands[1]) == MEM)
8357 && ! rtx_equal_p (operands[0], operands[1]))
8358 return FALSE;
8359 return TRUE;
8360}
8361
8362/* Return TRUE or FALSE depending on whether the first SET in INSN
8363 has source and destination with matching CC modes, and that the
8364 CC mode is at least as constrained as REQ_MODE. */
8365
8366int
b96a374d 8367ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
8368{
8369 rtx set;
8370 enum machine_mode set_mode;
8371
8372 set = PATTERN (insn);
8373 if (GET_CODE (set) == PARALLEL)
8374 set = XVECEXP (set, 0, 0);
8375 if (GET_CODE (set) != SET)
8376 abort ();
8377 if (GET_CODE (SET_SRC (set)) != COMPARE)
8378 abort ();
8379
8380 set_mode = GET_MODE (SET_DEST (set));
8381 switch (set_mode)
8382 {
8383 case CCNOmode:
8384 if (req_mode != CCNOmode
8385 && (req_mode != CCmode
8386 || XEXP (SET_SRC (set), 1) != const0_rtx))
8387 return 0;
8388 break;
16189740
RH
8391 return 0;
8392 /* FALLTHRU */
9076b9c1
JH
8393 case CCGCmode:
8394 if (req_mode == CCGOCmode || req_mode == CCNOmode)
8395 return 0;
8396 /* FALLTHRU */
8397 case CCGOCmode:
16189740
RH
8398 if (req_mode == CCZmode)
8399 return 0;
8400 /* FALLTHRU */
8401 case CCZmode:
8402 break;
8403
8404 default:
8405 abort ();
8406 }
8407
8408 return (GET_MODE (SET_SRC (set)) == set_mode);
8409}

/* Generate insn patterns to do an integer compare of OPERANDS.  */

static rtx
ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
{
  enum machine_mode cmpmode;
  rtx tmp, flags;

  cmpmode = SELECT_CC_MODE (code, op0, op1);
  flags = gen_rtx_REG (cmpmode, FLAGS_REG);

  /* This is very simple, but making the interface the same as in the
     FP case makes the rest of the code easier.  */
  tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
  emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
}
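/* For instance (illustrative only), an EQ test of %eax against 42 gets
   CCZmode from SELECT_CC_MODE and expands into the pair

       (set (reg:CCZ 17) (compare:CCZ (reg:SI 0) (const_int 42)))
       (eq (reg:CCZ 17) (const_int 0))

   where the first rtx is emitted here and the second is handed back for
   the caller to place inside a bcc, scc or cmov pattern.  */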

/* Figure out whether to use ordered or unordered fp comparisons.
   Return the appropriate mode to use.  */

enum machine_mode
ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
{
  /* ??? In order to make all comparisons reversible, we do all comparisons
     non-trapping when compiling for IEEE.  Once gcc is able to distinguish
     between all forms of trapping and nontrapping comparisons, we can make
     inequality comparisons trapping again, since that results in better
     code when using FCOM based compares.  */
  return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
}

enum machine_mode
ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
{
  if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
    return ix86_fp_compare_mode (code);
  switch (code)
    {
      /* Only the zero flag is needed.  */
    case EQ:                    /* ZF=0 */
    case NE:                    /* ZF!=0 */
      return CCZmode;
      /* Codes needing the carry flag.  */
    case GEU:                   /* CF=0 */
    case GTU:                   /* CF=0 & ZF=0 */
    case LTU:                   /* CF=1 */
    case LEU:                   /* CF=1 | ZF=1 */
      return CCmode;
      /* Codes possibly doable only with the sign flag when
         comparing against zero.  */
    case GE:                    /* SF=OF   or   SF=0 */
    case LT:                    /* SF<>OF  or   SF=1 */
      if (op1 == const0_rtx)
        return CCGOCmode;
      else
        /* For the other cases the carry flag is not required.  */
        return CCGCmode;
      /* Codes doable only with the sign flag when comparing against
         zero, but for which we lack a jump instruction, so we need
         relational tests against the overflow flag, which must
         therefore be zero.  */
    case GT:                    /* ZF=0 & SF=OF */
    case LE:                    /* ZF=1 | SF<>OF */
      if (op1 == const0_rtx)
        return CCNOmode;
      else
        return CCGCmode;
      /* The strcmp pattern does (use flags), and combine may ask us
         for a proper mode.  */
    case USE:
      return CCmode;
    default:
      abort ();
    }
}

/* Return true if we should use an FCOMI instruction for this fp comparison.  */

int
ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
{
  enum rtx_code swapped_code = swap_condition (code);
  return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
          || (ix86_fp_comparison_cost (swapped_code)
              == ix86_fp_comparison_fcomi_cost (swapped_code)));
}

/* Swap, force into registers, or otherwise massage the two operands
   to a fp comparison.  The operands are updated in place; the new
   comparison code is returned.  */

static enum rtx_code
ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
{
  enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
  rtx op0 = *pop0, op1 = *pop1;
  enum machine_mode op_mode = GET_MODE (op0);
  int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);

  /* All of the unordered compare instructions only work on registers.
     The same is true of the XFmode compare instructions.  The same is
     true of the fcomi compare instructions.  */

  if (!is_sse
      && (fpcmp_mode == CCFPUmode
          || op_mode == XFmode
          || op_mode == TFmode
          || ix86_use_fcomi_compare (code)))
    {
      op0 = force_reg (op_mode, op0);
      op1 = force_reg (op_mode, op1);
    }
  else
    {
      /* %%% We only allow op1 in memory; op0 must be st(0).  So swap
         things around if they appear profitable, otherwise force op0
         into a register.  */

      if (standard_80387_constant_p (op0) == 0
          || (GET_CODE (op0) == MEM
              && ! (standard_80387_constant_p (op1) == 0
                    || GET_CODE (op1) == MEM)))
        {
          rtx tmp;
          tmp = op0, op0 = op1, op1 = tmp;
          code = swap_condition (code);
        }

      if (GET_CODE (op0) != REG)
        op0 = force_reg (op_mode, op0);

      if (CONSTANT_P (op1))
        {
          if (standard_80387_constant_p (op1))
            op1 = force_reg (op_mode, op1);
          else
            op1 = validize_mem (force_const_mem (op_mode, op1));
        }
    }

  /* Try to rearrange the comparison to make it cheaper.  */
  if (ix86_fp_comparison_cost (code)
      > ix86_fp_comparison_cost (swap_condition (code))
      && (GET_CODE (op1) == REG || !no_new_pseudos))
    {
      rtx tmp;
      tmp = op0, op0 = op1, op1 = tmp;
      code = swap_condition (code);
      if (GET_CODE (op0) != REG)
        op0 = force_reg (op_mode, op0);
    }

  *pop0 = op0;
  *pop1 = op1;
  return code;
}
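/* Note for the CONSTANT_P handling above: standard_80387_constant_p
   accepts only the few values the 387 can materialize directly (0.0 via
   fldz, 1.0 via fld1, and, when profitable, fldpi and friends); anything
   else is spilled to the constant pool by force_const_mem.  */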

/* Convert the comparison codes we use to represent FP comparisons to
   integer codes that will result in a proper branch.  Return UNKNOWN if
   no such code is available.  */
static enum rtx_code
ix86_fp_compare_code_to_integer (enum rtx_code code)
{
  switch (code)
    {
    case GT:
      return GTU;
    case GE:
      return GEU;
    case ORDERED:
    case UNORDERED:
      return code;
    case UNEQ:
      return EQ;
    case UNLT:
      return LTU;
    case UNLE:
      return LEU;
    case LTGT:
      return NE;
    default:
      return UNKNOWN;
    }
}

/* Split comparison code CODE into comparisons we can do using branch
   instructions.  BYPASS_CODE is the comparison code for the branch that
   will branch around FIRST_CODE and SECOND_CODE.  If one of these
   branches is not required, its code is set to NIL.
   We never require more than two branches.  */
static void
ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
                          enum rtx_code *first_code,
                          enum rtx_code *second_code)
{
  *first_code = code;
  *bypass_code = NIL;
  *second_code = NIL;

  /* The fcomi comparison sets flags as follows:

     cmp    ZF  PF  CF
     >      0   0   0
     <      0   0   1
     =      1   0   0
     un     1   1   1 */

  switch (code)
    {
    case GT:                    /* GTU - CF=0 & ZF=0 */
    case GE:                    /* GEU - CF=0 */
    case ORDERED:               /* PF=0 */
    case UNORDERED:             /* PF=1 */
    case UNEQ:                  /* EQ - ZF=1 */
    case UNLT:                  /* LTU - CF=1 */
    case UNLE:                  /* LEU - CF=1 | ZF=1 */
    case LTGT:                  /* EQ - ZF=0 */
      break;
    case LT:                    /* LTU - CF=1 - fails on unordered */
      *first_code = UNLT;
      *bypass_code = UNORDERED;
      break;
    case LE:                    /* LEU - CF=1 | ZF=1 - fails on unordered */
      *first_code = UNLE;
      *bypass_code = UNORDERED;
      break;
    case EQ:                    /* EQ - ZF=1 - fails on unordered */
      *first_code = UNEQ;
      *bypass_code = UNORDERED;
      break;
    case NE:                    /* NE - ZF=0 - fails on unordered */
      *first_code = LTGT;
      *second_code = UNORDERED;
      break;
    case UNGE:                  /* GEU - CF=0 - fails on unordered */
      *first_code = GE;
      *second_code = UNORDERED;
      break;
    case UNGT:                  /* GTU - CF=0 & ZF=0 - fails on unordered */
      *first_code = GT;
      *second_code = UNORDERED;
      break;
    default:
      abort ();
    }
  if (!TARGET_IEEE_FP)
    {
      *second_code = NIL;
      *bypass_code = NIL;
    }
}
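/* A minimal standalone sketch (illustrative only, not part of this file)
   of why the IEEE splitting above is needed: on an unordered fcomi
   result, ZF, PF and CF are all set at once, so a bare "below" or
   "equal" test also fires for NaNs, and only PF can tell them apart.  */
#if 0
#include <assert.h>
#define FLAG_CF 0x01    /* EFLAGS bit 0 */
#define FLAG_PF 0x04    /* EFLAGS bit 2 */
#define FLAG_ZF 0x40    /* EFLAGS bit 6 */
static void
fcomi_flag_sketch (void)
{
  int lt = FLAG_CF;                             /* a <  b    */
  int eq = FLAG_ZF;                             /* a == b    */
  int gt = 0;                                   /* a >  b    */
  int un = FLAG_ZF | FLAG_PF | FLAG_CF;         /* unordered */

  assert ((lt & FLAG_CF) && (un & FLAG_CF));    /* CF alone: < or NaN.  */
  assert ((eq & FLAG_ZF) && (un & FLAG_ZF));    /* ZF alone: = or NaN.  */
  assert (!(gt & (FLAG_CF | FLAG_ZF)));         /* CF=0 & ZF=0 only for >.  */
  assert ((un & FLAG_PF) && !(lt & FLAG_PF));   /* PF isolates unordered.  */
}
#endif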

/* Return the cost of a comparison done with fcom + arithmetic operations
   on AX.  All the following functions use the number of instructions as
   the cost metric.  In the future this should be tweaked to compute
   bytes for optimize_size and to take into account the performance of
   various instructions on various CPUs.  */
static int
ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
{
  if (!TARGET_IEEE_FP)
    return 4;
  /* The cost of code output by ix86_expand_fp_compare.  */
  switch (code)
    {
    case UNLE:
    case UNLT:
    case LTGT:
    case GT:
    case GE:
    case UNORDERED:
    case ORDERED:
    case UNEQ:
      return 4;
    case LT:
    case NE:
    case EQ:
    case UNGE:
      return 5;
    case LE:
    case UNGT:
      return 6;
    default:
      abort ();
    }
}

/* Return the cost of a comparison done using the fcomi operation.
   See ix86_fp_comparison_arithmetics_cost for the metrics.  */
static int
ix86_fp_comparison_fcomi_cost (enum rtx_code code)
{
  enum rtx_code bypass_code, first_code, second_code;
  /* Return an arbitrarily high cost when the instruction is not
     supported - this prevents gcc from using it.  */
  if (!TARGET_CMOVE)
    return 1024;
  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
  return (bypass_code != NIL || second_code != NIL) + 2;
}

/* Return the cost of a comparison done using the sahf operation.
   See ix86_fp_comparison_arithmetics_cost for the metrics.  */
static int
ix86_fp_comparison_sahf_cost (enum rtx_code code)
{
  enum rtx_code bypass_code, first_code, second_code;
  /* Return an arbitrarily high cost when the instruction is not
     preferred - this keeps gcc from using it.  */
  if (!TARGET_USE_SAHF && !optimize_size)
    return 1024;
  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
  return (bypass_code != NIL || second_code != NIL) + 3;
}

/* Compute the cost of the comparison done using any method.
   See ix86_fp_comparison_arithmetics_cost for the metrics.  */
static int
ix86_fp_comparison_cost (enum rtx_code code)
{
  int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
  int min;

  fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
  sahf_cost = ix86_fp_comparison_sahf_cost (code);

  min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
  if (min > sahf_cost)
    min = sahf_cost;
  if (min > fcomi_cost)
    min = fcomi_cost;
  return min;
}

/* Generate insn patterns to do a floating point compare of OPERANDS.  */

static rtx
ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
                        rtx *second_test, rtx *bypass_test)
{
  enum machine_mode fpcmp_mode, intcmp_mode;
  rtx tmp, tmp2;
  int cost = ix86_fp_comparison_cost (code);
  enum rtx_code bypass_code, first_code, second_code;

  fpcmp_mode = ix86_fp_compare_mode (code);
  code = ix86_prepare_fp_compare_args (code, &op0, &op1);

  if (second_test)
    *second_test = NULL_RTX;
  if (bypass_test)
    *bypass_test = NULL_RTX;

  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);

  /* Do the fcomi/sahf based test when profitable.  */
  if ((bypass_code == NIL || bypass_test)
      && (second_code == NIL || second_test)
      && ix86_fp_comparison_arithmetics_cost (code) > cost)
    {
      if (TARGET_CMOVE)
        {
          tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
          tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
                             tmp);
          emit_insn (tmp);
        }
      else
        {
          tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
          tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
          if (!scratch)
            scratch = gen_reg_rtx (HImode);
          emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
          emit_insn (gen_x86_sahf_1 (scratch));
        }

      /* The FP codes work out to act like unsigned.  */
      intcmp_mode = fpcmp_mode;
      code = first_code;
      if (bypass_code != NIL)
        *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
                                       gen_rtx_REG (intcmp_mode, FLAGS_REG),
                                       const0_rtx);
      if (second_code != NIL)
        *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
                                       gen_rtx_REG (intcmp_mode, FLAGS_REG),
                                       const0_rtx);
    }
  else
    {
      /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first.  */
      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
      tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
      if (!scratch)
        scratch = gen_reg_rtx (HImode);
      emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));

      /* In the unordered case, we have to check C2 for NaN's, which
         doesn't happen to work out to anything nice combination-wise.
         So do some bit twiddling on the value we've got in AH to come
         up with an appropriate set of condition codes.  */

      intcmp_mode = CCNOmode;
      switch (code)
        {
        case GT:
        case UNGT:
          if (code == GT || !TARGET_IEEE_FP)
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
              code = EQ;
            }
          else
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
              emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
              intcmp_mode = CCmode;
              code = GEU;
            }
          break;
        case LT:
        case UNLT:
          if (code == LT && TARGET_IEEE_FP)
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
              intcmp_mode = CCmode;
              code = EQ;
            }
          else
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
              code = NE;
            }
          break;
        case GE:
        case UNGE:
          if (code == GE || !TARGET_IEEE_FP)
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
              code = EQ;
            }
          else
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
                                             GEN_INT (0x01)));
              code = NE;
            }
          break;
        case LE:
        case UNLE:
          if (code == LE && TARGET_IEEE_FP)
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
              emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
              intcmp_mode = CCmode;
              code = LTU;
            }
          else
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
              code = NE;
            }
          break;
        case EQ:
        case UNEQ:
          if (code == EQ && TARGET_IEEE_FP)
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
              intcmp_mode = CCmode;
              code = EQ;
            }
          else
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
              code = NE;
            }
          break;
        case NE:
        case LTGT:
          if (code == NE && TARGET_IEEE_FP)
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
                                             GEN_INT (0x40)));
              code = NE;
            }
          else
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
              code = EQ;
            }
          break;

        case UNORDERED:
          emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
          code = NE;
          break;
        case ORDERED:
          emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
          code = EQ;
          break;

        default:
          abort ();
        }
    }

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode,
                         gen_rtx_REG (intcmp_mode, FLAGS_REG),
                         const0_rtx);
}
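/* A standalone sketch (illustrative only, not part of this file) of the
   FNSTSW bit twiddling above.  Once the status word's high byte is in
   AH, the 387 condition bits sit at C0=0x01, C2=0x04 and C3=0x40, and
   the compare outcomes decode as in the fcomi table earlier.  */
#if 0
#include <assert.h>
#define C0 0x01         /* "below"     */
#define C2 0x04         /* "unordered" */
#define C3 0x40         /* "equal"     */
static void
fnstsw_mask_sketch (void)
{
  int gt = 0, lt = C0, eq = C3, un = C0 | C2 | C3;

  assert ((gt & 0x45) == 0);            /* test $0x45: ZF set only for >.  */
  assert ((un & C2) && !(lt & C2));     /* test $0x04 isolates NaNs.       */
  assert ((eq & 0x45) == 0x40);         /* and $0x45; cmp $0x40: equality. */
  assert ((un & 0x45) != 0x40);         /* ...and it rejects unordered.    */
}
#endif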

rtx
ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
{
  rtx op0, op1, ret;
  op0 = ix86_compare_op0;
  op1 = ix86_compare_op1;

  if (second_test)
    *second_test = NULL_RTX;
  if (bypass_test)
    *bypass_test = NULL_RTX;

  if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
    ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
                                  second_test, bypass_test);
  else
    ret = ix86_expand_int_compare (code, op0, op1);

  return ret;
}

/* Return true if the CODE will result in a nontrivial jump sequence.  */
bool
ix86_fp_jump_nontrivial_p (enum rtx_code code)
{
  enum rtx_code bypass_code, first_code, second_code;
  if (!TARGET_CMOVE)
    return true;
  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
  return bypass_code != NIL || second_code != NIL;
}

void
ix86_expand_branch (enum rtx_code code, rtx label)
{
  rtx tmp;

  switch (GET_MODE (ix86_compare_op0))
    {
    case QImode:
    case HImode:
    case SImode:
    simple:
      tmp = ix86_expand_compare (code, NULL, NULL);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
                                  gen_rtx_LABEL_REF (VOIDmode, label),
                                  pc_rtx);
      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
      return;

    case SFmode:
    case DFmode:
    case XFmode:
    case TFmode:
      {
        rtvec vec;
        int use_fcomi;
        enum rtx_code bypass_code, first_code, second_code;

        code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
                                             &ix86_compare_op1);

        ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);

        /* Check whether we will use the natural sequence with one jump.  If
           so, we can expand the jump early.  Otherwise delay expansion by
           creating a compound insn, to not confuse optimizers.  */
        if (bypass_code == NIL && second_code == NIL
            && TARGET_CMOVE)
          {
            ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
                                  gen_rtx_LABEL_REF (VOIDmode, label),
                                  pc_rtx, NULL_RTX);
          }
        else
          {
            tmp = gen_rtx_fmt_ee (code, VOIDmode,
                                  ix86_compare_op0, ix86_compare_op1);
            tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
                                        gen_rtx_LABEL_REF (VOIDmode, label),
                                        pc_rtx);
            tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);

            use_fcomi = ix86_use_fcomi_compare (code);
            vec = rtvec_alloc (3 + !use_fcomi);
            RTVEC_ELT (vec, 0) = tmp;
            RTVEC_ELT (vec, 1)
              = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
            RTVEC_ELT (vec, 2)
              = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
            if (! use_fcomi)
              RTVEC_ELT (vec, 3)
                = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));

            emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
          }
        return;
      }

    case DImode:
      if (TARGET_64BIT)
        goto simple;
      /* Expand a DImode branch into multiple compare+branch.  */
      {
        rtx lo[2], hi[2], label2;
        enum rtx_code code1, code2, code3;

        if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
          {
            tmp = ix86_compare_op0;
            ix86_compare_op0 = ix86_compare_op1;
            ix86_compare_op1 = tmp;
            code = swap_condition (code);
          }
        split_di (&ix86_compare_op0, 1, lo+0, hi+0);
        split_di (&ix86_compare_op1, 1, lo+1, hi+1);

        /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
           avoid two branches.  This costs one extra insn, so disable when
           optimizing for size.  */

        if ((code == EQ || code == NE)
            && (!optimize_size
                || hi[1] == const0_rtx || lo[1] == const0_rtx))
          {
            rtx xor0, xor1;

            xor1 = hi[0];
            if (hi[1] != const0_rtx)
              xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
                                   NULL_RTX, 0, OPTAB_WIDEN);

            xor0 = lo[0];
            if (lo[1] != const0_rtx)
              xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
                                   NULL_RTX, 0, OPTAB_WIDEN);

            tmp = expand_binop (SImode, ior_optab, xor1, xor0,
                                NULL_RTX, 0, OPTAB_WIDEN);

            ix86_compare_op0 = tmp;
            ix86_compare_op1 = const0_rtx;
            ix86_expand_branch (code, label);
            return;
          }

        /* Otherwise, if we are doing less-than or greater-or-equal-than,
           op1 is a constant and the low word is zero, then we can just
           examine the high word.  */

        if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
          switch (code)
            {
            case LT: case LTU: case GE: case GEU:
              ix86_compare_op0 = hi[0];
              ix86_compare_op1 = hi[1];
              ix86_expand_branch (code, label);
              return;
            default:
              break;
            }

        /* Otherwise, we need two or three jumps.  */

        label2 = gen_label_rtx ();

        code1 = code;
        code2 = swap_condition (code);
        code3 = unsigned_condition (code);

        switch (code)
          {
          case LT: case GT: case LTU: case GTU:
            break;

          case LE:   code1 = LT;  code2 = GT;  break;
          case GE:   code1 = GT;  code2 = LT;  break;
          case LEU:  code1 = LTU; code2 = GTU; break;
          case GEU:  code1 = GTU; code2 = LTU; break;

          case EQ:   code1 = NIL; code2 = NE;  break;
          case NE:   code2 = NIL; break;

          default:
            abort ();
          }

        /*
         * a < b =>
         *    if (hi(a) < hi(b)) goto true;
         *    if (hi(a) > hi(b)) goto false;
         *    if (lo(a) < lo(b)) goto true;
         *  false:
         */

        ix86_compare_op0 = hi[0];
        ix86_compare_op1 = hi[1];

        if (code1 != NIL)
          ix86_expand_branch (code1, label);
        if (code2 != NIL)
          ix86_expand_branch (code2, label2);

        ix86_compare_op0 = lo[0];
        ix86_compare_op1 = lo[1];
        ix86_expand_branch (code3, label);

        if (code2 != NIL)
          emit_label (label2);
        return;
      }

    default:
      abort ();
    }
}
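/* Standalone sketches (illustrative only, not part of this file) of the
   two DImode decompositions above, written over explicit 32-bit halves.  */
#if 0
#include <stdint.h>

/* Equality via (hi0^hi1)|(lo0^lo1): one branch instead of two.  */
static int
di_equal_sketch (uint64_t a, uint64_t b)
{
  uint32_t xhi = (uint32_t) (a >> 32) ^ (uint32_t) (b >> 32);
  uint32_t xlo = (uint32_t) a ^ (uint32_t) b;
  return (xhi | xlo) == 0;
}

/* The three-jump scheme from the "a < b" comment above.  */
static int
di_ltu_sketch (uint64_t a, uint64_t b)
{
  uint32_t ahi = a >> 32, bhi = b >> 32;
  if (ahi < bhi)
    return 1;                           /* high words decide...       */
  if (ahi > bhi)
    return 0;
  return (uint32_t) a < (uint32_t) b;   /* ...else the low words do.  */
}
#endif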

/* Split a branch based on a floating point condition.  */
void
ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
                      rtx target1, rtx target2, rtx tmp)
{
  rtx second, bypass;
  rtx label = NULL_RTX;
  rtx condition;
  int bypass_probability = -1, second_probability = -1, probability = -1;
  rtx i;

  if (target2 != pc_rtx)
    {
      rtx tmp = target2;
      code = reverse_condition_maybe_unordered (code);
      target2 = target1;
      target1 = tmp;
    }

  condition = ix86_expand_fp_compare (code, op1, op2,
                                      tmp, &second, &bypass);

  if (split_branch_probability >= 0)
    {
      /* Distribute the probabilities across the jumps.
         Assume that BYPASS and SECOND always test
         for UNORDERED.  */
      probability = split_branch_probability;

      /* A value of 1 is low enough that the probability does not need
         to be updated.  Later we may run some experiments and see
         whether unordered values are more frequent in practice.  */
      if (bypass)
        bypass_probability = 1;
      if (second)
        second_probability = 1;
    }
  if (bypass != NULL_RTX)
    {
      label = gen_label_rtx ();
      i = emit_jump_insn (gen_rtx_SET
                          (VOIDmode, pc_rtx,
                           gen_rtx_IF_THEN_ELSE (VOIDmode,
                                                 bypass,
                                                 gen_rtx_LABEL_REF (VOIDmode,
                                                                    label),
                                                 pc_rtx)));
      if (bypass_probability >= 0)
        REG_NOTES (i)
          = gen_rtx_EXPR_LIST (REG_BR_PROB,
                               GEN_INT (bypass_probability),
                               REG_NOTES (i));
    }
  i = emit_jump_insn (gen_rtx_SET
                      (VOIDmode, pc_rtx,
                       gen_rtx_IF_THEN_ELSE (VOIDmode,
                                             condition, target1, target2)));
  if (probability >= 0)
    REG_NOTES (i)
      = gen_rtx_EXPR_LIST (REG_BR_PROB,
                           GEN_INT (probability),
                           REG_NOTES (i));
  if (second != NULL_RTX)
    {
      i = emit_jump_insn (gen_rtx_SET
                          (VOIDmode, pc_rtx,
                           gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
                                                 target2)));
      if (second_probability >= 0)
        REG_NOTES (i)
          = gen_rtx_EXPR_LIST (REG_BR_PROB,
                               GEN_INT (second_probability),
                               REG_NOTES (i));
    }
  if (label != NULL_RTX)
    emit_label (label);
}

int
ix86_expand_setcc (enum rtx_code code, rtx dest)
{
  rtx ret, tmp, tmpreg;
  rtx second_test, bypass_test;

  if (GET_MODE (ix86_compare_op0) == DImode
      && !TARGET_64BIT)
    return 0; /* FAIL */

  if (GET_MODE (dest) != QImode)
    abort ();

  ret = ix86_expand_compare (code, &second_test, &bypass_test);
  PUT_MODE (ret, QImode);

  tmp = dest;
  tmpreg = dest;

  emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
  if (bypass_test || second_test)
    {
      rtx test = second_test;
      int bypass = 0;
      rtx tmp2 = gen_reg_rtx (QImode);
      if (bypass_test)
        {
          if (second_test)
            abort ();
          test = bypass_test;
          bypass = 1;
          PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
        }
      PUT_MODE (test, QImode);
      emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));

      if (bypass)
        emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
      else
        emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
    }

  return 1; /* DONE */
}

/* Expand a comparison setting or clearing the carry flag.  Return true
   when successful, and set *POP to the comparison operation.  */
bool
ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
{
  enum machine_mode mode =
    GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);

  /* Do not handle DImode compares that go through the special path.  Also
     we can't deal with FP compares yet.  This is possible to add.  */
  if (mode == DImode && !TARGET_64BIT)
    return false;
  if (FLOAT_MODE_P (mode))
    {
      rtx second_test = NULL, bypass_test = NULL;
      rtx compare_op, compare_seq;

      /* Shortcut: the following common codes never translate into carry
         flag compares.  */
      if (code == EQ || code == NE || code == UNEQ || code == LTGT
          || code == ORDERED || code == UNORDERED)
        return false;

      /* These comparisons require the zero flag; swap the operands so
         they won't.  */
      if ((code == GT || code == UNLE || code == LE || code == UNGT)
          && !TARGET_IEEE_FP)
        {
          rtx tmp = op0;
          op0 = op1;
          op1 = tmp;
          code = swap_condition (code);
        }

      /* Try to expand the comparison and verify that we end up with a
         carry flag based comparison.  This fails to be true only when
         we decide to expand the comparison using arithmetic, which is
         not a too common scenario.  */
      start_sequence ();
      compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
                                           &second_test, &bypass_test);
      compare_seq = get_insns ();
      end_sequence ();

      if (second_test || bypass_test)
        return false;
      if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
          || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
        code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
      else
        code = GET_CODE (compare_op);
      if (code != LTU && code != GEU)
        return false;
      emit_insn (compare_seq);
      *pop = compare_op;
      return true;
    }
  if (!INTEGRAL_MODE_P (mode))
    return false;
  switch (code)
    {
    case LTU:
    case GEU:
      break;

      /* Convert a==0 into (unsigned)a<1.  */
    case EQ:
    case NE:
      if (op1 != const0_rtx)
        return false;
      op1 = const1_rtx;
      code = (code == EQ ? LTU : GEU);
      break;

      /* Convert a>b into b<a or a>=b+1.  */
    case GTU:
    case LEU:
      if (GET_CODE (op1) == CONST_INT)
        {
          op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
          /* Bail out on overflow.  We still can swap operands, but that
             would force loading of the constant into a register.  */
          if (op1 == const0_rtx
              || !x86_64_immediate_operand (op1, GET_MODE (op1)))
            return false;
          code = (code == GTU ? GEU : LTU);
        }
      else
        {
          rtx tmp = op1;
          op1 = op0;
          op0 = tmp;
          code = (code == GTU ? LTU : GEU);
        }
      break;

      /* Convert a>=0 into (unsigned)a<0x80000000.  */
    case LT:
    case GE:
      if (mode == DImode || op1 != const0_rtx)
        return false;
      op1 = gen_int_mode ((HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1),
                          mode);
      code = (code == LT ? GEU : LTU);
      break;
    case LE:
    case GT:
      if (mode == DImode || op1 != constm1_rtx)
        return false;
      op1 = gen_int_mode ((HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1),
                          mode);
      code = (code == LE ? GEU : LTU);
      break;

    default:
      return false;
    }
  ix86_compare_op0 = op0;
  ix86_compare_op1 = op1;
  *pop = ix86_expand_compare (code, NULL, NULL);
  if (GET_CODE (*pop) != LTU && GET_CODE (*pop) != GEU)
    abort ();
  return true;
}
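/* A standalone sketch (illustrative only, not part of this file) of the
   algebra behind the conversions above, checked on 32-bit values.  */
#if 0
#include <assert.h>
static void
carry_flag_compare_sketch (void)
{
  int a = -3;
  unsigned ua = (unsigned) a;

  assert ((a == 0) == (ua < 1u));               /* EQ  -> LTU against 1    */
  assert ((5u > 3u) == (5u >= 3u + 1u));        /* GTU -> GEU against b+1  */
  assert ((a < 0) == (ua >= 0x80000000u));      /* LT  -> GEU against 2^31 */
  assert ((a > -1) == (ua < 0x80000000u));      /* GT  -> LTU against 2^31 */
}
#endif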

int
ix86_expand_int_movcc (rtx operands[])
{
  enum rtx_code code = GET_CODE (operands[1]), compare_code;
  rtx compare_seq, compare_op;
  rtx second_test, bypass_test;
  enum machine_mode mode = GET_MODE (operands[0]);
  bool sign_bit_compare_p = false;

  start_sequence ();
  compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
  compare_seq = get_insns ();
  end_sequence ();

  compare_code = GET_CODE (compare_op);

  if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
      || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
    sign_bit_compare_p = true;

  /* Don't attempt mode expansion here -- if we had to expand 5 or 6
     HImode insns, we'd be swallowed in word prefix ops.  */

  if ((mode != HImode || TARGET_FAST_PREFIX)
      && (mode != DImode || TARGET_64BIT)
      && GET_CODE (operands[2]) == CONST_INT
      && GET_CODE (operands[3]) == CONST_INT)
    {
      rtx out = operands[0];
      HOST_WIDE_INT ct = INTVAL (operands[2]);
      HOST_WIDE_INT cf = INTVAL (operands[3]);
      HOST_WIDE_INT diff;

      diff = ct - cf;
      /* Sign bit compares are better done using shifts than using sbb.  */
      if (sign_bit_compare_p
          || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
                                             ix86_compare_op1, &compare_op))
        {
          /* Detect overlap between destination and compare sources.  */
          rtx tmp = out;

          if (!sign_bit_compare_p)
            {
              bool fpcmp = false;

              compare_code = GET_CODE (compare_op);

              if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
                  || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
                {
                  fpcmp = true;
                  compare_code = ix86_fp_compare_code_to_integer (compare_code);
                }

              /* To simplify the rest of the code, restrict to the GEU case.  */
              if (compare_code == LTU)
                {
                  HOST_WIDE_INT tmp = ct;
                  ct = cf;
                  cf = tmp;
                  compare_code = reverse_condition (compare_code);
                  code = reverse_condition (code);
                }
              else
                {
                  if (fpcmp)
                    PUT_CODE (compare_op,
                              reverse_condition_maybe_unordered
                                (GET_CODE (compare_op)));
                  else
                    PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
                }
              diff = ct - cf;

              if (reg_overlap_mentioned_p (out, ix86_compare_op0)
                  || reg_overlap_mentioned_p (out, ix86_compare_op1))
                tmp = gen_reg_rtx (mode);

              if (mode == DImode)
                emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
              else
                emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
            }
          else
            {
              if (code == GT || code == GE)
                code = reverse_condition (code);
              else
                {
                  HOST_WIDE_INT tmp = ct;
                  ct = cf;
                  cf = tmp;
                  diff = ct - cf;
                }
              tmp = emit_store_flag (tmp, code, ix86_compare_op0,
                                     ix86_compare_op1, VOIDmode, 0, -1);
            }

          if (diff == 1)
            {
              /*
               * cmpl op0,op1
               * sbbl dest,dest
               * [addl dest, ct]
               *
               * Size 5 - 8.
               */
              if (ct)
                tmp = expand_simple_binop (mode, PLUS,
                                           tmp, GEN_INT (ct),
                                           copy_rtx (tmp), 1, OPTAB_DIRECT);
            }
          else if (cf == -1)
            {
              /*
               * cmpl op0,op1
               * sbbl dest,dest
               * orl $ct, dest
               *
               * Size 8.
               */
              tmp = expand_simple_binop (mode, IOR,
                                         tmp, GEN_INT (ct),
                                         copy_rtx (tmp), 1, OPTAB_DIRECT);
            }
          else if (diff == -1 && ct)
            {
              /*
               * cmpl op0,op1
               * sbbl dest,dest
               * notl dest
               * [addl dest, cf]
               *
               * Size 8 - 11.
               */
              tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
              if (cf)
                tmp = expand_simple_binop (mode, PLUS,
                                           copy_rtx (tmp), GEN_INT (cf),
                                           copy_rtx (tmp), 1, OPTAB_DIRECT);
            }
          else
            {
              /*
               * cmpl op0,op1
               * sbbl dest,dest
               * [notl dest]
               * andl cf - ct, dest
               * [addl dest, ct]
               *
               * Size 8 - 11.
               */

              if (cf == 0)
                {
                  cf = ct;
                  ct = 0;
                  tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
                }

              tmp = expand_simple_binop (mode, AND,
                                         copy_rtx (tmp),
                                         gen_int_mode (cf - ct, mode),
                                         copy_rtx (tmp), 1, OPTAB_DIRECT);
              if (ct)
                tmp = expand_simple_binop (mode, PLUS,
                                           copy_rtx (tmp), GEN_INT (ct),
                                           copy_rtx (tmp), 1, OPTAB_DIRECT);
            }

          if (!rtx_equal_p (tmp, out))
            emit_move_insn (copy_rtx (out), copy_rtx (tmp));

          return 1; /* DONE */
        }

      if (diff < 0)
        {
          HOST_WIDE_INT tmp;
          tmp = ct, ct = cf, cf = tmp;
          diff = -diff;
          if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
            {
              /* We may be reversing an unordered compare to a normal compare,
                 which is not valid in general (we may convert a non-trapping
                 condition to a trapping one); however, on i386 we currently
                 emit all comparisons unordered.  */
              compare_code = reverse_condition_maybe_unordered (compare_code);
              code = reverse_condition_maybe_unordered (code);
            }
          else
            {
              compare_code = reverse_condition (compare_code);
              code = reverse_condition (code);
            }
        }

      compare_code = NIL;
      if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
          && GET_CODE (ix86_compare_op1) == CONST_INT)
        {
          if (ix86_compare_op1 == const0_rtx
              && (code == LT || code == GE))
            compare_code = code;
          else if (ix86_compare_op1 == constm1_rtx)
            {
              if (code == LE)
                compare_code = LT;
              else if (code == GT)
                compare_code = GE;
            }
        }

      /* Optimize dest = (op0 < 0) ? -1 : cf.  */
      if (compare_code != NIL
          && GET_MODE (ix86_compare_op0) == GET_MODE (out)
          && (cf == -1 || ct == -1))
        {
          /* If the lea code below could be used, only optimize
             if it results in a 2 insn sequence.  */

          if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
                 || diff == 3 || diff == 5 || diff == 9)
              || (compare_code == LT && ct == -1)
              || (compare_code == GE && cf == -1))
            {
              /*
               * notl op1 (if necessary)
               * sarl $31, op1
               * orl cf, op1
               */
              if (ct != -1)
                {
                  cf = ct;
                  ct = -1;
                  code = reverse_condition (code);
                }

              out = emit_store_flag (out, code, ix86_compare_op0,
                                     ix86_compare_op1, VOIDmode, 0, -1);

              out = expand_simple_binop (mode, IOR,
                                         out, GEN_INT (cf),
                                         out, 1, OPTAB_DIRECT);
              if (out != operands[0])
                emit_move_insn (operands[0], out);

              return 1; /* DONE */
            }
        }

      if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
           || diff == 3 || diff == 5 || diff == 9)
          && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
          && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
        {
          /*
           * xorl dest,dest
           * cmpl op1,op2
           * setcc dest
           * lea cf(dest*(ct-cf)),dest
           *
           * Size 14.
           *
           * This also catches the degenerate setcc-only case.
           */

          rtx tmp;
          int nops;

          out = emit_store_flag (out, code, ix86_compare_op0,
                                 ix86_compare_op1, VOIDmode, 0, 1);

          nops = 0;
          /* On x86_64 the lea instruction operates on Pmode, so we need
             to get the arithmetic done in the proper mode to match.  */
          if (diff == 1)
            tmp = copy_rtx (out);
          else
            {
              rtx out1;
              out1 = copy_rtx (out);
              tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
              nops++;
              if (diff & 1)
                {
                  tmp = gen_rtx_PLUS (mode, tmp, out1);
                  nops++;
                }
            }
          if (cf != 0)
            {
              tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
              nops++;
            }
          if (!rtx_equal_p (tmp, out))
            {
              if (nops == 1)
                out = force_operand (tmp, copy_rtx (out));
              else
                emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
            }
          if (!rtx_equal_p (out, operands[0]))
            emit_move_insn (operands[0], copy_rtx (out));

          return 1; /* DONE */
        }

      /*
       * General case:                  Jumpful:
       *   xorl dest,dest                 cmpl op1, op2
       *   cmpl op1, op2                  movl ct, dest
       *   setcc dest                     jcc 1f
       *   decl dest                      movl cf, dest
       *   andl (cf-ct),dest            1:
       *   addl ct,dest
       *
       * Size 20.                       Size 14.
       *
       * This is reasonably steep, but branch mispredict costs are
       * high on modern cpus, so consider failing only if optimizing
       * for space.
       */

      if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
          && BRANCH_COST >= 2)
        {
          if (cf == 0)
            {
              cf = ct;
              ct = 0;
              if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
                /* We may be reversing an unordered compare to a normal
                   compare, which is not valid in general (we may convert a
                   non-trapping condition to a trapping one); however, on
                   i386 we currently emit all comparisons unordered.  */
                code = reverse_condition_maybe_unordered (code);
              else
                {
                  code = reverse_condition (code);
                  if (compare_code != NIL)
                    compare_code = reverse_condition (compare_code);
                }
            }

          if (compare_code != NIL)
            {
              /* notl op1       (if needed)
                 sarl $31, op1
                 andl (cf-ct), op1
                 addl ct, op1

                 For x < 0 (resp. x <= -1) there will be no notl,
                 so if possible swap the constants to get rid of the
                 complement.
                 True/false will be -1/0 while the code below (store flag
                 followed by decrement) is 0/-1, so the constants need
                 to be exchanged once more.  */

              if (compare_code == GE || !cf)
                {
                  code = reverse_condition (code);
                  compare_code = LT;
                }
              else
                {
                  HOST_WIDE_INT tmp = cf;
                  cf = ct;
                  ct = tmp;
                }

              out = emit_store_flag (out, code, ix86_compare_op0,
                                     ix86_compare_op1, VOIDmode, 0, -1);
            }
          else
            {
              out = emit_store_flag (out, code, ix86_compare_op0,
                                     ix86_compare_op1, VOIDmode, 0, 1);

              out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
                                         copy_rtx (out), 1, OPTAB_DIRECT);
            }

          out = expand_simple_binop (mode, AND, copy_rtx (out),
                                     gen_int_mode (cf - ct, mode),
                                     copy_rtx (out), 1, OPTAB_DIRECT);
          if (ct)
            out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
                                       copy_rtx (out), 1, OPTAB_DIRECT);
          if (!rtx_equal_p (out, operands[0]))
            emit_move_insn (operands[0], copy_rtx (out));

          return 1; /* DONE */
        }
    }

  if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
    {
      /* Try a few things more with specific constants and a variable.  */

      optab op;
      rtx var, orig_out, out, tmp;

      if (BRANCH_COST <= 2)
        return 0; /* FAIL */

      /* If one of the two operands is an interesting constant, load a
         constant with the above and mask it in with a logical operation.  */

      if (GET_CODE (operands[2]) == CONST_INT)
        {
          var = operands[3];
          if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
            operands[3] = constm1_rtx, op = and_optab;
          else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
            operands[3] = const0_rtx, op = ior_optab;
          else
            return 0; /* FAIL */
        }
      else if (GET_CODE (operands[3]) == CONST_INT)
        {
          var = operands[2];
          if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
            operands[2] = constm1_rtx, op = and_optab;
          else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
            operands[2] = const0_rtx, op = ior_optab;
          else
            return 0; /* FAIL */
        }
      else
        return 0; /* FAIL */

      orig_out = operands[0];
      tmp = gen_reg_rtx (mode);
      operands[0] = tmp;

      /* Recurse to get the constant loaded.  */
      if (ix86_expand_int_movcc (operands) == 0)
        return 0; /* FAIL */

      /* Mask in the interesting variable.  */
      out = expand_binop (mode, op, var, tmp, orig_out, 0,
                          OPTAB_WIDEN);
      if (!rtx_equal_p (out, orig_out))
        emit_move_insn (copy_rtx (orig_out), copy_rtx (out));

      return 1; /* DONE */
    }

  /*
   * For comparison with above,
   *
   * movl cf,dest
   * movl ct,tmp
   * cmpl op1,op2
   * cmovcc tmp,dest
   *
   * Size 15.
   */

  if (! nonimmediate_operand (operands[2], mode))
    operands[2] = force_reg (mode, operands[2]);
  if (! nonimmediate_operand (operands[3], mode))
    operands[3] = force_reg (mode, operands[3]);

  if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
    {
      rtx tmp = gen_reg_rtx (mode);
      emit_move_insn (tmp, operands[3]);
      operands[3] = tmp;
    }
  if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
    {
      rtx tmp = gen_reg_rtx (mode);
      emit_move_insn (tmp, operands[2]);
      operands[2] = tmp;
    }

  if (! register_operand (operands[2], VOIDmode)
      && (mode == QImode
          || ! register_operand (operands[3], VOIDmode)))
    operands[2] = force_reg (mode, operands[2]);

  if (mode == QImode
      && ! register_operand (operands[3], VOIDmode))
    operands[3] = force_reg (mode, operands[3]);

  emit_insn (compare_seq);
  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
                          gen_rtx_IF_THEN_ELSE (mode,
                                                compare_op, operands[2],
                                                operands[3])));
  if (bypass_test)
    emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
                            gen_rtx_IF_THEN_ELSE (mode,
                                                  bypass_test,
                                                  copy_rtx (operands[3]),
                                                  copy_rtx (operands[0]))));
  if (second_test)
    emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
                            gen_rtx_IF_THEN_ELSE (mode,
                                                  second_test,
                                                  copy_rtx (operands[2]),
                                                  copy_rtx (operands[0]))));

  return 1; /* DONE */
}
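/* A standalone sketch (illustrative only, not part of this file) of the
   branch-free selection the constant cases above compile to: after
   "cmp a, b", "sbb x, x" leaves x = -(a <u b), an all-ones or all-zero
   mask, and masking cf-ct then adding ct picks one of the constants.  */
#if 0
static unsigned
sbb_select_sketch (unsigned a, unsigned b, unsigned ct, unsigned cf)
{
  unsigned mask = -(unsigned) (a < b);  /* cmp a, b ; sbb x, x      */
  return (mask & (cf - ct)) + ct;       /* cf when a <u b, else ct  */
}
#endif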

int
ix86_expand_fp_movcc (rtx operands[])
{
  enum rtx_code code;
  rtx tmp;
  rtx compare_op, second_test, bypass_test;

  /* For SF/DFmode conditional moves based on comparisons
     in the same mode, we may want to use SSE min/max instructions.  */
  if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
       || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
      && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
      /* The SSE comparisons do not support the LTGT/UNEQ pair.  */
      && (!TARGET_IEEE_FP
          || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
      /* We may be called from the post-reload splitter.  */
      && (!REG_P (operands[0])
          || SSE_REG_P (operands[0])
          || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
    {
      rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
      code = GET_CODE (operands[1]);

      /* See if we have a (cross) match between the comparison operands
         and the conditional move operands.  */
      if (rtx_equal_p (operands[2], op1))
        {
          rtx tmp = op0;
          op0 = op1;
          op1 = tmp;
          code = reverse_condition_maybe_unordered (code);
        }
      if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
        {
          /* Check for a min operation.  */
          if (code == LT || code == UNLE)
            {
              if (code == UNLE)
                {
                  rtx tmp = op0;
                  op0 = op1;
                  op1 = tmp;
                }
              operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
              if (memory_operand (op0, VOIDmode))
                op0 = force_reg (GET_MODE (operands[0]), op0);
              if (GET_MODE (operands[0]) == SFmode)
                emit_insn (gen_minsf3 (operands[0], op0, op1));
              else
                emit_insn (gen_mindf3 (operands[0], op0, op1));
              return 1;
            }
          /* Check for a max operation.  */
          if (code == GT || code == UNGE)
            {
              if (code == UNGE)
                {
                  rtx tmp = op0;
                  op0 = op1;
                  op1 = tmp;
                }
              operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
              if (memory_operand (op0, VOIDmode))
                op0 = force_reg (GET_MODE (operands[0]), op0);
              if (GET_MODE (operands[0]) == SFmode)
                emit_insn (gen_maxsf3 (operands[0], op0, op1));
              else
                emit_insn (gen_maxdf3 (operands[0], op0, op1));
              return 1;
            }
        }
      /* Manage the condition to be sse_comparison_operator.  In case we
         are in non-ieee mode, try to canonicalize the destination operand
         to be first in the comparison - this helps reload to avoid extra
         moves.  */
      if (!sse_comparison_operator (operands[1], VOIDmode)
          || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
        {
          rtx tmp = ix86_compare_op0;
          ix86_compare_op0 = ix86_compare_op1;
          ix86_compare_op1 = tmp;
          operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
                                        VOIDmode, ix86_compare_op0,
                                        ix86_compare_op1);
        }
      /* Similarly try to manage the result to be the first operand of the
         conditional move.  We also don't support the NE comparison on SSE,
         so try to avoid it.  */
      if ((rtx_equal_p (operands[0], operands[3])
           && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
          || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
        {
          rtx tmp = operands[2];
          operands[2] = operands[3];
          operands[3] = tmp;
          operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
                                          (GET_CODE (operands[1])),
                                        VOIDmode, ix86_compare_op0,
                                        ix86_compare_op1);
        }
      if (GET_MODE (operands[0]) == SFmode)
        emit_insn (gen_sse_movsfcc (operands[0], operands[1],
                                    operands[2], operands[3],
                                    ix86_compare_op0, ix86_compare_op1));
      else
        emit_insn (gen_sse_movdfcc (operands[0], operands[1],
                                    operands[2], operands[3],
                                    ix86_compare_op0, ix86_compare_op1));
      return 1;
    }

  /* The floating point conditional move instructions don't directly
     support conditions resulting from a signed integer comparison.  */

  code = GET_CODE (operands[1]);
  compare_op = ix86_expand_compare (code, &second_test, &bypass_test);

  if (!fcmov_comparison_operator (compare_op, VOIDmode))
    {
      if (second_test != NULL || bypass_test != NULL)
        abort ();
      tmp = gen_reg_rtx (QImode);
      ix86_expand_setcc (code, tmp);
      code = NE;
      ix86_compare_op0 = tmp;
      ix86_compare_op1 = const0_rtx;
      compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
    }
  if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
    {
      tmp = gen_reg_rtx (GET_MODE (operands[0]));
      emit_move_insn (tmp, operands[3]);
      operands[3] = tmp;
    }
  if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
    {
      tmp = gen_reg_rtx (GET_MODE (operands[0]));
      emit_move_insn (tmp, operands[2]);
      operands[2] = tmp;
    }

  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
                          gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
                                                compare_op,
                                                operands[2],
                                                operands[3])));
  if (bypass_test)
    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
                            gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
                                                  bypass_test,
                                                  operands[3],
                                                  operands[0])));
  if (second_test)
    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
                            gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
                                                  second_test,
                                                  operands[2],
                                                  operands[0])));

  return 1;
}
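/* Note on the min/max shortcut above: minss/maxss are not commutative
   when NaNs are involved - if the operands compare unordered, the
   second source operand is returned - which is why the UNLE/UNGE cases
   swap the operands before emitting the instruction.  */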

/* Expand conditional increment or decrement using adc/sbb instructions.
   The default case using setcc followed by the conditional move can be
   done by generic code.  */
int
ix86_expand_int_addcc (rtx operands[])
{
  enum rtx_code code = GET_CODE (operands[1]);
  rtx compare_op;
  rtx val = const0_rtx;
  bool fpcmp = false;
  enum machine_mode mode = GET_MODE (operands[0]);

  if (operands[3] != const1_rtx
      && operands[3] != constm1_rtx)
    return 0;
  if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
                                       ix86_compare_op1, &compare_op))
    return 0;
  code = GET_CODE (compare_op);

  if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
      || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
    {
      fpcmp = true;
      code = ix86_fp_compare_code_to_integer (code);
    }

  if (code != LTU)
    {
      val = constm1_rtx;
      if (fpcmp)
        PUT_CODE (compare_op,
                  reverse_condition_maybe_unordered
                    (GET_CODE (compare_op)));
      else
        PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
    }
  PUT_MODE (compare_op, mode);

  /* Construct either an adc or an sbb insn.  */
  if ((code == LTU) == (operands[3] == constm1_rtx))
    {
      switch (GET_MODE (operands[0]))
        {
        case QImode:
          emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
          break;
        case HImode:
          emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
          break;
        case SImode:
          emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
          break;
        case DImode:
          emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
          break;
        default:
          abort ();
        }
    }
  else
    {
      switch (GET_MODE (operands[0]))
        {
        case QImode:
          emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
          break;
        case HImode:
          emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
          break;
        case SImode:
          emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
          break;
        case DImode:
          emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
          break;
        default:
          abort ();
        }
    }
  return 1; /* DONE */
}
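/* A standalone sketch (illustrative only, not part of this file) of the
   conditional increment idiom above: with the comparison massaged into a
   carry-flag test, x += (a <u b) becomes one cmp plus one adc with a
   zero immediate, and x -= (a <u b) one cmp plus one sbb.  */
#if 0
static unsigned
cond_inc_sketch (unsigned x, unsigned a, unsigned b)
{
  return x + (a < b);           /* cmp a, b ; adc x, 0 */
}
static unsigned
cond_dec_sketch (unsigned x, unsigned a, unsigned b)
{
  return x - (a < b);           /* cmp a, b ; sbb x, 0 */
}
#endif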


/* Split operands 0 and 1 into SImode parts.  Similar to split_di, but
   works for floating point parameters and nonoffsettable memories.
   For pushes, it returns just stack offsets; the values will be saved
   in the right order.  At most three parts are generated.  */

static int
ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
{
  int size;

  if (!TARGET_64BIT)
    size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
  else
    size = (GET_MODE_SIZE (mode) + 4) / 8;

  if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
    abort ();
  if (size < 2 || size > 3)
    abort ();

  /* Optimize constant pool references to immediates.  This is used by
     fp moves, which force all constants to memory to allow combining.  */
  if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
    {
      rtx tmp = maybe_get_pool_constant (operand);
      if (tmp)
        operand = tmp;
    }

  if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
    {
      /* The only non-offsettable memories we handle are pushes.  */
      if (! push_operand (operand, VOIDmode))
        abort ();

      operand = copy_rtx (operand);
      PUT_MODE (operand, Pmode);
      parts[0] = parts[1] = parts[2] = operand;
    }
  else if (!TARGET_64BIT)
    {
      if (mode == DImode)
        split_di (&operand, 1, &parts[0], &parts[1]);
      else
        {
          if (REG_P (operand))
            {
              if (!reload_completed)
                abort ();
              parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
              parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
              if (size == 3)
                parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
            }
          else if (offsettable_memref_p (operand))
            {
              operand = adjust_address (operand, SImode, 0);
              parts[0] = operand;
              parts[1] = adjust_address (operand, SImode, 4);
              if (size == 3)
                parts[2] = adjust_address (operand, SImode, 8);
            }
          else if (GET_CODE (operand) == CONST_DOUBLE)
            {
              REAL_VALUE_TYPE r;
              long l[4];

              REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
              switch (mode)
                {
                case XFmode:
                case TFmode:
                  REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
                  parts[2] = gen_int_mode (l[2], SImode);
                  break;
                case DFmode:
                  REAL_VALUE_TO_TARGET_DOUBLE (r, l);
                  break;
                default:
                  abort ();
                }
              parts[1] = gen_int_mode (l[1], SImode);
              parts[0] = gen_int_mode (l[0], SImode);
            }
          else
            abort ();
        }
    }
  else
    {
      if (mode == TImode)
        split_ti (&operand, 1, &parts[0], &parts[1]);
      if (mode == XFmode || mode == TFmode)
        {
          if (REG_P (operand))
            {
              if (!reload_completed)
                abort ();
              parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
              parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
            }
          else if (offsettable_memref_p (operand))
            {
              operand = adjust_address (operand, DImode, 0);
              parts[0] = operand;
              parts[1] = adjust_address (operand, SImode, 8);
            }
          else if (GET_CODE (operand) == CONST_DOUBLE)
            {
              REAL_VALUE_TYPE r;
              long l[3];

              REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
              REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
              /* Do not use shift by 32 to avoid warnings on 32-bit systems.  */
              if (HOST_BITS_PER_WIDE_INT >= 64)
                parts[0]
                  = gen_int_mode
                      ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
                       + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
                       DImode);
              else
                parts[0] = immed_double_const (l[0], l[1], DImode);
              parts[1] = gen_int_mode (l[2], SImode);
JH
10287 }
10288 else
10289 abort ();
10290 }
10291 }
2450a057 10292
2b589241 10293 return size;
2450a057
JH
10294}
10295
/* Emit insns to perform a move or push of DI, DF, and XF values.
   Operands 2-4 contain the input values in the correct order; operands 5-7
   contain the output values.  */
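/* Example: on a 32-bit target a DImode move splits into two SImode
   moves; the ordering logic below picks which half to move first so a
   destination register is not clobbered while it is still needed to
   address the source.  */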

void
ix86_split_long_move (rtx operands[])
{
  rtx part[2][3];
  int nparts;
  int push = 0;
  int collisions = 0;
  enum machine_mode mode = GET_MODE (operands[0]);

  /* The DFmode expanders may ask us to move double.
     For 64bit target this is a single move.  By hiding the fact
     here we simplify i386.md splitters.  */
  if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
    {
      /* Optimize constant pool reference to immediates.  This is used by
	 fp moves that force all constants to memory to allow combining.  */

      if (GET_CODE (operands[1]) == MEM
	  && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
	  && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
	operands[1] = get_pool_constant (XEXP (operands[1], 0));
      if (push_operand (operands[0], VOIDmode))
	{
	  operands[0] = copy_rtx (operands[0]);
	  PUT_MODE (operands[0], Pmode);
	}
      else
	operands[0] = gen_lowpart (DImode, operands[0]);
      operands[1] = gen_lowpart (DImode, operands[1]);
      emit_move_insn (operands[0], operands[1]);
      return;
    }

  /* The only non-offsettable memory we handle is push.  */
  if (push_operand (operands[0], VOIDmode))
    push = 1;
  else if (GET_CODE (operands[0]) == MEM
	   && ! offsettable_memref_p (operands[0]))
    abort ();

  nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
  ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));

  /* When emitting push, take care for source operands on the stack.  */
  if (push && GET_CODE (operands[1]) == MEM
      && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
    {
      if (nparts == 3)
	part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
				     XEXP (part[1][2], 0));
      part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
				   XEXP (part[1][1], 0));
    }

  /* We need to do copy in the right order in case an address register
     of the source overlaps the destination.  */
  if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
    {
      if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
	collisions++;
      if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
	collisions++;
      if (nparts == 3
	  && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
	collisions++;

      /* Collision in the middle part can be handled by reordering.  */
      if (collisions == 1 && nparts == 3
	  && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
	{
	  rtx tmp;
	  tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
	  tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
	}

      /* If there are more collisions, we can't handle it by reordering.
	 Do an lea to the last part and use only one colliding move.  */
      else if (collisions > 1)
	{
	  rtx base;

	  collisions = 1;

	  base = part[0][nparts - 1];

	  /* Handle the case when the last part isn't valid for lea.
	     Happens in 64-bit mode storing the 12-byte XFmode.  */
	  if (GET_MODE (base) != Pmode)
	    base = gen_rtx_REG (Pmode, REGNO (base));

	  emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
	  part[1][0] = replace_equiv_address (part[1][0], base);
	  part[1][1] = replace_equiv_address (part[1][1],
					      plus_constant (base, UNITS_PER_WORD));
	  if (nparts == 3)
	    part[1][2] = replace_equiv_address (part[1][2],
						plus_constant (base, 8));
	}
    }

  if (push)
    {
      if (!TARGET_64BIT)
	{
	  if (nparts == 3)
	    {
	      /* We use only first 12 bytes of TFmode value, but for pushing we
		 are required to adjust stack as if we were pushing a real
		 16-byte value.  */
	      if (mode == TFmode && !TARGET_64BIT)
		emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
				       GEN_INT (-4)));
	      emit_move_insn (part[0][2], part[1][2]);
	    }
	}
      else
	{
	  /* In 64bit mode we don't have 32bit push available.  In case this is
	     a register, it is OK - we will just use the larger counterpart.
	     We also retype memory - these come from an attempt to avoid REX
	     prefix on moving of the second half of TFmode value.  */
	  if (GET_MODE (part[1][1]) == SImode)
	    {
	      if (GET_CODE (part[1][1]) == MEM)
		part[1][1] = adjust_address (part[1][1], DImode, 0);
	      else if (REG_P (part[1][1]))
		part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
	      else
		abort ();
	      if (GET_MODE (part[1][0]) == SImode)
		part[1][0] = part[1][1];
	    }
	}
      emit_move_insn (part[0][1], part[1][1]);
      emit_move_insn (part[0][0], part[1][0]);
      return;
    }

  /* Choose correct order to not overwrite the source before it is copied.  */
  if ((REG_P (part[0][0])
       && REG_P (part[1][1])
       && (REGNO (part[0][0]) == REGNO (part[1][1])
	   || (nparts == 3
	       && REGNO (part[0][0]) == REGNO (part[1][2]))))
      || (collisions > 0
	  && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
    {
      if (nparts == 3)
	{
	  operands[2] = part[0][2];
	  operands[3] = part[0][1];
	  operands[4] = part[0][0];
	  operands[5] = part[1][2];
	  operands[6] = part[1][1];
	  operands[7] = part[1][0];
	}
      else
	{
	  operands[2] = part[0][1];
	  operands[3] = part[0][0];
	  operands[5] = part[1][1];
	  operands[6] = part[1][0];
	}
    }
  else
    {
      if (nparts == 3)
	{
	  operands[2] = part[0][0];
	  operands[3] = part[0][1];
	  operands[4] = part[0][2];
	  operands[5] = part[1][0];
	  operands[6] = part[1][1];
	  operands[7] = part[1][2];
	}
      else
	{
	  operands[2] = part[0][0];
	  operands[3] = part[0][1];
	  operands[5] = part[1][0];
	  operands[6] = part[1][1];
	}
    }
  emit_move_insn (operands[2], operands[5]);
  emit_move_insn (operands[3], operands[6]);
  if (nparts == 3)
    emit_move_insn (operands[4], operands[7]);

  return;
}

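/* Split a DImode left shift by operands[2] into SImode operations.
   SCRATCH is an optional SImode register used, together with cmov, to
   handle variable counts of 32 or more without a branch.  */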
void
ix86_split_ashldi (rtx *operands, rtx scratch)
{
  rtx low[2], high[2];
  int count;

  if (GET_CODE (operands[2]) == CONST_INT)
    {
      split_di (operands, 2, low, high);
      count = INTVAL (operands[2]) & 63;

      if (count >= 32)
	{
	  emit_move_insn (high[0], low[1]);
	  emit_move_insn (low[0], const0_rtx);

	  if (count > 32)
	    emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
	}
      else
	{
	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);
	  emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
	  emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
	}
    }
  else
    {
      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      split_di (operands, 1, low, high);

      emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
      emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));

      if (TARGET_CMOVE && (! no_new_pseudos || scratch))
	{
	  if (! no_new_pseudos)
	    scratch = force_reg (SImode, const0_rtx);
	  else
	    emit_move_insn (scratch, const0_rtx);

	  emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
					  scratch));
	}
      else
	emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
    }
}

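/* Likewise for a DImode arithmetic right shift; the high word is
   filled with copies of the sign bit.  */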
void
ix86_split_ashrdi (rtx *operands, rtx scratch)
{
  rtx low[2], high[2];
  int count;

  if (GET_CODE (operands[2]) == CONST_INT)
    {
      split_di (operands, 2, low, high);
      count = INTVAL (operands[2]) & 63;

      if (count >= 32)
	{
	  emit_move_insn (low[0], high[1]);

	  if (! reload_completed)
	    emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
	  else
	    {
	      emit_move_insn (high[0], low[0]);
	      emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
	    }

	  if (count > 32)
	    emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
	}
      else
	{
	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);
	  emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
	  emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
	}
    }
  else
    {
      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      split_di (operands, 1, low, high);

      emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
      emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));

      if (TARGET_CMOVE && (! no_new_pseudos || scratch))
	{
	  if (! no_new_pseudos)
	    scratch = gen_reg_rtx (SImode);
	  emit_move_insn (scratch, high[0]);
	  emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
	  emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
					  scratch));
	}
      else
	emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
    }
}

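/* Likewise for a DImode logical right shift; the high word is filled
   with zeros.  */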
void
ix86_split_lshrdi (rtx *operands, rtx scratch)
{
  rtx low[2], high[2];
  int count;

  if (GET_CODE (operands[2]) == CONST_INT)
    {
      split_di (operands, 2, low, high);
      count = INTVAL (operands[2]) & 63;

      if (count >= 32)
	{
	  emit_move_insn (low[0], high[1]);
	  emit_move_insn (high[0], const0_rtx);

	  if (count > 32)
	    emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
	}
      else
	{
	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);
	  emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
	  emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
	}
    }
  else
    {
      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      split_di (operands, 1, low, high);

      emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
      emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));

      /* Heh.  By reversing the arguments, we can reuse this pattern.  */
      if (TARGET_CMOVE && (! no_new_pseudos || scratch))
	{
	  if (! no_new_pseudos)
	    scratch = force_reg (SImode, const0_rtx);
	  else
	    emit_move_insn (scratch, const0_rtx);

	  emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
					  scratch));
	}
      else
	emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
    }
}

/* Helper function for the string operations below.  Test whether VARIABLE
   is aligned to VALUE bytes (that is, whether the VALUE bit of it is
   clear); if so, jump to the returned label.  */
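/* For example, ix86_expand_aligntest (destreg, 1) emits a test of the low
   bit of DESTREG and returns a label that is branched to when the bit is
   clear; the caller emits a single byte move before that label, so the
   byte is copied only when DESTREG is odd.  */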
static rtx
ix86_expand_aligntest (rtx variable, int value)
{
  rtx label = gen_label_rtx ();
  rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
  if (GET_MODE (variable) == DImode)
    emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
  else
    emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
  emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
			   1, label);
  return label;
}

/* Adjust COUNTER by the VALUE.  */
static void
ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
{
  if (GET_MODE (countreg) == DImode)
    emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
  else
    emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
}

/* Zero extend possibly SImode EXP to Pmode register.  */
rtx
ix86_zero_extend_to_Pmode (rtx exp)
{
  rtx r;
  if (GET_MODE (exp) == VOIDmode)
    return force_reg (Pmode, exp);
  if (GET_MODE (exp) == Pmode)
    return copy_to_mode_reg (Pmode, exp);
  r = gen_reg_rtx (Pmode);
  emit_insn (gen_zero_extendsidi2 (r, exp));
  return r;
}

/* Expand string move (memcpy) operation.  Use i386 string operations when
   profitable.  expand_clrstr contains similar code.  */
int
ix86_expand_movstr (rtx dst, rtx src, rtx count_exp, rtx align_exp)
{
  rtx srcreg, destreg, countreg;
  enum machine_mode counter_mode;
  HOST_WIDE_INT align = 0;
  unsigned HOST_WIDE_INT count = 0;
  rtx insns;

  if (GET_CODE (align_exp) == CONST_INT)
    align = INTVAL (align_exp);

  /* Can't use any of this if the user has appropriated esi or edi.  */
  if (global_regs[4] || global_regs[5])
    return 0;

  /* This simple hack avoids all inlining code and simplifies code below.  */
  if (!TARGET_ALIGN_STRINGOPS)
    align = 64;

  if (GET_CODE (count_exp) == CONST_INT)
    {
      count = INTVAL (count_exp);
      if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
	return 0;
    }

  /* Figure out proper mode for counter.  For 32bits it is always SImode,
     for 64bits use SImode when possible, otherwise DImode.
     Set count to number of bytes copied when known at compile time.  */
  if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
      || x86_64_zero_extended_value (count_exp))
    counter_mode = SImode;
  else
    counter_mode = DImode;

  start_sequence ();

  if (counter_mode != SImode && counter_mode != DImode)
    abort ();

  destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
  srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));

  emit_insn (gen_cld ());

  /* When optimizing for size emit simple rep ; movsb instruction for
     counts not divisible by 4.  */

  if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
    {
      countreg = ix86_zero_extend_to_Pmode (count_exp);
      if (TARGET_64BIT)
	emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
					destreg, srcreg, countreg));
      else
	emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
				  destreg, srcreg, countreg));
    }

  /* For constant aligned (or small unaligned) copies use rep movsl
     followed by code copying the rest.  For PentiumPro ensure 8 byte
     alignment to allow rep movsl acceleration.  */
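  /* Example: a 23-byte copy with 4-byte alignment on a 32-bit target
     becomes "rep movsl" with a count of 5 (20 bytes), followed by one
     movsw and one movsb for the remaining 3 bytes, matching the
     (count & 0x02) and (count & 0x01) tail handling below.  */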
10760
10761 else if (count != 0
10762 && (align >= 8
10763 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
b531087a 10764 || optimize_size || count < (unsigned int) 64))
0945b39d
JH
10765 {
10766 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10767 if (count & ~(size - 1))
10768 {
10769 countreg = copy_to_mode_reg (counter_mode,
10770 GEN_INT ((count >> (size == 4 ? 2 : 3))
10771 & (TARGET_64BIT ? -1 : 0x3fffffff)));
10772 countreg = ix86_zero_extend_to_Pmode (countreg);
10773 if (size == 4)
10774 {
10775 if (TARGET_64BIT)
10776 emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
10777 destreg, srcreg, countreg));
10778 else
10779 emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
10780 destreg, srcreg, countreg));
10781 }
10782 else
10783 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
10784 destreg, srcreg, countreg));
10785 }
10786 if (size == 8 && (count & 0x04))
10787 emit_insn (gen_strmovsi (destreg, srcreg));
10788 if (count & 0x02)
10789 emit_insn (gen_strmovhi (destreg, srcreg));
10790 if (count & 0x01)
10791 emit_insn (gen_strmovqi (destreg, srcreg));
10792 }
10793 /* The generic code based on the glibc implementation:
10794 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
10795 allowing accelerated copying there)
10796 - copy the data using rep movsl
10797 - copy the rest. */
10798 else
10799 {
10800 rtx countreg2;
10801 rtx label = NULL;
37ad04a5
JH
10802 int desired_alignment = (TARGET_PENTIUMPRO
10803 && (count == 0 || count >= (unsigned int) 260)
10804 ? 8 : UNITS_PER_WORD);
0945b39d
JH
10805
10806 /* In case we don't know anything about the alignment, default to
10807 library version, since it is usually equally fast and result in
b96a374d 10808 shorter code.
4977bab6
ZW
10809
10810 Also emit call when we know that the count is large and call overhead
10811 will not be important. */
10812 if (!TARGET_INLINE_ALL_STRINGOPS
10813 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
0945b39d
JH
10814 {
10815 end_sequence ();
10816 return 0;
10817 }
10818
10819 if (TARGET_SINGLE_STRINGOP)
10820 emit_insn (gen_cld ());
10821
10822 countreg2 = gen_reg_rtx (Pmode);
10823 countreg = copy_to_mode_reg (counter_mode, count_exp);
10824
10825 /* We don't use loops to align destination and to copy parts smaller
10826 than 4 bytes, because gcc is able to optimize such code better (in
10827 the case the destination or the count really is aligned, gcc is often
10828 able to predict the branches) and also it is friendlier to the
a4f31c00 10829 hardware branch prediction.
0945b39d 10830
d1f87653 10831 Using loops is beneficial for generic case, because we can
0945b39d
JH
10832 handle small counts using the loops. Many CPUs (such as Athlon)
10833 have large REP prefix setup costs.
10834
4aae8a9a 10835 This is quite costly. Maybe we can revisit this decision later or
0945b39d
JH
10836 add some customizability to this code. */
10837
37ad04a5 10838 if (count == 0 && align < desired_alignment)
0945b39d
JH
10839 {
10840 label = gen_label_rtx ();
aaae0bb9 10841 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
d43e0b7d 10842 LEU, 0, counter_mode, 1, label);
0945b39d
JH
10843 }
10844 if (align <= 1)
10845 {
10846 rtx label = ix86_expand_aligntest (destreg, 1);
10847 emit_insn (gen_strmovqi (destreg, srcreg));
10848 ix86_adjust_counter (countreg, 1);
10849 emit_label (label);
10850 LABEL_NUSES (label) = 1;
10851 }
10852 if (align <= 2)
10853 {
10854 rtx label = ix86_expand_aligntest (destreg, 2);
10855 emit_insn (gen_strmovhi (destreg, srcreg));
10856 ix86_adjust_counter (countreg, 2);
10857 emit_label (label);
10858 LABEL_NUSES (label) = 1;
10859 }
37ad04a5 10860 if (align <= 4 && desired_alignment > 4)
0945b39d
JH
10861 {
10862 rtx label = ix86_expand_aligntest (destreg, 4);
10863 emit_insn (gen_strmovsi (destreg, srcreg));
10864 ix86_adjust_counter (countreg, 4);
10865 emit_label (label);
10866 LABEL_NUSES (label) = 1;
10867 }
10868
37ad04a5
JH
10869 if (label && desired_alignment > 4 && !TARGET_64BIT)
10870 {
10871 emit_label (label);
10872 LABEL_NUSES (label) = 1;
10873 label = NULL_RTX;
10874 }
0945b39d
JH
10875 if (!TARGET_SINGLE_STRINGOP)
10876 emit_insn (gen_cld ());
10877 if (TARGET_64BIT)
10878 {
10879 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10880 GEN_INT (3)));
10881 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
10882 destreg, srcreg, countreg2));
10883 }
10884 else
10885 {
10886 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
10887 emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
10888 destreg, srcreg, countreg2));
10889 }
10890
10891 if (label)
10892 {
10893 emit_label (label);
10894 LABEL_NUSES (label) = 1;
10895 }
10896 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10897 emit_insn (gen_strmovsi (destreg, srcreg));
10898 if ((align <= 4 || count == 0) && TARGET_64BIT)
10899 {
10900 rtx label = ix86_expand_aligntest (countreg, 4);
10901 emit_insn (gen_strmovsi (destreg, srcreg));
10902 emit_label (label);
10903 LABEL_NUSES (label) = 1;
10904 }
10905 if (align > 2 && count != 0 && (count & 2))
10906 emit_insn (gen_strmovhi (destreg, srcreg));
10907 if (align <= 2 || count == 0)
10908 {
10909 rtx label = ix86_expand_aligntest (countreg, 2);
10910 emit_insn (gen_strmovhi (destreg, srcreg));
10911 emit_label (label);
10912 LABEL_NUSES (label) = 1;
10913 }
10914 if (align > 1 && count != 0 && (count & 1))
10915 emit_insn (gen_strmovqi (destreg, srcreg));
10916 if (align <= 1 || count == 0)
10917 {
10918 rtx label = ix86_expand_aligntest (countreg, 1);
10919 emit_insn (gen_strmovqi (destreg, srcreg));
10920 emit_label (label);
10921 LABEL_NUSES (label) = 1;
10922 }
10923 }
10924
10925 insns = get_insns ();
10926 end_sequence ();
10927
10928 ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
2f937369 10929 emit_insn (insns);
0945b39d
JH
10930 return 1;
10931}
10932
/* Expand string clear operation (bzero).  Use i386 string operations when
   profitable.  expand_movstr contains similar code.  */
int
ix86_expand_clrstr (rtx src, rtx count_exp, rtx align_exp)
{
  rtx destreg, zeroreg, countreg;
  enum machine_mode counter_mode;
  HOST_WIDE_INT align = 0;
  unsigned HOST_WIDE_INT count = 0;

  if (GET_CODE (align_exp) == CONST_INT)
    align = INTVAL (align_exp);

  /* Can't use any of this if the user has appropriated esi.  */
  if (global_regs[4])
    return 0;

  /* This simple hack avoids all inlining code and simplifies code below.  */
  if (!TARGET_ALIGN_STRINGOPS)
    align = 32;

  if (GET_CODE (count_exp) == CONST_INT)
    {
      count = INTVAL (count_exp);
      if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
	return 0;
    }
  /* Figure out proper mode for counter.  For 32bits it is always SImode,
     for 64bits use SImode when possible, otherwise DImode.
     Set count to number of bytes copied when known at compile time.  */
  if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
      || x86_64_zero_extended_value (count_exp))
    counter_mode = SImode;
  else
    counter_mode = DImode;

  destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));

  emit_insn (gen_cld ());

  /* When optimizing for size emit simple rep ; movsb instruction for
     counts not divisible by 4.  */

  if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
    {
      countreg = ix86_zero_extend_to_Pmode (count_exp);
      zeroreg = copy_to_mode_reg (QImode, const0_rtx);
      if (TARGET_64BIT)
	emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
					 destreg, countreg));
      else
	emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
				   destreg, countreg));
    }
  else if (count != 0
	   && (align >= 8
	       || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
	       || optimize_size || count < (unsigned int) 64))
    {
      int size = TARGET_64BIT && !optimize_size ? 8 : 4;
      zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
      if (count & ~(size - 1))
	{
	  countreg = copy_to_mode_reg (counter_mode,
				       GEN_INT ((count >> (size == 4 ? 2 : 3))
						& (TARGET_64BIT ? -1 : 0x3fffffff)));
	  countreg = ix86_zero_extend_to_Pmode (countreg);
	  if (size == 4)
	    {
	      if (TARGET_64BIT)
		emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
						 destreg, countreg));
	      else
		emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
					   destreg, countreg));
	    }
	  else
	    emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
					     destreg, countreg));
	}
      if (size == 8 && (count & 0x04))
	emit_insn (gen_strsetsi (destreg,
				 gen_rtx_SUBREG (SImode, zeroreg, 0)));
      if (count & 0x02)
	emit_insn (gen_strsethi (destreg,
				 gen_rtx_SUBREG (HImode, zeroreg, 0)));
      if (count & 0x01)
	emit_insn (gen_strsetqi (destreg,
				 gen_rtx_SUBREG (QImode, zeroreg, 0)));
    }
  else
    {
      rtx countreg2;
      rtx label = NULL;
      /* Compute desired alignment of the string operation.  */
      int desired_alignment = (TARGET_PENTIUMPRO
			       && (count == 0 || count >= (unsigned int) 260)
			       ? 8 : UNITS_PER_WORD);

      /* In case we don't know anything about the alignment, default to
	 library version, since it is usually equally fast and results in
	 shorter code.

	 Also emit call when we know that the count is large and call overhead
	 will not be important.  */
      if (!TARGET_INLINE_ALL_STRINGOPS
	  && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
	return 0;

      if (TARGET_SINGLE_STRINGOP)
	emit_insn (gen_cld ());

      countreg2 = gen_reg_rtx (Pmode);
      countreg = copy_to_mode_reg (counter_mode, count_exp);
      zeroreg = copy_to_mode_reg (Pmode, const0_rtx);

      if (count == 0 && align < desired_alignment)
	{
	  label = gen_label_rtx ();
	  emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
				   LEU, 0, counter_mode, 1, label);
	}
      if (align <= 1)
	{
	  rtx label = ix86_expand_aligntest (destreg, 1);
	  emit_insn (gen_strsetqi (destreg,
				   gen_rtx_SUBREG (QImode, zeroreg, 0)));
	  ix86_adjust_counter (countreg, 1);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align <= 2)
	{
	  rtx label = ix86_expand_aligntest (destreg, 2);
	  emit_insn (gen_strsethi (destreg,
				   gen_rtx_SUBREG (HImode, zeroreg, 0)));
	  ix86_adjust_counter (countreg, 2);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align <= 4 && desired_alignment > 4)
	{
	  rtx label = ix86_expand_aligntest (destreg, 4);
	  emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
					     ? gen_rtx_SUBREG (SImode, zeroreg, 0)
					     : zeroreg)));
	  ix86_adjust_counter (countreg, 4);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}

      if (label && desired_alignment > 4 && !TARGET_64BIT)
	{
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	  label = NULL_RTX;
	}

      if (!TARGET_SINGLE_STRINGOP)
	emit_insn (gen_cld ());
      if (TARGET_64BIT)
	{
	  emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
				  GEN_INT (3)));
	  emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
					   destreg, countreg2));
	}
      else
	{
	  emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
	  emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
				     destreg, countreg2));
	}
      if (label)
	{
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}

      if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
	emit_insn (gen_strsetsi (destreg,
				 gen_rtx_SUBREG (SImode, zeroreg, 0)));
      if (TARGET_64BIT && (align <= 4 || count == 0))
	{
	  rtx label = ix86_expand_aligntest (countreg, 4);
	  emit_insn (gen_strsetsi (destreg,
				   gen_rtx_SUBREG (SImode, zeroreg, 0)));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align > 2 && count != 0 && (count & 2))
	emit_insn (gen_strsethi (destreg,
				 gen_rtx_SUBREG (HImode, zeroreg, 0)));
      if (align <= 2 || count == 0)
	{
	  rtx label = ix86_expand_aligntest (countreg, 2);
	  emit_insn (gen_strsethi (destreg,
				   gen_rtx_SUBREG (HImode, zeroreg, 0)));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align > 1 && count != 0 && (count & 1))
	emit_insn (gen_strsetqi (destreg,
				 gen_rtx_SUBREG (QImode, zeroreg, 0)));
      if (align <= 1 || count == 0)
	{
	  rtx label = ix86_expand_aligntest (countreg, 1);
	  emit_insn (gen_strsetqi (destreg,
				   gen_rtx_SUBREG (QImode, zeroreg, 0)));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
    }
  return 1;
}

/* Expand strlen.  */
int
ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
{
  rtx addr, scratch1, scratch2, scratch3, scratch4;

  /* The generic case of strlen expander is long.  Avoid expanding it
     unless TARGET_INLINE_ALL_STRINGOPS.  */

  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
      && !TARGET_INLINE_ALL_STRINGOPS
      && !optimize_size
      && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
    return 0;

  addr = force_reg (Pmode, XEXP (src, 0));
  scratch1 = gen_reg_rtx (Pmode);

  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
      && !optimize_size)
    {
      /* Well it seems that some optimizer does not combine a call like
	 foo(strlen(bar), strlen(bar));
	 when the move and the subtraction are done here.  It does calculate
	 the length just once when these instructions are done inside of
	 output_strlen_unroll().  But I think since &bar[strlen(bar)] is
	 often used and I use one fewer register for the lifetime of
	 output_strlen_unroll() this is better.  */

      emit_move_insn (out, addr);

      ix86_expand_strlensi_unroll_1 (out, align);

      /* strlensi_unroll_1 returns the address of the zero at the end of
	 the string, like memchr(), so compute the length by subtracting
	 the start address.  */
      if (TARGET_64BIT)
	emit_insn (gen_subdi3 (out, out, addr));
      else
	emit_insn (gen_subsi3 (out, out, addr));
    }
  else
    {
      scratch2 = gen_reg_rtx (Pmode);
      scratch3 = gen_reg_rtx (Pmode);
      scratch4 = force_reg (Pmode, constm1_rtx);

      emit_move_insn (scratch3, addr);
      eoschar = force_reg (QImode, eoschar);

      emit_insn (gen_cld ());
      if (TARGET_64BIT)
	{
	  emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
					 align, scratch4, scratch3));
	  emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
	  emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
	}
      else
	{
	  emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
				     align, scratch4, scratch3));
	  emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
	  emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
	}
    }
  return 1;
}

/* Expand the appropriate insns for doing strlen if not just doing
   repnz; scasb

   out = result, initialized with the start address
   align_rtx = alignment of the address.
   scratch = scratch register, initialized with the start address when
	not aligned, otherwise undefined

   This is just the body.  It needs the initializations mentioned above and
   some address computing at the end.  These things are done in i386.md.  */

static void
ix86_expand_strlensi_unroll_1 (rtx out, rtx align_rtx)
{
  int align;
  rtx tmp;
  rtx align_2_label = NULL_RTX;
  rtx align_3_label = NULL_RTX;
  rtx align_4_label = gen_label_rtx ();
  rtx end_0_label = gen_label_rtx ();
  rtx mem;
  rtx tmpreg = gen_reg_rtx (SImode);
  rtx scratch = gen_reg_rtx (SImode);
  rtx cmp;

  align = 0;
  if (GET_CODE (align_rtx) == CONST_INT)
    align = INTVAL (align_rtx);

  /* Loop to check 1..3 bytes for null to get an aligned pointer.  */

  /* Is there a known alignment and is it less than 4?  */
  if (align < 4)
    {
      rtx scratch1 = gen_reg_rtx (Pmode);
      emit_move_insn (scratch1, out);
      /* Is there a known alignment and is it not 2?  */
      if (align != 2)
	{
	  align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
	  align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */

	  /* Leave just the 3 lower bits.  */
	  align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
				    NULL_RTX, 0, OPTAB_WIDEN);

	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
				   Pmode, 1, align_4_label);
	  emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
				   Pmode, 1, align_2_label);
	  emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
				   Pmode, 1, align_3_label);
	}
      else
	{
	  /* Since the alignment is 2, we have to check 2 or 0 bytes;
	     check whether it is aligned to 4 bytes.  */

	  align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
				    NULL_RTX, 0, OPTAB_WIDEN);

	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
				   Pmode, 1, align_4_label);
	}

      mem = gen_rtx_MEM (QImode, out);

      /* Now compare the bytes.  */

      /* Compare the first n unaligned bytes on a byte-by-byte basis.  */
      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
			       QImode, 1, end_0_label);

      /* Increment the address.  */
      if (TARGET_64BIT)
	emit_insn (gen_adddi3 (out, out, const1_rtx));
      else
	emit_insn (gen_addsi3 (out, out, const1_rtx));

      /* Not needed with an alignment of 2.  */
      if (align != 2)
	{
	  emit_label (align_2_label);

	  emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
				   end_0_label);

	  if (TARGET_64BIT)
	    emit_insn (gen_adddi3 (out, out, const1_rtx));
	  else
	    emit_insn (gen_addsi3 (out, out, const1_rtx));

	  emit_label (align_3_label);
	}

      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
			       end_0_label);

      if (TARGET_64BIT)
	emit_insn (gen_adddi3 (out, out, const1_rtx));
      else
	emit_insn (gen_addsi3 (out, out, const1_rtx));
    }

  /* Generate the loop to check 4 bytes at a time.  Aligning this loop is
     not a good idea; it only makes the program bigger and does not speed
     it up.  */
  emit_label (align_4_label);

  mem = gen_rtx_MEM (SImode, out);
  emit_move_insn (scratch, mem);
  if (TARGET_64BIT)
    emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
  else
    emit_insn (gen_addsi3 (out, out, GEN_INT (4)));

  /* This formula yields a nonzero result iff one of the bytes is zero.
     This saves three branches inside the loop and many cycles.  */
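  /* For example, with scratch = 0x61620063 (a word whose second-lowest
     byte is zero): scratch - 0x01010101 = 0x6060ff62 and ~scratch =
     0x9e9dff9c; ANDing the two together with 0x80808080 leaves
     0x00008000, which is nonzero exactly because a byte was zero.  */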

  emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
  emit_insn (gen_one_cmplsi2 (scratch, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg,
			 gen_int_mode (0x80808080, SImode)));
  emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
			   align_4_label);

  if (TARGET_CMOVE)
    {
      rtx reg = gen_reg_rtx (SImode);
      rtx reg2 = gen_reg_rtx (Pmode);
      emit_move_insn (reg, tmpreg);
      emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));

      /* If zero is not in the first two bytes, move two bytes forward.  */
      emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
			      gen_rtx_IF_THEN_ELSE (SImode, tmp,
						    reg,
						    tmpreg)));
      /* Emit lea manually to avoid clobbering of flags.  */
      emit_insn (gen_rtx_SET (SImode, reg2,
			      gen_rtx_PLUS (Pmode, out, GEN_INT (2))));

      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, out,
			      gen_rtx_IF_THEN_ELSE (Pmode, tmp,
						    reg2,
						    out)));
    }
  else
    {
      rtx end_2_label = gen_label_rtx ();
      /* Is zero in the first two bytes?  */

      emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				  gen_rtx_LABEL_REF (VOIDmode, end_2_label),
				  pc_rtx);
      tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
      JUMP_LABEL (tmp) = end_2_label;

      /* Not in the first two.  Move two bytes forward.  */
      emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
      if (TARGET_64BIT)
	emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
      else
	emit_insn (gen_addsi3 (out, out, GEN_INT (2)));

      emit_label (end_2_label);
    }

  /* Avoid a branch when fixing up the final byte.  */
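  /* OUT now points three or four bytes past the zero byte, and bit 7 of
     the low byte of TMPREG is set exactly when the earlier byte of the
     remaining pair is the zero.  Doubling TMPREG moves that bit into the
     carry flag, so the sbb below subtracts 3 plus the carry, leaving OUT
     pointing at the terminator.  */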
  tmpreg = gen_lowpart (QImode, tmpreg);
  emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
  cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
  if (TARGET_64BIT)
    emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
  else
    emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));

  emit_label (end_0_label);
}

void
ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1, rtx callarg2,
		  rtx pop, int sibcall)
{
  rtx use = NULL, call;

  if (pop == const0_rtx)
    pop = NULL;
  if (TARGET_64BIT && pop)
    abort ();

#if TARGET_MACHO
  if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
    fnaddr = machopic_indirect_call_target (fnaddr);
#else
  /* Static functions and indirect calls don't need the pic register.  */
  if (! TARGET_64BIT && flag_pic
      && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
      && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
    use_reg (&use, pic_offset_table_rtx);

  if (TARGET_64BIT && INTVAL (callarg2) >= 0)
    {
      rtx al = gen_rtx_REG (QImode, 0);
      emit_move_insn (al, callarg2);
      use_reg (&use, al);
    }
#endif /* TARGET_MACHO */

  if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
    {
      fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
      fnaddr = gen_rtx_MEM (QImode, fnaddr);
    }
  if (sibcall && TARGET_64BIT
      && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
    {
      rtx addr;
      addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
      fnaddr = gen_rtx_REG (Pmode, 40);
      emit_move_insn (fnaddr, addr);
      fnaddr = gen_rtx_MEM (QImode, fnaddr);
    }

  call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
  if (retval)
    call = gen_rtx_SET (VOIDmode, retval, call);
  if (pop)
    {
      pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
      pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
      call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
    }

  call = emit_call_insn (call);
  if (use)
    CALL_INSN_FUNCTION_USAGE (call) = use;
}

/* Clear stack slot assignments remembered from previous functions.
   This is called from INIT_EXPANDERS once before RTL is emitted for each
   function.  */

static struct machine_function *
ix86_init_machine_status (void)
{
  struct machine_function *f;

  f = ggc_alloc_cleared (sizeof (struct machine_function));
  f->use_fast_prologue_epilogue_nregs = -1;

  return f;
}

/* Return a MEM corresponding to a stack slot with mode MODE.
   Allocate a new slot if necessary.

   The RTL for a function can have several slots available: N is
   which slot to use.  */

rtx
assign_386_stack_local (enum machine_mode mode, int n)
{
  struct stack_local_entry *s;

  if (n < 0 || n >= MAX_386_STACK_LOCALS)
    abort ();

  for (s = ix86_stack_locals; s; s = s->next)
    if (s->mode == mode && s->n == n)
      return s->rtl;

  s = (struct stack_local_entry *)
    ggc_alloc (sizeof (struct stack_local_entry));
  s->n = n;
  s->mode = mode;
  s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);

  s->next = ix86_stack_locals;
  ix86_stack_locals = s;
  return s->rtl;
}

/* Construct the SYMBOL_REF for the tls_get_addr function.  */

static GTY(()) rtx ix86_tls_symbol;
rtx
ix86_tls_get_addr (void)
{
  if (!ix86_tls_symbol)
    {
      ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
					    (TARGET_GNU_TLS && !TARGET_64BIT)
					    ? "___tls_get_addr"
					    : "__tls_get_addr");
    }

  return ix86_tls_symbol;
}

/* Calculate the length of the memory address in the instruction
   encoding.  Does not include the one-byte modrm, opcode, or prefix.  */
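/* For example, a plain (%eax) operand contributes 0 extra bytes, a
   disp32 with no base or index contributes 4, a base register with an
   8-bit displacement contributes 1, and using an index register adds
   one more byte for the SIB.  */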

static int
memory_address_length (rtx addr)
{
  struct ix86_address parts;
  rtx base, index, disp;
  int len;

  if (GET_CODE (addr) == PRE_DEC
      || GET_CODE (addr) == POST_INC
      || GET_CODE (addr) == PRE_MODIFY
      || GET_CODE (addr) == POST_MODIFY)
    return 0;

  if (! ix86_decompose_address (addr, &parts))
    abort ();

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  len = 0;

  /* Register Indirect.  */
  if (base && !index && !disp)
    {
      /* Special cases: ebp and esp need the two-byte modrm form.  */
      if (addr == stack_pointer_rtx
	  || addr == arg_pointer_rtx
	  || addr == frame_pointer_rtx
	  || addr == hard_frame_pointer_rtx)
	len = 1;
    }

  /* Direct Addressing.  */
  else if (disp && !base && !index)
    len = 4;

  else
    {
      /* Find the length of the displacement constant.  */
      if (disp)
	{
	  if (GET_CODE (disp) == CONST_INT
	      && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K')
	      && base)
	    len = 1;
	  else
	    len = 4;
	}

      /* An index requires the two-byte modrm form.  */
      if (index)
	len += 1;
    }

  return len;
}

/* Compute default value for "length_immediate" attribute.  When SHORTFORM
   is set, expect that the insn has an 8-bit immediate alternative.  */
int
ix86_attr_length_immediate_default (rtx insn, int shortform)
{
  int len = 0;
  int i;
  extract_insn_cached (insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (CONSTANT_P (recog_data.operand[i]))
      {
	if (len)
	  abort ();
	if (shortform
	    && GET_CODE (recog_data.operand[i]) == CONST_INT
	    && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
	  len = 1;
	else
	  {
	    switch (get_attr_mode (insn))
	      {
	      case MODE_QI:
		len += 1;
		break;
	      case MODE_HI:
		len += 2;
		break;
	      case MODE_SI:
		len += 4;
		break;
	      /* Immediates for DImode instructions are encoded as 32bit
		 sign extended values.  */
	      case MODE_DI:
		len += 4;
		break;
	      default:
		fatal_insn ("unknown insn mode", insn);
	      }
	  }
      }
  return len;
}

/* Compute default value for "length_address" attribute.  */
int
ix86_attr_length_address_default (rtx insn)
{
  int i;

  if (get_attr_type (insn) == TYPE_LEA)
    {
      rtx set = PATTERN (insn);
      if (GET_CODE (set) == SET)
	;
      else if (GET_CODE (set) == PARALLEL
	       && GET_CODE (XVECEXP (set, 0, 0)) == SET)
	set = XVECEXP (set, 0, 0);
      else
	{
#ifdef ENABLE_CHECKING
	  abort ();
#endif
	  return 0;
	}

      return memory_address_length (SET_SRC (set));
    }

  extract_insn_cached (insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (GET_CODE (recog_data.operand[i]) == MEM)
      return memory_address_length (XEXP (recog_data.operand[i], 0));
  return 0;
}

/* Return the maximum number of instructions a cpu can issue.  */

static int
ix86_issue_rate (void)
{
  switch (ix86_tune)
    {
    case PROCESSOR_PENTIUM:
    case PROCESSOR_K6:
      return 2;

    case PROCESSOR_PENTIUMPRO:
    case PROCESSOR_PENTIUM4:
    case PROCESSOR_ATHLON:
    case PROCESSOR_K8:
      return 3;

    default:
      return 1;
    }
}

/* A subroutine of ix86_adjust_cost -- return true iff INSN reads the flags
   set by DEP_INSN and nothing else that DEP_INSN sets.  */

static int
ix86_flags_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
{
  rtx set, set2;

  /* Simplify the test for uninteresting insns.  */
  if (insn_type != TYPE_SETCC
      && insn_type != TYPE_ICMOV
      && insn_type != TYPE_FCMOV
      && insn_type != TYPE_IBR)
    return 0;

  if ((set = single_set (dep_insn)) != 0)
    {
      set = SET_DEST (set);
      set2 = NULL_RTX;
    }
  else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
	   && XVECLEN (PATTERN (dep_insn), 0) == 2
	   && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
	   && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
    {
      set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
      set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
    }
  else
    return 0;

  if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
    return 0;

  /* This test is true if the dependent insn reads the flags but
     not any other potentially set register.  */
  if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
    return 0;

  if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
    return 0;

  return 1;
}

/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
   address with operands set by DEP_INSN.  */

static int
ix86_agi_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
{
  rtx addr;

  if (insn_type == TYPE_LEA
      && TARGET_PENTIUM)
    {
      addr = PATTERN (insn);
      if (GET_CODE (addr) == SET)
	;
      else if (GET_CODE (addr) == PARALLEL
	       && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
	addr = XVECEXP (addr, 0, 0);
      else
	abort ();
      addr = SET_SRC (addr);
    }
  else
    {
      int i;
      extract_insn_cached (insn);
      for (i = recog_data.n_operands - 1; i >= 0; --i)
	if (GET_CODE (recog_data.operand[i]) == MEM)
	  {
	    addr = XEXP (recog_data.operand[i], 0);
	    goto found;
	  }
      return 0;
    found:;
    }

  return modified_in_p (addr, dep_insn);
}

static int
ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
{
  enum attr_type insn_type, dep_insn_type;
  enum attr_memory memory, dep_memory;
  rtx set, set2;
  int dep_insn_code_number;

  /* Anti and output dependencies have zero cost on all CPUs.  */
  if (REG_NOTE_KIND (link) != 0)
    return 0;

  dep_insn_code_number = recog_memoized (dep_insn);

  /* If we can't recognize the insns, we can't really do anything.  */
  if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
    return cost;

  insn_type = get_attr_type (insn);
  dep_insn_type = get_attr_type (dep_insn);

  switch (ix86_tune)
    {
    case PROCESSOR_PENTIUM:
      /* Address Generation Interlock adds a cycle of latency.  */
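      /* For example, "mov (%ebx), %eax" immediately followed by
	 "mov (%eax), %ecx" stalls on Pentium because the second load's
	 address depends on a value produced only in the previous
	 cycle.  */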
11796 if (ix86_agi_dependant (insn, dep_insn, insn_type))
11797 cost += 1;
11798
11799 /* ??? Compares pair with jump/setcc. */
11800 if (ix86_flags_dependant (insn, dep_insn, insn_type))
11801 cost = 0;
11802
d1f87653 11803 /* Floating point stores require value to be ready one cycle earlier. */
0f290768 11804 if (insn_type == TYPE_FMOV
e075ae69
RH
11805 && get_attr_memory (insn) == MEMORY_STORE
11806 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11807 cost += 1;
11808 break;
a269a03c 11809
e075ae69 11810 case PROCESSOR_PENTIUMPRO:
6ad48e84
JH
11811 memory = get_attr_memory (insn);
11812 dep_memory = get_attr_memory (dep_insn);
11813
0f290768 11814 /* Since we can't represent delayed latencies of load+operation,
e075ae69
RH
11815 increase the cost here for non-imov insns. */
11816 if (dep_insn_type != TYPE_IMOV
6ad48e84
JH
11817 && dep_insn_type != TYPE_FMOV
11818 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
e075ae69
RH
11819 cost += 1;
11820
11821 /* INT->FP conversion is expensive. */
11822 if (get_attr_fp_int_src (dep_insn))
11823 cost += 5;
11824
11825 /* There is one cycle extra latency between an FP op and a store. */
11826 if (insn_type == TYPE_FMOV
11827 && (set = single_set (dep_insn)) != NULL_RTX
11828 && (set2 = single_set (insn)) != NULL_RTX
11829 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
11830 && GET_CODE (SET_DEST (set2)) == MEM)
11831 cost += 1;
6ad48e84
JH
11832
11833 /* Show ability of reorder buffer to hide latency of load by executing
11834 in parallel with previous instruction in case
11835 previous instruction is not needed to compute the address. */
11836 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11837 && !ix86_agi_dependant (insn, dep_insn, insn_type))
b96a374d 11838 {
6ad48e84
JH
11839 /* Claim moves to take one cycle, as core can issue one load
11840 at time and the next load can start cycle later. */
11841 if (dep_insn_type == TYPE_IMOV
11842 || dep_insn_type == TYPE_FMOV)
11843 cost = 1;
11844 else if (cost > 1)
11845 cost--;
11846 }
e075ae69 11847 break;
a269a03c 11848
e075ae69 11849 case PROCESSOR_K6:
6ad48e84
JH
11850 memory = get_attr_memory (insn);
11851 dep_memory = get_attr_memory (dep_insn);
e075ae69
RH
11852 /* The esp dependency is resolved before the instruction is really
11853 finished. */
11854 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
11855 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
11856 return 1;
a269a03c 11857
0f290768 11858 /* Since we can't represent delayed latencies of load+operation,
e075ae69 11859 increase the cost here for non-imov insns. */
6ad48e84 11860 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
e075ae69
RH
11861 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
11862
11863 /* INT->FP conversion is expensive. */
11864 if (get_attr_fp_int_src (dep_insn))
11865 cost += 5;
6ad48e84
JH
11866
11867 /* Show ability of reorder buffer to hide latency of load by executing
11868 in parallel with previous instruction in case
11869 previous instruction is not needed to compute the address. */
11870 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11871 && !ix86_agi_dependant (insn, dep_insn, insn_type))
b96a374d 11872 {
6ad48e84
JH
11873 /* Claim moves to take one cycle, as core can issue one load
11874 at time and the next load can start cycle later. */
11875 if (dep_insn_type == TYPE_IMOV
11876 || dep_insn_type == TYPE_FMOV)
11877 cost = 1;
11878 else if (cost > 2)
11879 cost -= 2;
11880 else
11881 cost = 1;
11882 }
a14003ee 11883 break;
e075ae69 11884
309ada50 11885 case PROCESSOR_ATHLON:
4977bab6 11886 case PROCESSOR_K8:
6ad48e84
JH
11887 memory = get_attr_memory (insn);
11888 dep_memory = get_attr_memory (dep_insn);
11889
6ad48e84
JH
11890 /* Show ability of reorder buffer to hide latency of load by executing
11891 in parallel with previous instruction in case
11892 previous instruction is not needed to compute the address. */
11893 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11894 && !ix86_agi_dependant (insn, dep_insn, insn_type))
b96a374d 11895 {
26f74aa3
JH
11896 enum attr_unit unit = get_attr_unit (insn);
11897 int loadcost = 3;
11898
11899 /* Because of the difference between the length of integer and
11900 floating unit pipeline preparation stages, the memory operands
b96a374d 11901 for floating point are cheaper.
26f74aa3 11902
c51e6d85 11903 ??? For Athlon it the difference is most probably 2. */
26f74aa3
JH
11904 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
11905 loadcost = 3;
11906 else
11907 loadcost = TARGET_ATHLON ? 2 : 0;
11908
11909 if (cost >= loadcost)
11910 cost -= loadcost;
6ad48e84
JH
11911 else
11912 cost = 0;
11913 }
309ada50 11914
a269a03c 11915 default:
a269a03c
JC
11916 break;
11917 }
11918
11919 return cost;
11920}
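
/* ix86_adjust_cost is installed (with the other scheduling callbacks,
   earlier in this file) as the TARGET_SCHED_ADJUST_COST hook.  The
   scheduler invokes it once per dependence edge; the value returned
   here replaces the latency derived from the insn attributes, so
   returning a smaller number lets dependent insns be placed closer
   together.  */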

static union
{
  struct ppro_sched_data
  {
    rtx decode[3];
    int issued_this_cycle;
  } ppro;
} ix86_sched_data;

static enum attr_ppro_uops
ix86_safe_ppro_uops (rtx insn)
{
  if (recog_memoized (insn) >= 0)
    return get_attr_ppro_uops (insn);
  else
    return PPRO_UOPS_MANY;
}

static void
ix86_dump_ppro_packet (FILE *dump)
{
  if (ix86_sched_data.ppro.decode[0])
    {
      fprintf (dump, "PPRO packet: %d",
	       INSN_UID (ix86_sched_data.ppro.decode[0]));
      if (ix86_sched_data.ppro.decode[1])
	fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
      if (ix86_sched_data.ppro.decode[2])
	fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
      fputc ('\n', dump);
    }
}

/* We're beginning a new block.  Initialize data structures as necessary.  */

static void
ix86_sched_init (FILE *dump ATTRIBUTE_UNUSED,
		 int sched_verbose ATTRIBUTE_UNUSED,
		 int veclen ATTRIBUTE_UNUSED)
{
  memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
}

/* Shift INSN to SLOT, and shift everything else down.  */

static void
ix86_reorder_insn (rtx *insnp, rtx *slot)
{
  if (insnp != slot)
    {
      rtx insn = *insnp;
      do
	insnp[0] = insnp[1];
      while (++insnp != slot);
      *insnp = insn;
    }
}

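/* Background for the reordering below (a summary added for clarity; the
   hardware detail is the well-known PPro/PII "4-1-1" decode template):
   decoder 0 can handle an insn of up to four uops per cycle, while
   decoders 1 and 2 take only single-uop insns.  Hence the code tries to
   issue one PPRO_UOPS_FEW (2-4 uop) or PPRO_UOPS_MANY insn first and
   back-fill the remaining two slots with PPRO_UOPS_ONE insns.  */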
static void
ix86_sched_reorder_ppro (rtx *ready, rtx *e_ready)
{
  rtx decode[3];
  enum attr_ppro_uops cur_uops;
  int issued_this_cycle;
  rtx *insnp;
  int i;

  /* At this point .ppro.decode contains the state of the three
     decoders from last "cycle".  That is, those insns that were
     actually independent.  But here we're scheduling for the
     decoder, and we may find things that are decodable in the
     same cycle.  */

  memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
  issued_this_cycle = 0;

  insnp = e_ready;
  cur_uops = ix86_safe_ppro_uops (*insnp);

  /* If the decoders are empty, and we've a complex insn at the
     head of the priority queue, let it issue without complaint.  */
  if (decode[0] == NULL)
    {
      if (cur_uops == PPRO_UOPS_MANY)
	{
	  decode[0] = *insnp;
	  goto ppro_done;
	}

      /* Otherwise, search for a 2-4 uop insn to issue.  */
      while (cur_uops != PPRO_UOPS_FEW)
	{
	  if (insnp == ready)
	    break;
	  cur_uops = ix86_safe_ppro_uops (*--insnp);
	}

      /* If so, move it to the head of the line.  */
      if (cur_uops == PPRO_UOPS_FEW)
	ix86_reorder_insn (insnp, e_ready);

      /* Issue the head of the queue.  */
      issued_this_cycle = 1;
      decode[0] = *e_ready--;
    }

  /* Look for simple insns to fill in the other two slots.  */
  for (i = 1; i < 3; ++i)
    if (decode[i] == NULL)
      {
	if (ready > e_ready)
	  goto ppro_done;

	insnp = e_ready;
	cur_uops = ix86_safe_ppro_uops (*insnp);
	while (cur_uops != PPRO_UOPS_ONE)
	  {
	    if (insnp == ready)
	      break;
	    cur_uops = ix86_safe_ppro_uops (*--insnp);
	  }

	/* Found one.  Move it to the head of the queue and issue it.  */
	if (cur_uops == PPRO_UOPS_ONE)
	  {
	    ix86_reorder_insn (insnp, e_ready);
	    decode[i] = *e_ready--;
	    issued_this_cycle++;
	    continue;
	  }

	/* ??? Didn't find one.  Ideally, here we would do a lazy split
	   of 2-uop insns, issue one and queue the other.  */
      }

 ppro_done:
  if (issued_this_cycle == 0)
    issued_this_cycle = 1;
  ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
}

/* We are about to begin issuing insns for this clock cycle.
   Override the default sort algorithm to better slot instructions.  */
static int
ix86_sched_reorder (FILE *dump ATTRIBUTE_UNUSED,
		    int sched_verbose ATTRIBUTE_UNUSED, rtx *ready,
		    int *n_readyp, int clock_var ATTRIBUTE_UNUSED)
{
  int n_ready = *n_readyp;
  rtx *e_ready = ready + n_ready - 1;

  /* Make sure to go ahead and initialize key items in
     ix86_sched_data if we are not going to bother trying to
     reorder the ready queue.  */
  if (n_ready < 2)
    {
      ix86_sched_data.ppro.issued_this_cycle = 1;
      goto out;
    }

  switch (ix86_tune)
    {
    default:
      break;

    case PROCESSOR_PENTIUMPRO:
      ix86_sched_reorder_ppro (ready, e_ready);
      break;
    }

out:
  return ix86_issue_rate ();
}

/* We are about to issue INSN.  Return the number of insns left on the
   ready queue that can be issued this cycle.  */

static int
ix86_variable_issue (FILE *dump, int sched_verbose, rtx insn,
		     int can_issue_more)
{
  int i;
  switch (ix86_tune)
    {
    default:
      return can_issue_more - 1;

    case PROCESSOR_PENTIUMPRO:
      {
	enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);

	if (uops == PPRO_UOPS_MANY)
	  {
	    if (sched_verbose)
	      ix86_dump_ppro_packet (dump);
	    ix86_sched_data.ppro.decode[0] = insn;
	    ix86_sched_data.ppro.decode[1] = NULL;
	    ix86_sched_data.ppro.decode[2] = NULL;
	    if (sched_verbose)
	      ix86_dump_ppro_packet (dump);
	    ix86_sched_data.ppro.decode[0] = NULL;
	  }
	else if (uops == PPRO_UOPS_FEW)
	  {
	    if (sched_verbose)
	      ix86_dump_ppro_packet (dump);
	    ix86_sched_data.ppro.decode[0] = insn;
	    ix86_sched_data.ppro.decode[1] = NULL;
	    ix86_sched_data.ppro.decode[2] = NULL;
	  }
	else
	  {
	    for (i = 0; i < 3; ++i)
	      if (ix86_sched_data.ppro.decode[i] == NULL)
		{
		  ix86_sched_data.ppro.decode[i] = insn;
		  break;
		}
	    if (i == 3)
	      abort ();
	    if (i == 2)
	      {
		if (sched_verbose)
		  ix86_dump_ppro_packet (dump);
		ix86_sched_data.ppro.decode[0] = NULL;
		ix86_sched_data.ppro.decode[1] = NULL;
		ix86_sched_data.ppro.decode[2] = NULL;
	      }
	  }
      }
      return --ix86_sched_data.ppro.issued_this_cycle;
    }
}

static int
ia32_use_dfa_pipeline_interface (void)
{
  if (TARGET_PENTIUM || TARGET_ATHLON_K8)
    return 1;
  return 0;
}

/* How many alternative schedules to try.  This should be as wide as the
   scheduling freedom in the DFA, but no wider.  Making this value too
   large results in extra work for the scheduler.  */

static int
ia32_multipass_dfa_lookahead (void)
{
  if (ix86_tune == PROCESSOR_PENTIUM)
    return 2;
  else
    return 0;
}

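/* The Pentium-only lookahead of 2 above plausibly corresponds to its two
   execution pipes (U and V), which are where its scheduling freedom comes
   from; the other DFA-scheduled chips keep the default of 0.  (An
   interpretive note, not from the original sources.)  */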
\f
/* Walk through INSNS and look for MEM references whose address is DSTREG or
   SRCREG and set the memory attribute to those of DSTREF and SRCREF, as
   appropriate.  */

void
ix86_set_move_mem_attrs (rtx insns, rtx dstref, rtx srcref, rtx dstreg,
			 rtx srcreg)
{
  rtx insn;

  for (insn = insns; insn != 0; insn = NEXT_INSN (insn))
    if (INSN_P (insn))
      ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
				 dstreg, srcreg);
}

/* Subroutine of above to actually do the updating by recursively walking
   the rtx.  */

static void
ix86_set_move_mem_attrs_1 (rtx x, rtx dstref, rtx srcref, rtx dstreg,
			   rtx srcreg)
{
  enum rtx_code code = GET_CODE (x);
  const char *format_ptr = GET_RTX_FORMAT (code);
  int i, j;

  if (code == MEM && XEXP (x, 0) == dstreg)
    MEM_COPY_ATTRIBUTES (x, dstref);
  else if (code == MEM && XEXP (x, 0) == srcreg)
    MEM_COPY_ATTRIBUTES (x, srcref);

  for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
    {
      if (*format_ptr == 'e')
	ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
				   dstreg, srcreg);
      else if (*format_ptr == 'E')
	for (j = XVECLEN (x, i) - 1; j >= 0; j--)
	  ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
				     dstreg, srcreg);
    }
}
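
/* Illustrative use (the variable names here are hypothetical): after a
   block-move expander copies through address registers, calling

       ix86_set_move_mem_attrs (get_insns (), dst_mem, src_mem,
				dstreg, srcreg);

   makes every (mem (reg DSTREG)) inherit the alias set and other memory
   attributes of DST_MEM, and likewise for the source, so later passes
   retain the aliasing information of the original operands.  */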
\f
/* Compute the alignment given to a constant that is being placed in memory.
   EXP is the constant and ALIGN is the alignment that the object would
   ordinarily have.
   The value of this function is used instead of that alignment to align
   the object.  */

int
ix86_constant_alignment (tree exp, int align)
{
  if (TREE_CODE (exp) == REAL_CST)
    {
      if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
	return 64;
      else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
	   && align < 256)
    return 256;

  return align;
}
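
/* Worked example (not from the original sources): a "double" constant
   that would ordinarily get 32-bit alignment is returned as 64 here, so
   FP loads of it never straddle an alignment boundary; a string literal
   of 31 or more bytes gets 256-bit alignment so block operations on it
   can use wider aligned accesses.  */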

/* Compute the alignment for a static variable.
   TYPE is the data type, and ALIGN is the alignment that
   the object would ordinarily have.  The value of this function is used
   instead of that alignment to align the object.  */

int
ix86_data_alignment (tree type, int align)
{
  if (AGGREGATE_TYPE_P (type)
      && TYPE_SIZE (type)
      && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
      && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
	  || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
    return 256;

  /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to a 16-byte boundary.  */
  if (TARGET_64BIT)
    {
      if (AGGREGATE_TYPE_P (type)
	  && TYPE_SIZE (type)
	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	  && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
	      || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
	return 128;
    }

  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      if (TYPE_MODE (type) == DCmode && align < 64)
	return 64;
      if (TYPE_MODE (type) == XCmode && align < 128)
	return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
	    || TREE_CODE (type) == UNION_TYPE
	    || TREE_CODE (type) == QUAL_UNION_TYPE)
	   && TYPE_FIELDS (type))
    {
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
	   || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
	return 128;
    }

  return align;
}
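
/* Worked example (not from the original sources): for
   "static char buf[20];" on x86-64, TYPE_SIZE is 160 bits; the first
   test (160 >= 256) fails, but the TARGET_64BIT test (160 >= 128)
   succeeds, so the function returns 128 and BUF lands on the 16-byte
   boundary the ABI requires for arrays larger than 16 bytes.  */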

/* Compute the alignment for a local variable.
   TYPE is the data type, and ALIGN is the alignment that
   the object would ordinarily have.  The value of this macro is used
   instead of that alignment to align the object.  */

int
ix86_local_alignment (tree type, int align)
{
  /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to a 16-byte boundary.  */
  if (TARGET_64BIT)
    {
      if (AGGREGATE_TYPE_P (type)
	  && TYPE_SIZE (type)
	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	  && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
	      || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
	return 128;
    }
  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      if (TYPE_MODE (type) == DCmode && align < 64)
	return 64;
      if (TYPE_MODE (type) == XCmode && align < 128)
	return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
	    || TREE_CODE (type) == UNION_TYPE
	    || TREE_CODE (type) == QUAL_UNION_TYPE)
	   && TYPE_FIELDS (type))
    {
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
	   || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
	return 128;
    }
  return align;
}
\f
/* Emit RTL insns to initialize the variable parts of a trampoline.
   FNADDR is an RTX for the address of the function's pure code.
   CXT is an RTX for the static chain value for the function.  */
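
/* For reference, the byte sequences emitted below assemble to the
   following code (reconstructed from the constants; this comment is
   not part of the original sources):

   32-bit trampoline (10 bytes):
	b9 <cxt:4>		movl  $CXT, %ecx
	e9 <disp:4>		jmp   FNADDR	; rel32 from tramp+10

   64-bit trampoline (long form, 23 bytes):
	49 bb <fnaddr:8>	movabsq $FNADDR, %r11
	49 ba <cxt:8>		movabsq $CXT, %r10
	49 ff e3		jmpq  *%r11

   When FNADDR is a zero-extended 32-bit value, the first insn is the
   shorter "41 bb <fnaddr:4>" (movl $FNADDR, %r11d) instead.  */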
void
x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
{
  if (!TARGET_64BIT)
    {
      /* Compute offset from the end of the jmp to the target function.  */
      rtx disp = expand_binop (SImode, sub_optab, fnaddr,
			       plus_constant (tramp, 10),
			       NULL_RTX, 1, OPTAB_DIRECT);
      emit_move_insn (gen_rtx_MEM (QImode, tramp),
		      gen_int_mode (0xb9, QImode));
      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
		      gen_int_mode (0xe9, QImode));
      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
    }
  else
    {
      int offset = 0;
      /* Try to load the address using the shorter movl instead of movabs.
	 We may want to support movq for kernel mode, but the kernel does
	 not use trampolines at the moment.  */
      if (x86_64_zero_extended_value (fnaddr))
	{
	  fnaddr = copy_to_mode_reg (DImode, fnaddr);
	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
			  gen_int_mode (0xbb41, HImode));
	  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
			  gen_lowpart (SImode, fnaddr));
	  offset += 6;
	}
      else
	{
	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
			  gen_int_mode (0xbb49, HImode));
	  emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
			  fnaddr);
	  offset += 10;
	}
      /* Load the static chain into r10 using movabs.  */
      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
		      gen_int_mode (0xba49, HImode));
      emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
		      cxt);
      offset += 10;
      /* Jump through r11.  */
      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
		      gen_int_mode (0xff49, HImode));
      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset + 2)),
		      gen_int_mode (0xe3, QImode));
      offset += 3;
      if (offset > TRAMPOLINE_SIZE)
	abort ();
    }

#ifdef TRANSFER_FROM_TRAMPOLINE
  emit_library_call (gen_rtx (SYMBOL_REF, Pmode, "__enable_execute_stack"),
		     LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
#endif
}
\f
#define def_builtin(MASK, NAME, TYPE, CODE)			\
do {								\
  if ((MASK) & target_flags					\
      && (!((MASK) & MASK_64BIT) || TARGET_64BIT))		\
    builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD,	\
		      NULL, NULL_TREE);				\
} while (0)
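
/* A representative use, as it appears further down in this file (the
   particular arguments here are illustrative):

     def_builtin (MASK_SSE, "__builtin_ia32_addps",
		  v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ADDPS);

   i.e. the builtin is registered only when MASK overlaps target_flags,
   and MASK_64BIT entries only for TARGET_64BIT.  */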

struct builtin_description
{
  const unsigned int mask;
  const enum insn_code icode;
  const char *const name;
  const enum ix86_builtins code;
  const enum rtx_code comparison;
  const unsigned int flag;
};

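/* The bdesc_* tables below are consumed in two places later in this
   file: ix86_init_mmx_sse_builtins walks them to register each builtin
   whose MASK is enabled, and the builtin expanders use them to map a
   builtin CODE back to the insn pattern ICODE (plus the COMPARISON code
   and FLAG for the compare intrinsics).  */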
static const struct builtin_description bdesc_comi[] =
{
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
};

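/* In bdesc_comi, the COMPARISON field is the RTL comparison the expander
   applies to the EFLAGS result of the (u)comiss/(u)comisd instruction;
   the unordered codes (UNEQ, UNLT, UNLE, LTGT) reflect how those
   instructions report NaN operands (see the comi expander further down
   for the exact use).  */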
static const struct builtin_description bdesc_2arg[] =
{
  /* SSE */
  { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
  { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
  { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
  { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
  { MASK_SSE, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
  { MASK_SSE, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
  { MASK_SSE, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
  { MASK_SSE, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },

  { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
  { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
  { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
  { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
  { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
  { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
  { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
  { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
  { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
  { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
  { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
  { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
  { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
  { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
  { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
  { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
  { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
  { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
  { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
  { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },

  { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
  { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
  { MASK_SSE, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
  { MASK_SSE, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },

  { MASK_SSE, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },

  { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },

  /* MMX */
  { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
  { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
  { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
  { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
  { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
  { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },

  { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
  { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
  { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
  { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
  { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
  { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
  { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
  { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },

  { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
  { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },

  { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },

  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },

  { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
  { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
  { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
  { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
  { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
  { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },

  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },

  { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },

  /* Special.  */
  { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },

  { MASK_SSE, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
  { MASK_SSE, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
  { MASK_SSE | MASK_64BIT, CODE_FOR_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },

  { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
  { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
  { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
  { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },

  { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
  { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
  { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
  { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },

  { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
  { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
  { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
  { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },

  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },

  /* SSE2 */
  { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
  { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
  { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
  { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },

  { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
  { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
  { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
  { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
  { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
  { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
  { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
  { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
  { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
  { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
  { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
  { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
  { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
  { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
  { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
  { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
  { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
  { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
  { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
  { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },

  { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
  { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },

  /* SSE2 MMX */
  { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
  { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },

  { MASK_MMX, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
  { MASK_MMX, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
  { MASK_MMX, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
  { MASK_MMX, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
  { MASK_MMX, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
  { MASK_MMX, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
  { MASK_MMX, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
  { MASK_MMX, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },

  { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },

  { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },

  { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },

  { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },

  { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },

  { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
  { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },

  { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },

  { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
  { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },

  /* PNI MMX */
  { MASK_PNI, CODE_FOR_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
  { MASK_PNI, CODE_FOR_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
  { MASK_PNI, CODE_FOR_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
  { MASK_PNI, CODE_FOR_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
  { MASK_PNI, CODE_FOR_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
  { MASK_PNI, CODE_FOR_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
};

static const struct builtin_description bdesc_1arg[] =
{
  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },

  { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
  { MASK_SSE, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
  { MASK_SSE, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },

  { MASK_SSE, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
  { MASK_SSE, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
  { MASK_SSE | MASK_64BIT, CODE_FOR_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
  { MASK_SSE, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
  { MASK_SSE, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
  { MASK_SSE | MASK_64BIT, CODE_FOR_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_movdq2q, 0, IX86_BUILTIN_MOVDQ2Q, 0, 0 },

  { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },

  { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },

  { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },

  { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },

  { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
  { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
  { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },

  { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 },

  /* PNI */
  { MASK_PNI, CODE_FOR_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
  { MASK_PNI, CODE_FOR_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
  { MASK_PNI, CODE_FOR_movddup, 0, IX86_BUILTIN_MOVDDUP, 0, 0 }
};

void
ix86_init_builtins (void)
{
  if (TARGET_MMX)
    ix86_init_mmx_sse_builtins ();
}

/* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
   is zero.  Otherwise, if TARGET_SSE is not set, only expand the MMX
   builtins.  */
static void
ix86_init_mmx_sse_builtins (void)
{
  const struct builtin_description * d;
  size_t i;

  tree pchar_type_node = build_pointer_type (char_type_node);
  tree pcchar_type_node = build_pointer_type (
    build_type_variant (char_type_node, 1, 0));
  tree pfloat_type_node = build_pointer_type (float_type_node);
  tree pcfloat_type_node = build_pointer_type (
    build_type_variant (float_type_node, 1, 0));
  tree pv2si_type_node = build_pointer_type (V2SI_type_node);
  tree pv2di_type_node = build_pointer_type (V2DI_type_node);
  tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);

  /* Comparisons.  */
  tree int_ftype_v4sf_v4sf
    = build_function_type_list (integer_type_node,
				V4SF_type_node, V4SF_type_node, NULL_TREE);
  tree v4si_ftype_v4sf_v4sf
    = build_function_type_list (V4SI_type_node,
				V4SF_type_node, V4SF_type_node, NULL_TREE);
  /* MMX/SSE/integer conversions.  */
  tree int_ftype_v4sf
    = build_function_type_list (integer_type_node,
				V4SF_type_node, NULL_TREE);
  tree int64_ftype_v4sf
    = build_function_type_list (long_long_integer_type_node,
				V4SF_type_node, NULL_TREE);
  tree int_ftype_v8qi
    = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
  tree v4sf_ftype_v4sf_int
    = build_function_type_list (V4SF_type_node,
				V4SF_type_node, integer_type_node, NULL_TREE);
  tree v4sf_ftype_v4sf_int64
    = build_function_type_list (V4SF_type_node,
				V4SF_type_node, long_long_integer_type_node,
				NULL_TREE);
  tree v4sf_ftype_v4sf_v2si
    = build_function_type_list (V4SF_type_node,
				V4SF_type_node, V2SI_type_node, NULL_TREE);
  tree int_ftype_v4hi_int
    = build_function_type_list (integer_type_node,
				V4HI_type_node, integer_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_int_int
    = build_function_type_list (V4HI_type_node, V4HI_type_node,
				integer_type_node, integer_type_node,
				NULL_TREE);
  /* Miscellaneous.  */
  tree v8qi_ftype_v4hi_v4hi
    = build_function_type_list (V8QI_type_node,
				V4HI_type_node, V4HI_type_node, NULL_TREE);
  tree v4hi_ftype_v2si_v2si
    = build_function_type_list (V4HI_type_node,
				V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree v4sf_ftype_v4sf_v4sf_int
    = build_function_type_list (V4SF_type_node,
				V4SF_type_node, V4SF_type_node,
				integer_type_node, NULL_TREE);
  tree v2si_ftype_v4hi_v4hi
    = build_function_type_list (V2SI_type_node,
				V4HI_type_node, V4HI_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_int
    = build_function_type_list (V4HI_type_node,
				V4HI_type_node, integer_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_di
    = build_function_type_list (V4HI_type_node,
				V4HI_type_node, long_long_unsigned_type_node,
				NULL_TREE);
  tree v2si_ftype_v2si_di
    = build_function_type_list (V2SI_type_node,
				V2SI_type_node, long_long_unsigned_type_node,
				NULL_TREE);
  tree void_ftype_void
    = build_function_type (void_type_node, void_list_node);
  tree void_ftype_unsigned
    = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
  tree void_ftype_unsigned_unsigned
    = build_function_type_list (void_type_node, unsigned_type_node,
				unsigned_type_node, NULL_TREE);
  tree void_ftype_pcvoid_unsigned_unsigned
    = build_function_type_list (void_type_node, const_ptr_type_node,
				unsigned_type_node, unsigned_type_node,
				NULL_TREE);
  tree unsigned_ftype_void
    = build_function_type (unsigned_type_node, void_list_node);
  tree di_ftype_void
    = build_function_type (long_long_unsigned_type_node, void_list_node);
  tree v4sf_ftype_void
    = build_function_type (V4SF_type_node, void_list_node);
  tree v2si_ftype_v4sf
    = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
  /* Loads/stores.  */
  tree void_ftype_v8qi_v8qi_pchar
    = build_function_type_list (void_type_node,
				V8QI_type_node, V8QI_type_node,
				pchar_type_node, NULL_TREE);
  tree v4sf_ftype_pcfloat
    = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
  /* @@@ the type is bogus */
  tree v4sf_ftype_v4sf_pv2si
    = build_function_type_list (V4SF_type_node,
				V4SF_type_node, pv2si_type_node, NULL_TREE);
  tree void_ftype_pv2si_v4sf
    = build_function_type_list (void_type_node,
				pv2si_type_node, V4SF_type_node, NULL_TREE);
  tree void_ftype_pfloat_v4sf
    = build_function_type_list (void_type_node,
				pfloat_type_node, V4SF_type_node, NULL_TREE);
  tree void_ftype_pdi_di
    = build_function_type_list (void_type_node,
				pdi_type_node, long_long_unsigned_type_node,
				NULL_TREE);
  tree void_ftype_pv2di_v2di
    = build_function_type_list (void_type_node,
				pv2di_type_node, V2DI_type_node, NULL_TREE);
  /* Normal vector unops.  */
  tree v4sf_ftype_v4sf
    = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);

  /* Normal vector binops.  */
  tree v4sf_ftype_v4sf_v4sf
    = build_function_type_list (V4SF_type_node,
				V4SF_type_node, V4SF_type_node, NULL_TREE);
  tree v8qi_ftype_v8qi_v8qi
    = build_function_type_list (V8QI_type_node,
				V8QI_type_node, V8QI_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_v4hi
    = build_function_type_list (V4HI_type_node,
				V4HI_type_node, V4HI_type_node, NULL_TREE);
  tree v2si_ftype_v2si_v2si
    = build_function_type_list (V2SI_type_node,
				V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree di_ftype_di_di
    = build_function_type_list (long_long_unsigned_type_node,
				long_long_unsigned_type_node,
				long_long_unsigned_type_node, NULL_TREE);

  tree v2si_ftype_v2sf
    = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
  tree v2sf_ftype_v2si
    = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
  tree v2si_ftype_v2si
    = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree v2sf_ftype_v2sf
    = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
  tree v2sf_ftype_v2sf_v2sf
    = build_function_type_list (V2SF_type_node,
				V2SF_type_node, V2SF_type_node, NULL_TREE);
  tree v2si_ftype_v2sf_v2sf
    = build_function_type_list (V2SI_type_node,
				V2SF_type_node, V2SF_type_node, NULL_TREE);
  tree pint_type_node = build_pointer_type (integer_type_node);
  tree pcint_type_node = build_pointer_type (
    build_type_variant (integer_type_node, 1, 0));
  tree pdouble_type_node = build_pointer_type (double_type_node);
  tree pcdouble_type_node = build_pointer_type (
    build_type_variant (double_type_node, 1, 0));
  tree int_ftype_v2df_v2df
    = build_function_type_list (integer_type_node,
				V2DF_type_node, V2DF_type_node, NULL_TREE);

  tree ti_ftype_void
    = build_function_type (intTI_type_node, void_list_node);
  tree v2di_ftype_void
    = build_function_type (V2DI_type_node, void_list_node);
  tree ti_ftype_ti_ti
    = build_function_type_list (intTI_type_node,
				intTI_type_node, intTI_type_node, NULL_TREE);
  tree void_ftype_pcvoid
    = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
  tree v2di_ftype_di
    = build_function_type_list (V2DI_type_node,
				long_long_unsigned_type_node, NULL_TREE);
  tree di_ftype_v2di
    = build_function_type_list (long_long_unsigned_type_node,
				V2DI_type_node, NULL_TREE);
  tree v4sf_ftype_v4si
    = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
  tree v4si_ftype_v4sf
    = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
  tree v2df_ftype_v4si
    = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
  tree v4si_ftype_v2df
    = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
  tree v2si_ftype_v2df
    = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
  tree v4sf_ftype_v2df
    = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2si
    = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
  tree v2df_ftype_v4sf
    = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
  tree int_ftype_v2df
    = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
  tree int64_ftype_v2df
    = build_function_type_list (long_long_integer_type_node,
				V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2df_int
    = build_function_type_list (V2DF_type_node,
				V2DF_type_node, integer_type_node, NULL_TREE);
  tree v2df_ftype_v2df_int64
    = build_function_type_list (V2DF_type_node,
				V2DF_type_node, long_long_integer_type_node,
				NULL_TREE);
  tree v4sf_ftype_v4sf_v2df
    = build_function_type_list (V4SF_type_node,
				V4SF_type_node, V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2df_v4sf
    = build_function_type_list (V2DF_type_node,
				V2DF_type_node, V4SF_type_node, NULL_TREE);
  tree v2df_ftype_v2df_v2df_int
    = build_function_type_list (V2DF_type_node,
				V2DF_type_node, V2DF_type_node,
				integer_type_node,
				NULL_TREE);
  tree v2df_ftype_v2df_pv2si
    = build_function_type_list (V2DF_type_node,
				V2DF_type_node, pv2si_type_node, NULL_TREE);
  tree void_ftype_pv2si_v2df
    = build_function_type_list (void_type_node,
				pv2si_type_node, V2DF_type_node, NULL_TREE);
  tree void_ftype_pdouble_v2df
    = build_function_type_list (void_type_node,
				pdouble_type_node, V2DF_type_node, NULL_TREE);
  tree void_ftype_pint_int
    = build_function_type_list (void_type_node,
				pint_type_node, integer_type_node, NULL_TREE);
  tree void_ftype_v16qi_v16qi_pchar
    = build_function_type_list (void_type_node,
				V16QI_type_node, V16QI_type_node,
				pchar_type_node, NULL_TREE);
  tree v2df_ftype_pcdouble
    = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
  tree v2df_ftype_v2df_v2df
    = build_function_type_list (V2DF_type_node,
				V2DF_type_node, V2DF_type_node, NULL_TREE);
  tree v16qi_ftype_v16qi_v16qi
    = build_function_type_list (V16QI_type_node,
				V16QI_type_node, V16QI_type_node, NULL_TREE);
  tree v8hi_ftype_v8hi_v8hi
    = build_function_type_list (V8HI_type_node,
				V8HI_type_node, V8HI_type_node, NULL_TREE);
fbe5eb6d 13049 tree v4si_ftype_v4si_v4si
b4de2f7d
AH
13050 = build_function_type_list (V4SI_type_node,
13051 V4SI_type_node, V4SI_type_node, NULL_TREE);
fbe5eb6d 13052 tree v2di_ftype_v2di_v2di
b4de2f7d
AH
13053 = build_function_type_list (V2DI_type_node,
13054 V2DI_type_node, V2DI_type_node, NULL_TREE);
fbe5eb6d 13055 tree v2di_ftype_v2df_v2df
b4de2f7d
AH
13056 = build_function_type_list (V2DI_type_node,
13057 V2DF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 13058 tree v2df_ftype_v2df
b4de2f7d 13059 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 13060 tree v2df_ftype_double
b4de2f7d 13061 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
fbe5eb6d 13062 tree v2df_ftype_double_double
b4de2f7d
AH
13063 = build_function_type_list (V2DF_type_node,
13064 double_type_node, double_type_node, NULL_TREE);
fbe5eb6d 13065 tree int_ftype_v8hi_int
b4de2f7d
AH
13066 = build_function_type_list (integer_type_node,
13067 V8HI_type_node, integer_type_node, NULL_TREE);
fbe5eb6d 13068 tree v8hi_ftype_v8hi_int_int
b4de2f7d
AH
13069 = build_function_type_list (V8HI_type_node,
13070 V8HI_type_node, integer_type_node,
13071 integer_type_node, NULL_TREE);
916b60b7 13072 tree v2di_ftype_v2di_int
b4de2f7d
AH
13073 = build_function_type_list (V2DI_type_node,
13074 V2DI_type_node, integer_type_node, NULL_TREE);
fbe5eb6d 13075 tree v4si_ftype_v4si_int
b4de2f7d
AH
13076 = build_function_type_list (V4SI_type_node,
13077 V4SI_type_node, integer_type_node, NULL_TREE);
fbe5eb6d 13078 tree v8hi_ftype_v8hi_int
b4de2f7d
AH
13079 = build_function_type_list (V8HI_type_node,
13080 V8HI_type_node, integer_type_node, NULL_TREE);
916b60b7 13081 tree v8hi_ftype_v8hi_v2di
b4de2f7d
AH
13082 = build_function_type_list (V8HI_type_node,
13083 V8HI_type_node, V2DI_type_node, NULL_TREE);
916b60b7 13084 tree v4si_ftype_v4si_v2di
b4de2f7d
AH
13085 = build_function_type_list (V4SI_type_node,
13086 V4SI_type_node, V2DI_type_node, NULL_TREE);
916b60b7 13087 tree v4si_ftype_v8hi_v8hi
b4de2f7d
AH
13088 = build_function_type_list (V4SI_type_node,
13089 V8HI_type_node, V8HI_type_node, NULL_TREE);
916b60b7 13090 tree di_ftype_v8qi_v8qi
b4de2f7d
AH
13091 = build_function_type_list (long_long_unsigned_type_node,
13092 V8QI_type_node, V8QI_type_node, NULL_TREE);
916b60b7 13093 tree v2di_ftype_v16qi_v16qi
b4de2f7d
AH
13094 = build_function_type_list (V2DI_type_node,
13095 V16QI_type_node, V16QI_type_node, NULL_TREE);
916b60b7 13096 tree int_ftype_v16qi
b4de2f7d 13097 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
068f5dea
JH
13098 tree v16qi_ftype_pcchar
13099 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
f02e1358
JH
13100 tree void_ftype_pchar_v16qi
13101 = build_function_type_list (void_type_node,
13102 pchar_type_node, V16QI_type_node, NULL_TREE);
068f5dea
JH
13103 tree v4si_ftype_pcint
13104 = build_function_type_list (V4SI_type_node, pcint_type_node, NULL_TREE);
13105 tree void_ftype_pcint_v4si
f02e1358 13106 = build_function_type_list (void_type_node,
068f5dea 13107 pcint_type_node, V4SI_type_node, NULL_TREE);
f02e1358
JH
13108 tree v2di_ftype_v2di
13109 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
47f339cf 13110
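  /* Editorial note -- an explanatory sketch, not part of the original
     source.  The "_ftype_" names above encode prototypes: the return
     type comes first and the argument types follow.  v4sf_ftype_v4sf_v4sf,
     for instance, is the tree type of a function such as

         __m128 f (__m128 a, __m128 b);

     where __m128 is the <xmmintrin.h> spelling of a V4SF vector.  */
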
  /* Add all builtins that are more or less simple operations on two
     operands.  */
  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    {
      /* Use one of the operands; the target can have a different mode for
         mask-generating compares.  */
      enum machine_mode mode;
      tree type;

      if (d->name == 0)
        continue;
      mode = insn_data[d->icode].operand[1].mode;

      switch (mode)
        {
        case V16QImode:
          type = v16qi_ftype_v16qi_v16qi;
          break;
        case V8HImode:
          type = v8hi_ftype_v8hi_v8hi;
          break;
        case V4SImode:
          type = v4si_ftype_v4si_v4si;
          break;
        case V2DImode:
          type = v2di_ftype_v2di_v2di;
          break;
        case V2DFmode:
          type = v2df_ftype_v2df_v2df;
          break;
        case TImode:
          type = ti_ftype_ti_ti;
          break;
        case V4SFmode:
          type = v4sf_ftype_v4sf_v4sf;
          break;
        case V8QImode:
          type = v8qi_ftype_v8qi_v8qi;
          break;
        case V4HImode:
          type = v4hi_ftype_v4hi_v4hi;
          break;
        case V2SImode:
          type = v2si_ftype_v2si_v2si;
          break;
        case DImode:
          type = di_ftype_di_di;
          break;

        default:
          abort ();
        }

      /* Override for comparisons.  */
      if (d->icode == CODE_FOR_maskcmpv4sf3
          || d->icode == CODE_FOR_maskncmpv4sf3
          || d->icode == CODE_FOR_vmmaskcmpv4sf3
          || d->icode == CODE_FOR_vmmaskncmpv4sf3)
        type = v4si_ftype_v4sf_v4sf;

      if (d->icode == CODE_FOR_maskcmpv2df3
          || d->icode == CODE_FOR_maskncmpv2df3
          || d->icode == CODE_FOR_vmmaskcmpv2df3
          || d->icode == CODE_FOR_vmmaskncmpv2df3)
        type = v2di_ftype_v2df_v2df;

      def_builtin (d->mask, d->name, type, d->code);
    }

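  /* Editorial sketch (hypothetical entry, not from the original): given
     a bdesc_2arg record along the lines of

         { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps",
           IX86_BUILTIN_ADDPS, 0, 0 }

     operand[1] of addv4sf3 has mode V4SFmode, so the loop above selects
     v4sf_ftype_v4sf_v4sf and registers the builtin with that signature.
     The real table and its field layout appear earlier in this file;
     the entry shown here is only illustrative.  */
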
  /* Add the remaining MMX insns with somewhat more complicated types.  */
  def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
  def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
  def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
  def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
  def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);

  def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
  def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
  def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);

  def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
  def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);

  def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
  def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);

  /* comi/ucomi insns.  */
  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    if (d->mask == MASK_SSE2)
      def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
    else
      def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);

  def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
  def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
  def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);

  def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
  def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
  def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
  def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
  def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
  def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
  def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
  def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
  def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
  def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
  def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);

  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);

  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);

  def_builtin (MASK_SSE, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADAPS);
  def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
  def_builtin (MASK_SSE, "__builtin_ia32_loadss", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADSS);
  def_builtin (MASK_SSE, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
  def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
  def_builtin (MASK_SSE, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);

  def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
  def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
  def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
  def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);

  def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
  def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);

  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);

  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);

  def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
  def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
  def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
  def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
  def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
  def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);

  def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);

  /* Original 3DNow!  */
  def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);

  /* 3DNow! extension as used in the Athlon CPU.  */
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);

  def_builtin (MASK_SSE, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);

  /* SSE2 */
  def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
  def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);

  def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
  def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
  def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q);

  def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADAPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADSD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);

  def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);

  def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
  def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
  def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);

  def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
  def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
  def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
  def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);

  def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);

  def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
  def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
  def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
  def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);

  def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
  def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pcdouble, IX86_BUILTIN_LOADPD1);
  def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADRPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
  def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);

  def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
  def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
  def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);

  def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQA);
  def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
  def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pcint, IX86_BUILTIN_LOADD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA);
  def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
  def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pcint_v4si, IX86_BUILTIN_STORED);
  def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ);

  def_builtin (MASK_SSE, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI);

  def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
  def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);

  def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);

  def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);

  /* Prescott New Instructions.  */
  def_builtin (MASK_PNI, "__builtin_ia32_monitor",
               void_ftype_pcvoid_unsigned_unsigned,
               IX86_BUILTIN_MONITOR);
  def_builtin (MASK_PNI, "__builtin_ia32_mwait",
               void_ftype_unsigned_unsigned,
               IX86_BUILTIN_MWAIT);
  def_builtin (MASK_PNI, "__builtin_ia32_movshdup",
               v4sf_ftype_v4sf,
               IX86_BUILTIN_MOVSHDUP);
  def_builtin (MASK_PNI, "__builtin_ia32_movsldup",
               v4sf_ftype_v4sf,
               IX86_BUILTIN_MOVSLDUP);
  def_builtin (MASK_PNI, "__builtin_ia32_lddqu",
               v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
  def_builtin (MASK_PNI, "__builtin_ia32_loadddup",
               v2df_ftype_pcdouble, IX86_BUILTIN_LOADDDUP);
  def_builtin (MASK_PNI, "__builtin_ia32_movddup",
               v2df_ftype_v2df, IX86_BUILTIN_MOVDDUP);
}

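/* Editorial sketch, not part of the original source: once registered
   above, a builtin is directly callable from C whenever its ISA mask
   is enabled (e.g. -msse for MASK_SSE).  The intrinsic headers are
   thin wrappers over these builtins; illustratively:

       __m128 add_ps (__m128 a, __m128 b)
       {
         return __builtin_ia32_addps (a, b);
       }

   Calls like this are routed through ix86_expand_builtin below.  */
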
/* Errors in the source file can cause expand_expr to return const0_rtx
   where we expect a vector.  To avoid crashing, use one of the vector
   clear instructions.  */
static rtx
safe_vector_operand (rtx x, enum machine_mode mode)
{
  if (x != const0_rtx)
    return x;
  x = gen_reg_rtx (mode);

  if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
    emit_insn (gen_mmx_clrdi (mode == DImode ? x
                              : gen_rtx_SUBREG (DImode, x, 0)));
  else
    emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
                                : gen_rtx_SUBREG (V4SFmode, x, 0),
                                CONST0_RTX (V4SFmode)));
  return x;
}

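/* Editorial note: const0_rtx is a shared CONST_INT and so has VOIDmode;
   it can never satisfy a vector-mode operand predicate.  The substitute
   register built above is explicitly cleared, so expansion of erroneous
   source still yields well-formed (if meaningless) RTL instead of a
   crash.  */
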
/* Subroutine of ix86_expand_builtin to take care of binop insns.  */

static rtx
ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (GET_MODE (op1) == SImode && mode1 == TImode)
    {
      rtx x = gen_reg_rtx (V4SImode);
      emit_insn (gen_sse2_loadd (x, op1));
      op1 = gen_lowpart (TImode, x);
    }

  /* In case the insn wants input operands in modes different from
     the result, abort.  */
  if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
    abort ();

  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
     yet one of the two must not be a memory.  This is normally enforced
     by expanders, but we didn't bother to create one here.  */
  if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
    op0 = copy_to_mode_reg (mode0, op0);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

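/* Editorial sketch (illustrative, not in the original): for a plain
   two-operand builtin such as __builtin_ia32_paddw, the table scan at
   the bottom of ix86_expand_builtin ends up doing roughly

       ix86_expand_binop_builtin (CODE_FOR_addv4hi3, arglist, target);

   which expands both arguments, forces them into operands the named
   pattern accepts, and emits the single insn that pattern generates.  */
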
/* Subroutine of ix86_expand_builtin to take care of stores.  */

static rtx
ix86_expand_store_builtin (enum insn_code icode, tree arglist)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  enum machine_mode mode0 = insn_data[icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[icode].operand[1].mode;

  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (op0, op1);
  if (pat)
    emit_insn (pat);
  return 0;
}

/* Subroutine of ix86_expand_builtin to take care of unop insns.  */

static rtx
ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
                          rtx target, int do_load)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);
  if (do_load)
    op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  else
    {
      if (VECTOR_MODE_P (mode0))
        op0 = safe_vector_operand (op0, mode0);

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
    }

  pat = GEN_FCN (icode) (target, op0);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

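/* Editorial note: the DO_LOAD flag above serves builtins like
   __builtin_ia32_loadups whose one argument is a pointer rather than a
   value; in that case the pointer is forced into a register and wrapped
   in a MEM of the insn's input mode before the load pattern is emitted.  */
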
/* Subroutine of ix86_expand_builtin to take care of three special unop insns:
   sqrtss, rsqrtss, rcpss.  */

static rtx
ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);

  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);

  op1 = op0;
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
    op1 = copy_to_mode_reg (mode0, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

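/* Editorial note: the vm* patterns behind sqrtss, rsqrtss and rcpss
   take two inputs -- the vector whose low element is computed and the
   vector supplying the untouched upper elements.  The builtin passes
   only one argument, so op0 is duplicated into op1 above, mirroring
   the one-register form "sqrtss %xmm0, %xmm0".  */
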
/* Subroutine of ix86_expand_builtin to take care of comparison insns.  */

static rtx
ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
                         rtx target)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  rtx op2;
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag)
    {
      rtx tmp = gen_reg_rtx (mode1);
      emit_move_insn (tmp, op1);
      op1 = op0;
      op0 = tmp;
    }

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

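/* Editorial sketch (illustrative, not in the original): SSE encodes
   cmplt/cmple but not cmpgt/cmpge, so the missing comparisons are
   marked with the swap flag in their table entries and computed with
   reversed operands -- a > b becomes b < a.  Schematically:

       __builtin_ia32_cmpgtps (a, b)   ==>   cmpltps applied to (b, a)

   d->flag is what requests that swap above.  */
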
/* Subroutine of ix86_expand_builtin to take care of comi insns.  */

static rtx
ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
                      rtx target)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  rtx op2;
  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag)
    {
      rtx tmp = op1;
      op1 = op0;
      op0 = tmp;
    }

  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  emit_insn (gen_rtx_SET (VOIDmode,
                          gen_rtx_STRICT_LOW_PART (VOIDmode, target),
                          gen_rtx_fmt_ee (comparison, QImode,
                                          SET_DEST (pat),
                                          const0_rtx)));

  return SUBREG_REG (target);
}

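/* Editorial note: unlike the mask-generating compares handled above,
   comiss/ucomiss and their SSE2 counterparts set EFLAGS.  The expansion
   therefore emits the compare and then materializes the predicate into
   the low byte of a zeroed SImode register via STRICT_LOW_PART, so a
   builtin such as __builtin_ia32_comieq yields an int 0/1.  */
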
13687/* Expand an expression EXP that calls a built-in function,
13688 with result going to TARGET if that's convenient
13689 (and in mode MODE if that's convenient).
13690 SUBTARGET may be used as the target for computing one of EXP's operands.
13691 IGNORE is nonzero if the value is to be ignored. */
13692
13693rtx
b96a374d
AJ
13694ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
13695 enum machine_mode mode ATTRIBUTE_UNUSED,
13696 int ignore ATTRIBUTE_UNUSED)
bd793c65 13697{
8b60264b 13698 const struct builtin_description *d;
77ebd435 13699 size_t i;
bd793c65
BS
13700 enum insn_code icode;
13701 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
13702 tree arglist = TREE_OPERAND (exp, 1);
e37af218 13703 tree arg0, arg1, arg2;
bd793c65
BS
13704 rtx op0, op1, op2, pat;
13705 enum machine_mode tmode, mode0, mode1, mode2;
8752c357 13706 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
bd793c65
BS
13707
13708 switch (fcode)
13709 {
13710 case IX86_BUILTIN_EMMS:
13711 emit_insn (gen_emms ());
13712 return 0;
13713
13714 case IX86_BUILTIN_SFENCE:
13715 emit_insn (gen_sfence ());
13716 return 0;
13717
bd793c65 13718 case IX86_BUILTIN_PEXTRW:
fbe5eb6d
BS
13719 case IX86_BUILTIN_PEXTRW128:
13720 icode = (fcode == IX86_BUILTIN_PEXTRW
13721 ? CODE_FOR_mmx_pextrw
13722 : CODE_FOR_sse2_pextrw);
bd793c65
BS
13723 arg0 = TREE_VALUE (arglist);
13724 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13725 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13726 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13727 tmode = insn_data[icode].operand[0].mode;
13728 mode0 = insn_data[icode].operand[1].mode;
13729 mode1 = insn_data[icode].operand[2].mode;
13730
13731 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13732 op0 = copy_to_mode_reg (mode0, op0);
13733 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13734 {
13735 /* @@@ better error message */
13736 error ("selector must be an immediate");
6f1a6c5b 13737 return gen_reg_rtx (tmode);
bd793c65
BS
13738 }
13739 if (target == 0
13740 || GET_MODE (target) != tmode
13741 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13742 target = gen_reg_rtx (tmode);
13743 pat = GEN_FCN (icode) (target, op0, op1);
13744 if (! pat)
13745 return 0;
13746 emit_insn (pat);
13747 return target;
13748
13749 case IX86_BUILTIN_PINSRW:
fbe5eb6d
BS
13750 case IX86_BUILTIN_PINSRW128:
13751 icode = (fcode == IX86_BUILTIN_PINSRW
13752 ? CODE_FOR_mmx_pinsrw
13753 : CODE_FOR_sse2_pinsrw);
bd793c65
BS
13754 arg0 = TREE_VALUE (arglist);
13755 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13756 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13757 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13758 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13759 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13760 tmode = insn_data[icode].operand[0].mode;
13761 mode0 = insn_data[icode].operand[1].mode;
13762 mode1 = insn_data[icode].operand[2].mode;
13763 mode2 = insn_data[icode].operand[3].mode;
13764
13765 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13766 op0 = copy_to_mode_reg (mode0, op0);
13767 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13768 op1 = copy_to_mode_reg (mode1, op1);
13769 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13770 {
13771 /* @@@ better error message */
13772 error ("selector must be an immediate");
13773 return const0_rtx;
13774 }
13775 if (target == 0
13776 || GET_MODE (target) != tmode
13777 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13778 target = gen_reg_rtx (tmode);
13779 pat = GEN_FCN (icode) (target, op0, op1, op2);
13780 if (! pat)
13781 return 0;
13782 emit_insn (pat);
13783 return target;
13784
13785 case IX86_BUILTIN_MASKMOVQ:
077084dd 13786 case IX86_BUILTIN_MASKMOVDQU:
fbe5eb6d
BS
13787 icode = (fcode == IX86_BUILTIN_MASKMOVQ
13788 ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
f8ca7923
JH
13789 : (TARGET_64BIT ? CODE_FOR_sse2_maskmovdqu_rex64
13790 : CODE_FOR_sse2_maskmovdqu));
bd793c65
BS
13791 /* Note the arg order is different from the operand order. */
13792 arg1 = TREE_VALUE (arglist);
13793 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
13794 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13795 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13796 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13797 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13798 mode0 = insn_data[icode].operand[0].mode;
13799 mode1 = insn_data[icode].operand[1].mode;
13800 mode2 = insn_data[icode].operand[2].mode;
13801
5c464583 13802 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
bd793c65
BS
13803 op0 = copy_to_mode_reg (mode0, op0);
13804 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
13805 op1 = copy_to_mode_reg (mode1, op1);
13806 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
13807 op2 = copy_to_mode_reg (mode2, op2);
13808 pat = GEN_FCN (icode) (op0, op1, op2);
13809 if (! pat)
13810 return 0;
13811 emit_insn (pat);
13812 return 0;
13813
13814 case IX86_BUILTIN_SQRTSS:
13815 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
13816 case IX86_BUILTIN_RSQRTSS:
13817 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
13818 case IX86_BUILTIN_RCPSS:
13819 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
13820
13821 case IX86_BUILTIN_LOADAPS:
13822 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
13823
13824 case IX86_BUILTIN_LOADUPS:
13825 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
13826
13827 case IX86_BUILTIN_STOREAPS:
e37af218 13828 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
f02e1358 13829
bd793c65 13830 case IX86_BUILTIN_STOREUPS:
e37af218 13831 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
bd793c65
BS
13832
13833 case IX86_BUILTIN_LOADSS:
13834 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
13835
13836 case IX86_BUILTIN_STORESS:
e37af218 13837 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
bd793c65 13838
0f290768 13839 case IX86_BUILTIN_LOADHPS:
bd793c65 13840 case IX86_BUILTIN_LOADLPS:
fbe5eb6d
BS
13841 case IX86_BUILTIN_LOADHPD:
13842 case IX86_BUILTIN_LOADLPD:
13843 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
13844 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
13845 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
13846 : CODE_FOR_sse2_movlpd);
bd793c65
BS
13847 arg0 = TREE_VALUE (arglist);
13848 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13849 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13850 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13851 tmode = insn_data[icode].operand[0].mode;
13852 mode0 = insn_data[icode].operand[1].mode;
13853 mode1 = insn_data[icode].operand[2].mode;
13854
13855 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13856 op0 = copy_to_mode_reg (mode0, op0);
13857 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
13858 if (target == 0
13859 || GET_MODE (target) != tmode
13860 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13861 target = gen_reg_rtx (tmode);
13862 pat = GEN_FCN (icode) (target, op0, op1);
13863 if (! pat)
13864 return 0;
13865 emit_insn (pat);
13866 return target;
0f290768 13867
bd793c65
BS
13868 case IX86_BUILTIN_STOREHPS:
13869 case IX86_BUILTIN_STORELPS:
fbe5eb6d
BS
13870 case IX86_BUILTIN_STOREHPD:
13871 case IX86_BUILTIN_STORELPD:
13872 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
13873 : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
13874 : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
13875 : CODE_FOR_sse2_movlpd);
bd793c65
BS
13876 arg0 = TREE_VALUE (arglist);
13877 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13878 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13879 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13880 mode0 = insn_data[icode].operand[1].mode;
13881 mode1 = insn_data[icode].operand[2].mode;
13882
13883 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13884 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13885 op1 = copy_to_mode_reg (mode1, op1);
13886
13887 pat = GEN_FCN (icode) (op0, op0, op1);
13888 if (! pat)
13889 return 0;
13890 emit_insn (pat);
13891 return 0;
13892
13893 case IX86_BUILTIN_MOVNTPS:
e37af218 13894 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
bd793c65 13895 case IX86_BUILTIN_MOVNTQ:
e37af218 13896 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
bd793c65
BS
13897
13898 case IX86_BUILTIN_LDMXCSR:
13899 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
13900 target = assign_386_stack_local (SImode, 0);
13901 emit_move_insn (target, op0);
13902 emit_insn (gen_ldmxcsr (target));
13903 return 0;
13904
13905 case IX86_BUILTIN_STMXCSR:
13906 target = assign_386_stack_local (SImode, 0);
13907 emit_insn (gen_stmxcsr (target));
13908 return copy_to_mode_reg (SImode, target);
13909
bd793c65 13910 case IX86_BUILTIN_SHUFPS:
fbe5eb6d
BS
13911 case IX86_BUILTIN_SHUFPD:
13912 icode = (fcode == IX86_BUILTIN_SHUFPS
13913 ? CODE_FOR_sse_shufps
13914 : CODE_FOR_sse2_shufpd);
bd793c65
BS
13915 arg0 = TREE_VALUE (arglist);
13916 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13917 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13918 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13919 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13920 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13921 tmode = insn_data[icode].operand[0].mode;
13922 mode0 = insn_data[icode].operand[1].mode;
13923 mode1 = insn_data[icode].operand[2].mode;
13924 mode2 = insn_data[icode].operand[3].mode;
13925
13926 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13927 op0 = copy_to_mode_reg (mode0, op0);
13928 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13929 op1 = copy_to_mode_reg (mode1, op1);
13930 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13931 {
13932 /* @@@ better error message */
13933 error ("mask must be an immediate");
6f1a6c5b 13934 return gen_reg_rtx (tmode);
bd793c65
BS
13935 }
13936 if (target == 0
13937 || GET_MODE (target) != tmode
13938 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13939 target = gen_reg_rtx (tmode);
13940 pat = GEN_FCN (icode) (target, op0, op1, op2);
13941 if (! pat)
13942 return 0;
13943 emit_insn (pat);
13944 return target;
13945
13946 case IX86_BUILTIN_PSHUFW:
fbe5eb6d
BS
13947 case IX86_BUILTIN_PSHUFD:
13948 case IX86_BUILTIN_PSHUFHW:
13949 case IX86_BUILTIN_PSHUFLW:
13950 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
13951 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
13952 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
13953 : CODE_FOR_mmx_pshufw);
bd793c65
BS
13954 arg0 = TREE_VALUE (arglist);
13955 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13956 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13957 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13958 tmode = insn_data[icode].operand[0].mode;
29628f27
BS
13959 mode1 = insn_data[icode].operand[1].mode;
13960 mode2 = insn_data[icode].operand[2].mode;
bd793c65 13961
29628f27
BS
13962 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
13963 op0 = copy_to_mode_reg (mode1, op0);
13964 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
bd793c65
BS
13965 {
13966 /* @@@ better error message */
13967 error ("mask must be an immediate");
13968 return const0_rtx;
13969 }
13970 if (target == 0
13971 || GET_MODE (target) != tmode
13972 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13973 target = gen_reg_rtx (tmode);
29628f27 13974 pat = GEN_FCN (icode) (target, op0, op1);
bd793c65
BS
13975 if (! pat)
13976 return 0;
13977 emit_insn (pat);
13978 return target;
13979
ab3146fd
ZD
13980 case IX86_BUILTIN_PSLLDQI128:
13981 case IX86_BUILTIN_PSRLDQI128:
13982 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
13983 : CODE_FOR_sse2_lshrti3);
13984 arg0 = TREE_VALUE (arglist);
13985 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13986 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13987 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13988 tmode = insn_data[icode].operand[0].mode;
13989 mode1 = insn_data[icode].operand[1].mode;
13990 mode2 = insn_data[icode].operand[2].mode;
13991
13992 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
13993 {
13994 op0 = copy_to_reg (op0);
13995 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
13996 }
13997 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
13998 {
13999 error ("shift must be an immediate");
14000 return const0_rtx;
14001 }
14002 target = gen_reg_rtx (V2DImode);
14003 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
14004 if (! pat)
14005 return 0;
14006 emit_insn (pat);
14007 return target;
14008
47f339cf
BS
14009 case IX86_BUILTIN_FEMMS:
14010 emit_insn (gen_femms ());
14011 return NULL_RTX;
14012
14013 case IX86_BUILTIN_PAVGUSB:
14014 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
14015
14016 case IX86_BUILTIN_PF2ID:
14017 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
14018
14019 case IX86_BUILTIN_PFACC:
14020 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
14021
14022 case IX86_BUILTIN_PFADD:
14023 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
14024
14025 case IX86_BUILTIN_PFCMPEQ:
14026 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
14027
14028 case IX86_BUILTIN_PFCMPGE:
14029 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
14030
14031 case IX86_BUILTIN_PFCMPGT:
14032 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
14033
14034 case IX86_BUILTIN_PFMAX:
14035 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
14036
14037 case IX86_BUILTIN_PFMIN:
14038 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
14039
14040 case IX86_BUILTIN_PFMUL:
14041 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
14042
14043 case IX86_BUILTIN_PFRCP:
14044 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
14045
14046 case IX86_BUILTIN_PFRCPIT1:
14047 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
14048
14049 case IX86_BUILTIN_PFRCPIT2:
14050 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
14051
14052 case IX86_BUILTIN_PFRSQIT1:
14053 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
14054
14055 case IX86_BUILTIN_PFRSQRT:
14056 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
14057
14058 case IX86_BUILTIN_PFSUB:
14059 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
14060
14061 case IX86_BUILTIN_PFSUBR:
14062 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
14063
14064 case IX86_BUILTIN_PI2FD:
14065 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
14066
14067 case IX86_BUILTIN_PMULHRW:
14068 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
14069
47f339cf
BS
14070 case IX86_BUILTIN_PF2IW:
14071 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
14072
14073 case IX86_BUILTIN_PFNACC:
14074 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
14075
14076 case IX86_BUILTIN_PFPNACC:
14077 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
14078
14079 case IX86_BUILTIN_PI2FW:
14080 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
14081
14082 case IX86_BUILTIN_PSWAPDSI:
14083 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
14084
14085 case IX86_BUILTIN_PSWAPDSF:
14086 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
14087
e37af218
RH
14088 case IX86_BUILTIN_SSE_ZERO:
14089 target = gen_reg_rtx (V4SFmode);
4977bab6 14090 emit_insn (gen_sse_clrv4sf (target, CONST0_RTX (V4SFmode)));
bd793c65
BS
14091 return target;
14092
bd793c65
BS
14093 case IX86_BUILTIN_MMX_ZERO:
14094 target = gen_reg_rtx (DImode);
14095 emit_insn (gen_mmx_clrdi (target));
14096 return target;
14097
f02e1358
JH
14098 case IX86_BUILTIN_CLRTI:
14099 target = gen_reg_rtx (V2DImode);
14100 emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode, target, V2DImode, 0)));
14101 return target;
14102
14103
fbe5eb6d
BS
14104 case IX86_BUILTIN_SQRTSD:
14105 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
14106 case IX86_BUILTIN_LOADAPD:
14107 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
14108 case IX86_BUILTIN_LOADUPD:
14109 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
14110
14111 case IX86_BUILTIN_STOREAPD:
14112 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14113 case IX86_BUILTIN_STOREUPD:
14114 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
14115
14116 case IX86_BUILTIN_LOADSD:
14117 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
14118
14119 case IX86_BUILTIN_STORESD:
14120 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
14121
14122 case IX86_BUILTIN_SETPD1:
14123 target = assign_386_stack_local (DFmode, 0);
14124 arg0 = TREE_VALUE (arglist);
14125 emit_move_insn (adjust_address (target, DFmode, 0),
14126 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14127 op0 = gen_reg_rtx (V2DFmode);
14128 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
14129 emit_insn (gen_sse2_shufpd (op0, op0, op0, GEN_INT (0)));
14130 return op0;
14131
14132 case IX86_BUILTIN_SETPD:
14133 target = assign_386_stack_local (V2DFmode, 0);
14134 arg0 = TREE_VALUE (arglist);
14135 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14136 emit_move_insn (adjust_address (target, DFmode, 0),
14137 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14138 emit_move_insn (adjust_address (target, DFmode, 8),
14139 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
14140 op0 = gen_reg_rtx (V2DFmode);
14141 emit_insn (gen_sse2_movapd (op0, target));
14142 return op0;
14143
14144 case IX86_BUILTIN_LOADRPD:
14145 target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
14146 gen_reg_rtx (V2DFmode), 1);
14147 emit_insn (gen_sse2_shufpd (target, target, target, GEN_INT (1)));
14148 return target;
14149
14150 case IX86_BUILTIN_LOADPD1:
14151 target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
14152 gen_reg_rtx (V2DFmode), 1);
14153 emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
14154 return target;
14155
14156 case IX86_BUILTIN_STOREPD1:
14157 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14158 case IX86_BUILTIN_STORERPD:
14159 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14160

    case IX86_BUILTIN_CLRPD:
      target = gen_reg_rtx (V2DFmode);
      emit_insn (gen_sse_clrv2df (target));
      return target;

    case IX86_BUILTIN_MFENCE:
      emit_insn (gen_sse2_mfence ());
      return 0;
    case IX86_BUILTIN_LFENCE:
      emit_insn (gen_sse2_lfence ());
      return 0;

    case IX86_BUILTIN_CLFLUSH:
      arg0 = TREE_VALUE (arglist);
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      icode = CODE_FOR_sse2_clflush;
      if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
        op0 = copy_to_mode_reg (Pmode, op0);

      emit_insn (gen_sse2_clflush (op0));
      return 0;

    case IX86_BUILTIN_MOVNTPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
    case IX86_BUILTIN_MOVNTDQ:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
    case IX86_BUILTIN_MOVNTI:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);

    case IX86_BUILTIN_LOADDQA:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa, arglist, target, 1);
    case IX86_BUILTIN_LOADDQU:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
    case IX86_BUILTIN_LOADD:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd, arglist, target, 1);

    case IX86_BUILTIN_STOREDQA:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa, arglist);
    case IX86_BUILTIN_STOREDQU:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
    case IX86_BUILTIN_STORED:
      return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist);

    case IX86_BUILTIN_MONITOR:
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      if (!REG_P (op0))
        op0 = copy_to_mode_reg (SImode, op0);
      if (!REG_P (op1))
        op1 = copy_to_mode_reg (SImode, op1);
      if (!REG_P (op2))
        op2 = copy_to_mode_reg (SImode, op2);
      emit_insn (gen_monitor (op0, op1, op2));
      return 0;

    case IX86_BUILTIN_MWAIT:
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      if (!REG_P (op0))
        op0 = copy_to_mode_reg (SImode, op0);
      if (!REG_P (op1))
        op1 = copy_to_mode_reg (SImode, op1);
      emit_insn (gen_mwait (op0, op1));
      return 0;

    case IX86_BUILTIN_LOADDDUP:
      return ix86_expand_unop_builtin (CODE_FOR_loadddup, arglist, target, 1);

    case IX86_BUILTIN_LDDQU:
      return ix86_expand_unop_builtin (CODE_FOR_lddqu, arglist, target, 1);

    default:
      break;
    }

  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    if (d->code == fcode)
      {
        /* Compares are treated specially.  */
        if (d->icode == CODE_FOR_maskcmpv4sf3
            || d->icode == CODE_FOR_vmmaskcmpv4sf3
            || d->icode == CODE_FOR_maskncmpv4sf3
            || d->icode == CODE_FOR_vmmaskncmpv4sf3
            || d->icode == CODE_FOR_maskcmpv2df3
            || d->icode == CODE_FOR_vmmaskcmpv2df3
            || d->icode == CODE_FOR_maskncmpv2df3
            || d->icode == CODE_FOR_vmmaskncmpv2df3)
          return ix86_expand_sse_compare (d, arglist, target);

        return ix86_expand_binop_builtin (d->icode, arglist, target);
      }

  for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
    if (d->code == fcode)
      return ix86_expand_unop_builtin (d->icode, arglist, target, 0);

  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_comi (d, arglist, target);

  /* @@@ Should really do something sensible here.  */
  return 0;
}

/* Store OPERAND to the memory after reload is completed.  This means
   that we can't easily use assign_stack_local.  */
rtx
ix86_force_to_memory (enum machine_mode mode, rtx operand)
{
  rtx result;
  if (!reload_completed)
    abort ();
  if (TARGET_RED_ZONE)
    {
      result = gen_rtx_MEM (mode,
                            gen_rtx_PLUS (Pmode,
                                          stack_pointer_rtx,
                                          GEN_INT (-RED_ZONE_SIZE)));
      emit_move_insn (result, operand);
    }
  else if (!TARGET_RED_ZONE && TARGET_64BIT)
    {
      switch (mode)
        {
        case HImode:
        case SImode:
          operand = gen_lowpart (DImode, operand);
          /* FALLTHRU */
        case DImode:
          emit_insn (
            gen_rtx_SET (VOIDmode,
                         gen_rtx_MEM (DImode,
                                      gen_rtx_PRE_DEC (DImode,
                                                       stack_pointer_rtx)),
                         operand));
          break;
        default:
          abort ();
        }
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  else
    {
      switch (mode)
        {
        case DImode:
          {
            rtx operands[2];
            split_di (&operand, 1, operands, operands + 1);
            emit_insn (
              gen_rtx_SET (VOIDmode,
                           gen_rtx_MEM (SImode,
                                        gen_rtx_PRE_DEC (Pmode,
                                                         stack_pointer_rtx)),
                           operands[1]));
            emit_insn (
              gen_rtx_SET (VOIDmode,
                           gen_rtx_MEM (SImode,
                                        gen_rtx_PRE_DEC (Pmode,
                                                         stack_pointer_rtx)),
                           operands[0]));
          }
          break;
        case HImode:
          /* It is better to store HImodes as SImodes.  */
          if (!TARGET_PARTIAL_REG_STALL)
            operand = gen_lowpart (SImode, operand);
          /* FALLTHRU */
        case SImode:
          emit_insn (
            gen_rtx_SET (VOIDmode,
                         gen_rtx_MEM (GET_MODE (operand),
                                      gen_rtx_PRE_DEC (SImode,
                                                       stack_pointer_rtx)),
                         operand));
          break;
        default:
          abort ();
        }
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  return result;
}

/* Free the operand from memory.  */
void
ix86_free_from_memory (enum machine_mode mode)
{
  if (!TARGET_RED_ZONE)
    {
      int size;

      if (mode == DImode || TARGET_64BIT)
        size = 8;
      else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
        size = 2;
      else
        size = 4;
      /* Use LEA to deallocate stack space.  In peephole2 it will be
         converted to a pop or add instruction if registers are
         available.  */
      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                              gen_rtx_PLUS (Pmode, stack_pointer_rtx,
                                            GEN_INT (size))));
    }
}
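
/* Illustrative sketch (not from the original sources): a post-reload
   splitter that needs a scratch stack slot could pair the two helpers
   above like this, assuming `operand' is an SImode value:

     rtx mem = ix86_force_to_memory (SImode, operand);
     ... emit instructions that read MEM instead of OPERAND ...
     ix86_free_from_memory (SImode);

   On TARGET_RED_ZONE the slot lives below the stack pointer and nothing
   needs to be deallocated; otherwise the push is undone by the LEA
   emitted in ix86_free_from_memory.  */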

/* Put float CONST_DOUBLE in the constant pool instead of fp regs.
   QImode must go into class Q_REGS.
   Narrow ALL_REGS to GENERAL_REGS.  This supports allowing movsf and
   movdf to do mem-to-mem moves through integer regs.  */
enum reg_class
ix86_preferred_reload_class (rtx x, enum reg_class class)
{
  if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x)))
    return NO_REGS;
  if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
    {
      /* SSE can't load any constant directly yet.  */
      if (SSE_CLASS_P (class))
        return NO_REGS;
      /* Floats can load 0 and 1.  */
      if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
        {
          /* Limit class to non-SSE.  Use GENERAL_REGS if possible.  */
          if (MAYBE_SSE_CLASS_P (class))
            return (reg_class_subset_p (class, GENERAL_REGS)
                    ? GENERAL_REGS : FLOAT_REGS);
          else
            return class;
        }
      /* General regs can load everything.  */
      if (reg_class_subset_p (class, GENERAL_REGS))
        return GENERAL_REGS;
      /* In case we haven't resolved FLOAT or SSE yet, give up.  */
      if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
        return NO_REGS;
    }
  if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
    return NO_REGS;
  if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
    return Q_REGS;
  return class;
}
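
/* Worked example (hypothetical): reloading the constant 1.0 (a
   CONST_DOUBLE) into an x87 class passes the standard_80387_constant_p
   test above and keeps the FP class, so the value can be materialized
   directly; the same constant requested in SSE_REGS yields NO_REGS,
   forcing it into the constant pool instead.  */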

/* If we are copying between general and FP registers, we need a memory
   location.  The same is true for SSE and MMX registers.

   The macro can't work reliably when one of the CLASSES is a class
   containing registers from multiple units (SSE, MMX, integer).  We avoid
   this by never combining those units in a single alternative in the
   machine description.  Ensure that this constraint holds to avoid
   unexpected surprises.

   When STRICT is false, we are being called from REGISTER_MOVE_COST, so
   do not enforce these sanity checks.  */
int
ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
                              enum machine_mode mode, int strict)
{
  if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
      || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
      || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
      || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
      || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
      || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
    {
      if (strict)
        abort ();
      else
        return 1;
    }
  return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
          || ((SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
               || MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
              && ((mode != SImode && (mode != DImode || !TARGET_64BIT))
                  || (!TARGET_INTER_UNIT_MOVES && !optimize_size))));
}
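
/* Worked example (hypothetical): on ia32, moving a DFmode value between
   FLOAT_REGS and SSE_REGS makes FLOAT_CLASS_P differ between the two
   classes, so the function above returns nonzero and reload routes the
   copy through a stack slot.  An SImode move between GENERAL_REGS and
   SSE_REGS stays register-to-register when TARGET_INTER_UNIT_MOVES is
   enabled.  */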

/* Return the cost of moving data from a register in class CLASS1 to
   one in class CLASS2.

   It is not required that the cost always equal 2 when FROM is the same
   as TO; on some machines it is expensive to move between registers if
   they are not general registers.  */
int
ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
                         enum reg_class class2)
{
  /* In case we require secondary memory, compute cost of the store
     followed by load.  In order to avoid bad register allocation choices,
     we need for this to be *at least* as high as the symmetric
     MEMORY_MOVE_COST.  */

  if (ix86_secondary_memory_needed (class1, class2, mode, 0))
    {
      int cost = 1;

      cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
                   MEMORY_MOVE_COST (mode, class1, 1));
      cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
                   MEMORY_MOVE_COST (mode, class2, 1));

      /* When copying from a general purpose register we may emit multiple
         stores followed by a single load, causing a memory-size-mismatch
         stall.  Count this as an arbitrarily high cost of 20.  */
      if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
        cost += 20;

      /* In the case of FP/MMX moves, the registers actually overlap, and
         we have to switch modes in order to treat them differently.  */
      if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
          || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
        cost += 20;

      return cost;
    }

  /* Moves between SSE/MMX and integer unit are expensive.  */
  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
      || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
    return ix86_cost->mmxsse_to_integer;
  if (MAYBE_FLOAT_CLASS_P (class1))
    return ix86_cost->fp_move;
  if (MAYBE_SSE_CLASS_P (class1))
    return ix86_cost->sse_move;
  if (MAYBE_MMX_CLASS_P (class1))
    return ix86_cost->mmx_move;
  return 2;
}
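
/* Worked example (hypothetical numbers): copying DImode from
   GENERAL_REGS to FLOAT_REGS on ia32 needs secondary memory, so the cost
   is 1 plus both MEMORY_MOVE_COST maxima; and since CLASS_MAX_NREGS gives
   two integer registers against one FP register, the extra 20 for the
   size-mismatch stall applies as well.  */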

/* Return 1 if hard register REGNO can hold a value of machine-mode MODE.  */
int
ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
{
  /* The flags registers, and only they, can hold CCmode values.  */
  if (CC_REGNO_P (regno))
    return GET_MODE_CLASS (mode) == MODE_CC;
  if (GET_MODE_CLASS (mode) == MODE_CC
      || GET_MODE_CLASS (mode) == MODE_RANDOM
      || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
    return 0;
  if (FP_REGNO_P (regno))
    return VALID_FP_MODE_P (mode);
  if (SSE_REGNO_P (regno))
    return (TARGET_SSE ? VALID_SSE_REG_MODE (mode) : 0);
  if (MMX_REGNO_P (regno))
    return (TARGET_MMX
            ? VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode) : 0);
  /* We handle both integer and floats in the general purpose registers.
     In the future we should be able to handle vector modes as well.  */
  if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
    return 0;
  /* Take care with QImode values - they can be in non-QI regs, but then
     they do cause partial register stalls.  */
  if (regno < 4 || mode != QImode || TARGET_64BIT)
    return 1;
  return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
}

/* Return the cost of moving data of mode M between a
   register and memory.  A value of 2 is the default; this cost is
   relative to those in `REGISTER_MOVE_COST'.

   If moving between registers and memory is more expensive than
   between two registers, you should define this macro to express the
   relative cost.

   Also model the increased cost of moving QImode values in non-Q_REGS
   classes.  */
int
ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
{
  if (FLOAT_CLASS_P (class))
    {
      int index;
      switch (mode)
        {
        case SFmode:
          index = 0;
          break;
        case DFmode:
          index = 1;
          break;
        case XFmode:
        case TFmode:
          index = 2;
          break;
        default:
          return 100;
        }
      return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
    }
  if (SSE_CLASS_P (class))
    {
      int index;
      switch (GET_MODE_SIZE (mode))
        {
        case 4:
          index = 0;
          break;
        case 8:
          index = 1;
          break;
        case 16:
          index = 2;
          break;
        default:
          return 100;
        }
      return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
    }
  if (MMX_CLASS_P (class))
    {
      int index;
      switch (GET_MODE_SIZE (mode))
        {
        case 4:
          index = 0;
          break;
        case 8:
          index = 1;
          break;
        default:
          return 100;
        }
      return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
    }
  switch (GET_MODE_SIZE (mode))
    {
    case 1:
      if (in)
        return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
                : ix86_cost->movzbl_load);
      else
        return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
                : ix86_cost->int_store[0] + 4);
      break;
    case 2:
      return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
    default:
      /* Compute number of 32bit moves needed.  TFmode is moved as XFmode.  */
      if (mode == TFmode)
        mode = XFmode;
      return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
              * (((int) GET_MODE_SIZE (mode)
                  + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
    }
}
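
/* Worked example: with the cost tables above, loading a 16-byte vector
   into an SSE register uses sse_load[2], while an SImode load into
   GENERAL_REGS falls through to the final switch and uses int_load[2].
   QImode stores into non-Q_REGS classes are charged int_store[0] + 4 to
   reflect the missing byte-addressable registers.  */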

/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.  */

static bool
ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
{
  enum machine_mode mode = GET_MODE (x);

  switch (code)
    {
    case CONST_INT:
    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      if (TARGET_64BIT && !x86_64_sign_extended_value (x))
        *total = 3;
      else if (TARGET_64BIT && !x86_64_zero_extended_value (x))
        *total = 2;
      else if (flag_pic && SYMBOLIC_CONST (x)
               && (!TARGET_64BIT
                   || (GET_CODE (x) != LABEL_REF
                       && (GET_CODE (x) != SYMBOL_REF
                           || !SYMBOL_REF_LOCAL_P (x)))))
        *total = 1;
      else
        *total = 0;
      return true;

    case CONST_DOUBLE:
      if (mode == VOIDmode)
        *total = 0;
      else
        switch (standard_80387_constant_p (x))
          {
          case 1: /* 0.0 */
            *total = 1;
            break;
          default: /* Other constants */
            *total = 2;
            break;
          case 0:
          case -1:
            /* Start with (MEM (SYMBOL_REF)), since that's where
               it'll probably end up.  Add a penalty for size.  */
            *total = (COSTS_N_INSNS (1)
                      + (flag_pic != 0 && !TARGET_64BIT)
                      + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
            break;
          }
      return true;

    case ZERO_EXTEND:
      /* The zero extension is often completely free on x86_64, so make
         it as cheap as possible.  */
      if (TARGET_64BIT && mode == DImode
          && GET_MODE (XEXP (x, 0)) == SImode)
        *total = 1;
      else if (TARGET_ZERO_EXTEND_WITH_AND)
        *total = COSTS_N_INSNS (ix86_cost->add);
      else
        *total = COSTS_N_INSNS (ix86_cost->movzx);
      return false;

    case SIGN_EXTEND:
      *total = COSTS_N_INSNS (ix86_cost->movsx);
      return false;

    case ASHIFT:
      if (GET_CODE (XEXP (x, 1)) == CONST_INT
          && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
        {
          HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
          if (value == 1)
            {
              *total = COSTS_N_INSNS (ix86_cost->add);
              return false;
            }
          if ((value == 2 || value == 3)
              && !TARGET_DECOMPOSE_LEA
              && ix86_cost->lea <= ix86_cost->shift_const)
            {
              *total = COSTS_N_INSNS (ix86_cost->lea);
              return false;
            }
        }
      /* FALLTHRU */

    case ROTATE:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
        {
          if (GET_CODE (XEXP (x, 1)) == CONST_INT)
            {
              if (INTVAL (XEXP (x, 1)) > 32)
                *total = COSTS_N_INSNS (ix86_cost->shift_const + 2);
              else
                *total = COSTS_N_INSNS (ix86_cost->shift_const * 2);
            }
          else
            {
              if (GET_CODE (XEXP (x, 1)) == AND)
                *total = COSTS_N_INSNS (ix86_cost->shift_var * 2);
              else
                *total = COSTS_N_INSNS (ix86_cost->shift_var * 6 + 2);
            }
        }
      else
        {
          if (GET_CODE (XEXP (x, 1)) == CONST_INT)
            *total = COSTS_N_INSNS (ix86_cost->shift_const);
          else
            *total = COSTS_N_INSNS (ix86_cost->shift_var);
        }
      return false;

    case MULT:
      if (FLOAT_MODE_P (mode))
        *total = COSTS_N_INSNS (ix86_cost->fmul);
      else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
        {
          unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
          int nbits;

          for (nbits = 0; value != 0; value >>= 1)
            nbits++;

          *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
                                  + nbits * ix86_cost->mult_bit);
        }
      else
        {
          /* This is arbitrary.  */
          *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
                                  + 7 * ix86_cost->mult_bit);
        }
      return false;

    case DIV:
    case UDIV:
    case MOD:
    case UMOD:
      if (FLOAT_MODE_P (mode))
        *total = COSTS_N_INSNS (ix86_cost->fdiv);
      else
        *total = COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)]);
      return false;

    case PLUS:
      if (FLOAT_MODE_P (mode))
        *total = COSTS_N_INSNS (ix86_cost->fadd);
      else if (!TARGET_DECOMPOSE_LEA
               && GET_MODE_CLASS (mode) == MODE_INT
               && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
        {
          if (GET_CODE (XEXP (x, 0)) == PLUS
              && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
              && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
              && CONSTANT_P (XEXP (x, 1)))
            {
              HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
              if (val == 2 || val == 4 || val == 8)
                {
                  *total = COSTS_N_INSNS (ix86_cost->lea);
                  *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
                  *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
                                      outer_code);
                  *total += rtx_cost (XEXP (x, 1), outer_code);
                  return true;
                }
            }
          else if (GET_CODE (XEXP (x, 0)) == MULT
                   && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
            {
              HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
              if (val == 2 || val == 4 || val == 8)
                {
                  *total = COSTS_N_INSNS (ix86_cost->lea);
                  *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
                  *total += rtx_cost (XEXP (x, 1), outer_code);
                  return true;
                }
            }
          else if (GET_CODE (XEXP (x, 0)) == PLUS)
            {
              *total = COSTS_N_INSNS (ix86_cost->lea);
              *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
              *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
              *total += rtx_cost (XEXP (x, 1), outer_code);
              return true;
            }
        }
      /* FALLTHRU */

    case MINUS:
      if (FLOAT_MODE_P (mode))
        {
          *total = COSTS_N_INSNS (ix86_cost->fadd);
          return false;
        }
      /* FALLTHRU */

    case AND:
    case IOR:
    case XOR:
      if (!TARGET_64BIT && mode == DImode)
        {
          *total = (COSTS_N_INSNS (ix86_cost->add) * 2
                    + (rtx_cost (XEXP (x, 0), outer_code)
                       << (GET_MODE (XEXP (x, 0)) != DImode))
                    + (rtx_cost (XEXP (x, 1), outer_code)
                       << (GET_MODE (XEXP (x, 1)) != DImode)));
          return true;
        }
      /* FALLTHRU */

    case NEG:
      if (FLOAT_MODE_P (mode))
        {
          *total = COSTS_N_INSNS (ix86_cost->fchs);
          return false;
        }
      /* FALLTHRU */

    case NOT:
      if (!TARGET_64BIT && mode == DImode)
        *total = COSTS_N_INSNS (ix86_cost->add * 2);
      else
        *total = COSTS_N_INSNS (ix86_cost->add);
      return false;

    case FLOAT_EXTEND:
      if (!TARGET_SSE_MATH || !VALID_SSE_REG_MODE (mode))
        *total = 0;
      return false;

    case ABS:
      if (FLOAT_MODE_P (mode))
        *total = COSTS_N_INSNS (ix86_cost->fabs);
      return false;

    case SQRT:
      if (FLOAT_MODE_P (mode))
        *total = COSTS_N_INSNS (ix86_cost->fsqrt);
      return false;

    case UNSPEC:
      if (XINT (x, 1) == UNSPEC_TP)
        *total = 0;
      return false;

    default:
      return false;
    }
}
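
/* Worked example: for (mult:SI (reg) (const_int 5)) the loop above counts
   nbits = 3 (binary 101), so the reported cost is
   COSTS_N_INSNS (mult_init[2] + 3 * mult_bit); a shift by 1 is instead
   costed as a plain add, matching the add/lea preference elsewhere in
   this file.  */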

#if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
static void
ix86_svr3_asm_out_constructor (rtx symbol, int priority ATTRIBUTE_UNUSED)
{
  init_section ();
  fputs ("\tpushl $", asm_out_file);
  assemble_name (asm_out_file, XSTR (symbol, 0));
  fputc ('\n', asm_out_file);
}
#endif

#if TARGET_MACHO

static int current_machopic_label_num;

/* Given a symbol name and its associated stub, write out the
   definition of the stub.  */

void
machopic_output_stub (FILE *file, const char *symb, const char *stub)
{
  unsigned int length;
  char *binder_name, *symbol_name, lazy_ptr_name[32];
  int label = ++current_machopic_label_num;

  /* Lose our funky encoding stuff so it doesn't contaminate the stub.  */
  symb = (*targetm.strip_name_encoding) (symb);

  length = strlen (stub);
  binder_name = alloca (length + 32);
  GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);

  length = strlen (symb);
  symbol_name = alloca (length + 32);
  GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);

  sprintf (lazy_ptr_name, "L%d$lz", label);

  if (MACHOPIC_PURE)
    machopic_picsymbol_stub_section ();
  else
    machopic_symbol_stub_section ();

  fprintf (file, "%s:\n", stub);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);

  if (MACHOPIC_PURE)
    {
      fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
      fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
      fprintf (file, "\tjmp %%edx\n");
    }
  else
    fprintf (file, "\tjmp *%s\n", lazy_ptr_name);

  fprintf (file, "%s:\n", binder_name);

  if (MACHOPIC_PURE)
    {
      fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
      fprintf (file, "\tpushl %%eax\n");
    }
  else
    fprintf (file, "\t pushl $%s\n", lazy_ptr_name);

  fprintf (file, "\tjmp dyld_stub_binding_helper\n");

  machopic_lazy_symbol_ptr_section ();
  fprintf (file, "%s:\n", lazy_ptr_name);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
  fprintf (file, "\t.long %s\n", binder_name);
}
#endif /* TARGET_MACHO */

/* Order the registers for the register allocator.  */

void
x86_order_regs_for_local_alloc (void)
{
  int pos = 0;
  int i;

  /* First allocate the local general purpose registers.  */
  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    if (GENERAL_REGNO_P (i) && call_used_regs[i])
      reg_alloc_order [pos++] = i;

  /* Global general purpose registers.  */
  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    if (GENERAL_REGNO_P (i) && !call_used_regs[i])
      reg_alloc_order [pos++] = i;

  /* x87 registers come first in case we are doing FP math
     using them.  */
  if (!TARGET_SSE_MATH)
    for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
      reg_alloc_order [pos++] = i;

  /* SSE registers.  */
  for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
    reg_alloc_order [pos++] = i;
  for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
    reg_alloc_order [pos++] = i;

  /* x87 registers.  */
  if (TARGET_SSE_MATH)
    for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
      reg_alloc_order [pos++] = i;

  for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
    reg_alloc_order [pos++] = i;

  /* Initialize the rest of the array as we do not allocate some registers
     at all.  */
  while (pos < FIRST_PSEUDO_REGISTER)
    reg_alloc_order [pos++] = 0;
}

#ifndef TARGET_USE_MS_BITFIELD_LAYOUT
#define TARGET_USE_MS_BITFIELD_LAYOUT 0
#endif

/* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
   struct attribute_spec.handler.  */
static tree
ix86_handle_struct_attribute (tree *node, tree name,
                              tree args ATTRIBUTE_UNUSED,
                              int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  tree *type = NULL;
  if (DECL_P (*node))
    {
      if (TREE_CODE (*node) == TYPE_DECL)
        type = &TREE_TYPE (*node);
    }
  else
    type = node;

  if (!(type && (TREE_CODE (*type) == RECORD_TYPE
                 || TREE_CODE (*type) == UNION_TYPE)))
    {
      warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }

  else if ((is_attribute_p ("ms_struct", name)
            && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
           || ((is_attribute_p ("gcc_struct", name)
                && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
    {
      warning ("`%s' incompatible attribute ignored",
               IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }

  return NULL_TREE;
}
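
/* Usage sketch (user-level, illustrative): the attribute handled above is
   written on a struct or union type, e.g.

     struct __attribute__ ((ms_struct)) S { char c; int bf : 7; };

   Attaching both ms_struct and gcc_struct to the same type triggers the
   "incompatible attribute" warning path.  */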

static bool
ix86_ms_bitfield_layout_p (tree record_type)
{
  return ((TARGET_USE_MS_BITFIELD_LAYOUT
           && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
          || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
}

/* Returns an expression indicating where the this parameter is
   located on entry to the FUNCTION.  */

static rtx
x86_this_parameter (tree function)
{
  tree type = TREE_TYPE (function);

  if (TARGET_64BIT)
    {
      int n = aggregate_value_p (TREE_TYPE (type)) != 0;
      return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
    }

  if (ix86_fntype_regparm (type) > 0)
    {
      tree parm;

      parm = TYPE_ARG_TYPES (type);
      /* Figure out whether or not the function has a variable number of
         arguments.  */
      for (; parm; parm = TREE_CHAIN (parm))
        if (TREE_VALUE (parm) == void_type_node)
          break;
      /* If not, the this parameter is in %eax.  */
      if (parm)
        return gen_rtx_REG (SImode, 0);
    }

  if (aggregate_value_p (TREE_TYPE (type)))
    return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
  else
    return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
}

/* Determine whether x86_output_mi_thunk can succeed.  */

static bool
x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
                         HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
                         HOST_WIDE_INT vcall_offset, tree function)
{
  /* 64-bit can handle anything.  */
  if (TARGET_64BIT)
    return true;

  /* For 32-bit, everything's fine if we have one free register.  */
  if (ix86_fntype_regparm (TREE_TYPE (function)) < 3)
    return true;

  /* Need a free register for vcall_offset.  */
  if (vcall_offset)
    return false;

  /* Need a free register for GOT references.  */
  if (flag_pic && !(*targetm.binds_local_p) (function))
    return false;

  /* Otherwise ok.  */
  return true;
}

/* Output the assembler code for a thunk function.  THUNK_DECL is the
   declaration for the thunk function itself, FUNCTION is the decl for
   the target function.  DELTA is an immediate constant offset to be
   added to THIS.  If VCALL_OFFSET is nonzero, the word at
   *(*this + vcall_offset) should be added to THIS.  */

static void
x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
                     tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
                     HOST_WIDE_INT vcall_offset, tree function)
{
  rtx xops[3];
  rtx this = x86_this_parameter (function);
  rtx this_reg, tmp;

  /* If VCALL_OFFSET, we'll need THIS in a register.  Might as well
     pull it in now and let DELTA benefit.  */
  if (REG_P (this))
    this_reg = this;
  else if (vcall_offset)
    {
      /* Put the this parameter into %eax.  */
      xops[0] = this;
      xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
      output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
    }
  else
    this_reg = NULL_RTX;

  /* Adjust the this parameter by a fixed constant.  */
  if (delta)
    {
      xops[0] = GEN_INT (delta);
      xops[1] = this_reg ? this_reg : this;
      if (TARGET_64BIT)
        {
          if (!x86_64_general_operand (xops[0], DImode))
            {
              tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
              xops[1] = tmp;
              output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
              xops[0] = tmp;
              xops[1] = this;
            }
          output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
        }
      else
        output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
    }

  /* Adjust the this parameter by a value stored in the vtable.  */
  if (vcall_offset)
    {
      if (TARGET_64BIT)
        tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
      else
        tmp = gen_rtx_REG (SImode, 2 /* ECX */);

      xops[0] = gen_rtx_MEM (Pmode, this_reg);
      xops[1] = tmp;
      if (TARGET_64BIT)
        output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
      else
        output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);

      /* Adjust the this parameter.  */
      xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
      if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
        {
          rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
          xops[0] = GEN_INT (vcall_offset);
          xops[1] = tmp2;
          output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
          xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
        }
      xops[1] = this_reg;
      if (TARGET_64BIT)
        output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
      else
        output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
    }

  /* If necessary, drop THIS back to its stack slot.  */
  if (this_reg && this_reg != this)
    {
      xops[0] = this_reg;
      xops[1] = this;
      output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
    }

  xops[0] = XEXP (DECL_RTL (function), 0);
  if (TARGET_64BIT)
    {
      if (!flag_pic || (*targetm.binds_local_p) (function))
        output_asm_insn ("jmp\t%P0", xops);
      else
        {
          tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
          tmp = gen_rtx_CONST (Pmode, tmp);
          tmp = gen_rtx_MEM (QImode, tmp);
          xops[0] = tmp;
          output_asm_insn ("jmp\t%A0", xops);
        }
    }
  else
    {
      if (!flag_pic || (*targetm.binds_local_p) (function))
        output_asm_insn ("jmp\t%P0", xops);
      else
#if TARGET_MACHO
        if (TARGET_MACHO)
          {
            char *ip = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (function));
            tmp = gen_rtx_SYMBOL_REF (Pmode, machopic_stub_name (ip));
            tmp = gen_rtx_MEM (QImode, tmp);
            xops[0] = tmp;
            output_asm_insn ("jmp\t%0", xops);
          }
        else
#endif /* TARGET_MACHO */
          {
            tmp = gen_rtx_REG (SImode, 2 /* ECX */);
            output_set_got (tmp);

            xops[1] = tmp;
            output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
            output_asm_insn ("jmp\t{*}%1", xops);
          }
    }
}
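
/* Illustrative output (not verbatim from any build): for a 32-bit
   non-PIC thunk with DELTA == 4 and no VCALL_OFFSET, the code above
   prints roughly

     addl $4, 4(%esp)
     jmp  target

   because the this pointer sits in the first stack slot when no
   register arguments are used.  */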

static void
x86_file_start (void)
{
  default_file_start ();
  if (X86_FILE_START_VERSION_DIRECTIVE)
    fputs ("\t.version\t\"01.01\"\n", asm_out_file);
  if (X86_FILE_START_FLTUSED)
    fputs ("\t.global\t__fltused\n", asm_out_file);
  if (ix86_asm_dialect == ASM_INTEL)
    fputs ("\t.intel_syntax\n", asm_out_file);
}

int
x86_field_alignment (tree field, int computed)
{
  enum machine_mode mode;
  tree type = TREE_TYPE (field);

  if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
    return computed;
  mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
                    ? get_inner_array_type (type) : type);
  if (mode == DFmode || mode == DCmode
      || GET_MODE_CLASS (mode) == MODE_INT
      || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
    return MIN (32, computed);
  return computed;
}
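
/* Effect (illustrative): on ia32 without -malign-double, a double field
   inside a struct is capped at 32-bit alignment by the function above,
   matching the traditional System V/386 layout; with TARGET_ALIGN_DOUBLE
   or on 64-bit targets the natural alignment is kept.  */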

/* Output assembler code to FILE to increment profiler label # LABELNO
   for profiling a function entry.  */
void
x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
{
  if (TARGET_64BIT)
    if (flag_pic)
      {
#ifndef NO_PROFILE_COUNTERS
        fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
#endif
        fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
      }
    else
      {
#ifndef NO_PROFILE_COUNTERS
        fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
#endif
        fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
      }
  else if (flag_pic)
    {
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
               LPREFIX, labelno, PROFILE_COUNT_REGISTER);
#endif
      fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
    }
  else
    {
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
               PROFILE_COUNT_REGISTER);
#endif
      fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
    }
}

/* We don't have exact information about the insn sizes, but we may assume
   quite safely that we are informed about all 1 byte insns and memory
   address sizes.  This is enough to eliminate unnecessary padding in
   99% of cases.  */

static int
min_insn_size (rtx insn)
{
  int l = 0;

  if (!INSN_P (insn) || !active_insn_p (insn))
    return 0;

  /* Discard alignments we've emitted and jump instructions.  */
  if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
      && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
    return 0;
  if (GET_CODE (insn) == JUMP_INSN
      && (GET_CODE (PATTERN (insn)) == ADDR_VEC
          || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
    return 0;

  /* Important case - calls are always 5 bytes.
     It is common to have many calls in a row.  */
  if (GET_CODE (insn) == CALL_INSN
      && symbolic_reference_mentioned_p (PATTERN (insn))
      && !SIBLING_CALL_P (insn))
    return 5;
  if (get_attr_length (insn) <= 1)
    return 1;

  /* For normal instructions we may rely on the sizes of addresses
     and the presence of a symbol to require 4 bytes of encoding.
     This is not the case for jumps where references are PC relative.  */
  if (GET_CODE (insn) != JUMP_INSN)
    {
      l = get_attr_length_address (insn);
      if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
        l = 4;
    }
  if (l)
    return 1 + l;
  else
    return 2;
}

/* AMD K8 core mispredicts jumps when there are more than 3 jumps in a
   16-byte window.  */

static void
k8_avoid_jump_misspredicts (void)
{
  rtx insn, start = get_insns ();
  int nbytes = 0, njumps = 0;
  int isjump = 0;

  /* Look for all minimal intervals of instructions containing 4 jumps.
     The intervals are bounded by START and INSN.  NBYTES is the total
     size of instructions in the interval including INSN and not including
     START.  When NBYTES is smaller than 16 bytes, it is possible
     that the end of START and INSN ends up in the same 16-byte window.

     The smallest offset in the window INSN can start at is the case where
     START ends on offset 0.  The offset of INSN is then
     NBYTES - sizeof (INSN).  We add p2align to the 16-byte window with
     maxskip 17 - NBYTES + sizeof (INSN).  */
  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {

      nbytes += min_insn_size (insn);
      if (rtl_dump_file)
        fprintf (rtl_dump_file, "Insn %i estimated to %i bytes\n",
                 INSN_UID (insn), min_insn_size (insn));
      if ((GET_CODE (insn) == JUMP_INSN
           && GET_CODE (PATTERN (insn)) != ADDR_VEC
           && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
          || GET_CODE (insn) == CALL_INSN)
        njumps++;
      else
        continue;

      while (njumps > 3)
        {
          start = NEXT_INSN (start);
          if ((GET_CODE (start) == JUMP_INSN
               && GET_CODE (PATTERN (start)) != ADDR_VEC
               && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
              || GET_CODE (start) == CALL_INSN)
            njumps--, isjump = 1;
          else
            isjump = 0;
          nbytes -= min_insn_size (start);
        }
      if (njumps < 0)
        abort ();
      if (rtl_dump_file)
        fprintf (rtl_dump_file, "Interval %i to %i has %i bytes\n",
                 INSN_UID (start), INSN_UID (insn), nbytes);

      if (njumps == 3 && isjump && nbytes < 16)
        {
          int padsize = 15 - nbytes + min_insn_size (insn);

          if (rtl_dump_file)
            fprintf (rtl_dump_file, "Padding insn %i by %i bytes!\n",
                     INSN_UID (insn), padsize);
          emit_insn_before (gen_align (GEN_INT (padsize)), insn);
        }
    }
}
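
/* Worked example: suppose INSN is the fourth jump in a window and the
   interval from START holds nbytes == 14 with min_insn_size (insn) == 2.
   The pad emitted above is 15 - 14 + 2 = 3 bytes, enough to push INSN
   out of the 16-byte window shared with the three preceding jumps.  */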

/* Implement machine specific optimizations.
   At the moment we implement a single transformation: AMD Athlon works
   faster when RET is not the destination of a conditional jump or directly
   preceded by another jump instruction.  We avoid the penalty by inserting
   a NOP just before the RET instructions in such cases.  */
static void
ix86_reorg (void)
{
  edge e;

  if (!TARGET_ATHLON_K8 || !optimize || optimize_size)
    return;
  for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
    {
      basic_block bb = e->src;
      rtx ret = bb->end;
      rtx prev;
      bool replace = false;

      if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
          || !maybe_hot_bb_p (bb))
        continue;
      for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
        if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
          break;
      if (prev && GET_CODE (prev) == CODE_LABEL)
        {
          edge e;
          for (e = bb->pred; e; e = e->pred_next)
            if (EDGE_FREQUENCY (e) && e->src->index >= 0
                && !(e->flags & EDGE_FALLTHRU))
              replace = true;
        }
      if (!replace)
        {
          prev = prev_active_insn (ret);
          if (prev
              && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
                  || GET_CODE (prev) == CALL_INSN))
            replace = true;
          /* Empty functions get branch misprediction even when the jump
             destination is not visible to us.  */
          if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
            replace = true;
        }
      if (replace)
        {
          emit_insn_before (gen_return_internal_long (), ret);
          delete_insn (ret);
        }
    }
  k8_avoid_jump_misspredicts ();
}

/* Return nonzero when a QImode register that must be represented via a
   REX prefix is used.  */
bool
x86_extended_QIreg_mentioned_p (rtx insn)
{
  int i;
  extract_insn_cached (insn);
  for (i = 0; i < recog_data.n_operands; i++)
    if (REG_P (recog_data.operand[i])
        && REGNO (recog_data.operand[i]) >= 4)
      return true;
  return false;
}

/* Return nonzero when P points to a register encoded via a REX prefix.
   Called via for_each_rtx.  */
static int
extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
{
  unsigned int regno;
  if (!REG_P (*p))
    return 0;
  regno = REGNO (*p);
  return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
}

/* Return true when INSN mentions a register that must be encoded using
   a REX prefix.  */
bool
x86_extended_reg_mentioned_p (rtx insn)
{
  return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
}

/* Generate an unsigned DImode to FP conversion.  This is the same code
   optabs would emit if we didn't have TFmode patterns.  */

void
x86_emit_floatuns (rtx operands[2])
{
  rtx neglab, donelab, i0, i1, f0, in, out;
  enum machine_mode mode;

  out = operands[0];
  in = force_reg (DImode, operands[1]);
  mode = GET_MODE (out);
  neglab = gen_label_rtx ();
  donelab = gen_label_rtx ();
  i1 = gen_reg_rtx (Pmode);
  f0 = gen_reg_rtx (mode);

  emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);

  emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
  emit_jump_insn (gen_jump (donelab));
  emit_barrier ();

  emit_label (neglab);

  i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
  i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
  i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
  expand_float (f0, i0, 0);
  emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));

  emit_label (donelab);
}
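
/* C-level equivalent of the sequence emitted above (a sketch; `u' is the
   unsigned 64-bit input):

     if ((long long) u >= 0)
       result = (double) (long long) u;
     else
       {
         unsigned long long half = (u >> 1) | (u & 1);
         result = 2.0 * (double) (long long) half;
       }

   The low-bit OR keeps the rounding correct when the shifted-out bit
   mattered.  */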

/* Return true if we do not know how to pass TYPE solely in registers.  */
bool
ix86_must_pass_in_stack (enum machine_mode mode, tree type)
{
  if (default_must_pass_in_stack (mode, type))
    return true;
  return (!TARGET_64BIT && type && mode == TImode);
}

#include "gt-i386.h"