]> gcc.gnu.org Git - gcc.git/blame - gcc/config/i386/i386.c
tree-complex.c (expand_complex_comparison): Use fold_convert instead of convert.
[gcc.git] / gcc / config / i386 / i386.c
CommitLineData
e075ae69 1/* Subroutines used for code generation on IA-32.
07933f72 2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
e129d93a 3 2002, 2003, 2004 Free Software Foundation, Inc.
2a2ab3f9 4
188fc5b5 5This file is part of GCC.
2a2ab3f9 6
188fc5b5 7GCC is free software; you can redistribute it and/or modify
2a2ab3f9
JVA
8it under the terms of the GNU General Public License as published by
9the Free Software Foundation; either version 2, or (at your option)
10any later version.
11
188fc5b5 12GCC is distributed in the hope that it will be useful,
2a2ab3f9
JVA
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15GNU General Public License for more details.
16
17You should have received a copy of the GNU General Public License
188fc5b5 18along with GCC; see the file COPYING. If not, write to
97aadbb9 19the Free Software Foundation, 59 Temple Place - Suite 330,
0f290768 20Boston, MA 02111-1307, USA. */
2a2ab3f9 21
2a2ab3f9 22#include "config.h"
bb5177ac 23#include "system.h"
4977bab6
ZW
24#include "coretypes.h"
25#include "tm.h"
2a2ab3f9 26#include "rtl.h"
6baf1cc8
BS
27#include "tree.h"
28#include "tm_p.h"
2a2ab3f9
JVA
29#include "regs.h"
30#include "hard-reg-set.h"
31#include "real.h"
32#include "insn-config.h"
33#include "conditions.h"
2a2ab3f9
JVA
34#include "output.h"
35#include "insn-attr.h"
2a2ab3f9 36#include "flags.h"
a8ffcc81 37#include "except.h"
ecbc4695 38#include "function.h"
00c79232 39#include "recog.h"
ced8dd8c 40#include "expr.h"
e78d8e51 41#include "optabs.h"
f103890b 42#include "toplev.h"
e075ae69 43#include "basic-block.h"
1526a060 44#include "ggc.h"
672a6f42
NB
45#include "target.h"
46#include "target-def.h"
f1e639b1 47#include "langhooks.h"
dafc5b82 48#include "cgraph.h"
2a2ab3f9 49
8dfe5673 50#ifndef CHECK_STACK_LIMIT
07933f72 51#define CHECK_STACK_LIMIT (-1)
8dfe5673
RK
52#endif
53
3c50106f
RH
54/* Return index of given mode in mult and division cost tables. */
55#define MODE_INDEX(mode) \
56 ((mode) == QImode ? 0 \
57 : (mode) == HImode ? 1 \
58 : (mode) == SImode ? 2 \
59 : (mode) == DImode ? 3 \
60 : 4)
61
2ab0437e 62/* Processor costs (relative to an add) */
fce5a9f2 63static const
2ab0437e
JH
64struct processor_costs size_cost = { /* costs for tunning for size */
65 2, /* cost of an add instruction */
66 3, /* cost of a lea instruction */
67 2, /* variable shift costs */
68 3, /* constant shift costs */
4977bab6 69 {3, 3, 3, 3, 5}, /* cost of starting a multiply */
2ab0437e 70 0, /* cost of multiply per each bit set */
4977bab6 71 {3, 3, 3, 3, 5}, /* cost of a divide/mod */
44cf5b6a
JH
72 3, /* cost of movsx */
73 3, /* cost of movzx */
2ab0437e
JH
74 0, /* "large" insn */
75 2, /* MOVE_RATIO */
76 2, /* cost for loading QImode using movzbl */
77 {2, 2, 2}, /* cost of loading integer registers
78 in QImode, HImode and SImode.
79 Relative to reg-reg move (2). */
80 {2, 2, 2}, /* cost of storing integer registers */
81 2, /* cost of reg,reg fld/fst */
82 {2, 2, 2}, /* cost of loading fp registers
83 in SFmode, DFmode and XFmode */
84 {2, 2, 2}, /* cost of loading integer registers */
85 3, /* cost of moving MMX register */
86 {3, 3}, /* cost of loading MMX registers
87 in SImode and DImode */
88 {3, 3}, /* cost of storing MMX registers
89 in SImode and DImode */
90 3, /* cost of moving SSE register */
91 {3, 3, 3}, /* cost of loading SSE registers
92 in SImode, DImode and TImode */
93 {3, 3, 3}, /* cost of storing SSE registers
94 in SImode, DImode and TImode */
95 3, /* MMX or SSE register to integer */
f4365627
JH
96 0, /* size of prefetch block */
97 0, /* number of parallel prefetches */
4977bab6 98 1, /* Branch cost */
229b303a
RS
99 2, /* cost of FADD and FSUB insns. */
100 2, /* cost of FMUL instruction. */
101 2, /* cost of FDIV instruction. */
102 2, /* cost of FABS instruction. */
103 2, /* cost of FCHS instruction. */
104 2, /* cost of FSQRT instruction. */
2ab0437e 105};
229b303a 106
32b5b1aa 107/* Processor costs (relative to an add) */
fce5a9f2 108static const
32b5b1aa 109struct processor_costs i386_cost = { /* 386 specific costs */
e9a25f70 110 1, /* cost of an add instruction */
32b5b1aa
SC
111 1, /* cost of a lea instruction */
112 3, /* variable shift costs */
113 2, /* constant shift costs */
4977bab6 114 {6, 6, 6, 6, 6}, /* cost of starting a multiply */
32b5b1aa 115 1, /* cost of multiply per each bit set */
4977bab6 116 {23, 23, 23, 23, 23}, /* cost of a divide/mod */
44cf5b6a
JH
117 3, /* cost of movsx */
118 2, /* cost of movzx */
96e7ae40 119 15, /* "large" insn */
e2e52e1b 120 3, /* MOVE_RATIO */
7c6b971d 121 4, /* cost for loading QImode using movzbl */
96e7ae40
JH
122 {2, 4, 2}, /* cost of loading integer registers
123 in QImode, HImode and SImode.
0f290768 124 Relative to reg-reg move (2). */
96e7ae40
JH
125 {2, 4, 2}, /* cost of storing integer registers */
126 2, /* cost of reg,reg fld/fst */
127 {8, 8, 8}, /* cost of loading fp registers
128 in SFmode, DFmode and XFmode */
fa79946e
JH
129 {8, 8, 8}, /* cost of loading integer registers */
130 2, /* cost of moving MMX register */
131 {4, 8}, /* cost of loading MMX registers
132 in SImode and DImode */
133 {4, 8}, /* cost of storing MMX registers
134 in SImode and DImode */
135 2, /* cost of moving SSE register */
136 {4, 8, 16}, /* cost of loading SSE registers
137 in SImode, DImode and TImode */
138 {4, 8, 16}, /* cost of storing SSE registers
139 in SImode, DImode and TImode */
140 3, /* MMX or SSE register to integer */
f4365627
JH
141 0, /* size of prefetch block */
142 0, /* number of parallel prefetches */
4977bab6 143 1, /* Branch cost */
229b303a
RS
144 23, /* cost of FADD and FSUB insns. */
145 27, /* cost of FMUL instruction. */
146 88, /* cost of FDIV instruction. */
147 22, /* cost of FABS instruction. */
148 24, /* cost of FCHS instruction. */
149 122, /* cost of FSQRT instruction. */
32b5b1aa
SC
150};
151
fce5a9f2 152static const
32b5b1aa
SC
153struct processor_costs i486_cost = { /* 486 specific costs */
154 1, /* cost of an add instruction */
155 1, /* cost of a lea instruction */
156 3, /* variable shift costs */
157 2, /* constant shift costs */
4977bab6 158 {12, 12, 12, 12, 12}, /* cost of starting a multiply */
32b5b1aa 159 1, /* cost of multiply per each bit set */
4977bab6 160 {40, 40, 40, 40, 40}, /* cost of a divide/mod */
44cf5b6a
JH
161 3, /* cost of movsx */
162 2, /* cost of movzx */
96e7ae40 163 15, /* "large" insn */
e2e52e1b 164 3, /* MOVE_RATIO */
7c6b971d 165 4, /* cost for loading QImode using movzbl */
96e7ae40
JH
166 {2, 4, 2}, /* cost of loading integer registers
167 in QImode, HImode and SImode.
0f290768 168 Relative to reg-reg move (2). */
96e7ae40
JH
169 {2, 4, 2}, /* cost of storing integer registers */
170 2, /* cost of reg,reg fld/fst */
171 {8, 8, 8}, /* cost of loading fp registers
172 in SFmode, DFmode and XFmode */
fa79946e
JH
173 {8, 8, 8}, /* cost of loading integer registers */
174 2, /* cost of moving MMX register */
175 {4, 8}, /* cost of loading MMX registers
176 in SImode and DImode */
177 {4, 8}, /* cost of storing MMX registers
178 in SImode and DImode */
179 2, /* cost of moving SSE register */
180 {4, 8, 16}, /* cost of loading SSE registers
181 in SImode, DImode and TImode */
182 {4, 8, 16}, /* cost of storing SSE registers
183 in SImode, DImode and TImode */
f4365627
JH
184 3, /* MMX or SSE register to integer */
185 0, /* size of prefetch block */
186 0, /* number of parallel prefetches */
4977bab6 187 1, /* Branch cost */
229b303a
RS
188 8, /* cost of FADD and FSUB insns. */
189 16, /* cost of FMUL instruction. */
190 73, /* cost of FDIV instruction. */
191 3, /* cost of FABS instruction. */
192 3, /* cost of FCHS instruction. */
193 83, /* cost of FSQRT instruction. */
32b5b1aa
SC
194};
195
fce5a9f2 196static const
e5cb57e8 197struct processor_costs pentium_cost = {
32b5b1aa
SC
198 1, /* cost of an add instruction */
199 1, /* cost of a lea instruction */
856b07a1 200 4, /* variable shift costs */
e5cb57e8 201 1, /* constant shift costs */
4977bab6 202 {11, 11, 11, 11, 11}, /* cost of starting a multiply */
856b07a1 203 0, /* cost of multiply per each bit set */
4977bab6 204 {25, 25, 25, 25, 25}, /* cost of a divide/mod */
44cf5b6a
JH
205 3, /* cost of movsx */
206 2, /* cost of movzx */
96e7ae40 207 8, /* "large" insn */
e2e52e1b 208 6, /* MOVE_RATIO */
7c6b971d 209 6, /* cost for loading QImode using movzbl */
96e7ae40
JH
210 {2, 4, 2}, /* cost of loading integer registers
211 in QImode, HImode and SImode.
0f290768 212 Relative to reg-reg move (2). */
96e7ae40
JH
213 {2, 4, 2}, /* cost of storing integer registers */
214 2, /* cost of reg,reg fld/fst */
215 {2, 2, 6}, /* cost of loading fp registers
216 in SFmode, DFmode and XFmode */
fa79946e
JH
217 {4, 4, 6}, /* cost of loading integer registers */
218 8, /* cost of moving MMX register */
219 {8, 8}, /* cost of loading MMX registers
220 in SImode and DImode */
221 {8, 8}, /* cost of storing MMX registers
222 in SImode and DImode */
223 2, /* cost of moving SSE register */
224 {4, 8, 16}, /* cost of loading SSE registers
225 in SImode, DImode and TImode */
226 {4, 8, 16}, /* cost of storing SSE registers
227 in SImode, DImode and TImode */
f4365627
JH
228 3, /* MMX or SSE register to integer */
229 0, /* size of prefetch block */
230 0, /* number of parallel prefetches */
4977bab6 231 2, /* Branch cost */
229b303a
RS
232 3, /* cost of FADD and FSUB insns. */
233 3, /* cost of FMUL instruction. */
234 39, /* cost of FDIV instruction. */
235 1, /* cost of FABS instruction. */
236 1, /* cost of FCHS instruction. */
237 70, /* cost of FSQRT instruction. */
32b5b1aa
SC
238};
239
fce5a9f2 240static const
856b07a1
SC
241struct processor_costs pentiumpro_cost = {
242 1, /* cost of an add instruction */
243 1, /* cost of a lea instruction */
e075ae69 244 1, /* variable shift costs */
856b07a1 245 1, /* constant shift costs */
4977bab6 246 {4, 4, 4, 4, 4}, /* cost of starting a multiply */
856b07a1 247 0, /* cost of multiply per each bit set */
4977bab6 248 {17, 17, 17, 17, 17}, /* cost of a divide/mod */
44cf5b6a
JH
249 1, /* cost of movsx */
250 1, /* cost of movzx */
96e7ae40 251 8, /* "large" insn */
e2e52e1b 252 6, /* MOVE_RATIO */
7c6b971d 253 2, /* cost for loading QImode using movzbl */
96e7ae40
JH
254 {4, 4, 4}, /* cost of loading integer registers
255 in QImode, HImode and SImode.
0f290768 256 Relative to reg-reg move (2). */
96e7ae40
JH
257 {2, 2, 2}, /* cost of storing integer registers */
258 2, /* cost of reg,reg fld/fst */
259 {2, 2, 6}, /* cost of loading fp registers
260 in SFmode, DFmode and XFmode */
fa79946e
JH
261 {4, 4, 6}, /* cost of loading integer registers */
262 2, /* cost of moving MMX register */
263 {2, 2}, /* cost of loading MMX registers
264 in SImode and DImode */
265 {2, 2}, /* cost of storing MMX registers
266 in SImode and DImode */
267 2, /* cost of moving SSE register */
268 {2, 2, 8}, /* cost of loading SSE registers
269 in SImode, DImode and TImode */
270 {2, 2, 8}, /* cost of storing SSE registers
271 in SImode, DImode and TImode */
f4365627
JH
272 3, /* MMX or SSE register to integer */
273 32, /* size of prefetch block */
274 6, /* number of parallel prefetches */
4977bab6 275 2, /* Branch cost */
229b303a
RS
276 3, /* cost of FADD and FSUB insns. */
277 5, /* cost of FMUL instruction. */
278 56, /* cost of FDIV instruction. */
279 2, /* cost of FABS instruction. */
280 2, /* cost of FCHS instruction. */
281 56, /* cost of FSQRT instruction. */
856b07a1
SC
282};
283
fce5a9f2 284static const
a269a03c
JC
285struct processor_costs k6_cost = {
286 1, /* cost of an add instruction */
e075ae69 287 2, /* cost of a lea instruction */
a269a03c
JC
288 1, /* variable shift costs */
289 1, /* constant shift costs */
4977bab6 290 {3, 3, 3, 3, 3}, /* cost of starting a multiply */
a269a03c 291 0, /* cost of multiply per each bit set */
4977bab6 292 {18, 18, 18, 18, 18}, /* cost of a divide/mod */
44cf5b6a
JH
293 2, /* cost of movsx */
294 2, /* cost of movzx */
96e7ae40 295 8, /* "large" insn */
e2e52e1b 296 4, /* MOVE_RATIO */
7c6b971d 297 3, /* cost for loading QImode using movzbl */
96e7ae40
JH
298 {4, 5, 4}, /* cost of loading integer registers
299 in QImode, HImode and SImode.
0f290768 300 Relative to reg-reg move (2). */
96e7ae40
JH
301 {2, 3, 2}, /* cost of storing integer registers */
302 4, /* cost of reg,reg fld/fst */
303 {6, 6, 6}, /* cost of loading fp registers
304 in SFmode, DFmode and XFmode */
fa79946e
JH
305 {4, 4, 4}, /* cost of loading integer registers */
306 2, /* cost of moving MMX register */
307 {2, 2}, /* cost of loading MMX registers
308 in SImode and DImode */
309 {2, 2}, /* cost of storing MMX registers
310 in SImode and DImode */
311 2, /* cost of moving SSE register */
312 {2, 2, 8}, /* cost of loading SSE registers
313 in SImode, DImode and TImode */
314 {2, 2, 8}, /* cost of storing SSE registers
315 in SImode, DImode and TImode */
f4365627
JH
316 6, /* MMX or SSE register to integer */
317 32, /* size of prefetch block */
318 1, /* number of parallel prefetches */
4977bab6 319 1, /* Branch cost */
229b303a
RS
320 2, /* cost of FADD and FSUB insns. */
321 2, /* cost of FMUL instruction. */
4f770e7b
RS
322 56, /* cost of FDIV instruction. */
323 2, /* cost of FABS instruction. */
229b303a
RS
324 2, /* cost of FCHS instruction. */
325 56, /* cost of FSQRT instruction. */
a269a03c
JC
326};
327
fce5a9f2 328static const
309ada50
JH
329struct processor_costs athlon_cost = {
330 1, /* cost of an add instruction */
0b5107cf 331 2, /* cost of a lea instruction */
309ada50
JH
332 1, /* variable shift costs */
333 1, /* constant shift costs */
4977bab6 334 {5, 5, 5, 5, 5}, /* cost of starting a multiply */
309ada50 335 0, /* cost of multiply per each bit set */
4977bab6 336 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
44cf5b6a
JH
337 1, /* cost of movsx */
338 1, /* cost of movzx */
309ada50 339 8, /* "large" insn */
e2e52e1b 340 9, /* MOVE_RATIO */
309ada50 341 4, /* cost for loading QImode using movzbl */
b72b1c29 342 {3, 4, 3}, /* cost of loading integer registers
309ada50 343 in QImode, HImode and SImode.
0f290768 344 Relative to reg-reg move (2). */
b72b1c29 345 {3, 4, 3}, /* cost of storing integer registers */
309ada50 346 4, /* cost of reg,reg fld/fst */
b72b1c29 347 {4, 4, 12}, /* cost of loading fp registers
309ada50 348 in SFmode, DFmode and XFmode */
b72b1c29 349 {6, 6, 8}, /* cost of loading integer registers */
fa79946e 350 2, /* cost of moving MMX register */
b72b1c29 351 {4, 4}, /* cost of loading MMX registers
fa79946e 352 in SImode and DImode */
b72b1c29 353 {4, 4}, /* cost of storing MMX registers
fa79946e
JH
354 in SImode and DImode */
355 2, /* cost of moving SSE register */
b72b1c29 356 {4, 4, 6}, /* cost of loading SSE registers
fa79946e 357 in SImode, DImode and TImode */
b72b1c29 358 {4, 4, 5}, /* cost of storing SSE registers
fa79946e 359 in SImode, DImode and TImode */
b72b1c29 360 5, /* MMX or SSE register to integer */
f4365627
JH
361 64, /* size of prefetch block */
362 6, /* number of parallel prefetches */
4977bab6 363 2, /* Branch cost */
229b303a
RS
364 4, /* cost of FADD and FSUB insns. */
365 4, /* cost of FMUL instruction. */
366 24, /* cost of FDIV instruction. */
367 2, /* cost of FABS instruction. */
368 2, /* cost of FCHS instruction. */
369 35, /* cost of FSQRT instruction. */
309ada50
JH
370};
371
4977bab6
ZW
372static const
373struct processor_costs k8_cost = {
374 1, /* cost of an add instruction */
375 2, /* cost of a lea instruction */
376 1, /* variable shift costs */
377 1, /* constant shift costs */
378 {3, 4, 3, 4, 5}, /* cost of starting a multiply */
379 0, /* cost of multiply per each bit set */
380 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
381 1, /* cost of movsx */
382 1, /* cost of movzx */
383 8, /* "large" insn */
384 9, /* MOVE_RATIO */
385 4, /* cost for loading QImode using movzbl */
386 {3, 4, 3}, /* cost of loading integer registers
387 in QImode, HImode and SImode.
388 Relative to reg-reg move (2). */
389 {3, 4, 3}, /* cost of storing integer registers */
390 4, /* cost of reg,reg fld/fst */
391 {4, 4, 12}, /* cost of loading fp registers
392 in SFmode, DFmode and XFmode */
393 {6, 6, 8}, /* cost of loading integer registers */
394 2, /* cost of moving MMX register */
395 {3, 3}, /* cost of loading MMX registers
396 in SImode and DImode */
397 {4, 4}, /* cost of storing MMX registers
398 in SImode and DImode */
399 2, /* cost of moving SSE register */
400 {4, 3, 6}, /* cost of loading SSE registers
401 in SImode, DImode and TImode */
402 {4, 4, 5}, /* cost of storing SSE registers
403 in SImode, DImode and TImode */
404 5, /* MMX or SSE register to integer */
405 64, /* size of prefetch block */
406 6, /* number of parallel prefetches */
407 2, /* Branch cost */
408 4, /* cost of FADD and FSUB insns. */
409 4, /* cost of FMUL instruction. */
410 19, /* cost of FDIV instruction. */
411 2, /* cost of FABS instruction. */
412 2, /* cost of FCHS instruction. */
413 35, /* cost of FSQRT instruction. */
414};
415
fce5a9f2 416static const
b4e89e2d
JH
417struct processor_costs pentium4_cost = {
418 1, /* cost of an add instruction */
419 1, /* cost of a lea instruction */
4977bab6
ZW
420 4, /* variable shift costs */
421 4, /* constant shift costs */
422 {15, 15, 15, 15, 15}, /* cost of starting a multiply */
b4e89e2d 423 0, /* cost of multiply per each bit set */
4977bab6 424 {56, 56, 56, 56, 56}, /* cost of a divide/mod */
44cf5b6a
JH
425 1, /* cost of movsx */
426 1, /* cost of movzx */
b4e89e2d
JH
427 16, /* "large" insn */
428 6, /* MOVE_RATIO */
429 2, /* cost for loading QImode using movzbl */
430 {4, 5, 4}, /* cost of loading integer registers
431 in QImode, HImode and SImode.
432 Relative to reg-reg move (2). */
433 {2, 3, 2}, /* cost of storing integer registers */
434 2, /* cost of reg,reg fld/fst */
435 {2, 2, 6}, /* cost of loading fp registers
436 in SFmode, DFmode and XFmode */
437 {4, 4, 6}, /* cost of loading integer registers */
438 2, /* cost of moving MMX register */
439 {2, 2}, /* cost of loading MMX registers
440 in SImode and DImode */
441 {2, 2}, /* cost of storing MMX registers
442 in SImode and DImode */
443 12, /* cost of moving SSE register */
444 {12, 12, 12}, /* cost of loading SSE registers
445 in SImode, DImode and TImode */
446 {2, 2, 8}, /* cost of storing SSE registers
447 in SImode, DImode and TImode */
448 10, /* MMX or SSE register to integer */
f4365627
JH
449 64, /* size of prefetch block */
450 6, /* number of parallel prefetches */
4977bab6 451 2, /* Branch cost */
229b303a
RS
452 5, /* cost of FADD and FSUB insns. */
453 7, /* cost of FMUL instruction. */
454 43, /* cost of FDIV instruction. */
455 2, /* cost of FABS instruction. */
456 2, /* cost of FCHS instruction. */
457 43, /* cost of FSQRT instruction. */
b4e89e2d
JH
458};
459
89c43c0a
VM
460static const
461struct processor_costs nocona_cost = {
462 1, /* cost of an add instruction */
463 1, /* cost of a lea instruction */
464 1, /* variable shift costs */
465 1, /* constant shift costs */
466 {10, 10, 10, 10, 10}, /* cost of starting a multiply */
467 0, /* cost of multiply per each bit set */
468 {66, 66, 66, 66, 66}, /* cost of a divide/mod */
469 1, /* cost of movsx */
470 1, /* cost of movzx */
471 16, /* "large" insn */
472 9, /* MOVE_RATIO */
473 4, /* cost for loading QImode using movzbl */
474 {4, 4, 4}, /* cost of loading integer registers
475 in QImode, HImode and SImode.
476 Relative to reg-reg move (2). */
477 {4, 4, 4}, /* cost of storing integer registers */
478 3, /* cost of reg,reg fld/fst */
479 {12, 12, 12}, /* cost of loading fp registers
480 in SFmode, DFmode and XFmode */
481 {4, 4, 4}, /* cost of loading integer registers */
482 6, /* cost of moving MMX register */
483 {12, 12}, /* cost of loading MMX registers
484 in SImode and DImode */
485 {12, 12}, /* cost of storing MMX registers
486 in SImode and DImode */
487 6, /* cost of moving SSE register */
488 {12, 12, 12}, /* cost of loading SSE registers
489 in SImode, DImode and TImode */
490 {12, 12, 12}, /* cost of storing SSE registers
491 in SImode, DImode and TImode */
492 8, /* MMX or SSE register to integer */
493 128, /* size of prefetch block */
494 8, /* number of parallel prefetches */
495 1, /* Branch cost */
496 6, /* cost of FADD and FSUB insns. */
497 8, /* cost of FMUL instruction. */
498 40, /* cost of FDIV instruction. */
499 3, /* cost of FABS instruction. */
500 3, /* cost of FCHS instruction. */
501 44, /* cost of FSQRT instruction. */
502};
503
8b60264b 504const struct processor_costs *ix86_cost = &pentium_cost;
32b5b1aa 505
a269a03c
JC
506/* Processor feature/optimization bitmasks. */
507#define m_386 (1<<PROCESSOR_I386)
508#define m_486 (1<<PROCESSOR_I486)
509#define m_PENT (1<<PROCESSOR_PENTIUM)
510#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
511#define m_K6 (1<<PROCESSOR_K6)
309ada50 512#define m_ATHLON (1<<PROCESSOR_ATHLON)
b4e89e2d 513#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
4977bab6
ZW
514#define m_K8 (1<<PROCESSOR_K8)
515#define m_ATHLON_K8 (m_K8 | m_ATHLON)
89c43c0a 516#define m_NOCONA (1<<PROCESSOR_NOCONA)
a269a03c 517
4977bab6 518const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
89c43c0a 519const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
a269a03c 520const int x86_zero_extend_with_and = m_486 | m_PENT;
89c43c0a 521const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA /* m_386 | m_K6 */;
e075ae69 522const int x86_double_with_add = ~m_386;
a269a03c 523const int x86_use_bit_test = m_386;
4977bab6 524const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
89c43c0a 525const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
4977bab6 526const int x86_3dnow_a = m_ATHLON_K8;
89c43c0a
VM
527const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
528const int x86_branch_hints = m_PENT4 | m_NOCONA;
529const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4 | m_NOCONA;
e075ae69
RH
530const int x86_partial_reg_stall = m_PPRO;
531const int x86_use_loop = m_K6;
4977bab6 532const int x86_use_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
e075ae69
RH
533const int x86_use_mov0 = m_K6;
534const int x86_use_cltd = ~(m_PENT | m_K6);
535const int x86_read_modify_write = ~m_PENT;
536const int x86_read_modify = ~(m_PENT | m_PPRO);
537const int x86_split_long_moves = m_PPRO;
4977bab6 538const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
285464d0 539const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
89c43c0a 540const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
d9f32422
JH
541const int x86_qimode_math = ~(0);
542const int x86_promote_qi_regs = 0;
543const int x86_himode_math = ~(m_PPRO);
544const int x86_promote_hi_regs = m_PPRO;
89c43c0a
VM
545const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA;
546const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA;
547const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4 | m_NOCONA;
548const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4 | m_NOCONA;
549const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO);
550const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA;
551const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA;
552const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO;
7b50a809
JH
553const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO;
554const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO;
89c43c0a 555const int x86_decompose_lea = m_PENT4 | m_NOCONA;
495333a6 556const int x86_shift1 = ~m_486;
89c43c0a
VM
557const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
558const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO;
4977bab6 559/* Set for machines where the type and dependencies are resolved on SSE register
d1f87653 560 parts instead of whole registers, so we may maintain just lower part of
4977bab6
ZW
561 scalar values in proper format leaving the upper part undefined. */
562const int x86_sse_partial_regs = m_ATHLON_K8;
563/* Athlon optimizes partial-register FPS special case, thus avoiding the
564 need for extra instructions beforehand */
565const int x86_sse_partial_regs_for_cvtsd2ss = 0;
566const int x86_sse_typeless_stores = m_ATHLON_K8;
89c43c0a 567const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
4977bab6
ZW
568const int x86_use_ffreep = m_ATHLON_K8;
569const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
8f62128d 570const int x86_inter_unit_moves = ~(m_ATHLON_K8);
89c43c0a 571const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_NOCONA | m_PPRO;
be04394b
JH
572/* Some CPU cores are not able to predict more than 4 branch instructions in
573 the 16 byte window. */
89c43c0a 574const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
a269a03c 575
d1f87653 576/* In case the average insn count for single function invocation is
6ab16dd9
JH
577 lower than this constant, emit fast (but longer) prologue and
578 epilogue code. */
4977bab6 579#define FAST_PROLOGUE_INSN_COUNT 20
5bf0ebab 580
5bf0ebab
RH
581/* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
582static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
583static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
584static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
4c0d89b5
RS
585
586/* Array of the smallest class containing reg number REGNO, indexed by
0f290768 587 REGNO. Used by REGNO_REG_CLASS in i386.h. */
4c0d89b5 588
e075ae69 589enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
4c0d89b5
RS
590{
591 /* ax, dx, cx, bx */
ab408a86 592 AREG, DREG, CREG, BREG,
4c0d89b5 593 /* si, di, bp, sp */
e075ae69 594 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
4c0d89b5
RS
595 /* FP registers */
596 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
79325812 597 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
4c0d89b5 598 /* arg pointer */
83774849 599 NON_Q_REGS,
564d80f4 600 /* flags, fpsr, dirflag, frame */
a7180f70
BS
601 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
602 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
603 SSE_REGS, SSE_REGS,
604 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
3d117b30
JH
605 MMX_REGS, MMX_REGS,
606 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
607 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
608 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
609 SSE_REGS, SSE_REGS,
4c0d89b5 610};
c572e5ba 611
3d117b30 612/* The "default" register map used in 32bit mode. */
83774849 613
0f290768 614int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
83774849
RH
615{
616 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
617 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
3d117b30 618 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
a7180f70
BS
619 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
620 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
3d117b30
JH
621 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
622 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
83774849
RH
623};
624
5bf0ebab
RH
625static int const x86_64_int_parameter_registers[6] =
626{
627 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
628 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
629};
630
631static int const x86_64_int_return_registers[4] =
632{
633 0 /*RAX*/, 1 /*RDI*/, 5 /*RDI*/, 4 /*RSI*/
634};
53c17031 635
0f7fa3d0
JH
636/* The "default" register map used in 64bit mode. */
637int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
638{
639 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
3de72741 640 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
0f7fa3d0
JH
641 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
642 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
643 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
644 8,9,10,11,12,13,14,15, /* extended integer registers */
645 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
646};
647
83774849
RH
648/* Define the register numbers to be used in Dwarf debugging information.
649 The SVR4 reference port C compiler uses the following register numbers
650 in its Dwarf output code:
651 0 for %eax (gcc regno = 0)
652 1 for %ecx (gcc regno = 2)
653 2 for %edx (gcc regno = 1)
654 3 for %ebx (gcc regno = 3)
655 4 for %esp (gcc regno = 7)
656 5 for %ebp (gcc regno = 6)
657 6 for %esi (gcc regno = 4)
658 7 for %edi (gcc regno = 5)
659 The following three DWARF register numbers are never generated by
660 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
661 believes these numbers have these meanings.
662 8 for %eip (no gcc equivalent)
663 9 for %eflags (gcc regno = 17)
664 10 for %trapno (no gcc equivalent)
665 It is not at all clear how we should number the FP stack registers
666 for the x86 architecture. If the version of SDB on x86/svr4 were
667 a bit less brain dead with respect to floating-point then we would
668 have a precedent to follow with respect to DWARF register numbers
669 for x86 FP registers, but the SDB on x86/svr4 is so completely
670 broken with respect to FP registers that it is hardly worth thinking
671 of it as something to strive for compatibility with.
672 The version of x86/svr4 SDB I have at the moment does (partially)
673 seem to believe that DWARF register number 11 is associated with
674 the x86 register %st(0), but that's about all. Higher DWARF
675 register numbers don't seem to be associated with anything in
676 particular, and even for DWARF regno 11, SDB only seems to under-
677 stand that it should say that a variable lives in %st(0) (when
678 asked via an `=' command) if we said it was in DWARF regno 11,
679 but SDB still prints garbage when asked for the value of the
680 variable in question (via a `/' command).
681 (Also note that the labels SDB prints for various FP stack regs
682 when doing an `x' command are all wrong.)
683 Note that these problems generally don't affect the native SVR4
684 C compiler because it doesn't allow the use of -O with -g and
685 because when it is *not* optimizing, it allocates a memory
686 location for each floating-point variable, and the memory
687 location is what gets described in the DWARF AT_location
688 attribute for the variable in question.
689 Regardless of the severe mental illness of the x86/svr4 SDB, we
690 do something sensible here and we use the following DWARF
691 register numbers. Note that these are all stack-top-relative
692 numbers.
693 11 for %st(0) (gcc regno = 8)
694 12 for %st(1) (gcc regno = 9)
695 13 for %st(2) (gcc regno = 10)
696 14 for %st(3) (gcc regno = 11)
697 15 for %st(4) (gcc regno = 12)
698 16 for %st(5) (gcc regno = 13)
699 17 for %st(6) (gcc regno = 14)
700 18 for %st(7) (gcc regno = 15)
701*/
0f290768 702int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
83774849
RH
703{
704 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
705 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
3f3f2124 706 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
a7180f70
BS
707 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
708 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
d1f87653
KH
709 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
710 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
83774849
RH
711};
712
c572e5ba
JVA
713/* Test and compare insns in i386.md store the information needed to
714 generate branch and scc insns here. */
715
07933f72
GS
716rtx ix86_compare_op0 = NULL_RTX;
717rtx ix86_compare_op1 = NULL_RTX;
f5316dfe 718
7a2e09f4 719#define MAX_386_STACK_LOCALS 3
8362f420
JH
720/* Size of the register save area. */
721#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
36edd3cc
BS
722
723/* Define the structure for the machine field in struct function. */
ddb0ae00
ZW
724
725struct stack_local_entry GTY(())
726{
727 unsigned short mode;
728 unsigned short n;
729 rtx rtl;
730 struct stack_local_entry *next;
731};
732
4dd2ac2c
JH
733/* Structure describing stack frame layout.
734 Stack grows downward:
735
736 [arguments]
737 <- ARG_POINTER
738 saved pc
739
740 saved frame pointer if frame_pointer_needed
741 <- HARD_FRAME_POINTER
742 [saved regs]
743
744 [padding1] \
745 )
746 [va_arg registers] (
747 > to_allocate <- FRAME_POINTER
748 [frame] (
749 )
750 [padding2] /
751 */
752struct ix86_frame
753{
754 int nregs;
755 int padding1;
8362f420 756 int va_arg_size;
4dd2ac2c
JH
757 HOST_WIDE_INT frame;
758 int padding2;
759 int outgoing_arguments_size;
8362f420 760 int red_zone_size;
4dd2ac2c
JH
761
762 HOST_WIDE_INT to_allocate;
763 /* The offsets relative to ARG_POINTER. */
764 HOST_WIDE_INT frame_pointer_offset;
765 HOST_WIDE_INT hard_frame_pointer_offset;
766 HOST_WIDE_INT stack_pointer_offset;
d9b40e8d
JH
767
768 /* When save_regs_using_mov is set, emit prologue using
769 move instead of push instructions. */
770 bool save_regs_using_mov;
4dd2ac2c
JH
771};
772
c93e80a5
JH
773/* Used to enable/disable debugging features. */
774const char *ix86_debug_arg_string, *ix86_debug_addr_string;
6189a572
JH
775/* Code model option as passed by user. */
776const char *ix86_cmodel_string;
777/* Parsed value. */
778enum cmodel ix86_cmodel;
80f33d06
GS
779/* Asm dialect. */
780const char *ix86_asm_string;
781enum asm_dialect ix86_asm_dialect = ASM_ATT;
f996902d
RH
782/* TLS dialext. */
783const char *ix86_tls_dialect_string;
784enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
6189a572 785
5bf0ebab 786/* Which unit we are generating floating point math for. */
965f5423
JH
787enum fpmath_unit ix86_fpmath;
788
5bf0ebab 789/* Which cpu are we scheduling for. */
9e555526 790enum processor_type ix86_tune;
5bf0ebab
RH
791/* Which instruction set architecture to use. */
792enum processor_type ix86_arch;
c8c5cb99
SC
793
794/* Strings to hold which cpu and instruction set architecture to use. */
9e555526 795const char *ix86_tune_string; /* for -mtune=<xxx> */
9c23aa47 796const char *ix86_arch_string; /* for -march=<xxx> */
965f5423 797const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
c8c5cb99 798
0f290768 799/* # of registers to use to pass arguments. */
e075ae69 800const char *ix86_regparm_string;
e9a25f70 801
f4365627
JH
802/* true if sse prefetch instruction is not NOOP. */
803int x86_prefetch_sse;
804
e075ae69
RH
805/* ix86_regparm_string as a number */
806int ix86_regparm;
e9a25f70
JL
807
808/* Alignment to use for loops and jumps: */
809
0f290768 810/* Power of two alignment for loops. */
e075ae69 811const char *ix86_align_loops_string;
e9a25f70 812
0f290768 813/* Power of two alignment for non-loop jumps. */
e075ae69 814const char *ix86_align_jumps_string;
e9a25f70 815
3af4bd89 816/* Power of two alignment for stack boundary in bytes. */
e075ae69 817const char *ix86_preferred_stack_boundary_string;
3af4bd89
JH
818
819/* Preferred alignment for stack boundary in bits. */
e075ae69 820int ix86_preferred_stack_boundary;
3af4bd89 821
e9a25f70 822/* Values 1-5: see jump.c */
e075ae69
RH
823int ix86_branch_cost;
824const char *ix86_branch_cost_string;
e9a25f70 825
0f290768 826/* Power of two alignment for functions. */
e075ae69 827const char *ix86_align_funcs_string;
623fe810
RH
828
829/* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
830static char internal_label_prefix[16];
831static int internal_label_prefix_len;
e075ae69 832\f
b96a374d
AJ
833static int local_symbolic_operand (rtx, enum machine_mode);
834static int tls_symbolic_operand_1 (rtx, enum tls_model);
835static void output_pic_addr_const (FILE *, rtx, int);
836static void put_condition_code (enum rtx_code, enum machine_mode,
837 int, int, FILE *);
838static const char *get_some_local_dynamic_name (void);
839static int get_some_local_dynamic_name_1 (rtx *, void *);
840static rtx maybe_get_pool_constant (rtx);
841static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
842static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
843 rtx *);
e129d93a
ILT
844static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
845static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
846 enum machine_mode);
b96a374d
AJ
847static rtx get_thread_pointer (int);
848static rtx legitimize_tls_address (rtx, enum tls_model, int);
849static void get_pc_thunk_name (char [32], unsigned int);
850static rtx gen_push (rtx);
851static int memory_address_length (rtx addr);
852static int ix86_flags_dependant (rtx, rtx, enum attr_type);
853static int ix86_agi_dependant (rtx, rtx, enum attr_type);
b96a374d
AJ
854static struct machine_function * ix86_init_machine_status (void);
855static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
856static int ix86_nsaved_regs (void);
857static void ix86_emit_save_regs (void);
858static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
72613dfa 859static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
b96a374d 860static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
b96a374d
AJ
861static HOST_WIDE_INT ix86_GOT_alias_set (void);
862static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
863static rtx ix86_expand_aligntest (rtx, int);
4e44c1ef 864static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
b96a374d
AJ
865static int ix86_issue_rate (void);
866static int ix86_adjust_cost (rtx, rtx, rtx, int);
b96a374d
AJ
867static int ia32_use_dfa_pipeline_interface (void);
868static int ia32_multipass_dfa_lookahead (void);
869static void ix86_init_mmx_sse_builtins (void);
870static rtx x86_this_parameter (tree);
871static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
872 HOST_WIDE_INT, tree);
873static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
874static void x86_file_start (void);
875static void ix86_reorg (void);
c35d187f
RH
876static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
877static tree ix86_build_builtin_va_list (void);
a0524eb3
KH
878static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
879 tree, int *, int);
e075ae69
RH
880
881struct ix86_address
882{
883 rtx base, index, disp;
884 HOST_WIDE_INT scale;
74dc3e94 885 enum ix86_address_seg { SEG_DEFAULT, SEG_FS, SEG_GS } seg;
e075ae69 886};
b08de47e 887
b96a374d
AJ
888static int ix86_decompose_address (rtx, struct ix86_address *);
889static int ix86_address_cost (rtx);
890static bool ix86_cannot_force_const_mem (rtx);
891static rtx ix86_delegitimize_address (rtx);
bd793c65
BS
892
893struct builtin_description;
b96a374d
AJ
894static rtx ix86_expand_sse_comi (const struct builtin_description *,
895 tree, rtx);
896static rtx ix86_expand_sse_compare (const struct builtin_description *,
897 tree, rtx);
898static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
899static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
900static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
901static rtx ix86_expand_store_builtin (enum insn_code, tree);
902static rtx safe_vector_operand (rtx, enum machine_mode);
903static enum rtx_code ix86_fp_compare_code_to_integer (enum rtx_code);
904static void ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *,
905 enum rtx_code *, enum rtx_code *);
906static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
907static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
908static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
909static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
910static int ix86_fp_comparison_cost (enum rtx_code code);
911static unsigned int ix86_select_alt_pic_regnum (void);
912static int ix86_save_reg (unsigned int, int);
913static void ix86_compute_frame_layout (struct ix86_frame *);
914static int ix86_comp_type_attributes (tree, tree);
e767b5be 915static int ix86_function_regparm (tree, tree);
91d231cb 916const struct attribute_spec ix86_attribute_table[];
b96a374d
AJ
917static bool ix86_function_ok_for_sibcall (tree, tree);
918static tree ix86_handle_cdecl_attribute (tree *, tree, tree, int, bool *);
919static tree ix86_handle_regparm_attribute (tree *, tree, tree, int, bool *);
920static int ix86_value_regno (enum machine_mode);
921static bool contains_128bit_aligned_vector_p (tree);
922static bool ix86_ms_bitfield_layout_p (tree);
923static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
924static int extended_reg_mentioned_1 (rtx *, void *);
925static bool ix86_rtx_costs (rtx, int, int, int *);
926static int min_insn_size (rtx);
67dfe110 927static tree ix86_md_asm_clobbers (tree clobbers);
7c262518 928
21c318ba 929#if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
b96a374d 930static void ix86_svr3_asm_out_constructor (rtx, int);
2cc07db4 931#endif
e56feed6 932
53c17031
JH
933/* Register class used for passing given 64bit part of the argument.
934 These represent classes as documented by the PS ABI, with the exception
935 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
d1f87653 936 use SF or DFmode move instead of DImode to avoid reformatting penalties.
53c17031 937
d1f87653 938 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
53c17031
JH
939 whenever possible (upper half does contain padding).
940 */
941enum x86_64_reg_class
942 {
943 X86_64_NO_CLASS,
944 X86_64_INTEGER_CLASS,
945 X86_64_INTEGERSI_CLASS,
946 X86_64_SSE_CLASS,
947 X86_64_SSESF_CLASS,
948 X86_64_SSEDF_CLASS,
949 X86_64_SSEUP_CLASS,
950 X86_64_X87_CLASS,
951 X86_64_X87UP_CLASS,
952 X86_64_MEMORY_CLASS
953 };
0b5826ac 954static const char * const x86_64_reg_class_name[] =
53c17031
JH
955 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
956
957#define MAX_CLASSES 4
b96a374d
AJ
958static int classify_argument (enum machine_mode, tree,
959 enum x86_64_reg_class [MAX_CLASSES], int);
960static int examine_argument (enum machine_mode, tree, int, int *, int *);
961static rtx construct_container (enum machine_mode, tree, int, int, int,
962 const int *, int);
963static enum x86_64_reg_class merge_classes (enum x86_64_reg_class,
964 enum x86_64_reg_class);
881b2a96 965
43f3a59d 966/* Table of constants used by fldpi, fldln2, etc.... */
881b2a96
RS
967static REAL_VALUE_TYPE ext_80387_constants_table [5];
968static bool ext_80387_constants_init = 0;
b96a374d 969static void init_ext_80387_constants (void);
672a6f42
NB
970\f
971/* Initialize the GCC target structure. */
91d231cb
JM
972#undef TARGET_ATTRIBUTE_TABLE
973#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
672a6f42 974#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
f5f4be42
NB
975# undef TARGET_MERGE_DECL_ATTRIBUTES
976# define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
672a6f42
NB
977#endif
978
8d8e52be
JM
979#undef TARGET_COMP_TYPE_ATTRIBUTES
980#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
981
f6155fda
SS
982#undef TARGET_INIT_BUILTINS
983#define TARGET_INIT_BUILTINS ix86_init_builtins
984
985#undef TARGET_EXPAND_BUILTIN
986#define TARGET_EXPAND_BUILTIN ix86_expand_builtin
987
bd09bdeb
RH
988#undef TARGET_ASM_FUNCTION_EPILOGUE
989#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
08c148a8 990
17b53c33
NB
991#undef TARGET_ASM_OPEN_PAREN
992#define TARGET_ASM_OPEN_PAREN ""
993#undef TARGET_ASM_CLOSE_PAREN
994#define TARGET_ASM_CLOSE_PAREN ""
995
301d03af
RS
996#undef TARGET_ASM_ALIGNED_HI_OP
997#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
998#undef TARGET_ASM_ALIGNED_SI_OP
999#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
1000#ifdef ASM_QUAD
1001#undef TARGET_ASM_ALIGNED_DI_OP
1002#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
1003#endif
1004
1005#undef TARGET_ASM_UNALIGNED_HI_OP
1006#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
1007#undef TARGET_ASM_UNALIGNED_SI_OP
1008#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1009#undef TARGET_ASM_UNALIGNED_DI_OP
1010#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1011
c237e94a
ZW
1012#undef TARGET_SCHED_ADJUST_COST
1013#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1014#undef TARGET_SCHED_ISSUE_RATE
1015#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
fce5a9f2 1016#undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
9b690711
RH
1017#define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
1018 ia32_use_dfa_pipeline_interface
1019#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1020#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1021 ia32_multipass_dfa_lookahead
c237e94a 1022
4977bab6
ZW
1023#undef TARGET_FUNCTION_OK_FOR_SIBCALL
1024#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1025
f996902d
RH
1026#ifdef HAVE_AS_TLS
1027#undef TARGET_HAVE_TLS
1028#define TARGET_HAVE_TLS true
1029#endif
3a04ff64
RH
1030#undef TARGET_CANNOT_FORCE_CONST_MEM
1031#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
f996902d 1032
7daebb7a 1033#undef TARGET_DELEGITIMIZE_ADDRESS
69bd9368 1034#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
7daebb7a 1035
4977bab6
ZW
1036#undef TARGET_MS_BITFIELD_LAYOUT_P
1037#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1038
c590b625
RH
1039#undef TARGET_ASM_OUTPUT_MI_THUNK
1040#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
3961e8fe
RH
1041#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1042#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
c590b625 1043
1bc7c5b6
ZW
1044#undef TARGET_ASM_FILE_START
1045#define TARGET_ASM_FILE_START x86_file_start
1046
3c50106f
RH
1047#undef TARGET_RTX_COSTS
1048#define TARGET_RTX_COSTS ix86_rtx_costs
dcefdf67
RH
1049#undef TARGET_ADDRESS_COST
1050#define TARGET_ADDRESS_COST ix86_address_cost
3c50106f 1051
e129d93a
ILT
1052#undef TARGET_FIXED_CONDITION_CODE_REGS
1053#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1054#undef TARGET_CC_MODES_COMPATIBLE
1055#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1056
18dbd950
RS
1057#undef TARGET_MACHINE_DEPENDENT_REORG
1058#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1059
c35d187f
RH
1060#undef TARGET_BUILD_BUILTIN_VA_LIST
1061#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1062
67dfe110
KH
1063#undef TARGET_MD_ASM_CLOBBERS
1064#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1065
9184f892
KH
1066#undef TARGET_PROMOTE_PROTOTYPES
1067#define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1068
a0524eb3
KH
1069#undef TARGET_SETUP_INCOMING_VARARGS
1070#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1071
f6897b10 1072struct gcc_target targetm = TARGET_INITIALIZER;
89c43c0a 1073
e075ae69 1074\f
/* The svr4 ABI for the i386 says that records and unions are returned
   in memory.  */
#ifndef DEFAULT_PCC_STRUCT_RETURN
#define DEFAULT_PCC_STRUCT_RETURN 1
#endif
f5316dfe
MM
1081/* Sometimes certain combinations of command options do not make
1082 sense on a particular target machine. You can define a macro
1083 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1084 defined, is executed once just after all the command options have
1085 been parsed.
1086
1087 Don't use this macro to turn on various extra optimizations for
1088 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1089
1090void
b96a374d 1091override_options (void)
f5316dfe 1092{
400500c4 1093 int i;
e075ae69
RH
1094 /* Comes from final.c -- no real reason to change it. */
1095#define MAX_CODE_ALIGN 16
f5316dfe 1096
c8c5cb99
SC
1097 static struct ptt
1098 {
8b60264b
KG
1099 const struct processor_costs *cost; /* Processor costs */
1100 const int target_enable; /* Target flags to enable. */
1101 const int target_disable; /* Target flags to disable. */
1102 const int align_loop; /* Default alignments. */
2cca7283 1103 const int align_loop_max_skip;
8b60264b 1104 const int align_jump;
2cca7283 1105 const int align_jump_max_skip;
8b60264b 1106 const int align_func;
e075ae69 1107 }
0f290768 1108 const processor_target_table[PROCESSOR_max] =
e075ae69 1109 {
4977bab6
ZW
1110 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1111 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1112 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1113 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1114 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1115 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1116 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
89c43c0a
VM
1117 {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
1118 {&nocona_cost, 0, 0, 0, 0, 0, 0, 0}
e075ae69
RH
1119 };
1120
f4365627 1121 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
e075ae69
RH
1122 static struct pta
1123 {
8b60264b
KG
1124 const char *const name; /* processor name or nickname. */
1125 const enum processor_type processor;
0dd0e980
JH
1126 const enum pta_flags
1127 {
1128 PTA_SSE = 1,
1129 PTA_SSE2 = 2,
5bbeea44
JH
1130 PTA_SSE3 = 4,
1131 PTA_MMX = 8,
1132 PTA_PREFETCH_SSE = 16,
1133 PTA_3DNOW = 32,
4977bab6
ZW
1134 PTA_3DNOW_A = 64,
1135 PTA_64BIT = 128
0dd0e980 1136 } flags;
e075ae69 1137 }
0f290768 1138 const processor_alias_table[] =
e075ae69 1139 {
0dd0e980
JH
1140 {"i386", PROCESSOR_I386, 0},
1141 {"i486", PROCESSOR_I486, 0},
1142 {"i586", PROCESSOR_PENTIUM, 0},
1143 {"pentium", PROCESSOR_PENTIUM, 0},
1144 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
3780101d
JG
1145 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1146 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1147 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
3462df62 1148 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
0dd0e980
JH
1149 {"i686", PROCESSOR_PENTIUMPRO, 0},
1150 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1151 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
f4365627 1152 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
5bbeea44
JH
1153 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1154 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1155 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1156 | PTA_MMX | PTA_PREFETCH_SSE},
1157 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1158 | PTA_MMX | PTA_PREFETCH_SSE},
89c43c0a
VM
1159 {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
1160 | PTA_MMX | PTA_PREFETCH_SSE},
1161 {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
5bbeea44 1162 | PTA_MMX | PTA_PREFETCH_SSE},
0dd0e980
JH
1163 {"k6", PROCESSOR_K6, PTA_MMX},
1164 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1165 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
f4365627 1166 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
0dd0e980 1167 | PTA_3DNOW_A},
f4365627 1168 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
0dd0e980 1169 | PTA_3DNOW | PTA_3DNOW_A},
f4365627 1170 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
0dd0e980 1171 | PTA_3DNOW_A | PTA_SSE},
f4365627 1172 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
0dd0e980 1173 | PTA_3DNOW_A | PTA_SSE},
f4365627 1174 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
0dd0e980 1175 | PTA_3DNOW_A | PTA_SSE},
3fec9fa9
JJ
1176 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1177 | PTA_SSE | PTA_SSE2 },
4977bab6
ZW
1178 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1179 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
9a609388
JH
1180 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1181 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1182 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1183 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1184 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1185 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
3af4bd89 1186 };
c8c5cb99 1187
ca7558fc 1188 int const pta_size = ARRAY_SIZE (processor_alias_table);
c8c5cb99 1189
41ed2237 1190 /* Set the default values for switches whose default depends on TARGET_64BIT
d1f87653 1191 in case they weren't overwritten by command line options. */
55ba61f3
JH
1192 if (TARGET_64BIT)
1193 {
1194 if (flag_omit_frame_pointer == 2)
1195 flag_omit_frame_pointer = 1;
1196 if (flag_asynchronous_unwind_tables == 2)
1197 flag_asynchronous_unwind_tables = 1;
1198 if (flag_pcc_struct_return == 2)
1199 flag_pcc_struct_return = 0;
1200 }
1201 else
1202 {
1203 if (flag_omit_frame_pointer == 2)
1204 flag_omit_frame_pointer = 0;
1205 if (flag_asynchronous_unwind_tables == 2)
1206 flag_asynchronous_unwind_tables = 0;
1207 if (flag_pcc_struct_return == 2)
7c712dcc 1208 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
55ba61f3
JH
1209 }
1210
f5316dfe
MM
1211#ifdef SUBTARGET_OVERRIDE_OPTIONS
1212 SUBTARGET_OVERRIDE_OPTIONS;
1213#endif
1214
9e555526
RH
1215 if (!ix86_tune_string && ix86_arch_string)
1216 ix86_tune_string = ix86_arch_string;
1217 if (!ix86_tune_string)
1218 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
f4365627 1219 if (!ix86_arch_string)
3fec9fa9 1220 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
e075ae69 1221
6189a572
JH
1222 if (ix86_cmodel_string != 0)
1223 {
1224 if (!strcmp (ix86_cmodel_string, "small"))
1225 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1226 else if (flag_pic)
c725bd79 1227 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
6189a572
JH
1228 else if (!strcmp (ix86_cmodel_string, "32"))
1229 ix86_cmodel = CM_32;
1230 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1231 ix86_cmodel = CM_KERNEL;
1232 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
1233 ix86_cmodel = CM_MEDIUM;
1234 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1235 ix86_cmodel = CM_LARGE;
1236 else
1237 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1238 }
1239 else
1240 {
1241 ix86_cmodel = CM_32;
1242 if (TARGET_64BIT)
1243 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1244 }
c93e80a5
JH
1245 if (ix86_asm_string != 0)
1246 {
1247 if (!strcmp (ix86_asm_string, "intel"))
1248 ix86_asm_dialect = ASM_INTEL;
1249 else if (!strcmp (ix86_asm_string, "att"))
1250 ix86_asm_dialect = ASM_ATT;
1251 else
1252 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1253 }
6189a572 1254 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
c725bd79 1255 error ("code model `%s' not supported in the %s bit mode",
6189a572
JH
1256 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1257 if (ix86_cmodel == CM_LARGE)
c725bd79 1258 sorry ("code model `large' not supported yet");
0c2dc519 1259 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
c725bd79 1260 sorry ("%i-bit mode not compiled in",
0c2dc519 1261 (target_flags & MASK_64BIT) ? 64 : 32);
6189a572 1262
f4365627
JH
1263 for (i = 0; i < pta_size; i++)
1264 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1265 {
1266 ix86_arch = processor_alias_table[i].processor;
1267 /* Default cpu tuning to the architecture. */
9e555526 1268 ix86_tune = ix86_arch;
f4365627 1269 if (processor_alias_table[i].flags & PTA_MMX
9ef1b13a 1270 && !(target_flags_explicit & MASK_MMX))
f4365627
JH
1271 target_flags |= MASK_MMX;
1272 if (processor_alias_table[i].flags & PTA_3DNOW
9ef1b13a 1273 && !(target_flags_explicit & MASK_3DNOW))
f4365627
JH
1274 target_flags |= MASK_3DNOW;
1275 if (processor_alias_table[i].flags & PTA_3DNOW_A
9ef1b13a 1276 && !(target_flags_explicit & MASK_3DNOW_A))
f4365627
JH
1277 target_flags |= MASK_3DNOW_A;
1278 if (processor_alias_table[i].flags & PTA_SSE
9ef1b13a 1279 && !(target_flags_explicit & MASK_SSE))
f4365627
JH
1280 target_flags |= MASK_SSE;
1281 if (processor_alias_table[i].flags & PTA_SSE2
9ef1b13a 1282 && !(target_flags_explicit & MASK_SSE2))
f4365627 1283 target_flags |= MASK_SSE2;
5bbeea44
JH
1284 if (processor_alias_table[i].flags & PTA_SSE3
1285 && !(target_flags_explicit & MASK_SSE3))
1286 target_flags |= MASK_SSE3;
f4365627
JH
1287 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1288 x86_prefetch_sse = true;
4977bab6
ZW
1289 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1290 error ("CPU you selected does not support x86-64 instruction set");
f4365627
JH
1291 break;
1292 }
400500c4 1293
f4365627
JH
1294 if (i == pta_size)
1295 error ("bad value (%s) for -march= switch", ix86_arch_string);
e075ae69 1296
f4365627 1297 for (i = 0; i < pta_size; i++)
9e555526 1298 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
f4365627 1299 {
9e555526 1300 ix86_tune = processor_alias_table[i].processor;
4977bab6
ZW
1301 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1302 error ("CPU you selected does not support x86-64 instruction set");
f4365627
JH
1303 break;
1304 }
1305 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1306 x86_prefetch_sse = true;
1307 if (i == pta_size)
9e555526 1308 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
e075ae69 1309
2ab0437e
JH
1310 if (optimize_size)
1311 ix86_cost = &size_cost;
1312 else
9e555526
RH
1313 ix86_cost = processor_target_table[ix86_tune].cost;
1314 target_flags |= processor_target_table[ix86_tune].target_enable;
1315 target_flags &= ~processor_target_table[ix86_tune].target_disable;
e075ae69 1316
36edd3cc
BS
1317 /* Arrange to set up i386_stack_locals for all functions. */
1318 init_machine_status = ix86_init_machine_status;
fce5a9f2 1319
0f290768 1320 /* Validate -mregparm= value. */
e075ae69 1321 if (ix86_regparm_string)
b08de47e 1322 {
400500c4
RK
1323 i = atoi (ix86_regparm_string);
1324 if (i < 0 || i > REGPARM_MAX)
1325 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1326 else
1327 ix86_regparm = i;
b08de47e 1328 }
0d7d98ee
JH
1329 else
1330 if (TARGET_64BIT)
1331 ix86_regparm = REGPARM_MAX;
b08de47e 1332
3e18fdf6 1333 /* If the user has provided any of the -malign-* options,
a4f31c00 1334 warn and use that value only if -falign-* is not set.
3e18fdf6 1335 Remove this code in GCC 3.2 or later. */
e075ae69 1336 if (ix86_align_loops_string)
b08de47e 1337 {
3e18fdf6
GK
1338 warning ("-malign-loops is obsolete, use -falign-loops");
1339 if (align_loops == 0)
1340 {
1341 i = atoi (ix86_align_loops_string);
1342 if (i < 0 || i > MAX_CODE_ALIGN)
1343 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1344 else
1345 align_loops = 1 << i;
1346 }
b08de47e 1347 }
3af4bd89 1348
e075ae69 1349 if (ix86_align_jumps_string)
b08de47e 1350 {
3e18fdf6
GK
1351 warning ("-malign-jumps is obsolete, use -falign-jumps");
1352 if (align_jumps == 0)
1353 {
1354 i = atoi (ix86_align_jumps_string);
1355 if (i < 0 || i > MAX_CODE_ALIGN)
1356 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1357 else
1358 align_jumps = 1 << i;
1359 }
b08de47e 1360 }
b08de47e 1361
e075ae69 1362 if (ix86_align_funcs_string)
b08de47e 1363 {
3e18fdf6
GK
1364 warning ("-malign-functions is obsolete, use -falign-functions");
1365 if (align_functions == 0)
1366 {
1367 i = atoi (ix86_align_funcs_string);
1368 if (i < 0 || i > MAX_CODE_ALIGN)
1369 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1370 else
1371 align_functions = 1 << i;
1372 }
b08de47e 1373 }
3af4bd89 1374
3e18fdf6 1375 /* Default align_* from the processor table. */
3e18fdf6 1376 if (align_loops == 0)
2cca7283 1377 {
9e555526
RH
1378 align_loops = processor_target_table[ix86_tune].align_loop;
1379 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
2cca7283 1380 }
3e18fdf6 1381 if (align_jumps == 0)
2cca7283 1382 {
9e555526
RH
1383 align_jumps = processor_target_table[ix86_tune].align_jump;
1384 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
2cca7283 1385 }
3e18fdf6 1386 if (align_functions == 0)
2cca7283 1387 {
9e555526 1388 align_functions = processor_target_table[ix86_tune].align_func;
2cca7283 1389 }
3e18fdf6 1390
e4c0478d 1391 /* Validate -mpreferred-stack-boundary= value, or provide default.
fbb83b43
AO
1392 The default of 128 bits is for Pentium III's SSE __m128, but we
1393 don't want additional code to keep the stack aligned when
1394 optimizing for code size. */
1395 ix86_preferred_stack_boundary = (optimize_size
ef49d42e 1396 ? TARGET_64BIT ? 128 : 32
fbb83b43 1397 : 128);
e075ae69 1398 if (ix86_preferred_stack_boundary_string)
3af4bd89 1399 {
400500c4 1400 i = atoi (ix86_preferred_stack_boundary_string);
ef49d42e 1401 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
c6257c5d 1402 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
ef49d42e 1403 TARGET_64BIT ? 4 : 2);
400500c4
RK
1404 else
1405 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
3af4bd89 1406 }
77a989d1 1407
0f290768 1408 /* Validate -mbranch-cost= value, or provide default. */
9e555526 1409 ix86_branch_cost = processor_target_table[ix86_tune].cost->branch_cost;
e075ae69 1410 if (ix86_branch_cost_string)
804a8ee0 1411 {
400500c4
RK
1412 i = atoi (ix86_branch_cost_string);
1413 if (i < 0 || i > 5)
1414 error ("-mbranch-cost=%d is not between 0 and 5", i);
1415 else
1416 ix86_branch_cost = i;
804a8ee0 1417 }
804a8ee0 1418
f996902d
RH
1419 if (ix86_tls_dialect_string)
1420 {
1421 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1422 ix86_tls_dialect = TLS_DIALECT_GNU;
1423 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1424 ix86_tls_dialect = TLS_DIALECT_SUN;
1425 else
1426 error ("bad value (%s) for -mtls-dialect= switch",
1427 ix86_tls_dialect_string);
1428 }
1429
e9a25f70
JL
1430 /* Keep nonleaf frame pointers. */
1431 if (TARGET_OMIT_LEAF_FRAME_POINTER)
77a989d1 1432 flag_omit_frame_pointer = 1;
e075ae69
RH
1433
1434 /* If we're doing fast math, we don't care about comparison order
1435 wrt NaNs. This lets us use a shorter comparison sequence. */
de6c5979 1436 if (flag_unsafe_math_optimizations)
e075ae69
RH
1437 target_flags &= ~MASK_IEEE_FP;
1438
30c99a84
RH
1439 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1440 since the insns won't need emulation. */
1441 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1442 target_flags &= ~MASK_NO_FANCY_MATH_387;
1443
9e200aaf
KC
1444 /* Turn on SSE2 builtins for -msse3. */
1445 if (TARGET_SSE3)
22c7c85e
L
1446 target_flags |= MASK_SSE2;
1447
1448 /* Turn on SSE builtins for -msse2. */
1449 if (TARGET_SSE2)
1450 target_flags |= MASK_SSE;
1451
14f73b5a
JH
1452 if (TARGET_64BIT)
1453 {
1454 if (TARGET_ALIGN_DOUBLE)
c725bd79 1455 error ("-malign-double makes no sense in the 64bit mode");
14f73b5a 1456 if (TARGET_RTD)
c725bd79 1457 error ("-mrtd calling convention not supported in the 64bit mode");
14f73b5a 1458 /* Enable by default the SSE and MMX builtins. */
965f5423
JH
1459 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1460 ix86_fpmath = FPMATH_SSE;
14f73b5a 1461 }
965f5423 1462 else
a5b378d6
JH
1463 {
1464 ix86_fpmath = FPMATH_387;
1465 /* i386 ABI does not specify red zone. It still makes sense to use it
1466 when programmer takes care to stack from being destroyed. */
1467 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
1468 target_flags |= MASK_NO_RED_ZONE;
1469 }
965f5423
JH
1470
1471 if (ix86_fpmath_string != 0)
1472 {
1473 if (! strcmp (ix86_fpmath_string, "387"))
1474 ix86_fpmath = FPMATH_387;
1475 else if (! strcmp (ix86_fpmath_string, "sse"))
1476 {
1477 if (!TARGET_SSE)
1478 {
1479 warning ("SSE instruction set disabled, using 387 arithmetics");
1480 ix86_fpmath = FPMATH_387;
1481 }
1482 else
1483 ix86_fpmath = FPMATH_SSE;
1484 }
1485 else if (! strcmp (ix86_fpmath_string, "387,sse")
1486 || ! strcmp (ix86_fpmath_string, "sse,387"))
1487 {
1488 if (!TARGET_SSE)
1489 {
1490 warning ("SSE instruction set disabled, using 387 arithmetics");
1491 ix86_fpmath = FPMATH_387;
1492 }
1493 else if (!TARGET_80387)
1494 {
1495 warning ("387 instruction set disabled, using SSE arithmetics");
1496 ix86_fpmath = FPMATH_SSE;
1497 }
1498 else
1499 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1500 }
fce5a9f2 1501 else
965f5423
JH
1502 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1503 }
14f73b5a 1504
a7180f70
BS
1505 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1506 on by -msse. */
1507 if (TARGET_SSE)
e37af218
RH
1508 {
1509 target_flags |= MASK_MMX;
1510 x86_prefetch_sse = true;
1511 }
c6036a37 1512
47f339cf
BS
1513 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
1514 if (TARGET_3DNOW)
1515 {
1516 target_flags |= MASK_MMX;
d1f87653 1517 /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
47f339cf
BS
1518 extensions it adds. */
1519 if (x86_3dnow_a & (1 << ix86_arch))
1520 target_flags |= MASK_3DNOW_A;
1521 }
9e555526 1522 if ((x86_accumulate_outgoing_args & TUNEMASK)
9ef1b13a 1523 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
c6036a37
JH
1524 && !optimize_size)
1525 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
623fe810
RH
1526
1527 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1528 {
1529 char *p;
1530 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1531 p = strchr (internal_label_prefix, 'X');
1532 internal_label_prefix_len = p - internal_label_prefix;
1533 *p = '\0';
1534 }
f5316dfe
MM
1535}
1536\f
32b5b1aa 1537void
b96a374d 1538optimization_options (int level, int size ATTRIBUTE_UNUSED)
32b5b1aa 1539{
e9a25f70
JL
1540 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1541 make the problem with not enough registers even worse. */
32b5b1aa
SC
1542#ifdef INSN_SCHEDULING
1543 if (level > 1)
1544 flag_schedule_insns = 0;
1545#endif
55ba61f3
JH
1546
1547 /* The default values of these switches depend on the TARGET_64BIT
1548 that is not known at this moment. Mark these values with 2 and
1549 let user the to override these. In case there is no command line option
1550 specifying them, we will set the defaults in override_options. */
1551 if (optimize >= 1)
1552 flag_omit_frame_pointer = 2;
1553 flag_pcc_struct_return = 2;
1554 flag_asynchronous_unwind_tables = 2;
32b5b1aa 1555}
b08de47e 1556\f
/* Table of valid machine attributes.  Scanned by the generic attribute
   machinery; each entry is { name, min_len, max_len, decl_req, type_req,
   fn_type_req, handler }.  */
const struct attribute_spec ix86_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  /* Stdcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "stdcall",   0, 0, false, true,  true,  ix86_handle_cdecl_attribute },
  /* Fastcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "fastcall",  0, 0, false, true,  true,  ix86_handle_cdecl_attribute },
  /* Cdecl attribute says the callee is a normal C declaration */
  { "cdecl",     0, 0, false, true,  true,  ix86_handle_cdecl_attribute },
  /* Regparm attribute specifies how many integer arguments are to be
     passed in registers.  */
  { "regparm",   1, 1, false, true,  true,  ix86_handle_regparm_attribute },
#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
  /* DLL import/export attributes; only available on targets (cygwin,
     mingw, interix) that define TARGET_DLLIMPORT_DECL_ATTRIBUTES.  */
  { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
  { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
  { "shared",    0, 0, true,  false, false, ix86_handle_shared_attribute },
#endif
  /* Structure layout attributes selecting MS- vs GCC-compatible rules.  */
  { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
  { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
  /* Sentinel terminating the table.  */
  { NULL,        0, 0, false, false, false, NULL }
};
1581
/* Decide whether we can make a sibling call to a function.  DECL is the
   declaration of the function being targeted by the call and EXP is the
   CALL_EXPR representing the call.  Returns false whenever a sibcall
   would be unsafe on this target.  */

static bool
ix86_function_ok_for_sibcall (tree decl, tree exp)
{
  /* If we are generating position-independent code, we cannot sibcall
     optimize any indirect call, or a direct call to a global function,
     as the PLT requires %ebx be live.  */
  if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
    return false;

  /* If we are returning floats on the 80387 register stack, we cannot
     make a sibcall from a function that doesn't return a float to a
     function that does or, conversely, from a function that does return
     a float to a function that doesn't; the necessary stack adjustment
     would not be executed.  */
  if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp)))
      != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)))))
    return false;

  /* If this call is indirect, we'll need to be able to use a call-clobbered
     register for the address of the target function.  Make sure that all
     such registers are not used for passing parameters.  */
  if (!decl && !TARGET_64BIT)
    {
      tree type;

      /* We're looking at the CALL_EXPR, we need the type of the function.  */
      type = TREE_OPERAND (exp, 0);	/* pointer expression */
      type = TREE_TYPE (type);		/* pointer type */
      type = TREE_TYPE (type);		/* function type */

      /* With regparm >= 3 all call-clobbered integer registers
	 (eax, edx, ecx) may carry arguments, leaving none free to
	 hold the target address.  */
      if (ix86_function_regparm (type, NULL) >= 3)
	{
	  /* ??? Need to count the actual number of registers to be used,
	     not the possible number of registers.  Fix later.  */
	  return false;
	}
    }

  /* Otherwise okay.  That also includes certain types of indirect calls.  */
  return true;
}
1627
e91f04de 1628/* Handle a "cdecl", "stdcall", or "fastcall" attribute;
91d231cb
JM
1629 arguments as in struct attribute_spec.handler. */
1630static tree
b96a374d
AJ
1631ix86_handle_cdecl_attribute (tree *node, tree name,
1632 tree args ATTRIBUTE_UNUSED,
1633 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
91d231cb
JM
1634{
1635 if (TREE_CODE (*node) != FUNCTION_TYPE
1636 && TREE_CODE (*node) != METHOD_TYPE
1637 && TREE_CODE (*node) != FIELD_DECL
1638 && TREE_CODE (*node) != TYPE_DECL)
b08de47e 1639 {
91d231cb
JM
1640 warning ("`%s' attribute only applies to functions",
1641 IDENTIFIER_POINTER (name));
1642 *no_add_attrs = true;
1643 }
e91f04de
CH
1644 else
1645 {
1646 if (is_attribute_p ("fastcall", name))
1647 {
1648 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
1649 {
1650 error ("fastcall and stdcall attributes are not compatible");
1651 }
1652 else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
1653 {
1654 error ("fastcall and regparm attributes are not compatible");
1655 }
1656 }
1657 else if (is_attribute_p ("stdcall", name))
1658 {
1659 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1660 {
1661 error ("fastcall and stdcall attributes are not compatible");
1662 }
1663 }
1664 }
b08de47e 1665
91d231cb
JM
1666 if (TARGET_64BIT)
1667 {
1668 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1669 *no_add_attrs = true;
1670 }
b08de47e 1671
91d231cb
JM
1672 return NULL_TREE;
1673}
b08de47e 1674
91d231cb
JM
1675/* Handle a "regparm" attribute;
1676 arguments as in struct attribute_spec.handler. */
1677static tree
b96a374d
AJ
1678ix86_handle_regparm_attribute (tree *node, tree name, tree args,
1679 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
91d231cb
JM
1680{
1681 if (TREE_CODE (*node) != FUNCTION_TYPE
1682 && TREE_CODE (*node) != METHOD_TYPE
1683 && TREE_CODE (*node) != FIELD_DECL
1684 && TREE_CODE (*node) != TYPE_DECL)
1685 {
1686 warning ("`%s' attribute only applies to functions",
1687 IDENTIFIER_POINTER (name));
1688 *no_add_attrs = true;
1689 }
1690 else
1691 {
1692 tree cst;
b08de47e 1693
91d231cb
JM
1694 cst = TREE_VALUE (args);
1695 if (TREE_CODE (cst) != INTEGER_CST)
1696 {
1697 warning ("`%s' attribute requires an integer constant argument",
1698 IDENTIFIER_POINTER (name));
1699 *no_add_attrs = true;
1700 }
1701 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1702 {
1703 warning ("argument to `%s' attribute larger than %d",
1704 IDENTIFIER_POINTER (name), REGPARM_MAX);
1705 *no_add_attrs = true;
1706 }
e91f04de
CH
1707
1708 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
e767b5be
JH
1709 {
1710 error ("fastcall and regparm attributes are not compatible");
1711 }
b08de47e
MM
1712 }
1713
91d231cb 1714 return NULL_TREE;
b08de47e
MM
1715}
1716
1717/* Return 0 if the attributes for two types are incompatible, 1 if they
1718 are compatible, and 2 if they are nearly compatible (which causes a
1719 warning to be generated). */
1720
8d8e52be 1721static int
b96a374d 1722ix86_comp_type_attributes (tree type1, tree type2)
b08de47e 1723{
0f290768 1724 /* Check for mismatch of non-default calling convention. */
27c38fbe 1725 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
afcfe58c
MM
1726
1727 if (TREE_CODE (type1) != FUNCTION_TYPE)
1728 return 1;
1729
b96a374d 1730 /* Check for mismatched fastcall types */
e91f04de
CH
1731 if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
1732 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
b96a374d 1733 return 0;
e91f04de 1734
afcfe58c 1735 /* Check for mismatched return types (cdecl vs stdcall). */
6093f019
RH
1736 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1737 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
265d94ac
MM
1738 return 0;
1739 if (ix86_function_regparm (type1, NULL)
1740 != ix86_function_regparm (type2, NULL))
afcfe58c 1741 return 0;
b08de47e
MM
1742 return 1;
1743}
b08de47e 1744\f
e767b5be
JH
1745/* Return the regparm value for a fuctio with the indicated TYPE and DECL.
1746 DECL may be NULL when calling function indirectly
839a4992 1747 or considering a libcall. */
483ab821
MM
1748
1749static int
e767b5be 1750ix86_function_regparm (tree type, tree decl)
483ab821
MM
1751{
1752 tree attr;
e767b5be
JH
1753 int regparm = ix86_regparm;
1754 bool user_convention = false;
483ab821 1755
e767b5be
JH
1756 if (!TARGET_64BIT)
1757 {
1758 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1759 if (attr)
1760 {
1761 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1762 user_convention = true;
1763 }
1764
1765 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
1766 {
1767 regparm = 2;
1768 user_convention = true;
1769 }
1770
1771 /* Use register calling convention for local functions when possible. */
1772 if (!TARGET_64BIT && !user_convention && decl
cb0bc263 1773 && flag_unit_at_a_time && !profile_flag)
e767b5be
JH
1774 {
1775 struct cgraph_local_info *i = cgraph_local_info (decl);
1776 if (i && i->local)
1777 {
1778 /* We can't use regparm(3) for nested functions as these use
1779 static chain pointer in third argument. */
1780 if (DECL_CONTEXT (decl) && !DECL_NO_STATIC_CHAIN (decl))
1781 regparm = 2;
1782 else
1783 regparm = 3;
1784 }
1785 }
1786 }
1787 return regparm;
483ab821
MM
1788}
1789
/* Return true if EAX is live at the start of the function.  Used by
   ix86_expand_prologue to determine if we need special help before
   calling allocate_stack_worker.  */

static bool
ix86_eax_live_at_start_p (void)
{
  /* Cheat.  Don't bother working forward from ix86_function_regparm
     to the function type to whether an actual argument is located in
     eax.  Instead just look at cfg info, which is still close enough
     to correct at this point.  This gives false positives for broken
     functions that might use uninitialized data that happens to be
     allocated in eax, but who cares?  */
  /* Register 0 is EAX; query its liveness at the entry block edge.  */
  return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->global_live_at_end, 0);
}
1805
/* Value is the number of bytes of arguments automatically
   popped when returning from a subroutine call.
   FUNDECL is the declaration node of the function (as a tree),
   FUNTYPE is the data type of the function (as a tree),
   or for a library call it is an identifier node for the subroutine name.
   SIZE is the number of bytes of arguments passed on the stack.

   On the 80386, the RTD insn may be used to pop them if the number
   of args is fixed, but if the number is variable then the caller
   must pop them all.  RTD can't be used for library calls now
   because the library is compiled with the Unix compiler.
   Use of RTD is a selectable option, since it is incompatible with
   standard Unix calling sequences.  If the option is not selected,
   the caller must always pop the args.

   The attribute stdcall is equivalent to RTD on a per module basis.  */

int
ix86_return_pops_args (tree fundecl, tree funtype, int size)
{
  /* -mrtd applies except to library calls (identifier node FUNDECL).  */
  int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);

  /* Cdecl functions override -mrtd, and never pop the stack.  */
  if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {

    /* Stdcall and fastcall functions will pop the stack if not
       variable args.  */
    if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
	|| lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
      rtd = 1;

    /* Callee pops only when the argument list is fixed, i.e. it either
       is empty or ends in void_type_node (no trailing "...").  */
    if (rtd
	&& (TYPE_ARG_TYPES (funtype) == NULL_TREE
	    || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
		== void_type_node)))
      return size;
  }

  /* Lose any fake structure return argument if it is passed on the stack.  */
  if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
      && !TARGET_64BIT)
    {
      /* With no register arguments the hidden return pointer is on the
	 stack and the callee pops it (one pointer's worth).  */
      int nregs = ix86_function_regparm (funtype, fundecl);

      if (!nregs)
	return GET_MODE_SIZE (Pmode);
    }

  return 0;
}
b08de47e
MM
1856\f
1857/* Argument support functions. */
1858
53c17031
JH
1859/* Return true when register may be used to pass function parameters. */
1860bool
b96a374d 1861ix86_function_arg_regno_p (int regno)
53c17031
JH
1862{
1863 int i;
1864 if (!TARGET_64BIT)
0333394e
JJ
1865 return (regno < REGPARM_MAX
1866 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
53c17031
JH
1867 if (SSE_REGNO_P (regno) && TARGET_SSE)
1868 return true;
1869 /* RAX is used as hidden argument to va_arg functions. */
1870 if (!regno)
1871 return true;
1872 for (i = 0; i < REGPARM_MAX; i++)
1873 if (regno == x86_64_int_parameter_registers[i])
1874 return true;
1875 return false;
1876}
1877
/* Initialize a variable CUM of type CUMULATIVE_ARGS
   for a call to a function whose data type is FNTYPE.
   For a library call, FNTYPE is 0.  */

void
init_cumulative_args (CUMULATIVE_ARGS *cum,  /* Argument info to initialize */
		      tree fntype,	/* tree ptr for function decl */
		      rtx libname,	/* SYMBOL_REF of library name or 0 */
		      tree fndecl)
{
  /* zero_cum is never written, so it stays all-zero and serves as a
     cheap way to clear *cum.  */
  static CUMULATIVE_ARGS zero_cum;
  tree param, next_param;

  if (TARGET_DEBUG_ARG)
    {
      fprintf (stderr, "\ninit_cumulative_args (");
      if (fntype)
	fprintf (stderr, "fntype code = %s, ret code = %s",
		 tree_code_name[(int) TREE_CODE (fntype)],
		 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
      else
	fprintf (stderr, "no fntype");

      if (libname)
	fprintf (stderr, ", libname = %s", XSTR (libname, 0));
    }

  *cum = zero_cum;

  /* Set up the number of registers to use for passing arguments.  */
  if (fntype)
    cum->nregs = ix86_function_regparm (fntype, fndecl);
  else
    cum->nregs = ix86_regparm;
  cum->sse_nregs = SSE_REGPARM_MAX;
  cum->mmx_nregs = MMX_REGPARM_MAX;
  cum->warn_sse = true;
  cum->warn_mmx = true;
  cum->maybe_vaarg = false;

  /* Use ecx and edx registers if function has fastcall attribute */
  if (fntype && !TARGET_64BIT)
    {
      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
	{
	  cum->nregs = 2;
	  cum->fastcall = 1;
	}
    }


  /* Determine if this function has variable arguments.  This is
     indicated by the last argument being 'void_type_mode' if there
     are no variable arguments.  If there are variable arguments, then
     we won't pass anything in registers */

  if (cum->nregs || !TARGET_MMX || !TARGET_SSE)
    {
      /* Walk the declared parameter list; a list whose final entry is
	 not void_type_node declares a varargs function.  */
      for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
	   param != 0; param = next_param)
	{
	  next_param = TREE_CHAIN (param);
	  if (next_param == 0 && TREE_VALUE (param) != void_type_node)
	    {
	      if (!TARGET_64BIT)
		{
		  /* 32-bit varargs: nothing goes in registers at all.  */
		  cum->nregs = 0;
		  cum->sse_nregs = 0;
		  cum->mmx_nregs = 0;
		  cum->warn_sse = 0;
		  cum->warn_mmx = 0;
		  cum->fastcall = 0;
		}
	      cum->maybe_vaarg = true;
	    }
	}
    }
  /* An unprototyped call (or library call with no known prototype)
     might be varargs; be conservative.  */
  if ((!fntype && !libname)
      || (fntype && !TYPE_ARG_TYPES (fntype)))
    cum->maybe_vaarg = 1;

  if (TARGET_DEBUG_ARG)
    fprintf (stderr, ", nregs=%d )\n", cum->nregs);

  return;
}
1964
d1f87653 1965/* x86-64 register passing implementation. See x86-64 ABI for details. Goal
f710504c 1966 of this code is to classify each 8bytes of incoming argument by the register
53c17031
JH
1967 class and assign registers accordingly. */
1968
1969/* Return the union class of CLASS1 and CLASS2.
1970 See the x86-64 PS ABI for details. */
1971
1972static enum x86_64_reg_class
b96a374d 1973merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
53c17031
JH
1974{
1975 /* Rule #1: If both classes are equal, this is the resulting class. */
1976 if (class1 == class2)
1977 return class1;
1978
1979 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1980 the other class. */
1981 if (class1 == X86_64_NO_CLASS)
1982 return class2;
1983 if (class2 == X86_64_NO_CLASS)
1984 return class1;
1985
1986 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1987 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1988 return X86_64_MEMORY_CLASS;
1989
1990 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1991 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1992 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1993 return X86_64_INTEGERSI_CLASS;
1994 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1995 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1996 return X86_64_INTEGER_CLASS;
1997
1998 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1999 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
2000 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
2001 return X86_64_MEMORY_CLASS;
2002
2003 /* Rule #6: Otherwise class SSE is used. */
2004 return X86_64_SSE_CLASS;
2005}
2006
2007/* Classify the argument of type TYPE and mode MODE.
2008 CLASSES will be filled by the register class used to pass each word
2009 of the operand. The number of words is returned. In case the parameter
2010 should be passed in memory, 0 is returned. As a special case for zero
2011 sized containers, classes[0] will be NO_CLASS and 1 is returned.
2012
2013 BIT_OFFSET is used internally for handling records and specifies offset
2014 of the offset in bits modulo 256 to avoid overflow cases.
2015
2016 See the x86-64 PS ABI for details.
2017*/
2018
2019static int
b96a374d
AJ
2020classify_argument (enum machine_mode mode, tree type,
2021 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
53c17031 2022{
296e4ae8 2023 HOST_WIDE_INT bytes =
53c17031 2024 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
23327dae 2025 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
53c17031 2026
c60ee6f5
JH
2027 /* Variable sized entities are always passed/returned in memory. */
2028 if (bytes < 0)
2029 return 0;
2030
dafc5b82
JH
2031 if (mode != VOIDmode
2032 && MUST_PASS_IN_STACK (mode, type))
2033 return 0;
2034
53c17031
JH
2035 if (type && AGGREGATE_TYPE_P (type))
2036 {
2037 int i;
2038 tree field;
2039 enum x86_64_reg_class subclasses[MAX_CLASSES];
2040
2041 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
2042 if (bytes > 16)
2043 return 0;
2044
2045 for (i = 0; i < words; i++)
2046 classes[i] = X86_64_NO_CLASS;
2047
2048 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
2049 signalize memory class, so handle it as special case. */
2050 if (!words)
2051 {
2052 classes[0] = X86_64_NO_CLASS;
2053 return 1;
2054 }
2055
2056 /* Classify each field of record and merge classes. */
2057 if (TREE_CODE (type) == RECORD_TYPE)
2058 {
91ea38f9
JH
2059 /* For classes first merge in the field of the subclasses. */
2060 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
2061 {
2062 tree bases = TYPE_BINFO_BASETYPES (type);
2063 int n_bases = TREE_VEC_LENGTH (bases);
2064 int i;
2065
2066 for (i = 0; i < n_bases; ++i)
2067 {
2068 tree binfo = TREE_VEC_ELT (bases, i);
2069 int num;
2070 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
2071 tree type = BINFO_TYPE (binfo);
2072
2073 num = classify_argument (TYPE_MODE (type),
2074 type, subclasses,
2075 (offset + bit_offset) % 256);
2076 if (!num)
2077 return 0;
2078 for (i = 0; i < num; i++)
2079 {
db01f480 2080 int pos = (offset + (bit_offset % 64)) / 8 / 8;
91ea38f9
JH
2081 classes[i + pos] =
2082 merge_classes (subclasses[i], classes[i + pos]);
2083 }
2084 }
2085 }
43f3a59d 2086 /* And now merge the fields of structure. */
53c17031
JH
2087 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2088 {
2089 if (TREE_CODE (field) == FIELD_DECL)
2090 {
2091 int num;
2092
2093 /* Bitfields are always classified as integer. Handle them
2094 early, since later code would consider them to be
2095 misaligned integers. */
2096 if (DECL_BIT_FIELD (field))
2097 {
2098 for (i = int_bit_position (field) / 8 / 8;
2099 i < (int_bit_position (field)
2100 + tree_low_cst (DECL_SIZE (field), 0)
b96a374d 2101 + 63) / 8 / 8; i++)
53c17031
JH
2102 classes[i] =
2103 merge_classes (X86_64_INTEGER_CLASS,
2104 classes[i]);
2105 }
2106 else
2107 {
2108 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2109 TREE_TYPE (field), subclasses,
2110 (int_bit_position (field)
2111 + bit_offset) % 256);
2112 if (!num)
2113 return 0;
2114 for (i = 0; i < num; i++)
2115 {
2116 int pos =
db01f480 2117 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
53c17031
JH
2118 classes[i + pos] =
2119 merge_classes (subclasses[i], classes[i + pos]);
2120 }
2121 }
2122 }
2123 }
2124 }
2125 /* Arrays are handled as small records. */
2126 else if (TREE_CODE (type) == ARRAY_TYPE)
2127 {
2128 int num;
2129 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2130 TREE_TYPE (type), subclasses, bit_offset);
2131 if (!num)
2132 return 0;
2133
2134 /* The partial classes are now full classes. */
2135 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2136 subclasses[0] = X86_64_SSE_CLASS;
2137 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
2138 subclasses[0] = X86_64_INTEGER_CLASS;
2139
2140 for (i = 0; i < words; i++)
2141 classes[i] = subclasses[i % num];
2142 }
2143 /* Unions are similar to RECORD_TYPE but offset is always 0. */
e4dbaed5
AS
2144 else if (TREE_CODE (type) == UNION_TYPE
2145 || TREE_CODE (type) == QUAL_UNION_TYPE)
53c17031 2146 {
91ea38f9
JH
2147 /* For classes first merge in the field of the subclasses. */
2148 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
2149 {
2150 tree bases = TYPE_BINFO_BASETYPES (type);
2151 int n_bases = TREE_VEC_LENGTH (bases);
2152 int i;
2153
2154 for (i = 0; i < n_bases; ++i)
2155 {
2156 tree binfo = TREE_VEC_ELT (bases, i);
2157 int num;
2158 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
2159 tree type = BINFO_TYPE (binfo);
2160
2161 num = classify_argument (TYPE_MODE (type),
2162 type, subclasses,
db01f480 2163 (offset + (bit_offset % 64)) % 256);
91ea38f9
JH
2164 if (!num)
2165 return 0;
2166 for (i = 0; i < num; i++)
2167 {
c16576e6 2168 int pos = (offset + (bit_offset % 64)) / 8 / 8;
91ea38f9
JH
2169 classes[i + pos] =
2170 merge_classes (subclasses[i], classes[i + pos]);
2171 }
2172 }
2173 }
53c17031
JH
2174 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2175 {
2176 if (TREE_CODE (field) == FIELD_DECL)
2177 {
2178 int num;
2179 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2180 TREE_TYPE (field), subclasses,
2181 bit_offset);
2182 if (!num)
2183 return 0;
2184 for (i = 0; i < num; i++)
2185 classes[i] = merge_classes (subclasses[i], classes[i]);
2186 }
2187 }
2188 }
448ec26c
WH
2189 else if (TREE_CODE (type) == SET_TYPE)
2190 {
2191 if (bytes <= 4)
2192 {
2193 classes[0] = X86_64_INTEGERSI_CLASS;
2194 return 1;
2195 }
2196 else if (bytes <= 8)
2197 {
2198 classes[0] = X86_64_INTEGER_CLASS;
2199 return 1;
2200 }
2201 else if (bytes <= 12)
2202 {
2203 classes[0] = X86_64_INTEGER_CLASS;
2204 classes[1] = X86_64_INTEGERSI_CLASS;
2205 return 2;
2206 }
2207 else
2208 {
2209 classes[0] = X86_64_INTEGER_CLASS;
2210 classes[1] = X86_64_INTEGER_CLASS;
2211 return 2;
2212 }
2213 }
53c17031
JH
2214 else
2215 abort ();
2216
2217 /* Final merger cleanup. */
2218 for (i = 0; i < words; i++)
2219 {
2220 /* If one class is MEMORY, everything should be passed in
2221 memory. */
2222 if (classes[i] == X86_64_MEMORY_CLASS)
2223 return 0;
2224
d6a7951f 2225 /* The X86_64_SSEUP_CLASS should be always preceded by
53c17031
JH
2226 X86_64_SSE_CLASS. */
2227 if (classes[i] == X86_64_SSEUP_CLASS
2228 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
2229 classes[i] = X86_64_SSE_CLASS;
2230
d6a7951f 2231 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
53c17031
JH
2232 if (classes[i] == X86_64_X87UP_CLASS
2233 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
2234 classes[i] = X86_64_SSE_CLASS;
2235 }
2236 return words;
2237 }
2238
2239 /* Compute alignment needed. We align all types to natural boundaries with
2240 exception of XFmode that is aligned to 64bits. */
2241 if (mode != VOIDmode && mode != BLKmode)
2242 {
2243 int mode_alignment = GET_MODE_BITSIZE (mode);
2244
2245 if (mode == XFmode)
2246 mode_alignment = 128;
2247 else if (mode == XCmode)
2248 mode_alignment = 256;
2c6b27c3
JH
2249 if (COMPLEX_MODE_P (mode))
2250 mode_alignment /= 2;
f5143c46 2251 /* Misaligned fields are always returned in memory. */
53c17031
JH
2252 if (bit_offset % mode_alignment)
2253 return 0;
2254 }
2255
2256 /* Classification of atomic types. */
2257 switch (mode)
2258 {
2259 case DImode:
2260 case SImode:
2261 case HImode:
2262 case QImode:
2263 case CSImode:
2264 case CHImode:
2265 case CQImode:
2266 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2267 classes[0] = X86_64_INTEGERSI_CLASS;
2268 else
2269 classes[0] = X86_64_INTEGER_CLASS;
2270 return 1;
2271 case CDImode:
2272 case TImode:
2273 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2274 return 2;
2275 case CTImode:
2276 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2277 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
2278 return 4;
2279 case SFmode:
2280 if (!(bit_offset % 64))
2281 classes[0] = X86_64_SSESF_CLASS;
2282 else
2283 classes[0] = X86_64_SSE_CLASS;
2284 return 1;
2285 case DFmode:
2286 classes[0] = X86_64_SSEDF_CLASS;
2287 return 1;
f8a1ebc6 2288 case XFmode:
53c17031
JH
2289 classes[0] = X86_64_X87_CLASS;
2290 classes[1] = X86_64_X87UP_CLASS;
2291 return 2;
f8a1ebc6 2292 case TFmode:
cf2348cb
JH
2293 case TCmode:
2294 return 0;
f8a1ebc6 2295 case XCmode:
53c17031
JH
2296 classes[0] = X86_64_X87_CLASS;
2297 classes[1] = X86_64_X87UP_CLASS;
2298 classes[2] = X86_64_X87_CLASS;
2299 classes[3] = X86_64_X87UP_CLASS;
2300 return 4;
2301 case DCmode:
2302 classes[0] = X86_64_SSEDF_CLASS;
2303 classes[1] = X86_64_SSEDF_CLASS;
2304 return 2;
2305 case SCmode:
2306 classes[0] = X86_64_SSE_CLASS;
2307 return 1;
e95d6b23
JH
2308 case V4SFmode:
2309 case V4SImode:
495333a6
JH
2310 case V16QImode:
2311 case V8HImode:
2312 case V2DFmode:
2313 case V2DImode:
e95d6b23
JH
2314 classes[0] = X86_64_SSE_CLASS;
2315 classes[1] = X86_64_SSEUP_CLASS;
2316 return 2;
2317 case V2SFmode:
2318 case V2SImode:
2319 case V4HImode:
2320 case V8QImode:
1194ca05 2321 return 0;
53c17031 2322 case BLKmode:
e95d6b23 2323 case VOIDmode:
53c17031
JH
2324 return 0;
2325 default:
2326 abort ();
2327 }
2328}
2329
2330/* Examine the argument and return set number of register required in each
f5143c46 2331 class. Return 0 iff parameter should be passed in memory. */
53c17031 2332static int
b96a374d
AJ
2333examine_argument (enum machine_mode mode, tree type, int in_return,
2334 int *int_nregs, int *sse_nregs)
53c17031
JH
2335{
2336 enum x86_64_reg_class class[MAX_CLASSES];
2337 int n = classify_argument (mode, type, class, 0);
2338
2339 *int_nregs = 0;
2340 *sse_nregs = 0;
2341 if (!n)
2342 return 0;
2343 for (n--; n >= 0; n--)
2344 switch (class[n])
2345 {
2346 case X86_64_INTEGER_CLASS:
2347 case X86_64_INTEGERSI_CLASS:
2348 (*int_nregs)++;
2349 break;
2350 case X86_64_SSE_CLASS:
2351 case X86_64_SSESF_CLASS:
2352 case X86_64_SSEDF_CLASS:
2353 (*sse_nregs)++;
2354 break;
2355 case X86_64_NO_CLASS:
2356 case X86_64_SSEUP_CLASS:
2357 break;
2358 case X86_64_X87_CLASS:
2359 case X86_64_X87UP_CLASS:
2360 if (!in_return)
2361 return 0;
2362 break;
2363 case X86_64_MEMORY_CLASS:
2364 abort ();
2365 }
2366 return 1;
2367}
/* Construct container for the argument used by GCC interface.  See
   FUNCTION_ARG for the detailed description.  Returns NULL when the
   argument must be passed in memory, a single REG when one register
   suffices, or a PARALLEL describing the register pieces.  */
static rtx
construct_container (enum machine_mode mode, tree type, int in_return,
		     int nintregs, int nsseregs, const int * intreg,
		     int sse_regno)
{
  enum machine_mode tmpmode;
  int bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  enum x86_64_reg_class class[MAX_CLASSES];
  int n;
  int i;
  int nexps = 0;
  int needed_sseregs, needed_intregs;
  rtx exp[MAX_CLASSES];
  rtx ret;

  n = classify_argument (mode, type, class, 0);
  if (TARGET_DEBUG_ARG)
    {
      if (!n)
	fprintf (stderr, "Memory class\n");
      else
	{
	  fprintf (stderr, "Classes:");
	  for (i = 0; i < n; i++)
	    {
	      fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
	    }
	  fprintf (stderr, "\n");
	}
    }
  /* Memory class, or not enough free registers of the right kinds:
     pass in memory.  */
  if (!n)
    return NULL;
  if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
    return NULL;
  if (needed_intregs > nintregs || needed_sseregs > nsseregs)
    return NULL;

  /* First construct simple cases.  Avoid SCmode, since we want to use
     single register to pass this type.  */
  if (n == 1 && mode != SCmode)
    switch (class[0])
      {
      case X86_64_INTEGER_CLASS:
      case X86_64_INTEGERSI_CLASS:
	return gen_rtx_REG (mode, intreg[0]);
      case X86_64_SSE_CLASS:
      case X86_64_SSESF_CLASS:
      case X86_64_SSEDF_CLASS:
	return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
      case X86_64_X87_CLASS:
	return gen_rtx_REG (mode, FIRST_STACK_REG);
      case X86_64_NO_CLASS:
	/* Zero sized array, struct or class.  */
	return NULL;
      default:
	abort ();
      }
  /* A full SSE register (e.g. vector modes) as a single REG.  */
  if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
      && mode != BLKmode)
    return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
  /* A long double on the x87 stack.  */
  if (n == 2
      && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
    return gen_rtx_REG (XFmode, FIRST_STACK_REG);
  /* 128-bit integer quantities in a consecutive register pair.  */
  if (n == 2 && class[0] == X86_64_INTEGER_CLASS
      && class[1] == X86_64_INTEGER_CLASS
      && (mode == CDImode || mode == TImode || mode == TFmode)
      && intreg[0] + 1 == intreg[1])
    return gen_rtx_REG (mode, intreg[0]);
  /* Complex long double, returned entirely on the x87 stack.  */
  if (n == 4
      && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
      && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS
      && mode != BLKmode)
    return gen_rtx_REG (XCmode, FIRST_STACK_REG);

  /* Otherwise figure out the entries of the PARALLEL.  Each entry maps
     a register onto a byte offset (i*8) within the argument.  */
  for (i = 0; i < n; i++)
    {
      switch (class[i])
	{
	case X86_64_NO_CLASS:
	  break;
	case X86_64_INTEGER_CLASS:
	case X86_64_INTEGERSI_CLASS:
	  /* Merge TImodes on aligned occasions here too.  */
	  if (i * 8 + 8 > bytes)
	    tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
	  else if (class[i] == X86_64_INTEGERSI_CLASS)
	    tmpmode = SImode;
	  else
	    tmpmode = DImode;
	  /* We've requested 24 bytes we don't have mode for.  Use DImode.  */
	  if (tmpmode == BLKmode)
	    tmpmode = DImode;
	  exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
					     gen_rtx_REG (tmpmode, *intreg),
					     GEN_INT (i*8));
	  intreg++;
	  break;
	case X86_64_SSESF_CLASS:
	  exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
					     gen_rtx_REG (SFmode,
							  SSE_REGNO (sse_regno)),
					     GEN_INT (i*8));
	  sse_regno++;
	  break;
	case X86_64_SSEDF_CLASS:
	  exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
					     gen_rtx_REG (DFmode,
							  SSE_REGNO (sse_regno)),
					     GEN_INT (i*8));
	  sse_regno++;
	  break;
	case X86_64_SSE_CLASS:
	  /* A following SSEUP word means this SSE register carries a
	     full 16 bytes; consume both words.  */
	  if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
	    tmpmode = TImode;
	  else
	    tmpmode = DImode;
	  exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
					     gen_rtx_REG (tmpmode,
							  SSE_REGNO (sse_regno)),
					     GEN_INT (i*8));
	  if (tmpmode == TImode)
	    i++;
	  sse_regno++;
	  break;
	default:
	  abort ();
	}
    }
  ret =  gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
  for (i = 0; i < nexps; i++)
    XVECEXP (ret, 0, i) = exp [i];
  return ret;
}
2505
b08de47e
MM
2506/* Update the data in CUM to advance over an argument
2507 of mode MODE and data type TYPE.
2508 (TYPE is null for libcalls where that information may not be available.) */
2509
2510void
b96a374d
AJ
2511function_arg_advance (CUMULATIVE_ARGS *cum, /* current arg information */
2512 enum machine_mode mode, /* current arg mode */
2513 tree type, /* type of the argument or 0 if lib support */
2514 int named) /* whether or not the argument was named */
b08de47e 2515{
5ac9118e
KG
2516 int bytes =
2517 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
b08de47e
MM
2518 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2519
2520 if (TARGET_DEBUG_ARG)
2521 fprintf (stderr,
bcf17554
JH
2522 "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, mode=%s, named=%d)\n\n",
2523 words, cum->words, cum->nregs, cum->sse_nregs, GET_MODE_NAME (mode), named);
53c17031 2524 if (TARGET_64BIT)
b08de47e 2525 {
53c17031
JH
2526 int int_nregs, sse_nregs;
2527 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2528 cum->words += words;
2529 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
82a127a9 2530 {
53c17031
JH
2531 cum->nregs -= int_nregs;
2532 cum->sse_nregs -= sse_nregs;
2533 cum->regno += int_nregs;
2534 cum->sse_regno += sse_nregs;
82a127a9 2535 }
53c17031
JH
2536 else
2537 cum->words += words;
b08de47e 2538 }
a4f31c00 2539 else
82a127a9 2540 {
bcf17554
JH
2541 if (TARGET_SSE && SSE_REG_MODE_P (mode)
2542 && (!type || !AGGREGATE_TYPE_P (type)))
53c17031
JH
2543 {
2544 cum->sse_words += words;
2545 cum->sse_nregs -= 1;
2546 cum->sse_regno += 1;
2547 if (cum->sse_nregs <= 0)
2548 {
2549 cum->sse_nregs = 0;
2550 cum->sse_regno = 0;
2551 }
2552 }
bcf17554
JH
2553 else if (TARGET_MMX && MMX_REG_MODE_P (mode)
2554 && (!type || !AGGREGATE_TYPE_P (type)))
2555 {
2556 cum->mmx_words += words;
2557 cum->mmx_nregs -= 1;
2558 cum->mmx_regno += 1;
2559 if (cum->mmx_nregs <= 0)
2560 {
2561 cum->mmx_nregs = 0;
2562 cum->mmx_regno = 0;
2563 }
2564 }
53c17031 2565 else
82a127a9 2566 {
53c17031
JH
2567 cum->words += words;
2568 cum->nregs -= words;
2569 cum->regno += words;
2570
2571 if (cum->nregs <= 0)
2572 {
2573 cum->nregs = 0;
2574 cum->regno = 0;
2575 }
82a127a9
CM
2576 }
2577 }
b08de47e
MM
2578 return;
2579}
2580
2581/* Define where to put the arguments to a function.
2582 Value is zero to push the argument on the stack,
2583 or a hard register in which to store the argument.
2584
2585 MODE is the argument's machine mode.
2586 TYPE is the data type of the argument (as a tree).
2587 This is null for libcalls where that information may
2588 not be available.
2589 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2590 the preceding args and about the function being called.
2591 NAMED is nonzero if this argument is a named parameter
2592 (otherwise it is an extra parameter matching an ellipsis). */
2593
07933f72 2594rtx
b96a374d
AJ
2595function_arg (CUMULATIVE_ARGS *cum, /* current arg information */
2596 enum machine_mode mode, /* current arg mode */
2597 tree type, /* type of the argument or 0 if lib support */
2598 int named) /* != 0 for normal args, == 0 for ... args */
b08de47e
MM
2599{
2600 rtx ret = NULL_RTX;
5ac9118e
KG
2601 int bytes =
2602 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
b08de47e 2603 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
bcf17554 2604 static bool warnedsse, warnedmmx;
b08de47e 2605
5bdc5878 2606 /* Handle a hidden AL argument containing number of registers for varargs
53c17031
JH
2607 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
2608 any AL settings. */
32ee7d1d 2609 if (mode == VOIDmode)
b08de47e 2610 {
53c17031
JH
2611 if (TARGET_64BIT)
2612 return GEN_INT (cum->maybe_vaarg
2613 ? (cum->sse_nregs < 0
2614 ? SSE_REGPARM_MAX
2615 : cum->sse_regno)
2616 : -1);
2617 else
2618 return constm1_rtx;
b08de47e 2619 }
53c17031
JH
2620 if (TARGET_64BIT)
2621 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2622 &x86_64_int_parameter_registers [cum->regno],
2623 cum->sse_regno);
2624 else
2625 switch (mode)
2626 {
2627 /* For now, pass fp/complex values on the stack. */
2628 default:
2629 break;
2630
2631 case BLKmode:
8d454008
RH
2632 if (bytes < 0)
2633 break;
5efb1046 2634 /* FALLTHRU */
53c17031
JH
2635 case DImode:
2636 case SImode:
2637 case HImode:
2638 case QImode:
2639 if (words <= cum->nregs)
b96a374d
AJ
2640 {
2641 int regno = cum->regno;
2642
2643 /* Fastcall allocates the first two DWORD (SImode) or
2644 smaller arguments to ECX and EDX. */
2645 if (cum->fastcall)
2646 {
2647 if (mode == BLKmode || mode == DImode)
2648 break;
2649
2650 /* ECX not EAX is the first allocated register. */
2651 if (regno == 0)
e767b5be 2652 regno = 2;
b96a374d
AJ
2653 }
2654 ret = gen_rtx_REG (mode, regno);
2655 }
53c17031
JH
2656 break;
2657 case TImode:
bcf17554
JH
2658 case V16QImode:
2659 case V8HImode:
2660 case V4SImode:
2661 case V2DImode:
2662 case V4SFmode:
2663 case V2DFmode:
2664 if (!type || !AGGREGATE_TYPE_P (type))
2665 {
e1be55d0 2666 if (!TARGET_SSE && !warnedmmx && cum->warn_sse)
bcf17554
JH
2667 {
2668 warnedsse = true;
2669 warning ("SSE vector argument without SSE enabled "
2670 "changes the ABI");
2671 }
2672 if (cum->sse_nregs)
2673 ret = gen_rtx_REG (mode, cum->sse_regno + FIRST_SSE_REG);
2674 }
2675 break;
2676 case V8QImode:
2677 case V4HImode:
2678 case V2SImode:
2679 case V2SFmode:
2680 if (!type || !AGGREGATE_TYPE_P (type))
2681 {
e1be55d0 2682 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
bcf17554
JH
2683 {
2684 warnedmmx = true;
2685 warning ("MMX vector argument without MMX enabled "
2686 "changes the ABI");
2687 }
2688 if (cum->mmx_nregs)
2689 ret = gen_rtx_REG (mode, cum->mmx_regno + FIRST_MMX_REG);
2690 }
53c17031
JH
2691 break;
2692 }
b08de47e
MM
2693
2694 if (TARGET_DEBUG_ARG)
2695 {
2696 fprintf (stderr,
91ea38f9 2697 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
b08de47e
MM
2698 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2699
2700 if (ret)
91ea38f9 2701 print_simple_rtl (stderr, ret);
b08de47e
MM
2702 else
2703 fprintf (stderr, ", stack");
2704
2705 fprintf (stderr, " )\n");
2706 }
2707
2708 return ret;
2709}
53c17031 2710
09b2e78d
ZD
2711/* A C expression that indicates when an argument must be passed by
2712 reference. If nonzero for an argument, a copy of that argument is
2713 made in memory and a pointer to the argument is passed instead of
2714 the argument itself. The pointer is passed in whatever way is
2715 appropriate for passing a pointer to that type. */
2716
2717int
b96a374d
AJ
2718function_arg_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
2719 enum machine_mode mode ATTRIBUTE_UNUSED,
2720 tree type, int named ATTRIBUTE_UNUSED)
09b2e78d
ZD
2721{
2722 if (!TARGET_64BIT)
2723 return 0;
2724
2725 if (type && int_size_in_bytes (type) == -1)
2726 {
2727 if (TARGET_DEBUG_ARG)
2728 fprintf (stderr, "function_arg_pass_by_reference\n");
2729 return 1;
2730 }
2731
2732 return 0;
2733}
2734
8b978a57
JH
2735/* Return true when TYPE should be 128bit aligned for 32bit argument passing
2736 ABI */
2737static bool
b96a374d 2738contains_128bit_aligned_vector_p (tree type)
8b978a57
JH
2739{
2740 enum machine_mode mode = TYPE_MODE (type);
2741 if (SSE_REG_MODE_P (mode)
2742 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
2743 return true;
2744 if (TYPE_ALIGN (type) < 128)
2745 return false;
2746
2747 if (AGGREGATE_TYPE_P (type))
2748 {
2a43945f 2749 /* Walk the aggregates recursively. */
8b978a57
JH
2750 if (TREE_CODE (type) == RECORD_TYPE
2751 || TREE_CODE (type) == UNION_TYPE
2752 || TREE_CODE (type) == QUAL_UNION_TYPE)
2753 {
2754 tree field;
2755
2756 if (TYPE_BINFO (type) != NULL
2757 && TYPE_BINFO_BASETYPES (type) != NULL)
2758 {
2759 tree bases = TYPE_BINFO_BASETYPES (type);
2760 int n_bases = TREE_VEC_LENGTH (bases);
2761 int i;
2762
2763 for (i = 0; i < n_bases; ++i)
2764 {
2765 tree binfo = TREE_VEC_ELT (bases, i);
2766 tree type = BINFO_TYPE (binfo);
2767
2768 if (contains_128bit_aligned_vector_p (type))
2769 return true;
2770 }
2771 }
43f3a59d 2772 /* And now merge the fields of structure. */
8b978a57
JH
2773 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2774 {
2775 if (TREE_CODE (field) == FIELD_DECL
2776 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
2777 return true;
2778 }
2779 }
2780 /* Just for use if some languages passes arrays by value. */
2781 else if (TREE_CODE (type) == ARRAY_TYPE)
2782 {
2783 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
2784 return true;
2785 }
2786 else
2787 abort ();
2788 }
2789 return false;
2790}
2791
bb498ea3
AH
2792/* Gives the alignment boundary, in bits, of an argument with the
2793 specified mode and type. */
53c17031
JH
2794
2795int
b96a374d 2796ix86_function_arg_boundary (enum machine_mode mode, tree type)
53c17031
JH
2797{
2798 int align;
53c17031
JH
2799 if (type)
2800 align = TYPE_ALIGN (type);
2801 else
2802 align = GET_MODE_ALIGNMENT (mode);
2803 if (align < PARM_BOUNDARY)
2804 align = PARM_BOUNDARY;
8b978a57
JH
2805 if (!TARGET_64BIT)
2806 {
2807 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
2808 make an exception for SSE modes since these require 128bit
b96a374d 2809 alignment.
8b978a57
JH
2810
2811 The handling here differs from field_alignment. ICC aligns MMX
2812 arguments to 4 byte boundaries, while structure fields are aligned
2813 to 8 byte boundaries. */
2814 if (!type)
2815 {
2816 if (!SSE_REG_MODE_P (mode))
2817 align = PARM_BOUNDARY;
2818 }
2819 else
2820 {
2821 if (!contains_128bit_aligned_vector_p (type))
2822 align = PARM_BOUNDARY;
2823 }
8b978a57 2824 }
53c17031
JH
2825 if (align > 128)
2826 align = 128;
2827 return align;
2828}
2829
2830/* Return true if N is a possible register number of function value. */
2831bool
b96a374d 2832ix86_function_value_regno_p (int regno)
53c17031
JH
2833{
2834 if (!TARGET_64BIT)
2835 {
2836 return ((regno) == 0
2837 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2838 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2839 }
2840 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2841 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2842 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2843}
2844
2845/* Define how to find the value returned by a function.
2846 VALTYPE is the data type of the value (as a tree).
2847 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2848 otherwise, FUNC is 0. */
2849rtx
b96a374d 2850ix86_function_value (tree valtype)
53c17031
JH
2851{
2852 if (TARGET_64BIT)
2853 {
2854 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2855 REGPARM_MAX, SSE_REGPARM_MAX,
2856 x86_64_int_return_registers, 0);
d1f87653
KH
2857 /* For zero sized structures, construct_container return NULL, but we need
2858 to keep rest of compiler happy by returning meaningful value. */
53c17031
JH
2859 if (!ret)
2860 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2861 return ret;
2862 }
2863 else
b069de3b
SS
2864 return gen_rtx_REG (TYPE_MODE (valtype),
2865 ix86_value_regno (TYPE_MODE (valtype)));
53c17031
JH
2866}
2867
f5143c46 2868/* Return false iff type is returned in memory. */
53c17031 2869int
b96a374d 2870ix86_return_in_memory (tree type)
53c17031 2871{
a30b6839
RH
2872 int needed_intregs, needed_sseregs, size;
2873 enum machine_mode mode = TYPE_MODE (type);
2874
53c17031 2875 if (TARGET_64BIT)
a30b6839
RH
2876 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
2877
2878 if (mode == BLKmode)
2879 return 1;
2880
2881 size = int_size_in_bytes (type);
2882
2883 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
2884 return 0;
2885
2886 if (VECTOR_MODE_P (mode) || mode == TImode)
53c17031 2887 {
a30b6839
RH
2888 /* User-created vectors small enough to fit in EAX. */
2889 if (size < 8)
5e062767 2890 return 0;
a30b6839
RH
2891
2892 /* MMX/3dNow values are returned on the stack, since we've
2893 got to EMMS/FEMMS before returning. */
2894 if (size == 8)
53c17031 2895 return 1;
a30b6839
RH
2896
2897 /* SSE values are returned in XMM0. */
2898 /* ??? Except when it doesn't exist? We have a choice of
2899 either (1) being abi incompatible with a -march switch,
2900 or (2) generating an error here. Given no good solution,
2901 I think the safest thing is one warning. The user won't
43f3a59d 2902 be able to use -Werror, but.... */
a30b6839
RH
2903 if (size == 16)
2904 {
2905 static bool warned;
2906
2907 if (TARGET_SSE)
2908 return 0;
2909
2910 if (!warned)
2911 {
2912 warned = true;
2913 warning ("SSE vector return without SSE enabled "
2914 "changes the ABI");
2915 }
2916 return 1;
2917 }
53c17031 2918 }
a30b6839 2919
cf2348cb 2920 if (mode == XFmode)
a30b6839 2921 return 0;
f8a1ebc6 2922
a30b6839
RH
2923 if (size > 12)
2924 return 1;
2925 return 0;
53c17031
JH
2926}
2927
2928/* Define how to find the value returned by a library function
2929 assuming the value has mode MODE. */
2930rtx
b96a374d 2931ix86_libcall_value (enum machine_mode mode)
53c17031
JH
2932{
2933 if (TARGET_64BIT)
2934 {
2935 switch (mode)
2936 {
f8a1ebc6
JH
2937 case SFmode:
2938 case SCmode:
2939 case DFmode:
2940 case DCmode:
2941 return gen_rtx_REG (mode, FIRST_SSE_REG);
2942 case XFmode:
2943 case XCmode:
2944 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2945 case TFmode:
f8a1ebc6
JH
2946 case TCmode:
2947 return NULL;
2948 default:
2949 return gen_rtx_REG (mode, 0);
53c17031
JH
2950 }
2951 }
2952 else
f8a1ebc6 2953 return gen_rtx_REG (mode, ix86_value_regno (mode));
b069de3b
SS
2954}
2955
2956/* Given a mode, return the register to use for a return value. */
2957
2958static int
b96a374d 2959ix86_value_regno (enum machine_mode mode)
b069de3b 2960{
a30b6839 2961 /* Floating point return values in %st(0). */
b069de3b
SS
2962 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
2963 return FIRST_FLOAT_REG;
a30b6839
RH
2964 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
2965 we prevent this case when sse is not available. */
2966 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
b069de3b 2967 return FIRST_SSE_REG;
a30b6839 2968 /* Everything else in %eax. */
b069de3b 2969 return 0;
53c17031 2970}
ad919812
JH
2971\f
2972/* Create the va_list data type. */
53c17031 2973
c35d187f
RH
2974static tree
2975ix86_build_builtin_va_list (void)
ad919812
JH
2976{
2977 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
53c17031 2978
ad919812
JH
2979 /* For i386 we use plain pointer to argument area. */
2980 if (!TARGET_64BIT)
2981 return build_pointer_type (char_type_node);
2982
f1e639b1 2983 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
ad919812
JH
2984 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2985
fce5a9f2 2986 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
ad919812 2987 unsigned_type_node);
fce5a9f2 2988 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
ad919812
JH
2989 unsigned_type_node);
2990 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2991 ptr_type_node);
2992 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2993 ptr_type_node);
2994
2995 DECL_FIELD_CONTEXT (f_gpr) = record;
2996 DECL_FIELD_CONTEXT (f_fpr) = record;
2997 DECL_FIELD_CONTEXT (f_ovf) = record;
2998 DECL_FIELD_CONTEXT (f_sav) = record;
2999
3000 TREE_CHAIN (record) = type_decl;
3001 TYPE_NAME (record) = type_decl;
3002 TYPE_FIELDS (record) = f_gpr;
3003 TREE_CHAIN (f_gpr) = f_fpr;
3004 TREE_CHAIN (f_fpr) = f_ovf;
3005 TREE_CHAIN (f_ovf) = f_sav;
3006
3007 layout_type (record);
3008
3009 /* The correct type is an array type of one element. */
3010 return build_array_type (record, build_index_type (size_zero_node));
3011}
3012
a0524eb3 3013/* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
ad919812 3014
a0524eb3 3015static void
b96a374d
AJ
3016ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3017 tree type, int *pretend_size ATTRIBUTE_UNUSED,
3018 int no_rtl)
ad919812
JH
3019{
3020 CUMULATIVE_ARGS next_cum;
3021 rtx save_area = NULL_RTX, mem;
3022 rtx label;
3023 rtx label_ref;
3024 rtx tmp_reg;
3025 rtx nsse_reg;
3026 int set;
3027 tree fntype;
3028 int stdarg_p;
3029 int i;
3030
3031 if (!TARGET_64BIT)
3032 return;
3033
3034 /* Indicate to allocate space on the stack for varargs save area. */
3035 ix86_save_varrargs_registers = 1;
3036
5474eed5
JH
3037 cfun->stack_alignment_needed = 128;
3038
ad919812
JH
3039 fntype = TREE_TYPE (current_function_decl);
3040 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
3041 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
3042 != void_type_node));
3043
3044 /* For varargs, we do not want to skip the dummy va_dcl argument.
3045 For stdargs, we do want to skip the last named argument. */
3046 next_cum = *cum;
3047 if (stdarg_p)
3048 function_arg_advance (&next_cum, mode, type, 1);
3049
3050 if (!no_rtl)
3051 save_area = frame_pointer_rtx;
3052
3053 set = get_varargs_alias_set ();
3054
3055 for (i = next_cum.regno; i < ix86_regparm; i++)
3056 {
3057 mem = gen_rtx_MEM (Pmode,
3058 plus_constant (save_area, i * UNITS_PER_WORD));
0692acba 3059 set_mem_alias_set (mem, set);
ad919812
JH
3060 emit_move_insn (mem, gen_rtx_REG (Pmode,
3061 x86_64_int_parameter_registers[i]));
3062 }
3063
3064 if (next_cum.sse_nregs)
3065 {
3066 /* Now emit code to save SSE registers. The AX parameter contains number
d1f87653 3067 of SSE parameter registers used to call this function. We use
ad919812
JH
3068 sse_prologue_save insn template that produces computed jump across
3069 SSE saves. We need some preparation work to get this working. */
3070
3071 label = gen_label_rtx ();
3072 label_ref = gen_rtx_LABEL_REF (Pmode, label);
3073
3074 /* Compute address to jump to :
3075 label - 5*eax + nnamed_sse_arguments*5 */
3076 tmp_reg = gen_reg_rtx (Pmode);
3077 nsse_reg = gen_reg_rtx (Pmode);
3078 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
3079 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
44cf5b6a 3080 gen_rtx_MULT (Pmode, nsse_reg,
ad919812
JH
3081 GEN_INT (4))));
3082 if (next_cum.sse_regno)
3083 emit_move_insn
3084 (nsse_reg,
3085 gen_rtx_CONST (DImode,
3086 gen_rtx_PLUS (DImode,
3087 label_ref,
3088 GEN_INT (next_cum.sse_regno * 4))));
3089 else
3090 emit_move_insn (nsse_reg, label_ref);
3091 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
3092
3093 /* Compute address of memory block we save into. We always use pointer
3094 pointing 127 bytes after first byte to store - this is needed to keep
3095 instruction size limited by 4 bytes. */
3096 tmp_reg = gen_reg_rtx (Pmode);
8ac61af7
RK
3097 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3098 plus_constant (save_area,
3099 8 * REGPARM_MAX + 127)));
ad919812 3100 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
14f73b5a 3101 set_mem_alias_set (mem, set);
8ac61af7 3102 set_mem_align (mem, BITS_PER_WORD);
ad919812
JH
3103
3104 /* And finally do the dirty job! */
8ac61af7
RK
3105 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
3106 GEN_INT (next_cum.sse_regno), label));
ad919812
JH
3107 }
3108
3109}
3110
3111/* Implement va_start. */
3112
3113void
b96a374d 3114ix86_va_start (tree valist, rtx nextarg)
ad919812
JH
3115{
3116 HOST_WIDE_INT words, n_gpr, n_fpr;
3117 tree f_gpr, f_fpr, f_ovf, f_sav;
3118 tree gpr, fpr, ovf, sav, t;
3119
3120 /* Only 64bit target needs something special. */
3121 if (!TARGET_64BIT)
3122 {
e5faf155 3123 std_expand_builtin_va_start (valist, nextarg);
ad919812
JH
3124 return;
3125 }
3126
3127 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3128 f_fpr = TREE_CHAIN (f_gpr);
3129 f_ovf = TREE_CHAIN (f_fpr);
3130 f_sav = TREE_CHAIN (f_ovf);
3131
3132 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3133 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
3134 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
3135 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
3136 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
3137
3138 /* Count number of gp and fp argument registers used. */
3139 words = current_function_args_info.words;
3140 n_gpr = current_function_args_info.regno;
3141 n_fpr = current_function_args_info.sse_regno;
3142
3143 if (TARGET_DEBUG_ARG)
3144 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
b531087a 3145 (int) words, (int) n_gpr, (int) n_fpr);
ad919812
JH
3146
3147 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
3148 build_int_2 (n_gpr * 8, 0));
3149 TREE_SIDE_EFFECTS (t) = 1;
3150 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3151
3152 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
3153 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
3154 TREE_SIDE_EFFECTS (t) = 1;
3155 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3156
3157 /* Find the overflow area. */
3158 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
3159 if (words != 0)
3160 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
3161 build_int_2 (words * UNITS_PER_WORD, 0));
3162 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3163 TREE_SIDE_EFFECTS (t) = 1;
3164 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3165
3166 /* Find the register save area.
3167 Prologue of the function save it right above stack frame. */
3168 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
3169 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
3170 TREE_SIDE_EFFECTS (t) = 1;
3171 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3172}
3173
3174/* Implement va_arg. */
3175rtx
b96a374d 3176ix86_va_arg (tree valist, tree type)
ad919812 3177{
0139adca 3178 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
ad919812
JH
3179 tree f_gpr, f_fpr, f_ovf, f_sav;
3180 tree gpr, fpr, ovf, sav, t;
b932f770 3181 int size, rsize;
ad919812
JH
3182 rtx lab_false, lab_over = NULL_RTX;
3183 rtx addr_rtx, r;
3184 rtx container;
09b2e78d 3185 int indirect_p = 0;
ad919812
JH
3186
3187 /* Only 64bit target needs something special. */
3188 if (!TARGET_64BIT)
3189 {
3190 return std_expand_builtin_va_arg (valist, type);
3191 }
3192
3193 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3194 f_fpr = TREE_CHAIN (f_gpr);
3195 f_ovf = TREE_CHAIN (f_fpr);
3196 f_sav = TREE_CHAIN (f_ovf);
3197
3198 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3199 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
3200 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
3201 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
3202 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
3203
3204 size = int_size_in_bytes (type);
09b2e78d
ZD
3205 if (size == -1)
3206 {
3207 /* Passed by reference. */
3208 indirect_p = 1;
3209 type = build_pointer_type (type);
3210 size = int_size_in_bytes (type);
3211 }
ad919812
JH
3212 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3213
3214 container = construct_container (TYPE_MODE (type), type, 0,
3215 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
3216 /*
3217 * Pull the value out of the saved registers ...
3218 */
3219
3220 addr_rtx = gen_reg_rtx (Pmode);
3221
3222 if (container)
3223 {
3224 rtx int_addr_rtx, sse_addr_rtx;
3225 int needed_intregs, needed_sseregs;
3226 int need_temp;
3227
3228 lab_over = gen_label_rtx ();
3229 lab_false = gen_label_rtx ();
8bad7136 3230
ad919812
JH
3231 examine_argument (TYPE_MODE (type), type, 0,
3232 &needed_intregs, &needed_sseregs);
3233
3234
3235 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
3236 || TYPE_ALIGN (type) > 128);
3237
d1f87653 3238 /* In case we are passing structure, verify that it is consecutive block
ad919812
JH
3239 on the register save area. If not we need to do moves. */
3240 if (!need_temp && !REG_P (container))
3241 {
d1f87653 3242 /* Verify that all registers are strictly consecutive */
ad919812
JH
3243 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
3244 {
3245 int i;
3246
3247 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3248 {
3249 rtx slot = XVECEXP (container, 0, i);
b531087a 3250 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
ad919812
JH
3251 || INTVAL (XEXP (slot, 1)) != i * 16)
3252 need_temp = 1;
3253 }
3254 }
3255 else
3256 {
3257 int i;
3258
3259 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3260 {
3261 rtx slot = XVECEXP (container, 0, i);
b531087a 3262 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
ad919812
JH
3263 || INTVAL (XEXP (slot, 1)) != i * 8)
3264 need_temp = 1;
3265 }
3266 }
3267 }
3268 if (!need_temp)
3269 {
3270 int_addr_rtx = addr_rtx;
3271 sse_addr_rtx = addr_rtx;
3272 }
3273 else
3274 {
3275 int_addr_rtx = gen_reg_rtx (Pmode);
3276 sse_addr_rtx = gen_reg_rtx (Pmode);
3277 }
3278 /* First ensure that we fit completely in registers. */
3279 if (needed_intregs)
3280 {
3281 emit_cmp_and_jump_insns (expand_expr
3282 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
3283 GEN_INT ((REGPARM_MAX - needed_intregs +
3284 1) * 8), GE, const1_rtx, SImode,
d43e0b7d 3285 1, lab_false);
ad919812
JH
3286 }
3287 if (needed_sseregs)
3288 {
3289 emit_cmp_and_jump_insns (expand_expr
3290 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
3291 GEN_INT ((SSE_REGPARM_MAX -
3292 needed_sseregs + 1) * 16 +
3293 REGPARM_MAX * 8), GE, const1_rtx,
d43e0b7d 3294 SImode, 1, lab_false);
ad919812
JH
3295 }
3296
3297 /* Compute index to start of area used for integer regs. */
3298 if (needed_intregs)
3299 {
3300 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
3301 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
3302 if (r != int_addr_rtx)
3303 emit_move_insn (int_addr_rtx, r);
3304 }
3305 if (needed_sseregs)
3306 {
3307 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
3308 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
3309 if (r != sse_addr_rtx)
3310 emit_move_insn (sse_addr_rtx, r);
3311 }
3312 if (need_temp)
3313 {
3314 int i;
3315 rtx mem;
70642ee3 3316 rtx x;
ad919812 3317
b932f770 3318 /* Never use the memory itself, as it has the alias set. */
70642ee3
JH
3319 x = XEXP (assign_temp (type, 0, 1, 0), 0);
3320 mem = gen_rtx_MEM (BLKmode, x);
3321 force_operand (x, addr_rtx);
0692acba 3322 set_mem_alias_set (mem, get_varargs_alias_set ());
8ac61af7 3323 set_mem_align (mem, BITS_PER_UNIT);
b932f770 3324
ad919812
JH
3325 for (i = 0; i < XVECLEN (container, 0); i++)
3326 {
3327 rtx slot = XVECEXP (container, 0, i);
3328 rtx reg = XEXP (slot, 0);
3329 enum machine_mode mode = GET_MODE (reg);
3330 rtx src_addr;
3331 rtx src_mem;
3332 int src_offset;
3333 rtx dest_mem;
3334
3335 if (SSE_REGNO_P (REGNO (reg)))
3336 {
3337 src_addr = sse_addr_rtx;
3338 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
3339 }
3340 else
3341 {
3342 src_addr = int_addr_rtx;
3343 src_offset = REGNO (reg) * 8;
3344 }
3345 src_mem = gen_rtx_MEM (mode, src_addr);
0692acba 3346 set_mem_alias_set (src_mem, get_varargs_alias_set ());
ad919812
JH
3347 src_mem = adjust_address (src_mem, mode, src_offset);
3348 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
ad919812
JH
3349 emit_move_insn (dest_mem, src_mem);
3350 }
3351 }
3352
3353 if (needed_intregs)
3354 {
3355 t =
3356 build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
3357 build_int_2 (needed_intregs * 8, 0));
3358 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
3359 TREE_SIDE_EFFECTS (t) = 1;
3360 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3361 }
3362 if (needed_sseregs)
3363 {
3364 t =
3365 build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
3366 build_int_2 (needed_sseregs * 16, 0));
3367 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
3368 TREE_SIDE_EFFECTS (t) = 1;
3369 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3370 }
3371
3372 emit_jump_insn (gen_jump (lab_over));
3373 emit_barrier ();
3374 emit_label (lab_false);
3375 }
3376
3377 /* ... otherwise out of the overflow area. */
3378
3379 /* Care for on-stack alignment if needed. */
3380 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
3381 t = ovf;
3382 else
3383 {
3384 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
3385 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
3386 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
3387 }
3388 t = save_expr (t);
3389
3390 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
3391 if (r != addr_rtx)
3392 emit_move_insn (addr_rtx, r);
3393
3394 t =
3395 build (PLUS_EXPR, TREE_TYPE (t), t,
3396 build_int_2 (rsize * UNITS_PER_WORD, 0));
3397 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3398 TREE_SIDE_EFFECTS (t) = 1;
3399 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3400
3401 if (container)
3402 emit_label (lab_over);
3403
09b2e78d
ZD
3404 if (indirect_p)
3405 {
3406 r = gen_rtx_MEM (Pmode, addr_rtx);
3407 set_mem_alias_set (r, get_varargs_alias_set ());
3408 emit_move_insn (addr_rtx, r);
3409 }
3410
ad919812
JH
3411 return addr_rtx;
3412}
3413\f
c3c637e3
GS
3414/* Return nonzero if OP is either a i387 or SSE fp register. */
3415int
b96a374d 3416any_fp_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
c3c637e3
GS
3417{
3418 return ANY_FP_REG_P (op);
3419}
3420
3421/* Return nonzero if OP is an i387 fp register. */
3422int
b96a374d 3423fp_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
c3c637e3
GS
3424{
3425 return FP_REG_P (op);
3426}
3427
3428/* Return nonzero if OP is a non-fp register_operand. */
3429int
b96a374d 3430register_and_not_any_fp_reg_operand (rtx op, enum machine_mode mode)
c3c637e3
GS
3431{
3432 return register_operand (op, mode) && !ANY_FP_REG_P (op);
3433}
3434
40b982a9 3435/* Return nonzero if OP is a register operand other than an
c3c637e3
GS
3436 i387 fp register. */
3437int
b96a374d 3438register_and_not_fp_reg_operand (rtx op, enum machine_mode mode)
c3c637e3
GS
3439{
3440 return register_operand (op, mode) && !FP_REG_P (op);
3441}
3442
7dd4b4a3
JH
3443/* Return nonzero if OP is general operand representable on x86_64. */
3444
3445int
b96a374d 3446x86_64_general_operand (rtx op, enum machine_mode mode)
7dd4b4a3
JH
3447{
3448 if (!TARGET_64BIT)
3449 return general_operand (op, mode);
3450 if (nonimmediate_operand (op, mode))
3451 return 1;
c05dbe81 3452 return x86_64_sign_extended_value (op);
7dd4b4a3
JH
3453}
3454
3455/* Return nonzero if OP is general operand representable on x86_64
d6a7951f 3456 as either sign extended or zero extended constant. */
7dd4b4a3
JH
3457
3458int
b96a374d 3459x86_64_szext_general_operand (rtx op, enum machine_mode mode)
7dd4b4a3
JH
3460{
3461 if (!TARGET_64BIT)
3462 return general_operand (op, mode);
3463 if (nonimmediate_operand (op, mode))
3464 return 1;
c05dbe81 3465 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
7dd4b4a3
JH
3466}
3467
3468/* Return nonzero if OP is nonmemory operand representable on x86_64. */
3469
3470int
b96a374d 3471x86_64_nonmemory_operand (rtx op, enum machine_mode mode)
7dd4b4a3
JH
3472{
3473 if (!TARGET_64BIT)
3474 return nonmemory_operand (op, mode);
3475 if (register_operand (op, mode))
3476 return 1;
c05dbe81 3477 return x86_64_sign_extended_value (op);
7dd4b4a3
JH
3478}
3479
3480/* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
3481
3482int
b96a374d 3483x86_64_movabs_operand (rtx op, enum machine_mode mode)
7dd4b4a3
JH
3484{
3485 if (!TARGET_64BIT || !flag_pic)
3486 return nonmemory_operand (op, mode);
c05dbe81 3487 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
7dd4b4a3
JH
3488 return 1;
3489 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
3490 return 1;
3491 return 0;
3492}
3493
7e6dc358
JJ
3494/* Return nonzero if OPNUM's MEM should be matched
3495 in movabs* patterns. */
3496
3497int
3498ix86_check_movabs (rtx insn, int opnum)
3499{
3500 rtx set, mem;
3501
3502 set = PATTERN (insn);
3503 if (GET_CODE (set) == PARALLEL)
3504 set = XVECEXP (set, 0, 0);
3505 if (GET_CODE (set) != SET)
3506 abort ();
3507 mem = XEXP (set, opnum);
3508 while (GET_CODE (mem) == SUBREG)
3509 mem = SUBREG_REG (mem);
3510 if (GET_CODE (mem) != MEM)
3511 abort ();
3512 return (volatile_ok || !MEM_VOLATILE_P (mem));
3513}
3514
7dd4b4a3
JH
3515/* Return nonzero if OP is nonmemory operand representable on x86_64. */
3516
3517int
b96a374d 3518x86_64_szext_nonmemory_operand (rtx op, enum machine_mode mode)
7dd4b4a3
JH
3519{
3520 if (!TARGET_64BIT)
3521 return nonmemory_operand (op, mode);
3522 if (register_operand (op, mode))
3523 return 1;
c05dbe81 3524 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
7dd4b4a3
JH
3525}
3526
3527/* Return nonzero if OP is immediate operand representable on x86_64. */
3528
3529int
b96a374d 3530x86_64_immediate_operand (rtx op, enum machine_mode mode)
7dd4b4a3
JH
3531{
3532 if (!TARGET_64BIT)
3533 return immediate_operand (op, mode);
c05dbe81 3534 return x86_64_sign_extended_value (op);
7dd4b4a3
JH
3535}
3536
3537/* Return nonzero if OP is immediate operand representable on x86_64. */
3538
3539int
b96a374d 3540x86_64_zext_immediate_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7dd4b4a3
JH
3541{
3542 return x86_64_zero_extended_value (op);
3543}
3544
794a292d
JJ
3545/* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
3546 for shift & compare patterns, as shifting by 0 does not change flags),
3547 else return zero. */
3548
3549int
b96a374d 3550const_int_1_31_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
794a292d
JJ
3551{
3552 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31);
3553}
3554
e075ae69
RH
3555/* Returns 1 if OP is either a symbol reference or a sum of a symbol
3556 reference and a constant. */
b08de47e
MM
3557
3558int
8d531ab9 3559symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
2a2ab3f9 3560{
e075ae69 3561 switch (GET_CODE (op))
2a2ab3f9 3562 {
e075ae69
RH
3563 case SYMBOL_REF:
3564 case LABEL_REF:
3565 return 1;
3566
3567 case CONST:
3568 op = XEXP (op, 0);
3569 if (GET_CODE (op) == SYMBOL_REF
3570 || GET_CODE (op) == LABEL_REF
3571 || (GET_CODE (op) == UNSPEC
8ee41eaf
RH
3572 && (XINT (op, 1) == UNSPEC_GOT
3573 || XINT (op, 1) == UNSPEC_GOTOFF
3574 || XINT (op, 1) == UNSPEC_GOTPCREL)))
e075ae69
RH
3575 return 1;
3576 if (GET_CODE (op) != PLUS
3577 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3578 return 0;
3579
3580 op = XEXP (op, 0);
3581 if (GET_CODE (op) == SYMBOL_REF
3582 || GET_CODE (op) == LABEL_REF)
3583 return 1;
3584 /* Only @GOTOFF gets offsets. */
3585 if (GET_CODE (op) != UNSPEC
8ee41eaf 3586 || XINT (op, 1) != UNSPEC_GOTOFF)
e075ae69
RH
3587 return 0;
3588
3589 op = XVECEXP (op, 0, 0);
3590 if (GET_CODE (op) == SYMBOL_REF
3591 || GET_CODE (op) == LABEL_REF)
3592 return 1;
3593 return 0;
3594
3595 default:
3596 return 0;
2a2ab3f9
JVA
3597 }
3598}
2a2ab3f9 3599
e075ae69 3600/* Return true if the operand contains a @GOT or @GOTOFF reference. */
3b3c6a3f 3601
e075ae69 3602int
8d531ab9 3603pic_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
e075ae69 3604{
6eb791fc
JH
3605 if (GET_CODE (op) != CONST)
3606 return 0;
3607 op = XEXP (op, 0);
3608 if (TARGET_64BIT)
3609 {
a0c8285b
JH
3610 if (GET_CODE (op) == UNSPEC
3611 && XINT (op, 1) == UNSPEC_GOTPCREL)
3612 return 1;
3613 if (GET_CODE (op) == PLUS
fdacb904
JH
3614 && GET_CODE (XEXP (op, 0)) == UNSPEC
3615 && XINT (XEXP (op, 0), 1) == UNSPEC_GOTPCREL)
6eb791fc
JH
3616 return 1;
3617 }
fce5a9f2 3618 else
2a2ab3f9 3619 {
e075ae69
RH
3620 if (GET_CODE (op) == UNSPEC)
3621 return 1;
3622 if (GET_CODE (op) != PLUS
3623 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3624 return 0;
3625 op = XEXP (op, 0);
3626 if (GET_CODE (op) == UNSPEC)
3627 return 1;
2a2ab3f9 3628 }
e075ae69 3629 return 0;
2a2ab3f9 3630}
2a2ab3f9 3631
623fe810
RH
3632/* Return true if OP is a symbolic operand that resolves locally. */
3633
3634static int
b96a374d 3635local_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
623fe810 3636{
623fe810
RH
3637 if (GET_CODE (op) == CONST
3638 && GET_CODE (XEXP (op, 0)) == PLUS
c05dbe81 3639 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
623fe810
RH
3640 op = XEXP (XEXP (op, 0), 0);
3641
8bfb45f8
JJ
3642 if (GET_CODE (op) == LABEL_REF)
3643 return 1;
3644
623fe810
RH
3645 if (GET_CODE (op) != SYMBOL_REF)
3646 return 0;
3647
2ae5ae57 3648 if (SYMBOL_REF_LOCAL_P (op))
623fe810
RH
3649 return 1;
3650
3651 /* There is, however, a not insubstantial body of code in the rest of
fce5a9f2 3652 the compiler that assumes it can just stick the results of
623fe810
RH
3653 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
3654 /* ??? This is a hack. Should update the body of the compiler to
fb49053f 3655 always create a DECL an invoke targetm.encode_section_info. */
623fe810
RH
3656 if (strncmp (XSTR (op, 0), internal_label_prefix,
3657 internal_label_prefix_len) == 0)
3658 return 1;
3659
3660 return 0;
3661}
3662
2ae5ae57 3663/* Test for various thread-local symbols. */
f996902d
RH
3664
3665int
8d531ab9 3666tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
f996902d 3667{
f996902d
RH
3668 if (GET_CODE (op) != SYMBOL_REF)
3669 return 0;
2ae5ae57 3670 return SYMBOL_REF_TLS_MODEL (op);
f996902d
RH
3671}
3672
2ae5ae57 3673static inline int
b96a374d 3674tls_symbolic_operand_1 (rtx op, enum tls_model kind)
f996902d 3675{
f996902d
RH
3676 if (GET_CODE (op) != SYMBOL_REF)
3677 return 0;
2ae5ae57 3678 return SYMBOL_REF_TLS_MODEL (op) == kind;
f996902d
RH
3679}
3680
3681int
8d531ab9 3682global_dynamic_symbolic_operand (rtx op,
b96a374d 3683 enum machine_mode mode ATTRIBUTE_UNUSED)
f996902d
RH
3684{
3685 return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
3686}
3687
3688int
8d531ab9 3689local_dynamic_symbolic_operand (rtx op,
b96a374d 3690 enum machine_mode mode ATTRIBUTE_UNUSED)
f996902d
RH
3691{
3692 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
3693}
3694
3695int
8d531ab9 3696initial_exec_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
f996902d
RH
3697{
3698 return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
3699}
3700
3701int
8d531ab9 3702local_exec_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
f996902d
RH
3703{
3704 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
3705}
3706
28d52ffb
RH
3707/* Test for a valid operand for a call instruction. Don't allow the
3708 arg pointer register or virtual regs since they may decay into
3709 reg + const, which the patterns can't handle. */
2a2ab3f9 3710
e075ae69 3711int
b96a374d 3712call_insn_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
e075ae69 3713{
e075ae69
RH
3714 /* Disallow indirect through a virtual register. This leads to
3715 compiler aborts when trying to eliminate them. */
3716 if (GET_CODE (op) == REG
3717 && (op == arg_pointer_rtx
564d80f4 3718 || op == frame_pointer_rtx
e075ae69
RH
3719 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3720 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3721 return 0;
2a2ab3f9 3722
28d52ffb
RH
3723 /* Disallow `call 1234'. Due to varying assembler lameness this
3724 gets either rejected or translated to `call .+1234'. */
3725 if (GET_CODE (op) == CONST_INT)
3726 return 0;
3727
cbbf65e0
RH
3728 /* Explicitly allow SYMBOL_REF even if pic. */
3729 if (GET_CODE (op) == SYMBOL_REF)
e075ae69 3730 return 1;
2a2ab3f9 3731
cbbf65e0
RH
3732 /* Otherwise we can allow any general_operand in the address. */
3733 return general_operand (op, Pmode);
e075ae69 3734}
79325812 3735
4977bab6
ZW
3736/* Test for a valid operand for a call instruction. Don't allow the
3737 arg pointer register or virtual regs since they may decay into
3738 reg + const, which the patterns can't handle. */
3739
3740int
b96a374d 3741sibcall_insn_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4977bab6
ZW
3742{
3743 /* Disallow indirect through a virtual register. This leads to
3744 compiler aborts when trying to eliminate them. */
3745 if (GET_CODE (op) == REG
3746 && (op == arg_pointer_rtx
3747 || op == frame_pointer_rtx
3748 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3749 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3750 return 0;
3751
3752 /* Explicitly allow SYMBOL_REF even if pic. */
3753 if (GET_CODE (op) == SYMBOL_REF)
3754 return 1;
3755
3756 /* Otherwise we can only allow register operands. */
3757 return register_operand (op, Pmode);
3758}
3759
e075ae69 3760int
b96a374d 3761constant_call_address_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
e075ae69 3762{
eaf19aba
JJ
3763 if (GET_CODE (op) == CONST
3764 && GET_CODE (XEXP (op, 0)) == PLUS
3765 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3766 op = XEXP (XEXP (op, 0), 0);
e1ff012c 3767 return GET_CODE (op) == SYMBOL_REF;
e075ae69 3768}
2a2ab3f9 3769
e075ae69 3770/* Match exactly zero and one. */
e9a25f70 3771
0f290768 3772int
8d531ab9 3773const0_operand (rtx op, enum machine_mode mode)
e075ae69
RH
3774{
3775 return op == CONST0_RTX (mode);
3776}
e9a25f70 3777
0f290768 3778int
8d531ab9 3779const1_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
e075ae69
RH
3780{
3781 return op == const1_rtx;
3782}
2a2ab3f9 3783
e075ae69 3784/* Match 2, 4, or 8. Used for leal multiplicands. */
e9a25f70 3785
e075ae69 3786int
8d531ab9 3787const248_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
e075ae69
RH
3788{
3789 return (GET_CODE (op) == CONST_INT
3790 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
3791}
e9a25f70 3792
ebe75517 3793int
8d531ab9 3794const_0_to_3_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
ebe75517
JH
3795{
3796 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 4);
3797}
3798
3799int
8d531ab9 3800const_0_to_7_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
ebe75517
JH
3801{
3802 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 8);
3803}
3804
3805int
8d531ab9 3806const_0_to_15_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
ebe75517
JH
3807{
3808 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 16);
3809}
3810
3811int
8d531ab9 3812const_0_to_255_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
ebe75517
JH
3813{
3814 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 256);
3815}
3816
3817
d1f87653 3818/* True if this is a constant appropriate for an increment or decrement. */
81fd0956 3819
e075ae69 3820int
8d531ab9 3821incdec_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
e075ae69 3822{
f5143c46 3823 /* On Pentium4, the inc and dec operations causes extra dependency on flag
b4e89e2d 3824 registers, since carry flag is not set. */
89c43c0a 3825 if ((TARGET_PENTIUM4 || TARGET_NOCONA) && !optimize_size)
b4e89e2d 3826 return 0;
2b1c08f5 3827 return op == const1_rtx || op == constm1_rtx;
e075ae69 3828}
2a2ab3f9 3829
371bc54b
JH
3830/* Return nonzero if OP is acceptable as operand of DImode shift
3831 expander. */
3832
3833int
b96a374d 3834shiftdi_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
371bc54b
JH
3835{
3836 if (TARGET_64BIT)
3837 return nonimmediate_operand (op, mode);
3838 else
3839 return register_operand (op, mode);
3840}
3841
0f290768 3842/* Return false if this is the stack pointer, or any other fake
e075ae69
RH
3843 register eliminable to the stack pointer. Otherwise, this is
3844 a register operand.
2a2ab3f9 3845
e075ae69
RH
3846 This is used to prevent esp from being used as an index reg.
3847 Which would only happen in pathological cases. */
5f1ec3e6 3848
e075ae69 3849int
8d531ab9 3850reg_no_sp_operand (rtx op, enum machine_mode mode)
e075ae69
RH
3851{
3852 rtx t = op;
3853 if (GET_CODE (t) == SUBREG)
3854 t = SUBREG_REG (t);
564d80f4 3855 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
e075ae69 3856 return 0;
2a2ab3f9 3857
e075ae69 3858 return register_operand (op, mode);
2a2ab3f9 3859}
b840bfb0 3860
915119a5 3861int
8d531ab9 3862mmx_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
915119a5
BS
3863{
3864 return MMX_REG_P (op);
3865}
3866
2c5a510c
RH
3867/* Return false if this is any eliminable register. Otherwise
3868 general_operand. */
3869
3870int
8d531ab9 3871general_no_elim_operand (rtx op, enum machine_mode mode)
2c5a510c
RH
3872{
3873 rtx t = op;
3874 if (GET_CODE (t) == SUBREG)
3875 t = SUBREG_REG (t);
3876 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3877 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3878 || t == virtual_stack_dynamic_rtx)
3879 return 0;
1020a5ab
RH
3880 if (REG_P (t)
3881 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3882 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3883 return 0;
2c5a510c
RH
3884
3885 return general_operand (op, mode);
3886}
3887
3888/* Return false if this is any eliminable register. Otherwise
3889 register_operand or const_int. */
3890
3891int
8d531ab9 3892nonmemory_no_elim_operand (rtx op, enum machine_mode mode)
2c5a510c
RH
3893{
3894 rtx t = op;
3895 if (GET_CODE (t) == SUBREG)
3896 t = SUBREG_REG (t);
3897 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3898 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3899 || t == virtual_stack_dynamic_rtx)
3900 return 0;
3901
3902 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3903}
3904
7ec70495
JH
3905/* Return false if this is any eliminable register or stack register,
3906 otherwise work like register_operand. */
3907
3908int
8d531ab9 3909index_register_operand (rtx op, enum machine_mode mode)
7ec70495
JH
3910{
3911 rtx t = op;
3912 if (GET_CODE (t) == SUBREG)
3913 t = SUBREG_REG (t);
3914 if (!REG_P (t))
3915 return 0;
3916 if (t == arg_pointer_rtx
3917 || t == frame_pointer_rtx
3918 || t == virtual_incoming_args_rtx
3919 || t == virtual_stack_vars_rtx
3920 || t == virtual_stack_dynamic_rtx
3921 || REGNO (t) == STACK_POINTER_REGNUM)
3922 return 0;
3923
3924 return general_operand (op, mode);
3925}
3926
e075ae69 3927/* Return true if op is a Q_REGS class register. */
b840bfb0 3928
e075ae69 3929int
8d531ab9 3930q_regs_operand (rtx op, enum machine_mode mode)
b840bfb0 3931{
e075ae69
RH
3932 if (mode != VOIDmode && GET_MODE (op) != mode)
3933 return 0;
3934 if (GET_CODE (op) == SUBREG)
3935 op = SUBREG_REG (op);
7799175f 3936 return ANY_QI_REG_P (op);
0f290768 3937}
b840bfb0 3938
4977bab6
ZW
3939/* Return true if op is an flags register. */
3940
3941int
8d531ab9 3942flags_reg_operand (rtx op, enum machine_mode mode)
4977bab6
ZW
3943{
3944 if (mode != VOIDmode && GET_MODE (op) != mode)
3945 return 0;
3946 return REG_P (op) && REGNO (op) == FLAGS_REG && GET_MODE (op) != VOIDmode;
3947}
3948
e075ae69 3949/* Return true if op is a NON_Q_REGS class register. */
b840bfb0 3950
e075ae69 3951int
8d531ab9 3952non_q_regs_operand (rtx op, enum machine_mode mode)
e075ae69
RH
3953{
3954 if (mode != VOIDmode && GET_MODE (op) != mode)
3955 return 0;
3956 if (GET_CODE (op) == SUBREG)
3957 op = SUBREG_REG (op);
3958 return NON_QI_REG_P (op);
0f290768 3959}
b840bfb0 3960
4977bab6 3961int
b96a374d
AJ
3962zero_extended_scalar_load_operand (rtx op,
3963 enum machine_mode mode ATTRIBUTE_UNUSED)
4977bab6
ZW
3964{
3965 unsigned n_elts;
3966 if (GET_CODE (op) != MEM)
3967 return 0;
3968 op = maybe_get_pool_constant (op);
3969 if (!op)
3970 return 0;
3971 if (GET_CODE (op) != CONST_VECTOR)
3972 return 0;
3973 n_elts =
3974 (GET_MODE_SIZE (GET_MODE (op)) /
3975 GET_MODE_SIZE (GET_MODE_INNER (GET_MODE (op))));
3976 for (n_elts--; n_elts > 0; n_elts--)
3977 {
3978 rtx elt = CONST_VECTOR_ELT (op, n_elts);
3979 if (elt != CONST0_RTX (GET_MODE_INNER (GET_MODE (op))))
3980 return 0;
3981 }
3982 return 1;
3983}
3984
fdc4b40b
JH
3985/* Return 1 when OP is operand acceptable for standard SSE move. */
3986int
b96a374d 3987vector_move_operand (rtx op, enum machine_mode mode)
fdc4b40b
JH
3988{
3989 if (nonimmediate_operand (op, mode))
3990 return 1;
3991 if (GET_MODE (op) != mode && mode != VOIDmode)
3992 return 0;
3993 return (op == CONST0_RTX (GET_MODE (op)));
3994}
3995
74dc3e94
RH
3996/* Return true if op if a valid address, and does not contain
3997 a segment override. */
3998
3999int
8d531ab9 4000no_seg_address_operand (rtx op, enum machine_mode mode)
74dc3e94
RH
4001{
4002 struct ix86_address parts;
4003
4004 if (! address_operand (op, mode))
4005 return 0;
4006
4007 if (! ix86_decompose_address (op, &parts))
4008 abort ();
4009
4010 return parts.seg == SEG_DEFAULT;
4011}
4012
915119a5
BS
4013/* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
4014 insns. */
4015int
b96a374d 4016sse_comparison_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
915119a5
BS
4017{
4018 enum rtx_code code = GET_CODE (op);
a46d1d38
JH
4019 switch (code)
4020 {
4021 /* Operations supported directly. */
4022 case EQ:
4023 case LT:
4024 case LE:
4025 case UNORDERED:
4026 case NE:
4027 case UNGE:
4028 case UNGT:
4029 case ORDERED:
4030 return 1;
4031 /* These are equivalent to ones above in non-IEEE comparisons. */
4032 case UNEQ:
4033 case UNLT:
4034 case UNLE:
4035 case LTGT:
4036 case GE:
4037 case GT:
4038 return !TARGET_IEEE_FP;
4039 default:
4040 return 0;
4041 }
915119a5 4042}
9076b9c1 4043/* Return 1 if OP is a valid comparison operator in valid mode. */
e075ae69 4044int
8d531ab9 4045ix86_comparison_operator (rtx op, enum machine_mode mode)
e075ae69 4046{
9076b9c1 4047 enum machine_mode inmode;
9a915772 4048 enum rtx_code code = GET_CODE (op);
3a3677ff
RH
4049 if (mode != VOIDmode && GET_MODE (op) != mode)
4050 return 0;
ec8e098d 4051 if (!COMPARISON_P (op))
9a915772
JH
4052 return 0;
4053 inmode = GET_MODE (XEXP (op, 0));
4054
4055 if (inmode == CCFPmode || inmode == CCFPUmode)
4056 {
4057 enum rtx_code second_code, bypass_code;
4058 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
4059 return (bypass_code == NIL && second_code == NIL);
4060 }
4061 switch (code)
3a3677ff
RH
4062 {
4063 case EQ: case NE:
3a3677ff 4064 return 1;
9076b9c1 4065 case LT: case GE:
7e08e190 4066 if (inmode == CCmode || inmode == CCGCmode
9076b9c1
JH
4067 || inmode == CCGOCmode || inmode == CCNOmode)
4068 return 1;
4069 return 0;
7e08e190 4070 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
7e08e190 4071 if (inmode == CCmode)
9076b9c1
JH
4072 return 1;
4073 return 0;
4074 case GT: case LE:
7e08e190 4075 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
9076b9c1
JH
4076 return 1;
4077 return 0;
3a3677ff
RH
4078 default:
4079 return 0;
4080 }
4081}
4082
e6e81735
JH
4083/* Return 1 if OP is a valid comparison operator testing carry flag
4084 to be set. */
4085int
8d531ab9 4086ix86_carry_flag_operator (rtx op, enum machine_mode mode)
e6e81735
JH
4087{
4088 enum machine_mode inmode;
4089 enum rtx_code code = GET_CODE (op);
4090
4091 if (mode != VOIDmode && GET_MODE (op) != mode)
4092 return 0;
ec8e098d 4093 if (!COMPARISON_P (op))
e6e81735
JH
4094 return 0;
4095 inmode = GET_MODE (XEXP (op, 0));
4096 if (GET_CODE (XEXP (op, 0)) != REG
4097 || REGNO (XEXP (op, 0)) != 17
4098 || XEXP (op, 1) != const0_rtx)
4099 return 0;
4100
4101 if (inmode == CCFPmode || inmode == CCFPUmode)
4102 {
4103 enum rtx_code second_code, bypass_code;
4104
4105 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
4106 if (bypass_code != NIL || second_code != NIL)
4107 return 0;
4108 code = ix86_fp_compare_code_to_integer (code);
4109 }
4110 else if (inmode != CCmode)
4111 return 0;
4112 return code == LTU;
4113}
4114
9076b9c1 4115/* Return 1 if OP is a comparison operator that can be issued by fcmov. */
3a3677ff 4116
9076b9c1 4117int
8d531ab9 4118fcmov_comparison_operator (rtx op, enum machine_mode mode)
3a3677ff 4119{
b62d22a2 4120 enum machine_mode inmode;
9a915772 4121 enum rtx_code code = GET_CODE (op);
e6e81735 4122
3a3677ff
RH
4123 if (mode != VOIDmode && GET_MODE (op) != mode)
4124 return 0;
ec8e098d 4125 if (!COMPARISON_P (op))
9a915772
JH
4126 return 0;
4127 inmode = GET_MODE (XEXP (op, 0));
4128 if (inmode == CCFPmode || inmode == CCFPUmode)
3a3677ff 4129 {
9a915772 4130 enum rtx_code second_code, bypass_code;
e6e81735 4131
9a915772
JH
4132 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
4133 if (bypass_code != NIL || second_code != NIL)
4134 return 0;
4135 code = ix86_fp_compare_code_to_integer (code);
4136 }
4137 /* i387 supports just limited amount of conditional codes. */
4138 switch (code)
4139 {
4140 case LTU: case GTU: case LEU: case GEU:
4141 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
9076b9c1
JH
4142 return 1;
4143 return 0;
9a915772
JH
4144 case ORDERED: case UNORDERED:
4145 case EQ: case NE:
4146 return 1;
3a3677ff
RH
4147 default:
4148 return 0;
4149 }
e075ae69 4150}
b840bfb0 4151
e9e80858
JH
4152/* Return 1 if OP is a binary operator that can be promoted to wider mode. */
4153
4154int
8d531ab9 4155promotable_binary_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
e9e80858
JH
4156{
4157 switch (GET_CODE (op))
4158 {
4159 case MULT:
4160 /* Modern CPUs have same latency for HImode and SImode multiply,
4161 but 386 and 486 do HImode multiply faster. */
9e555526 4162 return ix86_tune > PROCESSOR_I486;
e9e80858
JH
4163 case PLUS:
4164 case AND:
4165 case IOR:
4166 case XOR:
4167 case ASHIFT:
4168 return 1;
4169 default:
4170 return 0;
4171 }
4172}
4173
e075ae69
RH
4174/* Nearly general operand, but accept any const_double, since we wish
4175 to be able to drop them into memory rather than have them get pulled
4176 into registers. */
b840bfb0 4177
2a2ab3f9 4178int
8d531ab9 4179cmp_fp_expander_operand (rtx op, enum machine_mode mode)
2a2ab3f9 4180{
e075ae69 4181 if (mode != VOIDmode && mode != GET_MODE (op))
0b6b2900 4182 return 0;
e075ae69 4183 if (GET_CODE (op) == CONST_DOUBLE)
2a2ab3f9 4184 return 1;
e075ae69 4185 return general_operand (op, mode);
2a2ab3f9
JVA
4186}
4187
e075ae69 4188/* Match an SI or HImode register for a zero_extract. */
2a2ab3f9
JVA
4189
4190int
8d531ab9 4191ext_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
2a2ab3f9 4192{
3522082b 4193 int regno;
0d7d98ee
JH
4194 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
4195 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
e075ae69 4196 return 0;
3522082b
JH
4197
4198 if (!register_operand (op, VOIDmode))
4199 return 0;
4200
d1f87653 4201 /* Be careful to accept only registers having upper parts. */
3522082b
JH
4202 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
4203 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
e075ae69
RH
4204}
4205
4206/* Return 1 if this is a valid binary floating-point operation.
0f290768 4207 OP is the expression matched, and MODE is its mode. */
e075ae69
RH
4208
4209int
8d531ab9 4210binary_fp_operator (rtx op, enum machine_mode mode)
e075ae69
RH
4211{
4212 if (mode != VOIDmode && mode != GET_MODE (op))
4213 return 0;
4214
2a2ab3f9
JVA
4215 switch (GET_CODE (op))
4216 {
e075ae69
RH
4217 case PLUS:
4218 case MINUS:
4219 case MULT:
4220 case DIV:
4221 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
e9a25f70 4222
2a2ab3f9
JVA
4223 default:
4224 return 0;
4225 }
4226}
fee2770d 4227
e075ae69 4228int
8d531ab9 4229mult_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
e075ae69
RH
4230{
4231 return GET_CODE (op) == MULT;
4232}
4233
4234int
8d531ab9 4235div_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
e075ae69
RH
4236{
4237 return GET_CODE (op) == DIV;
4238}
0a726ef1
JL
4239
4240int
b96a374d 4241arith_or_logical_operator (rtx op, enum machine_mode mode)
0a726ef1 4242{
e075ae69 4243 return ((mode == VOIDmode || GET_MODE (op) == mode)
ec8e098d 4244 && ARITHMETIC_P (op));
0a726ef1
JL
4245}
4246
e075ae69 4247/* Returns 1 if OP is memory operand with a displacement. */
fee2770d
RS
4248
4249int
8d531ab9 4250memory_displacement_operand (rtx op, enum machine_mode mode)
4f2c8ebb 4251{
e075ae69 4252 struct ix86_address parts;
e9a25f70 4253
e075ae69
RH
4254 if (! memory_operand (op, mode))
4255 return 0;
4256
4257 if (! ix86_decompose_address (XEXP (op, 0), &parts))
4258 abort ();
4259
4260 return parts.disp != NULL_RTX;
4f2c8ebb
RS
4261}
4262
16189740 4263/* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
e075ae69
RH
4264 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
4265
4266 ??? It seems likely that this will only work because cmpsi is an
4267 expander, and no actual insns use this. */
4f2c8ebb
RS
4268
4269int
b96a374d 4270cmpsi_operand (rtx op, enum machine_mode mode)
fee2770d 4271{
b9b2c339 4272 if (nonimmediate_operand (op, mode))
e075ae69
RH
4273 return 1;
4274
4275 if (GET_CODE (op) == AND
4276 && GET_MODE (op) == SImode
4277 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
4278 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
4279 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
4280 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
4281 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
4282 && GET_CODE (XEXP (op, 1)) == CONST_INT)
fee2770d 4283 return 1;
e9a25f70 4284
fee2770d
RS
4285 return 0;
4286}
d784886d 4287
e075ae69
RH
4288/* Returns 1 if OP is memory operand that can not be represented by the
4289 modRM array. */
d784886d
RK
4290
4291int
8d531ab9 4292long_memory_operand (rtx op, enum machine_mode mode)
d784886d 4293{
e075ae69 4294 if (! memory_operand (op, mode))
d784886d
RK
4295 return 0;
4296
e075ae69 4297 return memory_address_length (op) != 0;
d784886d 4298}
2247f6ed
JH
4299
4300/* Return nonzero if the rtx is known aligned. */
4301
4302int
b96a374d 4303aligned_operand (rtx op, enum machine_mode mode)
2247f6ed
JH
4304{
4305 struct ix86_address parts;
4306
4307 if (!general_operand (op, mode))
4308 return 0;
4309
0f290768 4310 /* Registers and immediate operands are always "aligned". */
2247f6ed
JH
4311 if (GET_CODE (op) != MEM)
4312 return 1;
4313
0f290768 4314 /* Don't even try to do any aligned optimizations with volatiles. */
2247f6ed
JH
4315 if (MEM_VOLATILE_P (op))
4316 return 0;
4317
4318 op = XEXP (op, 0);
4319
4320 /* Pushes and pops are only valid on the stack pointer. */
4321 if (GET_CODE (op) == PRE_DEC
4322 || GET_CODE (op) == POST_INC)
4323 return 1;
4324
4325 /* Decode the address. */
4326 if (! ix86_decompose_address (op, &parts))
4327 abort ();
4328
4329 /* Look for some component that isn't known to be aligned. */
4330 if (parts.index)
4331 {
4332 if (parts.scale < 4
bdb429a5 4333 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
2247f6ed
JH
4334 return 0;
4335 }
4336 if (parts.base)
4337 {
bdb429a5 4338 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
2247f6ed
JH
4339 return 0;
4340 }
4341 if (parts.disp)
4342 {
4343 if (GET_CODE (parts.disp) != CONST_INT
4344 || (INTVAL (parts.disp) & 3) != 0)
4345 return 0;
4346 }
4347
4348 /* Didn't find one -- this must be an aligned address. */
4349 return 1;
4350}
e075ae69 4351\f
881b2a96
RS
4352/* Initialize the table of extra 80387 mathematical constants. */
4353
4354static void
b96a374d 4355init_ext_80387_constants (void)
881b2a96
RS
4356{
4357 static const char * cst[5] =
4358 {
4359 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4360 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4361 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4362 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4363 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4364 };
4365 int i;
4366
4367 for (i = 0; i < 5; i++)
4368 {
4369 real_from_string (&ext_80387_constants_table[i], cst[i]);
4370 /* Ensure each constant is rounded to XFmode precision. */
1f48e56d 4371 real_convert (&ext_80387_constants_table[i],
f8a1ebc6 4372 XFmode, &ext_80387_constants_table[i]);
881b2a96
RS
4373 }
4374
4375 ext_80387_constants_init = 1;
4376}
4377
e075ae69 4378/* Return true if the constant is something that can be loaded with
881b2a96 4379 a special instruction. */
57dbca5e
BS
4380
4381int
b96a374d 4382standard_80387_constant_p (rtx x)
57dbca5e 4383{
2b04e52b 4384 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
e075ae69 4385 return -1;
881b2a96 4386
2b04e52b
JH
4387 if (x == CONST0_RTX (GET_MODE (x)))
4388 return 1;
4389 if (x == CONST1_RTX (GET_MODE (x)))
4390 return 2;
881b2a96 4391
22cc69c4
RS
4392 /* For XFmode constants, try to find a special 80387 instruction when
4393 optimizing for size or on those CPUs that benefit from them. */
f8a1ebc6 4394 if (GET_MODE (x) == XFmode
22cc69c4 4395 && (optimize_size || x86_ext_80387_constants & TUNEMASK))
881b2a96
RS
4396 {
4397 REAL_VALUE_TYPE r;
4398 int i;
4399
4400 if (! ext_80387_constants_init)
4401 init_ext_80387_constants ();
4402
4403 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4404 for (i = 0; i < 5; i++)
4405 if (real_identical (&r, &ext_80387_constants_table[i]))
4406 return i + 3;
4407 }
4408
e075ae69 4409 return 0;
57dbca5e
BS
4410}
4411
881b2a96
RS
4412/* Return the opcode of the special instruction to be used to load
4413 the constant X. */
4414
4415const char *
b96a374d 4416standard_80387_constant_opcode (rtx x)
881b2a96
RS
4417{
4418 switch (standard_80387_constant_p (x))
4419 {
b96a374d 4420 case 1:
881b2a96
RS
4421 return "fldz";
4422 case 2:
4423 return "fld1";
b96a374d 4424 case 3:
881b2a96
RS
4425 return "fldlg2";
4426 case 4:
4427 return "fldln2";
b96a374d 4428 case 5:
881b2a96
RS
4429 return "fldl2e";
4430 case 6:
4431 return "fldl2t";
b96a374d 4432 case 7:
881b2a96
RS
4433 return "fldpi";
4434 }
4435 abort ();
4436}
4437
4438/* Return the CONST_DOUBLE representing the 80387 constant that is
4439 loaded by the specified special instruction. The argument IDX
4440 matches the return value from standard_80387_constant_p. */
4441
4442rtx
b96a374d 4443standard_80387_constant_rtx (int idx)
881b2a96
RS
4444{
4445 int i;
4446
4447 if (! ext_80387_constants_init)
4448 init_ext_80387_constants ();
4449
4450 switch (idx)
4451 {
4452 case 3:
4453 case 4:
4454 case 5:
4455 case 6:
4456 case 7:
4457 i = idx - 3;
4458 break;
4459
4460 default:
4461 abort ();
4462 }
4463
1f48e56d 4464 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
f8a1ebc6 4465 XFmode);
881b2a96
RS
4466}
4467
2b04e52b
JH
4468/* Return 1 if X is FP constant we can load to SSE register w/o using memory.
4469 */
4470int
b96a374d 4471standard_sse_constant_p (rtx x)
2b04e52b 4472{
0e67d460
JH
4473 if (x == const0_rtx)
4474 return 1;
2b04e52b
JH
4475 return (x == CONST0_RTX (GET_MODE (x)));
4476}
4477
2a2ab3f9
JVA
4478/* Returns 1 if OP contains a symbol reference */
4479
4480int
b96a374d 4481symbolic_reference_mentioned_p (rtx op)
2a2ab3f9 4482{
8d531ab9
KH
4483 const char *fmt;
4484 int i;
2a2ab3f9
JVA
4485
4486 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4487 return 1;
4488
4489 fmt = GET_RTX_FORMAT (GET_CODE (op));
4490 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4491 {
4492 if (fmt[i] == 'E')
4493 {
8d531ab9 4494 int j;
2a2ab3f9
JVA
4495
4496 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4497 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4498 return 1;
4499 }
e9a25f70 4500
2a2ab3f9
JVA
4501 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4502 return 1;
4503 }
4504
4505 return 0;
4506}
e075ae69
RH
4507
/* Return 1 if it is appropriate to emit `ret' instructions in the
   body of a function.  Do this only if the epilogue is simple, needing a
   couple of insns.  Prior to reloading, we can't tell how many registers
   must be saved, so return 0 then.  Return 0 if there is no frame
   marker to de-allocate.

   If NON_SAVING_SETJMP is defined and true, then it is not possible
   for the epilogue to be simple, so return 0.  This is a special case
   since NON_SAVING_SETJMP will not cause regs_ever_live to change
   until final, but jump_optimize may need to know sooner if a
   `return' is OK.  */

int
ix86_can_use_return_insn_p (void)
{
  struct ix86_frame frame;

#ifdef NON_SAVING_SETJMP
  if (NON_SAVING_SETJMP && current_function_calls_setjmp)
    return 0;
#endif

  /* Before reload the saved-register set is unknown; with a frame
     pointer a bare `ret' would leave the frame unreleased.  */
  if (! reload_completed || frame_pointer_needed)
    return 0;

  /* Don't allow more than 32k pop, since that's all we can do
     with one instruction.  */
  if (current_function_pops_args
      && current_function_args_size >= 32768)
    return 0;

  /* A lone `ret' is only valid when nothing remains to deallocate.  */
  ix86_compute_frame_layout (&frame);
  return frame.to_allocate == 0 && frame.nregs == 0;
}
6189a572
JH
4542\f
/* Return 1 if VALUE can be stored in the sign extended immediate field,
   i.e. fits in a sign-extended 32-bit immediate of an x86-64 insn.  */
int
x86_64_sign_extended_value (rtx value)
{
  switch (GET_CODE (value))
    {
      /* CONST_DOUBLEs never match, since HOST_BITS_PER_WIDE_INT is known
         to be at least 32 and thus all acceptable constants are
	 represented as CONST_INT.  */
    case CONST_INT:
      if (HOST_BITS_PER_WIDE_INT == 32)
	return 1;
      else
	{
	  /* Truncate to DImode first, then test whether the value
	     survives a round trip through SImode sign-extension.  */
	  HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
	  return trunc_int_for_mode (val, SImode) == val;
	}
      break;

      /* For certain code models, the symbolic references are known to fit.
	 in CM_SMALL_PIC model we know it fits if it is local to the shared
	 library.  Don't count TLS SYMBOL_REFs here, since they should fit
	 only if inside of UNSPEC handled below.  */
    case SYMBOL_REF:
      /* TLS symbols are not constant.  */
      if (tls_symbolic_operand (value, Pmode))
	return false;
      return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL);

      /* For certain code models, the code is near as well.  */
    case LABEL_REF:
      return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM
	      || ix86_cmodel == CM_KERNEL);

      /* We also may accept the offsetted memory references in certain special
         cases.  */
    case CONST:
      /* TLS/GOT-relative unspecs are emitted so as to fit in 32 bits.  */
      if (GET_CODE (XEXP (value, 0)) == UNSPEC)
	switch (XINT (XEXP (value, 0), 1))
	  {
	  case UNSPEC_GOTPCREL:
	  case UNSPEC_DTPOFF:
	  case UNSPEC_GOTNTPOFF:
	  case UNSPEC_NTPOFF:
	    return 1;
	  default:
	    break;
	  }
      /* symbol-or-label + constant-offset forms.  */
      if (GET_CODE (XEXP (value, 0)) == PLUS)
	{
	  rtx op1 = XEXP (XEXP (value, 0), 0);
	  rtx op2 = XEXP (XEXP (value, 0), 1);
	  HOST_WIDE_INT offset;

	  if (ix86_cmodel == CM_LARGE)
	    return 0;
	  if (GET_CODE (op2) != CONST_INT)
	    return 0;
	  offset = trunc_int_for_mode (INTVAL (op2), DImode);
	  switch (GET_CODE (op1))
	    {
	    case SYMBOL_REF:
	      /* For CM_SMALL assume that latest object is 16MB before
		 end of 31bits boundary.  We may also accept pretty
		 large negative constants knowing that all objects are
		 in the positive half of address space.  */
	      if (ix86_cmodel == CM_SMALL
		  && offset < 16*1024*1024
		  && trunc_int_for_mode (offset, SImode) == offset)
		return 1;
	      /* For CM_KERNEL we know that all objects reside in the
		 negative half of 32bits address space.  We may not
		 accept negative offsets, since they may be just off
		 and we may accept pretty large positive ones.  */
	      if (ix86_cmodel == CM_KERNEL
		  && offset > 0
		  && trunc_int_for_mode (offset, SImode) == offset)
		return 1;
	      break;
	    case LABEL_REF:
	      /* These conditions are similar to SYMBOL_REF ones, just the
		 constraints for code models differ.  */
	      if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
		  && offset < 16*1024*1024
		  && trunc_int_for_mode (offset, SImode) == offset)
		return 1;
	      if (ix86_cmodel == CM_KERNEL
		  && offset > 0
		  && trunc_int_for_mode (offset, SImode) == offset)
		return 1;
	      break;
	    case UNSPEC:
	      /* TLS offsets: only positive 32-bit-representable ones.  */
	      switch (XINT (op1, 1))
		{
		case UNSPEC_DTPOFF:
		case UNSPEC_NTPOFF:
		  if (offset > 0
		      && trunc_int_for_mode (offset, SImode) == offset)
		    return 1;
		}
	      break;
	    default:
	      return 0;
	    }
	}
      return 0;
    default:
      return 0;
    }
}
4653
/* Return 1 if VALUE can be stored in the zero extended immediate field,
   i.e. fits in an unsigned 32-bit immediate of an x86-64 insn.  */
int
x86_64_zero_extended_value (rtx value)
{
  switch (GET_CODE (value))
    {
    case CONST_DOUBLE:
      /* On 32-bit hosts a 64-bit integer constant is a VOIDmode
	 CONST_DOUBLE; it is zero-extendable iff the high word is 0.  */
      if (HOST_BITS_PER_WIDE_INT == 32)
	return (GET_MODE (value) == VOIDmode
		&& !CONST_DOUBLE_HIGH (value));
      else
	return 0;
    case CONST_INT:
      if (HOST_BITS_PER_WIDE_INT == 32)
	return INTVAL (value) >= 0;
      else
	return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
      break;

      /* For certain code models, the symbolic references are known to fit.  */
    case SYMBOL_REF:
      /* TLS symbols are not constant.  */
      if (tls_symbolic_operand (value, Pmode))
	return false;
      return ix86_cmodel == CM_SMALL;

      /* For certain code models, the code is near as well.  */
    case LABEL_REF:
      return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;

      /* We also may accept the offsetted memory references in certain special
         cases.  */
    case CONST:
      if (GET_CODE (XEXP (value, 0)) == PLUS)
	{
	  rtx op1 = XEXP (XEXP (value, 0), 0);
	  rtx op2 = XEXP (XEXP (value, 0), 1);

	  if (ix86_cmodel == CM_LARGE)
	    return 0;
	  switch (GET_CODE (op1))
	    {
	    case SYMBOL_REF:
	      /* NOTE(review): this early return makes the CM_SMALL check
		 below unreachable — symbol+offset is never accepted as a
		 zero-extended immediate.  Looks deliberate (disabled code
		 kept for reference), but verify against upstream history
		 before relying on it.  */
	      return 0;
	      /* For small code model we may accept pretty large positive
		 offsets, since one bit is available for free.  Negative
		 offsets are limited by the size of NULL pointer area
		 specified by the ABI.  */
	      if (ix86_cmodel == CM_SMALL
		  && GET_CODE (op2) == CONST_INT
		  && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
		  && (trunc_int_for_mode (INTVAL (op2), SImode)
		      == INTVAL (op2)))
		return 1;
	      /* ??? For the kernel, we may accept adjustment of
		 -0x10000000, since we know that it will just convert
		 negative address space to positive, but perhaps this
		 is not worthwhile.  */
	      break;
	    case LABEL_REF:
	      /* These conditions are similar to SYMBOL_REF ones, just the
		 constraints for code models differ.  */
	      if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
		  && GET_CODE (op2) == CONST_INT
		  && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
		  && (trunc_int_for_mode (INTVAL (op2), SImode)
		      == INTVAL (op2)))
		return 1;
	      break;
	    default:
	      return 0;
	    }
	}
      return 0;
    default:
      return 0;
    }
}
6fca22eb
RH
4732
4733/* Value should be nonzero if functions must have frame pointers.
4734 Zero means the frame pointer need not be set up (and parms may
4735 be accessed via the stack pointer) in functions that seem suitable. */
4736
4737int
b96a374d 4738ix86_frame_pointer_required (void)
6fca22eb
RH
4739{
4740 /* If we accessed previous frames, then the generated code expects
4741 to be able to access the saved ebp value in our frame. */
4742 if (cfun->machine->accesses_prev_frame)
4743 return 1;
a4f31c00 4744
6fca22eb
RH
4745 /* Several x86 os'es need a frame pointer for other reasons,
4746 usually pertaining to setjmp. */
4747 if (SUBTARGET_FRAME_POINTER_REQUIRED)
4748 return 1;
4749
4750 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4751 the frame pointer by default. Turn it back on now if we've not
4752 got a leaf function. */
a7943381 4753 if (TARGET_OMIT_LEAF_FRAME_POINTER
55ba61f3
JH
4754 && (!current_function_is_leaf))
4755 return 1;
4756
4757 if (current_function_profile)
6fca22eb
RH
4758 return 1;
4759
4760 return 0;
4761}
4762
/* Record that the current function accesses previous call frames.
   The flag set here is read by ix86_frame_pointer_required to force
   a frame pointer.  */

void
ix86_setup_frame_addresses (void)
{
  cfun->machine->accesses_prev_frame = 1;
}
e075ae69 4770\f
145aacc2
RH
/* Emit pc thunks as hidden link-once functions when the assembler and
   target support it; otherwise fall back to plain internal labels.  */
#if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
# define USE_HIDDEN_LINKONCE 1
#else
# define USE_HIDDEN_LINKONCE 0
#endif

/* Bitmask of registers for which a get-pc thunk is needed; bit N is
   set by output_set_got for register N and tested by ix86_file_end.  */
static int pic_labels_used;
e9a25f70 4778
145aacc2
RH
/* Fills in the label name that should be used for a pc thunk for
   the given register.  NAME must have room for 32 characters.  */

static void
get_pc_thunk_name (char name[32], unsigned int regno)
{
  if (USE_HIDDEN_LINKONCE)
    /* Public, link-once name shared across translation units.  */
    sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
  else
    /* Private per-file label ("LPR<regno>").  */
    ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
}
4790
4791
e075ae69
RH
/* Emit, at the end of the assembly file, the -fpic get-pc thunks that
   output_set_got requested (one per bit in pic_labels_used).  Each
   thunk loads its register from the top of stack — i.e. with the
   return address of the caller — and returns.  */

void
ix86_file_end (void)
{
  rtx xops[2];
  int regno;

  /* Only the eight 32-bit integer registers can hold the PIC base.  */
  for (regno = 0; regno < 8; ++regno)
    {
      char name[32];

      if (! ((pic_labels_used >> regno) & 1))
	continue;

      get_pc_thunk_name (name, regno);

      if (USE_HIDDEN_LINKONCE)
	{
	  tree decl;

	  /* Emit the thunk as a public, hidden, one-only function so
	     identical copies from other objects are merged at link time.  */
	  decl = build_decl (FUNCTION_DECL, get_identifier (name),
			     error_mark_node);
	  TREE_PUBLIC (decl) = 1;
	  TREE_STATIC (decl) = 1;
	  DECL_ONE_ONLY (decl) = 1;

	  (*targetm.asm_out.unique_section) (decl, 0);
	  named_section (decl, NULL, 0);

	  (*targetm.asm_out.globalize_label) (asm_out_file, name);
	  fputs ("\t.hidden\t", asm_out_file);
	  assemble_name (asm_out_file, name);
	  fputc ('\n', asm_out_file);
	  ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
	}
      else
	{
	  /* Fallback: a plain local label in the text section.  */
	  text_section ();
	  ASM_OUTPUT_LABEL (asm_out_file, name);
	}

      /* Body: mov (%esp), %REG ; ret — REG receives the caller's
	 return address.  */
      xops[0] = gen_rtx_REG (SImode, regno);
      xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
      output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
      output_asm_insn ("ret", xops);
    }

  if (NEED_INDICATE_EXEC_STACK)
    file_end_indicate_exec_stack ();
}
32b5b1aa 4844
/* Emit code for the SET_GOT patterns.  Returns the (empty) template
   string after writing the instructions directly; DEST receives the
   address of the GOT.  */

const char *
output_set_got (rtx dest)
{
  rtx xops[3];

  xops[0] = dest;
  xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);

  if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
    {
      /* Inline sequence: call over a local label, then pop the return
	 address (the label's pc) into DEST.  */
      xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());

      if (!flag_pic)
	output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
      else
	output_asm_insn ("call\t%a2", xops);

#if TARGET_MACHO
      /* Output the "canonical" label name ("Lxx$pb") here too.  This
         is what will be referred to by the Mach-O PIC subsystem.  */
      ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
#endif
      (*targetm.asm_out.internal_label) (asm_out_file, "L",
				 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));

      if (flag_pic)
	output_asm_insn ("pop{l}\t%0", xops);
    }
  else
    {
      /* Call the per-register get-pc thunk; record that ix86_file_end
	 must emit it.  */
      char name[32];
      get_pc_thunk_name (name, REGNO (dest));
      pic_labels_used |= 1 << REGNO (dest);

      xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
      xops[2] = gen_rtx_MEM (QImode, xops[2]);
      output_asm_insn ("call\t%X2", xops);
    }

  /* Turn the loaded pc into the GOT address by adding the
     _GLOBAL_OFFSET_TABLE_ displacement.  */
  if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
    output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
  else if (!TARGET_MACHO)
    output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);

  return "";
}
8dfe5673 4893
0d7d98ee 4894/* Generate an "push" pattern for input ARG. */
e9a25f70 4895
e075ae69 4896static rtx
b96a374d 4897gen_push (rtx arg)
e9a25f70 4898{
c5c76735 4899 return gen_rtx_SET (VOIDmode,
0d7d98ee
JH
4900 gen_rtx_MEM (Pmode,
4901 gen_rtx_PRE_DEC (Pmode,
c5c76735
JL
4902 stack_pointer_rtx)),
4903 arg);
e9a25f70
JL
4904}
4905
bd09bdeb
RH
4906/* Return >= 0 if there is an unused call-clobbered register available
4907 for the entire function. */
4908
4909static unsigned int
b96a374d 4910ix86_select_alt_pic_regnum (void)
bd09bdeb
RH
4911{
4912 if (current_function_is_leaf && !current_function_profile)
4913 {
4914 int i;
4915 for (i = 2; i >= 0; --i)
4916 if (!regs_ever_live[i])
4917 return i;
4918 }
4919
4920 return INVALID_REGNUM;
4921}
fce5a9f2 4922
4dd2ac2c
JH
/* Return 1 if we need to save REGNO.  When MAYBE_EH_RETURN is set,
   the EH return data registers also count as needing a save.  */
static int
ix86_save_reg (unsigned int regno, int maybe_eh_return)
{
  /* The PIC register must be saved whenever something keeps it live,
     unless a free call-clobbered register can hold the PIC base
     instead (then no save is needed).  */
  if (pic_offset_table_rtx
      && regno == REAL_PIC_OFFSET_TABLE_REGNUM
      && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
	  || current_function_profile
	  || current_function_calls_eh_return
	  || current_function_uses_const_pool))
    {
      if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
	return 0;
      return 1;
    }

  /* EH return data registers (terminated by INVALID_REGNUM).  */
  if (current_function_calls_eh_return && maybe_eh_return)
    {
      unsigned i;
      for (i = 0; ; i++)
	{
	  unsigned test = EH_RETURN_DATA_REGNO (i);
	  if (test == INVALID_REGNUM)
	    break;
	  if (test == regno)
	    return 1;
	}
    }

  /* Ordinary case: live, callee-saved, not fixed, and not the hard
     frame pointer when that is managed by the prologue anyway.  */
  return (regs_ever_live[regno]
	  && !call_used_regs[regno]
	  && !fixed_regs[regno]
	  && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
}
4957
0903fcab
JH
4958/* Return number of registers to be saved on the stack. */
4959
4960static int
b96a374d 4961ix86_nsaved_regs (void)
0903fcab
JH
4962{
4963 int nregs = 0;
0903fcab
JH
4964 int regno;
4965
4dd2ac2c 4966 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
1020a5ab 4967 if (ix86_save_reg (regno, true))
4dd2ac2c 4968 nregs++;
0903fcab
JH
4969 return nregs;
4970}
4971
4972/* Return the offset between two registers, one to be eliminated, and the other
4973 its replacement, at the start of a routine. */
4974
4975HOST_WIDE_INT
b96a374d 4976ix86_initial_elimination_offset (int from, int to)
0903fcab 4977{
4dd2ac2c
JH
4978 struct ix86_frame frame;
4979 ix86_compute_frame_layout (&frame);
564d80f4
JH
4980
4981 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4dd2ac2c 4982 return frame.hard_frame_pointer_offset;
564d80f4
JH
4983 else if (from == FRAME_POINTER_REGNUM
4984 && to == HARD_FRAME_POINTER_REGNUM)
4dd2ac2c 4985 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
0903fcab
JH
4986 else
4987 {
564d80f4
JH
4988 if (to != STACK_POINTER_REGNUM)
4989 abort ();
4990 else if (from == ARG_POINTER_REGNUM)
4dd2ac2c 4991 return frame.stack_pointer_offset;
564d80f4
JH
4992 else if (from != FRAME_POINTER_REGNUM)
4993 abort ();
0903fcab 4994 else
4dd2ac2c 4995 return frame.stack_pointer_offset - frame.frame_pointer_offset;
0903fcab
JH
4996 }
4997}
4998
/* Fill structure ix86_frame about frame of currently computed function.
   Computes register-save counts, the padding/offset fields describing
   the stack layout, the total allocation size, and the red-zone
   adjustment for x86-64 leaf functions.  */

static void
ix86_compute_frame_layout (struct ix86_frame *frame)
{
  HOST_WIDE_INT total_size;  /* NOTE(review): written but never read.  */
  int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
  HOST_WIDE_INT offset;
  int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
  HOST_WIDE_INT size = get_frame_size ();

  frame->nregs = ix86_nsaved_regs ();
  total_size = size;

  /* During reload iteration the amount of registers saved can change.
     Recompute the value as needed.  Do not recompute when amount of registers
     didn't change as reload does multiple calls to the function and does not
     expect the decision to change within single iteration.  */
  if (!optimize_size
      && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
    {
      int count = frame->nregs;

      cfun->machine->use_fast_prologue_epilogue_nregs = count;
      /* The fast prologue uses move instead of push to save registers.  This
         is significantly longer, but also executes faster as modern hardware
         can execute the moves in parallel, but can't do that for push/pop.

	 Be careful about choosing what prologue to emit:  When function takes
	 many instructions to execute we may use slow version as well as in
	 case function is known to be outside hot spot (this is known with
	 feedback only).  Weight the size of function by number of registers
	 to save as it is cheap to use one or two push instructions but very
	 slow to use many of them.  */
      if (count)
	count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
      if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
	  || (flag_branch_probabilities
	      && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
	cfun->machine->use_fast_prologue_epilogue = false;
      else
	cfun->machine->use_fast_prologue_epilogue
	  = !expensive_function_p (count);
    }
  if (TARGET_PROLOGUE_USING_MOVE
      && cfun->machine->use_fast_prologue_epilogue)
    frame->save_regs_using_mov = true;
  else
    frame->save_regs_using_mov = false;


  /* Skip return address and saved base pointer.  */
  offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;

  frame->hard_frame_pointer_offset = offset;

  /* Do some sanity checking of stack_alignment_needed and
     preferred_alignment, since i386 port is the only using those features
     that may break easily.  */

  if (size && !stack_alignment_needed)
    abort ();
  if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
    abort ();
  if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
    abort ();
  if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
    abort ();

  if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
    stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;

  /* Register save area.  */
  offset += frame->nregs * UNITS_PER_WORD;

  /* Va-arg area.  */
  if (ix86_save_varrargs_registers)
    {
      offset += X86_64_VARARGS_SIZE;
      frame->va_arg_size = X86_64_VARARGS_SIZE;
    }
  else
    frame->va_arg_size = 0;

  /* Align start of frame for local function.  */
  frame->padding1 = ((offset + stack_alignment_needed - 1)
		     & -stack_alignment_needed) - offset;

  offset += frame->padding1;

  /* Frame pointer points here.  */
  frame->frame_pointer_offset = offset;

  offset += size;

  /* Add outgoing arguments area.  Can be skipped if we eliminated
     all the function calls as dead code.
     Skipping is however impossible when function calls alloca.  Alloca
     expander assumes that last current_function_outgoing_args_size
     of stack frame are unused.  */
  if (ACCUMULATE_OUTGOING_ARGS
      && (!current_function_is_leaf || current_function_calls_alloca))
    {
      offset += current_function_outgoing_args_size;
      frame->outgoing_arguments_size = current_function_outgoing_args_size;
    }
  else
    frame->outgoing_arguments_size = 0;

  /* Align stack boundary.  Only needed if we're calling another function
     or using alloca.  */
  if (!current_function_is_leaf || current_function_calls_alloca)
    frame->padding2 = ((offset + preferred_alignment - 1)
		       & -preferred_alignment) - offset;
  else
    frame->padding2 = 0;

  offset += frame->padding2;

  /* We've reached end of stack frame.  */
  frame->stack_pointer_offset = offset;

  /* Size prologue needs to allocate.  */
  frame->to_allocate =
    (size + frame->padding1 + frame->padding2
     + frame->outgoing_arguments_size + frame->va_arg_size);

  /* Moves are pointless with nothing to allocate, and on 64-bit a
     >= 2GB allocation would overflow the 32-bit displacement.  */
  if ((!frame->to_allocate && frame->nregs <= 1)
      || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
    frame->save_regs_using_mov = false;

  /* x86-64 red zone: a leaf function that never moves sp may use the
     128 bytes below sp without allocating them.  */
  if (TARGET_RED_ZONE && current_function_sp_is_unchanging
      && current_function_is_leaf)
    {
      frame->red_zone_size = frame->to_allocate;
      if (frame->save_regs_using_mov)
	frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
      if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
	frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
    }
  else
    frame->red_zone_size = 0;
  frame->to_allocate -= frame->red_zone_size;
  frame->stack_pointer_offset -= frame->red_zone_size;
#if 0
  fprintf (stderr, "nregs: %i\n", frame->nregs);
  fprintf (stderr, "size: %i\n", size);
  fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
  fprintf (stderr, "padding1: %i\n", frame->padding1);
  fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
  fprintf (stderr, "padding2: %i\n", frame->padding2);
  fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
  fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
  fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
  fprintf (stderr, "hard_frame_pointer_offset: %i\n",
	   frame->hard_frame_pointer_offset);
  fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
#endif
}
5158
0903fcab
JH
5159/* Emit code to save registers in the prologue. */
5160
5161static void
b96a374d 5162ix86_emit_save_regs (void)
0903fcab 5163{
8d531ab9 5164 int regno;
0903fcab 5165 rtx insn;
0903fcab 5166
4dd2ac2c 5167 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
1020a5ab 5168 if (ix86_save_reg (regno, true))
0903fcab 5169 {
0d7d98ee 5170 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
0903fcab
JH
5171 RTX_FRAME_RELATED_P (insn) = 1;
5172 }
5173}
5174
c6036a37
JH
/* Emit code to save registers using MOV insns.  First register
   is saved at POINTER + OFFSET.  (Counterpart of
   ix86_emit_restore_regs_using_mov, which reads them back.)  */
static void
ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
{
  int regno;
  rtx insn;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (ix86_save_reg (regno, true))
      {
	insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
					       Pmode, offset),
			       gen_rtx_REG (Pmode, regno));
	/* Mark frame-related for unwind info.  */
	RTX_FRAME_RELATED_P (insn) = 1;
	offset += UNITS_PER_WORD;
      }
}
5193
/* Expand prologue or epilogue stack adjustment.
   The pattern exist to put a dependency on all ebp-based memory accesses.
   STYLE should be negative if instructions should be marked as frame related,
   zero if %r11 register is live and cannot be freely used and positive
   otherwise.  */

static void
pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
{
  rtx insn;

  if (! TARGET_64BIT)
    insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
  else if (x86_64_immediate_operand (offset, DImode))
    insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
  else
    {
      /* Offset does not fit a 64-bit immediate: stage it through r11.  */
      rtx r11;
      /* r11 is used by indirect sibcall return as well, set before the
	 epilogue and used after the epilogue.  ATM indirect sibcall
	 shouldn't be used together with huge frame sizes in one
	 function because of the frame_size check in sibcall.c.  */
      if (style == 0)
	abort ();
      r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
      insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
      if (style < 0)
	RTX_FRAME_RELATED_P (insn) = 1;
      insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
							       offset));
    }
  if (style < 0)
    RTX_FRAME_RELATED_P (insn) = 1;
}
5228
/* Expand the prologue into a bunch of separate insns.  */

void
ix86_expand_prologue (void)
{
  rtx insn;
  bool pic_reg_used;
  struct ix86_frame frame;
  HOST_WIDE_INT allocate;

  ix86_compute_frame_layout (&frame);

  /* Note: AT&T enter does NOT have reversed args.  Enter is probably
     slower on all targets.  Also sdb doesn't like it.  */

  if (frame_pointer_needed)
    {
      /* push %ebp; mov %esp, %ebp — both frame-related for unwinding.  */
      insn = emit_insn (gen_push (hard_frame_pointer_rtx));
      RTX_FRAME_RELATED_P (insn) = 1;

      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  allocate = frame.to_allocate;

  /* Either push the saved registers now, or enlarge the allocation to
     make room for saving them with moves later.  */
  if (!frame.save_regs_using_mov)
    ix86_emit_save_regs ();
  else
    allocate += frame.nregs * UNITS_PER_WORD;

  /* When using red zone we may start register saving before allocating
     the stack frame saving one cycle of the prologue.  */
  if (TARGET_RED_ZONE && frame.save_regs_using_mov)
    ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
				   : stack_pointer_rtx,
				   -frame.nregs * UNITS_PER_WORD);

  if (allocate == 0)
    ;
  else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
    /* Ordinary allocation: sub from %esp (marked frame-related).  */
    pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
			       GEN_INT (-allocate), -1);
  else
    {
      /* Large allocation with stack probing.  Only valid for Win32.  */
      rtx eax = gen_rtx_REG (SImode, 0);
      bool eax_live = ix86_eax_live_at_start_p ();

      if (TARGET_64BIT)
	abort ();

      /* The worker clobbers %eax; preserve it on the stack if live.  */
      if (eax_live)
	{
	  emit_insn (gen_push (eax));
	  allocate -= 4;
	}

      insn = emit_move_insn (eax, GEN_INT (allocate));
      RTX_FRAME_RELATED_P (insn) = 1;

      insn = emit_insn (gen_allocate_stack_worker (eax));
      RTX_FRAME_RELATED_P (insn) = 1;

      if (eax_live)
	{
	  rtx t = plus_constant (stack_pointer_rtx, allocate);
	  emit_move_insn (eax, gen_rtx_MEM (SImode, t));
	}
    }

  /* Save registers by moves after the frame was allocated (non-red-zone
     variant of the early save above).  */
  if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
    {
      if (!frame_pointer_needed || !frame.to_allocate)
	ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
      else
	ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
				       -frame.nregs * UNITS_PER_WORD);
    }

  /* Decide whether the PIC base register must be set up, possibly
     relocating it into a free call-clobbered register.  */
  pic_reg_used = false;
  if (pic_offset_table_rtx
      && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
	  || current_function_profile))
    {
      unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();

      if (alt_pic_reg_used != INVALID_REGNUM)
	REGNO (pic_offset_table_rtx) = alt_pic_reg_used;

      pic_reg_used = true;
    }

  if (pic_reg_used)
    {
      insn = emit_insn (gen_set_got (pic_offset_table_rtx));

      /* Even with accurate pre-reload life analysis, we can wind up
	 deleting all references to the pic register after reload.
	 Consider if cross-jumping unifies two sides of a branch
	 controlled by a comparison vs the only read from a global.
	 In which case, allow the set_got to be deleted, though we're
	 too late to do anything about the ebx save in the prologue.  */
      REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
    }

  /* Prevent function calls from be scheduled before the call to mcount.
     In the pic_reg_used case, make sure that the got load isn't deleted.  */
  if (current_function_profile)
    emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
}
5340
da2d1d3a
JH
/* Emit code to restore saved registers using MOV insns.  First register
   is restored from POINTER + OFFSET.  MAYBE_EH_RETURN is passed through
   to ix86_save_reg so the register set matches what the prologue saved.  */
static void
ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
				  int maybe_eh_return)
{
  int regno;
  rtx base_address = gen_rtx_MEM (Pmode, pointer);

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (ix86_save_reg (regno, maybe_eh_return))
      {
	/* Ensure that adjust_address won't be forced to produce pointer
	   out of range allowed by x86-64 instruction set.  */
	if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
	  {
	    /* Stage POINTER + OFFSET through r11 and restore relative
	       to it with a zero displacement.  */
	    rtx r11;

	    r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
	    emit_move_insn (r11, GEN_INT (offset));
	    emit_insn (gen_adddi3 (r11, r11, pointer));
	    base_address = gen_rtx_MEM (Pmode, r11);
	    offset = 0;
	  }
	emit_move_insn (gen_rtx_REG (Pmode, regno),
			adjust_address (base_address, Pmode, offset));
	offset += UNITS_PER_WORD;
      }
}
5370
/* Restore function stack, frame, and registers.

   STYLE encodes the kind of epilogue being expanded: 0 means a sibcall
   epilogue (no return instruction is emitted), 2 means an eh_return
   epilogue (the eh_return data registers are not restored and the stack
   pointer is adjusted by EH_RETURN_STACKADJ_RTX); other values produce
   a normal return.  */

void
ix86_expand_epilogue (int style)
{
  int regno;
  /* The stack pointer only addresses the save area reliably when no
     dynamic stack adjustment happened (or a frame pointer exists).  */
  int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
  struct ix86_frame frame;
  HOST_WIDE_INT offset;

  ix86_compute_frame_layout (&frame);

  /* Calculate start of saved registers relative to ebp.  Special care
     must be taken for the normal return case of a function using
     eh_return: the eax and edx registers are marked as saved, but not
     restored along this path.  */
  offset = frame.nregs;
  if (current_function_calls_eh_return && style != 2)
    offset -= 2;
  offset *= -UNITS_PER_WORD;

  /* If we're only restoring one register and sp is not valid then
     using a move instruction to restore the register since it's
     less work than reloading sp and popping the register.

     The default code result in stack adjustment using add/lea instruction,
     while this code results in LEAVE instruction (or discrete equivalent),
     so it is profitable in some other cases as well.  Especially when there
     are no registers to restore.  We also use this code when TARGET_USE_LEAVE
     and there is exactly one register to pop.  This heuristic may need some
     tuning in future.  */
  if ((!sp_valid && frame.nregs <= 1)
      || (TARGET_EPILOGUE_USING_MOVE
	  && cfun->machine->use_fast_prologue_epilogue
	  && (frame.nregs > 1 || frame.to_allocate))
      || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
      || (frame_pointer_needed && TARGET_USE_LEAVE
	  && cfun->machine->use_fast_prologue_epilogue
	  && frame.nregs == 1)
      || current_function_calls_eh_return)
    {
      /* Restore registers.  We can use ebp or esp to address the memory
	 locations.  If both are available, default to ebp, since offsets
	 are known to be small.  Only exception is esp pointing directly to the
	 end of block of saved registers, where we may simplify addressing
	 mode.  */

      if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
	ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
					  frame.to_allocate, style == 2);
      else
	ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
					  offset, style == 2);

      /* eh_return epilogues need %ecx added to the stack pointer.  */
      if (style == 2)
	{
	  rtx tmp, sa = EH_RETURN_STACKADJ_RTX;

	  if (frame_pointer_needed)
	    {
	      /* Compute the final stack address (above the saved frame
		 pointer) into SA, reload %ebp, then switch to it.  */
	      tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
	      tmp = plus_constant (tmp, UNITS_PER_WORD);
	      emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));

	      tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
	      emit_move_insn (hard_frame_pointer_rtx, tmp);

	      pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
					 const0_rtx, style);
	    }
	  else
	    {
	      /* No frame pointer: skip the local area and register save
		 block, plus the eh_return adjustment, in one set.  */
	      tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
	      tmp = plus_constant (tmp, (frame.to_allocate
					 + frame.nregs * UNITS_PER_WORD));
	      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
	    }
	}
      else if (!frame_pointer_needed)
	pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				   GEN_INT (frame.to_allocate
					    + frame.nregs * UNITS_PER_WORD),
				   style);
      /* If not an i386, mov & pop is faster than "leave".  */
      else if (TARGET_USE_LEAVE || optimize_size
	       || !cfun->machine->use_fast_prologue_epilogue)
	emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
      else
	{
	  /* Discrete equivalent of LEAVE: mov %ebp -> %esp, pop %ebp.  */
	  pro_epilogue_adjust_stack (stack_pointer_rtx,
				     hard_frame_pointer_rtx,
				     const0_rtx, style);
	  if (TARGET_64BIT)
	    emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
	  else
	    emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
	}
    }
  else
    {
      /* First step is to deallocate the stack frame so that we can
	 pop the registers.  */
      if (!sp_valid)
	{
	  if (!frame_pointer_needed)
	    abort ();
	  pro_epilogue_adjust_stack (stack_pointer_rtx,
				     hard_frame_pointer_rtx,
				     GEN_INT (offset), style);
	}
      else if (frame.to_allocate)
	pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				   GEN_INT (frame.to_allocate), style);

      /* Pop every register the prologue pushed, in ascending regno
	 order (the reverse of the push order).  */
      for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
	if (ix86_save_reg (regno, false))
	  {
	    if (TARGET_64BIT)
	      emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
	    else
	      emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
	  }
      if (frame_pointer_needed)
	{
	  /* Leave results in shorter dependency chains on CPUs that are
	     able to grok it fast.  */
	  if (TARGET_USE_LEAVE)
	    emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
	  else if (TARGET_64BIT)
	    emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
	  else
	    emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
	}
    }

  /* Sibcall epilogues don't want a return instruction.  */
  if (style == 0)
    return;

  if (current_function_pops_args && current_function_args_size)
    {
      rtx popc = GEN_INT (current_function_pops_args);

      /* i386 can only pop 64K bytes.  If asked to pop more, pop
	 return address, do explicit add, and jump indirectly to the
	 caller.  */

      if (current_function_pops_args >= 65536)
	{
	  rtx ecx = gen_rtx_REG (SImode, 2);

	  /* There is no "pascal" calling convention in 64bit ABI.  */
	  if (TARGET_64BIT)
	    abort ();

	  emit_insn (gen_popsi1 (ecx));
	  emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
	  emit_jump_insn (gen_return_indirect_internal (ecx));
	}
      else
	emit_jump_insn (gen_return_pop_internal (popc));
    }
  else
    emit_jump_insn (gen_return_internal ());
}
bd09bdeb
RH
5537
5538/* Reset from the function's potential modifications. */
5539
5540static void
b96a374d
AJ
5541ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
5542 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
bd09bdeb
RH
5543{
5544 if (pic_offset_table_rtx)
5545 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
5546}
e075ae69
RH
5547\f
5548/* Extract the parts of an RTL expression that is a valid memory address
b446e5a2
JH
5549 for an instruction. Return 0 if the structure of the address is
5550 grossly off. Return -1 if the address contains ASHIFT, so it is not
74dc3e94 5551 strictly valid, but still used for computing length of lea instruction. */
e075ae69
RH
5552
5553static int
8d531ab9 5554ix86_decompose_address (rtx addr, struct ix86_address *out)
e075ae69
RH
5555{
5556 rtx base = NULL_RTX;
5557 rtx index = NULL_RTX;
5558 rtx disp = NULL_RTX;
5559 HOST_WIDE_INT scale = 1;
5560 rtx scale_rtx = NULL_RTX;
b446e5a2 5561 int retval = 1;
74dc3e94 5562 enum ix86_address_seg seg = SEG_DEFAULT;
e075ae69 5563
90e4e4c5 5564 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
e075ae69
RH
5565 base = addr;
5566 else if (GET_CODE (addr) == PLUS)
5567 {
74dc3e94
RH
5568 rtx addends[4], op;
5569 int n = 0, i;
e075ae69 5570
74dc3e94
RH
5571 op = addr;
5572 do
e075ae69 5573 {
74dc3e94
RH
5574 if (n >= 4)
5575 return 0;
5576 addends[n++] = XEXP (op, 1);
5577 op = XEXP (op, 0);
2a2ab3f9 5578 }
74dc3e94
RH
5579 while (GET_CODE (op) == PLUS);
5580 if (n >= 4)
5581 return 0;
5582 addends[n] = op;
5583
5584 for (i = n; i >= 0; --i)
e075ae69 5585 {
74dc3e94
RH
5586 op = addends[i];
5587 switch (GET_CODE (op))
5588 {
5589 case MULT:
5590 if (index)
5591 return 0;
5592 index = XEXP (op, 0);
5593 scale_rtx = XEXP (op, 1);
5594 break;
5595
5596 case UNSPEC:
5597 if (XINT (op, 1) == UNSPEC_TP
5598 && TARGET_TLS_DIRECT_SEG_REFS
5599 && seg == SEG_DEFAULT)
5600 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
5601 else
5602 return 0;
5603 break;
5604
5605 case REG:
5606 case SUBREG:
5607 if (!base)
5608 base = op;
5609 else if (!index)
5610 index = op;
5611 else
5612 return 0;
5613 break;
5614
5615 case CONST:
5616 case CONST_INT:
5617 case SYMBOL_REF:
5618 case LABEL_REF:
5619 if (disp)
5620 return 0;
5621 disp = op;
5622 break;
5623
5624 default:
5625 return 0;
5626 }
e075ae69 5627 }
e075ae69
RH
5628 }
5629 else if (GET_CODE (addr) == MULT)
5630 {
5631 index = XEXP (addr, 0); /* index*scale */
5632 scale_rtx = XEXP (addr, 1);
5633 }
5634 else if (GET_CODE (addr) == ASHIFT)
5635 {
5636 rtx tmp;
5637
5638 /* We're called for lea too, which implements ashift on occasion. */
5639 index = XEXP (addr, 0);
5640 tmp = XEXP (addr, 1);
5641 if (GET_CODE (tmp) != CONST_INT)
b446e5a2 5642 return 0;
e075ae69
RH
5643 scale = INTVAL (tmp);
5644 if ((unsigned HOST_WIDE_INT) scale > 3)
b446e5a2 5645 return 0;
e075ae69 5646 scale = 1 << scale;
b446e5a2 5647 retval = -1;
2a2ab3f9 5648 }
2a2ab3f9 5649 else
e075ae69
RH
5650 disp = addr; /* displacement */
5651
5652 /* Extract the integral value of scale. */
5653 if (scale_rtx)
e9a25f70 5654 {
e075ae69 5655 if (GET_CODE (scale_rtx) != CONST_INT)
b446e5a2 5656 return 0;
e075ae69 5657 scale = INTVAL (scale_rtx);
e9a25f70 5658 }
3b3c6a3f 5659
74dc3e94 5660 /* Allow arg pointer and stack pointer as index if there is not scaling. */
e075ae69 5661 if (base && index && scale == 1
74dc3e94
RH
5662 && (index == arg_pointer_rtx
5663 || index == frame_pointer_rtx
5664 || (REG_P (index) && REGNO (index) == STACK_POINTER_REGNUM)))
e075ae69
RH
5665 {
5666 rtx tmp = base;
5667 base = index;
5668 index = tmp;
5669 }
5670
5671 /* Special case: %ebp cannot be encoded as a base without a displacement. */
564d80f4
JH
5672 if ((base == hard_frame_pointer_rtx
5673 || base == frame_pointer_rtx
5674 || base == arg_pointer_rtx) && !disp)
e075ae69
RH
5675 disp = const0_rtx;
5676
5677 /* Special case: on K6, [%esi] makes the instruction vector decoded.
5678 Avoid this by transforming to [%esi+0]. */
9e555526 5679 if (ix86_tune == PROCESSOR_K6 && !optimize_size
e075ae69 5680 && base && !index && !disp
329e1d01 5681 && REG_P (base)
e075ae69
RH
5682 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
5683 disp = const0_rtx;
5684
5685 /* Special case: encode reg+reg instead of reg*2. */
5686 if (!base && index && scale && scale == 2)
5687 base = index, scale = 1;
0f290768 5688
e075ae69
RH
5689 /* Special case: scaling cannot be encoded without base or displacement. */
5690 if (!base && !disp && index && scale != 1)
5691 disp = const0_rtx;
5692
5693 out->base = base;
5694 out->index = index;
5695 out->disp = disp;
5696 out->scale = scale;
74dc3e94 5697 out->seg = seg;
3b3c6a3f 5698
b446e5a2 5699 return retval;
e075ae69 5700}
01329426
JH
5701\f
/* Return cost of the memory address x.
   For i386, it is better to use a complex address than let gcc copy
   the address into a reg and make a new pseudo.  But not if the address
   requires two regs - that would mean more pseudos with longer
   lifetimes.  Lower return values are better.  */
static int
ix86_address_cost (rtx x)
{
  struct ix86_address parts;
  int cost = 1;

  /* X must already be a (possibly lea-only) valid address.  */
  if (!ix86_decompose_address (x, &parts))
    abort ();

  /* More complex memory references are better.  */
  if (parts.disp && parts.disp != const0_rtx)
    cost--;
  if (parts.seg != SEG_DEFAULT)
    cost--;

  /* Attempt to minimize number of registers in the address: penalize
     any base/index that is not already a hard register.  */
  if ((parts.base
       && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
      || (parts.index
	  && (!REG_P (parts.index)
	      || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
    cost++;

  /* Extra penalty when two distinct pseudos are needed.  */
  if (parts.base
      && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
      && parts.index
      && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
      && parts.base != parts.index)
    cost++;

  /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
     since its predecode logic can't detect the length of instructions
     and it degenerates to vector decoded.  Increase cost of such
     addresses here.  The penalty is minimally 2 cycles.  It may be worthwhile
     to split such addresses or even refuse such addresses at all.

     Following addressing modes are affected:
      [base+scale*index]
      [scale*index+disp]
      [base+index]

     The first and last case  may be avoidable by explicitly coding the zero in
     memory address, but I don't have AMD-K6 machine handy to check this
     theory.  */

  if (TARGET_K6
      && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
	  || (parts.disp && !parts.base && parts.index && parts.scale != 1)
	  || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
    cost += 10;

  return cost;
}
5760\f
b949ea8b
JW
/* If X is a machine specific address (i.e. a symbol or label being
   referenced as a displacement from the GOT implemented using an
   UNSPEC), then return the base term.  Otherwise return X.  Used by
   alias analysis to see through PIC address constructions.  */

rtx
ix86_find_base_term (rtx x)
{
  rtx term;

  if (TARGET_64BIT)
    {
      /* Look for (const (plus (unspec [sym] GOTPCREL) offset)) and
	 return the underlying symbol/label.  */
      if (GET_CODE (x) != CONST)
	return x;
      term = XEXP (x, 0);
      if (GET_CODE (term) == PLUS
	  && (GET_CODE (XEXP (term, 1)) == CONST_INT
	      || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
	term = XEXP (term, 0);
      if (GET_CODE (term) != UNSPEC
	  || XINT (term, 1) != UNSPEC_GOTPCREL)
	return x;

      term = XVECEXP (term, 0, 0);

      if (GET_CODE (term) != SYMBOL_REF
	  && GET_CODE (term) != LABEL_REF)
	return x;

      return term;
    }

  /* 32-bit: strip the PIC wrapping, if any, and keep the result only
     when it resolves to a bare symbol or label.  */
  term = ix86_delegitimize_address (x);

  if (GET_CODE (term) != SYMBOL_REF
      && GET_CODE (term) != LABEL_REF)
    return x;

  return term;
}
5800\f
f996902d
RH
/* Determine if a given RTX is a valid constant.  We already know this
   satisfies CONSTANT_P.  TLS symbol addresses (and offsets from them)
   are the notable rejections, since they require runtime relocation.  */

bool
legitimate_constant_p (rtx x)
{
  rtx inner;

  switch (GET_CODE (x))
    {
    case SYMBOL_REF:
      /* TLS symbols are not constant.  */
      if (tls_symbolic_operand (x, Pmode))
	return false;
      break;

    case CONST:
      inner = XEXP (x, 0);

      /* Offsets of TLS symbols are never valid.
	 Discourage CSE from creating them.  */
      if (GET_CODE (inner) == PLUS
	  && tls_symbolic_operand (XEXP (inner, 0), Pmode))
	return false;

      /* Strip a constant integer offset before inspecting the core.  */
      if (GET_CODE (inner) == PLUS
	  || GET_CODE (inner) == MINUS)
	{
	  if (GET_CODE (XEXP (inner, 1)) != CONST_INT)
	    return false;
	  inner = XEXP (inner, 0);
	}

      /* Only some unspecs are valid as "constants".  */
      if (GET_CODE (inner) == UNSPEC)
	switch (XINT (inner, 1))
	  {
	  case UNSPEC_TPOFF:
	  case UNSPEC_NTPOFF:
	    return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
	  case UNSPEC_DTPOFF:
	    return local_dynamic_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
	  default:
	    return false;
	  }
      break;

    default:
      break;
    }

  /* Otherwise we handle everything else in the move patterns.  */
  return true;
}
5855
3a04ff64
RH
5856/* Determine if it's legal to put X into the constant pool. This
5857 is not possible for the address of thread-local symbols, which
5858 is checked above. */
5859
5860static bool
b96a374d 5861ix86_cannot_force_const_mem (rtx x)
3a04ff64
RH
5862{
5863 return !legitimate_constant_p (x);
5864}
5865
f996902d
RH
5866/* Determine if a given RTX is a valid constant address. */
5867
5868bool
b96a374d 5869constant_address_p (rtx x)
f996902d 5870{
a94f136b 5871 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
f996902d
RH
5872}
5873
/* Nonzero if the constant value X is a legitimate general operand
   when generating PIC code.  It is given that flag_pic is on and
   that X satisfies CONSTANT_P or is a CONST_DOUBLE.  */

bool
legitimate_pic_operand_p (rtx x)
{
  rtx inner;

  switch (GET_CODE (x))
    {
    case CONST:
      inner = XEXP (x, 0);

      /* Only some unspecs are valid as "constants".  */
      if (GET_CODE (inner) == UNSPEC)
	switch (XINT (inner, 1))
	  {
	  case UNSPEC_TPOFF:
	    return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
	  default:
	    return false;
	  }
      /* Non-UNSPEC CONSTs are judged like symbols/labels below.  */
      /* FALLTHRU */

    case SYMBOL_REF:
    case LABEL_REF:
      return legitimate_pic_address_disp_p (x);

    default:
      /* Plain integers, floats, etc. are always fine.  */
      return true;
    }
}
5907
e075ae69
RH
/* Determine if a given CONST RTX is a valid memory displacement
   in PIC mode.  Returns nonzero when DISP may appear as the
   displacement part of an address.  */

int
legitimate_pic_address_disp_p (rtx disp)
{
  bool saw_plus;

  /* In 64bit mode we can allow direct addresses of symbols and labels
     when they are not dynamic symbols.  */
  if (TARGET_64BIT)
    {
      /* TLS references should always be enclosed in UNSPEC.  */
      if (tls_symbolic_operand (disp, GET_MODE (disp)))
	return 0;
      if (GET_CODE (disp) == SYMBOL_REF
	  && ix86_cmodel == CM_SMALL_PIC
	  && SYMBOL_REF_LOCAL_P (disp))
	return 1;
      if (GET_CODE (disp) == LABEL_REF)
	return 1;
      if (GET_CODE (disp) == CONST
	  && GET_CODE (XEXP (disp, 0)) == PLUS)
	{
	  rtx op0 = XEXP (XEXP (disp, 0), 0);
	  rtx op1 = XEXP (XEXP (disp, 0), 1);

	  /* TLS references should always be enclosed in UNSPEC.  */
	  if (tls_symbolic_operand (op0, GET_MODE (op0)))
	    return 0;
	  /* Local symbol + offset is OK when the offset stays within
	     +/- 16MB, keeping the result in signed 32-bit range.  */
	  if (((GET_CODE (op0) == SYMBOL_REF
		&& ix86_cmodel == CM_SMALL_PIC
		&& SYMBOL_REF_LOCAL_P (op0))
	       || GET_CODE (op0) == LABEL_REF)
	      && GET_CODE (op1) == CONST_INT
	      && INTVAL (op1) < 16*1024*1024
	      && INTVAL (op1) >= -16*1024*1024)
	    return 1;
	}
    }
  if (GET_CODE (disp) != CONST)
    return 0;
  disp = XEXP (disp, 0);

  if (TARGET_64BIT)
    {
      /* We are unsafe to allow PLUS expressions.  This limit allowed distance
	 of GOT tables.  We should not need these anyway.  */
      if (GET_CODE (disp) != UNSPEC
	  || XINT (disp, 1) != UNSPEC_GOTPCREL)
	return 0;

      if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
	  && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
	return 0;
      return 1;
    }

  /* 32-bit: strip a constant offset, remembering we saw one, then
     classify the remaining UNSPEC.  */
  saw_plus = false;
  if (GET_CODE (disp) == PLUS)
    {
      if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
	return 0;
      disp = XEXP (disp, 0);
      saw_plus = true;
    }

  /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O.  */
  if (TARGET_MACHO && GET_CODE (disp) == MINUS)
    {
      if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
          || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
	if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
	  {
	    const char *sym_name = XSTR (XEXP (disp, 1), 0);
	    if (! strcmp (sym_name, "<pic base>"))
	      return 1;
	  }
    }

  if (GET_CODE (disp) != UNSPEC)
    return 0;

  switch (XINT (disp, 1))
    {
    case UNSPEC_GOT:
      /* A GOT slot reference cannot carry an extra offset.  */
      if (saw_plus)
	return false;
      return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
    case UNSPEC_GOTOFF:
      if (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
	  || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
	return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
      return false;
    case UNSPEC_GOTTPOFF:
    case UNSPEC_GOTNTPOFF:
    case UNSPEC_INDNTPOFF:
      if (saw_plus)
	return false;
      return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
    case UNSPEC_NTPOFF:
      return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
    case UNSPEC_DTPOFF:
      return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
    }

  return 0;
}
6016
e075ae69
RH
/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
   memory address for an instruction.  The MODE argument is the machine mode
   for the MEM expression that wants to use this address.  STRICT selects
   strict register checking (hard regs / allocated pseudos only).

   It only recognizes address in canonical form.  LEGITIMIZE_ADDRESS should
   convert common non-canonical forms to canonical form so that they will
   be recognized.  */

int
legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
{
  struct ix86_address parts;
  rtx base, index, disp;
  HOST_WIDE_INT scale;
  /* On rejection, REASON/REASON_RTX describe the failure for the
     TARGET_DEBUG_ADDR dump at report_error below.  */
  const char *reason = NULL;
  rtx reason_rtx = NULL_RTX;

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr,
	       "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
	       GET_MODE_NAME (mode), strict);
      debug_rtx (addr);
    }

  /* Note: <= 0 also rejects the -1 "lea only" ASHIFT form.  */
  if (ix86_decompose_address (addr, &parts) <= 0)
    {
      reason = "decomposition failed";
      goto report_error;
    }

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;

  /* Validate base register.

     Don't allow SUBREG's here, it can lead to spill failures when the base
     is one word out of a two word structure, which is represented internally
     as a DImode int.  */

  if (base)
    {
      reason_rtx = base;

      if (GET_CODE (base) != REG)
	{
	  reason = "base is not a register";
	  goto report_error;
	}

      if (GET_MODE (base) != Pmode)
	{
	  reason = "base is not in Pmode";
	  goto report_error;
	}

      if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
	  || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
	{
	  reason = "base is not valid";
	  goto report_error;
	}
    }

  /* Validate index register.

     Don't allow SUBREG's here, it can lead to spill failures when the index
     is one word out of a two word structure, which is represented internally
     as a DImode int.  */

  if (index)
    {
      reason_rtx = index;

      if (GET_CODE (index) != REG)
	{
	  reason = "index is not a register";
	  goto report_error;
	}

      if (GET_MODE (index) != Pmode)
	{
	  reason = "index is not in Pmode";
	  goto report_error;
	}

      if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
	  || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
	{
	  reason = "index is not valid";
	  goto report_error;
	}
    }

  /* Validate scale factor.  */
  if (scale != 1)
    {
      reason_rtx = GEN_INT (scale);
      if (!index)
	{
	  reason = "scale without index";
	  goto report_error;
	}

      if (scale != 2 && scale != 4 && scale != 8)
	{
	  reason = "scale is not a valid multiplier";
	  goto report_error;
	}
    }

  /* Validate displacement.  */
  if (disp)
    {
      reason_rtx = disp;

      if (GET_CODE (disp) == CONST
	  && GET_CODE (XEXP (disp, 0)) == UNSPEC)
	switch (XINT (XEXP (disp, 0), 1))
	  {
	  case UNSPEC_GOT:
	  case UNSPEC_GOTOFF:
	  case UNSPEC_GOTPCREL:
	    if (!flag_pic)
	      abort ();
	    goto is_legitimate_pic;

	  case UNSPEC_GOTTPOFF:
	  case UNSPEC_GOTNTPOFF:
	  case UNSPEC_INDNTPOFF:
	  case UNSPEC_NTPOFF:
	  case UNSPEC_DTPOFF:
	    break;

	  default:
	    reason = "invalid address unspec";
	    goto report_error;
	  }

      else if (flag_pic && (SYMBOLIC_CONST (disp)
#if TARGET_MACHO
			    && !machopic_operand_p (disp)
#endif
			    ))
	{
	is_legitimate_pic:
	  if (TARGET_64BIT && (index || base))
	    {
	      /* foo@dtpoff(%rX) is ok.  */
	      if (GET_CODE (disp) != CONST
		  || GET_CODE (XEXP (disp, 0)) != PLUS
		  || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
		  || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
		  || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
		      && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
		{
		  reason = "non-constant pic memory reference";
		  goto report_error;
		}
	    }
	  else if (! legitimate_pic_address_disp_p (disp))
	    {
	      reason = "displacement is an invalid pic construct";
	      goto report_error;
	    }

	  /* This code used to verify that a symbolic pic displacement
	     includes the pic_offset_table_rtx register.

	     While this is good idea, unfortunately these constructs may
	     be created by "adds using lea" optimization for incorrect
	     code like:

	     int a;
	     int foo(int i)
	       {
	         return *(&a+i);
	       }

	     This code is nonsensical, but results in addressing
	     GOT table with pic_offset_table_rtx base.  We can't
	     just refuse it easily, since it gets matched by
	     "addsi3" pattern, that later gets split to lea in the
	     case output register differs from input.  While this
	     can be handled by separate addsi pattern for this case
	     that never results in lea, this seems to be easier and
	     correct fix for crash to disable this test.  */
	}
      else if (GET_CODE (disp) != LABEL_REF
	       && GET_CODE (disp) != CONST_INT
	       && (GET_CODE (disp) != CONST
		   || !legitimate_constant_p (disp))
	       && (GET_CODE (disp) != SYMBOL_REF
		   || !legitimate_constant_p (disp)))
	{
	  reason = "displacement is not constant";
	  goto report_error;
	}
      else if (TARGET_64BIT && !x86_64_sign_extended_value (disp))
	{
	  reason = "displacement is out of range";
	  goto report_error;
	}
    }

  /* Everything looks valid.  */
  if (TARGET_DEBUG_ADDR)
    fprintf (stderr, "Success.\n");
  return TRUE;

 report_error:
  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr, "Error: %s\n", reason);
      debug_rtx (reason_rtx);
    }
  return FALSE;
}
3b3c6a3f 6237\f
55efb413
JW
6238/* Return an unique alias set for the GOT. */
6239
0f290768 6240static HOST_WIDE_INT
b96a374d 6241ix86_GOT_alias_set (void)
55efb413 6242{
5bf0ebab
RH
6243 static HOST_WIDE_INT set = -1;
6244 if (set == -1)
6245 set = new_alias_set ();
6246 return set;
0f290768 6247}
55efb413 6248
3b3c6a3f
MM
/* Return a legitimate reference for ORIG (an address) using the
   register REG.  If REG is 0, a new pseudo is generated.

   There are two types of references that must be handled:

   1. Global data references must load the address from the GOT, via
      the PIC reg.  An insn is emitted to do this load, and the reg is
      returned.

   2. Static data references, constant pool addresses, and code labels
      compute the address as an offset from the GOT, whose base is in
      the PIC reg.  Static data objects have SYMBOL_FLAG_LOCAL set to
      differentiate them from global data objects.  The returned
      address is the PIC reg + an unspec constant.

   GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
   reg also appears in the address.  */

rtx
legitimize_pic_address (rtx orig, rtx reg)
{
  rtx addr = orig;
  rtx new = orig;
  rtx base;

#if TARGET_MACHO
  if (reg == 0)
    reg = gen_reg_rtx (Pmode);
  /* Use the generic Mach-O PIC machinery.  */
  return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
#endif

  if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
    new = addr;
  else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
    {
      /* This symbol may be referenced via a displacement from the PIC
	 base address (@GOTOFF).  */

      if (reload_in_progress)
	regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
      if (GET_CODE (addr) == CONST)
	addr = XEXP (addr, 0);
      if (GET_CODE (addr) == PLUS)
	{
	  /* Wrap only the symbolic part in the GOTOFF unspec; keep the
	     integer offset outside.  */
	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
	  new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
	}
      else
	new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
      new = gen_rtx_CONST (Pmode, new);
      new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);

      if (reg != 0)
	{
	  emit_move_insn (reg, new);
	  new = reg;
	}
    }
  else if (GET_CODE (addr) == SYMBOL_REF)
    {
      if (TARGET_64BIT)
	{
	  /* 64-bit: load the address from the RIP-relative GOT slot.  */
	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
	  new = gen_rtx_CONST (Pmode, new);
	  new = gen_rtx_MEM (Pmode, new);
	  RTX_UNCHANGING_P (new) = 1;
	  set_mem_alias_set (new, ix86_GOT_alias_set ());

	  if (reg == 0)
	    reg = gen_reg_rtx (Pmode);
	  /* Use directly gen_movsi, otherwise the address is loaded
	     into register for CSE.  We don't want to CSE this addresses,
	     instead we CSE addresses from the GOT table, so skip this.  */
	  emit_insn (gen_movsi (reg, new));
	  new = reg;
	}
      else
	{
	  /* This symbol must be referenced via a load from the
	     Global Offset Table (@GOT).  */

	  if (reload_in_progress)
	    regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
	  new = gen_rtx_CONST (Pmode, new);
	  new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
	  new = gen_rtx_MEM (Pmode, new);
	  RTX_UNCHANGING_P (new) = 1;
	  set_mem_alias_set (new, ix86_GOT_alias_set ());

	  if (reg == 0)
	    reg = gen_reg_rtx (Pmode);
	  emit_move_insn (reg, new);
	  new = reg;
	}
    }
  else
    {
      if (GET_CODE (addr) == CONST)
	{
	  addr = XEXP (addr, 0);

	  /* We must match stuff we generate before.  Assume the only
	     unspecs that can get here are ours.  Not that we could do
	     anything with them anyway....  */
	  if (GET_CODE (addr) == UNSPEC
	      || (GET_CODE (addr) == PLUS
		  && GET_CODE (XEXP (addr, 0)) == UNSPEC))
	    return orig;
	  if (GET_CODE (addr) != PLUS)
	    abort ();
	}
      if (GET_CODE (addr) == PLUS)
	{
	  rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);

	  /* Check first to see if this is a constant offset from a @GOTOFF
	     symbol reference.  */
	  if (local_symbolic_operand (op0, Pmode)
	      && GET_CODE (op1) == CONST_INT)
	    {
	      if (!TARGET_64BIT)
		{
		  if (reload_in_progress)
		    regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
		  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
					UNSPEC_GOTOFF);
		  new = gen_rtx_PLUS (Pmode, new, op1);
		  new = gen_rtx_CONST (Pmode, new);
		  new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);

		  if (reg != 0)
		    {
		      emit_move_insn (reg, new);
		      new = reg;
		    }
		}
	      else
		{
		  /* 64-bit: only offsets outside +/- 16MB need fixing;
		     force the large offset into a register.  */
		  if (INTVAL (op1) < -16*1024*1024
		      || INTVAL (op1) >= 16*1024*1024)
		    new = gen_rtx_PLUS (Pmode, op0, force_reg (Pmode, op1));
		}
	    }
	  else
	    {
	      /* General case: legitimize both addends recursively and
		 re-associate constant parts toward the outside.  */
	      base = legitimize_pic_address (XEXP (addr, 0), reg);
	      new  = legitimize_pic_address (XEXP (addr, 1),
					     base == reg ? NULL_RTX : reg);

	      if (GET_CODE (new) == CONST_INT)
		new = plus_constant (base, INTVAL (new));
	      else
		{
		  if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
		    {
		      base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
		      new = XEXP (new, 1);
		    }
		  new = gen_rtx_PLUS (Pmode, base, new);
		}
	    }
	}
    }
  return new;
}
6416\f
74dc3e94 6417/* Load the thread pointer. If TO_REG is true, force it into a register. */
f996902d
RH
6418
6419static rtx
b96a374d 6420get_thread_pointer (int to_reg)
f996902d 6421{
74dc3e94 6422 rtx tp, reg, insn;
f996902d
RH
6423
6424 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
74dc3e94
RH
6425 if (!to_reg)
6426 return tp;
f996902d 6427
74dc3e94
RH
6428 reg = gen_reg_rtx (Pmode);
6429 insn = gen_rtx_SET (VOIDmode, reg, tp);
6430 insn = emit_insn (insn);
6431
6432 return reg;
6433}
6434
/* A subroutine of legitimize_address and ix86_expand_move.  FOR_MOV is
   false if we expect this to be used for a memory address and true if
   we expect to load the address into a register.

   X is the TLS symbol to be addressed and MODEL the TLS access model
   to use.  Returns an rtx for the address; as a side effect, emits
   whatever insns the chosen model needs (calls, GOT loads, thread
   pointer reads).  */

static rtx
legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
{
  rtx dest, base, off, pic;
  int type;

  switch (model)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      /* Call the tls_global_dynamic pattern; on 64-bit the result is
	 produced in rax (hard reg 0) and wrapped in a libcall block so
	 the optimizers may CSE equivalent address computations.  */
      dest = gen_reg_rtx (Pmode);
      if (TARGET_64BIT)
	{
	  rtx rax = gen_rtx_REG (Pmode, 0), insns;

	  start_sequence ();
	  emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
	  insns = get_insns ();
	  end_sequence ();

	  emit_libcall_block (insns, dest, rax, x);
	}
      else
	emit_insn (gen_tls_global_dynamic_32 (dest, x));
      break;

    case TLS_MODEL_LOCAL_DYNAMIC:
      /* Compute the base of the current module's TLS block once, then
	 address X as base + @DTPOFF offset.  */
      base = gen_reg_rtx (Pmode);
      if (TARGET_64BIT)
	{
	  rtx rax = gen_rtx_REG (Pmode, 0), insns, note;

	  start_sequence ();
	  emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
	  insns = get_insns ();
	  end_sequence ();

	  /* Attach an equivalence note naming __tls_get_addr so the
	     libcall block can be CSEd against other base computations.  */
	  note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
	  note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
	  emit_libcall_block (insns, base, rax, note);
	}
      else
	emit_insn (gen_tls_local_dynamic_base_32 (base));

      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
      off = gen_rtx_CONST (Pmode, off);

      return gen_rtx_PLUS (Pmode, base, off);

    case TLS_MODEL_INITIAL_EXEC:
      /* Pick the relocation type and whether a PIC register is needed
	 to form the GOT slot address.  */
      if (TARGET_64BIT)
	{
	  pic = NULL;
	  type = UNSPEC_GOTNTPOFF;
	}
      else if (flag_pic)
	{
	  if (reload_in_progress)
	    regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
	  pic = pic_offset_table_rtx;
	  type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
	}
      else if (!TARGET_GNU_TLS)
	{
	  /* Sun TLS without -fpic: materialize a GOT pointer locally.  */
	  pic = gen_reg_rtx (Pmode);
	  emit_insn (gen_set_got (pic));
	  type = UNSPEC_GOTTPOFF;
	}
      else
	{
	  pic = NULL;
	  type = UNSPEC_INDNTPOFF;
	}

      /* Load the thread-pointer offset for X from its GOT slot.  */
      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
      off = gen_rtx_CONST (Pmode, off);
      if (pic)
	off = gen_rtx_PLUS (Pmode, pic, off);
      off = gen_rtx_MEM (Pmode, off);
      RTX_UNCHANGING_P (off) = 1;
      set_mem_alias_set (off, ix86_GOT_alias_set ());

      if (TARGET_64BIT || TARGET_GNU_TLS)
	{
	  base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
	  off = force_reg (Pmode, off);
	  return gen_rtx_PLUS (Pmode, base, off);
	}
      else
	{
	  /* Sun TLS: offsets are positive, so subtract from the TP.  */
	  base = get_thread_pointer (true);
	  dest = gen_reg_rtx (Pmode);
	  emit_insn (gen_subsi3 (dest, base, off));
	}
      break;

    case TLS_MODEL_LOCAL_EXEC:
      /* The offset is a link-time constant relocation against X.  */
      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
			    (TARGET_64BIT || TARGET_GNU_TLS)
			    ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
      off = gen_rtx_CONST (Pmode, off);

      if (TARGET_64BIT || TARGET_GNU_TLS)
	{
	  base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
	  return gen_rtx_PLUS (Pmode, base, off);
	}
      else
	{
	  base = get_thread_pointer (true);
	  dest = gen_reg_rtx (Pmode);
	  emit_insn (gen_subsi3 (dest, base, off));
	}
      break;

    default:
      abort ();
    }

  return dest;
}
fce5a9f2 6559
3b3c6a3f
MM
/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   MODE and WIN are passed so that this macro can use
   GO_IF_LEGITIMATE_ADDRESS.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the 80386, we handle X+REG by loading X into a register R and
   using R+REG.  R will go in a general reg and indexing will be used.
   However, if REG is a broken-out memory address or multiplication,
   nothing needs to be done because REG can certainly go in a general reg.

   When -fpic is used, special handling is needed for symbolic references.
   See comments by legitimize_pic_address in i386.c for details.  */

rtx
legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
{
  int changed = 0;
  unsigned log;

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
	       GET_MODE_NAME (mode));
      debug_rtx (x);
    }

  /* TLS symbols get their own legitimization path; tls_symbolic_operand
     returns the TLS model (nonzero) when X is such a symbol.  */
  log = tls_symbolic_operand (x, mode);
  if (log)
    return legitimize_tls_address (x, log, false);

  if (flag_pic && SYMBOLIC_CONST (x))
    return legitimize_pic_address (x, 0);

  /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
  if (GET_CODE (x) == ASHIFT
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
    {
      changed = 1;
      x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
			GEN_INT (1 << log));
    }

  if (GET_CODE (x) == PLUS)
    {
      /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */

      if (GET_CODE (XEXP (x, 0)) == ASHIFT
	  && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
	  && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
	{
	  changed = 1;
	  XEXP (x, 0) = gen_rtx_MULT (Pmode,
				      force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
				      GEN_INT (1 << log));
	}

      if (GET_CODE (XEXP (x, 1)) == ASHIFT
	  && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
	  && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
	{
	  changed = 1;
	  XEXP (x, 1) = gen_rtx_MULT (Pmode,
				      force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
				      GEN_INT (1 << log));
	}

      /* Put multiply first if it isn't already.  */
      if (GET_CODE (XEXP (x, 1)) == MULT)
	{
	  rtx tmp = XEXP (x, 0);
	  XEXP (x, 0) = XEXP (x, 1);
	  XEXP (x, 1) = tmp;
	  changed = 1;
	}

      /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can be
	 created by virtual register instantiation, register elimination, and
	 similar optimizations.  */
      if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
	{
	  changed = 1;
	  x = gen_rtx_PLUS (Pmode,
			    gen_rtx_PLUS (Pmode, XEXP (x, 0),
					  XEXP (XEXP (x, 1), 0)),
			    XEXP (XEXP (x, 1), 1));
	}

      /* Canonicalize
	 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
      else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
	       && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	       && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
	       && CONSTANT_P (XEXP (x, 1)))
	{
	  rtx constant;
	  rtx other = NULL_RTX;

	  /* Find which of the two constants is the CONST_INT and which
	     is the remaining (symbolic) constant term.  */
	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
	    {
	      constant = XEXP (x, 1);
	      other = XEXP (XEXP (XEXP (x, 0), 1), 1);
	    }
	  else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
	    {
	      constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
	      other = XEXP (x, 1);
	    }
	  else
	    constant = 0;

	  if (constant)
	    {
	      changed = 1;
	      x = gen_rtx_PLUS (Pmode,
				gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
					      XEXP (XEXP (XEXP (x, 0), 1), 0)),
				plus_constant (other, INTVAL (constant)));
	    }
	}

      if (changed && legitimate_address_p (mode, x, FALSE))
	return x;

      /* Force any remaining multiplies into registers.  */
      if (GET_CODE (XEXP (x, 0)) == MULT)
	{
	  changed = 1;
	  XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
	}

      if (GET_CODE (XEXP (x, 1)) == MULT)
	{
	  changed = 1;
	  XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
	}

      if (changed
	  && GET_CODE (XEXP (x, 1)) == REG
	  && GET_CODE (XEXP (x, 0)) == REG)
	return x;

      if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
	{
	  changed = 1;
	  x = legitimize_pic_address (x, 0);
	}

      if (changed && legitimate_address_p (mode, x, FALSE))
	return x;

      /* Last resort: load the non-register operand into a fresh pseudo
	 so the address becomes reg + reg.  */
      if (GET_CODE (XEXP (x, 0)) == REG)
	{
	  rtx temp = gen_reg_rtx (Pmode);
	  rtx val  = force_operand (XEXP (x, 1), temp);
	  if (val != temp)
	    emit_move_insn (temp, val);

	  XEXP (x, 1) = temp;
	  return x;
	}

      else if (GET_CODE (XEXP (x, 1)) == REG)
	{
	  rtx temp = gen_reg_rtx (Pmode);
	  rtx val  = force_operand (XEXP (x, 0), temp);
	  if (val != temp)
	    emit_move_insn (temp, val);

	  XEXP (x, 0) = temp;
	  return x;
	}
    }

  return x;
}
2a2ab3f9
JVA
6745\f
/* Print an integer constant expression in assembler syntax.  Addition
   and subtraction are the only arithmetic that may appear in these
   expressions.  FILE is the stdio stream to write to, X is the rtx, and
   CODE is the operand print code from the output string.

   Recurses over PLUS/MINUS/CONST and emits the PIC/TLS relocation
   suffix (@GOT, @GOTOFF, @PLT, ...) encoded in any UNSPEC wrapper.  */

static void
output_pic_addr_const (FILE *file, rtx x, int code)
{
  char buf[256];

  switch (GET_CODE (x))
    {
    case PC:
      if (flag_pic)
	putc ('.', file);
      else
	abort ();
      break;

    case SYMBOL_REF:
      /* Mark the decl as referenced so that cgraph will output the function.  */
      if (SYMBOL_REF_DECL (x))
	mark_decl_referenced (SYMBOL_REF_DECL (x));

      assemble_name (file, XSTR (x, 0));
      if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
	fputs ("@PLT", file);
      break;

    case LABEL_REF:
      x = XEXP (x, 0);
      /* FALLTHRU */
    case CODE_LABEL:
      ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
      assemble_name (asm_out_file, buf);
      break;

    case CONST_INT:
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
      break;

    case CONST:
      /* This used to output parentheses around the expression,
	 but that does not work on the 386 (either ATT or BSD assembler).  */
      output_pic_addr_const (file, XEXP (x, 0), code);
      break;

    case CONST_DOUBLE:
      if (GET_MODE (x) == VOIDmode)
	{
	  /* We can use %d if the number is <32 bits and positive.  */
	  if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
	    fprintf (file, "0x%lx%08lx",
		     (unsigned long) CONST_DOUBLE_HIGH (x),
		     (unsigned long) CONST_DOUBLE_LOW (x));
	  else
	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
	}
      else
	/* We can't handle floating point constants;
	   PRINT_OPERAND must handle them.  */
	output_operand_lossage ("floating constant misused");
      break;

    case PLUS:
      /* Some assemblers need integer constants to appear first.  */
      if (GET_CODE (XEXP (x, 0)) == CONST_INT)
	{
	  output_pic_addr_const (file, XEXP (x, 0), code);
	  putc ('+', file);
	  output_pic_addr_const (file, XEXP (x, 1), code);
	}
      else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
	{
	  output_pic_addr_const (file, XEXP (x, 1), code);
	  putc ('+', file);
	  output_pic_addr_const (file, XEXP (x, 0), code);
	}
      else
	abort ();
      break;

    case MINUS:
      /* Bracket the difference so the assembler evaluates it as one
	 expression; the bracket style depends on the dialect.  */
      if (!TARGET_MACHO)
	putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
      output_pic_addr_const (file, XEXP (x, 0), code);
      putc ('-', file);
      output_pic_addr_const (file, XEXP (x, 1), code);
      if (!TARGET_MACHO)
	putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
      break;

    case UNSPEC:
      if (XVECLEN (x, 0) != 1)
	abort ();
      /* Print the wrapped operand, then the relocation suffix the
	 UNSPEC number selects.  */
      output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
      switch (XINT (x, 1))
	{
	case UNSPEC_GOT:
	  fputs ("@GOT", file);
	  break;
	case UNSPEC_GOTOFF:
	  fputs ("@GOTOFF", file);
	  break;
	case UNSPEC_GOTPCREL:
	  fputs ("@GOTPCREL(%rip)", file);
	  break;
	case UNSPEC_GOTTPOFF:
	  /* FIXME: This might be @TPOFF in Sun ld too.  */
	  fputs ("@GOTTPOFF", file);
	  break;
	case UNSPEC_TPOFF:
	  fputs ("@TPOFF", file);
	  break;
	case UNSPEC_NTPOFF:
	  if (TARGET_64BIT)
	    fputs ("@TPOFF", file);
	  else
	    fputs ("@NTPOFF", file);
	  break;
	case UNSPEC_DTPOFF:
	  fputs ("@DTPOFF", file);
	  break;
	case UNSPEC_GOTNTPOFF:
	  if (TARGET_64BIT)
	    fputs ("@GOTTPOFF(%rip)", file);
	  else
	    fputs ("@GOTNTPOFF", file);
	  break;
	case UNSPEC_INDNTPOFF:
	  fputs ("@INDNTPOFF", file);
	  break;
	default:
	  output_operand_lossage ("invalid UNSPEC as operand");
	  break;
	}
      break;

    default:
      output_operand_lossage ("invalid expression as operand");
    }
}
1865dbb5 6888
0f290768 6889/* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
1865dbb5
JM
6890 We need to handle our special PIC relocations. */
6891
0f290768 6892void
b96a374d 6893i386_dwarf_output_addr_const (FILE *file, rtx x)
1865dbb5 6894{
14f73b5a 6895#ifdef ASM_QUAD
18b5b8d6 6896 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
14f73b5a
JH
6897#else
6898 if (TARGET_64BIT)
6899 abort ();
18b5b8d6 6900 fprintf (file, "%s", ASM_LONG);
14f73b5a 6901#endif
1865dbb5
JM
6902 if (flag_pic)
6903 output_pic_addr_const (file, x, '\0');
6904 else
6905 output_addr_const (file, x);
6906 fputc ('\n', file);
6907}
6908
b9203463
RH
6909/* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
6910 We need to emit DTP-relative relocations. */
6911
6912void
b96a374d 6913i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
b9203463 6914{
75d38379
JJ
6915 fputs (ASM_LONG, file);
6916 output_addr_const (file, x);
6917 fputs ("@DTPOFF", file);
b9203463
RH
6918 switch (size)
6919 {
6920 case 4:
b9203463
RH
6921 break;
6922 case 8:
75d38379 6923 fputs (", 0", file);
b9203463 6924 break;
b9203463
RH
6925 default:
6926 abort ();
6927 }
b9203463
RH
6928}
6929
1865dbb5
JM
/* In the name of slightly smaller debug output, and to cater to
   general assembler losage, recognize PIC+GOTOFF and turn it back
   into a direct symbol reference.

   ORIG_X is the (possibly MEM-wrapped) address; returns the bare
   symbol (plus any scaled-index/offset terms) on a match, or ORIG_X
   unchanged when the shape is not one we generated.  */

static rtx
ix86_delegitimize_address (rtx orig_x)
{
  rtx x = orig_x, y;

  if (GET_CODE (x) == MEM)
    x = XEXP (x, 0);

  /* 64-bit PIC uses a single @GOTPCREL load; unwrap it.  */
  if (TARGET_64BIT)
    {
      if (GET_CODE (x) != CONST
	  || GET_CODE (XEXP (x, 0)) != UNSPEC
	  || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
	  || GET_CODE (orig_x) != MEM)
	return orig_x;
      return XVECEXP (XEXP (x, 0), 0, 0);
    }

  if (GET_CODE (x) != PLUS
      || GET_CODE (XEXP (x, 1)) != CONST)
    return orig_x;

  /* Y collects the non-PIC-register part of the address (index term),
     or stays NULL when the address is just %ebx + reloc.  */
  if (GET_CODE (XEXP (x, 0)) == REG
      && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
    /* %ebx + GOT/GOTOFF */
    y = NULL;
  else if (GET_CODE (XEXP (x, 0)) == PLUS)
    {
      /* %ebx + %reg * scale + GOT/GOTOFF */
      y = XEXP (x, 0);
      if (GET_CODE (XEXP (y, 0)) == REG
	  && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
	y = XEXP (y, 1);
      else if (GET_CODE (XEXP (y, 1)) == REG
	       && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
	y = XEXP (y, 0);
      else
	return orig_x;
      if (GET_CODE (y) != REG
	  && GET_CODE (y) != MULT
	  && GET_CODE (y) != ASHIFT)
	return orig_x;
    }
  else
    return orig_x;

  /* X is now the CONST body: either an UNSPEC or UNSPEC + offset.
     @GOT references only appear inside a MEM; @GOTOFF outside one.  */
  x = XEXP (XEXP (x, 1), 0);
  if (GET_CODE (x) == UNSPEC
      && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
	  || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
    {
      if (y)
	return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
      return XVECEXP (x, 0, 0);
    }

  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == UNSPEC
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
	  || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
	      && GET_CODE (orig_x) != MEM)))
    {
      x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
      if (y)
	return gen_rtx_PLUS (Pmode, y, x);
      return x;
    }

  return orig_x;
}
2a2ab3f9 7005\f
/* Write to FILE the x86 condition-code suffix ("e", "ne", "g", ...)
   for comparison CODE under CC mode MODE.  If REVERSE, emit the
   suffix for the reversed condition.  FP nonzero selects the fcmov
   spelling where it differs from the cmov/setcc one.  Aborts when
   CODE is not representable in MODE.  */

static void
put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
		    int fp, FILE *file)
{
  const char *suffix;

  /* FP comparisons are first reduced to an equivalent integer
     condition on CCmode; only single-branch codes are expected here.  */
  if (mode == CCFPmode || mode == CCFPUmode)
    {
      enum rtx_code second_code, bypass_code;
      ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
      if (bypass_code != NIL || second_code != NIL)
	abort ();
      code = ix86_fp_compare_code_to_integer (code);
      mode = CCmode;
    }
  if (reverse)
    code = reverse_condition (code);

  switch (code)
    {
    case EQ:
      suffix = "e";
      break;
    case NE:
      suffix = "ne";
      break;
    case GT:
      if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
	abort ();
      suffix = "g";
      break;
    case GTU:
      /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
	 Those same assemblers have the same but opposite losage on cmov.  */
      if (mode != CCmode)
	abort ();
      suffix = fp ? "nbe" : "a";
      break;
    case LT:
      /* In the no-overflow modes LT degenerates to a sign test.  */
      if (mode == CCNOmode || mode == CCGOCmode)
	suffix = "s";
      else if (mode == CCmode || mode == CCGCmode)
	suffix = "l";
      else
	abort ();
      break;
    case LTU:
      if (mode != CCmode)
	abort ();
      suffix = "b";
      break;
    case GE:
      if (mode == CCNOmode || mode == CCGOCmode)
	suffix = "ns";
      else if (mode == CCmode || mode == CCGCmode)
	suffix = "ge";
      else
	abort ();
      break;
    case GEU:
      /* ??? As above.  */
      if (mode != CCmode)
	abort ();
      suffix = fp ? "nb" : "ae";
      break;
    case LE:
      if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
	abort ();
      suffix = "le";
      break;
    case LEU:
      if (mode != CCmode)
	abort ();
      suffix = "be";
      break;
    case UNORDERED:
      suffix = fp ? "u" : "p";
      break;
    case ORDERED:
      suffix = fp ? "nu" : "np";
      break;
    default:
      abort ();
    }
  fputs (suffix, file);
}
7092
/* Print the name of register X to FILE based on its machine mode and number.
   If CODE is 'w', pretend the mode is HImode.
   If CODE is 'b', pretend the mode is QImode.
   If CODE is 'k', pretend the mode is SImode.
   If CODE is 'q', pretend the mode is DImode.
   If CODE is 'h', pretend the reg is the `high' byte register.
   If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.  */

void
print_reg (rtx x, int code, FILE *file)
{
  /* These registers should never reach the assembler output.  */
  if (REGNO (x) == ARG_POINTER_REGNUM
      || REGNO (x) == FRAME_POINTER_REGNUM
      || REGNO (x) == FLAGS_REG
      || REGNO (x) == FPSR_REG)
    abort ();

  if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
    putc ('%', file);

  /* Translate the print code into an operand size in bytes
     (3 = 'y', 0 = 'h' are handled specially below).  */
  if (code == 'w' || MMX_REG_P (x))
    code = 2;
  else if (code == 'b')
    code = 1;
  else if (code == 'k')
    code = 4;
  else if (code == 'q')
    code = 8;
  else if (code == 'y')
    code = 3;
  else if (code == 'h')
    code = 0;
  else
    code = GET_MODE_SIZE (GET_MODE (x));

  /* Irritatingly, AMD extended registers use different naming convention
     from the normal registers.  */
  if (REX_INT_REG_P (x))
    {
      if (!TARGET_64BIT)
	abort ();
      switch (code)
	{
	case 0:
	  error ("extended registers have no high halves");
	  break;
	case 1:
	  fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
	  break;
	case 2:
	  fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
	  break;
	case 4:
	  fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
	  break;
	case 8:
	  fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
	  break;
	default:
	  error ("unsupported operand size for extended register");
	  break;
	}
      return;
    }
  switch (code)
    {
    case 3:
      if (STACK_TOP_P (x))
	{
	  fputs ("st(0)", file);
	  break;
	}
      /* FALLTHRU */
    case 8:
    case 4:
    case 12:
      /* Integer registers of 4 or 8 bytes need the 'e'/'r' prefix.  */
      if (! ANY_FP_REG_P (x))
	putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
      /* FALLTHRU */
    case 16:
    case 2:
    normal:
      fputs (hi_reg_name[REGNO (x)], file);
      break;
    case 1:
      if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
	goto normal;
      fputs (qi_reg_name[REGNO (x)], file);
      break;
    case 0:
      if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
	goto normal;
      fputs (qi_high_reg_name[REGNO (x)], file);
      break;
    default:
      abort ();
    }
}
7191
f996902d
RH
7192/* Locate some local-dynamic symbol still in use by this function
7193 so that we can print its name in some tls_local_dynamic_base
7194 pattern. */
7195
7196static const char *
b96a374d 7197get_some_local_dynamic_name (void)
f996902d
RH
7198{
7199 rtx insn;
7200
7201 if (cfun->machine->some_ld_name)
7202 return cfun->machine->some_ld_name;
7203
7204 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
7205 if (INSN_P (insn)
7206 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
7207 return cfun->machine->some_ld_name;
7208
7209 abort ();
7210}
7211
7212static int
b96a374d 7213get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
f996902d
RH
7214{
7215 rtx x = *px;
7216
7217 if (GET_CODE (x) == SYMBOL_REF
7218 && local_dynamic_symbolic_operand (x, Pmode))
7219 {
7220 cfun->machine->some_ld_name = XSTR (x, 0);
7221 return 1;
7222 }
7223
7224 return 0;
7225}
7226
2a2ab3f9 7227/* Meaning of CODE:
fe25fea3 7228 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
e5cb57e8 7229 C -- print opcode suffix for set/cmov insn.
fe25fea3 7230 c -- like C, but print reversed condition
ef6257cd 7231 F,f -- likewise, but for floating-point.
f6f5dff2
RO
7232 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
7233 otherwise nothing
2a2ab3f9
JVA
7234 R -- print the prefix for register names.
7235 z -- print the opcode suffix for the size of the current operand.
7236 * -- print a star (in certain assembler syntax)
fb204271 7237 A -- print an absolute memory reference.
2a2ab3f9 7238 w -- print the operand as if it's a "word" (HImode) even if it isn't.
2d49677f
SC
7239 s -- print a shift double count, followed by the assemblers argument
7240 delimiter.
fe25fea3
SC
7241 b -- print the QImode name of the register for the indicated operand.
7242 %b0 would print %al if operands[0] is reg 0.
7243 w -- likewise, print the HImode name of the register.
7244 k -- likewise, print the SImode name of the register.
3f3f2124 7245 q -- likewise, print the DImode name of the register.
ef6257cd
JH
7246 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
7247 y -- print "st(0)" instead of "st" as a register.
a46d1d38 7248 D -- print condition for SSE cmp instruction.
ef6257cd
JH
7249 P -- if PIC, print an @PLT suffix.
7250 X -- don't print any sort of PIC '@' suffix for a symbol.
f996902d 7251 & -- print some in-use local-dynamic symbol name.
a46d1d38 7252 */
2a2ab3f9
JVA
7253
7254void
b96a374d 7255print_operand (FILE *file, rtx x, int code)
2a2ab3f9
JVA
7256{
7257 if (code)
7258 {
7259 switch (code)
7260 {
7261 case '*':
80f33d06 7262 if (ASSEMBLER_DIALECT == ASM_ATT)
2a2ab3f9
JVA
7263 putc ('*', file);
7264 return;
7265
f996902d
RH
7266 case '&':
7267 assemble_name (file, get_some_local_dynamic_name ());
7268 return;
7269
fb204271 7270 case 'A':
80f33d06 7271 if (ASSEMBLER_DIALECT == ASM_ATT)
fb204271 7272 putc ('*', file);
80f33d06 7273 else if (ASSEMBLER_DIALECT == ASM_INTEL)
fb204271
DN
7274 {
7275 /* Intel syntax. For absolute addresses, registers should not
7276 be surrounded by braces. */
7277 if (GET_CODE (x) != REG)
7278 {
7279 putc ('[', file);
7280 PRINT_OPERAND (file, x, 0);
7281 putc (']', file);
7282 return;
7283 }
7284 }
80f33d06
GS
7285 else
7286 abort ();
fb204271
DN
7287
7288 PRINT_OPERAND (file, x, 0);
7289 return;
7290
7291
2a2ab3f9 7292 case 'L':
80f33d06 7293 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 7294 putc ('l', file);
2a2ab3f9
JVA
7295 return;
7296
7297 case 'W':
80f33d06 7298 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 7299 putc ('w', file);
2a2ab3f9
JVA
7300 return;
7301
7302 case 'B':
80f33d06 7303 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 7304 putc ('b', file);
2a2ab3f9
JVA
7305 return;
7306
7307 case 'Q':
80f33d06 7308 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 7309 putc ('l', file);
2a2ab3f9
JVA
7310 return;
7311
7312 case 'S':
80f33d06 7313 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 7314 putc ('s', file);
2a2ab3f9
JVA
7315 return;
7316
5f1ec3e6 7317 case 'T':
80f33d06 7318 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 7319 putc ('t', file);
5f1ec3e6
JVA
7320 return;
7321
2a2ab3f9
JVA
7322 case 'z':
7323 /* 387 opcodes don't get size suffixes if the operands are
0f290768 7324 registers. */
2a2ab3f9
JVA
7325 if (STACK_REG_P (x))
7326 return;
7327
831c4e87
KC
7328 /* Likewise if using Intel opcodes. */
7329 if (ASSEMBLER_DIALECT == ASM_INTEL)
7330 return;
7331
7332 /* This is the size of op from size of operand. */
2a2ab3f9
JVA
7333 switch (GET_MODE_SIZE (GET_MODE (x)))
7334 {
2a2ab3f9 7335 case 2:
155d8a47
JW
7336#ifdef HAVE_GAS_FILDS_FISTS
7337 putc ('s', file);
7338#endif
2a2ab3f9
JVA
7339 return;
7340
7341 case 4:
7342 if (GET_MODE (x) == SFmode)
7343 {
e075ae69 7344 putc ('s', file);
2a2ab3f9
JVA
7345 return;
7346 }
7347 else
e075ae69 7348 putc ('l', file);
2a2ab3f9
JVA
7349 return;
7350
5f1ec3e6 7351 case 12:
2b589241 7352 case 16:
e075ae69
RH
7353 putc ('t', file);
7354 return;
5f1ec3e6 7355
2a2ab3f9
JVA
7356 case 8:
7357 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
56c0e8fa
JVA
7358 {
7359#ifdef GAS_MNEMONICS
e075ae69 7360 putc ('q', file);
56c0e8fa 7361#else
e075ae69
RH
7362 putc ('l', file);
7363 putc ('l', file);
56c0e8fa
JVA
7364#endif
7365 }
e075ae69
RH
7366 else
7367 putc ('l', file);
2a2ab3f9 7368 return;
155d8a47
JW
7369
7370 default:
7371 abort ();
2a2ab3f9 7372 }
4af3895e
JVA
7373
7374 case 'b':
7375 case 'w':
7376 case 'k':
3f3f2124 7377 case 'q':
4af3895e
JVA
7378 case 'h':
7379 case 'y':
5cb6195d 7380 case 'X':
e075ae69 7381 case 'P':
4af3895e
JVA
7382 break;
7383
2d49677f
SC
7384 case 's':
7385 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
7386 {
7387 PRINT_OPERAND (file, x, 0);
e075ae69 7388 putc (',', file);
2d49677f 7389 }
a269a03c
JC
7390 return;
7391
a46d1d38
JH
7392 case 'D':
7393 /* Little bit of braindamage here. The SSE compare instructions
7394 does use completely different names for the comparisons that the
7395 fp conditional moves. */
7396 switch (GET_CODE (x))
7397 {
7398 case EQ:
7399 case UNEQ:
7400 fputs ("eq", file);
7401 break;
7402 case LT:
7403 case UNLT:
7404 fputs ("lt", file);
7405 break;
7406 case LE:
7407 case UNLE:
7408 fputs ("le", file);
7409 break;
7410 case UNORDERED:
7411 fputs ("unord", file);
7412 break;
7413 case NE:
7414 case LTGT:
7415 fputs ("neq", file);
7416 break;
7417 case UNGE:
7418 case GE:
7419 fputs ("nlt", file);
7420 break;
7421 case UNGT:
7422 case GT:
7423 fputs ("nle", file);
7424 break;
7425 case ORDERED:
7426 fputs ("ord", file);
7427 break;
7428 default:
7429 abort ();
7430 break;
7431 }
7432 return;
048b1c95 7433 case 'O':
f6f5dff2 7434#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
048b1c95
JJ
7435 if (ASSEMBLER_DIALECT == ASM_ATT)
7436 {
7437 switch (GET_MODE (x))
7438 {
7439 case HImode: putc ('w', file); break;
7440 case SImode:
7441 case SFmode: putc ('l', file); break;
7442 case DImode:
7443 case DFmode: putc ('q', file); break;
7444 default: abort ();
7445 }
7446 putc ('.', file);
7447 }
7448#endif
7449 return;
1853aadd 7450 case 'C':
e075ae69 7451 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
1853aadd 7452 return;
fe25fea3 7453 case 'F':
f6f5dff2 7454#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
048b1c95
JJ
7455 if (ASSEMBLER_DIALECT == ASM_ATT)
7456 putc ('.', file);
7457#endif
e075ae69 7458 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
fe25fea3
SC
7459 return;
7460
e9a25f70 7461 /* Like above, but reverse condition */
e075ae69 7462 case 'c':
fce5a9f2 7463 /* Check to see if argument to %c is really a constant
c1d5afc4 7464 and not a condition code which needs to be reversed. */
ec8e098d 7465 if (!COMPARISON_P (x))
c1d5afc4
CR
7466 {
7467 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
7468 return;
7469 }
e075ae69
RH
7470 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
7471 return;
fe25fea3 7472 case 'f':
f6f5dff2 7473#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
048b1c95
JJ
7474 if (ASSEMBLER_DIALECT == ASM_ATT)
7475 putc ('.', file);
7476#endif
e075ae69 7477 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
1853aadd 7478 return;
ef6257cd
JH
7479 case '+':
7480 {
7481 rtx x;
e5cb57e8 7482
ef6257cd
JH
7483 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
7484 return;
a4f31c00 7485
ef6257cd
JH
7486 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
7487 if (x)
7488 {
7489 int pred_val = INTVAL (XEXP (x, 0));
7490
7491 if (pred_val < REG_BR_PROB_BASE * 45 / 100
7492 || pred_val > REG_BR_PROB_BASE * 55 / 100)
7493 {
7494 int taken = pred_val > REG_BR_PROB_BASE / 2;
7495 int cputaken = final_forward_branch_p (current_output_insn) == 0;
7496
7497 /* Emit hints only in the case default branch prediction
d1f87653 7498 heuristics would fail. */
ef6257cd
JH
7499 if (taken != cputaken)
7500 {
7501 /* We use 3e (DS) prefix for taken branches and
7502 2e (CS) prefix for not taken branches. */
7503 if (taken)
7504 fputs ("ds ; ", file);
7505 else
7506 fputs ("cs ; ", file);
7507 }
7508 }
7509 }
7510 return;
7511 }
4af3895e 7512 default:
a52453cc 7513 output_operand_lossage ("invalid operand code `%c'", code);
2a2ab3f9
JVA
7514 }
7515 }
e9a25f70 7516
2a2ab3f9 7517 if (GET_CODE (x) == REG)
a55f4481 7518 print_reg (x, code, file);
e9a25f70 7519
2a2ab3f9
JVA
7520 else if (GET_CODE (x) == MEM)
7521 {
e075ae69 7522 /* No `byte ptr' prefix for call instructions. */
80f33d06 7523 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
2a2ab3f9 7524 {
69ddee61 7525 const char * size;
e075ae69
RH
7526 switch (GET_MODE_SIZE (GET_MODE (x)))
7527 {
7528 case 1: size = "BYTE"; break;
7529 case 2: size = "WORD"; break;
7530 case 4: size = "DWORD"; break;
7531 case 8: size = "QWORD"; break;
7532 case 12: size = "XWORD"; break;
a7180f70 7533 case 16: size = "XMMWORD"; break;
e075ae69 7534 default:
564d80f4 7535 abort ();
e075ae69 7536 }
fb204271
DN
7537
7538 /* Check for explicit size override (codes 'b', 'w' and 'k') */
7539 if (code == 'b')
7540 size = "BYTE";
7541 else if (code == 'w')
7542 size = "WORD";
7543 else if (code == 'k')
7544 size = "DWORD";
7545
e075ae69
RH
7546 fputs (size, file);
7547 fputs (" PTR ", file);
2a2ab3f9 7548 }
e075ae69
RH
7549
7550 x = XEXP (x, 0);
0d7d98ee 7551 /* Avoid (%rip) for call operands. */
d10f5ecf 7552 if (CONSTANT_ADDRESS_P (x) && code == 'P'
0d7d98ee
JH
7553 && GET_CODE (x) != CONST_INT)
7554 output_addr_const (file, x);
c8b94768
RH
7555 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
7556 output_operand_lossage ("invalid constraints for operand");
2a2ab3f9 7557 else
e075ae69 7558 output_address (x);
2a2ab3f9 7559 }
e9a25f70 7560
2a2ab3f9
JVA
7561 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
7562 {
e9a25f70
JL
7563 REAL_VALUE_TYPE r;
7564 long l;
7565
5f1ec3e6
JVA
7566 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7567 REAL_VALUE_TO_TARGET_SINGLE (r, l);
e075ae69 7568
80f33d06 7569 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 7570 putc ('$', file);
781f4ec1 7571 fprintf (file, "0x%08lx", l);
5f1ec3e6 7572 }
e9a25f70 7573
74dc3e94
RH
7574 /* These float cases don't actually occur as immediate operands. */
7575 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
5f1ec3e6 7576 {
e9a25f70
JL
7577 char dstr[30];
7578
da6eec72 7579 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
5f1ec3e6 7580 fprintf (file, "%s", dstr);
2a2ab3f9 7581 }
e9a25f70 7582
2b589241 7583 else if (GET_CODE (x) == CONST_DOUBLE
f8a1ebc6 7584 && GET_MODE (x) == XFmode)
2a2ab3f9 7585 {
e9a25f70
JL
7586 char dstr[30];
7587
da6eec72 7588 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
5f1ec3e6 7589 fprintf (file, "%s", dstr);
2a2ab3f9 7590 }
f996902d 7591
79325812 7592 else
2a2ab3f9 7593 {
4af3895e 7594 if (code != 'P')
2a2ab3f9 7595 {
695dac07 7596 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
e075ae69 7597 {
80f33d06 7598 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69
RH
7599 putc ('$', file);
7600 }
2a2ab3f9
JVA
7601 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
7602 || GET_CODE (x) == LABEL_REF)
e075ae69 7603 {
80f33d06 7604 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69
RH
7605 putc ('$', file);
7606 else
7607 fputs ("OFFSET FLAT:", file);
7608 }
2a2ab3f9 7609 }
e075ae69
RH
7610 if (GET_CODE (x) == CONST_INT)
7611 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7612 else if (flag_pic)
2a2ab3f9
JVA
7613 output_pic_addr_const (file, x, code);
7614 else
7615 output_addr_const (file, x);
7616 }
7617}
7618\f
7619/* Print a memory operand whose address is ADDR. */
7620
void
print_operand_address (FILE *file, rtx addr)
{
  struct ix86_address parts;
  rtx base, index, disp;
  int scale;

  /* Decompose ADDR into base register, index register, displacement
     and scale; an address that cannot be decomposed is a bug in the
     caller.  */
  if (! ix86_decompose_address (addr, &parts))
    abort ();

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;

  /* Emit an explicit segment override prefix for %fs/%gs addresses.  */
  switch (parts.seg)
    {
    case SEG_DEFAULT:
      break;
    case SEG_FS:
    case SEG_GS:
      if (USER_LABEL_PREFIX[0] == 0)
	putc ('%', file);
      fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
      break;
    default:
      abort ();
    }

  if (!base && !index)
    {
      /* Displacement only requires special attention.  */

      if (GET_CODE (disp) == CONST_INT)
	{
	  /* In Intel syntax an absolute constant address needs an
	     explicit ds: prefix (unless a segment override was already
	     printed above).  */
	  if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
	    {
	      if (USER_LABEL_PREFIX[0] == 0)
		putc ('%', file);
	      fputs ("ds:", file);
	    }
	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
	}
      else if (flag_pic)
	output_pic_addr_const (file, disp, 0);
      else
	output_addr_const (file, disp);

      /* Use one byte shorter RIP relative addressing for 64bit mode.
	 This applies to plain symbols and labels (but not TLS symbols),
	 and to symbol-plus-constant CONST expressions.  */
      if (TARGET_64BIT
	  && ((GET_CODE (disp) == SYMBOL_REF
	       && ! tls_symbolic_operand (disp, GET_MODE (disp)))
	      || GET_CODE (disp) == LABEL_REF
	      || (GET_CODE (disp) == CONST
		  && GET_CODE (XEXP (disp, 0)) == PLUS
		  && (GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
		      || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
		  && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)))
	fputs ("(%rip)", file);
    }
  else
    {
      if (ASSEMBLER_DIALECT == ASM_ATT)
	{
	  /* AT&T syntax: disp(base,index,scale).  */
	  if (disp)
	    {
	      if (flag_pic)
		output_pic_addr_const (file, disp, 0);
	      else if (GET_CODE (disp) == LABEL_REF)
		output_asm_label (disp);
	      else
		output_addr_const (file, disp);
	    }

	  putc ('(', file);
	  if (base)
	    print_reg (base, 0, file);
	  if (index)
	    {
	      putc (',', file);
	      print_reg (index, 0, file);
	      if (scale != 1)
		fprintf (file, ",%d", scale);
	    }
	  putc (')', file);
	}
      else
	{
	  /* Intel syntax: symbol[base+offset+index*scale].  */
	  rtx offset = NULL_RTX;

	  if (disp)
	    {
	      /* Pull out the offset of a symbol; print any symbol itself.  */
	      if (GET_CODE (disp) == CONST
		  && GET_CODE (XEXP (disp, 0)) == PLUS
		  && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
		{
		  offset = XEXP (XEXP (disp, 0), 1);
		  disp = gen_rtx_CONST (VOIDmode,
					XEXP (XEXP (disp, 0), 0));
		}

	      if (flag_pic)
		output_pic_addr_const (file, disp, 0);
	      else if (GET_CODE (disp) == LABEL_REF)
		output_asm_label (disp);
	      else if (GET_CODE (disp) == CONST_INT)
		offset = disp;	/* Pure constant: printed inside brackets.  */
	      else
		output_addr_const (file, disp);
	    }

	  putc ('[', file);
	  if (base)
	    {
	      print_reg (base, 0, file);
	      if (offset)
		{
		  /* Negative offsets already carry their '-' sign.  */
		  if (INTVAL (offset) >= 0)
		    putc ('+', file);
		  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
		}
	    }
	  else if (offset)
	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
	  else
	    putc ('0', file);

	  if (index)
	    {
	      putc ('+', file);
	      print_reg (index, 0, file);
	      if (scale != 1)
		fprintf (file, "*%d", scale);
	    }
	  putc (']', file);
	}
    }
}
f996902d
RH
7760
7761bool
b96a374d 7762output_addr_const_extra (FILE *file, rtx x)
f996902d
RH
7763{
7764 rtx op;
7765
7766 if (GET_CODE (x) != UNSPEC)
7767 return false;
7768
7769 op = XVECEXP (x, 0, 0);
7770 switch (XINT (x, 1))
7771 {
7772 case UNSPEC_GOTTPOFF:
7773 output_addr_const (file, op);
dea73790 7774 /* FIXME: This might be @TPOFF in Sun ld. */
f996902d
RH
7775 fputs ("@GOTTPOFF", file);
7776 break;
7777 case UNSPEC_TPOFF:
7778 output_addr_const (file, op);
7779 fputs ("@TPOFF", file);
7780 break;
7781 case UNSPEC_NTPOFF:
7782 output_addr_const (file, op);
75d38379
JJ
7783 if (TARGET_64BIT)
7784 fputs ("@TPOFF", file);
7785 else
7786 fputs ("@NTPOFF", file);
f996902d
RH
7787 break;
7788 case UNSPEC_DTPOFF:
7789 output_addr_const (file, op);
7790 fputs ("@DTPOFF", file);
7791 break;
dea73790
JJ
7792 case UNSPEC_GOTNTPOFF:
7793 output_addr_const (file, op);
75d38379
JJ
7794 if (TARGET_64BIT)
7795 fputs ("@GOTTPOFF(%rip)", file);
7796 else
7797 fputs ("@GOTNTPOFF", file);
dea73790
JJ
7798 break;
7799 case UNSPEC_INDNTPOFF:
7800 output_addr_const (file, op);
7801 fputs ("@INDNTPOFF", file);
7802 break;
f996902d
RH
7803
7804 default:
7805 return false;
7806 }
7807
7808 return true;
7809}
2a2ab3f9
JVA
7810\f
7811/* Split one or more DImode RTL references into pairs of SImode
7812 references. The RTL can be REG, offsettable MEM, integer constant, or
7813 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7814 split and "num" is its length. lo_half and hi_half are output arrays
0f290768 7815 that parallel "operands". */
2a2ab3f9
JVA
7816
7817void
b96a374d 7818split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
2a2ab3f9
JVA
7819{
7820 while (num--)
7821 {
57dbca5e 7822 rtx op = operands[num];
b932f770
JH
7823
7824 /* simplify_subreg refuse to split volatile memory addresses,
7825 but we still have to handle it. */
7826 if (GET_CODE (op) == MEM)
2a2ab3f9 7827 {
f4ef873c 7828 lo_half[num] = adjust_address (op, SImode, 0);
b72f00af 7829 hi_half[num] = adjust_address (op, SImode, 4);
2a2ab3f9
JVA
7830 }
7831 else
b932f770 7832 {
38ca929b
JH
7833 lo_half[num] = simplify_gen_subreg (SImode, op,
7834 GET_MODE (op) == VOIDmode
7835 ? DImode : GET_MODE (op), 0);
7836 hi_half[num] = simplify_gen_subreg (SImode, op,
7837 GET_MODE (op) == VOIDmode
7838 ? DImode : GET_MODE (op), 4);
b932f770 7839 }
2a2ab3f9
JVA
7840 }
7841}
44cf5b6a
JH
7842/* Split one or more TImode RTL references into pairs of SImode
7843 references. The RTL can be REG, offsettable MEM, integer constant, or
7844 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7845 split and "num" is its length. lo_half and hi_half are output arrays
7846 that parallel "operands". */
7847
7848void
b96a374d 7849split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
44cf5b6a
JH
7850{
7851 while (num--)
7852 {
7853 rtx op = operands[num];
b932f770
JH
7854
7855 /* simplify_subreg refuse to split volatile memory addresses, but we
7856 still have to handle it. */
7857 if (GET_CODE (op) == MEM)
44cf5b6a
JH
7858 {
7859 lo_half[num] = adjust_address (op, DImode, 0);
7860 hi_half[num] = adjust_address (op, DImode, 8);
7861 }
7862 else
b932f770
JH
7863 {
7864 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
7865 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
7866 }
44cf5b6a
JH
7867 }
7868}
2a2ab3f9 7869\f
2a2ab3f9
JVA
7870/* Output code to perform a 387 binary operation in INSN, one of PLUS,
7871 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
7872 is the expression of the binary operation. The output may either be
7873 emitted here, or returned to the caller, like all output_* functions.
7874
7875 There is no guarantee that the operands are the same mode, as they
0f290768 7876 might be within FLOAT or FLOAT_EXTEND expressions. */
2a2ab3f9 7877
e3c2afab
AM
7878#ifndef SYSV386_COMPAT
7879/* Set to 1 for compatibility with brain-damaged assemblers. No-one
7880 wants to fix the assemblers because that causes incompatibility
7881 with gcc. No-one wants to fix gcc because that causes
7882 incompatibility with assemblers... You can use the option of
7883 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
7884#define SYSV386_COMPAT 1
7885#endif
7886
const char *
output_387_binary_op (rtx insn, rtx *operands)
{
  /* Assembled into BUF: mnemonic root + operand template.  Returned to
     the caller, so it must be static.  */
  static char buf[30];
  const char *p;
  const char *ssep;
  int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);

#ifdef ENABLE_CHECKING
  /* Even if we do not want to check the inputs, this documents input
     constraints.  Which helps in understanding the following code.  */
  if (STACK_REG_P (operands[0])
      && ((REG_P (operands[1])
	   && REGNO (operands[0]) == REGNO (operands[1])
	   && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
	  || (REG_P (operands[2])
	      && REGNO (operands[0]) == REGNO (operands[2])
	      && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
      && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
    ; /* ok */
  else if (!is_sse)
    abort ();
#endif

  /* Pick the x87 mnemonic root (fi* when one operand is an integer
     MEM) and the SSE mnemonic root for the operation.  */
  switch (GET_CODE (operands[3]))
    {
    case PLUS:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fiadd";
      else
	p = "fadd";
      ssep = "add";
      break;

    case MINUS:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fisub";
      else
	p = "fsub";
      ssep = "sub";
      break;

    case MULT:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fimul";
      else
	p = "fmul";
      ssep = "mul";
      break;

    case DIV:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fidiv";
      else
	p = "fdiv";
      ssep = "div";
      break;

    default:
      abort ();
    }

  /* SSE form: scalar single or double depending on the result mode.  */
  if (is_sse)
    {
      strcpy (buf, ssep);
      if (GET_MODE (operands[0]) == SFmode)
	strcat (buf, "ss\t{%2, %0|%0, %2}");
      else
	strcat (buf, "sd\t{%2, %0|%0, %2}");
      return buf;
    }
  strcpy (buf, p);

  /* x87 form: select the suffix and operand template from the
     commutativity of the operation, which operands are in memory,
     which stack registers die, and which operand is at stack top.  */
  switch (GET_CODE (operands[3]))
    {
    case MULT:
    case PLUS:
      /* Commutative: canonicalize so a register matching the result
	 lands in operands[1].  */
      if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
	{
	  rtx temp = operands[2];
	  operands[2] = operands[1];
	  operands[1] = temp;
	}

      /* know operands[0] == operands[1].  */

      if (GET_CODE (operands[2]) == MEM)
	{
	  p = "%z2\t%2";
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
	{
	  if (STACK_TOP_P (operands[0]))
	    /* How is it that we are storing to a dead operand[2]?
	       Well, presumably operands[1] is dead too.  We can't
	       store the result to st(0) as st(0) gets popped on this
	       instruction.  Instead store to operands[2] (which I
	       think has to be st(1)).  st(1) will be popped later.
	       gcc <= 2.8.1 didn't have this check and generated
	       assembly code that the Unixware assembler rejected.  */
	    p = "p\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
	  else
	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
	  break;
	}

      if (STACK_TOP_P (operands[0]))
	p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
      else
	p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
      break;

    case MINUS:
    case DIV:
      /* Non-commutative: memory in operands[1] needs the reversed
	 (fsubr/fdivr) form.  */
      if (GET_CODE (operands[1]) == MEM)
	{
	  p = "r%z1\t%1";
	  break;
	}

      if (GET_CODE (operands[2]) == MEM)
	{
	  p = "%z2\t%2";
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
	{
#if SYSV386_COMPAT
	  /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
	     derived assemblers, confusingly reverse the direction of
	     the operation for fsub{r} and fdiv{r} when the
	     destination register is not st(0).  The Intel assembler
	     doesn't have this brain damage.  Read !SYSV386_COMPAT to
	     figure out what the hardware really does.  */
	  if (STACK_TOP_P (operands[0]))
	    p = "{p\t%0, %2|rp\t%2, %0}";
	  else
	    p = "{rp\t%2, %0|p\t%0, %2}";
#else
	  if (STACK_TOP_P (operands[0]))
	    /* As above for fmul/fadd, we can't store to st(0).  */
	    p = "rp\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
	  else
	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
#endif
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
	{
#if SYSV386_COMPAT
	  if (STACK_TOP_P (operands[0]))
	    p = "{rp\t%0, %1|p\t%1, %0}";
	  else
	    p = "{p\t%1, %0|rp\t%0, %1}";
#else
	  if (STACK_TOP_P (operands[0]))
	    p = "p\t{%0, %1|%1, %0}";	/* st(1) = st(1) op st(0); pop */
	  else
	    p = "rp\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2); pop */
#endif
	  break;
	}

      if (STACK_TOP_P (operands[0]))
	{
	  if (STACK_TOP_P (operands[1]))
	    p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
	  else
	    p = "r\t{%y1, %0|%0, %y1}";	/* st(0) = st(r1) op st(0) */
	  break;
	}
      else if (STACK_TOP_P (operands[1]))
	{
#if SYSV386_COMPAT
	  p = "{\t%1, %0|r\t%0, %1}";
#else
	  p = "r\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2) */
#endif
	}
      else
	{
#if SYSV386_COMPAT
	  p = "{r\t%2, %0|\t%0, %2}";
#else
	  p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
#endif
	}
      break;

    default:
      abort ();
    }

  strcat (buf, p);
  return buf;
}
e075ae69 8091
a4f31c00 8092/* Output code to initialize control word copies used by
7a2e09f4
JH
8093 trunc?f?i patterns. NORMAL is set to current control word, while ROUND_DOWN
8094 is set to control word rounding downwards. */
8095void
b96a374d 8096emit_i387_cw_initialization (rtx normal, rtx round_down)
7a2e09f4
JH
8097{
8098 rtx reg = gen_reg_rtx (HImode);
8099
8100 emit_insn (gen_x86_fnstcw_1 (normal));
8101 emit_move_insn (reg, normal);
8102 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
8103 && !TARGET_64BIT)
8104 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
8105 else
8106 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
8107 emit_move_insn (round_down, reg);
8108}
8109
2a2ab3f9 8110/* Output code for INSN to convert a float to a signed int. OPERANDS
46d21d2c 8111 are the insn operands. The output may be [HSD]Imode and the input
e075ae69 8112 operand may be [SDX]Fmode. */
2a2ab3f9 8113
69ddee61 8114const char *
b96a374d 8115output_fix_trunc (rtx insn, rtx *operands)
2a2ab3f9
JVA
8116{
8117 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
e075ae69 8118 int dimode_p = GET_MODE (operands[0]) == DImode;
2a2ab3f9 8119
e075ae69
RH
8120 /* Jump through a hoop or two for DImode, since the hardware has no
8121 non-popping instruction. We used to do this a different way, but
8122 that was somewhat fragile and broke with post-reload splitters. */
a05924f9
JH
8123 if (dimode_p && !stack_top_dies)
8124 output_asm_insn ("fld\t%y1", operands);
e075ae69 8125
7a2e09f4 8126 if (!STACK_TOP_P (operands[1]))
10195bd8
JW
8127 abort ();
8128
e075ae69 8129 if (GET_CODE (operands[0]) != MEM)
7a2e09f4 8130 abort ();
e9a25f70 8131
7a2e09f4 8132 output_asm_insn ("fldcw\t%3", operands);
e075ae69 8133 if (stack_top_dies || dimode_p)
7a2e09f4 8134 output_asm_insn ("fistp%z0\t%0", operands);
10195bd8 8135 else
7a2e09f4 8136 output_asm_insn ("fist%z0\t%0", operands);
e075ae69 8137 output_asm_insn ("fldcw\t%2", operands);
10195bd8 8138
e075ae69 8139 return "";
2a2ab3f9 8140}
cda749b1 8141
e075ae69
RH
8142/* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
8143 should be used and 2 when fnstsw should be used. UNORDERED_P is true
8144 when fucom should be used. */
8145
const char *
output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
{
  int stack_top_dies;
  rtx cmp_op0 = operands[0];
  rtx cmp_op1 = operands[1];
  int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);

  /* For the fnstsw form (eflags_p == 2) operands[0] is the status-word
     destination, so the compare operands shift up by one.  */
  if (eflags_p == 2)
    {
      cmp_op0 = cmp_op1;
      cmp_op1 = operands[2];
    }
  /* SSE compares: [u]comiss / [u]comisd by mode and orderedness.  */
  if (is_sse)
    {
      if (GET_MODE (operands[0]) == SFmode)
	if (unordered_p)
	  return "ucomiss\t{%1, %0|%0, %1}";
	else
	  return "comiss\t{%1, %0|%0, %1}";
      else
	if (unordered_p)
	  return "ucomisd\t{%1, %0|%0, %1}";
	else
	  return "comisd\t{%1, %0|%0, %1}";
    }

  if (! STACK_TOP_P (cmp_op0))
    abort ();

  stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;

  if (STACK_REG_P (cmp_op1)
      && stack_top_dies
      && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
      && REGNO (cmp_op1) != FIRST_STACK_REG)
    {
      /* If both the top of the 387 stack dies, and the other operand
	 is also a stack register that dies, then this must be a
	 `fcompp' float compare */

      if (eflags_p == 1)
	{
	  /* There is no double popping fcomi variant.  Fortunately,
	     eflags is immune from the fstp's cc clobbering.  */
	  if (unordered_p)
	    output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
	  else
	    output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
	  return "fstp\t%y0";
	}
      else
	{
	  if (eflags_p == 2)
	    {
	      if (unordered_p)
		return "fucompp\n\tfnstsw\t%0";
	      else
		return "fcompp\n\tfnstsw\t%0";
	    }
	  else
	    {
	      if (unordered_p)
		return "fucompp";
	      else
		return "fcompp";
	    }
	}
    }
  else
    {
      /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies.  */

      static const char * const alt[24] =
      {
	"fcom%z1\t%y1",
	"fcomp%z1\t%y1",
	"fucom%z1\t%y1",
	"fucomp%z1\t%y1",

	"ficom%z1\t%y1",
	"ficomp%z1\t%y1",
	NULL,
	NULL,

	"fcomi\t{%y1, %0|%0, %y1}",
	"fcomip\t{%y1, %0|%0, %y1}",
	"fucomi\t{%y1, %0|%0, %y1}",
	"fucomip\t{%y1, %0|%0, %y1}",

	NULL,
	NULL,
	NULL,
	NULL,

	"fcom%z2\t%y2\n\tfnstsw\t%0",
	"fcomp%z2\t%y2\n\tfnstsw\t%0",
	"fucom%z2\t%y2\n\tfnstsw\t%0",
	"fucomp%z2\t%y2\n\tfnstsw\t%0",

	"ficom%z2\t%y2\n\tfnstsw\t%0",
	"ficomp%z2\t%y2\n\tfnstsw\t%0",
	NULL,
	NULL
      };

      int mask;
      const char *ret;

      /* Build the 5-bit table index from the flags described above.  */
      mask  = eflags_p << 3;
      mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
      mask |= unordered_p << 1;
      mask |= stack_top_dies;

      if (mask >= 24)
	abort ();
      ret = alt[mask];
      if (ret == NULL)
	abort ();	/* NULL entries are unsupported combinations.  */

      return ret;
    }
}
2a2ab3f9 8269
f88c65f7 8270void
b96a374d 8271ix86_output_addr_vec_elt (FILE *file, int value)
f88c65f7
RH
8272{
8273 const char *directive = ASM_LONG;
8274
8275 if (TARGET_64BIT)
8276 {
8277#ifdef ASM_QUAD
8278 directive = ASM_QUAD;
8279#else
8280 abort ();
8281#endif
8282 }
8283
8284 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
8285}
8286
void
ix86_output_addr_diff_elt (FILE *file, int value, int rel)
{
  /* 64-bit: plain difference of two local labels.  */
  if (TARGET_64BIT)
    fprintf (file, "%s%s%d-%s%d\n",
	     ASM_LONG, LPREFIX, value, LPREFIX, rel);
  /* Preferred 32-bit form when the assembler accepts @GOTOFF in data.  */
  else if (HAVE_AS_GOTOFF_IN_DATA)
    fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
#if TARGET_MACHO
  /* Darwin: difference against the picbase function label.  */
  else if (TARGET_MACHO)
    {
      fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
      machopic_output_function_base_name (file);
      fprintf(file, "\n");
    }
#endif
  /* Fallback: express the entry relative to the GOT symbol.  */
  else
    asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
		 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
}
32b5b1aa 8307\f
a8bac9ab
RH
8308/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
8309 for the target. */
8310
8311void
b96a374d 8312ix86_expand_clear (rtx dest)
a8bac9ab
RH
8313{
8314 rtx tmp;
8315
8316 /* We play register width games, which are only valid after reload. */
8317 if (!reload_completed)
8318 abort ();
8319
8320 /* Avoid HImode and its attendant prefix byte. */
8321 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
8322 dest = gen_rtx_REG (SImode, REGNO (dest));
8323
8324 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
8325
8326 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
8327 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
8328 {
8329 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
8330 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8331 }
8332
8333 emit_insn (tmp);
8334}
8335
f996902d
RH
8336/* X is an unchanging MEM. If it is a constant pool reference, return
8337 the constant pool rtx, else NULL. */
8338
8339static rtx
b96a374d 8340maybe_get_pool_constant (rtx x)
f996902d 8341{
69bd9368 8342 x = ix86_delegitimize_address (XEXP (x, 0));
f996902d
RH
8343
8344 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8345 return get_pool_constant (x);
8346
8347 return NULL_RTX;
8348}
8349
void
ix86_expand_move (enum machine_mode mode, rtx operands[])
{
  int strict = (reload_in_progress || reload_completed);
  rtx op0, op1;
  enum tls_model model;

  op0 = operands[0];
  op1 = operands[1];

  /* TLS symbol sources must be legitimized for their access model
     first; if that already produced the result, we are done.  */
  model = tls_symbolic_operand (op1, Pmode);
  if (model)
    {
      op1 = legitimize_tls_address (op1, model, true);
      op1 = force_operand (op1, op0);
      if (op1 == op0)
	return;
    }

  /* PIC: symbolic addresses need to be routed through the PIC
     legitimization machinery.  */
  if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
    {
#if TARGET_MACHO
      if (MACHOPIC_PURE)
	{
	  rtx temp = ((reload_in_progress
		       || ((op0 && GET_CODE (op0) == REG)
			   && mode == Pmode))
		      ? op0 : gen_reg_rtx (Pmode));
	  op1 = machopic_indirect_data_reference (op1, temp);
	  op1 = machopic_legitimize_pic_address (op1, mode,
						 temp == op1 ? 0 : temp);
	}
      else if (MACHOPIC_INDIRECT)
	op1 = machopic_indirect_data_reference (op1, 0);
      if (op0 == op1)
	return;
#else
      if (GET_CODE (op0) == MEM)
	op1 = force_reg (Pmode, op1);
      else
	{
	  rtx temp = op0;
	  if (GET_CODE (temp) != REG)
	    temp = gen_reg_rtx (Pmode);
	  temp = legitimize_pic_address (op1, temp);
	  if (temp == op0)
	    return;
	  op1 = temp;
	}
#endif /* TARGET_MACHO */
    }
  else
    {
      /* Memory-to-memory moves need a register intermediary, except
	 for pushes, which can take a memory source directly.  */
      if (GET_CODE (op0) == MEM
	  && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
	      || !push_operand (op0, mode))
	  && GET_CODE (op1) == MEM)
	op1 = force_reg (mode, op1);

      if (push_operand (op0, mode)
	  && ! general_no_elim_operand (op1, mode))
	op1 = copy_to_mode_reg (mode, op1);

      /* Force large constants in 64bit compilation into register
	 to get them CSEed.  */
      if (TARGET_64BIT && mode == DImode
	  && immediate_operand (op1, mode)
	  && !x86_64_zero_extended_value (op1)
	  && !register_operand (op0, mode)
	  && optimize && !reload_completed && !reload_in_progress)
	op1 = copy_to_mode_reg (mode, op1);

      if (FLOAT_MODE_P (mode))
	{
	  /* If we are loading a floating point constant to a register,
	     force the value to memory now, since we'll get better code
	     out the back end.  */

	  if (strict)
	    ;	/* During/after reload we may not create new pseudos.  */
	  else if (GET_CODE (op1) == CONST_DOUBLE)
	    {
	      op1 = validize_mem (force_const_mem (mode, op1));
	      if (!register_operand (op0, mode))
		{
		  rtx temp = gen_reg_rtx (mode);
		  emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
		  emit_move_insn (op0, temp);
		  return;
		}
	    }
	}
    }

  emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
}
e9a25f70 8446
/* Expand a vector-mode move OPERANDS[0] := OPERANDS[1] in MODE.
   Massages the operands into a form the vector move patterns accept
   and emits the move.  */

void
ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
{
  /* Force constants other than zero into memory.  We do not know how
     the instructions used to build constants modify the upper 64 bits
     of the register, once we have that information we may be able
     to handle some of them more efficiently.  */
  if ((reload_in_progress | reload_completed) == 0
      && register_operand (operands[0], mode)
      && CONSTANT_P (operands[1]) && operands[1] != CONST0_RTX (mode))
    operands[1] = validize_mem (force_const_mem (mode, operands[1]));

  /* Make operand1 a register if it isn't already.  */
  if (!no_new_pseudos
      && !register_operand (operands[0], mode)
      && !register_operand (operands[1], mode))
    {
      /* Copy through a fresh register so the final move is reg -> mem
	 rather than mem -> mem, which no insn pattern accepts.  */
      rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
      emit_move_insn (operands[0], temp);
      return;
    }

  emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
}
e37af218 8471
/* Attempt to expand a binary operator.  Make the expansion closer to the
   actual machine, then just general_operand, which will allow 3 separate
   memory references (one output, two input) in a single insn.

   CODE is the rtx code of the operation, MODE its machine mode, and
   OPERANDS[0..2] are destination and the two sources.  Emits the insn
   (with a flags clobber outside of reload) plus any fixup moves.  */

void
ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
			     rtx operands[])
{
  int matching_memory;
  rtx src1, src2, dst, op, clob;

  dst = operands[0];
  src1 = operands[1];
  src2 = operands[2];

  /* Recognize <var1> = <value> <op> <var1> for commutative operators */
  if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
      && (rtx_equal_p (dst, src2)
	  || immediate_operand (src1, mode)))
    {
      /* Swap so that the operand matching DST (or the immediate) ends
	 up as src2, the canonical position.  */
      rtx temp = src1;
      src1 = src2;
      src2 = temp;
    }

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  matching_memory = 0;
  if (GET_CODE (dst) == MEM)
    {
      /* matching_memory records WHICH source matches the MEM dest:
	 1 = src1, 2 = src2 (commutative only), 0 = neither.  */
      if (rtx_equal_p (dst, src1))
	matching_memory = 1;
      else if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
	       && rtx_equal_p (dst, src2))
	matching_memory = 2;
      else
	dst = gen_reg_rtx (mode);
    }

  /* Both source operands cannot be in memory.  */
  if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
    {
      /* Keep whichever source matches the memory destination.  */
      if (matching_memory != 2)
	src2 = force_reg (mode, src2);
      else
	src1 = force_reg (mode, src1);
    }

  /* If the operation is not commutable, source 1 cannot be a constant
     or non-matching memory.  */
  if ((CONSTANT_P (src1)
       || (!matching_memory && GET_CODE (src1) == MEM))
      && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
    src1 = force_reg (mode, src1);

  /* If optimizing, copy to regs to improve CSE */
  if (optimize && ! no_new_pseudos)
    {
      if (GET_CODE (dst) == MEM)
	dst = gen_reg_rtx (mode);
      if (GET_CODE (src1) == MEM)
	src1 = force_reg (mode, src1);
      if (GET_CODE (src2) == MEM)
	src2 = force_reg (mode, src2);
    }

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
  if (reload_in_progress)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
	 it doesn't want to clobber it.  We can only do this with PLUS.  */
      if (code != PLUS)
	abort ();
      emit_insn (op);
    }
  else
    {
      /* Outside reload, attach the FLAGS clobber that the i386
	 arithmetic patterns expect.  */
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}
8559
8560/* Return TRUE or FALSE depending on whether the binary operator meets the
8561 appropriate constraints. */
8562
8563int
b96a374d
AJ
8564ix86_binary_operator_ok (enum rtx_code code,
8565 enum machine_mode mode ATTRIBUTE_UNUSED,
8566 rtx operands[3])
e075ae69
RH
8567{
8568 /* Both source operands cannot be in memory. */
8569 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
8570 return 0;
8571 /* If the operation is not commutable, source 1 cannot be a constant. */
ec8e098d 8572 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
e075ae69
RH
8573 return 0;
8574 /* If the destination is memory, we must have a matching source operand. */
8575 if (GET_CODE (operands[0]) == MEM
8576 && ! (rtx_equal_p (operands[0], operands[1])
ec8e098d 8577 || (GET_RTX_CLASS (code) == RTX_COMM_ARITH
e075ae69
RH
8578 && rtx_equal_p (operands[0], operands[2]))))
8579 return 0;
06a964de 8580 /* If the operation is not commutable and the source 1 is memory, we must
d6a7951f 8581 have a matching destination. */
06a964de 8582 if (GET_CODE (operands[1]) == MEM
ec8e098d 8583 && GET_RTX_CLASS (code) != RTX_COMM_ARITH
06a964de
JH
8584 && ! rtx_equal_p (operands[0], operands[1]))
8585 return 0;
e075ae69
RH
8586 return 1;
8587}
8588
/* Attempt to expand a unary operator.  Make the expansion closer to the
   actual machine, then just general_operand, which will allow 2 separate
   memory references (one output, one input) in a single insn.

   CODE is the operation (e.g. NEG, NOT), MODE its machine mode, and
   OPERANDS[0..1] are destination and source.  */

void
ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
			    rtx operands[])
{
  int matching_memory;
  rtx src, dst, op, clob;

  dst = operands[0];
  src = operands[1];

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  matching_memory = 0;
  if (GET_CODE (dst) == MEM)
    {
      if (rtx_equal_p (dst, src))
	matching_memory = 1;
      else
	dst = gen_reg_rtx (mode);
    }

  /* When source operand is memory, destination must match.  */
  if (!matching_memory && GET_CODE (src) == MEM)
    src = force_reg (mode, src);

  /* If optimizing, copy to regs to improve CSE */
  if (optimize && ! no_new_pseudos)
    {
      if (GET_CODE (dst) == MEM)
	dst = gen_reg_rtx (mode);
      if (GET_CODE (src) == MEM)
	src = force_reg (mode, src);
    }

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
  if (reload_in_progress || code == NOT)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
	 it doesn't want to clobber it.  */
      /* NOT does not affect the flags, so it never needs the clobber;
	 during reload only NOT is allowed here.  */
      if (code != NOT)
	abort ();
      emit_insn (op);
    }
  else
    {
      /* Other unary ops (e.g. NEG) clobber the flags register.  */
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}
8648
8649/* Return TRUE or FALSE depending on whether the unary operator meets the
8650 appropriate constraints. */
8651
8652int
b96a374d
AJ
8653ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
8654 enum machine_mode mode ATTRIBUTE_UNUSED,
8655 rtx operands[2] ATTRIBUTE_UNUSED)
e075ae69 8656{
06a964de
JH
8657 /* If one of operands is memory, source and destination must match. */
8658 if ((GET_CODE (operands[0]) == MEM
8659 || GET_CODE (operands[1]) == MEM)
8660 && ! rtx_equal_p (operands[0], operands[1]))
8661 return FALSE;
e075ae69
RH
8662 return TRUE;
8663}
8664
/* Return TRUE or FALSE depending on whether the first SET in INSN
   has source and destination with matching CC modes, and that the
   CC mode is at least as constrained as REQ_MODE.  */

int
ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
{
  rtx set;
  enum machine_mode set_mode;

  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  if (GET_CODE (set) != SET)
    abort ();
  if (GET_CODE (SET_SRC (set)) != COMPARE)
    abort ();

  set_mode = GET_MODE (SET_DEST (set));

  /* The cases below fall through deliberately: each CC mode accepts
     requests for itself and for every less constrained mode further
     down the chain (CCmode > CCGCmode > CCGOCmode > CCZmode).  */
  switch (set_mode)
    {
    case CCNOmode:
      /* CCNO satisfies a CCmode request only when comparing against
	 zero; otherwise only an exact CCNO request is acceptable.  */
      if (req_mode != CCNOmode
	  && (req_mode != CCmode
	      || XEXP (SET_SRC (set), 1) != const0_rtx))
	return 0;
      break;
    case CCmode:
      if (req_mode == CCGCmode)
	return 0;
      /* FALLTHRU */
    case CCGCmode:
      if (req_mode == CCGOCmode || req_mode == CCNOmode)
	return 0;
      /* FALLTHRU */
    case CCGOCmode:
      if (req_mode == CCZmode)
	return 0;
      /* FALLTHRU */
    case CCZmode:
      break;

    default:
      abort ();
    }

  /* Finally the COMPARE itself must carry the same CC mode.  */
  return (GET_MODE (SET_SRC (set)) == set_mode);
}
8713
/* Generate insn patterns to do an integer compare of OPERANDS.
   Emits the flags-setting COMPARE and returns the comparison rtx
   (CODE applied to the flags register against zero) that the consumer
   -- bcc, scc, or cmov -- should use.  */

static rtx
ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
{
  enum machine_mode cmpmode;
  rtx tmp, flags;

  cmpmode = SELECT_CC_MODE (code, op0, op1);
  flags = gen_rtx_REG (cmpmode, FLAGS_REG);

  /* This is very simple, but making the interface the same as in the
     FP case makes the rest of the code easier.  */
  tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
  emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
}
8734
3a3677ff
RH
8735/* Figure out whether to use ordered or unordered fp comparisons.
8736 Return the appropriate mode to use. */
e075ae69 8737
b1cdafbb 8738enum machine_mode
b96a374d 8739ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
e075ae69 8740{
9e7adcb3
JH
8741 /* ??? In order to make all comparisons reversible, we do all comparisons
8742 non-trapping when compiling for IEEE. Once gcc is able to distinguish
8743 all forms trapping and nontrapping comparisons, we can make inequality
8744 comparisons trapping again, since it results in better code when using
8745 FCOM based compares. */
8746 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
3a3677ff
RH
8747}
8748
/* Return the CC mode that should be used when comparing OP0 against
   OP1 with comparison CODE.  Picks the least constrained mode whose
   flags suffice for the comparison, enabling more insn combination.  */

enum machine_mode
ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
{
  /* Floating point compares get their own mode selection.  */
  if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
    return ix86_fp_compare_mode (code);
  switch (code)
    {
      /* Only zero flag is needed.  */
    case EQ:			/* ZF=0 */
    case NE:			/* ZF!=0 */
      return CCZmode;
      /* Codes needing carry flag.  */
    case GEU:			/* CF=0 */
    case GTU:			/* CF=0 & ZF=0 */
    case LTU:			/* CF=1 */
    case LEU:			/* CF=1 | ZF=1 */
      return CCmode;
      /* Codes possibly doable only with sign flag when
	 comparing against zero.  */
    case GE:			/* SF=OF   or   SF=0 */
    case LT:			/* SF<>OF  or   SF=1 */
      if (op1 == const0_rtx)
	return CCGOCmode;
      else
	/* For other cases Carry flag is not required.  */
	return CCGCmode;
      /* Codes doable only with sign flag when comparing
	 against zero, but we miss jump instruction for it
	 so we need to use relational tests against overflow
	 that thus needs to be zero.  */
    case GT:			/* ZF=0 & SF=OF */
    case LE:			/* ZF=1 | SF<>OF */
      if (op1 == const0_rtx)
	return CCNOmode;
      else
	return CCGCmode;
      /* strcmp pattern do (use flags) and combine may ask us for proper
	 mode.  */
    case USE:
      return CCmode;
    default:
      abort ();
    }
}
8793
e129d93a
ILT
8794/* Return the fixed registers used for condition codes. */
8795
8796static bool
8797ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
8798{
8799 *p1 = FLAGS_REG;
8800 *p2 = FPSR_REG;
8801 return true;
8802}
8803
/* If two condition code modes are compatible, return a condition code
   mode which is compatible with both.  Otherwise, return
   VOIDmode.  */

static enum machine_mode
ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
{
  /* Identical modes are trivially compatible.  */
  if (m1 == m2)
    return m1;

  /* Mixing a CC mode with a non-CC mode never works.  */
  if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
    return VOIDmode;

  /* CCGC is a strict superset of CCGOC, so the pair collapses to CCGC.  */
  if ((m1 == CCGCmode && m2 == CCGOCmode)
      || (m1 == CCGOCmode && m2 == CCGCmode))
    return CCGCmode;

  switch (m1)
    {
    default:
      abort ();

    case CCmode:
    case CCGCmode:
    case CCGOCmode:
    case CCNOmode:
    case CCZmode:
      /* Any two distinct integer CC modes can be unified by falling
	 back to plain CCmode, which constrains all flags.  */
      switch (m2)
	{
	default:
	  return VOIDmode;

	case CCmode:
	case CCGCmode:
	case CCGOCmode:
	case CCNOmode:
	case CCZmode:
	  return CCmode;
	}

    case CCFPmode:
    case CCFPUmode:
      /* These are only compatible with themselves, which we already
	 checked above.  */
      return VOIDmode;
    }
}
8851
3a3677ff
RH
8852/* Return true if we should use an FCOMI instruction for this fp comparison. */
8853
a940d8bd 8854int
b96a374d 8855ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
3a3677ff 8856{
9e7adcb3
JH
8857 enum rtx_code swapped_code = swap_condition (code);
8858 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
8859 || (ix86_fp_comparison_cost (swapped_code)
8860 == ix86_fp_comparison_fcomi_cost (swapped_code)));
3a3677ff
RH
8861}
8862
/* Swap, force into registers, or otherwise massage the two operands
   to a fp comparison.  The operands are updated in place; the new
   comparison code is returned.  */

static enum rtx_code
ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
{
  enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
  rtx op0 = *pop0, op1 = *pop1;
  enum machine_mode op_mode = GET_MODE (op0);
  int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);

  /* All of the unordered compare instructions only work on registers.
     The same is true of the XFmode compare instructions.  The same is
     true of the fcomi compare instructions.  */

  if (!is_sse
      && (fpcmp_mode == CCFPUmode
	  || op_mode == XFmode
	  || ix86_use_fcomi_compare (code)))
    {
      op0 = force_reg (op_mode, op0);
      op1 = force_reg (op_mode, op1);
    }
  else
    {
      /* %%% We only allow op1 in memory; op0 must be st(0).  So swap
	 things around if they appear profitable, otherwise force op0
	 into a register.  */

      /* Swap when op0 is not a loadable 387 constant, or when op0 is
	 memory and op1 is neither such a constant nor memory.  */
      if (standard_80387_constant_p (op0) == 0
	  || (GET_CODE (op0) == MEM
	      && ! (standard_80387_constant_p (op1) == 0
		    || GET_CODE (op1) == MEM)))
	{
	  rtx tmp;
	  tmp = op0, op0 = op1, op1 = tmp;
	  code = swap_condition (code);
	}

      if (GET_CODE (op0) != REG)
	op0 = force_reg (op_mode, op0);

      if (CONSTANT_P (op1))
	{
	  /* Constants the 387 can materialize itself go in a register;
	     anything else is spilled to the constant pool.  */
	  if (standard_80387_constant_p (op1))
	    op1 = force_reg (op_mode, op1);
	  else
	    op1 = validize_mem (force_const_mem (op_mode, op1));
	}
    }

  /* Try to rearrange the comparison to make it cheaper.  */
  if (ix86_fp_comparison_cost (code)
      > ix86_fp_comparison_cost (swap_condition (code))
      && (GET_CODE (op1) == REG || !no_new_pseudos))
    {
      rtx tmp;
      tmp = op0, op0 = op1, op1 = tmp;
      code = swap_condition (code);
      if (GET_CODE (op0) != REG)
	op0 = force_reg (op_mode, op0);
    }

  *pop0 = op0;
  *pop1 = op1;
  return code;
}
8931
c0c102a9
JH
8932/* Convert comparison codes we use to represent FP comparison to integer
8933 code that will result in proper branch. Return UNKNOWN if no such code
8934 is available. */
8935static enum rtx_code
b96a374d 8936ix86_fp_compare_code_to_integer (enum rtx_code code)
c0c102a9
JH
8937{
8938 switch (code)
8939 {
8940 case GT:
8941 return GTU;
8942 case GE:
8943 return GEU;
8944 case ORDERED:
8945 case UNORDERED:
8946 return code;
8947 break;
8948 case UNEQ:
8949 return EQ;
8950 break;
8951 case UNLT:
8952 return LTU;
8953 break;
8954 case UNLE:
8955 return LEU;
8956 break;
8957 case LTGT:
8958 return NE;
8959 break;
8960 default:
8961 return UNKNOWN;
8962 }
8963}
8964
/* Split comparison code CODE into comparisons we can do using branch
   instructions.  BYPASS_CODE is comparison code for branch that will
   branch around FIRST_CODE and SECOND_CODE.  If some of branches
   is not required, set value to NIL.
   We never require more than two branches.  */
static void
ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
			  enum rtx_code *first_code,
			  enum rtx_code *second_code)
{
  *first_code = code;
  *bypass_code = NIL;
  *second_code = NIL;

  /* The fcomi comparison sets flags as follows:

	cmp    ZF PF CF
	>      0  0  0
	<      0  0  1
	=      1  0  0
	un     1  1  1 */

  switch (code)
    {
      /* These map directly onto one flags test and need no help.  */
    case GT:			/* GTU - CF=0 & ZF=0 */
    case GE:			/* GEU - CF=0 */
    case ORDERED:		/* PF=0 */
    case UNORDERED:		/* PF=1 */
    case UNEQ:			/* EQ - ZF=1 */
    case UNLT:			/* LTU - CF=1 */
    case UNLE:			/* LEU - CF=1 | ZF=1 */
    case LTGT:			/* EQ - ZF=0 */
      break;

      /* These would take the unordered branch by mistake, so guard
	 them with a bypass branch taken when the operands compare
	 unordered.  */
    case LT:			/* LTU - CF=1 - fails on unordered */
      *first_code = UNLT;
      *bypass_code = UNORDERED;
      break;
    case LE:			/* LEU - CF=1 | ZF=1 - fails on unordered */
      *first_code = UNLE;
      *bypass_code = UNORDERED;
      break;
    case EQ:			/* EQ - ZF=1 - fails on unordered */
      *first_code = UNEQ;
      *bypass_code = UNORDERED;
      break;

      /* These must also branch to the target when the operands compare
	 unordered, so add a second branch on UNORDERED.  */
    case NE:			/* NE - ZF=0 - fails on unordered */
      *first_code = LTGT;
      *second_code = UNORDERED;
      break;
    case UNGE:			/* GEU - CF=0 - fails on unordered */
      *first_code = GE;
      *second_code = UNORDERED;
      break;
    case UNGT:			/* GTU - CF=0 & ZF=0 - fails on unordered */
      *first_code = GT;
      *second_code = UNORDERED;
      break;
    default:
      abort ();
    }

  /* Without IEEE semantics NaNs need not be honored, so a single
     branch always suffices.  */
  if (!TARGET_IEEE_FP)
    {
      *second_code = NIL;
      *bypass_code = NIL;
    }
}
9031
9e7adcb3 9032/* Return cost of comparison done fcom + arithmetics operations on AX.
5bdc5878 9033 All following functions do use number of instructions as a cost metrics.
9e7adcb3
JH
9034 In future this should be tweaked to compute bytes for optimize_size and
9035 take into account performance of various instructions on various CPUs. */
9036static int
b96a374d 9037ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
9e7adcb3
JH
9038{
9039 if (!TARGET_IEEE_FP)
9040 return 4;
9041 /* The cost of code output by ix86_expand_fp_compare. */
9042 switch (code)
9043 {
9044 case UNLE:
9045 case UNLT:
9046 case LTGT:
9047 case GT:
9048 case GE:
9049 case UNORDERED:
9050 case ORDERED:
9051 case UNEQ:
9052 return 4;
9053 break;
9054 case LT:
9055 case NE:
9056 case EQ:
9057 case UNGE:
9058 return 5;
9059 break;
9060 case LE:
9061 case UNGT:
9062 return 6;
9063 break;
9064 default:
9065 abort ();
9066 }
9067}
9068
9069/* Return cost of comparison done using fcomi operation.
9070 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9071static int
b96a374d 9072ix86_fp_comparison_fcomi_cost (enum rtx_code code)
9e7adcb3
JH
9073{
9074 enum rtx_code bypass_code, first_code, second_code;
d1f87653 9075 /* Return arbitrarily high cost when instruction is not supported - this
9e7adcb3
JH
9076 prevents gcc from using it. */
9077 if (!TARGET_CMOVE)
9078 return 1024;
9079 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9080 return (bypass_code != NIL || second_code != NIL) + 2;
9081}
9082
9083/* Return cost of comparison done using sahf operation.
9084 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9085static int
b96a374d 9086ix86_fp_comparison_sahf_cost (enum rtx_code code)
9e7adcb3
JH
9087{
9088 enum rtx_code bypass_code, first_code, second_code;
d1f87653 9089 /* Return arbitrarily high cost when instruction is not preferred - this
9e7adcb3
JH
9090 avoids gcc from using it. */
9091 if (!TARGET_USE_SAHF && !optimize_size)
9092 return 1024;
9093 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9094 return (bypass_code != NIL || second_code != NIL) + 3;
9095}
9096
9097/* Compute cost of the comparison done using any method.
9098 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9099static int
b96a374d 9100ix86_fp_comparison_cost (enum rtx_code code)
9e7adcb3
JH
9101{
9102 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
9103 int min;
9104
9105 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
9106 sahf_cost = ix86_fp_comparison_sahf_cost (code);
9107
9108 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
9109 if (min > sahf_cost)
9110 min = sahf_cost;
9111 if (min > fcomi_cost)
9112 min = fcomi_cost;
9113 return min;
9114}
c0c102a9 9115
/* Generate insn patterns to do a floating point compare of OPERANDS.

   Emits the compare sequence for OP0 <CODE> OP1 and returns the
   comparison rtx to put into the flags user.  SCRATCH, if non-null, is
   an HImode register used to hold the 387 status word.  When the chosen
   strategy needs an extra branch, it is returned through *SECOND_TEST
   (an additional condition that must also branch to the target) or
   *BYPASS_TEST (a condition that branches around the main test).  */

static rtx
ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
			rtx *second_test, rtx *bypass_test)
{
  enum machine_mode fpcmp_mode, intcmp_mode;
  rtx tmp, tmp2;
  int cost = ix86_fp_comparison_cost (code);
  enum rtx_code bypass_code, first_code, second_code;

  fpcmp_mode = ix86_fp_compare_mode (code);
  code = ix86_prepare_fp_compare_args (code, &op0, &op1);

  if (second_test)
    *second_test = NULL_RTX;
  if (bypass_test)
    *bypass_test = NULL_RTX;

  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);

  /* Do fcomi/sahf based test when profitable.  */
  if ((bypass_code == NIL || bypass_test)
      && (second_code == NIL || second_test)
      && ix86_fp_comparison_arithmetics_cost (code) > cost)
    {
      if (TARGET_CMOVE)
	{
	  /* fcomi: compare directly into the flags register.  */
	  tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
	  tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
			     tmp);
	  emit_insn (tmp);
	}
      else
	{
	  /* fnstsw + sahf: copy the 387 status word into AH and load
	     it into the flags.  */
	  tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
	  tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
	  if (!scratch)
	    scratch = gen_reg_rtx (HImode);
	  emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
	  emit_insn (gen_x86_sahf_1 (scratch));
	}

      /* The FP codes work out to act like unsigned.  */
      intcmp_mode = fpcmp_mode;
      code = first_code;
      if (bypass_code != NIL)
	*bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
				       gen_rtx_REG (intcmp_mode, FLAGS_REG),
				       const0_rtx);
      if (second_code != NIL)
	*second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
				       gen_rtx_REG (intcmp_mode, FLAGS_REG),
				       const0_rtx);
    }
  else
    {
      /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first.  */
      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
      tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
      if (!scratch)
	scratch = gen_reg_rtx (HImode);
      emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));

      /* In the unordered case, we have to check C2 for NaN's, which
	 doesn't happen to work out to anything nice combination-wise.
	 So do some bit twiddling on the value we've got in AH to come
	 up with an appropriate set of condition codes.  */

      /* The masks below select 387 condition bits in AH: C0=0x01,
	 C2=0x04, C3=0x40, so e.g. 0x45 = C0|C2|C3 -- presumably per
	 the x87 status-word layout; see Intel SDM to confirm.  */
      intcmp_mode = CCNOmode;
      switch (code)
	{
	case GT:
	case UNGT:
	  if (code == GT || !TARGET_IEEE_FP)
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
	      code = EQ;
	    }
	  else
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
	      intcmp_mode = CCmode;
	      code = GEU;
	    }
	  break;
	case LT:
	case UNLT:
	  if (code == LT && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
	      intcmp_mode = CCmode;
	      code = EQ;
	    }
	  else
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
	      code = NE;
	    }
	  break;
	case GE:
	case UNGE:
	  if (code == GE || !TARGET_IEEE_FP)
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
	      code = EQ;
	    }
	  else
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
					     GEN_INT (0x01)));
	      code = NE;
	    }
	  break;
	case LE:
	case UNLE:
	  if (code == LE && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
	      intcmp_mode = CCmode;
	      code = LTU;
	    }
	  else
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
	      code = NE;
	    }
	  break;
	case EQ:
	case UNEQ:
	  if (code == EQ && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
	      intcmp_mode = CCmode;
	      code = EQ;
	    }
	  else
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
	      code = NE;
	      break;	/* NOTE: redundant -- the case break below follows.  */
	    }
	  break;
	case NE:
	case LTGT:
	  if (code == NE && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
					     GEN_INT (0x40)));
	      code = NE;
	    }
	  else
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
	      code = EQ;
	    }
	  break;

	case UNORDERED:
	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
	  code = NE;
	  break;
	case ORDERED:
	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
	  code = EQ;
	  break;

	default:
	  abort ();
	}
    }

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode,
			 gen_rtx_REG (intcmp_mode, FLAGS_REG),
			 const0_rtx);
}
9302
9e3e266c 9303rtx
b96a374d 9304ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
e075ae69
RH
9305{
9306 rtx op0, op1, ret;
9307 op0 = ix86_compare_op0;
9308 op1 = ix86_compare_op1;
9309
a1b8572c
JH
9310 if (second_test)
9311 *second_test = NULL_RTX;
9312 if (bypass_test)
9313 *bypass_test = NULL_RTX;
9314
e075ae69 9315 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
bf71a4f8 9316 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
77ebd435 9317 second_test, bypass_test);
32b5b1aa 9318 else
e075ae69
RH
9319 ret = ix86_expand_int_compare (code, op0, op1);
9320
9321 return ret;
9322}
9323
03598dea
JH
9324/* Return true if the CODE will result in nontrivial jump sequence. */
9325bool
b96a374d 9326ix86_fp_jump_nontrivial_p (enum rtx_code code)
03598dea
JH
9327{
9328 enum rtx_code bypass_code, first_code, second_code;
9329 if (!TARGET_CMOVE)
9330 return true;
9331 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9332 return bypass_code != NIL || second_code != NIL;
9333}
9334
/* Expand a conditional branch on CODE to LABEL.  The operands being
   compared are the globals ix86_compare_op0/ix86_compare_op1; their
   mode selects the strategy (integer, FP, or split DImode).  */

void
ix86_expand_branch (enum rtx_code code, rtx label)
{
  rtx tmp;

  switch (GET_MODE (ix86_compare_op0))
    {
    case QImode:
    case HImode:
    case SImode:
      simple:
      /* Plain integer compare followed by a conditional jump.  */
      tmp = ix86_expand_compare (code, NULL, NULL);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				  gen_rtx_LABEL_REF (VOIDmode, label),
				  pc_rtx);
      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
      return;

    case SFmode:
    case DFmode:
    case XFmode:
      {
	rtvec vec;
	int use_fcomi;
	enum rtx_code bypass_code, first_code, second_code;

	code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
					     &ix86_compare_op1);

	ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);

	/* Check whether we will use the natural sequence with one jump.  If
	   so, we can expand jump early.  Otherwise delay expansion by
	   creating compound insn to not confuse optimizers.  */
	if (bypass_code == NIL && second_code == NIL
	    && TARGET_CMOVE)
	  {
	    ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
				  gen_rtx_LABEL_REF (VOIDmode, label),
				  pc_rtx, NULL_RTX);
	  }
	else
	  {
	    tmp = gen_rtx_fmt_ee (code, VOIDmode,
				  ix86_compare_op0, ix86_compare_op1);
	    tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
					gen_rtx_LABEL_REF (VOIDmode, label),
					pc_rtx);
	    tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);

	    /* Build a PARALLEL: the jump, clobbers of hard regs 18/17
	       (presumably FPSR_REG and FLAGS_REG -- confirm against the
	       i386 register numbering), and, when not using fcomi, a
	       scratch HImode register for fnstsw.  */
	    use_fcomi = ix86_use_fcomi_compare (code);
	    vec = rtvec_alloc (3 + !use_fcomi);
	    RTVEC_ELT (vec, 0) = tmp;
	    RTVEC_ELT (vec, 1)
	      = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
	    RTVEC_ELT (vec, 2)
	      = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
	    if (! use_fcomi)
	      RTVEC_ELT (vec, 3)
		= gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));

	    emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
	  }
	return;
      }

    case DImode:
      if (TARGET_64BIT)
	goto simple;
      /* Expand DImode branch into multiple compare+branch.  */
      {
	rtx lo[2], hi[2], label2;
	enum rtx_code code1, code2, code3;

	/* Canonicalize so a constant, if any, is op1.  */
	if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
	  {
	    tmp = ix86_compare_op0;
	    ix86_compare_op0 = ix86_compare_op1;
	    ix86_compare_op1 = tmp;
	    code = swap_condition (code);
	  }
	split_di (&ix86_compare_op0, 1, lo+0, hi+0);
	split_di (&ix86_compare_op1, 1, lo+1, hi+1);

	/* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
	   avoid two branches.  This costs one extra insn, so disable when
	   optimizing for size.  */

	if ((code == EQ || code == NE)
	    && (!optimize_size
		|| hi[1] == const0_rtx || lo[1] == const0_rtx))
	  {
	    rtx xor0, xor1;

	    xor1 = hi[0];
	    if (hi[1] != const0_rtx)
	      xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
				   NULL_RTX, 0, OPTAB_WIDEN);

	    xor0 = lo[0];
	    if (lo[1] != const0_rtx)
	      xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
				   NULL_RTX, 0, OPTAB_WIDEN);

	    tmp = expand_binop (SImode, ior_optab, xor1, xor0,
				NULL_RTX, 0, OPTAB_WIDEN);

	    /* Recurse: result is zero iff the DImode operands are equal.  */
	    ix86_compare_op0 = tmp;
	    ix86_compare_op1 = const0_rtx;
	    ix86_expand_branch (code, label);
	    return;
	  }

	/* Otherwise, if we are doing less-than or greater-or-equal-than,
	   op1 is a constant and the low word is zero, then we can just
	   examine the high word.  */

	if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
	  switch (code)
	    {
	    case LT: case LTU: case GE: case GEU:
	      ix86_compare_op0 = hi[0];
	      ix86_compare_op1 = hi[1];
	      ix86_expand_branch (code, label);
	      return;
	    default:
	      break;
	    }

	/* Otherwise, we need two or three jumps.  */

	label2 = gen_label_rtx ();

	code1 = code;
	code2 = swap_condition (code);
	code3 = unsigned_condition (code);

	/* code1 branches to the target on the high-word compare,
	   code2 branches PAST it (to label2), and code3 decides on the
	   low words.  NIL marks a branch that is not needed.  */
	switch (code)
	  {
	  case LT: case GT: case LTU: case GTU:
	    break;

	  case LE:   code1 = LT;  code2 = GT;  break;
	  case GE:   code1 = GT;  code2 = LT;  break;
	  case LEU:  code1 = LTU; code2 = GTU; break;
	  case GEU:  code1 = GTU; code2 = LTU; break;

	  case EQ:   code1 = NIL; code2 = NE;  break;
	  case NE:   code2 = NIL; break;

	  default:
	    abort ();
	  }

	/*
	 * a < b =>
	 *    if (hi(a) < hi(b)) goto true;
	 *    if (hi(a) > hi(b)) goto false;
	 *    if (lo(a) < lo(b)) goto true;
	 *  false:
	 */

	ix86_compare_op0 = hi[0];
	ix86_compare_op1 = hi[1];

	if (code1 != NIL)
	  ix86_expand_branch (code1, label);
	if (code2 != NIL)
	  ix86_expand_branch (code2, label2);

	ix86_compare_op0 = lo[0];
	ix86_compare_op1 = lo[1];
	ix86_expand_branch (code3, label);

	if (code2 != NIL)
	  emit_label (label2);
	return;
      }

    default:
      abort ();
    }
}
e075ae69 9518
9e7adcb3
JH
/* Split branch based on floating point condition.

   CODE is the comparison to perform on OP1 and OP2; TARGET1 is taken
   when the condition holds and TARGET2 otherwise (one of them is
   expected to be pc_rtx, i.e. fall-through).  TMP is a scratch passed
   through to ix86_expand_fp_compare.

   IEEE comparisons may expand into up to three conditional jumps: an
   optional BYPASS jump (skips the main test, e.g. for unordered
   operands), the main CONDITION jump, and an optional SECOND jump.  */
void
ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
		      rtx target1, rtx target2, rtx tmp)
{
  rtx second, bypass;
  rtx label = NULL_RTX;
  rtx condition;
  int bypass_probability = -1, second_probability = -1, probability = -1;
  rtx i;

  /* Canonicalize so that TARGET2 is the fall-through (pc_rtx); if it
     is not, reverse the condition (unordered-safe reversal) and swap
     the targets.  */
  if (target2 != pc_rtx)
    {
      rtx tmp = target2;
      code = reverse_condition_maybe_unordered (code);
      target2 = target1;
      target1 = tmp;
    }

  condition = ix86_expand_fp_compare (code, op1, op2,
				      tmp, &second, &bypass);

  if (split_branch_probability >= 0)
    {
      /* Distribute the probabilities across the jumps.
	 Assume the BYPASS and SECOND to be always test
	 for UNORDERED.  */
      probability = split_branch_probability;

      /* Value of 1 is low enough to make no need for probability
	 to be updated.  Later we may run some experiments and see
	 if unordered values are more frequent in practice.  */
      if (bypass)
	bypass_probability = 1;
      if (second)
	second_probability = 1;
    }

  /* Emit the bypass jump first: it routes around the main test to a
     local label placed after all the jumps.  */
  if (bypass != NULL_RTX)
    {
      label = gen_label_rtx ();
      i = emit_jump_insn (gen_rtx_SET
			  (VOIDmode, pc_rtx,
			   gen_rtx_IF_THEN_ELSE (VOIDmode,
						 bypass,
						 gen_rtx_LABEL_REF (VOIDmode,
								    label),
						 pc_rtx)));
      if (bypass_probability >= 0)
	REG_NOTES (i)
	  = gen_rtx_EXPR_LIST (REG_BR_PROB,
			       GEN_INT (bypass_probability),
			       REG_NOTES (i));
    }

  /* The main conditional jump.  */
  i = emit_jump_insn (gen_rtx_SET
		      (VOIDmode, pc_rtx,
		       gen_rtx_IF_THEN_ELSE (VOIDmode,
					     condition, target1, target2)));
  if (probability >= 0)
    REG_NOTES (i)
      = gen_rtx_EXPR_LIST (REG_BR_PROB,
			   GEN_INT (probability),
			   REG_NOTES (i));

  /* Optional second jump to TARGET1 (e.g. the unordered half of an
     IEEE test).  */
  if (second != NULL_RTX)
    {
      i = emit_jump_insn (gen_rtx_SET
			  (VOIDmode, pc_rtx,
			   gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
						 target2)));
      if (second_probability >= 0)
	REG_NOTES (i)
	  = gen_rtx_EXPR_LIST (REG_BR_PROB,
			       GEN_INT (second_probability),
			       REG_NOTES (i));
    }

  /* Land the bypass jump here, past every emitted branch.  */
  if (label != NULL_RTX)
    emit_label (label);
}
9596
32b5b1aa 9597int
b96a374d 9598ix86_expand_setcc (enum rtx_code code, rtx dest)
32b5b1aa 9599{
3a627503 9600 rtx ret, tmp, tmpreg, equiv;
a1b8572c 9601 rtx second_test, bypass_test;
e075ae69 9602
885a70fd
JH
9603 if (GET_MODE (ix86_compare_op0) == DImode
9604 && !TARGET_64BIT)
e075ae69
RH
9605 return 0; /* FAIL */
9606
b932f770
JH
9607 if (GET_MODE (dest) != QImode)
9608 abort ();
e075ae69 9609
a1b8572c 9610 ret = ix86_expand_compare (code, &second_test, &bypass_test);
e075ae69
RH
9611 PUT_MODE (ret, QImode);
9612
9613 tmp = dest;
a1b8572c 9614 tmpreg = dest;
32b5b1aa 9615
e075ae69 9616 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
a1b8572c
JH
9617 if (bypass_test || second_test)
9618 {
9619 rtx test = second_test;
9620 int bypass = 0;
9621 rtx tmp2 = gen_reg_rtx (QImode);
9622 if (bypass_test)
9623 {
9624 if (second_test)
b531087a 9625 abort ();
a1b8572c
JH
9626 test = bypass_test;
9627 bypass = 1;
9628 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
9629 }
9630 PUT_MODE (test, QImode);
9631 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
9632
9633 if (bypass)
9634 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
9635 else
9636 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
9637 }
e075ae69 9638
3a627503
RS
9639 /* Attach a REG_EQUAL note describing the comparison result. */
9640 equiv = simplify_gen_relational (code, QImode,
9641 GET_MODE (ix86_compare_op0),
9642 ix86_compare_op0, ix86_compare_op1);
9643 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
9644
e075ae69 9645 return 1; /* DONE */
32b5b1aa 9646}
e075ae69 9647
c35d187f
RH
/* Expand comparison setting or clearing carry flag.  Return true when
   successful and set *POP to the flags-test rtx (always an LTU or GEU
   test of the carry flag).  CODE compares OP0 against OP1.

   Integer comparisons are algebraically rewritten into an unsigned
   "below" / "not below" form so a single CMP sets the carry as needed;
   FP comparisons are accepted only when their expansion already ends
   up carry-based.  */
static bool
ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
{
  /* Pick the mode from whichever operand carries one (one may be a
     VOIDmode constant).  */
  enum machine_mode mode =
    GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);

  /* Do not handle DImode compares that go through the special split
     path on 32-bit targets.  */
  if ((mode == DImode && !TARGET_64BIT))
    return false;
  if (FLOAT_MODE_P (mode))
    {
      rtx second_test = NULL, bypass_test = NULL;
      rtx compare_op, compare_seq;

      /* Shortcut: the following common codes never translate into
	 carry flag compares.  */
      if (code == EQ || code == NE || code == UNEQ || code == LTGT
	  || code == ORDERED || code == UNORDERED)
	return false;

      /* These comparisons require zero flag; swap operands so they won't.  */
      if ((code == GT || code == UNLE || code == LE || code == UNGT)
	  && !TARGET_IEEE_FP)
	{
	  rtx tmp = op0;
	  op0 = op1;
	  op1 = tmp;
	  code = swap_condition (code);
	}

      /* Try to expand the comparison and verify that we end up with a
	 carry-flag-based comparison.  This fails only when we decide
	 to expand the comparison using arithmetic, which is not a
	 common scenario.  Expand into a throwaway sequence first so
	 nothing is emitted on the failure paths.  */
      start_sequence ();
      compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
					   &second_test, &bypass_test);
      compare_seq = get_insns ();
      end_sequence ();

      if (second_test || bypass_test)
	return false;
      /* Map FP condition codes back to the integer LTU/GEU view of the
	 carry flag.  */
      if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
	  || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
	code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
      else
	code = GET_CODE (compare_op);
      if (code != LTU && code != GEU)
	return false;
      emit_insn (compare_seq);
      *pop = compare_op;
      return true;
    }
  if (!INTEGRAL_MODE_P (mode))
    return false;
  switch (code)
    {
    case LTU:
    case GEU:
      /* Already carry-based.  */
      break;

      /* Convert a==0 into (unsigned)a<1.  */
    case EQ:
    case NE:
      if (op1 != const0_rtx)
	return false;
      op1 = const1_rtx;
      code = (code == EQ ? LTU : GEU);
      break;

      /* Convert a>b into b<a or a>=b+1.  */
    case GTU:
    case LEU:
      if (GET_CODE (op1) == CONST_INT)
	{
	  op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
	  /* Bail out on overflow.  We still can swap operands but that
	     would force loading of the constant into register.  */
	  if (op1 == const0_rtx
	      || !x86_64_immediate_operand (op1, GET_MODE (op1)))
	    return false;
	  code = (code == GTU ? GEU : LTU);
	}
      else
	{
	  rtx tmp = op1;
	  op1 = op0;
	  op0 = tmp;
	  code = (code == GTU ? LTU : GEU);
	}
      break;

      /* Convert a>=0 into (unsigned)a<0x80000000.  */
    case LT:
    case GE:
      if (mode == DImode || op1 != const0_rtx)
	return false;
      op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
      code = (code == LT ? GEU : LTU);
      break;
    /* Convert a<=-1 (i.e. a<0) the same way.  */
    case LE:
    case GT:
      if (mode == DImode || op1 != constm1_rtx)
	return false;
      op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
      code = (code == LE ? GEU : LTU);
      break;

    default:
      return false;
    }
  /* Swapping operands may cause constant to appear as first operand.  */
  if (!nonimmediate_operand (op0, VOIDmode))
    {
      if (no_new_pseudos)
	return false;
      op0 = force_reg (mode, op0);
    }
  ix86_compare_op0 = op0;
  ix86_compare_op1 = op1;
  *pop = ix86_expand_compare (code, NULL, NULL);
  /* By construction the expansion above must be a carry test.  */
  if (GET_CODE (*pop) != LTU && GET_CODE (*pop) != GEU)
    abort ();
  return true;
}
9774
32b5b1aa 9775int
b96a374d 9776ix86_expand_int_movcc (rtx operands[])
32b5b1aa 9777{
e075ae69
RH
9778 enum rtx_code code = GET_CODE (operands[1]), compare_code;
9779 rtx compare_seq, compare_op;
a1b8572c 9780 rtx second_test, bypass_test;
635559ab 9781 enum machine_mode mode = GET_MODE (operands[0]);
4977bab6 9782 bool sign_bit_compare_p = false;;
3a3677ff 9783
e075ae69 9784 start_sequence ();
a1b8572c 9785 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
2f937369 9786 compare_seq = get_insns ();
e075ae69
RH
9787 end_sequence ();
9788
9789 compare_code = GET_CODE (compare_op);
9790
4977bab6
ZW
9791 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
9792 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
9793 sign_bit_compare_p = true;
9794
e075ae69
RH
9795 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
9796 HImode insns, we'd be swallowed in word prefix ops. */
9797
4977bab6 9798 if ((mode != HImode || TARGET_FAST_PREFIX)
635559ab 9799 && (mode != DImode || TARGET_64BIT)
0f290768 9800 && GET_CODE (operands[2]) == CONST_INT
e075ae69
RH
9801 && GET_CODE (operands[3]) == CONST_INT)
9802 {
9803 rtx out = operands[0];
9804 HOST_WIDE_INT ct = INTVAL (operands[2]);
9805 HOST_WIDE_INT cf = INTVAL (operands[3]);
9806 HOST_WIDE_INT diff;
9807
4977bab6
ZW
9808 diff = ct - cf;
9809 /* Sign bit compares are better done using shifts than we do by using
b96a374d 9810 sbb. */
4977bab6
ZW
9811 if (sign_bit_compare_p
9812 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
9813 ix86_compare_op1, &compare_op))
e075ae69 9814 {
e075ae69
RH
9815 /* Detect overlap between destination and compare sources. */
9816 rtx tmp = out;
9817
4977bab6 9818 if (!sign_bit_compare_p)
36583fea 9819 {
e6e81735
JH
9820 bool fpcmp = false;
9821
4977bab6
ZW
9822 compare_code = GET_CODE (compare_op);
9823
e6e81735
JH
9824 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9825 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9826 {
9827 fpcmp = true;
9828 compare_code = ix86_fp_compare_code_to_integer (compare_code);
9829 }
9830
4977bab6
ZW
9831 /* To simplify rest of code, restrict to the GEU case. */
9832 if (compare_code == LTU)
9833 {
9834 HOST_WIDE_INT tmp = ct;
9835 ct = cf;
9836 cf = tmp;
9837 compare_code = reverse_condition (compare_code);
9838 code = reverse_condition (code);
9839 }
e6e81735
JH
9840 else
9841 {
9842 if (fpcmp)
9843 PUT_CODE (compare_op,
9844 reverse_condition_maybe_unordered
9845 (GET_CODE (compare_op)));
9846 else
9847 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
9848 }
4977bab6 9849 diff = ct - cf;
36583fea 9850
4977bab6
ZW
9851 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
9852 || reg_overlap_mentioned_p (out, ix86_compare_op1))
9853 tmp = gen_reg_rtx (mode);
e075ae69 9854
4977bab6 9855 if (mode == DImode)
e6e81735 9856 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
4977bab6 9857 else
e6e81735 9858 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
4977bab6 9859 }
14f73b5a 9860 else
4977bab6
ZW
9861 {
9862 if (code == GT || code == GE)
9863 code = reverse_condition (code);
9864 else
9865 {
9866 HOST_WIDE_INT tmp = ct;
9867 ct = cf;
9868 cf = tmp;
5fb48685 9869 diff = ct - cf;
4977bab6
ZW
9870 }
9871 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
9872 ix86_compare_op1, VOIDmode, 0, -1);
9873 }
e075ae69 9874
36583fea
JH
9875 if (diff == 1)
9876 {
9877 /*
9878 * cmpl op0,op1
9879 * sbbl dest,dest
9880 * [addl dest, ct]
9881 *
9882 * Size 5 - 8.
9883 */
9884 if (ct)
b96a374d 9885 tmp = expand_simple_binop (mode, PLUS,
635559ab 9886 tmp, GEN_INT (ct),
4977bab6 9887 copy_rtx (tmp), 1, OPTAB_DIRECT);
36583fea
JH
9888 }
9889 else if (cf == -1)
9890 {
9891 /*
9892 * cmpl op0,op1
9893 * sbbl dest,dest
9894 * orl $ct, dest
9895 *
9896 * Size 8.
9897 */
635559ab
JH
9898 tmp = expand_simple_binop (mode, IOR,
9899 tmp, GEN_INT (ct),
4977bab6 9900 copy_rtx (tmp), 1, OPTAB_DIRECT);
36583fea
JH
9901 }
9902 else if (diff == -1 && ct)
9903 {
9904 /*
9905 * cmpl op0,op1
9906 * sbbl dest,dest
06ec023f 9907 * notl dest
36583fea
JH
9908 * [addl dest, cf]
9909 *
9910 * Size 8 - 11.
9911 */
4977bab6 9912 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
635559ab 9913 if (cf)
b96a374d 9914 tmp = expand_simple_binop (mode, PLUS,
4977bab6
ZW
9915 copy_rtx (tmp), GEN_INT (cf),
9916 copy_rtx (tmp), 1, OPTAB_DIRECT);
36583fea
JH
9917 }
9918 else
9919 {
9920 /*
9921 * cmpl op0,op1
9922 * sbbl dest,dest
06ec023f 9923 * [notl dest]
36583fea
JH
9924 * andl cf - ct, dest
9925 * [addl dest, ct]
9926 *
9927 * Size 8 - 11.
9928 */
06ec023f
RB
9929
9930 if (cf == 0)
9931 {
9932 cf = ct;
9933 ct = 0;
4977bab6 9934 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
06ec023f
RB
9935 }
9936
635559ab 9937 tmp = expand_simple_binop (mode, AND,
4977bab6 9938 copy_rtx (tmp),
d8bf17f9 9939 gen_int_mode (cf - ct, mode),
4977bab6 9940 copy_rtx (tmp), 1, OPTAB_DIRECT);
635559ab 9941 if (ct)
b96a374d 9942 tmp = expand_simple_binop (mode, PLUS,
4977bab6
ZW
9943 copy_rtx (tmp), GEN_INT (ct),
9944 copy_rtx (tmp), 1, OPTAB_DIRECT);
36583fea 9945 }
e075ae69 9946
4977bab6
ZW
9947 if (!rtx_equal_p (tmp, out))
9948 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
e075ae69
RH
9949
9950 return 1; /* DONE */
9951 }
9952
e075ae69
RH
9953 if (diff < 0)
9954 {
9955 HOST_WIDE_INT tmp;
9956 tmp = ct, ct = cf, cf = tmp;
9957 diff = -diff;
734dba19
JH
9958 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9959 {
9960 /* We may be reversing unordered compare to normal compare, that
9961 is not valid in general (we may convert non-trapping condition
9962 to trapping one), however on i386 we currently emit all
9963 comparisons unordered. */
9964 compare_code = reverse_condition_maybe_unordered (compare_code);
9965 code = reverse_condition_maybe_unordered (code);
9966 }
9967 else
9968 {
9969 compare_code = reverse_condition (compare_code);
9970 code = reverse_condition (code);
9971 }
e075ae69 9972 }
0f2a3457
JJ
9973
9974 compare_code = NIL;
9975 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
9976 && GET_CODE (ix86_compare_op1) == CONST_INT)
9977 {
9978 if (ix86_compare_op1 == const0_rtx
9979 && (code == LT || code == GE))
9980 compare_code = code;
9981 else if (ix86_compare_op1 == constm1_rtx)
9982 {
9983 if (code == LE)
9984 compare_code = LT;
9985 else if (code == GT)
9986 compare_code = GE;
9987 }
9988 }
9989
9990 /* Optimize dest = (op0 < 0) ? -1 : cf. */
9991 if (compare_code != NIL
9992 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
9993 && (cf == -1 || ct == -1))
9994 {
9995 /* If lea code below could be used, only optimize
9996 if it results in a 2 insn sequence. */
9997
9998 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
9999 || diff == 3 || diff == 5 || diff == 9)
10000 || (compare_code == LT && ct == -1)
10001 || (compare_code == GE && cf == -1))
10002 {
10003 /*
10004 * notl op1 (if necessary)
10005 * sarl $31, op1
10006 * orl cf, op1
10007 */
10008 if (ct != -1)
10009 {
10010 cf = ct;
b96a374d 10011 ct = -1;
0f2a3457
JJ
10012 code = reverse_condition (code);
10013 }
10014
10015 out = emit_store_flag (out, code, ix86_compare_op0,
10016 ix86_compare_op1, VOIDmode, 0, -1);
10017
10018 out = expand_simple_binop (mode, IOR,
10019 out, GEN_INT (cf),
10020 out, 1, OPTAB_DIRECT);
10021 if (out != operands[0])
10022 emit_move_insn (operands[0], out);
10023
10024 return 1; /* DONE */
10025 }
10026 }
10027
4977bab6 10028
635559ab
JH
10029 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
10030 || diff == 3 || diff == 5 || diff == 9)
4977bab6 10031 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
c05dbe81 10032 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
e075ae69
RH
10033 {
10034 /*
10035 * xorl dest,dest
10036 * cmpl op1,op2
10037 * setcc dest
10038 * lea cf(dest*(ct-cf)),dest
10039 *
10040 * Size 14.
10041 *
10042 * This also catches the degenerate setcc-only case.
10043 */
10044
10045 rtx tmp;
10046 int nops;
10047
10048 out = emit_store_flag (out, code, ix86_compare_op0,
10049 ix86_compare_op1, VOIDmode, 0, 1);
10050
10051 nops = 0;
97f51ac4
RB
10052 /* On x86_64 the lea instruction operates on Pmode, so we need
10053 to get arithmetics done in proper mode to match. */
e075ae69 10054 if (diff == 1)
068f5dea 10055 tmp = copy_rtx (out);
e075ae69
RH
10056 else
10057 {
885a70fd 10058 rtx out1;
068f5dea 10059 out1 = copy_rtx (out);
635559ab 10060 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
e075ae69
RH
10061 nops++;
10062 if (diff & 1)
10063 {
635559ab 10064 tmp = gen_rtx_PLUS (mode, tmp, out1);
e075ae69
RH
10065 nops++;
10066 }
10067 }
10068 if (cf != 0)
10069 {
635559ab 10070 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
e075ae69
RH
10071 nops++;
10072 }
4977bab6 10073 if (!rtx_equal_p (tmp, out))
e075ae69 10074 {
14f73b5a 10075 if (nops == 1)
a5cf80f0 10076 out = force_operand (tmp, copy_rtx (out));
e075ae69 10077 else
4977bab6 10078 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
e075ae69 10079 }
4977bab6 10080 if (!rtx_equal_p (out, operands[0]))
1985ef90 10081 emit_move_insn (operands[0], copy_rtx (out));
e075ae69
RH
10082
10083 return 1; /* DONE */
10084 }
10085
10086 /*
10087 * General case: Jumpful:
10088 * xorl dest,dest cmpl op1, op2
10089 * cmpl op1, op2 movl ct, dest
10090 * setcc dest jcc 1f
10091 * decl dest movl cf, dest
10092 * andl (cf-ct),dest 1:
10093 * addl ct,dest
0f290768 10094 *
e075ae69
RH
10095 * Size 20. Size 14.
10096 *
10097 * This is reasonably steep, but branch mispredict costs are
10098 * high on modern cpus, so consider failing only if optimizing
10099 * for space.
e075ae69
RH
10100 */
10101
4977bab6
ZW
10102 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
10103 && BRANCH_COST >= 2)
e075ae69 10104 {
97f51ac4 10105 if (cf == 0)
e075ae69 10106 {
97f51ac4
RB
10107 cf = ct;
10108 ct = 0;
734dba19 10109 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
0f2a3457
JJ
10110 /* We may be reversing unordered compare to normal compare,
10111 that is not valid in general (we may convert non-trapping
10112 condition to trapping one), however on i386 we currently
10113 emit all comparisons unordered. */
10114 code = reverse_condition_maybe_unordered (code);
10115 else
10116 {
10117 code = reverse_condition (code);
10118 if (compare_code != NIL)
10119 compare_code = reverse_condition (compare_code);
10120 }
10121 }
10122
10123 if (compare_code != NIL)
10124 {
10125 /* notl op1 (if needed)
10126 sarl $31, op1
10127 andl (cf-ct), op1
b96a374d 10128 addl ct, op1
0f2a3457
JJ
10129
10130 For x < 0 (resp. x <= -1) there will be no notl,
10131 so if possible swap the constants to get rid of the
10132 complement.
10133 True/false will be -1/0 while code below (store flag
10134 followed by decrement) is 0/-1, so the constants need
10135 to be exchanged once more. */
10136
10137 if (compare_code == GE || !cf)
734dba19 10138 {
b96a374d 10139 code = reverse_condition (code);
0f2a3457 10140 compare_code = LT;
734dba19
JH
10141 }
10142 else
10143 {
0f2a3457 10144 HOST_WIDE_INT tmp = cf;
b96a374d 10145 cf = ct;
0f2a3457 10146 ct = tmp;
734dba19 10147 }
0f2a3457
JJ
10148
10149 out = emit_store_flag (out, code, ix86_compare_op0,
10150 ix86_compare_op1, VOIDmode, 0, -1);
e075ae69 10151 }
0f2a3457
JJ
10152 else
10153 {
10154 out = emit_store_flag (out, code, ix86_compare_op0,
10155 ix86_compare_op1, VOIDmode, 0, 1);
e075ae69 10156
4977bab6
ZW
10157 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
10158 copy_rtx (out), 1, OPTAB_DIRECT);
0f2a3457 10159 }
e075ae69 10160
4977bab6 10161 out = expand_simple_binop (mode, AND, copy_rtx (out),
d8bf17f9 10162 gen_int_mode (cf - ct, mode),
4977bab6 10163 copy_rtx (out), 1, OPTAB_DIRECT);
97f51ac4 10164 if (ct)
4977bab6
ZW
10165 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
10166 copy_rtx (out), 1, OPTAB_DIRECT);
10167 if (!rtx_equal_p (out, operands[0]))
10168 emit_move_insn (operands[0], copy_rtx (out));
e075ae69
RH
10169
10170 return 1; /* DONE */
10171 }
10172 }
10173
4977bab6 10174 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
e075ae69
RH
10175 {
10176 /* Try a few things more with specific constants and a variable. */
10177
78a0d70c 10178 optab op;
e075ae69
RH
10179 rtx var, orig_out, out, tmp;
10180
4977bab6 10181 if (BRANCH_COST <= 2)
e075ae69
RH
10182 return 0; /* FAIL */
10183
0f290768 10184 /* If one of the two operands is an interesting constant, load a
e075ae69 10185 constant with the above and mask it in with a logical operation. */
0f290768 10186
e075ae69
RH
10187 if (GET_CODE (operands[2]) == CONST_INT)
10188 {
10189 var = operands[3];
4977bab6 10190 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
e075ae69 10191 operands[3] = constm1_rtx, op = and_optab;
4977bab6 10192 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
e075ae69 10193 operands[3] = const0_rtx, op = ior_optab;
78a0d70c
ZW
10194 else
10195 return 0; /* FAIL */
e075ae69
RH
10196 }
10197 else if (GET_CODE (operands[3]) == CONST_INT)
10198 {
10199 var = operands[2];
4977bab6 10200 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
e075ae69 10201 operands[2] = constm1_rtx, op = and_optab;
4977bab6 10202 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
e075ae69 10203 operands[2] = const0_rtx, op = ior_optab;
78a0d70c
ZW
10204 else
10205 return 0; /* FAIL */
e075ae69 10206 }
78a0d70c 10207 else
e075ae69
RH
10208 return 0; /* FAIL */
10209
10210 orig_out = operands[0];
635559ab 10211 tmp = gen_reg_rtx (mode);
e075ae69
RH
10212 operands[0] = tmp;
10213
10214 /* Recurse to get the constant loaded. */
10215 if (ix86_expand_int_movcc (operands) == 0)
10216 return 0; /* FAIL */
10217
10218 /* Mask in the interesting variable. */
635559ab 10219 out = expand_binop (mode, op, var, tmp, orig_out, 0,
e075ae69 10220 OPTAB_WIDEN);
4977bab6
ZW
10221 if (!rtx_equal_p (out, orig_out))
10222 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
e075ae69
RH
10223
10224 return 1; /* DONE */
10225 }
10226
10227 /*
10228 * For comparison with above,
10229 *
10230 * movl cf,dest
10231 * movl ct,tmp
10232 * cmpl op1,op2
10233 * cmovcc tmp,dest
10234 *
10235 * Size 15.
10236 */
10237
635559ab
JH
10238 if (! nonimmediate_operand (operands[2], mode))
10239 operands[2] = force_reg (mode, operands[2]);
10240 if (! nonimmediate_operand (operands[3], mode))
10241 operands[3] = force_reg (mode, operands[3]);
e075ae69 10242
a1b8572c
JH
10243 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10244 {
635559ab 10245 rtx tmp = gen_reg_rtx (mode);
a1b8572c
JH
10246 emit_move_insn (tmp, operands[3]);
10247 operands[3] = tmp;
10248 }
10249 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10250 {
635559ab 10251 rtx tmp = gen_reg_rtx (mode);
a1b8572c
JH
10252 emit_move_insn (tmp, operands[2]);
10253 operands[2] = tmp;
10254 }
4977bab6 10255
c9682caf 10256 if (! register_operand (operands[2], VOIDmode)
b96a374d 10257 && (mode == QImode
4977bab6 10258 || ! register_operand (operands[3], VOIDmode)))
635559ab 10259 operands[2] = force_reg (mode, operands[2]);
a1b8572c 10260
4977bab6
ZW
10261 if (mode == QImode
10262 && ! register_operand (operands[3], VOIDmode))
10263 operands[3] = force_reg (mode, operands[3]);
10264
e075ae69
RH
10265 emit_insn (compare_seq);
10266 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
635559ab 10267 gen_rtx_IF_THEN_ELSE (mode,
e075ae69
RH
10268 compare_op, operands[2],
10269 operands[3])));
a1b8572c 10270 if (bypass_test)
4977bab6 10271 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
635559ab 10272 gen_rtx_IF_THEN_ELSE (mode,
a1b8572c 10273 bypass_test,
4977bab6
ZW
10274 copy_rtx (operands[3]),
10275 copy_rtx (operands[0]))));
a1b8572c 10276 if (second_test)
4977bab6 10277 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
635559ab 10278 gen_rtx_IF_THEN_ELSE (mode,
a1b8572c 10279 second_test,
4977bab6
ZW
10280 copy_rtx (operands[2]),
10281 copy_rtx (operands[0]))));
e075ae69
RH
10282
10283 return 1; /* DONE */
e9a25f70 10284}
e075ae69 10285
/* Expand a floating point conditional move: operands[0] = operands[1]
   ? operands[2] : operands[3], comparison operands in the globals
   ix86_compare_op0/ix86_compare_op1.

   First tries to recognize SSE min/max patterns; otherwise emits an
   fcmov (materializing the condition with setcc first when fcmov
   cannot encode it).  Always returns 1.  */
int
ix86_expand_fp_movcc (rtx operands[])
{
  enum rtx_code code;
  rtx tmp;
  rtx compare_op, second_test, bypass_test;

  /* For SF/DFmode conditional moves based on comparisons
     in same mode, we may want to use SSE min/max instructions.  */
  if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
       || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
      && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
      /* The SSE comparisons does not support the LTGT/UNEQ pair.  */
      && (!TARGET_IEEE_FP
	  || (GET_CODE (operands[1]) != LTGT
	      && GET_CODE (operands[1]) != UNEQ))
      /* We may be called from the post-reload splitter.  */
      && (!REG_P (operands[0])
	  || SSE_REG_P (operands[0])
	  || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
    {
      rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
      code = GET_CODE (operands[1]);

      /* See if we have (cross) match between comparison operands and
         conditional move operands.  */
      if (rtx_equal_p (operands[2], op1))
	{
	  rtx tmp = op0;
	  op0 = op1;
	  op1 = tmp;
	  code = reverse_condition_maybe_unordered (code);
	}
      if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
	{
	  /* Check for min operation: a<b ? a : b maps to minss/minsd.  */
	  if (code == LT || code == UNLE)
	    {
	      /* UNLE is min with the operands swapped.  */
	      if (code == UNLE)
		{
		  rtx tmp = op0;
		  op0 = op1;
		  op1 = tmp;
		}
	      operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
	      if (memory_operand (op0, VOIDmode))
		op0 = force_reg (GET_MODE (operands[0]), op0);
	      if (GET_MODE (operands[0]) == SFmode)
		emit_insn (gen_minsf3 (operands[0], op0, op1));
	      else
		emit_insn (gen_mindf3 (operands[0], op0, op1));
	      return 1;
	    }
	  /* Check for max operation: a>b ? a : b maps to maxss/maxsd.  */
	  if (code == GT || code == UNGE)
	    {
	      /* UNGE is max with the operands swapped.  */
	      if (code == UNGE)
		{
		  rtx tmp = op0;
		  op0 = op1;
		  op1 = tmp;
		}
	      operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
	      if (memory_operand (op0, VOIDmode))
		op0 = force_reg (GET_MODE (operands[0]), op0);
	      if (GET_MODE (operands[0]) == SFmode)
		emit_insn (gen_maxsf3 (operands[0], op0, op1));
	      else
		emit_insn (gen_maxdf3 (operands[0], op0, op1));
	      return 1;
	    }
	}
      /* Manage condition to be sse_comparison_operator.  In case we are
	 in non-ieee mode, try to canonicalize the destination operand
	 to be first in the comparison - this helps reload to avoid extra
	 moves.  */
      if (!sse_comparison_operator (operands[1], VOIDmode)
	  || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
	{
	  rtx tmp = ix86_compare_op0;
	  ix86_compare_op0 = ix86_compare_op1;
	  ix86_compare_op1 = tmp;
	  operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
					VOIDmode, ix86_compare_op0,
					ix86_compare_op1);
	}
      /* Similarly try to manage result to be first operand of conditional
	 move. We also don't support the NE comparison on SSE, so try to
	 avoid it.  */
      if ((rtx_equal_p (operands[0], operands[3])
	   && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
	  || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
	{
	  rtx tmp = operands[2];
	  operands[2] = operands[3];
	  operands[3] = tmp;
	  operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
					  (GET_CODE (operands[1])),
					VOIDmode, ix86_compare_op0,
					ix86_compare_op1);
	}
      if (GET_MODE (operands[0]) == SFmode)
	emit_insn (gen_sse_movsfcc (operands[0], operands[1],
				    operands[2], operands[3],
				    ix86_compare_op0, ix86_compare_op1));
      else
	emit_insn (gen_sse_movdfcc (operands[0], operands[1],
				    operands[2], operands[3],
				    ix86_compare_op0, ix86_compare_op1));
      return 1;
    }

  /* The floating point conditional move instructions don't directly
     support conditions resulting from a signed integer comparison.  */

  code = GET_CODE (operands[1]);
  compare_op = ix86_expand_compare (code, &second_test, &bypass_test);

  /* The floating point conditional move instructions don't directly
     support signed integer comparisons.  */

  if (!fcmov_comparison_operator (compare_op, VOIDmode))
    {
      /* Materialize the condition into a QImode register with setcc,
	 then fcmov on (reg != 0).  */
      if (second_test != NULL || bypass_test != NULL)
	abort ();
      tmp = gen_reg_rtx (QImode);
      ix86_expand_setcc (code, tmp);
      code = NE;
      ix86_compare_op0 = tmp;
      ix86_compare_op1 = const0_rtx;
      compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
    }
  /* The fixup moves below read operands[0]; copy an arm to a fresh
     register when it overlaps the destination.  */
  if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
    {
      tmp = gen_reg_rtx (GET_MODE (operands[0]));
      emit_move_insn (tmp, operands[3]);
      operands[3] = tmp;
    }
  if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
    {
      tmp = gen_reg_rtx (GET_MODE (operands[0]));
      emit_move_insn (tmp, operands[2]);
      operands[2] = tmp;
    }

  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			  gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
						compare_op,
						operands[2],
						operands[3])));
  /* Secondary fcmovs patch up the IEEE cases the primary condition
     cannot express.  */
  if (bypass_test)
    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			    gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
						  bypass_test,
						  operands[3],
						  operands[0])));
  if (second_test)
    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			    gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
						  second_test,
						  operands[2],
						  operands[0])));

  return 1;
}
10450
7b52eede
JH
/* Expand conditional increment or decrement using adc/sbb instructions.
   The default case using setcc followed by the conditional move can be
   done by generic code.

   OPERANDS[0] is the destination, OPERANDS[1] the comparison rtx,
   OPERANDS[2] the value to increment/decrement and OPERANDS[3] must be
   const1_rtx or constm1_rtx.  Returns 1 when the expansion was done,
   0 when the generic path must be used instead.  */
int
ix86_expand_int_addcc (rtx operands[])
{
  enum rtx_code code = GET_CODE (operands[1]);
  rtx compare_op;
  rtx val = const0_rtx;
  bool fpcmp = false;
  enum machine_mode mode = GET_MODE (operands[0]);

  /* Only +/-1 can be folded into the carry of an adc/sbb.  */
  if (operands[3] != const1_rtx
      && operands[3] != constm1_rtx)
    return 0;
  /* Give up unless the comparison can be expressed through the carry
     flag alone.  */
  if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
                                       ix86_compare_op1, &compare_op))
     return 0;
  code = GET_CODE (compare_op);

  /* FP comparisons set the flags differently; translate the FP
     condition code to the equivalent integer one.  */
  if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
      || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
    {
      fpcmp = true;
      code = ix86_fp_compare_code_to_integer (code);
    }

  /* Canonicalize on LTU (carry set); any other condition is rewritten
     in place to its reverse and VAL flips to -1 to compensate.  */
  if (code != LTU)
    {
      val = constm1_rtx;
      if (fpcmp)
        PUT_CODE (compare_op,
                  reverse_condition_maybe_unordered
                    (GET_CODE (compare_op)));
      else
        PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
    }
  PUT_MODE (compare_op, mode);

  /* Construct either adc or sbb insn.  */
  if ((code == LTU) == (operands[3] == constm1_rtx))
    {
      switch (GET_MODE (operands[0]))
        {
        case QImode:
          emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
          break;
        case HImode:
          emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
          break;
        case SImode:
          emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
          break;
        case DImode:
          emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
          break;
        default:
          abort ();
        }
    }
  else
    {
      switch (GET_MODE (operands[0]))
        {
        case QImode:
          emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
          break;
        case HImode:
          emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
          break;
        case SImode:
          emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
          break;
        case DImode:
          emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
          break;
        default:
          abort ();
        }
    }
  return 1; /* DONE */
}
10533
10534
2450a057
JH
/* Split operands 0 and 1 into SImode parts.  Similar to split_di, but
   works for floating-point parameters and nonoffsetable memories.
   For pushes, it returns just stack offsets; the values will be saved
   in the right order.  Maximally three parts are generated.

   OPERAND is the value to split, PARTS receives the pieces and MODE is
   the mode of the whole value.  Returns the number of parts (2 or 3).  */
static int
ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
{
  int size;

  /* Number of word-sized pieces: 32-bit words on ia32 (XFmode is 12
     bytes, hence 3 parts), 64-bit words on x86-64.  */
  if (!TARGET_64BIT)
    size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
  else
    size = (GET_MODE_SIZE (mode) + 4) / 8;

  /* MMX registers cannot be split into word-sized subregisters.  */
  if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
    abort ();
  if (size < 2 || size > 3)
    abort ();

  /* Optimize constant pool reference to immediates.  This is used by fp
     moves, that force all constants to memory to allow combining.  */
  if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
    {
      rtx tmp = maybe_get_pool_constant (operand);
      if (tmp)
        operand = tmp;
    }

  if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
    {
      /* The only non-offsetable memories we handle are pushes.  */
      if (! push_operand (operand, VOIDmode))
        abort ();

      /* A push is represented once; all parts alias the same Pmode
         push expression and are emitted in order by the caller.  */
      operand = copy_rtx (operand);
      PUT_MODE (operand, Pmode);
      parts[0] = parts[1] = parts[2] = operand;
    }
  else if (!TARGET_64BIT)
    {
      if (mode == DImode)
        split_di (&operand, 1, &parts[0], &parts[1]);
      else
        {
          if (REG_P (operand))
            {
              /* Hard-register splitting is only valid after reload.  */
              if (!reload_completed)
                abort ();
              parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
              parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
              if (size == 3)
                parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
            }
          else if (offsettable_memref_p (operand))
            {
              operand = adjust_address (operand, SImode, 0);
              parts[0] = operand;
              parts[1] = adjust_address (operand, SImode, 4);
              if (size == 3)
                parts[2] = adjust_address (operand, SImode, 8);
            }
          else if (GET_CODE (operand) == CONST_DOUBLE)
            {
              REAL_VALUE_TYPE r;
              long l[4];

              /* Decompose an FP constant into its target word images.  */
              REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
              switch (mode)
                {
                case XFmode:
                  REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
                  parts[2] = gen_int_mode (l[2], SImode);
                  break;
                case DFmode:
                  REAL_VALUE_TO_TARGET_DOUBLE (r, l);
                  break;
                default:
                  abort ();
                }
              parts[1] = gen_int_mode (l[1], SImode);
              parts[0] = gen_int_mode (l[0], SImode);
            }
          else
            abort ();
        }
    }
  else
    {
      if (mode == TImode)
        split_ti (&operand, 1, &parts[0], &parts[1]);
      if (mode == XFmode || mode == TFmode)
        {
          /* 64-bit XFmode stores only 80 bits; its upper part is SImode.  */
          enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
          if (REG_P (operand))
            {
              if (!reload_completed)
                abort ();
              parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
              parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
            }
          else if (offsettable_memref_p (operand))
            {
              operand = adjust_address (operand, DImode, 0);
              parts[0] = operand;
              parts[1] = adjust_address (operand, upper_mode, 8);
            }
          else if (GET_CODE (operand) == CONST_DOUBLE)
            {
              REAL_VALUE_TYPE r;
              long l[3];

              REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
              real_to_target (l, &r, mode);
              /* Do not use shift by 32 to avoid warning on 32bit systems.  */
              if (HOST_BITS_PER_WIDE_INT >= 64)
                parts[0]
                  = gen_int_mode
                      ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
                       + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
                       DImode);
              else
                parts[0] = immed_double_const (l[0], l[1], DImode);
              if (upper_mode == SImode)
                parts[1] = gen_int_mode (l[2], SImode);
              else if (HOST_BITS_PER_WIDE_INT >= 64)
                parts[1]
                  = gen_int_mode
                      ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
                       + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
                       DImode);
              else
                parts[1] = immed_double_const (l[2], l[3], DImode);
            }
          else
            abort ();
        }
    }

  return size;
}
10676
/* Emit insns to perform a move or push of DI, DF, and XF values.
   Return false when normal moves are needed; true when all required
   insns have been emitted.  Operands 2-4 contain the input values
   in the correct order; operands 5-7 contain the output values.  */
void
ix86_split_long_move (rtx operands[])
{
  rtx part[2][3];       /* part[0] = destination pieces, part[1] = source.  */
  int nparts;
  int push = 0;
  int collisions = 0;
  enum machine_mode mode = GET_MODE (operands[0]);

  /* The DFmode expanders may ask us to move double.
     For 64bit target this is single move.  By hiding the fact
     here we simplify i386.md splitters.  */
  if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
    {
      /* Optimize constant pool reference to immediates.  This is used by
         fp moves, that force all constants to memory to allow combining.  */

      if (GET_CODE (operands[1]) == MEM
          && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
          && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
        operands[1] = get_pool_constant (XEXP (operands[1], 0));
      if (push_operand (operands[0], VOIDmode))
        {
          operands[0] = copy_rtx (operands[0]);
          PUT_MODE (operands[0], Pmode);
        }
      else
        operands[0] = gen_lowpart (DImode, operands[0]);
      operands[1] = gen_lowpart (DImode, operands[1]);
      emit_move_insn (operands[0], operands[1]);
      return;
    }

  /* The only non-offsettable memory we handle is push.  */
  if (push_operand (operands[0], VOIDmode))
    push = 1;
  else if (GET_CODE (operands[0]) == MEM
           && ! offsettable_memref_p (operands[0]))
    abort ();

  nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
  ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));

  /* When emitting push, take care for source operands on the stack.
     Each push moves the stack pointer, so a stack-relative source part
     must be re-addressed via the address of the part pushed before it.  */
  if (push && GET_CODE (operands[1]) == MEM
      && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
    {
      if (nparts == 3)
        part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
                                     XEXP (part[1][2], 0));
      part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
                                   XEXP (part[1][1], 0));
    }

  /* We need to do copy in the right order in case an address register
     of the source overlaps the destination.  */
  if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
    {
      if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
        collisions++;
      if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
        collisions++;
      if (nparts == 3
          && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
        collisions++;

      /* Collision in the middle part can be handled by reordering.  */
      if (collisions == 1 && nparts == 3
          && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
        {
          rtx tmp;
          tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
          tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
        }

      /* If there are more collisions, we can't handle it by reordering.
         Do an lea to the last part and use only one colliding move.  */
      else if (collisions > 1)
        {
          rtx base;

          collisions = 1;

          base = part[0][nparts - 1];

          /* Handle the case when the last part isn't valid for lea.
             Happens in 64-bit mode storing the 12-byte XFmode.  */
          if (GET_MODE (base) != Pmode)
            base = gen_rtx_REG (Pmode, REGNO (base));

          emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
          part[1][0] = replace_equiv_address (part[1][0], base);
          part[1][1] = replace_equiv_address (part[1][1],
                                              plus_constant (base, UNITS_PER_WORD));
          if (nparts == 3)
            part[1][2] = replace_equiv_address (part[1][2],
                                                plus_constant (base, 8));
        }
    }

  if (push)
    {
      if (!TARGET_64BIT)
        {
          if (nparts == 3)
            {
              /* Pad a 12-byte XFmode push to 16 bytes when long double
                 is 128 bits wide.  */
              if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
                emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
              emit_move_insn (part[0][2], part[1][2]);
            }
        }
      else
        {
          /* In 64bit mode we don't have 32bit push available.  In case this is
             register, it is OK - we will just use larger counterpart.  We also
             retype memory - these comes from attempt to avoid REX prefix on
             moving of second half of TFmode value.  */
          if (GET_MODE (part[1][1]) == SImode)
            {
              if (GET_CODE (part[1][1]) == MEM)
                part[1][1] = adjust_address (part[1][1], DImode, 0);
              else if (REG_P (part[1][1]))
                part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
              else
                abort ();
              if (GET_MODE (part[1][0]) == SImode)
                part[1][0] = part[1][1];
            }
        }
      /* Pushes are emitted highest part first.  */
      emit_move_insn (part[0][1], part[1][1]);
      emit_move_insn (part[0][0], part[1][0]);
      return;
    }

  /* Choose correct order to not overwrite the source before it is copied.  */
  if ((REG_P (part[0][0])
       && REG_P (part[1][1])
       && (REGNO (part[0][0]) == REGNO (part[1][1])
           || (nparts == 3
               && REGNO (part[0][0]) == REGNO (part[1][2]))))
      || (collisions > 0
          && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
    {
      /* Copy highest part first.  */
      if (nparts == 3)
        {
          operands[2] = part[0][2];
          operands[3] = part[0][1];
          operands[4] = part[0][0];
          operands[5] = part[1][2];
          operands[6] = part[1][1];
          operands[7] = part[1][0];
        }
      else
        {
          operands[2] = part[0][1];
          operands[3] = part[0][0];
          operands[5] = part[1][1];
          operands[6] = part[1][0];
        }
    }
  else
    {
      /* Copy lowest part first.  */
      if (nparts == 3)
        {
          operands[2] = part[0][0];
          operands[3] = part[0][1];
          operands[4] = part[0][2];
          operands[5] = part[1][0];
          operands[6] = part[1][1];
          operands[7] = part[1][2];
        }
      else
        {
          operands[2] = part[0][0];
          operands[3] = part[0][1];
          operands[5] = part[1][0];
          operands[6] = part[1][1];
        }
    }
  emit_move_insn (operands[2], operands[5]);
  emit_move_insn (operands[3], operands[6]);
  if (nparts == 3)
    emit_move_insn (operands[4], operands[7]);

  return;
}
32b5b1aa 10868
e075ae69 10869void
b96a374d 10870ix86_split_ashldi (rtx *operands, rtx scratch)
32b5b1aa 10871{
e075ae69
RH
10872 rtx low[2], high[2];
10873 int count;
b985a30f 10874
e075ae69
RH
10875 if (GET_CODE (operands[2]) == CONST_INT)
10876 {
10877 split_di (operands, 2, low, high);
10878 count = INTVAL (operands[2]) & 63;
32b5b1aa 10879
e075ae69
RH
10880 if (count >= 32)
10881 {
10882 emit_move_insn (high[0], low[1]);
10883 emit_move_insn (low[0], const0_rtx);
b985a30f 10884
e075ae69
RH
10885 if (count > 32)
10886 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
10887 }
10888 else
10889 {
10890 if (!rtx_equal_p (operands[0], operands[1]))
10891 emit_move_insn (operands[0], operands[1]);
10892 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
10893 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
10894 }
10895 }
10896 else
10897 {
10898 if (!rtx_equal_p (operands[0], operands[1]))
10899 emit_move_insn (operands[0], operands[1]);
b985a30f 10900
e075ae69 10901 split_di (operands, 1, low, high);
b985a30f 10902
e075ae69
RH
10903 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
10904 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
32b5b1aa 10905
fe577e58 10906 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
e075ae69 10907 {
fe577e58 10908 if (! no_new_pseudos)
e075ae69
RH
10909 scratch = force_reg (SImode, const0_rtx);
10910 else
10911 emit_move_insn (scratch, const0_rtx);
10912
10913 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
10914 scratch));
10915 }
10916 else
10917 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
10918 }
e9a25f70 10919}
32b5b1aa 10920
e075ae69 10921void
b96a374d 10922ix86_split_ashrdi (rtx *operands, rtx scratch)
32b5b1aa 10923{
e075ae69
RH
10924 rtx low[2], high[2];
10925 int count;
32b5b1aa 10926
e075ae69
RH
10927 if (GET_CODE (operands[2]) == CONST_INT)
10928 {
10929 split_di (operands, 2, low, high);
10930 count = INTVAL (operands[2]) & 63;
32b5b1aa 10931
8937b6a2
RS
10932 if (count == 63)
10933 {
10934 emit_move_insn (high[0], high[1]);
10935 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10936 emit_move_insn (low[0], high[0]);
10937
10938 }
10939 else if (count >= 32)
e075ae69
RH
10940 {
10941 emit_move_insn (low[0], high[1]);
32b5b1aa 10942
e075ae69
RH
10943 if (! reload_completed)
10944 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
10945 else
10946 {
10947 emit_move_insn (high[0], low[0]);
10948 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10949 }
10950
10951 if (count > 32)
10952 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
10953 }
10954 else
10955 {
10956 if (!rtx_equal_p (operands[0], operands[1]))
10957 emit_move_insn (operands[0], operands[1]);
10958 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10959 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
10960 }
10961 }
10962 else
32b5b1aa 10963 {
e075ae69
RH
10964 if (!rtx_equal_p (operands[0], operands[1]))
10965 emit_move_insn (operands[0], operands[1]);
10966
10967 split_di (operands, 1, low, high);
10968
10969 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10970 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
10971
fe577e58 10972 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
e075ae69 10973 {
fe577e58 10974 if (! no_new_pseudos)
e075ae69
RH
10975 scratch = gen_reg_rtx (SImode);
10976 emit_move_insn (scratch, high[0]);
10977 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
10978 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10979 scratch));
10980 }
10981 else
10982 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
32b5b1aa 10983 }
e075ae69 10984}
32b5b1aa 10985
e075ae69 10986void
b96a374d 10987ix86_split_lshrdi (rtx *operands, rtx scratch)
e075ae69
RH
10988{
10989 rtx low[2], high[2];
10990 int count;
32b5b1aa 10991
e075ae69 10992 if (GET_CODE (operands[2]) == CONST_INT)
32b5b1aa 10993 {
e075ae69
RH
10994 split_di (operands, 2, low, high);
10995 count = INTVAL (operands[2]) & 63;
10996
10997 if (count >= 32)
c7271385 10998 {
e075ae69
RH
10999 emit_move_insn (low[0], high[1]);
11000 emit_move_insn (high[0], const0_rtx);
32b5b1aa 11001
e075ae69
RH
11002 if (count > 32)
11003 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
11004 }
11005 else
11006 {
11007 if (!rtx_equal_p (operands[0], operands[1]))
11008 emit_move_insn (operands[0], operands[1]);
11009 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
11010 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
11011 }
32b5b1aa 11012 }
e075ae69
RH
11013 else
11014 {
11015 if (!rtx_equal_p (operands[0], operands[1]))
11016 emit_move_insn (operands[0], operands[1]);
32b5b1aa 11017
e075ae69
RH
11018 split_di (operands, 1, low, high);
11019
11020 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
11021 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
11022
11023 /* Heh. By reversing the arguments, we can reuse this pattern. */
fe577e58 11024 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
e075ae69 11025 {
fe577e58 11026 if (! no_new_pseudos)
e075ae69
RH
11027 scratch = force_reg (SImode, const0_rtx);
11028 else
11029 emit_move_insn (scratch, const0_rtx);
11030
11031 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
11032 scratch));
11033 }
11034 else
11035 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
11036 }
32b5b1aa 11037}
3f803cd9 11038
0407c02b 11039/* Helper function for the string operations below. Dest VARIABLE whether
0945b39d
JH
11040 it is aligned to VALUE bytes. If true, jump to the label. */
11041static rtx
b96a374d 11042ix86_expand_aligntest (rtx variable, int value)
0945b39d
JH
11043{
11044 rtx label = gen_label_rtx ();
11045 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
11046 if (GET_MODE (variable) == DImode)
11047 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
11048 else
11049 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
11050 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
d43e0b7d 11051 1, label);
0945b39d
JH
11052 return label;
11053}
11054
11055/* Adjust COUNTER by the VALUE. */
11056static void
b96a374d 11057ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
0945b39d
JH
11058{
11059 if (GET_MODE (countreg) == DImode)
11060 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
11061 else
11062 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
11063}
11064
/* Zero extend possibly SImode EXP to Pmode register.
   Returns a fresh or copied Pmode register holding EXP's value:
   constants (VOIDmode) are forced into a register, Pmode values are
   copied, and anything else is assumed SImode and zero-extended
   (only reachable on 64-bit targets, where Pmode is DImode).  */
rtx
ix86_zero_extend_to_Pmode (rtx exp)
{
  rtx r;
  if (GET_MODE (exp) == VOIDmode)
    return force_reg (Pmode, exp);
  if (GET_MODE (exp) == Pmode)
    return copy_to_mode_reg (Pmode, exp);
  r = gen_reg_rtx (Pmode);
  emit_insn (gen_zero_extendsidi2 (r, exp));
  return r;
}
11078
/* Expand string move (memcpy) operation.  Use i386 string operations
   when profitable.  expand_clrstr contains similar code.

   DST and SRC are the BLKmode destination and source MEMs, COUNT_EXP
   the byte count and ALIGN_EXP the known alignment.  Returns 1 when
   the copy was expanded inline, 0 to fall back to a library call.  */
int
ix86_expand_movstr (rtx dst, rtx src, rtx count_exp, rtx align_exp)
{
  rtx srcreg, destreg, countreg, srcexp, destexp;
  enum machine_mode counter_mode;
  HOST_WIDE_INT align = 0;
  unsigned HOST_WIDE_INT count = 0;

  if (GET_CODE (align_exp) == CONST_INT)
    align = INTVAL (align_exp);

  /* Can't use any of this if the user has appropriated esi or edi.  */
  if (global_regs[4] || global_regs[5])
    return 0;

  /* This simple hack avoids all inlining code and simplifies code below.  */
  if (!TARGET_ALIGN_STRINGOPS)
    align = 64;

  if (GET_CODE (count_exp) == CONST_INT)
    {
      count = INTVAL (count_exp);
      if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
        return 0;
    }

  /* Figure out proper mode for counter.  For 32bits it is always SImode,
     for 64bits use SImode when possible, otherwise DImode.
     Set count to number of bytes copied when known at compile time.  */
  if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
      || x86_64_zero_extended_value (count_exp))
    counter_mode = SImode;
  else
    counter_mode = DImode;

  if (counter_mode != SImode && counter_mode != DImode)
    abort ();

  destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
  if (destreg != XEXP (dst, 0))
    dst = replace_equiv_address_nv (dst, destreg);
  srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
  if (srcreg != XEXP (src, 0))
    src = replace_equiv_address_nv (src, srcreg);

  /* When optimizing for size emit simple rep ; movsb instruction for
     counts not divisible by 4.  */

  if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
    {
      emit_insn (gen_cld ());
      countreg = ix86_zero_extend_to_Pmode (count_exp);
      destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
      srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
      emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
                              destexp, srcexp));
    }

  /* For constant aligned (or small unaligned) copies use rep movsl
     followed by code copying the rest.  For PentiumPro ensure 8 byte
     alignment to allow rep movsl acceleration.  */

  else if (count != 0
           && (align >= 8
               || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
               || optimize_size || count < (unsigned int) 64))
    {
      unsigned HOST_WIDE_INT offset = 0;
      /* Word size of the rep move: 8 bytes on 64-bit unless -Os.  */
      int size = TARGET_64BIT && !optimize_size ? 8 : 4;
      rtx srcmem, dstmem;

      emit_insn (gen_cld ());
      /* Bulk of the copy via rep movsl/movsq on whole words.  */
      if (count & ~(size - 1))
        {
          countreg = copy_to_mode_reg (counter_mode,
                                       GEN_INT ((count >> (size == 4 ? 2 : 3))
                                                & (TARGET_64BIT ? -1 : 0x3fffffff)));
          countreg = ix86_zero_extend_to_Pmode (countreg);

          destexp = gen_rtx_ASHIFT (Pmode, countreg,
                                    GEN_INT (size == 4 ? 2 : 3));
          srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
          destexp = gen_rtx_PLUS (Pmode, destexp, destreg);

          emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
                                  countreg, destexp, srcexp));
          offset = count & ~(size - 1);
        }
      /* Copy the remaining 1-7 bytes with individual moves.  */
      if (size == 8 && (count & 0x04))
        {
          srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
                                                 offset);
          dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
                                                 offset);
          emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
          offset += 4;
        }
      if (count & 0x02)
        {
          srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
                                                 offset);
          dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
                                                 offset);
          emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
          offset += 2;
        }
      if (count & 0x01)
        {
          srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
                                                 offset);
          dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
                                                 offset);
          emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
        }
    }
  /* The generic code based on the glibc implementation:
     - align destination to 4 bytes (8 byte alignment is used for PentiumPro
     allowing accelerated copying there)
     - copy the data using rep movsl
     - copy the rest.  */
  else
    {
      rtx countreg2;
      rtx label = NULL;
      rtx srcmem, dstmem;
      int desired_alignment = (TARGET_PENTIUMPRO
                               && (count == 0 || count >= (unsigned int) 260)
                               ? 8 : UNITS_PER_WORD);
      /* Get rid of MEM_OFFSETs, they won't be accurate.  */
      dst = change_address (dst, BLKmode, destreg);
      src = change_address (src, BLKmode, srcreg);

      /* In case we don't know anything about the alignment, default to
         library version, since it is usually equally fast and result in
         shorter code.

         Also emit call when we know that the count is large and call overhead
         will not be important.  */
      if (!TARGET_INLINE_ALL_STRINGOPS
          && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
        return 0;

      if (TARGET_SINGLE_STRINGOP)
        emit_insn (gen_cld ());

      countreg2 = gen_reg_rtx (Pmode);
      countreg = copy_to_mode_reg (counter_mode, count_exp);

      /* We don't use loops to align destination and to copy parts smaller
         than 4 bytes, because gcc is able to optimize such code better (in
         the case the destination or the count really is aligned, gcc is often
         able to predict the branches) and also it is friendlier to the
         hardware branch prediction.

         Using loops is beneficial for generic case, because we can
         handle small counts using the loops.  Many CPUs (such as Athlon)
         have large REP prefix setup costs.

         This is quite costly.  Maybe we can revisit this decision later or
         add some customizability to this code.  */

      if (count == 0 && align < desired_alignment)
        {
          label = gen_label_rtx ();
          emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
                                   LEU, 0, counter_mode, 1, label);
        }
      /* Byte-, halfword- and word-sized prologue moves that bump the
         destination up to the desired alignment.  */
      if (align <= 1)
        {
          rtx label = ix86_expand_aligntest (destreg, 1);
          srcmem = change_address (src, QImode, srcreg);
          dstmem = change_address (dst, QImode, destreg);
          emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
          ix86_adjust_counter (countreg, 1);
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (align <= 2)
        {
          rtx label = ix86_expand_aligntest (destreg, 2);
          srcmem = change_address (src, HImode, srcreg);
          dstmem = change_address (dst, HImode, destreg);
          emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
          ix86_adjust_counter (countreg, 2);
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (align <= 4 && desired_alignment > 4)
        {
          rtx label = ix86_expand_aligntest (destreg, 4);
          srcmem = change_address (src, SImode, srcreg);
          dstmem = change_address (dst, SImode, destreg);
          emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
          ix86_adjust_counter (countreg, 4);
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }

      if (label && desired_alignment > 4 && !TARGET_64BIT)
        {
          emit_label (label);
          LABEL_NUSES (label) = 1;
          label = NULL_RTX;
        }
      if (!TARGET_SINGLE_STRINGOP)
        emit_insn (gen_cld ());
      /* Main copy: rep movsq (64-bit) or rep movsl on the word count.  */
      if (TARGET_64BIT)
        {
          emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
                                  GEN_INT (3)));
          destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
        }
      else
        {
          emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
          destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
        }
      srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
      destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
      emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
                              countreg2, destexp, srcexp));

      if (label)
        {
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      /* Epilogue: copy the remaining bytes not covered by the word copy,
         testing the residual count at run time when it is unknown.  */
      if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
        {
          srcmem = change_address (src, SImode, srcreg);
          dstmem = change_address (dst, SImode, destreg);
          emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
        }
      if ((align <= 4 || count == 0) && TARGET_64BIT)
        {
          rtx label = ix86_expand_aligntest (countreg, 4);
          srcmem = change_address (src, SImode, srcreg);
          dstmem = change_address (dst, SImode, destreg);
          emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (align > 2 && count != 0 && (count & 2))
        {
          srcmem = change_address (src, HImode, srcreg);
          dstmem = change_address (dst, HImode, destreg);
          emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
        }
      if (align <= 2 || count == 0)
        {
          rtx label = ix86_expand_aligntest (countreg, 2);
          srcmem = change_address (src, HImode, srcreg);
          dstmem = change_address (dst, HImode, destreg);
          emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (align > 1 && count != 0 && (count & 1))
        {
          srcmem = change_address (src, QImode, srcreg);
          dstmem = change_address (dst, QImode, destreg);
          emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
        }
      if (align <= 1 || count == 0)
        {
          rtx label = ix86_expand_aligntest (countreg, 1);
          srcmem = change_address (src, QImode, srcreg);
          dstmem = change_address (dst, QImode, destreg);
          emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
    }

  return 1;
}
11357
11358/* Expand string clear operation (bzero). Use i386 string operations when
11359 profitable. expand_movstr contains similar code. */
11360int
4e44c1ef 11361ix86_expand_clrstr (rtx dst, rtx count_exp, rtx align_exp)
0945b39d 11362{
4e44c1ef 11363 rtx destreg, zeroreg, countreg, destexp;
0945b39d
JH
11364 enum machine_mode counter_mode;
11365 HOST_WIDE_INT align = 0;
11366 unsigned HOST_WIDE_INT count = 0;
11367
11368 if (GET_CODE (align_exp) == CONST_INT)
11369 align = INTVAL (align_exp);
11370
d0a5295a
RH
11371 /* Can't use any of this if the user has appropriated esi. */
11372 if (global_regs[4])
11373 return 0;
11374
5519a4f9 11375 /* This simple hack avoids all inlining code and simplifies code below. */
0945b39d
JH
11376 if (!TARGET_ALIGN_STRINGOPS)
11377 align = 32;
11378
11379 if (GET_CODE (count_exp) == CONST_INT)
26771da7
JH
11380 {
11381 count = INTVAL (count_exp);
11382 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
11383 return 0;
11384 }
0945b39d
JH
11385 /* Figure out proper mode for counter. For 32bits it is always SImode,
11386 for 64bits use SImode when possible, otherwise DImode.
11387 Set count to number of bytes copied when known at compile time. */
11388 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
11389 || x86_64_zero_extended_value (count_exp))
11390 counter_mode = SImode;
11391 else
11392 counter_mode = DImode;
11393
4e44c1ef
JJ
11394 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
11395 if (destreg != XEXP (dst, 0))
11396 dst = replace_equiv_address_nv (dst, destreg);
0945b39d
JH
11397
11398 emit_insn (gen_cld ());
11399
11400 /* When optimizing for size emit simple rep ; movsb instruction for
11401 counts not divisible by 4. */
11402
11403 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
11404 {
11405 countreg = ix86_zero_extend_to_Pmode (count_exp);
11406 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
4e44c1ef
JJ
11407 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
11408 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
0945b39d
JH
11409 }
11410 else if (count != 0
11411 && (align >= 8
11412 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
b531087a 11413 || optimize_size || count < (unsigned int) 64))
0945b39d
JH
11414 {
11415 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
4e44c1ef
JJ
11416 unsigned HOST_WIDE_INT offset = 0;
11417
0945b39d
JH
11418 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
11419 if (count & ~(size - 1))
11420 {
11421 countreg = copy_to_mode_reg (counter_mode,
11422 GEN_INT ((count >> (size == 4 ? 2 : 3))
11423 & (TARGET_64BIT ? -1 : 0x3fffffff)));
11424 countreg = ix86_zero_extend_to_Pmode (countreg);
4e44c1ef
JJ
11425 destexp = gen_rtx_ASHIFT (Pmode, countreg, GEN_INT (size == 4 ? 2 : 3));
11426 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11427 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
11428 offset = count & ~(size - 1);
0945b39d
JH
11429 }
11430 if (size == 8 && (count & 0x04))
4e44c1ef
JJ
11431 {
11432 rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
11433 offset);
11434 emit_insn (gen_strset (destreg, mem,
0945b39d 11435 gen_rtx_SUBREG (SImode, zeroreg, 0)));
4e44c1ef
JJ
11436 offset += 4;
11437 }
0945b39d 11438 if (count & 0x02)
4e44c1ef
JJ
11439 {
11440 rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
11441 offset);
11442 emit_insn (gen_strset (destreg, mem,
0945b39d 11443 gen_rtx_SUBREG (HImode, zeroreg, 0)));
4e44c1ef
JJ
11444 offset += 2;
11445 }
0945b39d 11446 if (count & 0x01)
4e44c1ef
JJ
11447 {
11448 rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
11449 offset);
11450 emit_insn (gen_strset (destreg, mem,
0945b39d 11451 gen_rtx_SUBREG (QImode, zeroreg, 0)));
4e44c1ef 11452 }
0945b39d
JH
11453 }
11454 else
11455 {
11456 rtx countreg2;
11457 rtx label = NULL;
37ad04a5
JH
11458 /* Compute desired alignment of the string operation. */
11459 int desired_alignment = (TARGET_PENTIUMPRO
11460 && (count == 0 || count >= (unsigned int) 260)
11461 ? 8 : UNITS_PER_WORD);
0945b39d
JH
11462
11463 /* In case we don't know anything about the alignment, default to
11464 library version, since it is usually equally fast and result in
4977bab6
ZW
11465 shorter code.
11466
11467 Also emit call when we know that the count is large and call overhead
11468 will not be important. */
11469 if (!TARGET_INLINE_ALL_STRINGOPS
11470 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
0945b39d
JH
11471 return 0;
11472
11473 if (TARGET_SINGLE_STRINGOP)
11474 emit_insn (gen_cld ());
11475
11476 countreg2 = gen_reg_rtx (Pmode);
11477 countreg = copy_to_mode_reg (counter_mode, count_exp);
11478 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
4e44c1ef
JJ
11479 /* Get rid of MEM_OFFSET, it won't be accurate. */
11480 dst = change_address (dst, BLKmode, destreg);
0945b39d 11481
37ad04a5 11482 if (count == 0 && align < desired_alignment)
0945b39d
JH
11483 {
11484 label = gen_label_rtx ();
37ad04a5 11485 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
d43e0b7d 11486 LEU, 0, counter_mode, 1, label);
0945b39d
JH
11487 }
11488 if (align <= 1)
11489 {
11490 rtx label = ix86_expand_aligntest (destreg, 1);
4e44c1ef
JJ
11491 emit_insn (gen_strset (destreg, dst,
11492 gen_rtx_SUBREG (QImode, zeroreg, 0)));
0945b39d
JH
11493 ix86_adjust_counter (countreg, 1);
11494 emit_label (label);
11495 LABEL_NUSES (label) = 1;
11496 }
11497 if (align <= 2)
11498 {
11499 rtx label = ix86_expand_aligntest (destreg, 2);
4e44c1ef
JJ
11500 emit_insn (gen_strset (destreg, dst,
11501 gen_rtx_SUBREG (HImode, zeroreg, 0)));
0945b39d
JH
11502 ix86_adjust_counter (countreg, 2);
11503 emit_label (label);
11504 LABEL_NUSES (label) = 1;
11505 }
37ad04a5 11506 if (align <= 4 && desired_alignment > 4)
0945b39d
JH
11507 {
11508 rtx label = ix86_expand_aligntest (destreg, 4);
4e44c1ef
JJ
11509 emit_insn (gen_strset (destreg, dst,
11510 (TARGET_64BIT
11511 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
11512 : zeroreg)));
0945b39d
JH
11513 ix86_adjust_counter (countreg, 4);
11514 emit_label (label);
11515 LABEL_NUSES (label) = 1;
11516 }
11517
37ad04a5
JH
11518 if (label && desired_alignment > 4 && !TARGET_64BIT)
11519 {
11520 emit_label (label);
11521 LABEL_NUSES (label) = 1;
11522 label = NULL_RTX;
11523 }
11524
0945b39d
JH
11525 if (!TARGET_SINGLE_STRINGOP)
11526 emit_insn (gen_cld ());
11527 if (TARGET_64BIT)
11528 {
11529 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11530 GEN_INT (3)));
4e44c1ef 11531 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
0945b39d
JH
11532 }
11533 else
11534 {
4e44c1ef
JJ
11535 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
11536 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
0945b39d 11537 }
4e44c1ef
JJ
11538 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11539 emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));
11540
0945b39d
JH
11541 if (label)
11542 {
11543 emit_label (label);
11544 LABEL_NUSES (label) = 1;
11545 }
37ad04a5 11546
0945b39d 11547 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
4e44c1ef
JJ
11548 emit_insn (gen_strset (destreg, dst,
11549 gen_rtx_SUBREG (SImode, zeroreg, 0)));
0945b39d
JH
11550 if (TARGET_64BIT && (align <= 4 || count == 0))
11551 {
79258dce 11552 rtx label = ix86_expand_aligntest (countreg, 4);
4e44c1ef
JJ
11553 emit_insn (gen_strset (destreg, dst,
11554 gen_rtx_SUBREG (SImode, zeroreg, 0)));
0945b39d
JH
11555 emit_label (label);
11556 LABEL_NUSES (label) = 1;
11557 }
11558 if (align > 2 && count != 0 && (count & 2))
4e44c1ef
JJ
11559 emit_insn (gen_strset (destreg, dst,
11560 gen_rtx_SUBREG (HImode, zeroreg, 0)));
0945b39d
JH
11561 if (align <= 2 || count == 0)
11562 {
74411039 11563 rtx label = ix86_expand_aligntest (countreg, 2);
4e44c1ef
JJ
11564 emit_insn (gen_strset (destreg, dst,
11565 gen_rtx_SUBREG (HImode, zeroreg, 0)));
0945b39d
JH
11566 emit_label (label);
11567 LABEL_NUSES (label) = 1;
11568 }
11569 if (align > 1 && count != 0 && (count & 1))
4e44c1ef
JJ
11570 emit_insn (gen_strset (destreg, dst,
11571 gen_rtx_SUBREG (QImode, zeroreg, 0)));
0945b39d
JH
11572 if (align <= 1 || count == 0)
11573 {
74411039 11574 rtx label = ix86_expand_aligntest (countreg, 1);
4e44c1ef
JJ
11575 emit_insn (gen_strset (destreg, dst,
11576 gen_rtx_SUBREG (QImode, zeroreg, 0)));
0945b39d
JH
11577 emit_label (label);
11578 LABEL_NUSES (label) = 1;
11579 }
11580 }
11581 return 1;
11582}
4e44c1ef 11583
0945b39d
JH
11584/* Expand strlen. */
11585int
b96a374d 11586ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
0945b39d
JH
11587{
11588 rtx addr, scratch1, scratch2, scratch3, scratch4;
11589
11590 /* The generic case of strlen expander is long. Avoid it's
11591 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
11592
11593 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11594 && !TARGET_INLINE_ALL_STRINGOPS
11595 && !optimize_size
11596 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
11597 return 0;
11598
11599 addr = force_reg (Pmode, XEXP (src, 0));
11600 scratch1 = gen_reg_rtx (Pmode);
11601
11602 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11603 && !optimize_size)
11604 {
11605 /* Well it seems that some optimizer does not combine a call like
11606 foo(strlen(bar), strlen(bar));
11607 when the move and the subtraction is done here. It does calculate
11608 the length just once when these instructions are done inside of
11609 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
11610 often used and I use one fewer register for the lifetime of
11611 output_strlen_unroll() this is better. */
11612
11613 emit_move_insn (out, addr);
11614
4e44c1ef 11615 ix86_expand_strlensi_unroll_1 (out, src, align);
0945b39d
JH
11616
11617 /* strlensi_unroll_1 returns the address of the zero at the end of
11618 the string, like memchr(), so compute the length by subtracting
11619 the start address. */
11620 if (TARGET_64BIT)
11621 emit_insn (gen_subdi3 (out, out, addr));
11622 else
11623 emit_insn (gen_subsi3 (out, out, addr));
11624 }
11625 else
11626 {
4e44c1ef 11627 rtx unspec;
0945b39d
JH
11628 scratch2 = gen_reg_rtx (Pmode);
11629 scratch3 = gen_reg_rtx (Pmode);
11630 scratch4 = force_reg (Pmode, constm1_rtx);
11631
11632 emit_move_insn (scratch3, addr);
11633 eoschar = force_reg (QImode, eoschar);
11634
11635 emit_insn (gen_cld ());
4e44c1ef
JJ
11636 src = replace_equiv_address_nv (src, scratch3);
11637
11638 /* If .md starts supporting :P, this can be done in .md. */
11639 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
11640 scratch4), UNSPEC_SCAS);
11641 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
0945b39d
JH
11642 if (TARGET_64BIT)
11643 {
0945b39d
JH
11644 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
11645 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
11646 }
11647 else
11648 {
0945b39d
JH
11649 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
11650 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
11651 }
11652 }
11653 return 1;
11654}
11655
e075ae69
RH
11656/* Expand the appropriate insns for doing strlen if not just doing
11657 repnz; scasb
11658
11659 out = result, initialized with the start address
11660 align_rtx = alignment of the address.
11661 scratch = scratch register, initialized with the startaddress when
77ebd435 11662 not aligned, otherwise undefined
3f803cd9 11663
39e3f58c 11664 This is just the body. It needs the initializations mentioned above and
3f803cd9
SC
11665 some address computing at the end. These things are done in i386.md. */
11666
0945b39d 11667static void
4e44c1ef 11668ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
3f803cd9 11669{
e075ae69
RH
11670 int align;
11671 rtx tmp;
11672 rtx align_2_label = NULL_RTX;
11673 rtx align_3_label = NULL_RTX;
11674 rtx align_4_label = gen_label_rtx ();
11675 rtx end_0_label = gen_label_rtx ();
e075ae69 11676 rtx mem;
e2e52e1b 11677 rtx tmpreg = gen_reg_rtx (SImode);
0945b39d 11678 rtx scratch = gen_reg_rtx (SImode);
e6e81735 11679 rtx cmp;
e075ae69
RH
11680
11681 align = 0;
11682 if (GET_CODE (align_rtx) == CONST_INT)
11683 align = INTVAL (align_rtx);
3f803cd9 11684
e9a25f70 11685 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
3f803cd9 11686
e9a25f70 11687 /* Is there a known alignment and is it less than 4? */
e075ae69 11688 if (align < 4)
3f803cd9 11689 {
0945b39d
JH
11690 rtx scratch1 = gen_reg_rtx (Pmode);
11691 emit_move_insn (scratch1, out);
e9a25f70 11692 /* Is there a known alignment and is it not 2? */
e075ae69 11693 if (align != 2)
3f803cd9 11694 {
e075ae69
RH
11695 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
11696 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
11697
11698 /* Leave just the 3 lower bits. */
0945b39d 11699 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
e075ae69
RH
11700 NULL_RTX, 0, OPTAB_WIDEN);
11701
9076b9c1 11702 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
d43e0b7d 11703 Pmode, 1, align_4_label);
60c81c89 11704 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
d43e0b7d 11705 Pmode, 1, align_2_label);
60c81c89 11706 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
d43e0b7d 11707 Pmode, 1, align_3_label);
3f803cd9
SC
11708 }
11709 else
11710 {
e9a25f70
JL
11711 /* Since the alignment is 2, we have to check 2 or 0 bytes;
11712 check if is aligned to 4 - byte. */
e9a25f70 11713
60c81c89 11714 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
e075ae69
RH
11715 NULL_RTX, 0, OPTAB_WIDEN);
11716
9076b9c1 11717 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
d43e0b7d 11718 Pmode, 1, align_4_label);
3f803cd9
SC
11719 }
11720
4e44c1ef 11721 mem = change_address (src, QImode, out);
e9a25f70 11722
e075ae69 11723 /* Now compare the bytes. */
e9a25f70 11724
0f290768 11725 /* Compare the first n unaligned byte on a byte per byte basis. */
9076b9c1 11726 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
d43e0b7d 11727 QImode, 1, end_0_label);
3f803cd9 11728
0f290768 11729 /* Increment the address. */
0945b39d
JH
11730 if (TARGET_64BIT)
11731 emit_insn (gen_adddi3 (out, out, const1_rtx));
11732 else
11733 emit_insn (gen_addsi3 (out, out, const1_rtx));
e9a25f70 11734
e075ae69
RH
11735 /* Not needed with an alignment of 2 */
11736 if (align != 2)
11737 {
11738 emit_label (align_2_label);
3f803cd9 11739
d43e0b7d
RK
11740 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11741 end_0_label);
e075ae69 11742
0945b39d
JH
11743 if (TARGET_64BIT)
11744 emit_insn (gen_adddi3 (out, out, const1_rtx));
11745 else
11746 emit_insn (gen_addsi3 (out, out, const1_rtx));
e075ae69
RH
11747
11748 emit_label (align_3_label);
11749 }
11750
d43e0b7d
RK
11751 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11752 end_0_label);
e075ae69 11753
0945b39d
JH
11754 if (TARGET_64BIT)
11755 emit_insn (gen_adddi3 (out, out, const1_rtx));
11756 else
11757 emit_insn (gen_addsi3 (out, out, const1_rtx));
3f803cd9
SC
11758 }
11759
e075ae69
RH
11760 /* Generate loop to check 4 bytes at a time. It is not a good idea to
11761 align this loop. It gives only huge programs, but does not help to
11762 speed up. */
11763 emit_label (align_4_label);
3f803cd9 11764
4e44c1ef 11765 mem = change_address (src, SImode, out);
e075ae69 11766 emit_move_insn (scratch, mem);
0945b39d
JH
11767 if (TARGET_64BIT)
11768 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
11769 else
11770 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
e075ae69 11771
e2e52e1b
JH
11772 /* This formula yields a nonzero result iff one of the bytes is zero.
11773 This saves three branches inside loop and many cycles. */
11774
11775 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
11776 emit_insn (gen_one_cmplsi2 (scratch, scratch));
11777 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
7471a1f0 11778 emit_insn (gen_andsi3 (tmpreg, tmpreg,
d8bf17f9 11779 gen_int_mode (0x80808080, SImode)));
d43e0b7d
RK
11780 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
11781 align_4_label);
e2e52e1b
JH
11782
11783 if (TARGET_CMOVE)
11784 {
11785 rtx reg = gen_reg_rtx (SImode);
0945b39d 11786 rtx reg2 = gen_reg_rtx (Pmode);
e2e52e1b
JH
11787 emit_move_insn (reg, tmpreg);
11788 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
11789
0f290768 11790 /* If zero is not in the first two bytes, move two bytes forward. */
16189740 11791 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
e2e52e1b
JH
11792 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11793 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11794 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
11795 gen_rtx_IF_THEN_ELSE (SImode, tmp,
77ebd435
AJ
11796 reg,
11797 tmpreg)));
e2e52e1b 11798 /* Emit lea manually to avoid clobbering of flags. */
0945b39d 11799 emit_insn (gen_rtx_SET (SImode, reg2,
60c81c89 11800 gen_rtx_PLUS (Pmode, out, const2_rtx)));
e2e52e1b
JH
11801
11802 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11803 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11804 emit_insn (gen_rtx_SET (VOIDmode, out,
0945b39d 11805 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
a4f31c00
AJ
11806 reg2,
11807 out)));
e2e52e1b
JH
11808
11809 }
11810 else
11811 {
11812 rtx end_2_label = gen_label_rtx ();
11813 /* Is zero in the first two bytes? */
11814
16189740 11815 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
e2e52e1b
JH
11816 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11817 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
11818 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11819 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
11820 pc_rtx);
11821 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11822 JUMP_LABEL (tmp) = end_2_label;
11823
0f290768 11824 /* Not in the first two. Move two bytes forward. */
e2e52e1b 11825 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
0945b39d 11826 if (TARGET_64BIT)
60c81c89 11827 emit_insn (gen_adddi3 (out, out, const2_rtx));
0945b39d 11828 else
60c81c89 11829 emit_insn (gen_addsi3 (out, out, const2_rtx));
e2e52e1b
JH
11830
11831 emit_label (end_2_label);
11832
11833 }
11834
0f290768 11835 /* Avoid branch in fixing the byte. */
e2e52e1b 11836 tmpreg = gen_lowpart (QImode, tmpreg);
7e08e190 11837 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
e6e81735 11838 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
0945b39d 11839 if (TARGET_64BIT)
e6e81735 11840 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
0945b39d 11841 else
e6e81735 11842 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
e075ae69
RH
11843
11844 emit_label (end_0_label);
11845}
0e07aff3
RH
11846
11847void
0f901c4c
SH
11848ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
11849 rtx callarg2 ATTRIBUTE_UNUSED,
b96a374d 11850 rtx pop, int sibcall)
0e07aff3
RH
11851{
11852 rtx use = NULL, call;
11853
11854 if (pop == const0_rtx)
11855 pop = NULL;
11856 if (TARGET_64BIT && pop)
11857 abort ();
11858
b069de3b
SS
11859#if TARGET_MACHO
11860 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
11861 fnaddr = machopic_indirect_call_target (fnaddr);
11862#else
0e07aff3
RH
11863 /* Static functions and indirect calls don't need the pic register. */
11864 if (! TARGET_64BIT && flag_pic
11865 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
12969f45 11866 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
66edd3b4 11867 use_reg (&use, pic_offset_table_rtx);
0e07aff3
RH
11868
11869 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
11870 {
11871 rtx al = gen_rtx_REG (QImode, 0);
11872 emit_move_insn (al, callarg2);
11873 use_reg (&use, al);
11874 }
b069de3b 11875#endif /* TARGET_MACHO */
0e07aff3
RH
11876
11877 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
11878 {
11879 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11880 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11881 }
4977bab6
ZW
11882 if (sibcall && TARGET_64BIT
11883 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
11884 {
11885 rtx addr;
11886 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
b19ee4bd 11887 fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
4977bab6
ZW
11888 emit_move_insn (fnaddr, addr);
11889 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11890 }
0e07aff3
RH
11891
11892 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
11893 if (retval)
11894 call = gen_rtx_SET (VOIDmode, retval, call);
11895 if (pop)
11896 {
11897 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
11898 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
11899 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
11900 }
11901
11902 call = emit_call_insn (call);
11903 if (use)
11904 CALL_INSN_FUNCTION_USAGE (call) = use;
11905}
fce5a9f2 11906
e075ae69 11907\f
e075ae69
RH
11908/* Clear stack slot assignments remembered from previous functions.
11909 This is called from INIT_EXPANDERS once before RTL is emitted for each
11910 function. */
11911
e2500fed 11912static struct machine_function *
b96a374d 11913ix86_init_machine_status (void)
37b15744 11914{
d7394366
JH
11915 struct machine_function *f;
11916
11917 f = ggc_alloc_cleared (sizeof (struct machine_function));
11918 f->use_fast_prologue_epilogue_nregs = -1;
8330e2c6
AJ
11919
11920 return f;
1526a060
BS
11921}
11922
e075ae69
RH
11923/* Return a MEM corresponding to a stack slot with mode MODE.
11924 Allocate a new slot if necessary.
11925
11926 The RTL for a function can have several slots available: N is
11927 which slot to use. */
11928
11929rtx
b96a374d 11930assign_386_stack_local (enum machine_mode mode, int n)
e075ae69 11931{
ddb0ae00
ZW
11932 struct stack_local_entry *s;
11933
e075ae69
RH
11934 if (n < 0 || n >= MAX_386_STACK_LOCALS)
11935 abort ();
11936
ddb0ae00
ZW
11937 for (s = ix86_stack_locals; s; s = s->next)
11938 if (s->mode == mode && s->n == n)
11939 return s->rtl;
11940
11941 s = (struct stack_local_entry *)
11942 ggc_alloc (sizeof (struct stack_local_entry));
11943 s->n = n;
11944 s->mode = mode;
11945 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
e075ae69 11946
ddb0ae00
ZW
11947 s->next = ix86_stack_locals;
11948 ix86_stack_locals = s;
11949 return s->rtl;
e075ae69 11950}
f996902d
RH
11951
11952/* Construct the SYMBOL_REF for the tls_get_addr function. */
11953
e2500fed 11954static GTY(()) rtx ix86_tls_symbol;
f996902d 11955rtx
b96a374d 11956ix86_tls_get_addr (void)
f996902d 11957{
f996902d 11958
e2500fed 11959 if (!ix86_tls_symbol)
f996902d 11960 {
75d38379
JJ
11961 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
11962 (TARGET_GNU_TLS && !TARGET_64BIT)
11963 ? "___tls_get_addr"
11964 : "__tls_get_addr");
f996902d
RH
11965 }
11966
e2500fed 11967 return ix86_tls_symbol;
f996902d 11968}
e075ae69
RH
11969\f
11970/* Calculate the length of the memory address in the instruction
11971 encoding. Does not include the one-byte modrm, opcode, or prefix. */
11972
11973static int
b96a374d 11974memory_address_length (rtx addr)
e075ae69
RH
11975{
11976 struct ix86_address parts;
11977 rtx base, index, disp;
11978 int len;
11979
11980 if (GET_CODE (addr) == PRE_DEC
0d7d98ee
JH
11981 || GET_CODE (addr) == POST_INC
11982 || GET_CODE (addr) == PRE_MODIFY
11983 || GET_CODE (addr) == POST_MODIFY)
e075ae69 11984 return 0;
3f803cd9 11985
e075ae69
RH
11986 if (! ix86_decompose_address (addr, &parts))
11987 abort ();
3f803cd9 11988
e075ae69
RH
11989 base = parts.base;
11990 index = parts.index;
11991 disp = parts.disp;
11992 len = 0;
3f803cd9 11993
7b65ed54
EB
11994 /* Rule of thumb:
11995 - esp as the base always wants an index,
11996 - ebp as the base always wants a displacement. */
11997
e075ae69
RH
11998 /* Register Indirect. */
11999 if (base && !index && !disp)
12000 {
7b65ed54
EB
12001 /* esp (for its index) and ebp (for its displacement) need
12002 the two-byte modrm form. */
e075ae69
RH
12003 if (addr == stack_pointer_rtx
12004 || addr == arg_pointer_rtx
564d80f4
JH
12005 || addr == frame_pointer_rtx
12006 || addr == hard_frame_pointer_rtx)
e075ae69 12007 len = 1;
3f803cd9 12008 }
e9a25f70 12009
e075ae69
RH
12010 /* Direct Addressing. */
12011 else if (disp && !base && !index)
12012 len = 4;
12013
3f803cd9
SC
12014 else
12015 {
e075ae69
RH
12016 /* Find the length of the displacement constant. */
12017 if (disp)
12018 {
12019 if (GET_CODE (disp) == CONST_INT
9b73c90a
EB
12020 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K')
12021 && base)
e075ae69
RH
12022 len = 1;
12023 else
12024 len = 4;
12025 }
7b65ed54
EB
12026 /* ebp always wants a displacement. */
12027 else if (base == hard_frame_pointer_rtx)
12028 len = 1;
3f803cd9 12029
43f3a59d 12030 /* An index requires the two-byte modrm form.... */
7b65ed54
EB
12031 if (index
12032 /* ...like esp, which always wants an index. */
12033 || base == stack_pointer_rtx
12034 || base == arg_pointer_rtx
12035 || base == frame_pointer_rtx)
e075ae69 12036 len += 1;
3f803cd9
SC
12037 }
12038
e075ae69
RH
12039 return len;
12040}
79325812 12041
5bf0ebab
RH
12042/* Compute default value for "length_immediate" attribute. When SHORTFORM
12043 is set, expect that insn have 8bit immediate alternative. */
e075ae69 12044int
b96a374d 12045ix86_attr_length_immediate_default (rtx insn, int shortform)
e075ae69 12046{
6ef67412
JH
12047 int len = 0;
12048 int i;
6c698a6d 12049 extract_insn_cached (insn);
6ef67412
JH
12050 for (i = recog_data.n_operands - 1; i >= 0; --i)
12051 if (CONSTANT_P (recog_data.operand[i]))
3071fab5 12052 {
6ef67412 12053 if (len)
3071fab5 12054 abort ();
6ef67412
JH
12055 if (shortform
12056 && GET_CODE (recog_data.operand[i]) == CONST_INT
12057 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
12058 len = 1;
12059 else
12060 {
12061 switch (get_attr_mode (insn))
12062 {
12063 case MODE_QI:
12064 len+=1;
12065 break;
12066 case MODE_HI:
12067 len+=2;
12068 break;
12069 case MODE_SI:
12070 len+=4;
12071 break;
14f73b5a
JH
12072 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
12073 case MODE_DI:
12074 len+=4;
12075 break;
6ef67412 12076 default:
c725bd79 12077 fatal_insn ("unknown insn mode", insn);
6ef67412
JH
12078 }
12079 }
3071fab5 12080 }
6ef67412
JH
12081 return len;
12082}
12083/* Compute default value for "length_address" attribute. */
12084int
b96a374d 12085ix86_attr_length_address_default (rtx insn)
6ef67412
JH
12086{
12087 int i;
9b73c90a
EB
12088
12089 if (get_attr_type (insn) == TYPE_LEA)
12090 {
12091 rtx set = PATTERN (insn);
12092 if (GET_CODE (set) == SET)
12093 ;
12094 else if (GET_CODE (set) == PARALLEL
12095 && GET_CODE (XVECEXP (set, 0, 0)) == SET)
12096 set = XVECEXP (set, 0, 0);
12097 else
12098 {
12099#ifdef ENABLE_CHECKING
12100 abort ();
12101#endif
12102 return 0;
12103 }
12104
12105 return memory_address_length (SET_SRC (set));
12106 }
12107
6c698a6d 12108 extract_insn_cached (insn);
1ccbefce
RH
12109 for (i = recog_data.n_operands - 1; i >= 0; --i)
12110 if (GET_CODE (recog_data.operand[i]) == MEM)
e075ae69 12111 {
6ef67412 12112 return memory_address_length (XEXP (recog_data.operand[i], 0));
e075ae69
RH
12113 break;
12114 }
6ef67412 12115 return 0;
3f803cd9 12116}
e075ae69
RH
12117\f
12118/* Return the maximum number of instructions a cpu can issue. */
b657fc39 12119
c237e94a 12120static int
b96a374d 12121ix86_issue_rate (void)
b657fc39 12122{
9e555526 12123 switch (ix86_tune)
b657fc39 12124 {
e075ae69
RH
12125 case PROCESSOR_PENTIUM:
12126 case PROCESSOR_K6:
12127 return 2;
79325812 12128
e075ae69 12129 case PROCESSOR_PENTIUMPRO:
b4e89e2d
JH
12130 case PROCESSOR_PENTIUM4:
12131 case PROCESSOR_ATHLON:
4977bab6 12132 case PROCESSOR_K8:
89c43c0a 12133 case PROCESSOR_NOCONA:
e075ae69 12134 return 3;
b657fc39 12135
b657fc39 12136 default:
e075ae69 12137 return 1;
b657fc39 12138 }
b657fc39
L
12139}
12140
e075ae69
RH
12141/* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
12142 by DEP_INSN and nothing set by DEP_INSN. */
b657fc39 12143
e075ae69 12144static int
b96a374d 12145ix86_flags_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
e075ae69
RH
12146{
12147 rtx set, set2;
b657fc39 12148
e075ae69
RH
12149 /* Simplify the test for uninteresting insns. */
12150 if (insn_type != TYPE_SETCC
12151 && insn_type != TYPE_ICMOV
12152 && insn_type != TYPE_FCMOV
12153 && insn_type != TYPE_IBR)
12154 return 0;
b657fc39 12155
e075ae69
RH
12156 if ((set = single_set (dep_insn)) != 0)
12157 {
12158 set = SET_DEST (set);
12159 set2 = NULL_RTX;
12160 }
12161 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
12162 && XVECLEN (PATTERN (dep_insn), 0) == 2
12163 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
12164 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
12165 {
12166 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
12167 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
12168 }
78a0d70c
ZW
12169 else
12170 return 0;
b657fc39 12171
78a0d70c
ZW
12172 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
12173 return 0;
b657fc39 12174
f5143c46 12175 /* This test is true if the dependent insn reads the flags but
78a0d70c
ZW
12176 not any other potentially set register. */
12177 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
12178 return 0;
12179
12180 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
12181 return 0;
12182
12183 return 1;
e075ae69 12184}
b657fc39 12185
e075ae69
RH
12186/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
12187 address with operands set by DEP_INSN. */
12188
12189static int
b96a374d 12190ix86_agi_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
e075ae69
RH
12191{
12192 rtx addr;
12193
6ad48e84
JH
12194 if (insn_type == TYPE_LEA
12195 && TARGET_PENTIUM)
5fbdde42
RH
12196 {
12197 addr = PATTERN (insn);
12198 if (GET_CODE (addr) == SET)
12199 ;
12200 else if (GET_CODE (addr) == PARALLEL
12201 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
12202 addr = XVECEXP (addr, 0, 0);
12203 else
12204 abort ();
12205 addr = SET_SRC (addr);
12206 }
e075ae69
RH
12207 else
12208 {
12209 int i;
6c698a6d 12210 extract_insn_cached (insn);
1ccbefce
RH
12211 for (i = recog_data.n_operands - 1; i >= 0; --i)
12212 if (GET_CODE (recog_data.operand[i]) == MEM)
e075ae69 12213 {
1ccbefce 12214 addr = XEXP (recog_data.operand[i], 0);
e075ae69
RH
12215 goto found;
12216 }
12217 return 0;
12218 found:;
b657fc39
L
12219 }
12220
e075ae69 12221 return modified_in_p (addr, dep_insn);
b657fc39 12222}
a269a03c 12223
c237e94a 12224static int
b96a374d 12225ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
a269a03c 12226{
e075ae69 12227 enum attr_type insn_type, dep_insn_type;
6ad48e84 12228 enum attr_memory memory, dep_memory;
e075ae69 12229 rtx set, set2;
9b00189f 12230 int dep_insn_code_number;
a269a03c 12231
d1f87653 12232 /* Anti and output dependencies have zero cost on all CPUs. */
e075ae69 12233 if (REG_NOTE_KIND (link) != 0)
309ada50 12234 return 0;
a269a03c 12235
9b00189f
JH
12236 dep_insn_code_number = recog_memoized (dep_insn);
12237
e075ae69 12238 /* If we can't recognize the insns, we can't really do anything. */
9b00189f 12239 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
e075ae69 12240 return cost;
a269a03c 12241
1c71e60e
JH
12242 insn_type = get_attr_type (insn);
12243 dep_insn_type = get_attr_type (dep_insn);
9b00189f 12244
9e555526 12245 switch (ix86_tune)
a269a03c
JC
12246 {
12247 case PROCESSOR_PENTIUM:
e075ae69
RH
12248 /* Address Generation Interlock adds a cycle of latency. */
12249 if (ix86_agi_dependant (insn, dep_insn, insn_type))
12250 cost += 1;
12251
12252 /* ??? Compares pair with jump/setcc. */
12253 if (ix86_flags_dependant (insn, dep_insn, insn_type))
12254 cost = 0;
12255
d1f87653 12256 /* Floating point stores require value to be ready one cycle earlier. */
0f290768 12257 if (insn_type == TYPE_FMOV
e075ae69
RH
12258 && get_attr_memory (insn) == MEMORY_STORE
12259 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12260 cost += 1;
12261 break;
a269a03c 12262
e075ae69 12263 case PROCESSOR_PENTIUMPRO:
6ad48e84 12264 memory = get_attr_memory (insn);
e075ae69
RH
12265
12266 /* INT->FP conversion is expensive. */
12267 if (get_attr_fp_int_src (dep_insn))
12268 cost += 5;
12269
12270 /* There is one cycle extra latency between an FP op and a store. */
12271 if (insn_type == TYPE_FMOV
12272 && (set = single_set (dep_insn)) != NULL_RTX
12273 && (set2 = single_set (insn)) != NULL_RTX
12274 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
12275 && GET_CODE (SET_DEST (set2)) == MEM)
12276 cost += 1;
6ad48e84
JH
12277
12278 /* Show ability of reorder buffer to hide latency of load by executing
12279 in parallel with previous instruction in case
12280 previous instruction is not needed to compute the address. */
12281 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12282 && !ix86_agi_dependant (insn, dep_insn, insn_type))
b96a374d 12283 {
6ad48e84
JH
12284 /* Claim moves to take one cycle, as core can issue one load
12285 at time and the next load can start cycle later. */
12286 if (dep_insn_type == TYPE_IMOV
12287 || dep_insn_type == TYPE_FMOV)
12288 cost = 1;
12289 else if (cost > 1)
12290 cost--;
12291 }
e075ae69 12292 break;
a269a03c 12293
e075ae69 12294 case PROCESSOR_K6:
6ad48e84
JH
12295 memory = get_attr_memory (insn);
12296 dep_memory = get_attr_memory (dep_insn);
e075ae69
RH
12297 /* The esp dependency is resolved before the instruction is really
12298 finished. */
12299 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
12300 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
12301 return 1;
a269a03c 12302
0f290768 12303 /* Since we can't represent delayed latencies of load+operation,
e075ae69 12304 increase the cost here for non-imov insns. */
6ad48e84 12305 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
e075ae69
RH
12306 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
12307
12308 /* INT->FP conversion is expensive. */
12309 if (get_attr_fp_int_src (dep_insn))
12310 cost += 5;
6ad48e84
JH
12311
12312 /* Show ability of reorder buffer to hide latency of load by executing
12313 in parallel with previous instruction in case
12314 previous instruction is not needed to compute the address. */
12315 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12316 && !ix86_agi_dependant (insn, dep_insn, insn_type))
b96a374d 12317 {
6ad48e84
JH
12318 /* Claim moves to take one cycle, as core can issue one load
12319 at time and the next load can start cycle later. */
12320 if (dep_insn_type == TYPE_IMOV
12321 || dep_insn_type == TYPE_FMOV)
12322 cost = 1;
12323 else if (cost > 2)
12324 cost -= 2;
12325 else
12326 cost = 1;
12327 }
a14003ee 12328 break;
e075ae69 12329
309ada50 12330 case PROCESSOR_ATHLON:
4977bab6 12331 case PROCESSOR_K8:
6ad48e84
JH
12332 memory = get_attr_memory (insn);
12333 dep_memory = get_attr_memory (dep_insn);
12334
6ad48e84
JH
12335 /* Show ability of reorder buffer to hide latency of load by executing
12336 in parallel with previous instruction in case
12337 previous instruction is not needed to compute the address. */
12338 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12339 && !ix86_agi_dependant (insn, dep_insn, insn_type))
b96a374d 12340 {
26f74aa3
JH
12341 enum attr_unit unit = get_attr_unit (insn);
12342 int loadcost = 3;
12343
12344 /* Because of the difference between the length of integer and
12345 floating unit pipeline preparation stages, the memory operands
b96a374d 12346 for floating point are cheaper.
26f74aa3 12347
c51e6d85 12348 ??? For Athlon it the difference is most probably 2. */
26f74aa3
JH
12349 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
12350 loadcost = 3;
12351 else
12352 loadcost = TARGET_ATHLON ? 2 : 0;
12353
12354 if (cost >= loadcost)
12355 cost -= loadcost;
6ad48e84
JH
12356 else
12357 cost = 0;
12358 }
309ada50 12359
a269a03c 12360 default:
a269a03c
JC
12361 break;
12362 }
12363
12364 return cost;
12365}
0a726ef1 12366
9b690711 12367static int
b96a374d 12368ia32_use_dfa_pipeline_interface (void)
9b690711 12369{
56bab446
SB
12370 if (TARGET_PENTIUM
12371 || TARGET_PENTIUMPRO
12372 || TARGET_ATHLON_K8)
9b690711
RH
12373 return 1;
12374 return 0;
12375}
12376
12377/* How many alternative schedules to try. This should be as wide as the
12378 scheduling freedom in the DFA, but no wider. Making this value too
12379 large results extra work for the scheduler. */
12380
12381static int
b96a374d 12382ia32_multipass_dfa_lookahead (void)
9b690711 12383{
9e555526 12384 if (ix86_tune == PROCESSOR_PENTIUM)
9b690711 12385 return 2;
56bab446
SB
12386
12387 if (ix86_tune == PROCESSOR_PENTIUMPRO)
12388 return 1;
12389
9b690711 12390 else
56bab446 12391 return 0;
9b690711
RH
12392}
12393
0e4970d7 12394\f
a7180f70
BS
12395/* Compute the alignment given to a constant that is being placed in memory.
12396 EXP is the constant and ALIGN is the alignment that the object would
12397 ordinarily have.
12398 The value of this function is used instead of that alignment to align
12399 the object. */
12400
12401int
b96a374d 12402ix86_constant_alignment (tree exp, int align)
a7180f70
BS
12403{
12404 if (TREE_CODE (exp) == REAL_CST)
12405 {
12406 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
12407 return 64;
12408 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
12409 return 128;
12410 }
4137ba7a
JJ
12411 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
12412 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
12413 return BITS_PER_WORD;
a7180f70
BS
12414
12415 return align;
12416}
12417
12418/* Compute the alignment for a static variable.
12419 TYPE is the data type, and ALIGN is the alignment that
12420 the object would ordinarily have. The value of this function is used
12421 instead of that alignment to align the object. */
12422
12423int
b96a374d 12424ix86_data_alignment (tree type, int align)
a7180f70
BS
12425{
12426 if (AGGREGATE_TYPE_P (type)
12427 && TYPE_SIZE (type)
12428 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12429 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
12430 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
12431 return 256;
12432
0d7d98ee
JH
12433 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12434 to 16byte boundary. */
12435 if (TARGET_64BIT)
12436 {
12437 if (AGGREGATE_TYPE_P (type)
12438 && TYPE_SIZE (type)
12439 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12440 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
12441 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12442 return 128;
12443 }
12444
a7180f70
BS
12445 if (TREE_CODE (type) == ARRAY_TYPE)
12446 {
12447 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12448 return 64;
12449 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12450 return 128;
12451 }
12452 else if (TREE_CODE (type) == COMPLEX_TYPE)
12453 {
0f290768 12454
a7180f70
BS
12455 if (TYPE_MODE (type) == DCmode && align < 64)
12456 return 64;
12457 if (TYPE_MODE (type) == XCmode && align < 128)
12458 return 128;
12459 }
12460 else if ((TREE_CODE (type) == RECORD_TYPE
12461 || TREE_CODE (type) == UNION_TYPE
12462 || TREE_CODE (type) == QUAL_UNION_TYPE)
12463 && TYPE_FIELDS (type))
12464 {
12465 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12466 return 64;
12467 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12468 return 128;
12469 }
12470 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12471 || TREE_CODE (type) == INTEGER_TYPE)
12472 {
12473 if (TYPE_MODE (type) == DFmode && align < 64)
12474 return 64;
12475 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12476 return 128;
12477 }
12478
12479 return align;
12480}
12481
12482/* Compute the alignment for a local variable.
12483 TYPE is the data type, and ALIGN is the alignment that
12484 the object would ordinarily have. The value of this macro is used
12485 instead of that alignment to align the object. */
12486
12487int
b96a374d 12488ix86_local_alignment (tree type, int align)
a7180f70 12489{
0d7d98ee
JH
12490 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12491 to 16byte boundary. */
12492 if (TARGET_64BIT)
12493 {
12494 if (AGGREGATE_TYPE_P (type)
12495 && TYPE_SIZE (type)
12496 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12497 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
12498 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12499 return 128;
12500 }
a7180f70
BS
12501 if (TREE_CODE (type) == ARRAY_TYPE)
12502 {
12503 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12504 return 64;
12505 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12506 return 128;
12507 }
12508 else if (TREE_CODE (type) == COMPLEX_TYPE)
12509 {
12510 if (TYPE_MODE (type) == DCmode && align < 64)
12511 return 64;
12512 if (TYPE_MODE (type) == XCmode && align < 128)
12513 return 128;
12514 }
12515 else if ((TREE_CODE (type) == RECORD_TYPE
12516 || TREE_CODE (type) == UNION_TYPE
12517 || TREE_CODE (type) == QUAL_UNION_TYPE)
12518 && TYPE_FIELDS (type))
12519 {
12520 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12521 return 64;
12522 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12523 return 128;
12524 }
12525 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12526 || TREE_CODE (type) == INTEGER_TYPE)
12527 {
0f290768 12528
a7180f70
BS
12529 if (TYPE_MODE (type) == DFmode && align < 64)
12530 return 64;
12531 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12532 return 128;
12533 }
12534 return align;
12535}
0ed08620
JH
12536\f
12537/* Emit RTL insns to initialize the variable parts of a trampoline.
12538 FNADDR is an RTX for the address of the function's pure code.
12539 CXT is an RTX for the static chain value for the function. */
/* Emit RTL insns to initialize the variable parts of a trampoline at
   TRAMP.  FNADDR is an RTX for the address of the function's pure code;
   CXT is an RTX for the static chain value.  The exact byte values and
   the order in which they are stored define the machine code the
   trampoline executes, so this sequence must not be reordered.  */
void
x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
{
  if (!TARGET_64BIT)
    {
      /* 32-bit trampoline:
	   b9 <cxt>	movl $cxt, %ecx   (static chain in %ecx)
	   e9 <disp>	jmp  fnaddr       (PC-relative)
	 Compute offset from the end of the jmp to the target function;
	 the jmp ends 10 bytes into the trampoline (1+4+1+4).  */
      rtx disp = expand_binop (SImode, sub_optab, fnaddr,
			       plus_constant (tramp, 10),
			       NULL_RTX, 1, OPTAB_DIRECT);
      /* 0xb9 = opcode of "movl $imm32, %ecx".  */
      emit_move_insn (gen_rtx_MEM (QImode, tramp),
		      gen_int_mode (0xb9, QImode));
      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
      /* 0xe9 = opcode of "jmp rel32".  */
      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
		      gen_int_mode (0xe9, QImode));
      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
    }
  else
    {
      int offset = 0;	/* Running byte offset into the trampoline.  */
      /* Try to load address using shorter movl instead of movabs.
	 We may want to support movq for kernel mode, but kernel does not use
	 trampolines at the moment.  */
      if (x86_64_zero_extended_value (fnaddr))
	{
	  fnaddr = copy_to_mode_reg (DImode, fnaddr);
	  /* Bytes 41 bb (HImode store is little-endian): "movl $imm32,
	     %r11d" — zero-extends into %r11, 6 bytes total.  */
	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
			  gen_int_mode (0xbb41, HImode));
	  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
			  gen_lowpart (SImode, fnaddr));
	  offset += 6;
	}
      else
	{
	  /* Bytes 49 bb: "movabs $imm64, %r11" — 10 bytes total.  */
	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
			  gen_int_mode (0xbb49, HImode));
	  emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
			  fnaddr);
	  offset += 10;
	}
      /* Load static chain using movabs to r10 (bytes 49 ba).  */
      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
		      gen_int_mode (0xba49, HImode));
      emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
		      cxt);
      offset += 10;
      /* Jump to the r11 (bytes 49 ff e3 = "jmp *%r11").  */
      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
		      gen_int_mode (0xff49, HImode));
      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
		      gen_int_mode (0xe3, QImode));
      offset += 3;
      /* Sanity check: the emitted code must fit in TRAMPOLINE_SIZE.  */
      if (offset > TRAMPOLINE_SIZE)
	abort ();
    }

#ifdef TRANSFER_FROM_TRAMPOLINE
  /* Some platforms need the stack made executable before the trampoline
     can run; call the libgcc helper to do so.  */
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
		     LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
#endif
}
eeb06b1b 12600\f
6a2dd09a
RS
/* Register one machine-specific builtin: declare NAME with function
   type TYPE, expanding via builtin code CODE.  The builtin is created
   only when every target flag bit in MASK is enabled in target_flags;
   builtins whose MASK includes MASK_64BIT are additionally restricted
   to 64-bit targets.  */
#define def_builtin(MASK, NAME, TYPE, CODE)			\
do {								\
  if ((MASK) & target_flags					\
      && (!((MASK) & MASK_64BIT) || TARGET_64BIT))		\
    builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD,	\
		      NULL, NULL_TREE);				\
} while (0)
bd793c65 12608
bd793c65
BS
/* One entry of the builtin tables below (bdesc_comi, bdesc_2arg, ...):
   maps a __builtin_ia32_* function onto the insn pattern that
   implements it.  */
struct builtin_description
{
  /* Target flag mask (MASK_SSE, MASK_MMX, ...) required for this
     builtin to be defined; see def_builtin.  */
  const unsigned int mask;
  /* Insn pattern used to expand the builtin.  */
  const enum insn_code icode;
  /* Builtin function name, or 0 for entries expanded specially.  */
  const char *const name;
  /* Enumerator identifying the builtin within the compiler.  */
  const enum ix86_builtins code;
  /* For comparison builtins, the rtx comparison code to generate;
     0 for non-comparison entries.  */
  const enum rtx_code comparison;
  /* NOTE(review): judging from the tables (e.g. cmpgtps = LT with
     flag 1), a nonzero flag appears to request swapped comparison
     operands — confirm against the expander.  */
  const unsigned int flag;
};
12618
/* COMISS/UCOMISS (SSE) and COMISD/UCOMISD (SSE2) comparison builtins.
   Each entry supplies the rtx comparison code used when expanding the
   scalar ordered/unordered compare-and-set-EFLAGS builtin.  */
static const struct builtin_description bdesc_comi[] =
{
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
};
12646
8b60264b 12647static const struct builtin_description bdesc_2arg[] =
bd793c65
BS
12648{
12649 /* SSE */
37f22004
L
12650 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
12651 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
12652 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
12653 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
12654 { MASK_SSE, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
12655 { MASK_SSE, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
12656 { MASK_SSE, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
12657 { MASK_SSE, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
12658
12659 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
12660 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
12661 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
12662 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
12663 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
12664 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
12665 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
12666 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
12667 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
12668 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
12669 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
12670 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
12671 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
12672 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
12673 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
12674 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
12675 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
12676 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
12677 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
12678 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
12679
12680 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
12681 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
12682 { MASK_SSE, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
12683 { MASK_SSE, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
12684
12685 { MASK_SSE, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
12686 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
12687 { MASK_SSE, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
12688 { MASK_SSE, CODE_FOR_sse_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
12689
12690 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
12691 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
12692 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
12693 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
12694 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
bd793c65
BS
12695
12696 /* MMX */
eeb06b1b
BS
12697 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
12698 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
12699 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
d50672ef 12700 { MASK_MMX, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
eeb06b1b
BS
12701 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
12702 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
12703 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
d50672ef 12704 { MASK_MMX, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
eeb06b1b
BS
12705
12706 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
12707 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
12708 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
12709 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
12710 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
12711 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
12712 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
12713 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
12714
12715 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
12716 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
37f22004 12717 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
eeb06b1b
BS
12718
12719 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
12720 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
12721 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
12722 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
12723
37f22004
L
12724 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
12725 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
eeb06b1b
BS
12726
12727 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
12728 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
12729 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
12730 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
12731 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
12732 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
12733
37f22004
L
12734 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
12735 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
12736 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
12737 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
eeb06b1b
BS
12738
12739 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
12740 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
12741 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
12742 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
12743 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
12744 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
bd793c65
BS
12745
12746 /* Special. */
eeb06b1b
BS
12747 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
12748 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
12749 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
12750
37f22004
L
12751 { MASK_SSE, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
12752 { MASK_SSE, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
12753 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
eeb06b1b
BS
12754
12755 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
12756 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
12757 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
12758 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
12759 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
12760 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
12761
12762 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
12763 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
12764 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
12765 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
12766 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
12767 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
12768
12769 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
12770 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
12771 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
12772 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
12773
37f22004 12774 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
fbe5eb6d
BS
12775 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
12776
12777 /* SSE2 */
12778 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
12779 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
12780 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
12781 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
12782 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
12783 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
12784 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
12785 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
12786
12787 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
12788 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
12789 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
12790 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
12791 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
12792 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
12793 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
12794 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
12795 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
12796 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
12797 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
12798 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
12799 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
12800 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
12801 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
fbe5eb6d
BS
12802 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
12803 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
12804 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
12805 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
fbe5eb6d
BS
12806 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
12807
12808 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
12809 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
12810 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
12811 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
12812
1877be45
JH
12813 { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
12814 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
12815 { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
12816 { MASK_SSE2, CODE_FOR_sse2_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
fbe5eb6d
BS
12817
12818 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
12819 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
12820 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
12821
12822 /* SSE2 MMX */
12823 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
12824 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
12825 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
d50672ef 12826 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
fbe5eb6d
BS
12827 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
12828 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
12829 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
d50672ef 12830 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
fbe5eb6d
BS
12831
12832 { MASK_MMX, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
12833 { MASK_MMX, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
12834 { MASK_MMX, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
12835 { MASK_MMX, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
12836 { MASK_MMX, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
12837 { MASK_MMX, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
12838 { MASK_MMX, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
12839 { MASK_MMX, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
12840
12841 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
12842 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
12843 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
12844 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },
12845
916b60b7
BS
12846 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
12847 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
12848 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
12849 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
fbe5eb6d
BS
12850
12851 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
12852 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
12853
12854 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
12855 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
12856 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
12857 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
12858 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
12859 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
12860
12861 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
12862 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
12863 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
12864 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
12865
12866 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
12867 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
12868 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
077084dd 12869 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
fbe5eb6d
BS
12870 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
12871 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
12872 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
f02e1358 12873 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
fbe5eb6d 12874
916b60b7
BS
12875 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
12876 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
12877 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
12878
12879 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
12880 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
12881
12882 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
12883 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
12884 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
12885 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
12886 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
12887 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
12888
12889 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
12890 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
12891 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
12892 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
12893 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
12894 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
12895
12896 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
12897 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
12898 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
12899 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
12900
12901 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
12902
fbe5eb6d 12903 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
37f22004 12904 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
fbe5eb6d 12905 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
22c7c85e
L
12906 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
12907
9e200aaf
KC
12908 /* SSE3 MMX */
12909 { MASK_SSE3, CODE_FOR_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
12910 { MASK_SSE3, CODE_FOR_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
12911 { MASK_SSE3, CODE_FOR_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
12912 { MASK_SSE3, CODE_FOR_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
12913 { MASK_SSE3, CODE_FOR_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
12914 { MASK_SSE3, CODE_FOR_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
bd793c65
BS
12915};
12916
8b60264b 12917static const struct builtin_description bdesc_1arg[] =
bd793c65 12918{
37f22004
L
12919 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
12920 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
fbe5eb6d 12921
37f22004
L
12922 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
12923 { MASK_SSE, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
12924 { MASK_SSE, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
fbe5eb6d 12925
37f22004
L
12926 { MASK_SSE, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
12927 { MASK_SSE, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
12928 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
12929 { MASK_SSE, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
12930 { MASK_SSE, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
12931 { MASK_SSE | MASK_64BIT, CODE_FOR_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
fbe5eb6d
BS
12932
12933 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
12934 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
12935 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
f02e1358 12936 { MASK_SSE2, CODE_FOR_sse2_movdq2q, 0, IX86_BUILTIN_MOVDQ2Q, 0, 0 },
fbe5eb6d
BS
12937
12938 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
12939
12940 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
12941 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
bd793c65 12942
fbe5eb6d
BS
12943 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
12944 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
12945 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
12946 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
12947 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
bd793c65 12948
fbe5eb6d 12949 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
bd793c65 12950
fbe5eb6d
BS
12951 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
12952 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
37f22004
L
12953 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
12954 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
fbe5eb6d
BS
12955
12956 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
12957 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
f02e1358
JH
12958 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
12959
22c7c85e
L
12960 { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 },
12961
9e200aaf
KC
12962 /* SSE3 */
12963 { MASK_SSE3, CODE_FOR_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
12964 { MASK_SSE3, CODE_FOR_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
12965 { MASK_SSE3, CODE_FOR_movddup, 0, IX86_BUILTIN_MOVDDUP, 0, 0 }
bd793c65
BS
12966};
12967
f6155fda 12968void
b96a374d 12969ix86_init_builtins (void)
f6155fda
SS
12970{
12971 if (TARGET_MMX)
12972 ix86_init_mmx_sse_builtins ();
12973}
12974
/* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
   is zero.  Otherwise, if TARGET_SSE is not set, only expand the MMX
   builtins.  */
e37af218 12978static void
b96a374d 12979ix86_init_mmx_sse_builtins (void)
bd793c65 12980{
8b60264b 12981 const struct builtin_description * d;
77ebd435 12982 size_t i;
bd793c65 12983
4a5eab38
PB
12984 tree V16QI_type_node = build_vector_type_for_mode (intQI_type_node, V16QImode);
12985 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
12986 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
12987 tree V2DI_type_node = build_vector_type_for_mode (intDI_type_node, V2DImode);
12988 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
12989 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
12990 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
12991 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
12992 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
12993 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
12994
bd793c65 12995 tree pchar_type_node = build_pointer_type (char_type_node);
068f5dea
JH
12996 tree pcchar_type_node = build_pointer_type (
12997 build_type_variant (char_type_node, 1, 0));
bd793c65 12998 tree pfloat_type_node = build_pointer_type (float_type_node);
068f5dea
JH
12999 tree pcfloat_type_node = build_pointer_type (
13000 build_type_variant (float_type_node, 1, 0));
bd793c65 13001 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
916b60b7 13002 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
bd793c65
BS
13003 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
13004
13005 /* Comparisons. */
13006 tree int_ftype_v4sf_v4sf
b4de2f7d
AH
13007 = build_function_type_list (integer_type_node,
13008 V4SF_type_node, V4SF_type_node, NULL_TREE);
bd793c65 13009 tree v4si_ftype_v4sf_v4sf
b4de2f7d
AH
13010 = build_function_type_list (V4SI_type_node,
13011 V4SF_type_node, V4SF_type_node, NULL_TREE);
bd793c65 13012 /* MMX/SSE/integer conversions. */
bd793c65 13013 tree int_ftype_v4sf
b4de2f7d
AH
13014 = build_function_type_list (integer_type_node,
13015 V4SF_type_node, NULL_TREE);
453ee231
JH
13016 tree int64_ftype_v4sf
13017 = build_function_type_list (long_long_integer_type_node,
13018 V4SF_type_node, NULL_TREE);
bd793c65 13019 tree int_ftype_v8qi
b4de2f7d 13020 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
bd793c65 13021 tree v4sf_ftype_v4sf_int
b4de2f7d
AH
13022 = build_function_type_list (V4SF_type_node,
13023 V4SF_type_node, integer_type_node, NULL_TREE);
453ee231
JH
13024 tree v4sf_ftype_v4sf_int64
13025 = build_function_type_list (V4SF_type_node,
13026 V4SF_type_node, long_long_integer_type_node,
13027 NULL_TREE);
bd793c65 13028 tree v4sf_ftype_v4sf_v2si
b4de2f7d
AH
13029 = build_function_type_list (V4SF_type_node,
13030 V4SF_type_node, V2SI_type_node, NULL_TREE);
bd793c65 13031 tree int_ftype_v4hi_int
b4de2f7d
AH
13032 = build_function_type_list (integer_type_node,
13033 V4HI_type_node, integer_type_node, NULL_TREE);
bd793c65 13034 tree v4hi_ftype_v4hi_int_int
e7a60f56 13035 = build_function_type_list (V4HI_type_node, V4HI_type_node,
b4de2f7d
AH
13036 integer_type_node, integer_type_node,
13037 NULL_TREE);
bd793c65
BS
13038 /* Miscellaneous. */
13039 tree v8qi_ftype_v4hi_v4hi
b4de2f7d
AH
13040 = build_function_type_list (V8QI_type_node,
13041 V4HI_type_node, V4HI_type_node, NULL_TREE);
bd793c65 13042 tree v4hi_ftype_v2si_v2si
b4de2f7d
AH
13043 = build_function_type_list (V4HI_type_node,
13044 V2SI_type_node, V2SI_type_node, NULL_TREE);
bd793c65 13045 tree v4sf_ftype_v4sf_v4sf_int
b4de2f7d
AH
13046 = build_function_type_list (V4SF_type_node,
13047 V4SF_type_node, V4SF_type_node,
13048 integer_type_node, NULL_TREE);
bd793c65 13049 tree v2si_ftype_v4hi_v4hi
b4de2f7d
AH
13050 = build_function_type_list (V2SI_type_node,
13051 V4HI_type_node, V4HI_type_node, NULL_TREE);
bd793c65 13052 tree v4hi_ftype_v4hi_int
b4de2f7d 13053 = build_function_type_list (V4HI_type_node,
e7a60f56 13054 V4HI_type_node, integer_type_node, NULL_TREE);
bd793c65 13055 tree v4hi_ftype_v4hi_di
b4de2f7d
AH
13056 = build_function_type_list (V4HI_type_node,
13057 V4HI_type_node, long_long_unsigned_type_node,
13058 NULL_TREE);
bd793c65 13059 tree v2si_ftype_v2si_di
b4de2f7d
AH
13060 = build_function_type_list (V2SI_type_node,
13061 V2SI_type_node, long_long_unsigned_type_node,
13062 NULL_TREE);
bd793c65 13063 tree void_ftype_void
b4de2f7d 13064 = build_function_type (void_type_node, void_list_node);
bd793c65 13065 tree void_ftype_unsigned
b4de2f7d 13066 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
22c7c85e
L
13067 tree void_ftype_unsigned_unsigned
13068 = build_function_type_list (void_type_node, unsigned_type_node,
13069 unsigned_type_node, NULL_TREE);
13070 tree void_ftype_pcvoid_unsigned_unsigned
13071 = build_function_type_list (void_type_node, const_ptr_type_node,
13072 unsigned_type_node, unsigned_type_node,
13073 NULL_TREE);
bd793c65 13074 tree unsigned_ftype_void
b4de2f7d 13075 = build_function_type (unsigned_type_node, void_list_node);
bd793c65 13076 tree di_ftype_void
b4de2f7d 13077 = build_function_type (long_long_unsigned_type_node, void_list_node);
e37af218 13078 tree v4sf_ftype_void
b4de2f7d 13079 = build_function_type (V4SF_type_node, void_list_node);
bd793c65 13080 tree v2si_ftype_v4sf
b4de2f7d 13081 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
bd793c65 13082 /* Loads/stores. */
bd793c65 13083 tree void_ftype_v8qi_v8qi_pchar
b4de2f7d
AH
13084 = build_function_type_list (void_type_node,
13085 V8QI_type_node, V8QI_type_node,
13086 pchar_type_node, NULL_TREE);
068f5dea
JH
13087 tree v4sf_ftype_pcfloat
13088 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
bd793c65
BS
13089 /* @@@ the type is bogus */
13090 tree v4sf_ftype_v4sf_pv2si
b4de2f7d 13091 = build_function_type_list (V4SF_type_node,
f8ca7923 13092 V4SF_type_node, pv2si_type_node, NULL_TREE);
1255c85c 13093 tree void_ftype_pv2si_v4sf
b4de2f7d 13094 = build_function_type_list (void_type_node,
f8ca7923 13095 pv2si_type_node, V4SF_type_node, NULL_TREE);
bd793c65 13096 tree void_ftype_pfloat_v4sf
b4de2f7d
AH
13097 = build_function_type_list (void_type_node,
13098 pfloat_type_node, V4SF_type_node, NULL_TREE);
bd793c65 13099 tree void_ftype_pdi_di
b4de2f7d
AH
13100 = build_function_type_list (void_type_node,
13101 pdi_type_node, long_long_unsigned_type_node,
13102 NULL_TREE);
916b60b7 13103 tree void_ftype_pv2di_v2di
b4de2f7d
AH
13104 = build_function_type_list (void_type_node,
13105 pv2di_type_node, V2DI_type_node, NULL_TREE);
bd793c65
BS
13106 /* Normal vector unops. */
13107 tree v4sf_ftype_v4sf
b4de2f7d 13108 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
0f290768 13109
bd793c65
BS
13110 /* Normal vector binops. */
13111 tree v4sf_ftype_v4sf_v4sf
b4de2f7d
AH
13112 = build_function_type_list (V4SF_type_node,
13113 V4SF_type_node, V4SF_type_node, NULL_TREE);
bd793c65 13114 tree v8qi_ftype_v8qi_v8qi
b4de2f7d
AH
13115 = build_function_type_list (V8QI_type_node,
13116 V8QI_type_node, V8QI_type_node, NULL_TREE);
bd793c65 13117 tree v4hi_ftype_v4hi_v4hi
b4de2f7d
AH
13118 = build_function_type_list (V4HI_type_node,
13119 V4HI_type_node, V4HI_type_node, NULL_TREE);
bd793c65 13120 tree v2si_ftype_v2si_v2si
b4de2f7d
AH
13121 = build_function_type_list (V2SI_type_node,
13122 V2SI_type_node, V2SI_type_node, NULL_TREE);
bd793c65 13123 tree di_ftype_di_di
b4de2f7d
AH
13124 = build_function_type_list (long_long_unsigned_type_node,
13125 long_long_unsigned_type_node,
13126 long_long_unsigned_type_node, NULL_TREE);
bd793c65 13127
47f339cf 13128 tree v2si_ftype_v2sf
ae3aa00d 13129 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
47f339cf 13130 tree v2sf_ftype_v2si
b4de2f7d 13131 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
47f339cf 13132 tree v2si_ftype_v2si
b4de2f7d 13133 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
47f339cf 13134 tree v2sf_ftype_v2sf
b4de2f7d 13135 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
47f339cf 13136 tree v2sf_ftype_v2sf_v2sf
b4de2f7d
AH
13137 = build_function_type_list (V2SF_type_node,
13138 V2SF_type_node, V2SF_type_node, NULL_TREE);
47f339cf 13139 tree v2si_ftype_v2sf_v2sf
b4de2f7d
AH
13140 = build_function_type_list (V2SI_type_node,
13141 V2SF_type_node, V2SF_type_node, NULL_TREE);
fbe5eb6d 13142 tree pint_type_node = build_pointer_type (integer_type_node);
068f5dea
JH
13143 tree pcint_type_node = build_pointer_type (
13144 build_type_variant (integer_type_node, 1, 0));
fbe5eb6d 13145 tree pdouble_type_node = build_pointer_type (double_type_node);
068f5dea
JH
13146 tree pcdouble_type_node = build_pointer_type (
13147 build_type_variant (double_type_node, 1, 0));
fbe5eb6d 13148 tree int_ftype_v2df_v2df
b4de2f7d
AH
13149 = build_function_type_list (integer_type_node,
13150 V2DF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d
BS
13151
13152 tree ti_ftype_void
b4de2f7d 13153 = build_function_type (intTI_type_node, void_list_node);
f02e1358
JH
13154 tree v2di_ftype_void
13155 = build_function_type (V2DI_type_node, void_list_node);
fbe5eb6d 13156 tree ti_ftype_ti_ti
b4de2f7d
AH
13157 = build_function_type_list (intTI_type_node,
13158 intTI_type_node, intTI_type_node, NULL_TREE);
068f5dea
JH
13159 tree void_ftype_pcvoid
13160 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
fbe5eb6d 13161 tree v2di_ftype_di
b4de2f7d
AH
13162 = build_function_type_list (V2DI_type_node,
13163 long_long_unsigned_type_node, NULL_TREE);
f02e1358
JH
13164 tree di_ftype_v2di
13165 = build_function_type_list (long_long_unsigned_type_node,
13166 V2DI_type_node, NULL_TREE);
fbe5eb6d 13167 tree v4sf_ftype_v4si
b4de2f7d 13168 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
fbe5eb6d 13169 tree v4si_ftype_v4sf
b4de2f7d 13170 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
fbe5eb6d 13171 tree v2df_ftype_v4si
b4de2f7d 13172 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
fbe5eb6d 13173 tree v4si_ftype_v2df
b4de2f7d 13174 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 13175 tree v2si_ftype_v2df
b4de2f7d 13176 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 13177 tree v4sf_ftype_v2df
b4de2f7d 13178 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 13179 tree v2df_ftype_v2si
b4de2f7d 13180 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
fbe5eb6d 13181 tree v2df_ftype_v4sf
b4de2f7d 13182 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
fbe5eb6d 13183 tree int_ftype_v2df
b4de2f7d 13184 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
453ee231
JH
13185 tree int64_ftype_v2df
13186 = build_function_type_list (long_long_integer_type_node,
b96a374d 13187 V2DF_type_node, NULL_TREE);
fbe5eb6d 13188 tree v2df_ftype_v2df_int
b4de2f7d
AH
13189 = build_function_type_list (V2DF_type_node,
13190 V2DF_type_node, integer_type_node, NULL_TREE);
453ee231
JH
13191 tree v2df_ftype_v2df_int64
13192 = build_function_type_list (V2DF_type_node,
13193 V2DF_type_node, long_long_integer_type_node,
13194 NULL_TREE);
fbe5eb6d 13195 tree v4sf_ftype_v4sf_v2df
b4de2f7d
AH
13196 = build_function_type_list (V4SF_type_node,
13197 V4SF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 13198 tree v2df_ftype_v2df_v4sf
b4de2f7d
AH
13199 = build_function_type_list (V2DF_type_node,
13200 V2DF_type_node, V4SF_type_node, NULL_TREE);
fbe5eb6d 13201 tree v2df_ftype_v2df_v2df_int
b4de2f7d
AH
13202 = build_function_type_list (V2DF_type_node,
13203 V2DF_type_node, V2DF_type_node,
13204 integer_type_node,
13205 NULL_TREE);
fbe5eb6d 13206 tree v2df_ftype_v2df_pv2si
b4de2f7d
AH
13207 = build_function_type_list (V2DF_type_node,
13208 V2DF_type_node, pv2si_type_node, NULL_TREE);
fbe5eb6d 13209 tree void_ftype_pv2si_v2df
b4de2f7d
AH
13210 = build_function_type_list (void_type_node,
13211 pv2si_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 13212 tree void_ftype_pdouble_v2df
b4de2f7d
AH
13213 = build_function_type_list (void_type_node,
13214 pdouble_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 13215 tree void_ftype_pint_int
b4de2f7d
AH
13216 = build_function_type_list (void_type_node,
13217 pint_type_node, integer_type_node, NULL_TREE);
fbe5eb6d 13218 tree void_ftype_v16qi_v16qi_pchar
b4de2f7d
AH
13219 = build_function_type_list (void_type_node,
13220 V16QI_type_node, V16QI_type_node,
13221 pchar_type_node, NULL_TREE);
068f5dea
JH
13222 tree v2df_ftype_pcdouble
13223 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
fbe5eb6d 13224 tree v2df_ftype_v2df_v2df
b4de2f7d
AH
13225 = build_function_type_list (V2DF_type_node,
13226 V2DF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 13227 tree v16qi_ftype_v16qi_v16qi
b4de2f7d
AH
13228 = build_function_type_list (V16QI_type_node,
13229 V16QI_type_node, V16QI_type_node, NULL_TREE);
fbe5eb6d 13230 tree v8hi_ftype_v8hi_v8hi
b4de2f7d
AH
13231 = build_function_type_list (V8HI_type_node,
13232 V8HI_type_node, V8HI_type_node, NULL_TREE);
fbe5eb6d 13233 tree v4si_ftype_v4si_v4si
b4de2f7d
AH
13234 = build_function_type_list (V4SI_type_node,
13235 V4SI_type_node, V4SI_type_node, NULL_TREE);
fbe5eb6d 13236 tree v2di_ftype_v2di_v2di
b4de2f7d
AH
13237 = build_function_type_list (V2DI_type_node,
13238 V2DI_type_node, V2DI_type_node, NULL_TREE);
fbe5eb6d 13239 tree v2di_ftype_v2df_v2df
b4de2f7d
AH
13240 = build_function_type_list (V2DI_type_node,
13241 V2DF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 13242 tree v2df_ftype_v2df
b4de2f7d 13243 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 13244 tree v2df_ftype_double
b4de2f7d 13245 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
fbe5eb6d 13246 tree v2df_ftype_double_double
b4de2f7d
AH
13247 = build_function_type_list (V2DF_type_node,
13248 double_type_node, double_type_node, NULL_TREE);
fbe5eb6d 13249 tree int_ftype_v8hi_int
b4de2f7d
AH
13250 = build_function_type_list (integer_type_node,
13251 V8HI_type_node, integer_type_node, NULL_TREE);
fbe5eb6d 13252 tree v8hi_ftype_v8hi_int_int
b4de2f7d
AH
13253 = build_function_type_list (V8HI_type_node,
13254 V8HI_type_node, integer_type_node,
13255 integer_type_node, NULL_TREE);
916b60b7 13256 tree v2di_ftype_v2di_int
b4de2f7d
AH
13257 = build_function_type_list (V2DI_type_node,
13258 V2DI_type_node, integer_type_node, NULL_TREE);
fbe5eb6d 13259 tree v4si_ftype_v4si_int
b4de2f7d
AH
13260 = build_function_type_list (V4SI_type_node,
13261 V4SI_type_node, integer_type_node, NULL_TREE);
fbe5eb6d 13262 tree v8hi_ftype_v8hi_int
b4de2f7d
AH
13263 = build_function_type_list (V8HI_type_node,
13264 V8HI_type_node, integer_type_node, NULL_TREE);
916b60b7 13265 tree v8hi_ftype_v8hi_v2di
b4de2f7d
AH
13266 = build_function_type_list (V8HI_type_node,
13267 V8HI_type_node, V2DI_type_node, NULL_TREE);
916b60b7 13268 tree v4si_ftype_v4si_v2di
b4de2f7d
AH
13269 = build_function_type_list (V4SI_type_node,
13270 V4SI_type_node, V2DI_type_node, NULL_TREE);
916b60b7 13271 tree v4si_ftype_v8hi_v8hi
b4de2f7d
AH
13272 = build_function_type_list (V4SI_type_node,
13273 V8HI_type_node, V8HI_type_node, NULL_TREE);
916b60b7 13274 tree di_ftype_v8qi_v8qi
b4de2f7d
AH
13275 = build_function_type_list (long_long_unsigned_type_node,
13276 V8QI_type_node, V8QI_type_node, NULL_TREE);
916b60b7 13277 tree v2di_ftype_v16qi_v16qi
b4de2f7d
AH
13278 = build_function_type_list (V2DI_type_node,
13279 V16QI_type_node, V16QI_type_node, NULL_TREE);
916b60b7 13280 tree int_ftype_v16qi
b4de2f7d 13281 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
068f5dea
JH
13282 tree v16qi_ftype_pcchar
13283 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
f02e1358
JH
13284 tree void_ftype_pchar_v16qi
13285 = build_function_type_list (void_type_node,
13286 pchar_type_node, V16QI_type_node, NULL_TREE);
068f5dea
JH
13287 tree v4si_ftype_pcint
13288 = build_function_type_list (V4SI_type_node, pcint_type_node, NULL_TREE);
13289 tree void_ftype_pcint_v4si
f02e1358 13290 = build_function_type_list (void_type_node,
068f5dea 13291 pcint_type_node, V4SI_type_node, NULL_TREE);
f02e1358
JH
13292 tree v2di_ftype_v2di
13293 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
47f339cf 13294
f8a1ebc6
JH
13295 tree float80_type;
13296 tree float128_type;
13297
13298 /* The __float80 type. */
13299 if (TYPE_MODE (long_double_type_node) == XFmode)
13300 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
13301 "__float80");
13302 else
13303 {
13304 /* The __float80 type. */
13305 float80_type = make_node (REAL_TYPE);
13306 TYPE_PRECISION (float80_type) = 96;
13307 layout_type (float80_type);
13308 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
13309 }
13310
13311 float128_type = make_node (REAL_TYPE);
13312 TYPE_PRECISION (float128_type) = 128;
13313 layout_type (float128_type);
13314 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
13315
bd793c65
BS
13316 /* Add all builtins that are more or less simple operations on two
13317 operands. */
ca7558fc 13318 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
bd793c65
BS
13319 {
13320 /* Use one of the operands; the target can have a different mode for
13321 mask-generating compares. */
13322 enum machine_mode mode;
13323 tree type;
13324
13325 if (d->name == 0)
13326 continue;
13327 mode = insn_data[d->icode].operand[1].mode;
13328
bd793c65
BS
13329 switch (mode)
13330 {
fbe5eb6d
BS
13331 case V16QImode:
13332 type = v16qi_ftype_v16qi_v16qi;
13333 break;
13334 case V8HImode:
13335 type = v8hi_ftype_v8hi_v8hi;
13336 break;
13337 case V4SImode:
13338 type = v4si_ftype_v4si_v4si;
13339 break;
13340 case V2DImode:
13341 type = v2di_ftype_v2di_v2di;
13342 break;
13343 case V2DFmode:
13344 type = v2df_ftype_v2df_v2df;
13345 break;
13346 case TImode:
13347 type = ti_ftype_ti_ti;
13348 break;
bd793c65
BS
13349 case V4SFmode:
13350 type = v4sf_ftype_v4sf_v4sf;
13351 break;
13352 case V8QImode:
13353 type = v8qi_ftype_v8qi_v8qi;
13354 break;
13355 case V4HImode:
13356 type = v4hi_ftype_v4hi_v4hi;
13357 break;
13358 case V2SImode:
13359 type = v2si_ftype_v2si_v2si;
13360 break;
bd793c65
BS
13361 case DImode:
13362 type = di_ftype_di_di;
13363 break;
13364
13365 default:
13366 abort ();
13367 }
0f290768 13368
bd793c65
BS
13369 /* Override for comparisons. */
13370 if (d->icode == CODE_FOR_maskcmpv4sf3
13371 || d->icode == CODE_FOR_maskncmpv4sf3
13372 || d->icode == CODE_FOR_vmmaskcmpv4sf3
13373 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
13374 type = v4si_ftype_v4sf_v4sf;
13375
fbe5eb6d
BS
13376 if (d->icode == CODE_FOR_maskcmpv2df3
13377 || d->icode == CODE_FOR_maskncmpv2df3
13378 || d->icode == CODE_FOR_vmmaskcmpv2df3
13379 || d->icode == CODE_FOR_vmmaskncmpv2df3)
13380 type = v2di_ftype_v2df_v2df;
13381
eeb06b1b 13382 def_builtin (d->mask, d->name, type, d->code);
bd793c65
BS
13383 }
13384
13385 /* Add the remaining MMX insns with somewhat more complicated types. */
eeb06b1b
BS
13386 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
13387 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
eeb06b1b
BS
13388 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
13389 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
13390 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
13391
13392 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
13393 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
13394 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
13395
13396 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
13397 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
13398
13399 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
13400 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
0f290768 13401
bd793c65 13402 /* comi/ucomi insns. */
ca7558fc 13403 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
fbe5eb6d
BS
13404 if (d->mask == MASK_SSE2)
13405 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
13406 else
13407 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
bd793c65 13408
1255c85c
BS
13409 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
13410 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
13411 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
bd793c65 13412
37f22004
L
13413 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
13414 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
13415 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
13416 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
13417 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
13418 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
13419 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
13420 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
13421 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
13422 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
13423 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
13424
13425 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
13426 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
13427
13428 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
13429
13430 def_builtin (MASK_SSE, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADAPS);
13431 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
13432 def_builtin (MASK_SSE, "__builtin_ia32_loadss", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADSS);
13433 def_builtin (MASK_SSE, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
13434 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
13435 def_builtin (MASK_SSE, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
13436
13437 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
13438 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
13439 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
13440 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
13441
13442 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
13443 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
13444 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
13445 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
13446
13447 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
13448
13449 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
13450
13451 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
13452 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
13453 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
13454 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
13455 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
13456 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
13457
13458 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
bd793c65 13459
47f339cf
BS
13460 /* Original 3DNow! */
13461 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
13462 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
13463 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
13464 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
13465 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
13466 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
13467 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
13468 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
13469 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
13470 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
13471 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
13472 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
13473 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
13474 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
13475 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
13476 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
13477 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
13478 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
13479 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
13480 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
47f339cf
BS
13481
13482 /* 3DNow! extension as used in the Athlon CPU. */
13483 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
13484 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
13485 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
13486 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
13487 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
13488 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
13489
37f22004 13490 def_builtin (MASK_SSE, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
fbe5eb6d
BS
13491
13492 /* SSE2 */
13493 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
13494 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
13495
13496 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
13497 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
f02e1358 13498 def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q);
fbe5eb6d 13499
068f5dea
JH
13500 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADAPD);
13501 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
13502 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADSD);
fbe5eb6d
BS
13503 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
13504 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
13505 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
13506
13507 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
13508 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
13509 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
13510 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
13511
13512 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
916b60b7 13513 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
fbe5eb6d
BS
13514 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
13515 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
916b60b7 13516 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
fbe5eb6d
BS
13517
13518 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
13519 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
13520 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
916b60b7 13521 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
fbe5eb6d
BS
13522
13523 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
13524 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
13525
13526 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
13527
13528 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
916b60b7 13529 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
fbe5eb6d
BS
13530
13531 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
13532 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
13533 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
13534 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
13535 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
13536
13537 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
13538
13539 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
13540 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
37f22004
L
13541 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
13542 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
fbe5eb6d
BS
13543
13544 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
13545 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
13546 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
13547
13548 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
37f22004 13549 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
fbe5eb6d
BS
13550 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
13551 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
13552
13553 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
13554 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
13555 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
068f5dea
JH
13556 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pcdouble, IX86_BUILTIN_LOADPD1);
13557 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADRPD);
fbe5eb6d
BS
13558 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
13559 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
13560
068f5dea 13561 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
fbe5eb6d
BS
13562 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
13563 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
916b60b7 13564
068f5dea
JH
13565 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQA);
13566 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
13567 def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pcint, IX86_BUILTIN_LOADD);
f02e1358
JH
13568 def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA);
13569 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
068f5dea 13570 def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pcint_v4si, IX86_BUILTIN_STORED);
f02e1358
JH
13571 def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ);
13572
37f22004 13573 def_builtin (MASK_SSE, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI);
f02e1358 13574
916b60b7
BS
13575 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
13576 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
13577 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
13578
13579 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
13580 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
13581 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
13582
13583 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
13584 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
13585
ab3146fd 13586 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
916b60b7
BS
13587 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
13588 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
13589 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
13590
ab3146fd 13591 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
916b60b7
BS
13592 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
13593 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
13594 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
13595
13596 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
13597 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
13598
13599 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
22c7c85e
L
13600
13601 /* Prescott New Instructions. */
9e200aaf 13602 def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
22c7c85e
L
13603 void_ftype_pcvoid_unsigned_unsigned,
13604 IX86_BUILTIN_MONITOR);
9e200aaf 13605 def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
22c7c85e
L
13606 void_ftype_unsigned_unsigned,
13607 IX86_BUILTIN_MWAIT);
9e200aaf 13608 def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
22c7c85e
L
13609 v4sf_ftype_v4sf,
13610 IX86_BUILTIN_MOVSHDUP);
9e200aaf 13611 def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
22c7c85e
L
13612 v4sf_ftype_v4sf,
13613 IX86_BUILTIN_MOVSLDUP);
9e200aaf 13614 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
22c7c85e 13615 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
9e200aaf 13616 def_builtin (MASK_SSE3, "__builtin_ia32_loadddup",
22c7c85e 13617 v2df_ftype_pcdouble, IX86_BUILTIN_LOADDDUP);
9e200aaf 13618 def_builtin (MASK_SSE3, "__builtin_ia32_movddup",
22c7c85e 13619 v2df_ftype_v2df, IX86_BUILTIN_MOVDDUP);
bd793c65
BS
13620}
13621
13622/* Errors in the source file can cause expand_expr to return const0_rtx
13623 where we expect a vector. To avoid crashing, use one of the vector
13624 clear instructions. */
13625static rtx
b96a374d 13626safe_vector_operand (rtx x, enum machine_mode mode)
bd793c65
BS
13627{
13628 if (x != const0_rtx)
13629 return x;
13630 x = gen_reg_rtx (mode);
13631
47f339cf 13632 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
bd793c65
BS
13633 emit_insn (gen_mmx_clrdi (mode == DImode ? x
13634 : gen_rtx_SUBREG (DImode, x, 0)));
13635 else
e37af218 13636 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
4977bab6
ZW
13637 : gen_rtx_SUBREG (V4SFmode, x, 0),
13638 CONST0_RTX (V4SFmode)));
bd793c65
BS
13639 return x;
13640}
13641
/* Subroutine of ix86_expand_builtin to take care of binop insns.

   ICODE is the insn to generate, ARGLIST holds the two argument trees,
   and TARGET is a suggested result register (may be 0 or have the wrong
   mode, in which case a fresh register is used).  Returns the rtx
   holding the result, or 0 if the insn pattern could not be built.  */

static rtx
ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  /* Operand modes demanded by the insn pattern: result, input 1, input 2.  */
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  /* Guard against const0_rtx coming back from erroneous source code.  */
  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  /* An SImode value where the insn wants TImode: widen it by loading
     through a V4SImode register (sse2_loadd zero-extends into the low
     element) and viewing the result as TImode.  */
  if (GET_MODE (op1) == SImode && mode1 == TImode)
    {
      rtx x = gen_reg_rtx (V4SImode);
      emit_insn (gen_sse2_loadd (x, op1));
      op1 = gen_lowpart (TImode, x);
    }

  /* In case the insn wants input operands in modes different from
     the result, abort.  */
  if ((GET_MODE (op0) != mode0 && GET_MODE (op0) != VOIDmode)
      || (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode))
    abort ();

  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
     yet one of the two must not be a memory.  This is normally enforced
     by expanders, but we didn't bother to create one here.  */
  if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
    op0 = copy_to_mode_reg (mode0, op0);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
13696
13697/* Subroutine of ix86_expand_builtin to take care of stores. */
13698
13699static rtx
b96a374d 13700ix86_expand_store_builtin (enum insn_code icode, tree arglist)
bd793c65
BS
13701{
13702 rtx pat;
13703 tree arg0 = TREE_VALUE (arglist);
13704 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13705 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13706 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13707 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
13708 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
13709
13710 if (VECTOR_MODE_P (mode1))
13711 op1 = safe_vector_operand (op1, mode1);
13712
13713 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
7f0e57bd 13714 op1 = copy_to_mode_reg (mode1, op1);
59bef189 13715
bd793c65
BS
13716 pat = GEN_FCN (icode) (op0, op1);
13717 if (pat)
13718 emit_insn (pat);
13719 return 0;
13720}
13721
13722/* Subroutine of ix86_expand_builtin to take care of unop insns. */
13723
13724static rtx
b96a374d
AJ
13725ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
13726 rtx target, int do_load)
bd793c65
BS
13727{
13728 rtx pat;
13729 tree arg0 = TREE_VALUE (arglist);
13730 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13731 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13732 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13733
13734 if (! target
13735 || GET_MODE (target) != tmode
13736 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13737 target = gen_reg_rtx (tmode);
13738 if (do_load)
13739 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13740 else
13741 {
13742 if (VECTOR_MODE_P (mode0))
13743 op0 = safe_vector_operand (op0, mode0);
13744
13745 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13746 op0 = copy_to_mode_reg (mode0, op0);
13747 }
13748
13749 pat = GEN_FCN (icode) (target, op0);
13750 if (! pat)
13751 return 0;
13752 emit_insn (pat);
13753 return target;
13754}
13755
/* Subroutine of ix86_expand_builtin to take care of three special unop insns:
   sqrtss, rsqrtss, rcpss.

   These scalar insn patterns take two inputs: operand 1 supplies the
   pass-through upper vector elements while operand 2 supplies the scalar
   the operation is applied to.  The builtin has a single argument, so we
   feed the same value to both operands.  Returns the result rtx, or 0 if
   the insn pattern could not be built.  */

static rtx
ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  /* Guard against const0_rtx coming back from erroneous source code.  */
  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);

  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);

  /* Duplicate the single argument into operand 2; recheck against that
     operand's own predicate, which may be stricter than operand 1's.  */
  op1 = op0;
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
    op1 = copy_to_mode_reg (mode0, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
13789
/* Subroutine of ix86_expand_builtin to take care of comparison insns.

   D describes the builtin: D->icode is the insn pattern, D->comparison
   the rtx comparison code, and D->flag is nonzero when the requested
   comparison is not directly available in hardware and must be emitted
   with the operands swapped.  Returns the result rtx, or 0 if the insn
   pattern could not be built.  */

static rtx
ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
			 rtx target)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  rtx op2;
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
  enum rtx_code comparison = d->comparison;

  /* Guard against const0_rtx coming back from erroneous source code.  */
  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag)
    {
      /* The second argument moves to operand position 1; copy it through
	 a fresh register first since that position must be a register.  */
      rtx tmp = gen_reg_rtx (mode1);
      emit_move_insn (tmp, op1);
      op1 = op0;
      op0 = tmp;
    }

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  /* Operand 3 of the insn pattern is the comparison rtx itself.  */
  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
13839
/* Subroutine of ix86_expand_builtin to take care of comi insns.

   comi/ucomi compare two scalars and set EFLAGS; the builtin returns an
   int that is 1 when COMPARISON holds and 0 otherwise.  We zero an
   SImode register, emit the flag-setting insn, then set only the low
   QImode part from the flags so the upper bits stay zero.  D->flag is
   nonzero when the comparison must be emitted with swapped operands.
   TARGET is ignored; a fresh register is always used.  */

static rtx
ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
		      rtx target)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  rtx op2;
  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  /* Guard against const0_rtx coming back from erroneous source code.  */
  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag)
    {
      rtx tmp = op1;
      op1 = op0;
      op0 = tmp;
    }

  /* Zero an SImode register, then view its low byte so the setcc below
     leaves the upper 24 bits clear.  */
  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  /* SET_DEST (pat) is the flags register set by the comi insn; store
     COMPARISON of it against zero into the low byte of TARGET.  */
  emit_insn (gen_rtx_SET (VOIDmode,
			  gen_rtx_STRICT_LOW_PART (VOIDmode, target),
			  gen_rtx_fmt_ee (comparison, QImode,
					  SET_DEST (pat),
					  const0_rtx)));

  /* Return the full SImode register underlying the QImode subreg.  */
  return SUBREG_REG (target);
}
13892
13893/* Expand an expression EXP that calls a built-in function,
13894 with result going to TARGET if that's convenient
13895 (and in mode MODE if that's convenient).
13896 SUBTARGET may be used as the target for computing one of EXP's operands.
13897 IGNORE is nonzero if the value is to be ignored. */
13898
13899rtx
b96a374d
AJ
13900ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
13901 enum machine_mode mode ATTRIBUTE_UNUSED,
13902 int ignore ATTRIBUTE_UNUSED)
bd793c65 13903{
8b60264b 13904 const struct builtin_description *d;
77ebd435 13905 size_t i;
bd793c65
BS
13906 enum insn_code icode;
13907 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
13908 tree arglist = TREE_OPERAND (exp, 1);
e37af218 13909 tree arg0, arg1, arg2;
bd793c65
BS
13910 rtx op0, op1, op2, pat;
13911 enum machine_mode tmode, mode0, mode1, mode2;
8752c357 13912 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
bd793c65
BS
13913
13914 switch (fcode)
13915 {
13916 case IX86_BUILTIN_EMMS:
13917 emit_insn (gen_emms ());
13918 return 0;
13919
13920 case IX86_BUILTIN_SFENCE:
13921 emit_insn (gen_sfence ());
13922 return 0;
13923
bd793c65 13924 case IX86_BUILTIN_PEXTRW:
fbe5eb6d
BS
13925 case IX86_BUILTIN_PEXTRW128:
13926 icode = (fcode == IX86_BUILTIN_PEXTRW
13927 ? CODE_FOR_mmx_pextrw
13928 : CODE_FOR_sse2_pextrw);
bd793c65
BS
13929 arg0 = TREE_VALUE (arglist);
13930 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13931 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13932 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13933 tmode = insn_data[icode].operand[0].mode;
13934 mode0 = insn_data[icode].operand[1].mode;
13935 mode1 = insn_data[icode].operand[2].mode;
13936
13937 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13938 op0 = copy_to_mode_reg (mode0, op0);
13939 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13940 {
ebe75517
JH
13941 error ("selector must be an integer constant in the range 0..%i",
13942 fcode == IX86_BUILTIN_PEXTRW ? 3:7);
6f1a6c5b 13943 return gen_reg_rtx (tmode);
bd793c65
BS
13944 }
13945 if (target == 0
13946 || GET_MODE (target) != tmode
13947 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13948 target = gen_reg_rtx (tmode);
13949 pat = GEN_FCN (icode) (target, op0, op1);
13950 if (! pat)
13951 return 0;
13952 emit_insn (pat);
13953 return target;
13954
13955 case IX86_BUILTIN_PINSRW:
fbe5eb6d
BS
13956 case IX86_BUILTIN_PINSRW128:
13957 icode = (fcode == IX86_BUILTIN_PINSRW
13958 ? CODE_FOR_mmx_pinsrw
13959 : CODE_FOR_sse2_pinsrw);
bd793c65
BS
13960 arg0 = TREE_VALUE (arglist);
13961 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13962 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13963 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13964 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13965 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13966 tmode = insn_data[icode].operand[0].mode;
13967 mode0 = insn_data[icode].operand[1].mode;
13968 mode1 = insn_data[icode].operand[2].mode;
13969 mode2 = insn_data[icode].operand[3].mode;
13970
13971 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13972 op0 = copy_to_mode_reg (mode0, op0);
13973 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13974 op1 = copy_to_mode_reg (mode1, op1);
13975 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13976 {
ebe75517
JH
13977 error ("selector must be an integer constant in the range 0..%i",
13978 fcode == IX86_BUILTIN_PINSRW ? 15:255);
bd793c65
BS
13979 return const0_rtx;
13980 }
13981 if (target == 0
13982 || GET_MODE (target) != tmode
13983 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13984 target = gen_reg_rtx (tmode);
13985 pat = GEN_FCN (icode) (target, op0, op1, op2);
13986 if (! pat)
13987 return 0;
13988 emit_insn (pat);
13989 return target;
13990
13991 case IX86_BUILTIN_MASKMOVQ:
077084dd 13992 case IX86_BUILTIN_MASKMOVDQU:
fbe5eb6d
BS
13993 icode = (fcode == IX86_BUILTIN_MASKMOVQ
13994 ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
f8ca7923
JH
13995 : (TARGET_64BIT ? CODE_FOR_sse2_maskmovdqu_rex64
13996 : CODE_FOR_sse2_maskmovdqu));
bd793c65
BS
13997 /* Note the arg order is different from the operand order. */
13998 arg1 = TREE_VALUE (arglist);
13999 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
14000 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14001 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14002 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14003 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14004 mode0 = insn_data[icode].operand[0].mode;
14005 mode1 = insn_data[icode].operand[1].mode;
14006 mode2 = insn_data[icode].operand[2].mode;
14007
5c464583 14008 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
bd793c65
BS
14009 op0 = copy_to_mode_reg (mode0, op0);
14010 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
14011 op1 = copy_to_mode_reg (mode1, op1);
14012 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
14013 op2 = copy_to_mode_reg (mode2, op2);
14014 pat = GEN_FCN (icode) (op0, op1, op2);
14015 if (! pat)
14016 return 0;
14017 emit_insn (pat);
14018 return 0;
14019
14020 case IX86_BUILTIN_SQRTSS:
14021 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
14022 case IX86_BUILTIN_RSQRTSS:
14023 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
14024 case IX86_BUILTIN_RCPSS:
14025 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
14026
14027 case IX86_BUILTIN_LOADAPS:
14028 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
14029
14030 case IX86_BUILTIN_LOADUPS:
14031 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
14032
14033 case IX86_BUILTIN_STOREAPS:
e37af218 14034 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
f02e1358 14035
bd793c65 14036 case IX86_BUILTIN_STOREUPS:
e37af218 14037 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
bd793c65
BS
14038
14039 case IX86_BUILTIN_LOADSS:
14040 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
14041
14042 case IX86_BUILTIN_STORESS:
e37af218 14043 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
bd793c65 14044
0f290768 14045 case IX86_BUILTIN_LOADHPS:
bd793c65 14046 case IX86_BUILTIN_LOADLPS:
fbe5eb6d
BS
14047 case IX86_BUILTIN_LOADHPD:
14048 case IX86_BUILTIN_LOADLPD:
14049 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
14050 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
14051 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
997404de 14052 : CODE_FOR_sse2_movsd);
bd793c65
BS
14053 arg0 = TREE_VALUE (arglist);
14054 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14055 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14056 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14057 tmode = insn_data[icode].operand[0].mode;
14058 mode0 = insn_data[icode].operand[1].mode;
14059 mode1 = insn_data[icode].operand[2].mode;
14060
14061 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14062 op0 = copy_to_mode_reg (mode0, op0);
14063 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
14064 if (target == 0
14065 || GET_MODE (target) != tmode
14066 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14067 target = gen_reg_rtx (tmode);
14068 pat = GEN_FCN (icode) (target, op0, op1);
14069 if (! pat)
14070 return 0;
14071 emit_insn (pat);
14072 return target;
0f290768 14073
bd793c65
BS
14074 case IX86_BUILTIN_STOREHPS:
14075 case IX86_BUILTIN_STORELPS:
fbe5eb6d
BS
14076 case IX86_BUILTIN_STOREHPD:
14077 case IX86_BUILTIN_STORELPD:
14078 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
14079 : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
14080 : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
997404de 14081 : CODE_FOR_sse2_movsd);
bd793c65
BS
14082 arg0 = TREE_VALUE (arglist);
14083 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14084 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14085 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14086 mode0 = insn_data[icode].operand[1].mode;
14087 mode1 = insn_data[icode].operand[2].mode;
14088
14089 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
14090 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14091 op1 = copy_to_mode_reg (mode1, op1);
14092
14093 pat = GEN_FCN (icode) (op0, op0, op1);
14094 if (! pat)
14095 return 0;
14096 emit_insn (pat);
14097 return 0;
14098
14099 case IX86_BUILTIN_MOVNTPS:
e37af218 14100 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
bd793c65 14101 case IX86_BUILTIN_MOVNTQ:
e37af218 14102 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
bd793c65
BS
14103
14104 case IX86_BUILTIN_LDMXCSR:
14105 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
14106 target = assign_386_stack_local (SImode, 0);
14107 emit_move_insn (target, op0);
14108 emit_insn (gen_ldmxcsr (target));
14109 return 0;
14110
14111 case IX86_BUILTIN_STMXCSR:
14112 target = assign_386_stack_local (SImode, 0);
14113 emit_insn (gen_stmxcsr (target));
14114 return copy_to_mode_reg (SImode, target);
14115
bd793c65 14116 case IX86_BUILTIN_SHUFPS:
fbe5eb6d
BS
14117 case IX86_BUILTIN_SHUFPD:
14118 icode = (fcode == IX86_BUILTIN_SHUFPS
14119 ? CODE_FOR_sse_shufps
14120 : CODE_FOR_sse2_shufpd);
bd793c65
BS
14121 arg0 = TREE_VALUE (arglist);
14122 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14123 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14124 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14125 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14126 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14127 tmode = insn_data[icode].operand[0].mode;
14128 mode0 = insn_data[icode].operand[1].mode;
14129 mode1 = insn_data[icode].operand[2].mode;
14130 mode2 = insn_data[icode].operand[3].mode;
14131
14132 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14133 op0 = copy_to_mode_reg (mode0, op0);
14134 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14135 op1 = copy_to_mode_reg (mode1, op1);
14136 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14137 {
14138 /* @@@ better error message */
14139 error ("mask must be an immediate");
6f1a6c5b 14140 return gen_reg_rtx (tmode);
bd793c65
BS
14141 }
14142 if (target == 0
14143 || GET_MODE (target) != tmode
14144 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14145 target = gen_reg_rtx (tmode);
14146 pat = GEN_FCN (icode) (target, op0, op1, op2);
14147 if (! pat)
14148 return 0;
14149 emit_insn (pat);
14150 return target;
14151
14152 case IX86_BUILTIN_PSHUFW:
fbe5eb6d
BS
14153 case IX86_BUILTIN_PSHUFD:
14154 case IX86_BUILTIN_PSHUFHW:
14155 case IX86_BUILTIN_PSHUFLW:
14156 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
14157 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
14158 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
14159 : CODE_FOR_mmx_pshufw);
bd793c65
BS
14160 arg0 = TREE_VALUE (arglist);
14161 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14162 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14163 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14164 tmode = insn_data[icode].operand[0].mode;
29628f27
BS
14165 mode1 = insn_data[icode].operand[1].mode;
14166 mode2 = insn_data[icode].operand[2].mode;
bd793c65 14167
29628f27
BS
14168 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14169 op0 = copy_to_mode_reg (mode1, op0);
14170 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
bd793c65
BS
14171 {
14172 /* @@@ better error message */
14173 error ("mask must be an immediate");
14174 return const0_rtx;
14175 }
14176 if (target == 0
14177 || GET_MODE (target) != tmode
14178 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14179 target = gen_reg_rtx (tmode);
29628f27 14180 pat = GEN_FCN (icode) (target, op0, op1);
bd793c65
BS
14181 if (! pat)
14182 return 0;
14183 emit_insn (pat);
14184 return target;
14185
ab3146fd
ZD
14186 case IX86_BUILTIN_PSLLDQI128:
14187 case IX86_BUILTIN_PSRLDQI128:
14188 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
14189 : CODE_FOR_sse2_lshrti3);
14190 arg0 = TREE_VALUE (arglist);
14191 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14192 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14193 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14194 tmode = insn_data[icode].operand[0].mode;
14195 mode1 = insn_data[icode].operand[1].mode;
14196 mode2 = insn_data[icode].operand[2].mode;
14197
14198 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14199 {
14200 op0 = copy_to_reg (op0);
14201 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
14202 }
14203 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14204 {
14205 error ("shift must be an immediate");
14206 return const0_rtx;
14207 }
14208 target = gen_reg_rtx (V2DImode);
14209 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
14210 if (! pat)
14211 return 0;
14212 emit_insn (pat);
14213 return target;
14214
47f339cf
BS
14215 case IX86_BUILTIN_FEMMS:
14216 emit_insn (gen_femms ());
14217 return NULL_RTX;
14218
14219 case IX86_BUILTIN_PAVGUSB:
14220 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
14221
14222 case IX86_BUILTIN_PF2ID:
14223 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
14224
14225 case IX86_BUILTIN_PFACC:
14226 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
14227
14228 case IX86_BUILTIN_PFADD:
14229 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
14230
14231 case IX86_BUILTIN_PFCMPEQ:
14232 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
14233
14234 case IX86_BUILTIN_PFCMPGE:
14235 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
14236
14237 case IX86_BUILTIN_PFCMPGT:
14238 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
14239
14240 case IX86_BUILTIN_PFMAX:
14241 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
14242
14243 case IX86_BUILTIN_PFMIN:
14244 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
14245
14246 case IX86_BUILTIN_PFMUL:
14247 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
14248
14249 case IX86_BUILTIN_PFRCP:
14250 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
14251
14252 case IX86_BUILTIN_PFRCPIT1:
14253 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
14254
14255 case IX86_BUILTIN_PFRCPIT2:
14256 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
14257
14258 case IX86_BUILTIN_PFRSQIT1:
14259 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
14260
14261 case IX86_BUILTIN_PFRSQRT:
14262 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
14263
14264 case IX86_BUILTIN_PFSUB:
14265 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
14266
14267 case IX86_BUILTIN_PFSUBR:
14268 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
14269
14270 case IX86_BUILTIN_PI2FD:
14271 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
14272
14273 case IX86_BUILTIN_PMULHRW:
14274 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
14275
47f339cf
BS
14276 case IX86_BUILTIN_PF2IW:
14277 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
14278
14279 case IX86_BUILTIN_PFNACC:
14280 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
14281
14282 case IX86_BUILTIN_PFPNACC:
14283 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
14284
14285 case IX86_BUILTIN_PI2FW:
14286 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
14287
14288 case IX86_BUILTIN_PSWAPDSI:
14289 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
14290
14291 case IX86_BUILTIN_PSWAPDSF:
14292 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
14293
e37af218
RH
14294 case IX86_BUILTIN_SSE_ZERO:
14295 target = gen_reg_rtx (V4SFmode);
4977bab6 14296 emit_insn (gen_sse_clrv4sf (target, CONST0_RTX (V4SFmode)));
bd793c65
BS
14297 return target;
14298
bd793c65
BS
14299 case IX86_BUILTIN_MMX_ZERO:
14300 target = gen_reg_rtx (DImode);
14301 emit_insn (gen_mmx_clrdi (target));
14302 return target;
14303
f02e1358
JH
14304 case IX86_BUILTIN_CLRTI:
14305 target = gen_reg_rtx (V2DImode);
14306 emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode, target, V2DImode, 0)));
14307 return target;
14308
14309
fbe5eb6d
BS
14310 case IX86_BUILTIN_SQRTSD:
14311 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
14312 case IX86_BUILTIN_LOADAPD:
14313 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
14314 case IX86_BUILTIN_LOADUPD:
14315 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
14316
14317 case IX86_BUILTIN_STOREAPD:
14318 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14319 case IX86_BUILTIN_STOREUPD:
14320 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
14321
14322 case IX86_BUILTIN_LOADSD:
14323 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
14324
14325 case IX86_BUILTIN_STORESD:
14326 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
14327
14328 case IX86_BUILTIN_SETPD1:
14329 target = assign_386_stack_local (DFmode, 0);
14330 arg0 = TREE_VALUE (arglist);
14331 emit_move_insn (adjust_address (target, DFmode, 0),
14332 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14333 op0 = gen_reg_rtx (V2DFmode);
14334 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
60c81c89 14335 emit_insn (gen_sse2_shufpd (op0, op0, op0, const0_rtx));
fbe5eb6d
BS
14336 return op0;
14337
14338 case IX86_BUILTIN_SETPD:
14339 target = assign_386_stack_local (V2DFmode, 0);
14340 arg0 = TREE_VALUE (arglist);
14341 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14342 emit_move_insn (adjust_address (target, DFmode, 0),
14343 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14344 emit_move_insn (adjust_address (target, DFmode, 8),
14345 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
14346 op0 = gen_reg_rtx (V2DFmode);
14347 emit_insn (gen_sse2_movapd (op0, target));
14348 return op0;
14349
14350 case IX86_BUILTIN_LOADRPD:
14351 target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
14352 gen_reg_rtx (V2DFmode), 1);
60c81c89 14353 emit_insn (gen_sse2_shufpd (target, target, target, const1_rtx));
fbe5eb6d
BS
14354 return target;
14355
14356 case IX86_BUILTIN_LOADPD1:
14357 target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
14358 gen_reg_rtx (V2DFmode), 1);
14359 emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
14360 return target;
14361
14362 case IX86_BUILTIN_STOREPD1:
14363 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14364 case IX86_BUILTIN_STORERPD:
14365 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14366
48126a97
JH
14367 case IX86_BUILTIN_CLRPD:
14368 target = gen_reg_rtx (V2DFmode);
14369 emit_insn (gen_sse_clrv2df (target));
14370 return target;
14371
fbe5eb6d
BS
14372 case IX86_BUILTIN_MFENCE:
14373 emit_insn (gen_sse2_mfence ());
14374 return 0;
14375 case IX86_BUILTIN_LFENCE:
14376 emit_insn (gen_sse2_lfence ());
14377 return 0;
14378
14379 case IX86_BUILTIN_CLFLUSH:
14380 arg0 = TREE_VALUE (arglist);
14381 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14382 icode = CODE_FOR_sse2_clflush;
1194ca05
JH
14383 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
14384 op0 = copy_to_mode_reg (Pmode, op0);
fbe5eb6d
BS
14385
14386 emit_insn (gen_sse2_clflush (op0));
14387 return 0;
14388
14389 case IX86_BUILTIN_MOVNTPD:
14390 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
14391 case IX86_BUILTIN_MOVNTDQ:
916b60b7 14392 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
fbe5eb6d
BS
14393 case IX86_BUILTIN_MOVNTI:
14394 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
14395
f02e1358
JH
14396 case IX86_BUILTIN_LOADDQA:
14397 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa, arglist, target, 1);
14398 case IX86_BUILTIN_LOADDQU:
14399 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
14400 case IX86_BUILTIN_LOADD:
14401 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd, arglist, target, 1);
14402
14403 case IX86_BUILTIN_STOREDQA:
14404 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa, arglist);
14405 case IX86_BUILTIN_STOREDQU:
14406 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
14407 case IX86_BUILTIN_STORED:
14408 return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist);
14409
22c7c85e
L
14410 case IX86_BUILTIN_MONITOR:
14411 arg0 = TREE_VALUE (arglist);
14412 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14413 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14414 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14415 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14416 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14417 if (!REG_P (op0))
14418 op0 = copy_to_mode_reg (SImode, op0);
14419 if (!REG_P (op1))
14420 op1 = copy_to_mode_reg (SImode, op1);
14421 if (!REG_P (op2))
14422 op2 = copy_to_mode_reg (SImode, op2);
14423 emit_insn (gen_monitor (op0, op1, op2));
14424 return 0;
14425
14426 case IX86_BUILTIN_MWAIT:
14427 arg0 = TREE_VALUE (arglist);
14428 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14429 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14430 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14431 if (!REG_P (op0))
14432 op0 = copy_to_mode_reg (SImode, op0);
14433 if (!REG_P (op1))
14434 op1 = copy_to_mode_reg (SImode, op1);
14435 emit_insn (gen_mwait (op0, op1));
14436 return 0;
14437
14438 case IX86_BUILTIN_LOADDDUP:
14439 return ix86_expand_unop_builtin (CODE_FOR_loadddup, arglist, target, 1);
14440
14441 case IX86_BUILTIN_LDDQU:
14442 return ix86_expand_unop_builtin (CODE_FOR_lddqu, arglist, target,
14443 1);
14444
bd793c65
BS
14445 default:
14446 break;
14447 }
14448
ca7558fc 14449 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
bd793c65
BS
14450 if (d->code == fcode)
14451 {
14452 /* Compares are treated specially. */
14453 if (d->icode == CODE_FOR_maskcmpv4sf3
14454 || d->icode == CODE_FOR_vmmaskcmpv4sf3
14455 || d->icode == CODE_FOR_maskncmpv4sf3
fbe5eb6d
BS
14456 || d->icode == CODE_FOR_vmmaskncmpv4sf3
14457 || d->icode == CODE_FOR_maskcmpv2df3
14458 || d->icode == CODE_FOR_vmmaskcmpv2df3
14459 || d->icode == CODE_FOR_maskncmpv2df3
14460 || d->icode == CODE_FOR_vmmaskncmpv2df3)
bd793c65
BS
14461 return ix86_expand_sse_compare (d, arglist, target);
14462
14463 return ix86_expand_binop_builtin (d->icode, arglist, target);
14464 }
14465
ca7558fc 14466 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
bd793c65
BS
14467 if (d->code == fcode)
14468 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
0f290768 14469
ca7558fc 14470 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
bd793c65
BS
14471 if (d->code == fcode)
14472 return ix86_expand_sse_comi (d, arglist, target);
0f290768 14473
bd793c65
BS
14474 /* @@@ Should really do something sensible here. */
14475 return 0;
bd793c65 14476}
4211a8fb
JH
14477
/* Store OPERAND to the memory after reload is completed.  This means
   that we can't easily use assign_stack_local.  */
rtx
ix86_force_to_memory (enum machine_mode mode, rtx operand)
{
  rtx result;
  /* Only valid after reload; we rely on the final stack layout and on
     being allowed to emit raw pushes.  */
  if (!reload_completed)
    abort ();
  if (TARGET_RED_ZONE)
    {
      /* The red zone below the stack pointer may be used without moving
	 the stack pointer at all.  */
      result = gen_rtx_MEM (mode,
			    gen_rtx_PLUS (Pmode,
					  stack_pointer_rtx,
					  GEN_INT (-RED_ZONE_SIZE)));
      emit_move_insn (result, operand);
    }
  else if (!TARGET_RED_ZONE && TARGET_64BIT)
    {
      /* 64-bit without a red zone: push a full 64-bit word.  */
      switch (mode)
	{
	case HImode:
	case SImode:
	  /* Widen narrow values so a single DImode push suffices.  */
	  operand = gen_lowpart (DImode, operand);
	  /* FALLTHRU */
	case DImode:
	  emit_insn (
	    gen_rtx_SET (VOIDmode,
			 gen_rtx_MEM (DImode,
				      gen_rtx_PRE_DEC (DImode,
						       stack_pointer_rtx)),
			 operand));
	  break;
	default:
	  abort ();
	}
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  else
    {
      /* 32-bit: emit SImode pushes.  */
      switch (mode)
	{
	case DImode:
	  {
	    rtx operands[2];
	    /* Split the 64-bit value and push high word first so the
	       value ends up in memory order at the new stack top.  */
	    split_di (&operand, 1, operands, operands + 1);
	    emit_insn (
	      gen_rtx_SET (VOIDmode,
			   gen_rtx_MEM (SImode,
					gen_rtx_PRE_DEC (Pmode,
							 stack_pointer_rtx)),
			   operands[1]));
	    emit_insn (
	      gen_rtx_SET (VOIDmode,
			   gen_rtx_MEM (SImode,
					gen_rtx_PRE_DEC (Pmode,
							 stack_pointer_rtx)),
			   operands[0]));
	  }
	  break;
	case HImode:
	  /* It is better to store HImodes as SImodes.  */
	  if (!TARGET_PARTIAL_REG_STALL)
	    operand = gen_lowpart (SImode, operand);
	  /* FALLTHRU */
	case SImode:
	  emit_insn (
	    gen_rtx_SET (VOIDmode,
			 gen_rtx_MEM (GET_MODE (operand),
				      gen_rtx_PRE_DEC (SImode,
						       stack_pointer_rtx)),
			 operand));
	  break;
	default:
	  abort ();
	}
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  /* The stored value now lives at the (possibly adjusted) stack top;
     the caller must release it with ix86_free_from_memory.  */
  return result;
}
14557
14558/* Free operand from the memory. */
14559void
b96a374d 14560ix86_free_from_memory (enum machine_mode mode)
4211a8fb 14561{
a5b378d6 14562 if (!TARGET_RED_ZONE)
898d374d
JH
14563 {
14564 int size;
14565
14566 if (mode == DImode || TARGET_64BIT)
14567 size = 8;
14568 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
14569 size = 2;
14570 else
14571 size = 4;
14572 /* Use LEA to deallocate stack space. In peephole2 it will be converted
14573 to pop or add instruction if registers are available. */
14574 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
14575 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14576 GEN_INT (size))));
14577 }
4211a8fb 14578}
a946dd00 14579
f84aa48a
JH
/* Put float CONST_DOUBLE in the constant pool instead of fp regs.
   QImode must go into class Q_REGS.
   Narrow ALL_REGS to GENERAL_REGS.  This supports allowing movsf and
   movdf to do mem-to-mem moves through integer regs.  */
enum reg_class
ix86_preferred_reload_class (rtx x, enum reg_class class)
{
  /* Non-zero vector constants must come from the constant pool; no
     register class can load them directly.  */
  if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x)))
    return NO_REGS;
  /* Floating-point CONST_DOUBLE (VOIDmode would be an integer pair).  */
  if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
    {
      /* SSE can't load any constant directly yet.  */
      if (SSE_CLASS_P (class))
	return NO_REGS;
      /* Floats can load 0 and 1.  */
      if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
	{
	  /* Limit class to non-SSE.  Use GENERAL_REGS if possible.  */
	  if (MAYBE_SSE_CLASS_P (class))
	    return (reg_class_subset_p (class, GENERAL_REGS)
		    ? GENERAL_REGS : FLOAT_REGS);
	  else
	    return class;
	}
      /* General regs can load everything.  */
      if (reg_class_subset_p (class, GENERAL_REGS))
	return GENERAL_REGS;
      /* In case we haven't resolved FLOAT or SSE yet, give up.  */
      if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
	return NO_REGS;
    }
  /* MMX registers cannot be loaded with constants directly.  */
  if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
    return NO_REGS;
  /* QImode values need a byte-addressable register (a-d on ia32).  */
  if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
    return Q_REGS;
  return class;
}
14617
/* If we are copying between general and FP registers, we need a memory
   location.  The same is true for SSE and MMX registers.

   The macro can't work reliably when one of the CLASSES is class containing
   registers from multiple units (SSE, MMX, integer).  We avoid this by never
   combining those units in single alternative in the machine description.
   Ensure that this constraint holds to avoid unexpected surprises.

   When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
   enforce these sanity checks.  */
int
ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
			      enum machine_mode mode, int strict)
{
  /* A "maybe" class that is not a pure class mixes register units and
     violates the invariant documented above.  */
  if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
      || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
      || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
      || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
      || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
      || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
    {
      if (strict)
	abort ();
      else
	/* From REGISTER_MOVE_COST: pessimistically claim memory is
	   needed rather than aborting.  */
	return 1;
    }
  /* Memory is required between x87 and non-x87, and between SSE/MMX and
     the integer unit except for SImode (or DImode on 64-bit) moves on
     targets with direct inter-unit moves enabled.  */
  return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
	  || ((SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
	       || MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
	      && ((mode != SImode && (mode != DImode || !TARGET_64BIT))
		  || (!TARGET_INTER_UNIT_MOVES && !optimize_size))));
}
/* Return the cost of moving data from a register in class CLASS1 to
   one in class CLASS2.

   It is not required that the cost always equal 2 when FROM is the same as TO;
   on some machines it is expensive to move between registers if they are not
   general registers.  */
int
ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
			 enum reg_class class2)
{
  /* In case we require secondary memory, compute cost of the store followed
     by load.  In order to avoid bad register allocation choices, we need
     for this to be *at least* as high as the symmetric MEMORY_MOVE_COST.  */

  if (ix86_secondary_memory_needed (class1, class2, mode, 0))
    {
      int cost = 1;

      /* Worst of in/out memory cost for each side of the bounce.  */
      cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
		   MEMORY_MOVE_COST (mode, class1, 1));
      cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
		   MEMORY_MOVE_COST (mode, class2, 1));

      /* In case of copying from general_purpose_register we may emit multiple
	 stores followed by single load causing memory size mismatch stall.
	 Count this as arbitrarily high cost of 20.  */
      if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
	cost += 20;

      /* In the case of FP/MMX moves, the registers actually overlap, and we
	 have to switch modes in order to treat them differently.  */
      if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
	  || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
	cost += 20;

      return cost;
    }

  /* Moves between SSE/MMX and integer unit are expensive.  */
  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
      || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
    return ix86_cost->mmxsse_to_integer;
  /* Intra-unit moves: cost comes from the per-CPU cost tables.  */
  if (MAYBE_FLOAT_CLASS_P (class1))
    return ix86_cost->fp_move;
  if (MAYBE_SSE_CLASS_P (class1))
    return ix86_cost->sse_move;
  if (MAYBE_MMX_CLASS_P (class1))
    return ix86_cost->mmx_move;
  /* Plain integer register-to-register move: the default cost.  */
  return 2;
}
14700
a946dd00
JH
14701/* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
14702int
b96a374d 14703ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
a946dd00
JH
14704{
14705 /* Flags and only flags can only hold CCmode values. */
14706 if (CC_REGNO_P (regno))
14707 return GET_MODE_CLASS (mode) == MODE_CC;
14708 if (GET_MODE_CLASS (mode) == MODE_CC
14709 || GET_MODE_CLASS (mode) == MODE_RANDOM
14710 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
14711 return 0;
14712 if (FP_REGNO_P (regno))
14713 return VALID_FP_MODE_P (mode);
14714 if (SSE_REGNO_P (regno))
a67a3220 14715 return (TARGET_SSE ? VALID_SSE_REG_MODE (mode) : 0);
a946dd00 14716 if (MMX_REGNO_P (regno))
a67a3220
JH
14717 return (TARGET_MMX
14718 ? VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode) : 0);
a946dd00
JH
14719 /* We handle both integer and floats in the general purpose registers.
14720 In future we should be able to handle vector modes as well. */
14721 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
14722 return 0;
14723 /* Take care for QImode values - they can be in non-QI regs, but then
14724 they do cause partial register stalls. */
d2836273 14725 if (regno < 4 || mode != QImode || TARGET_64BIT)
a946dd00
JH
14726 return 1;
14727 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
14728}
fa79946e
JH
14729
14730/* Return the cost of moving data of mode M between a
14731 register and memory. A value of 2 is the default; this cost is
14732 relative to those in `REGISTER_MOVE_COST'.
14733
14734 If moving between registers and memory is more expensive than
14735 between two registers, you should define this macro to express the
a4f31c00
AJ
14736 relative cost.
14737
fa79946e
JH
14738 Model also increased moving costs of QImode registers in non
14739 Q_REGS classes.
14740 */
14741int
b96a374d 14742ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
fa79946e
JH
14743{
14744 if (FLOAT_CLASS_P (class))
14745 {
14746 int index;
14747 switch (mode)
14748 {
14749 case SFmode:
14750 index = 0;
14751 break;
14752 case DFmode:
14753 index = 1;
14754 break;
14755 case XFmode:
fa79946e
JH
14756 index = 2;
14757 break;
14758 default:
14759 return 100;
14760 }
14761 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
14762 }
14763 if (SSE_CLASS_P (class))
14764 {
14765 int index;
14766 switch (GET_MODE_SIZE (mode))
14767 {
14768 case 4:
14769 index = 0;
14770 break;
14771 case 8:
14772 index = 1;
14773 break;
14774 case 16:
14775 index = 2;
14776 break;
14777 default:
14778 return 100;
14779 }
14780 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
14781 }
14782 if (MMX_CLASS_P (class))
14783 {
14784 int index;
14785 switch (GET_MODE_SIZE (mode))
14786 {
14787 case 4:
14788 index = 0;
14789 break;
14790 case 8:
14791 index = 1;
14792 break;
14793 default:
14794 return 100;
14795 }
14796 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
14797 }
14798 switch (GET_MODE_SIZE (mode))
14799 {
14800 case 1:
14801 if (in)
14802 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
14803 : ix86_cost->movzbl_load);
14804 else
14805 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
14806 : ix86_cost->int_store[0] + 4);
14807 break;
14808 case 2:
14809 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
14810 default:
14811 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
14812 if (mode == TFmode)
14813 mode = XFmode;
3bb7e126 14814 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
bce75972
VM
14815 * (((int) GET_MODE_SIZE (mode)
14816 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
fa79946e
JH
14817 }
14818}
0ecf09f9 14819
3c50106f
RH
14820/* Compute a (partial) cost for rtx X. Return true if the complete
14821 cost has been computed, and false if subexpressions should be
14822 scanned. In either case, *TOTAL contains the cost result. */
14823
14824static bool
b96a374d 14825ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
3c50106f
RH
14826{
14827 enum machine_mode mode = GET_MODE (x);
14828
14829 switch (code)
14830 {
14831 case CONST_INT:
14832 case CONST:
14833 case LABEL_REF:
14834 case SYMBOL_REF:
14835 if (TARGET_64BIT && !x86_64_sign_extended_value (x))
14836 *total = 3;
14837 else if (TARGET_64BIT && !x86_64_zero_extended_value (x))
14838 *total = 2;
3504dad3
JH
14839 else if (flag_pic && SYMBOLIC_CONST (x)
14840 && (!TARGET_64BIT
14841 || (!GET_CODE (x) != LABEL_REF
14842 && (GET_CODE (x) != SYMBOL_REF
12969f45 14843 || !SYMBOL_REF_LOCAL_P (x)))))
3c50106f
RH
14844 *total = 1;
14845 else
14846 *total = 0;
14847 return true;
14848
14849 case CONST_DOUBLE:
14850 if (mode == VOIDmode)
14851 *total = 0;
14852 else
14853 switch (standard_80387_constant_p (x))
14854 {
14855 case 1: /* 0.0 */
14856 *total = 1;
14857 break;
881b2a96 14858 default: /* Other constants */
3c50106f
RH
14859 *total = 2;
14860 break;
881b2a96
RS
14861 case 0:
14862 case -1:
3c50106f
RH
14863 /* Start with (MEM (SYMBOL_REF)), since that's where
14864 it'll probably end up. Add a penalty for size. */
14865 *total = (COSTS_N_INSNS (1)
3504dad3 14866 + (flag_pic != 0 && !TARGET_64BIT)
3c50106f
RH
14867 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
14868 break;
14869 }
14870 return true;
14871
14872 case ZERO_EXTEND:
14873 /* The zero extensions is often completely free on x86_64, so make
14874 it as cheap as possible. */
14875 if (TARGET_64BIT && mode == DImode
14876 && GET_MODE (XEXP (x, 0)) == SImode)
14877 *total = 1;
14878 else if (TARGET_ZERO_EXTEND_WITH_AND)
14879 *total = COSTS_N_INSNS (ix86_cost->add);
14880 else
14881 *total = COSTS_N_INSNS (ix86_cost->movzx);
14882 return false;
14883
14884 case SIGN_EXTEND:
14885 *total = COSTS_N_INSNS (ix86_cost->movsx);
14886 return false;
14887
14888 case ASHIFT:
14889 if (GET_CODE (XEXP (x, 1)) == CONST_INT
14890 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
14891 {
14892 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
14893 if (value == 1)
14894 {
14895 *total = COSTS_N_INSNS (ix86_cost->add);
14896 return false;
14897 }
14898 if ((value == 2 || value == 3)
14899 && !TARGET_DECOMPOSE_LEA
14900 && ix86_cost->lea <= ix86_cost->shift_const)
14901 {
14902 *total = COSTS_N_INSNS (ix86_cost->lea);
14903 return false;
14904 }
14905 }
5efb1046 14906 /* FALLTHRU */
3c50106f
RH
14907
14908 case ROTATE:
14909 case ASHIFTRT:
14910 case LSHIFTRT:
14911 case ROTATERT:
14912 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
14913 {
14914 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14915 {
14916 if (INTVAL (XEXP (x, 1)) > 32)
14917 *total = COSTS_N_INSNS(ix86_cost->shift_const + 2);
14918 else
14919 *total = COSTS_N_INSNS(ix86_cost->shift_const * 2);
14920 }
14921 else
14922 {
14923 if (GET_CODE (XEXP (x, 1)) == AND)
14924 *total = COSTS_N_INSNS(ix86_cost->shift_var * 2);
14925 else
14926 *total = COSTS_N_INSNS(ix86_cost->shift_var * 6 + 2);
14927 }
14928 }
14929 else
14930 {
14931 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14932 *total = COSTS_N_INSNS (ix86_cost->shift_const);
14933 else
14934 *total = COSTS_N_INSNS (ix86_cost->shift_var);
14935 }
14936 return false;
14937
14938 case MULT:
14939 if (FLOAT_MODE_P (mode))
3c50106f 14940 {
4a5eab38
PB
14941 *total = COSTS_N_INSNS (ix86_cost->fmul);
14942 return false;
3c50106f
RH
14943 }
14944 else
14945 {
4a5eab38
PB
14946 rtx op0 = XEXP (x, 0);
14947 rtx op1 = XEXP (x, 1);
14948 int nbits;
14949 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14950 {
14951 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
14952 for (nbits = 0; value != 0; value &= value - 1)
14953 nbits++;
14954 }
14955 else
14956 /* This is arbitrary. */
14957 nbits = 7;
14958
14959 /* Compute costs correctly for widening multiplication. */
14960 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op1) == ZERO_EXTEND)
14961 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
14962 == GET_MODE_SIZE (mode))
14963 {
14964 int is_mulwiden = 0;
14965 enum machine_mode inner_mode = GET_MODE (op0);
14966
14967 if (GET_CODE (op0) == GET_CODE (op1))
14968 is_mulwiden = 1, op1 = XEXP (op1, 0);
14969 else if (GET_CODE (op1) == CONST_INT)
14970 {
14971 if (GET_CODE (op0) == SIGN_EXTEND)
14972 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
14973 == INTVAL (op1);
14974 else
14975 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
14976 }
14977
14978 if (is_mulwiden)
14979 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
14980 }
14981
14982 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
14983 + nbits * ix86_cost->mult_bit)
14984 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code);
14985
14986 return true;
3c50106f 14987 }
3c50106f
RH
14988
14989 case DIV:
14990 case UDIV:
14991 case MOD:
14992 case UMOD:
14993 if (FLOAT_MODE_P (mode))
14994 *total = COSTS_N_INSNS (ix86_cost->fdiv);
14995 else
14996 *total = COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)]);
14997 return false;
14998
14999 case PLUS:
15000 if (FLOAT_MODE_P (mode))
15001 *total = COSTS_N_INSNS (ix86_cost->fadd);
15002 else if (!TARGET_DECOMPOSE_LEA
15003 && GET_MODE_CLASS (mode) == MODE_INT
15004 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
15005 {
15006 if (GET_CODE (XEXP (x, 0)) == PLUS
15007 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
15008 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
15009 && CONSTANT_P (XEXP (x, 1)))
15010 {
15011 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
15012 if (val == 2 || val == 4 || val == 8)
15013 {
15014 *total = COSTS_N_INSNS (ix86_cost->lea);
15015 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
15016 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
15017 outer_code);
15018 *total += rtx_cost (XEXP (x, 1), outer_code);
15019 return true;
15020 }
15021 }
15022 else if (GET_CODE (XEXP (x, 0)) == MULT
15023 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
15024 {
15025 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
15026 if (val == 2 || val == 4 || val == 8)
15027 {
15028 *total = COSTS_N_INSNS (ix86_cost->lea);
15029 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
15030 *total += rtx_cost (XEXP (x, 1), outer_code);
15031 return true;
15032 }
15033 }
15034 else if (GET_CODE (XEXP (x, 0)) == PLUS)
15035 {
15036 *total = COSTS_N_INSNS (ix86_cost->lea);
15037 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
15038 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
15039 *total += rtx_cost (XEXP (x, 1), outer_code);
15040 return true;
15041 }
15042 }
5efb1046 15043 /* FALLTHRU */
3c50106f
RH
15044
15045 case MINUS:
15046 if (FLOAT_MODE_P (mode))
15047 {
15048 *total = COSTS_N_INSNS (ix86_cost->fadd);
15049 return false;
15050 }
5efb1046 15051 /* FALLTHRU */
3c50106f
RH
15052
15053 case AND:
15054 case IOR:
15055 case XOR:
15056 if (!TARGET_64BIT && mode == DImode)
15057 {
15058 *total = (COSTS_N_INSNS (ix86_cost->add) * 2
15059 + (rtx_cost (XEXP (x, 0), outer_code)
15060 << (GET_MODE (XEXP (x, 0)) != DImode))
15061 + (rtx_cost (XEXP (x, 1), outer_code)
b96a374d 15062 << (GET_MODE (XEXP (x, 1)) != DImode)));
3c50106f
RH
15063 return true;
15064 }
5efb1046 15065 /* FALLTHRU */
3c50106f
RH
15066
15067 case NEG:
15068 if (FLOAT_MODE_P (mode))
15069 {
15070 *total = COSTS_N_INSNS (ix86_cost->fchs);
15071 return false;
15072 }
5efb1046 15073 /* FALLTHRU */
3c50106f
RH
15074
15075 case NOT:
15076 if (!TARGET_64BIT && mode == DImode)
15077 *total = COSTS_N_INSNS (ix86_cost->add * 2);
15078 else
15079 *total = COSTS_N_INSNS (ix86_cost->add);
15080 return false;
15081
15082 case FLOAT_EXTEND:
15083 if (!TARGET_SSE_MATH || !VALID_SSE_REG_MODE (mode))
15084 *total = 0;
15085 return false;
15086
15087 case ABS:
15088 if (FLOAT_MODE_P (mode))
15089 *total = COSTS_N_INSNS (ix86_cost->fabs);
15090 return false;
15091
15092 case SQRT:
15093 if (FLOAT_MODE_P (mode))
15094 *total = COSTS_N_INSNS (ix86_cost->fsqrt);
15095 return false;
15096
74dc3e94
RH
15097 case UNSPEC:
15098 if (XINT (x, 1) == UNSPEC_TP)
15099 *total = 0;
15100 return false;
15101
3c50106f
RH
15102 default:
15103 return false;
15104 }
15105}
15106
#if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
/* Register SYMBOL as a static constructor on SVR3 targets by pushing
   its address from the init section; PRIORITY is not supported.  */
static void
ix86_svr3_asm_out_constructor (rtx symbol, int priority ATTRIBUTE_UNUSED)
{
  init_section ();
  fprintf (asm_out_file, "\tpushl $");
  assemble_name (asm_out_file, XSTR (symbol, 0));
  fprintf (asm_out_file, "\n");
}
#endif
162f023b 15117
#if TARGET_MACHO

/* Counter used to generate unique local labels (LPC$n / Ln$lz) for
   lazy-pointer stubs.  */
static int current_machopic_label_num;

/* Given a symbol name and its associated stub, write out the
   definition of the stub.  */

void
machopic_output_stub (FILE *file, const char *symb, const char *stub)
{
  unsigned int length;
  char *binder_name, *symbol_name, lazy_ptr_name[32];
  int label = ++current_machopic_label_num;

  /* Lose our funky encoding stuff so it doesn't contaminate the stub.  */
  symb = (*targetm.strip_name_encoding) (symb);

  length = strlen (stub);
  binder_name = alloca (length + 32);
  GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);

  length = strlen (symb);
  symbol_name = alloca (length + 32);
  GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);

  sprintf (lazy_ptr_name, "L%d$lz", label);

  /* Position-independent stubs address the lazy pointer relative to a
     locally-materialized pic base; position-dependent ones jump
     through it directly.  */
  if (MACHOPIC_PURE)
    machopic_picsymbol_stub_section ();
  else
    machopic_symbol_stub_section ();

  fprintf (file, "%s:\n", stub);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);

  if (MACHOPIC_PURE)
    {
      /* call/pop obtains the pic base in %eax, then we load and jump
	 through the lazy pointer relative to it.  */
      fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
      fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
      fprintf (file, "\tjmp %%edx\n");
    }
  else
    fprintf (file, "\tjmp *%s\n", lazy_ptr_name);

  /* The binder stub pushes the address of the lazy pointer and enters
     dyld's binding helper, which resolves the symbol and backpatches
     the lazy pointer.  */
  fprintf (file, "%s:\n", binder_name);

  if (MACHOPIC_PURE)
    {
      fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
      fprintf (file, "\tpushl %%eax\n");
    }
  else
    fprintf (file, "\t pushl $%s\n", lazy_ptr_name);

  fprintf (file, "\tjmp dyld_stub_binding_helper\n");

  /* Emit the lazy pointer itself, initially pointing at the binder so
     the first call triggers resolution.  */
  machopic_lazy_symbol_ptr_section ();
  fprintf (file, "%s:\n", lazy_ptr_name);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
  fprintf (file, "\t.long %s\n", binder_name);
}
#endif /* TARGET_MACHO */
15180
162f023b
JH
15181/* Order the registers for register allocator. */
15182
15183void
b96a374d 15184x86_order_regs_for_local_alloc (void)
162f023b
JH
15185{
15186 int pos = 0;
15187 int i;
15188
15189 /* First allocate the local general purpose registers. */
15190 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15191 if (GENERAL_REGNO_P (i) && call_used_regs[i])
15192 reg_alloc_order [pos++] = i;
15193
15194 /* Global general purpose registers. */
15195 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15196 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
15197 reg_alloc_order [pos++] = i;
15198
15199 /* x87 registers come first in case we are doing FP math
15200 using them. */
15201 if (!TARGET_SSE_MATH)
15202 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15203 reg_alloc_order [pos++] = i;
fce5a9f2 15204
162f023b
JH
15205 /* SSE registers. */
15206 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
15207 reg_alloc_order [pos++] = i;
15208 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
15209 reg_alloc_order [pos++] = i;
15210
d1f87653 15211 /* x87 registers. */
162f023b
JH
15212 if (TARGET_SSE_MATH)
15213 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15214 reg_alloc_order [pos++] = i;
15215
15216 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
15217 reg_alloc_order [pos++] = i;
15218
15219 /* Initialize the rest of array as we do not allocate some registers
15220 at all. */
15221 while (pos < FIRST_PSEUDO_REGISTER)
15222 reg_alloc_order [pos++] = 0;
15223}
194734e9 15224
4977bab6
ZW
15225#ifndef TARGET_USE_MS_BITFIELD_LAYOUT
15226#define TARGET_USE_MS_BITFIELD_LAYOUT 0
15227#endif
15228
fe77449a
DR
15229/* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
15230 struct attribute_spec.handler. */
15231static tree
b96a374d
AJ
15232ix86_handle_struct_attribute (tree *node, tree name,
15233 tree args ATTRIBUTE_UNUSED,
15234 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
fe77449a
DR
15235{
15236 tree *type = NULL;
15237 if (DECL_P (*node))
15238 {
15239 if (TREE_CODE (*node) == TYPE_DECL)
15240 type = &TREE_TYPE (*node);
15241 }
15242 else
15243 type = node;
15244
15245 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
15246 || TREE_CODE (*type) == UNION_TYPE)))
15247 {
15248 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
15249 *no_add_attrs = true;
15250 }
15251
15252 else if ((is_attribute_p ("ms_struct", name)
15253 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
15254 || ((is_attribute_p ("gcc_struct", name)
15255 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
15256 {
15257 warning ("`%s' incompatible attribute ignored",
15258 IDENTIFIER_POINTER (name));
15259 *no_add_attrs = true;
15260 }
15261
15262 return NULL_TREE;
15263}
15264
4977bab6 15265static bool
b96a374d 15266ix86_ms_bitfield_layout_p (tree record_type)
4977bab6 15267{
fe77449a 15268 return (TARGET_USE_MS_BITFIELD_LAYOUT &&
021bad8e 15269 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
fe77449a 15270 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
4977bab6
ZW
15271}
15272
/* Returns an expression indicating where the this parameter is
   located on entry to the FUNCTION.  */

static rtx
x86_this_parameter (tree function)
{
  tree type = TREE_TYPE (function);

  /* On 64-bit, `this' arrives in the first integer argument register,
     or the second when a hidden aggregate-return pointer occupies the
     first.  */
  if (TARGET_64BIT)
    {
      int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
      return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
    }

  /* With regparm/fastcall, `this' may be in a register on ia32 too.  */
  if (ix86_function_regparm (type, function) > 0)
    {
      tree parm;

      parm = TYPE_ARG_TYPES (type);
      /* Figure out whether or not the function has a variable number of
	 arguments.  */
      for (; parm; parm = TREE_CHAIN (parm))
	if (TREE_VALUE (parm) == void_type_node)
	  break;
      /* If not, the this parameter is in the first argument.  */
      if (parm)
	{
	  int regno = 0;
	  /* fastcall passes the first argument in %ecx rather than
	     %eax.  */
	  if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
	    regno = 2;
	  return gen_rtx_REG (SImode, regno);
	}
    }

  /* Otherwise `this' lives on the stack: past the return address, and
     also past the hidden return-slot pointer for aggregate returns.  */
  if (aggregate_value_p (TREE_TYPE (type), type))
    return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
  else
    return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
}
15312
3961e8fe
RH
15313/* Determine whether x86_output_mi_thunk can succeed. */
15314
15315static bool
b96a374d
AJ
15316x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
15317 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
15318 HOST_WIDE_INT vcall_offset, tree function)
3961e8fe
RH
15319{
15320 /* 64-bit can handle anything. */
15321 if (TARGET_64BIT)
15322 return true;
15323
15324 /* For 32-bit, everything's fine if we have one free register. */
e767b5be 15325 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
3961e8fe
RH
15326 return true;
15327
15328 /* Need a free register for vcall_offset. */
15329 if (vcall_offset)
15330 return false;
15331
15332 /* Need a free register for GOT references. */
15333 if (flag_pic && !(*targetm.binds_local_p) (function))
15334 return false;
15335
15336 /* Otherwise ok. */
15337 return true;
15338}
15339
/* Output the assembler code for a thunk function.  THUNK_DECL is the
   declaration for the thunk function itself, FUNCTION is the decl for
   the target function.  DELTA is an immediate constant offset to be
   added to THIS.  If VCALL_OFFSET is nonzero, the word at
   *(*this + vcall_offset) should be added to THIS.  */

static void
x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
		     tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
		     HOST_WIDE_INT vcall_offset, tree function)
{
  rtx xops[3];
  rtx this = x86_this_parameter (function);
  rtx this_reg, tmp;

  /* If VCALL_OFFSET, we'll need THIS in a register.  Might as well
     pull it in now and let DELTA benefit.  */
  if (REG_P (this))
    this_reg = this;
  else if (vcall_offset)
    {
      /* Put the this parameter into %eax.  */
      xops[0] = this;
      xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
      output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
    }
  else
    this_reg = NULL_RTX;

  /* Adjust the this parameter by a fixed constant.  */
  if (delta)
    {
      xops[0] = GEN_INT (delta);
      xops[1] = this_reg ? this_reg : this;
      if (TARGET_64BIT)
	{
	  /* DELTA may not fit in an add immediate on x86-64; stage it
	     through the call-clobbered scratch register %r10.  */
	  if (!x86_64_general_operand (xops[0], DImode))
	    {
	      tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
	      xops[1] = tmp;
	      output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
	      xops[0] = tmp;
	      xops[1] = this;
	    }
	  output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
	}
      else
	output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
    }

  /* Adjust the this parameter by a value stored in the vtable.  */
  if (vcall_offset)
    {
      /* Pick a scratch register for the vtable pointer: %r10 on
	 64-bit; %ecx on ia32 unless fastcall claims %ecx for an
	 argument, in which case use %eax.  */
      if (TARGET_64BIT)
	tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
      else
	{
	  int tmp_regno = 2 /* ECX */;
	  if (lookup_attribute ("fastcall",
				TYPE_ATTRIBUTES (TREE_TYPE (function))))
	    tmp_regno = 0 /* EAX */;
	  tmp = gen_rtx_REG (SImode, tmp_regno);
	}

      /* Load the vtable pointer: tmp = *this.  */
      xops[0] = gen_rtx_MEM (Pmode, this_reg);
      xops[1] = tmp;
      if (TARGET_64BIT)
	output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
      else
	output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);

      /* Adjust the this parameter.  */
      xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
      if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
	{
	  /* VCALL_OFFSET overflows a 32-bit displacement; form the
	     address with an extra register, %r11.  */
	  rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
	  xops[0] = GEN_INT (vcall_offset);
	  xops[1] = tmp2;
	  output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
	  xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
	}
      xops[1] = this_reg;
      if (TARGET_64BIT)
	output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
      else
	output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
    }

  /* If necessary, drop THIS back to its stack slot.  */
  if (this_reg && this_reg != this)
    {
      xops[0] = this_reg;
      xops[1] = this;
      output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
    }

  /* Finally, tail-call the target function.  */
  xops[0] = XEXP (DECL_RTL (function), 0);
  if (TARGET_64BIT)
    {
      if (!flag_pic || (*targetm.binds_local_p) (function))
	output_asm_insn ("jmp\t%P0", xops);
      else
	{
	  /* Non-local PIC target: jump indirectly through its GOT
	     slot.  */
	  tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
	  tmp = gen_rtx_CONST (Pmode, tmp);
	  tmp = gen_rtx_MEM (QImode, tmp);
	  xops[0] = tmp;
	  output_asm_insn ("jmp\t%A0", xops);
	}
    }
  else
    {
      if (!flag_pic || (*targetm.binds_local_p) (function))
	output_asm_insn ("jmp\t%P0", xops);
      else
#if TARGET_MACHO
	if (TARGET_MACHO)
	  {
	    /* Darwin PIC: jump through the Mach-O symbol stub.  */
	    const char *ip = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (function));
	    tmp = gen_rtx_SYMBOL_REF (Pmode, machopic_stub_name (ip));
	    tmp = gen_rtx_MEM (QImode, tmp);
	    xops[0] = tmp;
	    output_asm_insn ("jmp\t%0", xops);
	  }
	else
#endif /* TARGET_MACHO */
	{
	  /* ELF PIC: materialize the GOT pointer in %ecx (no longer
	     needed for `this' at this point) and jump through the GOT
	     entry.  */
	  tmp = gen_rtx_REG (SImode, 2 /* ECX */);
	  output_set_got (tmp);

	  xops[1] = tmp;
	  output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
	  output_asm_insn ("jmp\t{*}%1", xops);
	}
    }
}
e2500fed 15476
/* Emit i386-specific directives at the top of the assembly output,
   after the generic file-start boilerplate.  */
static void
x86_file_start (void)
{
  default_file_start ();
  /* Some SVR4-derived assemblers expect a .version directive.  */
  if (X86_FILE_START_VERSION_DIRECTIVE)
    fputs ("\t.version\t\"01.01\"\n", asm_out_file);
  /* Windows-style runtimes want __fltused referenced when FP may be
     used.  */
  if (X86_FILE_START_FLTUSED)
    fputs ("\t.global\t__fltused\n", asm_out_file);
  /* Switch the assembler into Intel syntax when requested.  */
  if (ix86_asm_dialect == ASM_INTEL)
    fputs ("\t.intel_syntax\n", asm_out_file);
}
15488
e932b21b 15489int
b96a374d 15490x86_field_alignment (tree field, int computed)
e932b21b
JH
15491{
15492 enum machine_mode mode;
ad9335eb
JJ
15493 tree type = TREE_TYPE (field);
15494
15495 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
e932b21b 15496 return computed;
ad9335eb
JJ
15497 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
15498 ? get_inner_array_type (type) : type);
39e3a681
JJ
15499 if (mode == DFmode || mode == DCmode
15500 || GET_MODE_CLASS (mode) == MODE_INT
15501 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
e932b21b
JH
15502 return MIN (32, computed);
15503 return computed;
15504}
15505
/* Output assembler code to FILE to increment profiler label # LABELNO
   for profiling a function entry.  */
void
x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
{
  if (TARGET_64BIT)
    if (flag_pic)
      {
	/* 64-bit PIC: counter address is rip-relative, mcount is
	   called through the GOT.  */
#ifndef NO_PROFILE_COUNTERS
	fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
#endif
	fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
      }
    else
      {
	/* 64-bit non-PIC: absolute addresses are fine.  */
#ifndef NO_PROFILE_COUNTERS
	fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
#endif
	fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
      }
  else if (flag_pic)
    {
      /* ia32 PIC: address the counter GOT-relative off %ebx, which
	 the prologue has already set up.  */
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
	       LPREFIX, labelno, PROFILE_COUNT_REGISTER);
#endif
      fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
    }
  else
    {
      /* ia32 non-PIC: plain absolute addressing.  */
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
	       PROFILE_COUNT_REGISTER);
#endif
      fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
    }
}
15543
/* We don't have exact information about the insn sizes, but we may assume
   quite safely that we are informed about all 1 byte insns and memory
   address sizes.  This is enough to eliminate unnecessary padding in
   99% of cases.  */

static int
min_insn_size (rtx insn)
{
  int l = 0;

  if (!INSN_P (insn) || !active_insn_p (insn))
    return 0;

  /* Discard alignments we've emit and jump instructions.  */
  if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
      && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
    return 0;
  if (GET_CODE (insn) == JUMP_INSN
      && (GET_CODE (PATTERN (insn)) == ADDR_VEC
	  || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
    return 0;

  /* Important case - calls are always 5 bytes.
     It is common to have many calls in the row.  */
  if (GET_CODE (insn) == CALL_INSN
      && symbolic_reference_mentioned_p (PATTERN (insn))
      && !SIBLING_CALL_P (insn))
    return 5;
  if (get_attr_length (insn) <= 1)
    return 1;

  /* For normal instructions we may rely on the sizes of addresses
     and the presence of symbol to require 4 bytes of encoding.
     This is not the case for jumps where references are PC relative.  */
  if (GET_CODE (insn) != JUMP_INSN)
    {
      l = get_attr_length_address (insn);
      if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
	l = 4;
    }
  /* Assume at least one opcode byte on top of the address bytes; this
     is a lower bound on the real encoding length, which is all the
     jump-padding pass needs.  */
  if (l)
    return 1+l;
  else
    return 2;
}
15589
/* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
   window.  */

static void
ix86_avoid_jump_misspredicts (void)
{
  rtx insn, start = get_insns ();
  int nbytes = 0, njumps = 0;
  int isjump = 0;

  /* Look for all minimal intervals of instructions containing 4 jumps.
     The intervals are bounded by START and INSN.  NBYTES is the total
     size of instructions in the interval including INSN and not including
     START.  When the NBYTES is smaller than 16 bytes, it is possible
     that the end of START and INSN ends up in the same 16byte page.

     The smallest offset in the page INSN can start is the case where START
     ends on the offset 0.  Offset of INSN is then NBYTES - sizeof (INSN).
     We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
     */
  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {

      nbytes += min_insn_size (insn);
      if (dump_file)
	fprintf(dump_file, "Insn %i estimated to %i bytes\n",
		INSN_UID (insn), min_insn_size (insn));
      /* Only branches and calls count as jumps; dispatch tables are
	 data and were already sized as 0.  */
      if ((GET_CODE (insn) == JUMP_INSN
	   && GET_CODE (PATTERN (insn)) != ADDR_VEC
	   && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
	  || GET_CODE (insn) == CALL_INSN)
	njumps++;
      else
	continue;

      /* Shrink the interval from the left until it contains at most
	 three jumps; ISJUMP remembers whether the last insn dropped
	 was itself a jump.  */
      while (njumps > 3)
	{
	  start = NEXT_INSN (start);
	  if ((GET_CODE (start) == JUMP_INSN
	       && GET_CODE (PATTERN (start)) != ADDR_VEC
	       && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
	      || GET_CODE (start) == CALL_INSN)
	    njumps--, isjump = 1;
	  else
	    isjump = 0;
	  nbytes -= min_insn_size (start);
	}
      if (njumps < 0)
	abort ();
      if (dump_file)
	fprintf (dump_file, "Interval %i to %i has %i bytes\n",
		 INSN_UID (start), INSN_UID (insn), nbytes);

      /* Four jumps could share a 16-byte window: emit padding so that
	 INSN is forced into the next window.  */
      if (njumps == 3 && isjump && nbytes < 16)
	{
	  int padsize = 15 - nbytes + min_insn_size (insn);

	  if (dump_file)
	    fprintf (dump_file, "Padding insn %i by %i bytes!\n",
		     INSN_UID (insn), padsize);
	  emit_insn_before (gen_align (GEN_INT (padsize)), insn);
	}
    }
}
15654
/* AMD Athlon works faster
   when RET is not destination of conditional jump or directly preceded
   by other jump instruction.  We avoid the penalty by inserting NOP just
   before the RET instructions in such cases.  */
static void
ix86_pad_returns (void)
{
  edge e;

  /* Every predecessor edge of the exit block potentially ends in a
     RET; examine each one.  */
  for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
    {
      basic_block bb = e->src;
      rtx ret = BB_END (bb);
      rtx prev;
      bool replace = false;

      if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
	  || !maybe_hot_bb_p (bb))
	continue;
      /* Find the last real insn or label before the return.  */
      for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
	if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
	  break;
      if (prev && GET_CODE (prev) == CODE_LABEL)
	{
	  /* The RET's block starts with a label: the RET is a jump
	     target whenever any executed non-fallthru edge enters.  */
	  edge e;
	  for (e = bb->pred; e; e = e->pred_next)
	    if (EDGE_FREQUENCY (e) && e->src->index >= 0
		&& !(e->flags & EDGE_FALLTHRU))
	      replace = true;
	}
      if (!replace)
	{
	  prev = prev_active_insn (ret);
	  /* RET directly preceded by a conditional jump or a call also
	     takes the penalty.  */
	  if (prev
	      && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
		  || GET_CODE (prev) == CALL_INSN))
	    replace = true;
	  /* Empty functions get branch mispredict even when the jump destination
	     is not visible to us.  */
	  if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
	    replace = true;
	}
      if (replace)
	{
	  /* Replace the RET with the padded long form.  */
	  emit_insn_before (gen_return_internal_long (), ret);
	  delete_insn (ret);
	}
    }
}
15704
15705/* Implement machine specific optimizations. We implement padding of returns
15706 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
15707static void
15708ix86_reorg (void)
15709{
15710 if (TARGET_ATHLON_K8 && optimize && !optimize_size)
15711 ix86_pad_returns ();
15712 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
15713 ix86_avoid_jump_misspredicts ();
2a500b9e
JH
15714}
15715
4977bab6
ZW
15716/* Return nonzero when QImode register that must be represented via REX prefix
15717 is used. */
15718bool
b96a374d 15719x86_extended_QIreg_mentioned_p (rtx insn)
4977bab6
ZW
15720{
15721 int i;
15722 extract_insn_cached (insn);
15723 for (i = 0; i < recog_data.n_operands; i++)
15724 if (REG_P (recog_data.operand[i])
15725 && REGNO (recog_data.operand[i]) >= 4)
15726 return true;
15727 return false;
15728}
15729
15730/* Return nonzero when P points to register encoded via REX prefix.
15731 Called via for_each_rtx. */
15732static int
b96a374d 15733extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
4977bab6
ZW
15734{
15735 unsigned int regno;
15736 if (!REG_P (*p))
15737 return 0;
15738 regno = REGNO (*p);
15739 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
15740}
15741
15742/* Return true when INSN mentions register that must be encoded using REX
15743 prefix. */
15744bool
b96a374d 15745x86_extended_reg_mentioned_p (rtx insn)
4977bab6
ZW
15746{
15747 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
15748}
15749
/* Generate an unsigned DImode/SImode to FP conversion.  This is the same code
   optabs would emit if we didn't have TFmode patterns.  */

void
x86_emit_floatuns (rtx operands[2])
{
  rtx neglab, donelab, i0, i1, f0, in, out;
  enum machine_mode mode, inmode;

  inmode = GET_MODE (operands[1]);
  if (inmode != SImode
      && inmode != DImode)
    abort ();

  out = operands[0];
  in = force_reg (inmode, operands[1]);
  mode = GET_MODE (out);
  neglab = gen_label_rtx ();
  donelab = gen_label_rtx ();
  i1 = gen_reg_rtx (Pmode);
  f0 = gen_reg_rtx (mode);

  /* If the value is non-negative when viewed as signed, a plain
     signed conversion is exact; otherwise take the slow path.  */
  emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);

  emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
  emit_jump_insn (gen_jump (donelab));
  emit_barrier ();

  emit_label (neglab);

  /* Top bit set: halve the value, or-ing the discarded low bit back
     in so rounding stays correct, convert, then double the result.  */
  i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
  i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
  i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
  expand_float (f0, i0, 0);
  emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));

  emit_label (donelab);
}
15788
dafc5b82
JH
15789/* Return if we do not know how to pass TYPE solely in registers. */
15790bool
b96a374d 15791ix86_must_pass_in_stack (enum machine_mode mode, tree type)
dafc5b82
JH
15792{
15793 if (default_must_pass_in_stack (mode, type))
15794 return true;
15795 return (!TARGET_64BIT && type && mode == TImode);
15796}
15797
/* Initialize vector TARGET via VALS.  */
void
ix86_expand_vector_init (rtx target, rtx vals)
{
  enum machine_mode mode = GET_MODE (target);
  int elt_size = GET_MODE_SIZE (GET_MODE_INNER (mode));
  int n_elts = (GET_MODE_SIZE (mode) / elt_size);
  int i;

  /* Find the highest-numbered element that is not a constant; i < 0
     means everything is constant, i == 0 means only element 0
     varies.  */
  for (i = n_elts - 1; i >= 0; i--)
    if (GET_CODE (XVECEXP (vals, 0, i)) != CONST_INT
	&& GET_CODE (XVECEXP (vals, 0, i)) != CONST_DOUBLE)
      break;

  /* Few special cases first...
     ... constants are best loaded from constant pool.  */
  if (i < 0)
    {
      emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
      return;
    }

  /* ... values where only first field is non-constant are best loaded
     from the pool and overwritten via move later.  */
  if (!i)
    {
      rtx op = simplify_gen_subreg (mode, XVECEXP (vals, 0, 0),
				    GET_MODE_INNER (mode), 0);

      op = force_reg (mode, op);
      /* Load the constant tail with a zero in slot 0, then merge the
	 variable scalar into slot 0 with movsd/movss.  */
      XVECEXP (vals, 0, 0) = CONST0_RTX (GET_MODE_INNER (mode));
      emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
      switch (GET_MODE (target))
	{
	  case V2DFmode:
	    emit_insn (gen_sse2_movsd (target, target, op));
	    break;
	  case V4SFmode:
	    emit_insn (gen_sse_movss (target, target, op));
	    break;
	  default:
	    break;
	}
      return;
    }

  /* And the busy sequence doing rotations.  */
  switch (GET_MODE (target))
    {
    case V2DFmode:
      {
	/* Two variable doubles: view each scalar as a vector and
	   interleave the low elements.  */
	rtx vecop0 =
	  simplify_gen_subreg (V2DFmode, XVECEXP (vals, 0, 0), DFmode, 0);
	rtx vecop1 =
	  simplify_gen_subreg (V2DFmode, XVECEXP (vals, 0, 1), DFmode, 0);

	vecop0 = force_reg (V2DFmode, vecop0);
	vecop1 = force_reg (V2DFmode, vecop1);
	emit_insn (gen_sse2_unpcklpd (target, vecop0, vecop1));
      }
      break;
    case V4SFmode:
      {
	/* Four variable floats: two rounds of unpcklps assemble
	   (e0, e1, e2, e3) from the individual scalars.  */
	rtx vecop0 =
	  simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 0), SFmode, 0);
	rtx vecop1 =
	  simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 1), SFmode, 0);
	rtx vecop2 =
	  simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 2), SFmode, 0);
	rtx vecop3 =
	  simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 3), SFmode, 0);
	rtx tmp1 = gen_reg_rtx (V4SFmode);
	rtx tmp2 = gen_reg_rtx (V4SFmode);

	vecop0 = force_reg (V4SFmode, vecop0);
	vecop1 = force_reg (V4SFmode, vecop1);
	vecop2 = force_reg (V4SFmode, vecop2);
	vecop3 = force_reg (V4SFmode, vecop3);
	emit_insn (gen_sse_unpcklps (tmp1, vecop1, vecop3));
	emit_insn (gen_sse_unpcklps (tmp2, vecop0, vecop2));
	emit_insn (gen_sse_unpcklps (target, tmp2, tmp1));
      }
      break;
    default:
      abort ();
    }
}
15885
67dfe110
KH
15886/* Worker function for TARGET_MD_ASM_CLOBBERS.
15887
15888 We do this in the new i386 backend to maintain source compatibility
15889 with the old cc0-based compiler. */
15890
15891static tree
15892ix86_md_asm_clobbers (tree clobbers)
15893{
15894 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
15895 clobbers);
15896 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
15897 clobbers);
15898 clobbers = tree_cons (NULL_TREE, build_string (7, "dirflag"),
15899 clobbers);
15900 return clobbers;
15901}
15902
3c5cb3e4
KH
15903/* Worker function for REVERSE_CONDITION. */
15904
15905enum rtx_code
15906ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
15907{
15908 return (mode != CCFPmode && mode != CCFPUmode
15909 ? reverse_condition (code)
15910 : reverse_condition_maybe_unordered (code));
15911}
15912
5ea9cb6e
RS
15913/* Output code to perform an x87 FP register move, from OPERANDS[1]
15914 to OPERANDS[0]. */
15915
15916const char *
15917output_387_reg_move (rtx insn, rtx *operands)
15918{
15919 if (REG_P (operands[1])
15920 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
15921 {
15922 if (REGNO (operands[0]) == FIRST_STACK_REG
15923 && TARGET_USE_FFREEP)
15924 return "ffreep\t%y0";
15925 return "fstp\t%y0";
15926 }
15927 if (STACK_TOP_P (operands[0]))
15928 return "fld%z1\t%y1";
15929 return "fst\t%y0";
15930}
15931
/* Output code to perform a conditional jump to LABEL, if C2 flag in
   FP status register is set.  */

void
ix86_emit_fp_unordered_jump (rtx label)
{
  rtx reg = gen_reg_rtx (HImode);
  rtx temp;

  /* Fetch the FPU status word.  */
  emit_insn (gen_x86_fnstsw_1 (reg));

  if (TARGET_USE_SAHF)
    {
      /* Copy the status flags into EFLAGS and branch on unordered.  */
      emit_insn (gen_x86_sahf_1 (reg));

      temp = gen_rtx_REG (CCmode, FLAGS_REG);
      temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
    }
  else
    {
      /* Test bit 0x04 of the status word's high byte directly.  */
      emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));

      temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
    }

  /* Emit the conditional branch to LABEL.  */
  temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
			       gen_rtx_LABEL_REF (VOIDmode, label),
			       pc_rtx);
  temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
  emit_jump_insn (temp);
}
15964
/* Output code to perform a log1p XFmode calculation.  */

void ix86_emit_i387_log1p (rtx op0, rtx op1)
{
  rtx label1 = gen_label_rtx ();
  rtx label2 = gen_label_rtx ();

  rtx tmp = gen_reg_rtx (XFmode);
  rtx tmp2 = gen_reg_rtx (XFmode);

  /* fyl2xp1 is only valid for small arguments; compare |op1| against
     the threshold 1 - sqrt(2)/2 and branch to the fallback when it is
     too large.  */
  emit_insn (gen_absxf2 (tmp, op1));
  emit_insn (gen_cmpxf (tmp,
    CONST_DOUBLE_FROM_REAL_VALUE (
       REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
       XFmode)));
  emit_jump_insn (gen_bge (label1));

  /* Small argument: log1p(x) = ln(2) * log2(x + 1) via fyl2xp1.  */
  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
  emit_insn (gen_fyl2xp1_xf3 (op0, tmp2, op1));
  emit_jump (label2);

  /* Large argument: compute log1p(x) = ln(2) * log2(1 + x) with an
     explicit add followed by fyl2x.  */
  emit_label (label1);
  emit_move_insn (tmp, CONST1_RTX (XFmode));
  emit_insn (gen_addxf3 (tmp, op1, tmp));
  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
  emit_insn (gen_fyl2x_xf3 (op0, tmp2, tmp));

  emit_label (label2);
}
15994
e2500fed 15995#include "gt-i386.h"
This page took 4.518841 seconds and 5 git commands to generate.