]> gcc.gnu.org Git - gcc.git/blame - gcc/config/i386/i386.c
optabs.h (enum optab_index): Add new OTI_log1p.
[gcc.git] / gcc / config / i386 / i386.c
CommitLineData
/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */
2a2ab3f9 22#include "config.h"
bb5177ac 23#include "system.h"
4977bab6
ZW
24#include "coretypes.h"
25#include "tm.h"
2a2ab3f9 26#include "rtl.h"
6baf1cc8
BS
27#include "tree.h"
28#include "tm_p.h"
2a2ab3f9
JVA
29#include "regs.h"
30#include "hard-reg-set.h"
31#include "real.h"
32#include "insn-config.h"
33#include "conditions.h"
2a2ab3f9
JVA
34#include "output.h"
35#include "insn-attr.h"
2a2ab3f9 36#include "flags.h"
a8ffcc81 37#include "except.h"
ecbc4695 38#include "function.h"
00c79232 39#include "recog.h"
ced8dd8c 40#include "expr.h"
e78d8e51 41#include "optabs.h"
f103890b 42#include "toplev.h"
e075ae69 43#include "basic-block.h"
1526a060 44#include "ggc.h"
672a6f42
NB
45#include "target.h"
46#include "target-def.h"
f1e639b1 47#include "langhooks.h"
dafc5b82 48#include "cgraph.h"
2a2ab3f9 49
/* Default stack-probe limit when the target configuration does not
   provide one; -1 means "no limit".  */
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif

/* Return index of given mode in mult and division cost tables.
   QImode..DImode map to 0..3; anything else (e.g. TImode) shares
   slot 4.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)

2ab0437e 62/* Processor costs (relative to an add) */
fce5a9f2 63static const
2ab0437e
JH
64struct processor_costs size_cost = { /* costs for tunning for size */
65 2, /* cost of an add instruction */
66 3, /* cost of a lea instruction */
67 2, /* variable shift costs */
68 3, /* constant shift costs */
4977bab6 69 {3, 3, 3, 3, 5}, /* cost of starting a multiply */
2ab0437e 70 0, /* cost of multiply per each bit set */
4977bab6 71 {3, 3, 3, 3, 5}, /* cost of a divide/mod */
44cf5b6a
JH
72 3, /* cost of movsx */
73 3, /* cost of movzx */
2ab0437e
JH
74 0, /* "large" insn */
75 2, /* MOVE_RATIO */
76 2, /* cost for loading QImode using movzbl */
77 {2, 2, 2}, /* cost of loading integer registers
78 in QImode, HImode and SImode.
79 Relative to reg-reg move (2). */
80 {2, 2, 2}, /* cost of storing integer registers */
81 2, /* cost of reg,reg fld/fst */
82 {2, 2, 2}, /* cost of loading fp registers
83 in SFmode, DFmode and XFmode */
84 {2, 2, 2}, /* cost of loading integer registers */
85 3, /* cost of moving MMX register */
86 {3, 3}, /* cost of loading MMX registers
87 in SImode and DImode */
88 {3, 3}, /* cost of storing MMX registers
89 in SImode and DImode */
90 3, /* cost of moving SSE register */
91 {3, 3, 3}, /* cost of loading SSE registers
92 in SImode, DImode and TImode */
93 {3, 3, 3}, /* cost of storing SSE registers
94 in SImode, DImode and TImode */
95 3, /* MMX or SSE register to integer */
f4365627
JH
96 0, /* size of prefetch block */
97 0, /* number of parallel prefetches */
4977bab6 98 1, /* Branch cost */
229b303a
RS
99 2, /* cost of FADD and FSUB insns. */
100 2, /* cost of FMUL instruction. */
101 2, /* cost of FDIV instruction. */
102 2, /* cost of FABS instruction. */
103 2, /* cost of FCHS instruction. */
104 2, /* cost of FSQRT instruction. */
2ab0437e 105};
229b303a 106
32b5b1aa 107/* Processor costs (relative to an add) */
fce5a9f2 108static const
32b5b1aa 109struct processor_costs i386_cost = { /* 386 specific costs */
e9a25f70 110 1, /* cost of an add instruction */
32b5b1aa
SC
111 1, /* cost of a lea instruction */
112 3, /* variable shift costs */
113 2, /* constant shift costs */
4977bab6 114 {6, 6, 6, 6, 6}, /* cost of starting a multiply */
32b5b1aa 115 1, /* cost of multiply per each bit set */
4977bab6 116 {23, 23, 23, 23, 23}, /* cost of a divide/mod */
44cf5b6a
JH
117 3, /* cost of movsx */
118 2, /* cost of movzx */
96e7ae40 119 15, /* "large" insn */
e2e52e1b 120 3, /* MOVE_RATIO */
7c6b971d 121 4, /* cost for loading QImode using movzbl */
96e7ae40
JH
122 {2, 4, 2}, /* cost of loading integer registers
123 in QImode, HImode and SImode.
0f290768 124 Relative to reg-reg move (2). */
96e7ae40
JH
125 {2, 4, 2}, /* cost of storing integer registers */
126 2, /* cost of reg,reg fld/fst */
127 {8, 8, 8}, /* cost of loading fp registers
128 in SFmode, DFmode and XFmode */
fa79946e
JH
129 {8, 8, 8}, /* cost of loading integer registers */
130 2, /* cost of moving MMX register */
131 {4, 8}, /* cost of loading MMX registers
132 in SImode and DImode */
133 {4, 8}, /* cost of storing MMX registers
134 in SImode and DImode */
135 2, /* cost of moving SSE register */
136 {4, 8, 16}, /* cost of loading SSE registers
137 in SImode, DImode and TImode */
138 {4, 8, 16}, /* cost of storing SSE registers
139 in SImode, DImode and TImode */
140 3, /* MMX or SSE register to integer */
f4365627
JH
141 0, /* size of prefetch block */
142 0, /* number of parallel prefetches */
4977bab6 143 1, /* Branch cost */
229b303a
RS
144 23, /* cost of FADD and FSUB insns. */
145 27, /* cost of FMUL instruction. */
146 88, /* cost of FDIV instruction. */
147 22, /* cost of FABS instruction. */
148 24, /* cost of FCHS instruction. */
149 122, /* cost of FSQRT instruction. */
32b5b1aa
SC
150};
151
fce5a9f2 152static const
32b5b1aa
SC
153struct processor_costs i486_cost = { /* 486 specific costs */
154 1, /* cost of an add instruction */
155 1, /* cost of a lea instruction */
156 3, /* variable shift costs */
157 2, /* constant shift costs */
4977bab6 158 {12, 12, 12, 12, 12}, /* cost of starting a multiply */
32b5b1aa 159 1, /* cost of multiply per each bit set */
4977bab6 160 {40, 40, 40, 40, 40}, /* cost of a divide/mod */
44cf5b6a
JH
161 3, /* cost of movsx */
162 2, /* cost of movzx */
96e7ae40 163 15, /* "large" insn */
e2e52e1b 164 3, /* MOVE_RATIO */
7c6b971d 165 4, /* cost for loading QImode using movzbl */
96e7ae40
JH
166 {2, 4, 2}, /* cost of loading integer registers
167 in QImode, HImode and SImode.
0f290768 168 Relative to reg-reg move (2). */
96e7ae40
JH
169 {2, 4, 2}, /* cost of storing integer registers */
170 2, /* cost of reg,reg fld/fst */
171 {8, 8, 8}, /* cost of loading fp registers
172 in SFmode, DFmode and XFmode */
fa79946e
JH
173 {8, 8, 8}, /* cost of loading integer registers */
174 2, /* cost of moving MMX register */
175 {4, 8}, /* cost of loading MMX registers
176 in SImode and DImode */
177 {4, 8}, /* cost of storing MMX registers
178 in SImode and DImode */
179 2, /* cost of moving SSE register */
180 {4, 8, 16}, /* cost of loading SSE registers
181 in SImode, DImode and TImode */
182 {4, 8, 16}, /* cost of storing SSE registers
183 in SImode, DImode and TImode */
f4365627
JH
184 3, /* MMX or SSE register to integer */
185 0, /* size of prefetch block */
186 0, /* number of parallel prefetches */
4977bab6 187 1, /* Branch cost */
229b303a
RS
188 8, /* cost of FADD and FSUB insns. */
189 16, /* cost of FMUL instruction. */
190 73, /* cost of FDIV instruction. */
191 3, /* cost of FABS instruction. */
192 3, /* cost of FCHS instruction. */
193 83, /* cost of FSQRT instruction. */
32b5b1aa
SC
194};
195
fce5a9f2 196static const
e5cb57e8 197struct processor_costs pentium_cost = {
32b5b1aa
SC
198 1, /* cost of an add instruction */
199 1, /* cost of a lea instruction */
856b07a1 200 4, /* variable shift costs */
e5cb57e8 201 1, /* constant shift costs */
4977bab6 202 {11, 11, 11, 11, 11}, /* cost of starting a multiply */
856b07a1 203 0, /* cost of multiply per each bit set */
4977bab6 204 {25, 25, 25, 25, 25}, /* cost of a divide/mod */
44cf5b6a
JH
205 3, /* cost of movsx */
206 2, /* cost of movzx */
96e7ae40 207 8, /* "large" insn */
e2e52e1b 208 6, /* MOVE_RATIO */
7c6b971d 209 6, /* cost for loading QImode using movzbl */
96e7ae40
JH
210 {2, 4, 2}, /* cost of loading integer registers
211 in QImode, HImode and SImode.
0f290768 212 Relative to reg-reg move (2). */
96e7ae40
JH
213 {2, 4, 2}, /* cost of storing integer registers */
214 2, /* cost of reg,reg fld/fst */
215 {2, 2, 6}, /* cost of loading fp registers
216 in SFmode, DFmode and XFmode */
fa79946e
JH
217 {4, 4, 6}, /* cost of loading integer registers */
218 8, /* cost of moving MMX register */
219 {8, 8}, /* cost of loading MMX registers
220 in SImode and DImode */
221 {8, 8}, /* cost of storing MMX registers
222 in SImode and DImode */
223 2, /* cost of moving SSE register */
224 {4, 8, 16}, /* cost of loading SSE registers
225 in SImode, DImode and TImode */
226 {4, 8, 16}, /* cost of storing SSE registers
227 in SImode, DImode and TImode */
f4365627
JH
228 3, /* MMX or SSE register to integer */
229 0, /* size of prefetch block */
230 0, /* number of parallel prefetches */
4977bab6 231 2, /* Branch cost */
229b303a
RS
232 3, /* cost of FADD and FSUB insns. */
233 3, /* cost of FMUL instruction. */
234 39, /* cost of FDIV instruction. */
235 1, /* cost of FABS instruction. */
236 1, /* cost of FCHS instruction. */
237 70, /* cost of FSQRT instruction. */
32b5b1aa
SC
238};
239
fce5a9f2 240static const
856b07a1
SC
241struct processor_costs pentiumpro_cost = {
242 1, /* cost of an add instruction */
243 1, /* cost of a lea instruction */
e075ae69 244 1, /* variable shift costs */
856b07a1 245 1, /* constant shift costs */
4977bab6 246 {4, 4, 4, 4, 4}, /* cost of starting a multiply */
856b07a1 247 0, /* cost of multiply per each bit set */
4977bab6 248 {17, 17, 17, 17, 17}, /* cost of a divide/mod */
44cf5b6a
JH
249 1, /* cost of movsx */
250 1, /* cost of movzx */
96e7ae40 251 8, /* "large" insn */
e2e52e1b 252 6, /* MOVE_RATIO */
7c6b971d 253 2, /* cost for loading QImode using movzbl */
96e7ae40
JH
254 {4, 4, 4}, /* cost of loading integer registers
255 in QImode, HImode and SImode.
0f290768 256 Relative to reg-reg move (2). */
96e7ae40
JH
257 {2, 2, 2}, /* cost of storing integer registers */
258 2, /* cost of reg,reg fld/fst */
259 {2, 2, 6}, /* cost of loading fp registers
260 in SFmode, DFmode and XFmode */
fa79946e
JH
261 {4, 4, 6}, /* cost of loading integer registers */
262 2, /* cost of moving MMX register */
263 {2, 2}, /* cost of loading MMX registers
264 in SImode and DImode */
265 {2, 2}, /* cost of storing MMX registers
266 in SImode and DImode */
267 2, /* cost of moving SSE register */
268 {2, 2, 8}, /* cost of loading SSE registers
269 in SImode, DImode and TImode */
270 {2, 2, 8}, /* cost of storing SSE registers
271 in SImode, DImode and TImode */
f4365627
JH
272 3, /* MMX or SSE register to integer */
273 32, /* size of prefetch block */
274 6, /* number of parallel prefetches */
4977bab6 275 2, /* Branch cost */
229b303a
RS
276 3, /* cost of FADD and FSUB insns. */
277 5, /* cost of FMUL instruction. */
278 56, /* cost of FDIV instruction. */
279 2, /* cost of FABS instruction. */
280 2, /* cost of FCHS instruction. */
281 56, /* cost of FSQRT instruction. */
856b07a1
SC
282};
283
fce5a9f2 284static const
a269a03c
JC
285struct processor_costs k6_cost = {
286 1, /* cost of an add instruction */
e075ae69 287 2, /* cost of a lea instruction */
a269a03c
JC
288 1, /* variable shift costs */
289 1, /* constant shift costs */
4977bab6 290 {3, 3, 3, 3, 3}, /* cost of starting a multiply */
a269a03c 291 0, /* cost of multiply per each bit set */
4977bab6 292 {18, 18, 18, 18, 18}, /* cost of a divide/mod */
44cf5b6a
JH
293 2, /* cost of movsx */
294 2, /* cost of movzx */
96e7ae40 295 8, /* "large" insn */
e2e52e1b 296 4, /* MOVE_RATIO */
7c6b971d 297 3, /* cost for loading QImode using movzbl */
96e7ae40
JH
298 {4, 5, 4}, /* cost of loading integer registers
299 in QImode, HImode and SImode.
0f290768 300 Relative to reg-reg move (2). */
96e7ae40
JH
301 {2, 3, 2}, /* cost of storing integer registers */
302 4, /* cost of reg,reg fld/fst */
303 {6, 6, 6}, /* cost of loading fp registers
304 in SFmode, DFmode and XFmode */
fa79946e
JH
305 {4, 4, 4}, /* cost of loading integer registers */
306 2, /* cost of moving MMX register */
307 {2, 2}, /* cost of loading MMX registers
308 in SImode and DImode */
309 {2, 2}, /* cost of storing MMX registers
310 in SImode and DImode */
311 2, /* cost of moving SSE register */
312 {2, 2, 8}, /* cost of loading SSE registers
313 in SImode, DImode and TImode */
314 {2, 2, 8}, /* cost of storing SSE registers
315 in SImode, DImode and TImode */
f4365627
JH
316 6, /* MMX or SSE register to integer */
317 32, /* size of prefetch block */
318 1, /* number of parallel prefetches */
4977bab6 319 1, /* Branch cost */
229b303a
RS
320 2, /* cost of FADD and FSUB insns. */
321 2, /* cost of FMUL instruction. */
4f770e7b
RS
322 56, /* cost of FDIV instruction. */
323 2, /* cost of FABS instruction. */
229b303a
RS
324 2, /* cost of FCHS instruction. */
325 56, /* cost of FSQRT instruction. */
a269a03c
JC
326};
327
fce5a9f2 328static const
309ada50
JH
329struct processor_costs athlon_cost = {
330 1, /* cost of an add instruction */
0b5107cf 331 2, /* cost of a lea instruction */
309ada50
JH
332 1, /* variable shift costs */
333 1, /* constant shift costs */
4977bab6 334 {5, 5, 5, 5, 5}, /* cost of starting a multiply */
309ada50 335 0, /* cost of multiply per each bit set */
4977bab6 336 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
44cf5b6a
JH
337 1, /* cost of movsx */
338 1, /* cost of movzx */
309ada50 339 8, /* "large" insn */
e2e52e1b 340 9, /* MOVE_RATIO */
309ada50 341 4, /* cost for loading QImode using movzbl */
b72b1c29 342 {3, 4, 3}, /* cost of loading integer registers
309ada50 343 in QImode, HImode and SImode.
0f290768 344 Relative to reg-reg move (2). */
b72b1c29 345 {3, 4, 3}, /* cost of storing integer registers */
309ada50 346 4, /* cost of reg,reg fld/fst */
b72b1c29 347 {4, 4, 12}, /* cost of loading fp registers
309ada50 348 in SFmode, DFmode and XFmode */
b72b1c29 349 {6, 6, 8}, /* cost of loading integer registers */
fa79946e 350 2, /* cost of moving MMX register */
b72b1c29 351 {4, 4}, /* cost of loading MMX registers
fa79946e 352 in SImode and DImode */
b72b1c29 353 {4, 4}, /* cost of storing MMX registers
fa79946e
JH
354 in SImode and DImode */
355 2, /* cost of moving SSE register */
b72b1c29 356 {4, 4, 6}, /* cost of loading SSE registers
fa79946e 357 in SImode, DImode and TImode */
b72b1c29 358 {4, 4, 5}, /* cost of storing SSE registers
fa79946e 359 in SImode, DImode and TImode */
b72b1c29 360 5, /* MMX or SSE register to integer */
f4365627
JH
361 64, /* size of prefetch block */
362 6, /* number of parallel prefetches */
4977bab6 363 2, /* Branch cost */
229b303a
RS
364 4, /* cost of FADD and FSUB insns. */
365 4, /* cost of FMUL instruction. */
366 24, /* cost of FDIV instruction. */
367 2, /* cost of FABS instruction. */
368 2, /* cost of FCHS instruction. */
369 35, /* cost of FSQRT instruction. */
309ada50
JH
370};
371
4977bab6
ZW
372static const
373struct processor_costs k8_cost = {
374 1, /* cost of an add instruction */
375 2, /* cost of a lea instruction */
376 1, /* variable shift costs */
377 1, /* constant shift costs */
378 {3, 4, 3, 4, 5}, /* cost of starting a multiply */
379 0, /* cost of multiply per each bit set */
380 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
381 1, /* cost of movsx */
382 1, /* cost of movzx */
383 8, /* "large" insn */
384 9, /* MOVE_RATIO */
385 4, /* cost for loading QImode using movzbl */
386 {3, 4, 3}, /* cost of loading integer registers
387 in QImode, HImode and SImode.
388 Relative to reg-reg move (2). */
389 {3, 4, 3}, /* cost of storing integer registers */
390 4, /* cost of reg,reg fld/fst */
391 {4, 4, 12}, /* cost of loading fp registers
392 in SFmode, DFmode and XFmode */
393 {6, 6, 8}, /* cost of loading integer registers */
394 2, /* cost of moving MMX register */
395 {3, 3}, /* cost of loading MMX registers
396 in SImode and DImode */
397 {4, 4}, /* cost of storing MMX registers
398 in SImode and DImode */
399 2, /* cost of moving SSE register */
400 {4, 3, 6}, /* cost of loading SSE registers
401 in SImode, DImode and TImode */
402 {4, 4, 5}, /* cost of storing SSE registers
403 in SImode, DImode and TImode */
404 5, /* MMX or SSE register to integer */
405 64, /* size of prefetch block */
406 6, /* number of parallel prefetches */
407 2, /* Branch cost */
408 4, /* cost of FADD and FSUB insns. */
409 4, /* cost of FMUL instruction. */
410 19, /* cost of FDIV instruction. */
411 2, /* cost of FABS instruction. */
412 2, /* cost of FCHS instruction. */
413 35, /* cost of FSQRT instruction. */
414};
415
fce5a9f2 416static const
b4e89e2d
JH
417struct processor_costs pentium4_cost = {
418 1, /* cost of an add instruction */
419 1, /* cost of a lea instruction */
4977bab6
ZW
420 4, /* variable shift costs */
421 4, /* constant shift costs */
422 {15, 15, 15, 15, 15}, /* cost of starting a multiply */
b4e89e2d 423 0, /* cost of multiply per each bit set */
4977bab6 424 {56, 56, 56, 56, 56}, /* cost of a divide/mod */
44cf5b6a
JH
425 1, /* cost of movsx */
426 1, /* cost of movzx */
b4e89e2d
JH
427 16, /* "large" insn */
428 6, /* MOVE_RATIO */
429 2, /* cost for loading QImode using movzbl */
430 {4, 5, 4}, /* cost of loading integer registers
431 in QImode, HImode and SImode.
432 Relative to reg-reg move (2). */
433 {2, 3, 2}, /* cost of storing integer registers */
434 2, /* cost of reg,reg fld/fst */
435 {2, 2, 6}, /* cost of loading fp registers
436 in SFmode, DFmode and XFmode */
437 {4, 4, 6}, /* cost of loading integer registers */
438 2, /* cost of moving MMX register */
439 {2, 2}, /* cost of loading MMX registers
440 in SImode and DImode */
441 {2, 2}, /* cost of storing MMX registers
442 in SImode and DImode */
443 12, /* cost of moving SSE register */
444 {12, 12, 12}, /* cost of loading SSE registers
445 in SImode, DImode and TImode */
446 {2, 2, 8}, /* cost of storing SSE registers
447 in SImode, DImode and TImode */
448 10, /* MMX or SSE register to integer */
f4365627
JH
449 64, /* size of prefetch block */
450 6, /* number of parallel prefetches */
4977bab6 451 2, /* Branch cost */
229b303a
RS
452 5, /* cost of FADD and FSUB insns. */
453 7, /* cost of FMUL instruction. */
454 43, /* cost of FDIV instruction. */
455 2, /* cost of FABS instruction. */
456 2, /* cost of FCHS instruction. */
457 43, /* cost of FSQRT instruction. */
b4e89e2d
JH
458};
459
89c43c0a
VM
460static const
461struct processor_costs nocona_cost = {
462 1, /* cost of an add instruction */
463 1, /* cost of a lea instruction */
464 1, /* variable shift costs */
465 1, /* constant shift costs */
466 {10, 10, 10, 10, 10}, /* cost of starting a multiply */
467 0, /* cost of multiply per each bit set */
468 {66, 66, 66, 66, 66}, /* cost of a divide/mod */
469 1, /* cost of movsx */
470 1, /* cost of movzx */
471 16, /* "large" insn */
472 9, /* MOVE_RATIO */
473 4, /* cost for loading QImode using movzbl */
474 {4, 4, 4}, /* cost of loading integer registers
475 in QImode, HImode and SImode.
476 Relative to reg-reg move (2). */
477 {4, 4, 4}, /* cost of storing integer registers */
478 3, /* cost of reg,reg fld/fst */
479 {12, 12, 12}, /* cost of loading fp registers
480 in SFmode, DFmode and XFmode */
481 {4, 4, 4}, /* cost of loading integer registers */
482 6, /* cost of moving MMX register */
483 {12, 12}, /* cost of loading MMX registers
484 in SImode and DImode */
485 {12, 12}, /* cost of storing MMX registers
486 in SImode and DImode */
487 6, /* cost of moving SSE register */
488 {12, 12, 12}, /* cost of loading SSE registers
489 in SImode, DImode and TImode */
490 {12, 12, 12}, /* cost of storing SSE registers
491 in SImode, DImode and TImode */
492 8, /* MMX or SSE register to integer */
493 128, /* size of prefetch block */
494 8, /* number of parallel prefetches */
495 1, /* Branch cost */
496 6, /* cost of FADD and FSUB insns. */
497 8, /* cost of FMUL instruction. */
498 40, /* cost of FDIV instruction. */
499 3, /* cost of FABS instruction. */
500 3, /* cost of FCHS instruction. */
501 44, /* cost of FSQRT instruction. */
502};
503
8b60264b 504const struct processor_costs *ix86_cost = &pentium_cost;
32b5b1aa 505
a269a03c
JC
506/* Processor feature/optimization bitmasks. */
507#define m_386 (1<<PROCESSOR_I386)
508#define m_486 (1<<PROCESSOR_I486)
509#define m_PENT (1<<PROCESSOR_PENTIUM)
510#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
511#define m_K6 (1<<PROCESSOR_K6)
309ada50 512#define m_ATHLON (1<<PROCESSOR_ATHLON)
b4e89e2d 513#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
4977bab6
ZW
514#define m_K8 (1<<PROCESSOR_K8)
515#define m_ATHLON_K8 (m_K8 | m_ATHLON)
89c43c0a 516#define m_NOCONA (1<<PROCESSOR_NOCONA)
a269a03c 517
4977bab6 518const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
89c43c0a 519const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
a269a03c 520const int x86_zero_extend_with_and = m_486 | m_PENT;
89c43c0a 521const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA /* m_386 | m_K6 */;
e075ae69 522const int x86_double_with_add = ~m_386;
a269a03c 523const int x86_use_bit_test = m_386;
4977bab6 524const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
89c43c0a 525const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
4977bab6 526const int x86_3dnow_a = m_ATHLON_K8;
89c43c0a
VM
527const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
528const int x86_branch_hints = m_PENT4 | m_NOCONA;
529const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4 | m_NOCONA;
e075ae69
RH
530const int x86_partial_reg_stall = m_PPRO;
531const int x86_use_loop = m_K6;
4977bab6 532const int x86_use_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
e075ae69
RH
533const int x86_use_mov0 = m_K6;
534const int x86_use_cltd = ~(m_PENT | m_K6);
535const int x86_read_modify_write = ~m_PENT;
536const int x86_read_modify = ~(m_PENT | m_PPRO);
537const int x86_split_long_moves = m_PPRO;
4977bab6 538const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
285464d0 539const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
89c43c0a 540const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
d9f32422
JH
541const int x86_qimode_math = ~(0);
542const int x86_promote_qi_regs = 0;
543const int x86_himode_math = ~(m_PPRO);
544const int x86_promote_hi_regs = m_PPRO;
89c43c0a
VM
545const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA;
546const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA;
547const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4 | m_NOCONA;
548const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4 | m_NOCONA;
549const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO);
550const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA;
551const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA;
552const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO;
7b50a809
JH
553const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO;
554const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO;
89c43c0a 555const int x86_decompose_lea = m_PENT4 | m_NOCONA;
495333a6 556const int x86_shift1 = ~m_486;
89c43c0a
VM
557const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
558const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO;
4977bab6 559/* Set for machines where the type and dependencies are resolved on SSE register
d1f87653 560 parts instead of whole registers, so we may maintain just lower part of
4977bab6
ZW
561 scalar values in proper format leaving the upper part undefined. */
562const int x86_sse_partial_regs = m_ATHLON_K8;
563/* Athlon optimizes partial-register FPS special case, thus avoiding the
564 need for extra instructions beforehand */
565const int x86_sse_partial_regs_for_cvtsd2ss = 0;
566const int x86_sse_typeless_stores = m_ATHLON_K8;
89c43c0a 567const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
4977bab6
ZW
568const int x86_use_ffreep = m_ATHLON_K8;
569const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
8f62128d 570const int x86_inter_unit_moves = ~(m_ATHLON_K8);
89c43c0a 571const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_NOCONA | m_PPRO;
be04394b
JH
572/* Some CPU cores are not able to predict more than 4 branch instructions in
573 the 16 byte window. */
89c43c0a 574const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
a269a03c 575
d1f87653 576/* In case the average insn count for single function invocation is
6ab16dd9
JH
577 lower than this constant, emit fast (but longer) prologue and
578 epilogue code. */
4977bab6 579#define FAST_PROLOGUE_INSN_COUNT 20
5bf0ebab 580
5bf0ebab
RH
581/* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
582static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
583static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
584static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
4c0d89b5
RS
585
586/* Array of the smallest class containing reg number REGNO, indexed by
0f290768 587 REGNO. Used by REGNO_REG_CLASS in i386.h. */
4c0d89b5 588
e075ae69 589enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
4c0d89b5
RS
590{
591 /* ax, dx, cx, bx */
ab408a86 592 AREG, DREG, CREG, BREG,
4c0d89b5 593 /* si, di, bp, sp */
e075ae69 594 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
4c0d89b5
RS
595 /* FP registers */
596 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
79325812 597 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
4c0d89b5 598 /* arg pointer */
83774849 599 NON_Q_REGS,
564d80f4 600 /* flags, fpsr, dirflag, frame */
a7180f70
BS
601 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
602 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
603 SSE_REGS, SSE_REGS,
604 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
3d117b30
JH
605 MMX_REGS, MMX_REGS,
606 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
607 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
608 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
609 SSE_REGS, SSE_REGS,
4c0d89b5 610};
c572e5ba 611
3d117b30 612/* The "default" register map used in 32bit mode. */
83774849 613
0f290768 614int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
83774849
RH
615{
616 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
617 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
3d117b30 618 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
a7180f70
BS
619 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
620 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
3d117b30
JH
621 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
622 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
83774849
RH
623};
624
5bf0ebab
RH
625static int const x86_64_int_parameter_registers[6] =
626{
627 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
628 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
629};
630
631static int const x86_64_int_return_registers[4] =
632{
633 0 /*RAX*/, 1 /*RDI*/, 5 /*RDI*/, 4 /*RSI*/
634};
53c17031 635
0f7fa3d0
JH
636/* The "default" register map used in 64bit mode. */
637int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
638{
639 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
3de72741 640 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
0f7fa3d0
JH
641 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
642 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
643 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
644 8,9,10,11,12,13,14,15, /* extended integer registers */
645 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
646};
647
83774849
RH
648/* Define the register numbers to be used in Dwarf debugging information.
649 The SVR4 reference port C compiler uses the following register numbers
650 in its Dwarf output code:
651 0 for %eax (gcc regno = 0)
652 1 for %ecx (gcc regno = 2)
653 2 for %edx (gcc regno = 1)
654 3 for %ebx (gcc regno = 3)
655 4 for %esp (gcc regno = 7)
656 5 for %ebp (gcc regno = 6)
657 6 for %esi (gcc regno = 4)
658 7 for %edi (gcc regno = 5)
659 The following three DWARF register numbers are never generated by
660 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
661 believes these numbers have these meanings.
662 8 for %eip (no gcc equivalent)
663 9 for %eflags (gcc regno = 17)
664 10 for %trapno (no gcc equivalent)
665 It is not at all clear how we should number the FP stack registers
666 for the x86 architecture. If the version of SDB on x86/svr4 were
667 a bit less brain dead with respect to floating-point then we would
668 have a precedent to follow with respect to DWARF register numbers
669 for x86 FP registers, but the SDB on x86/svr4 is so completely
670 broken with respect to FP registers that it is hardly worth thinking
671 of it as something to strive for compatibility with.
672 The version of x86/svr4 SDB I have at the moment does (partially)
673 seem to believe that DWARF register number 11 is associated with
674 the x86 register %st(0), but that's about all. Higher DWARF
675 register numbers don't seem to be associated with anything in
676 particular, and even for DWARF regno 11, SDB only seems to under-
677 stand that it should say that a variable lives in %st(0) (when
678 asked via an `=' command) if we said it was in DWARF regno 11,
679 but SDB still prints garbage when asked for the value of the
680 variable in question (via a `/' command).
681 (Also note that the labels SDB prints for various FP stack regs
682 when doing an `x' command are all wrong.)
683 Note that these problems generally don't affect the native SVR4
684 C compiler because it doesn't allow the use of -O with -g and
685 because when it is *not* optimizing, it allocates a memory
686 location for each floating-point variable, and the memory
687 location is what gets described in the DWARF AT_location
688 attribute for the variable in question.
689 Regardless of the severe mental illness of the x86/svr4 SDB, we
690 do something sensible here and we use the following DWARF
691 register numbers. Note that these are all stack-top-relative
692 numbers.
693 11 for %st(0) (gcc regno = 8)
694 12 for %st(1) (gcc regno = 9)
695 13 for %st(2) (gcc regno = 10)
696 14 for %st(3) (gcc regno = 11)
697 15 for %st(4) (gcc regno = 12)
698 16 for %st(5) (gcc regno = 13)
699 17 for %st(6) (gcc regno = 14)
700 18 for %st(7) (gcc regno = 15)
701*/
0f290768 702int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
83774849
RH
703{
704 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
705 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
3f3f2124 706 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
a7180f70
BS
707 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
708 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
d1f87653
KH
709 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
710 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
83774849
RH
711};
712
c572e5ba
JVA
713/* Test and compare insns in i386.md store the information needed to
714 generate branch and scc insns here. */
715
07933f72
GS
716rtx ix86_compare_op0 = NULL_RTX;
717rtx ix86_compare_op1 = NULL_RTX;
f5316dfe 718
/* Maximum number of distinct (mode, index) stack-local slots tracked.  */
#define MAX_386_STACK_LOCALS 3
/* Size of the register save area.  */
#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
36edd3cc
BS
722
/* Define the structure for the machine field in struct function.
   One list node per distinct (mode, slot-number) stack local that has
   been requested for the current function.  */

struct stack_local_entry GTY(())
{
  unsigned short mode;			/* Machine mode of the slot.  */
  unsigned short n;			/* Slot number within that mode.  */
  rtx rtl;				/* The MEM rtx for the slot.  */
  struct stack_local_entry *next;	/* Next entry in the per-function list.  */
};
732
4dd2ac2c
JH
/* Structure describing stack frame layout.
   Stack grows downward:

   [arguments]
					      <- ARG_POINTER
   saved pc

   saved frame pointer if frame_pointer_needed
					      <- HARD_FRAME_POINTER
   [saved regs]

   [padding1]          \
		        )
   [va_arg registers]  (
		        > to_allocate	      <- FRAME_POINTER
   [frame]	       (
		        )
   [padding2]	       /
  */
struct ix86_frame
{
  int nregs;				/* Number of call-saved registers saved.  */
  int padding1;				/* Padding between saved regs and va_arg area.  */
  int va_arg_size;			/* Size of the va_arg register save area.  */
  HOST_WIDE_INT frame;			/* Size of the local variable area.  */
  int padding2;				/* Padding below the frame area.  */
  int outgoing_arguments_size;		/* Space for outgoing arguments.  */
  int red_zone_size;			/* x86-64 red zone usable below sp.  */

  HOST_WIDE_INT to_allocate;		/* Total bytes the prologue must allocate.  */
  /* The offsets relative to ARG_POINTER.  */
  HOST_WIDE_INT frame_pointer_offset;
  HOST_WIDE_INT hard_frame_pointer_offset;
  HOST_WIDE_INT stack_pointer_offset;

  /* When save_regs_using_mov is set, emit prologue using
     move instead of push instructions.  */
  bool save_regs_using_mov;
};
772
c93e80a5
JH
/* Used to enable/disable debugging features.  */
const char *ix86_debug_arg_string, *ix86_debug_addr_string;

/* Code model option as passed by user.  */
const char *ix86_cmodel_string;
/* Parsed value.  */
enum cmodel ix86_cmodel;

/* Asm dialect.  */
const char *ix86_asm_string;
enum asm_dialect ix86_asm_dialect = ASM_ATT;

/* TLS dialect.  */
const char *ix86_tls_dialect_string;
enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;

/* Which unit we are generating floating point math for.  */
enum fpmath_unit ix86_fpmath;

/* Which cpu are we scheduling for.  */
enum processor_type ix86_tune;
/* Which instruction set architecture to use.  */
enum processor_type ix86_arch;

/* Strings to hold which cpu and instruction set architecture to use.  */
const char *ix86_tune_string;		/* for -mtune=<xxx> */
const char *ix86_arch_string;		/* for -march=<xxx> */
const char *ix86_fpmath_string;		/* for -mfpmath=<xxx> */

/* # of registers to use to pass arguments.  */
const char *ix86_regparm_string;

/* true if sse prefetch instruction is not NOOP.  */
int x86_prefetch_sse;

/* ix86_regparm_string as a number.  */
int ix86_regparm;

/* Alignment to use for loops and jumps:  */

/* Power of two alignment for loops.  */
const char *ix86_align_loops_string;

/* Power of two alignment for non-loop jumps.  */
const char *ix86_align_jumps_string;

/* Power of two alignment for stack boundary in bytes.  */
const char *ix86_preferred_stack_boundary_string;

/* Preferred alignment for stack boundary in bits.  */
int ix86_preferred_stack_boundary;

/* Branch cost.  Values 1-5: see jump.c.  */
int ix86_branch_cost;
const char *ix86_branch_cost_string;

/* Power of two alignment for functions.  */
const char *ix86_align_funcs_string;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
static char internal_label_prefix[16];
static int internal_label_prefix_len;
e075ae69 832\f
b96a374d
AJ
/* Forward declarations for file-local helpers defined later in this file.  */
static int local_symbolic_operand (rtx, enum machine_mode);
static int tls_symbolic_operand_1 (rtx, enum tls_model);
static void output_pic_addr_const (FILE *, rtx, int);
static void put_condition_code (enum rtx_code, enum machine_mode,
				int, int, FILE *);
static const char *get_some_local_dynamic_name (void);
static int get_some_local_dynamic_name_1 (rtx *, void *);
static rtx maybe_get_pool_constant (rtx);
static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
						   rtx *);
static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
						   enum machine_mode);
static rtx get_thread_pointer (int);
static rtx legitimize_tls_address (rtx, enum tls_model, int);
static void get_pc_thunk_name (char [32], unsigned int);
static rtx gen_push (rtx);
static int memory_address_length (rtx addr);
static int ix86_flags_dependant (rtx, rtx, enum attr_type);
static int ix86_agi_dependant (rtx, rtx, enum attr_type);
static struct machine_function * ix86_init_machine_status (void);
static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
static int ix86_nsaved_regs (void);
static void ix86_emit_save_regs (void);
static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
static HOST_WIDE_INT ix86_GOT_alias_set (void);
static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
static rtx ix86_expand_aligntest (rtx, int);
static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
static int ix86_issue_rate (void);
static int ix86_adjust_cost (rtx, rtx, rtx, int);
static int ia32_use_dfa_pipeline_interface (void);
static int ia32_multipass_dfa_lookahead (void);
static void ix86_init_mmx_sse_builtins (void);
static rtx x86_this_parameter (tree);
static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
				 HOST_WIDE_INT, tree);
static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
static void x86_file_start (void);
static void ix86_reorg (void);
static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
static tree ix86_build_builtin_va_list (void);
static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
					 tree, int *, int);
e075ae69
RH
880
/* Decomposed form of an x86 effective address:
   base + index*scale + disp, with an optional segment override.  */
struct ix86_address
{
  rtx base, index, disp;		/* Any of these may be NULL.  */
  HOST_WIDE_INT scale;			/* Index scale factor (1, 2, 4 or 8).  */
  enum ix86_address_seg { SEG_DEFAULT, SEG_FS, SEG_GS } seg;
};
b08de47e 887
b96a374d
AJ
static int ix86_decompose_address (rtx, struct ix86_address *);
static int ix86_address_cost (rtx);
static bool ix86_cannot_force_const_mem (rtx);
static rtx ix86_delegitimize_address (rtx);

/* Opaque here; defined with the builtin tables later in this file.  */
struct builtin_description;
static rtx ix86_expand_sse_comi (const struct builtin_description *,
				 tree, rtx);
static rtx ix86_expand_sse_compare (const struct builtin_description *,
				    tree, rtx);
static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
static rtx ix86_expand_store_builtin (enum insn_code, tree);
static rtx safe_vector_operand (rtx, enum machine_mode);
static enum rtx_code ix86_fp_compare_code_to_integer (enum rtx_code);
static void ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *,
				      enum rtx_code *, enum rtx_code *);
static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
static int ix86_fp_comparison_cost (enum rtx_code code);
static unsigned int ix86_select_alt_pic_regnum (void);
static int ix86_save_reg (unsigned int, int);
static void ix86_compute_frame_layout (struct ix86_frame *);
static int ix86_comp_type_attributes (tree, tree);
static int ix86_function_regparm (tree, tree);
const struct attribute_spec ix86_attribute_table[];
static bool ix86_function_ok_for_sibcall (tree, tree);
static tree ix86_handle_cdecl_attribute (tree *, tree, tree, int, bool *);
static tree ix86_handle_regparm_attribute (tree *, tree, tree, int, bool *);
static int ix86_value_regno (enum machine_mode);
static bool contains_128bit_aligned_vector_p (tree);
static bool ix86_ms_bitfield_layout_p (tree);
static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
static int extended_reg_mentioned_1 (rtx *, void *);
static bool ix86_rtx_costs (rtx, int, int, int *);
static int min_insn_size (rtx);
static tree ix86_md_asm_clobbers (tree clobbers);

#if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
static void ix86_svr3_asm_out_constructor (rtx, int);
#endif
e56feed6 932
53c17031
JH
/* Register class used for passing given 64bit part of the argument.
   These represent classes as documented by the PS ABI, with the exception
   of SSESF, SSEDF classes, that are basically SSE class, just gcc will
   use SF or DFmode move instead of DImode to avoid reformatting penalties.

   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
   whenever possible (upper half does contain padding).
 */
enum x86_64_reg_class
  {
    X86_64_NO_CLASS,
    X86_64_INTEGER_CLASS,
    X86_64_INTEGERSI_CLASS,
    X86_64_SSE_CLASS,
    X86_64_SSESF_CLASS,
    X86_64_SSEDF_CLASS,
    X86_64_SSEUP_CLASS,
    X86_64_X87_CLASS,
    X86_64_X87UP_CLASS,
    X86_64_MEMORY_CLASS
  };
/* Debug names for the classes above, indexed by enum value.  */
static const char * const x86_64_reg_class_name[] =
   {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};

/* Maximum number of eightbyte classes an argument can decompose into.  */
#define MAX_CLASSES 4
static int classify_argument (enum machine_mode, tree,
			      enum x86_64_reg_class [MAX_CLASSES], int);
static int examine_argument (enum machine_mode, tree, int, int *, int *);
static rtx construct_container (enum machine_mode, tree, int, int, int,
				const int *, int);
static enum x86_64_reg_class merge_classes (enum x86_64_reg_class,
					    enum x86_64_reg_class);

/* Table of constants used by fldpi, fldln2, etc....  */
static REAL_VALUE_TYPE ext_80387_constants_table [5];
/* Set once init_ext_80387_constants has filled the table above.  */
static bool ext_80387_constants_init = 0;
static void init_ext_80387_constants (void);
672a6f42
NB
970\f
/* Initialize the GCC target structure.  Each #undef/#define pair installs
   an i386-specific implementation of a target hook; hooks not overridden
   here keep the defaults supplied by TARGET_INITIALIZER.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
#  undef TARGET_MERGE_DECL_ATTRIBUTES
#  define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue

#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
#ifdef ASM_QUAD
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
#endif

/* x86 has no alignment restrictions on data, so the unaligned ops are
   simply the aligned ones.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
#define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
  ia32_use_dfa_pipeline_interface
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  ia32_multipass_dfa_lookahead

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif
#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START x86_file_start

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ix86_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST ix86_address_cost

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
#undef TARGET_CC_MODES_COMPATIBLE
#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list

#undef TARGET_MD_ASM_CLOBBERS
#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers

#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs

/* The one and only instance of the target hook vector.  */
struct gcc_target targetm = TARGET_INITIALIZER;
89c43c0a 1073
e075ae69 1074\f
67c2b45f
JS
/* The svr4 ABI for the i386 says that records and unions are returned
   in memory.  Subtargets may override this before including i386.c.  */
#ifndef DEFAULT_PCC_STRUCT_RETURN
#define DEFAULT_PCC_STRUCT_RETURN 1
#endif
1080
f5316dfe
MM
1081/* Sometimes certain combinations of command options do not make
1082 sense on a particular target machine. You can define a macro
1083 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1084 defined, is executed once just after all the command options have
1085 been parsed.
1086
1087 Don't use this macro to turn on various extra optimizations for
1088 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1089
1090void
b96a374d 1091override_options (void)
f5316dfe 1092{
400500c4 1093 int i;
e075ae69
RH
1094 /* Comes from final.c -- no real reason to change it. */
1095#define MAX_CODE_ALIGN 16
f5316dfe 1096
c8c5cb99
SC
1097 static struct ptt
1098 {
8b60264b
KG
1099 const struct processor_costs *cost; /* Processor costs */
1100 const int target_enable; /* Target flags to enable. */
1101 const int target_disable; /* Target flags to disable. */
1102 const int align_loop; /* Default alignments. */
2cca7283 1103 const int align_loop_max_skip;
8b60264b 1104 const int align_jump;
2cca7283 1105 const int align_jump_max_skip;
8b60264b 1106 const int align_func;
e075ae69 1107 }
0f290768 1108 const processor_target_table[PROCESSOR_max] =
e075ae69 1109 {
4977bab6
ZW
1110 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1111 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1112 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1113 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1114 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1115 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1116 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
89c43c0a
VM
1117 {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
1118 {&nocona_cost, 0, 0, 0, 0, 0, 0, 0}
e075ae69
RH
1119 };
1120
f4365627 1121 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
e075ae69
RH
1122 static struct pta
1123 {
8b60264b
KG
1124 const char *const name; /* processor name or nickname. */
1125 const enum processor_type processor;
0dd0e980
JH
1126 const enum pta_flags
1127 {
1128 PTA_SSE = 1,
1129 PTA_SSE2 = 2,
5bbeea44
JH
1130 PTA_SSE3 = 4,
1131 PTA_MMX = 8,
1132 PTA_PREFETCH_SSE = 16,
1133 PTA_3DNOW = 32,
4977bab6
ZW
1134 PTA_3DNOW_A = 64,
1135 PTA_64BIT = 128
0dd0e980 1136 } flags;
e075ae69 1137 }
0f290768 1138 const processor_alias_table[] =
e075ae69 1139 {
0dd0e980
JH
1140 {"i386", PROCESSOR_I386, 0},
1141 {"i486", PROCESSOR_I486, 0},
1142 {"i586", PROCESSOR_PENTIUM, 0},
1143 {"pentium", PROCESSOR_PENTIUM, 0},
1144 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
3780101d
JG
1145 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1146 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1147 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
3462df62 1148 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
0dd0e980
JH
1149 {"i686", PROCESSOR_PENTIUMPRO, 0},
1150 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1151 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
f4365627 1152 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
5bbeea44
JH
1153 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1154 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1155 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1156 | PTA_MMX | PTA_PREFETCH_SSE},
1157 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1158 | PTA_MMX | PTA_PREFETCH_SSE},
89c43c0a
VM
1159 {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
1160 | PTA_MMX | PTA_PREFETCH_SSE},
1161 {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
5bbeea44 1162 | PTA_MMX | PTA_PREFETCH_SSE},
0dd0e980
JH
1163 {"k6", PROCESSOR_K6, PTA_MMX},
1164 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1165 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
f4365627 1166 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
0dd0e980 1167 | PTA_3DNOW_A},
f4365627 1168 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
0dd0e980 1169 | PTA_3DNOW | PTA_3DNOW_A},
f4365627 1170 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
0dd0e980 1171 | PTA_3DNOW_A | PTA_SSE},
f4365627 1172 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
0dd0e980 1173 | PTA_3DNOW_A | PTA_SSE},
f4365627 1174 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
0dd0e980 1175 | PTA_3DNOW_A | PTA_SSE},
3fec9fa9
JJ
1176 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1177 | PTA_SSE | PTA_SSE2 },
4977bab6
ZW
1178 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1179 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
9a609388
JH
1180 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1181 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1182 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1183 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1184 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1185 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
3af4bd89 1186 };
c8c5cb99 1187
ca7558fc 1188 int const pta_size = ARRAY_SIZE (processor_alias_table);
c8c5cb99 1189
41ed2237 1190 /* Set the default values for switches whose default depends on TARGET_64BIT
d1f87653 1191 in case they weren't overwritten by command line options. */
55ba61f3
JH
1192 if (TARGET_64BIT)
1193 {
1194 if (flag_omit_frame_pointer == 2)
1195 flag_omit_frame_pointer = 1;
1196 if (flag_asynchronous_unwind_tables == 2)
1197 flag_asynchronous_unwind_tables = 1;
1198 if (flag_pcc_struct_return == 2)
1199 flag_pcc_struct_return = 0;
1200 }
1201 else
1202 {
1203 if (flag_omit_frame_pointer == 2)
1204 flag_omit_frame_pointer = 0;
1205 if (flag_asynchronous_unwind_tables == 2)
1206 flag_asynchronous_unwind_tables = 0;
1207 if (flag_pcc_struct_return == 2)
7c712dcc 1208 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
55ba61f3
JH
1209 }
1210
f5316dfe
MM
1211#ifdef SUBTARGET_OVERRIDE_OPTIONS
1212 SUBTARGET_OVERRIDE_OPTIONS;
1213#endif
1214
9e555526
RH
1215 if (!ix86_tune_string && ix86_arch_string)
1216 ix86_tune_string = ix86_arch_string;
1217 if (!ix86_tune_string)
1218 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
f4365627 1219 if (!ix86_arch_string)
3fec9fa9 1220 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
e075ae69 1221
6189a572
JH
1222 if (ix86_cmodel_string != 0)
1223 {
1224 if (!strcmp (ix86_cmodel_string, "small"))
1225 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1226 else if (flag_pic)
c725bd79 1227 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
6189a572
JH
1228 else if (!strcmp (ix86_cmodel_string, "32"))
1229 ix86_cmodel = CM_32;
1230 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1231 ix86_cmodel = CM_KERNEL;
1232 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
1233 ix86_cmodel = CM_MEDIUM;
1234 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1235 ix86_cmodel = CM_LARGE;
1236 else
1237 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1238 }
1239 else
1240 {
1241 ix86_cmodel = CM_32;
1242 if (TARGET_64BIT)
1243 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1244 }
c93e80a5
JH
1245 if (ix86_asm_string != 0)
1246 {
1247 if (!strcmp (ix86_asm_string, "intel"))
1248 ix86_asm_dialect = ASM_INTEL;
1249 else if (!strcmp (ix86_asm_string, "att"))
1250 ix86_asm_dialect = ASM_ATT;
1251 else
1252 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1253 }
6189a572 1254 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
c725bd79 1255 error ("code model `%s' not supported in the %s bit mode",
6189a572
JH
1256 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1257 if (ix86_cmodel == CM_LARGE)
c725bd79 1258 sorry ("code model `large' not supported yet");
0c2dc519 1259 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
c725bd79 1260 sorry ("%i-bit mode not compiled in",
0c2dc519 1261 (target_flags & MASK_64BIT) ? 64 : 32);
6189a572 1262
f4365627
JH
1263 for (i = 0; i < pta_size; i++)
1264 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1265 {
1266 ix86_arch = processor_alias_table[i].processor;
1267 /* Default cpu tuning to the architecture. */
9e555526 1268 ix86_tune = ix86_arch;
f4365627 1269 if (processor_alias_table[i].flags & PTA_MMX
9ef1b13a 1270 && !(target_flags_explicit & MASK_MMX))
f4365627
JH
1271 target_flags |= MASK_MMX;
1272 if (processor_alias_table[i].flags & PTA_3DNOW
9ef1b13a 1273 && !(target_flags_explicit & MASK_3DNOW))
f4365627
JH
1274 target_flags |= MASK_3DNOW;
1275 if (processor_alias_table[i].flags & PTA_3DNOW_A
9ef1b13a 1276 && !(target_flags_explicit & MASK_3DNOW_A))
f4365627
JH
1277 target_flags |= MASK_3DNOW_A;
1278 if (processor_alias_table[i].flags & PTA_SSE
9ef1b13a 1279 && !(target_flags_explicit & MASK_SSE))
f4365627
JH
1280 target_flags |= MASK_SSE;
1281 if (processor_alias_table[i].flags & PTA_SSE2
9ef1b13a 1282 && !(target_flags_explicit & MASK_SSE2))
f4365627 1283 target_flags |= MASK_SSE2;
5bbeea44
JH
1284 if (processor_alias_table[i].flags & PTA_SSE3
1285 && !(target_flags_explicit & MASK_SSE3))
1286 target_flags |= MASK_SSE3;
f4365627
JH
1287 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1288 x86_prefetch_sse = true;
4977bab6
ZW
1289 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1290 error ("CPU you selected does not support x86-64 instruction set");
f4365627
JH
1291 break;
1292 }
400500c4 1293
f4365627
JH
1294 if (i == pta_size)
1295 error ("bad value (%s) for -march= switch", ix86_arch_string);
e075ae69 1296
f4365627 1297 for (i = 0; i < pta_size; i++)
9e555526 1298 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
f4365627 1299 {
9e555526 1300 ix86_tune = processor_alias_table[i].processor;
4977bab6
ZW
1301 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1302 error ("CPU you selected does not support x86-64 instruction set");
f4365627
JH
1303 break;
1304 }
1305 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1306 x86_prefetch_sse = true;
1307 if (i == pta_size)
9e555526 1308 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
e075ae69 1309
2ab0437e
JH
1310 if (optimize_size)
1311 ix86_cost = &size_cost;
1312 else
9e555526
RH
1313 ix86_cost = processor_target_table[ix86_tune].cost;
1314 target_flags |= processor_target_table[ix86_tune].target_enable;
1315 target_flags &= ~processor_target_table[ix86_tune].target_disable;
e075ae69 1316
36edd3cc
BS
1317 /* Arrange to set up i386_stack_locals for all functions. */
1318 init_machine_status = ix86_init_machine_status;
fce5a9f2 1319
0f290768 1320 /* Validate -mregparm= value. */
e075ae69 1321 if (ix86_regparm_string)
b08de47e 1322 {
400500c4
RK
1323 i = atoi (ix86_regparm_string);
1324 if (i < 0 || i > REGPARM_MAX)
1325 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1326 else
1327 ix86_regparm = i;
b08de47e 1328 }
0d7d98ee
JH
1329 else
1330 if (TARGET_64BIT)
1331 ix86_regparm = REGPARM_MAX;
b08de47e 1332
3e18fdf6 1333 /* If the user has provided any of the -malign-* options,
a4f31c00 1334 warn and use that value only if -falign-* is not set.
3e18fdf6 1335 Remove this code in GCC 3.2 or later. */
e075ae69 1336 if (ix86_align_loops_string)
b08de47e 1337 {
3e18fdf6
GK
1338 warning ("-malign-loops is obsolete, use -falign-loops");
1339 if (align_loops == 0)
1340 {
1341 i = atoi (ix86_align_loops_string);
1342 if (i < 0 || i > MAX_CODE_ALIGN)
1343 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1344 else
1345 align_loops = 1 << i;
1346 }
b08de47e 1347 }
3af4bd89 1348
e075ae69 1349 if (ix86_align_jumps_string)
b08de47e 1350 {
3e18fdf6
GK
1351 warning ("-malign-jumps is obsolete, use -falign-jumps");
1352 if (align_jumps == 0)
1353 {
1354 i = atoi (ix86_align_jumps_string);
1355 if (i < 0 || i > MAX_CODE_ALIGN)
1356 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1357 else
1358 align_jumps = 1 << i;
1359 }
b08de47e 1360 }
b08de47e 1361
e075ae69 1362 if (ix86_align_funcs_string)
b08de47e 1363 {
3e18fdf6
GK
1364 warning ("-malign-functions is obsolete, use -falign-functions");
1365 if (align_functions == 0)
1366 {
1367 i = atoi (ix86_align_funcs_string);
1368 if (i < 0 || i > MAX_CODE_ALIGN)
1369 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1370 else
1371 align_functions = 1 << i;
1372 }
b08de47e 1373 }
3af4bd89 1374
3e18fdf6 1375 /* Default align_* from the processor table. */
3e18fdf6 1376 if (align_loops == 0)
2cca7283 1377 {
9e555526
RH
1378 align_loops = processor_target_table[ix86_tune].align_loop;
1379 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
2cca7283 1380 }
3e18fdf6 1381 if (align_jumps == 0)
2cca7283 1382 {
9e555526
RH
1383 align_jumps = processor_target_table[ix86_tune].align_jump;
1384 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
2cca7283 1385 }
3e18fdf6 1386 if (align_functions == 0)
2cca7283 1387 {
9e555526 1388 align_functions = processor_target_table[ix86_tune].align_func;
2cca7283 1389 }
3e18fdf6 1390
e4c0478d 1391 /* Validate -mpreferred-stack-boundary= value, or provide default.
fbb83b43
AO
1392 The default of 128 bits is for Pentium III's SSE __m128, but we
1393 don't want additional code to keep the stack aligned when
1394 optimizing for code size. */
1395 ix86_preferred_stack_boundary = (optimize_size
ef49d42e 1396 ? TARGET_64BIT ? 128 : 32
fbb83b43 1397 : 128);
e075ae69 1398 if (ix86_preferred_stack_boundary_string)
3af4bd89 1399 {
400500c4 1400 i = atoi (ix86_preferred_stack_boundary_string);
ef49d42e 1401 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
c6257c5d 1402 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
ef49d42e 1403 TARGET_64BIT ? 4 : 2);
400500c4
RK
1404 else
1405 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
3af4bd89 1406 }
77a989d1 1407
0f290768 1408 /* Validate -mbranch-cost= value, or provide default. */
9e555526 1409 ix86_branch_cost = processor_target_table[ix86_tune].cost->branch_cost;
e075ae69 1410 if (ix86_branch_cost_string)
804a8ee0 1411 {
400500c4
RK
1412 i = atoi (ix86_branch_cost_string);
1413 if (i < 0 || i > 5)
1414 error ("-mbranch-cost=%d is not between 0 and 5", i);
1415 else
1416 ix86_branch_cost = i;
804a8ee0 1417 }
804a8ee0 1418
f996902d
RH
1419 if (ix86_tls_dialect_string)
1420 {
1421 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1422 ix86_tls_dialect = TLS_DIALECT_GNU;
1423 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1424 ix86_tls_dialect = TLS_DIALECT_SUN;
1425 else
1426 error ("bad value (%s) for -mtls-dialect= switch",
1427 ix86_tls_dialect_string);
1428 }
1429
e9a25f70
JL
1430 /* Keep nonleaf frame pointers. */
1431 if (TARGET_OMIT_LEAF_FRAME_POINTER)
77a989d1 1432 flag_omit_frame_pointer = 1;
e075ae69
RH
1433
1434 /* If we're doing fast math, we don't care about comparison order
1435 wrt NaNs. This lets us use a shorter comparison sequence. */
de6c5979 1436 if (flag_unsafe_math_optimizations)
e075ae69
RH
1437 target_flags &= ~MASK_IEEE_FP;
1438
30c99a84
RH
1439 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1440 since the insns won't need emulation. */
1441 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1442 target_flags &= ~MASK_NO_FANCY_MATH_387;
1443
9e200aaf
KC
1444 /* Turn on SSE2 builtins for -msse3. */
1445 if (TARGET_SSE3)
22c7c85e
L
1446 target_flags |= MASK_SSE2;
1447
1448 /* Turn on SSE builtins for -msse2. */
1449 if (TARGET_SSE2)
1450 target_flags |= MASK_SSE;
1451
14f73b5a
JH
1452 if (TARGET_64BIT)
1453 {
1454 if (TARGET_ALIGN_DOUBLE)
c725bd79 1455 error ("-malign-double makes no sense in the 64bit mode");
14f73b5a 1456 if (TARGET_RTD)
c725bd79 1457 error ("-mrtd calling convention not supported in the 64bit mode");
14f73b5a 1458 /* Enable by default the SSE and MMX builtins. */
965f5423
JH
1459 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1460 ix86_fpmath = FPMATH_SSE;
14f73b5a 1461 }
965f5423 1462 else
a5b378d6
JH
1463 {
1464 ix86_fpmath = FPMATH_387;
1465 /* i386 ABI does not specify red zone. It still makes sense to use it
1466 when programmer takes care to stack from being destroyed. */
1467 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
1468 target_flags |= MASK_NO_RED_ZONE;
1469 }
965f5423
JH
1470
1471 if (ix86_fpmath_string != 0)
1472 {
1473 if (! strcmp (ix86_fpmath_string, "387"))
1474 ix86_fpmath = FPMATH_387;
1475 else if (! strcmp (ix86_fpmath_string, "sse"))
1476 {
1477 if (!TARGET_SSE)
1478 {
1479 warning ("SSE instruction set disabled, using 387 arithmetics");
1480 ix86_fpmath = FPMATH_387;
1481 }
1482 else
1483 ix86_fpmath = FPMATH_SSE;
1484 }
1485 else if (! strcmp (ix86_fpmath_string, "387,sse")
1486 || ! strcmp (ix86_fpmath_string, "sse,387"))
1487 {
1488 if (!TARGET_SSE)
1489 {
1490 warning ("SSE instruction set disabled, using 387 arithmetics");
1491 ix86_fpmath = FPMATH_387;
1492 }
1493 else if (!TARGET_80387)
1494 {
1495 warning ("387 instruction set disabled, using SSE arithmetics");
1496 ix86_fpmath = FPMATH_SSE;
1497 }
1498 else
1499 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1500 }
fce5a9f2 1501 else
965f5423
JH
1502 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1503 }
14f73b5a 1504
a7180f70
BS
1505 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1506 on by -msse. */
1507 if (TARGET_SSE)
e37af218
RH
1508 {
1509 target_flags |= MASK_MMX;
1510 x86_prefetch_sse = true;
1511 }
c6036a37 1512
47f339cf
BS
1513 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
1514 if (TARGET_3DNOW)
1515 {
1516 target_flags |= MASK_MMX;
d1f87653 1517 /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
47f339cf
BS
1518 extensions it adds. */
1519 if (x86_3dnow_a & (1 << ix86_arch))
1520 target_flags |= MASK_3DNOW_A;
1521 }
9e555526 1522 if ((x86_accumulate_outgoing_args & TUNEMASK)
9ef1b13a 1523 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
c6036a37
JH
1524 && !optimize_size)
1525 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
623fe810
RH
1526
1527 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1528 {
1529 char *p;
1530 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1531 p = strchr (internal_label_prefix, 'X');
1532 internal_label_prefix_len = p - internal_label_prefix;
1533 *p = '\0';
1534 }
f5316dfe
MM
1535}
1536\f
/* Set default optimization-dependent options.  LEVEL is the -O level;
   SIZE is nonzero for -Os (unused here).  Runs before override_options,
   so TARGET_64BIT is not yet known.  */
void
optimization_options (int level, int size ATTRIBUTE_UNUSED)
{
  /* For -O2 and beyond, turn off -fschedule-insns by default.  It tends to
     make the problem with not enough registers even worse.  */
#ifdef INSN_SCHEDULING
  if (level > 1)
    flag_schedule_insns = 0;
#endif

  /* The default values of these switches depend on TARGET_64BIT,
     which is not known at this moment.  Mark these values with 2 and
     let the user override them.  In case there is no command line option
     specifying them, we will set the defaults in override_options.  */
  if (optimize >= 1)
    flag_omit_frame_pointer = 2;
  flag_pcc_struct_return = 2;
  flag_asynchronous_unwind_tables = 2;
}
b08de47e 1556\f
/* Table of valid machine attributes.  Consumed by the attribute
   machinery via TARGET_ATTRIBUTE_TABLE.  */
const struct attribute_spec ix86_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  /* Stdcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
  /* Fastcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "fastcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
  /* Cdecl attribute says the callee is a normal C declaration.  */
  { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
  /* Regparm attribute specifies how many integer arguments are to be
     passed in registers.  */
  { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
  /* Only available on targets with DLL-style linkage (e.g. mingw/cygwin).  */
  { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
  { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
  { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
#endif
  { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
  { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
  /* Sentinel — must remain the last entry.  */
  { NULL, 0, 0, false, false, false, NULL }
};
1581
/* Decide whether we can make a sibling call to a function.  DECL is the
   declaration of the function being targeted by the call and EXP is the
   CALL_EXPR representing the call.  Returns false whenever the tail-call
   transformation would be unsafe on this target.  */

static bool
ix86_function_ok_for_sibcall (tree decl, tree exp)
{
  /* If we are generating position-independent code, we cannot sibcall
     optimize any indirect call, or a direct call to a global function,
     as the PLT requires %ebx be live.  */
  if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
    return false;

  /* If we are returning floats on the 80387 register stack, we cannot
     make a sibcall from a function that doesn't return a float to a
     function that does or, conversely, from a function that does return
     a float to a function that doesn't; the necessary stack adjustment
     would not be executed.  */
  if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp)))
      != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)))))
    return false;

  /* If this call is indirect, we'll need to be able to use a call-clobbered
     register for the address of the target function.  Make sure that all
     such registers are not used for passing parameters.  */
  if (!decl && !TARGET_64BIT)
    {
      tree type;

      /* We're looking at the CALL_EXPR, we need the type of the function.  */
      type = TREE_OPERAND (exp, 0);	/* pointer expression */
      type = TREE_TYPE (type);		/* pointer type */
      type = TREE_TYPE (type);		/* function type */

      if (ix86_function_regparm (type, NULL) >= 3)
	{
	  /* ??? Need to count the actual number of registers to be used,
	     not the possible number of registers.  Fix later.  */
	  return false;
	}
    }

  /* Otherwise okay.  That also includes certain types of indirect calls.  */
  return true;
}
1627
e91f04de 1628/* Handle a "cdecl", "stdcall", or "fastcall" attribute;
91d231cb
JM
1629 arguments as in struct attribute_spec.handler. */
1630static tree
b96a374d
AJ
1631ix86_handle_cdecl_attribute (tree *node, tree name,
1632 tree args ATTRIBUTE_UNUSED,
1633 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
91d231cb
JM
1634{
1635 if (TREE_CODE (*node) != FUNCTION_TYPE
1636 && TREE_CODE (*node) != METHOD_TYPE
1637 && TREE_CODE (*node) != FIELD_DECL
1638 && TREE_CODE (*node) != TYPE_DECL)
b08de47e 1639 {
91d231cb
JM
1640 warning ("`%s' attribute only applies to functions",
1641 IDENTIFIER_POINTER (name));
1642 *no_add_attrs = true;
1643 }
e91f04de
CH
1644 else
1645 {
1646 if (is_attribute_p ("fastcall", name))
1647 {
1648 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
1649 {
1650 error ("fastcall and stdcall attributes are not compatible");
1651 }
1652 else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
1653 {
1654 error ("fastcall and regparm attributes are not compatible");
1655 }
1656 }
1657 else if (is_attribute_p ("stdcall", name))
1658 {
1659 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1660 {
1661 error ("fastcall and stdcall attributes are not compatible");
1662 }
1663 }
1664 }
b08de47e 1665
91d231cb
JM
1666 if (TARGET_64BIT)
1667 {
1668 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1669 *no_add_attrs = true;
1670 }
b08de47e 1671
91d231cb
JM
1672 return NULL_TREE;
1673}
b08de47e 1674
/* Handle a "regparm" attribute;
   arguments as in struct attribute_spec.handler.
   Validates that the attribute is attached to a function entity and that
   its single argument is an integer constant within [0, REGPARM_MAX].  */
static tree
ix86_handle_regparm_attribute (tree *node, tree name, tree args,
			       int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_TYPE
      && TREE_CODE (*node) != METHOD_TYPE
      && TREE_CODE (*node) != FIELD_DECL
      && TREE_CODE (*node) != TYPE_DECL)
    {
      warning ("`%s' attribute only applies to functions",
	       IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }
  else
    {
      tree cst;

      /* The single argument must be a compile-time integer constant.  */
      cst = TREE_VALUE (args);
      if (TREE_CODE (cst) != INTEGER_CST)
	{
	  warning ("`%s' attribute requires an integer constant argument",
		   IDENTIFIER_POINTER (name));
	  *no_add_attrs = true;
	}
      else if (compare_tree_int (cst, REGPARM_MAX) > 0)
	{
	  warning ("argument to `%s' attribute larger than %d",
		   IDENTIFIER_POINTER (name), REGPARM_MAX);
	  *no_add_attrs = true;
	}

      /* regparm conflicts with fastcall's fixed ECX/EDX convention.  */
      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
	{
	  error ("fastcall and regparm attributes are not compatible");
	}
    }

  return NULL_TREE;
}
1716
1717/* Return 0 if the attributes for two types are incompatible, 1 if they
1718 are compatible, and 2 if they are nearly compatible (which causes a
1719 warning to be generated). */
1720
8d8e52be 1721static int
b96a374d 1722ix86_comp_type_attributes (tree type1, tree type2)
b08de47e 1723{
0f290768 1724 /* Check for mismatch of non-default calling convention. */
27c38fbe 1725 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
afcfe58c
MM
1726
1727 if (TREE_CODE (type1) != FUNCTION_TYPE)
1728 return 1;
1729
b96a374d 1730 /* Check for mismatched fastcall types */
e91f04de
CH
1731 if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
1732 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
b96a374d 1733 return 0;
e91f04de 1734
afcfe58c 1735 /* Check for mismatched return types (cdecl vs stdcall). */
6093f019
RH
1736 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1737 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
265d94ac
MM
1738 return 0;
1739 if (ix86_function_regparm (type1, NULL)
1740 != ix86_function_regparm (type2, NULL))
afcfe58c 1741 return 0;
b08de47e
MM
1742 return 1;
1743}
b08de47e 1744\f
/* Return the regparm value for a function with the indicated TYPE and DECL.
   DECL may be NULL when calling function indirectly
   or considering a libcall.  */

static int
ix86_function_regparm (tree type, tree decl)
{
  tree attr;
  int regparm = ix86_regparm;	/* Default, from -mregparm=N.  */
  bool user_convention = false;

  if (!TARGET_64BIT)
    {
      /* An explicit regparm attribute on the type overrides the default.  */
      attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
      if (attr)
	{
	  regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
	  user_convention = true;
	}

      /* fastcall fixes the convention at two register arguments.  */
      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
	{
	  regparm = 2;
	  user_convention = true;
	}

      /* Use register calling convention for local functions when possible.
	 (The !TARGET_64BIT test here is redundant with the enclosing
	 condition, but harmless.)  */
      if (!TARGET_64BIT && !user_convention && decl
	  && flag_unit_at_a_time && !profile_flag)
	{
	  struct cgraph_local_info *i = cgraph_local_info (decl);
	  if (i && i->local)
	    {
	      /* We can't use regparm(3) for nested functions as these use
		 static chain pointer in third argument.  */
	      if (DECL_CONTEXT (decl) && !DECL_NO_STATIC_CHAIN (decl))
		regparm = 2;
	      else
		regparm = 3;
	    }
	}
    }
  return regparm;
}
1789
/* Return true if EAX is live at the start of the function.  Used by
   ix86_expand_prologue to determine if we need special help before
   calling allocate_stack_worker.  */

static bool
ix86_eax_live_at_start_p (void)
{
  /* Cheat.  Don't bother working forward from ix86_function_regparm
     to the function type to whether an actual argument is located in
     eax.  Instead just look at cfg info, which is still close enough
     to correct at this point.  This gives false positives for broken
     functions that might use uninitialized data that happens to be
     allocated in eax, but who cares?  */
  return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->global_live_at_end, 0);
}
1805
/* Value is the number of bytes of arguments automatically
   popped when returning from a subroutine call.
   FUNDECL is the declaration node of the function (as a tree),
   FUNTYPE is the data type of the function (as a tree),
   or for a library call it is an identifier node for the subroutine name.
   SIZE is the number of bytes of arguments passed on the stack.

   On the 80386, the RTD insn may be used to pop them if the number
   of args is fixed, but if the number is variable then the caller
   must pop them all.  RTD can't be used for library calls now
   because the library is compiled with the Unix compiler.
   Use of RTD is a selectable option, since it is incompatible with
   standard Unix calling sequences.  If the option is not selected,
   the caller must always pop the args.

   The attribute stdcall is equivalent to RTD on a per module basis.  */

int
ix86_return_pops_args (tree fundecl, tree funtype, int size)
{
  /* -mrtd applies only to real functions, not library-call identifiers.  */
  int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);

  /* Cdecl functions override -mrtd, and never pop the stack.  */
  if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
    {
      /* Stdcall and fastcall functions will pop the stack if not
	 variable args.  */
      if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
	  || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
	rtd = 1;

      /* Callee pops only when the argument list is fixed (ends in void).  */
      if (rtd
	  && (TYPE_ARG_TYPES (funtype) == NULL_TREE
	      || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
		  == void_type_node)))
	return size;
    }

  /* Lose any fake structure return argument if it is passed on the stack.  */
  if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
      && !TARGET_64BIT)
    {
      int nregs = ix86_function_regparm (funtype, fundecl);

      /* The hidden return-pointer is on the stack only when no register
	 arguments are available to carry it.  */
      if (!nregs)
	return GET_MODE_SIZE (Pmode);
    }

  return 0;
}
b08de47e
MM
1856\f
1857/* Argument support functions. */
1858
53c17031
JH
1859/* Return true when register may be used to pass function parameters. */
1860bool
b96a374d 1861ix86_function_arg_regno_p (int regno)
53c17031
JH
1862{
1863 int i;
1864 if (!TARGET_64BIT)
0333394e
JJ
1865 return (regno < REGPARM_MAX
1866 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
53c17031
JH
1867 if (SSE_REGNO_P (regno) && TARGET_SSE)
1868 return true;
1869 /* RAX is used as hidden argument to va_arg functions. */
1870 if (!regno)
1871 return true;
1872 for (i = 0; i < REGPARM_MAX; i++)
1873 if (regno == x86_64_int_parameter_registers[i])
1874 return true;
1875 return false;
1876}
1877
/* Initialize a variable CUM of type CUMULATIVE_ARGS
   for a call to a function whose data type is FNTYPE.
   For a library call, FNTYPE is 0.  */

void
init_cumulative_args (CUMULATIVE_ARGS *cum,  /* Argument info to initialize */
		      tree fntype,	/* tree ptr for function decl */
		      rtx libname,	/* SYMBOL_REF of library name or 0 */
		      tree fndecl)
{
  static CUMULATIVE_ARGS zero_cum;
  tree param, next_param;

  if (TARGET_DEBUG_ARG)
    {
      fprintf (stderr, "\ninit_cumulative_args (");
      if (fntype)
	fprintf (stderr, "fntype code = %s, ret code = %s",
		 tree_code_name[(int) TREE_CODE (fntype)],
		 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
      else
	fprintf (stderr, "no fntype");

      if (libname)
	fprintf (stderr, ", libname = %s", XSTR (libname, 0));
    }

  *cum = zero_cum;

  /* Set up the number of registers to use for passing arguments.  */
  if (fntype)
    cum->nregs = ix86_function_regparm (fntype, fndecl);
  else
    cum->nregs = ix86_regparm;
  cum->sse_nregs = SSE_REGPARM_MAX;
  cum->mmx_nregs = MMX_REGPARM_MAX;
  cum->warn_sse = true;
  cum->warn_mmx = true;
  cum->maybe_vaarg = false;

  /* Use ecx and edx registers if function has fastcall attribute.  */
  if (fntype && !TARGET_64BIT)
    {
      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
	{
	  cum->nregs = 2;
	  cum->fastcall = 1;
	}
    }


  /* Determine if this function has variable arguments.  This is
     indicated by the last argument being 'void_type_mode' if there
     are no variable arguments.  If there are variable arguments, then
     we won't pass anything in registers.  */

  if (cum->nregs || !TARGET_MMX || !TARGET_SSE)
    {
      for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
	   param != 0; param = next_param)
	{
	  next_param = TREE_CHAIN (param);
	  /* A parameter list that does not end in void_type_node is a
	     varargs prototype.  */
	  if (next_param == 0 && TREE_VALUE (param) != void_type_node)
	    {
	      /* In 32-bit mode varargs calls pass nothing in registers.  */
	      if (!TARGET_64BIT)
		{
		  cum->nregs = 0;
		  cum->sse_nregs = 0;
		  cum->mmx_nregs = 0;
		  cum->warn_sse = 0;
		  cum->warn_mmx = 0;
		  cum->fastcall = 0;
		}
	      cum->maybe_vaarg = true;
	    }
	}
    }
  /* Unprototyped functions and libcalls without a known name may also
     take variable arguments.  */
  if ((!fntype && !libname)
      || (fntype && !TYPE_ARG_TYPES (fntype)))
    cum->maybe_vaarg = 1;

  if (TARGET_DEBUG_ARG)
    fprintf (stderr, ", nregs=%d )\n", cum->nregs);

  return;
}
1964
d1f87653 1965/* x86-64 register passing implementation. See x86-64 ABI for details. Goal
f710504c 1966 of this code is to classify each 8bytes of incoming argument by the register
53c17031
JH
1967 class and assign registers accordingly. */
1968
1969/* Return the union class of CLASS1 and CLASS2.
1970 See the x86-64 PS ABI for details. */
1971
1972static enum x86_64_reg_class
b96a374d 1973merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
53c17031
JH
1974{
1975 /* Rule #1: If both classes are equal, this is the resulting class. */
1976 if (class1 == class2)
1977 return class1;
1978
1979 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1980 the other class. */
1981 if (class1 == X86_64_NO_CLASS)
1982 return class2;
1983 if (class2 == X86_64_NO_CLASS)
1984 return class1;
1985
1986 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1987 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1988 return X86_64_MEMORY_CLASS;
1989
1990 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1991 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1992 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1993 return X86_64_INTEGERSI_CLASS;
1994 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1995 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1996 return X86_64_INTEGER_CLASS;
1997
1998 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1999 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
2000 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
2001 return X86_64_MEMORY_CLASS;
2002
2003 /* Rule #6: Otherwise class SSE is used. */
2004 return X86_64_SSE_CLASS;
2005}
2006
2007/* Classify the argument of type TYPE and mode MODE.
2008 CLASSES will be filled by the register class used to pass each word
2009 of the operand. The number of words is returned. In case the parameter
2010 should be passed in memory, 0 is returned. As a special case for zero
2011 sized containers, classes[0] will be NO_CLASS and 1 is returned.
2012
2013 BIT_OFFSET is used internally for handling records and specifies offset
2014 of the offset in bits modulo 256 to avoid overflow cases.
2015
2016 See the x86-64 PS ABI for details.
2017*/
2018
2019static int
b96a374d
AJ
2020classify_argument (enum machine_mode mode, tree type,
2021 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
53c17031 2022{
296e4ae8 2023 HOST_WIDE_INT bytes =
53c17031 2024 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
23327dae 2025 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
53c17031 2026
c60ee6f5
JH
2027 /* Variable sized entities are always passed/returned in memory. */
2028 if (bytes < 0)
2029 return 0;
2030
dafc5b82
JH
2031 if (mode != VOIDmode
2032 && MUST_PASS_IN_STACK (mode, type))
2033 return 0;
2034
53c17031
JH
2035 if (type && AGGREGATE_TYPE_P (type))
2036 {
2037 int i;
2038 tree field;
2039 enum x86_64_reg_class subclasses[MAX_CLASSES];
2040
2041 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
2042 if (bytes > 16)
2043 return 0;
2044
2045 for (i = 0; i < words; i++)
2046 classes[i] = X86_64_NO_CLASS;
2047
2048 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
2049 signalize memory class, so handle it as special case. */
2050 if (!words)
2051 {
2052 classes[0] = X86_64_NO_CLASS;
2053 return 1;
2054 }
2055
2056 /* Classify each field of record and merge classes. */
2057 if (TREE_CODE (type) == RECORD_TYPE)
2058 {
91ea38f9
JH
2059 /* For classes first merge in the field of the subclasses. */
2060 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
2061 {
2062 tree bases = TYPE_BINFO_BASETYPES (type);
2063 int n_bases = TREE_VEC_LENGTH (bases);
2064 int i;
2065
2066 for (i = 0; i < n_bases; ++i)
2067 {
2068 tree binfo = TREE_VEC_ELT (bases, i);
2069 int num;
2070 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
2071 tree type = BINFO_TYPE (binfo);
2072
2073 num = classify_argument (TYPE_MODE (type),
2074 type, subclasses,
2075 (offset + bit_offset) % 256);
2076 if (!num)
2077 return 0;
2078 for (i = 0; i < num; i++)
2079 {
db01f480 2080 int pos = (offset + (bit_offset % 64)) / 8 / 8;
91ea38f9
JH
2081 classes[i + pos] =
2082 merge_classes (subclasses[i], classes[i + pos]);
2083 }
2084 }
2085 }
43f3a59d 2086 /* And now merge the fields of structure. */
53c17031
JH
2087 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2088 {
2089 if (TREE_CODE (field) == FIELD_DECL)
2090 {
2091 int num;
2092
2093 /* Bitfields are always classified as integer. Handle them
2094 early, since later code would consider them to be
2095 misaligned integers. */
2096 if (DECL_BIT_FIELD (field))
2097 {
2098 for (i = int_bit_position (field) / 8 / 8;
2099 i < (int_bit_position (field)
2100 + tree_low_cst (DECL_SIZE (field), 0)
b96a374d 2101 + 63) / 8 / 8; i++)
53c17031
JH
2102 classes[i] =
2103 merge_classes (X86_64_INTEGER_CLASS,
2104 classes[i]);
2105 }
2106 else
2107 {
2108 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2109 TREE_TYPE (field), subclasses,
2110 (int_bit_position (field)
2111 + bit_offset) % 256);
2112 if (!num)
2113 return 0;
2114 for (i = 0; i < num; i++)
2115 {
2116 int pos =
db01f480 2117 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
53c17031
JH
2118 classes[i + pos] =
2119 merge_classes (subclasses[i], classes[i + pos]);
2120 }
2121 }
2122 }
2123 }
2124 }
2125 /* Arrays are handled as small records. */
2126 else if (TREE_CODE (type) == ARRAY_TYPE)
2127 {
2128 int num;
2129 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2130 TREE_TYPE (type), subclasses, bit_offset);
2131 if (!num)
2132 return 0;
2133
2134 /* The partial classes are now full classes. */
2135 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2136 subclasses[0] = X86_64_SSE_CLASS;
2137 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
2138 subclasses[0] = X86_64_INTEGER_CLASS;
2139
2140 for (i = 0; i < words; i++)
2141 classes[i] = subclasses[i % num];
2142 }
2143 /* Unions are similar to RECORD_TYPE but offset is always 0. */
e4dbaed5
AS
2144 else if (TREE_CODE (type) == UNION_TYPE
2145 || TREE_CODE (type) == QUAL_UNION_TYPE)
53c17031 2146 {
91ea38f9
JH
2147 /* For classes first merge in the field of the subclasses. */
2148 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
2149 {
2150 tree bases = TYPE_BINFO_BASETYPES (type);
2151 int n_bases = TREE_VEC_LENGTH (bases);
2152 int i;
2153
2154 for (i = 0; i < n_bases; ++i)
2155 {
2156 tree binfo = TREE_VEC_ELT (bases, i);
2157 int num;
2158 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
2159 tree type = BINFO_TYPE (binfo);
2160
2161 num = classify_argument (TYPE_MODE (type),
2162 type, subclasses,
db01f480 2163 (offset + (bit_offset % 64)) % 256);
91ea38f9
JH
2164 if (!num)
2165 return 0;
2166 for (i = 0; i < num; i++)
2167 {
c16576e6 2168 int pos = (offset + (bit_offset % 64)) / 8 / 8;
91ea38f9
JH
2169 classes[i + pos] =
2170 merge_classes (subclasses[i], classes[i + pos]);
2171 }
2172 }
2173 }
53c17031
JH
2174 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2175 {
2176 if (TREE_CODE (field) == FIELD_DECL)
2177 {
2178 int num;
2179 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2180 TREE_TYPE (field), subclasses,
2181 bit_offset);
2182 if (!num)
2183 return 0;
2184 for (i = 0; i < num; i++)
2185 classes[i] = merge_classes (subclasses[i], classes[i]);
2186 }
2187 }
2188 }
448ec26c
WH
2189 else if (TREE_CODE (type) == SET_TYPE)
2190 {
2191 if (bytes <= 4)
2192 {
2193 classes[0] = X86_64_INTEGERSI_CLASS;
2194 return 1;
2195 }
2196 else if (bytes <= 8)
2197 {
2198 classes[0] = X86_64_INTEGER_CLASS;
2199 return 1;
2200 }
2201 else if (bytes <= 12)
2202 {
2203 classes[0] = X86_64_INTEGER_CLASS;
2204 classes[1] = X86_64_INTEGERSI_CLASS;
2205 return 2;
2206 }
2207 else
2208 {
2209 classes[0] = X86_64_INTEGER_CLASS;
2210 classes[1] = X86_64_INTEGER_CLASS;
2211 return 2;
2212 }
2213 }
53c17031
JH
2214 else
2215 abort ();
2216
2217 /* Final merger cleanup. */
2218 for (i = 0; i < words; i++)
2219 {
2220 /* If one class is MEMORY, everything should be passed in
2221 memory. */
2222 if (classes[i] == X86_64_MEMORY_CLASS)
2223 return 0;
2224
d6a7951f 2225 /* The X86_64_SSEUP_CLASS should be always preceded by
53c17031
JH
2226 X86_64_SSE_CLASS. */
2227 if (classes[i] == X86_64_SSEUP_CLASS
2228 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
2229 classes[i] = X86_64_SSE_CLASS;
2230
d6a7951f 2231 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
53c17031
JH
2232 if (classes[i] == X86_64_X87UP_CLASS
2233 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
2234 classes[i] = X86_64_SSE_CLASS;
2235 }
2236 return words;
2237 }
2238
2239 /* Compute alignment needed. We align all types to natural boundaries with
2240 exception of XFmode that is aligned to 64bits. */
2241 if (mode != VOIDmode && mode != BLKmode)
2242 {
2243 int mode_alignment = GET_MODE_BITSIZE (mode);
2244
2245 if (mode == XFmode)
2246 mode_alignment = 128;
2247 else if (mode == XCmode)
2248 mode_alignment = 256;
f5143c46 2249 /* Misaligned fields are always returned in memory. */
53c17031
JH
2250 if (bit_offset % mode_alignment)
2251 return 0;
2252 }
2253
2254 /* Classification of atomic types. */
2255 switch (mode)
2256 {
2257 case DImode:
2258 case SImode:
2259 case HImode:
2260 case QImode:
2261 case CSImode:
2262 case CHImode:
2263 case CQImode:
2264 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2265 classes[0] = X86_64_INTEGERSI_CLASS;
2266 else
2267 classes[0] = X86_64_INTEGER_CLASS;
2268 return 1;
2269 case CDImode:
2270 case TImode:
2271 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2272 return 2;
2273 case CTImode:
2274 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2275 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
2276 return 4;
2277 case SFmode:
2278 if (!(bit_offset % 64))
2279 classes[0] = X86_64_SSESF_CLASS;
2280 else
2281 classes[0] = X86_64_SSE_CLASS;
2282 return 1;
2283 case DFmode:
2284 classes[0] = X86_64_SSEDF_CLASS;
2285 return 1;
f8a1ebc6 2286 case XFmode:
53c17031
JH
2287 classes[0] = X86_64_X87_CLASS;
2288 classes[1] = X86_64_X87UP_CLASS;
2289 return 2;
f8a1ebc6 2290 case TFmode:
cf2348cb
JH
2291 case TCmode:
2292 return 0;
f8a1ebc6 2293 case XCmode:
53c17031
JH
2294 classes[0] = X86_64_X87_CLASS;
2295 classes[1] = X86_64_X87UP_CLASS;
2296 classes[2] = X86_64_X87_CLASS;
2297 classes[3] = X86_64_X87UP_CLASS;
2298 return 4;
2299 case DCmode:
2300 classes[0] = X86_64_SSEDF_CLASS;
2301 classes[1] = X86_64_SSEDF_CLASS;
2302 return 2;
2303 case SCmode:
2304 classes[0] = X86_64_SSE_CLASS;
2305 return 1;
e95d6b23
JH
2306 case V4SFmode:
2307 case V4SImode:
495333a6
JH
2308 case V16QImode:
2309 case V8HImode:
2310 case V2DFmode:
2311 case V2DImode:
e95d6b23
JH
2312 classes[0] = X86_64_SSE_CLASS;
2313 classes[1] = X86_64_SSEUP_CLASS;
2314 return 2;
2315 case V2SFmode:
2316 case V2SImode:
2317 case V4HImode:
2318 case V8QImode:
1194ca05 2319 return 0;
53c17031 2320 case BLKmode:
e95d6b23 2321 case VOIDmode:
53c17031
JH
2322 return 0;
2323 default:
2324 abort ();
2325 }
2326}
2327
2328/* Examine the argument and return set number of register required in each
f5143c46 2329 class. Return 0 iff parameter should be passed in memory. */
53c17031 2330static int
b96a374d
AJ
2331examine_argument (enum machine_mode mode, tree type, int in_return,
2332 int *int_nregs, int *sse_nregs)
53c17031
JH
2333{
2334 enum x86_64_reg_class class[MAX_CLASSES];
2335 int n = classify_argument (mode, type, class, 0);
2336
2337 *int_nregs = 0;
2338 *sse_nregs = 0;
2339 if (!n)
2340 return 0;
2341 for (n--; n >= 0; n--)
2342 switch (class[n])
2343 {
2344 case X86_64_INTEGER_CLASS:
2345 case X86_64_INTEGERSI_CLASS:
2346 (*int_nregs)++;
2347 break;
2348 case X86_64_SSE_CLASS:
2349 case X86_64_SSESF_CLASS:
2350 case X86_64_SSEDF_CLASS:
2351 (*sse_nregs)++;
2352 break;
2353 case X86_64_NO_CLASS:
2354 case X86_64_SSEUP_CLASS:
2355 break;
2356 case X86_64_X87_CLASS:
2357 case X86_64_X87UP_CLASS:
2358 if (!in_return)
2359 return 0;
2360 break;
2361 case X86_64_MEMORY_CLASS:
2362 abort ();
2363 }
2364 return 1;
2365}
/* Construct container for the argument used by GCC interface.  See
   FUNCTION_ARG for the detailed description.  Returns NULL when the
   argument must be passed in memory, a single REG for the common cases,
   or a PARALLEL describing a multi-register split.  */
static rtx
construct_container (enum machine_mode mode, tree type, int in_return,
		     int nintregs, int nsseregs, const int * intreg,
		     int sse_regno)
{
  enum machine_mode tmpmode;
  int bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  enum x86_64_reg_class class[MAX_CLASSES];
  int n;
  int i;
  int nexps = 0;
  int needed_sseregs, needed_intregs;
  rtx exp[MAX_CLASSES];
  rtx ret;

  n = classify_argument (mode, type, class, 0);
  if (TARGET_DEBUG_ARG)
    {
      if (!n)
	fprintf (stderr, "Memory class\n");
      else
	{
	  fprintf (stderr, "Classes:");
	  for (i = 0; i < n; i++)
	    {
	      fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
	    }
	  fprintf (stderr, "\n");
	}
    }
  /* Zero words means "pass in memory".  */
  if (!n)
    return NULL;
  if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
    return NULL;
  /* Not enough free registers of the right kind left.  */
  if (needed_intregs > nintregs || needed_sseregs > nsseregs)
    return NULL;

  /* First construct simple cases.  Avoid SCmode, since we want to use
     single register to pass this type.  */
  if (n == 1 && mode != SCmode)
    switch (class[0])
      {
      case X86_64_INTEGER_CLASS:
      case X86_64_INTEGERSI_CLASS:
	return gen_rtx_REG (mode, intreg[0]);
      case X86_64_SSE_CLASS:
      case X86_64_SSESF_CLASS:
      case X86_64_SSEDF_CLASS:
	return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
      case X86_64_X87_CLASS:
	return gen_rtx_REG (mode, FIRST_STACK_REG);
      case X86_64_NO_CLASS:
	/* Zero sized array, struct or class.  */
	return NULL;
      default:
	abort ();
      }
  /* A full 16-byte SSE value lives in a single SSE register.  */
  if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
    return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
  if (n == 2
      && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
    return gen_rtx_REG (XFmode, FIRST_STACK_REG);
  /* A 16-byte integer in two consecutive integer registers.  */
  if (n == 2 && class[0] == X86_64_INTEGER_CLASS
      && class[1] == X86_64_INTEGER_CLASS
      && (mode == CDImode || mode == TImode || mode == TFmode)
      && intreg[0] + 1 == intreg[1])
    return gen_rtx_REG (mode, intreg[0]);
  if (n == 4
      && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
      && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
    return gen_rtx_REG (XCmode, FIRST_STACK_REG);

  /* Otherwise figure out the entries of the PARALLEL.  */
  for (i = 0; i < n; i++)
    {
      switch (class[i])
	{
	case X86_64_NO_CLASS:
	  break;
	case X86_64_INTEGER_CLASS:
	case X86_64_INTEGERSI_CLASS:
	  /* Merge TImodes on aligned occasions here too.  */
	  if (i * 8 + 8 > bytes)
	    tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
	  else if (class[i] == X86_64_INTEGERSI_CLASS)
	    tmpmode = SImode;
	  else
	    tmpmode = DImode;
	  /* We've requested 24 bytes we don't have mode for.  Use DImode.  */
	  if (tmpmode == BLKmode)
	    tmpmode = DImode;
	  exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
					     gen_rtx_REG (tmpmode, *intreg),
					     GEN_INT (i*8));
	  intreg++;
	  break;
	case X86_64_SSESF_CLASS:
	  exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
					     gen_rtx_REG (SFmode,
							  SSE_REGNO (sse_regno)),
					     GEN_INT (i*8));
	  sse_regno++;
	  break;
	case X86_64_SSEDF_CLASS:
	  exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
					     gen_rtx_REG (DFmode,
							  SSE_REGNO (sse_regno)),
					     GEN_INT (i*8));
	  sse_regno++;
	  break;
	case X86_64_SSE_CLASS:
	  /* A following SSEUP word widens this entry to a full TImode
	     SSE register and consumes the next word.  */
	  if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
	    tmpmode = TImode;
	  else
	    tmpmode = DImode;
	  exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
					     gen_rtx_REG (tmpmode,
							  SSE_REGNO (sse_regno)),
					     GEN_INT (i*8));
	  if (tmpmode == TImode)
	    i++;
	  sse_regno++;
	  break;
	default:
	  abort ();
	}
    }
  ret =	gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
  for (i = 0; i < nexps; i++)
    XVECEXP (ret, 0, i) = exp [i];
  return ret;
}
2501
b08de47e
MM
2502/* Update the data in CUM to advance over an argument
2503 of mode MODE and data type TYPE.
2504 (TYPE is null for libcalls where that information may not be available.) */
2505
2506void
b96a374d
AJ
2507function_arg_advance (CUMULATIVE_ARGS *cum, /* current arg information */
2508 enum machine_mode mode, /* current arg mode */
2509 tree type, /* type of the argument or 0 if lib support */
2510 int named) /* whether or not the argument was named */
b08de47e 2511{
5ac9118e
KG
2512 int bytes =
2513 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
b08de47e
MM
2514 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2515
2516 if (TARGET_DEBUG_ARG)
2517 fprintf (stderr,
bcf17554
JH
2518 "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, mode=%s, named=%d)\n\n",
2519 words, cum->words, cum->nregs, cum->sse_nregs, GET_MODE_NAME (mode), named);
53c17031 2520 if (TARGET_64BIT)
b08de47e 2521 {
53c17031
JH
2522 int int_nregs, sse_nregs;
2523 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2524 cum->words += words;
2525 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
82a127a9 2526 {
53c17031
JH
2527 cum->nregs -= int_nregs;
2528 cum->sse_nregs -= sse_nregs;
2529 cum->regno += int_nregs;
2530 cum->sse_regno += sse_nregs;
82a127a9 2531 }
53c17031
JH
2532 else
2533 cum->words += words;
b08de47e 2534 }
a4f31c00 2535 else
82a127a9 2536 {
bcf17554
JH
2537 if (TARGET_SSE && SSE_REG_MODE_P (mode)
2538 && (!type || !AGGREGATE_TYPE_P (type)))
53c17031
JH
2539 {
2540 cum->sse_words += words;
2541 cum->sse_nregs -= 1;
2542 cum->sse_regno += 1;
2543 if (cum->sse_nregs <= 0)
2544 {
2545 cum->sse_nregs = 0;
2546 cum->sse_regno = 0;
2547 }
2548 }
bcf17554
JH
2549 else if (TARGET_MMX && MMX_REG_MODE_P (mode)
2550 && (!type || !AGGREGATE_TYPE_P (type)))
2551 {
2552 cum->mmx_words += words;
2553 cum->mmx_nregs -= 1;
2554 cum->mmx_regno += 1;
2555 if (cum->mmx_nregs <= 0)
2556 {
2557 cum->mmx_nregs = 0;
2558 cum->mmx_regno = 0;
2559 }
2560 }
53c17031 2561 else
82a127a9 2562 {
53c17031
JH
2563 cum->words += words;
2564 cum->nregs -= words;
2565 cum->regno += words;
2566
2567 if (cum->nregs <= 0)
2568 {
2569 cum->nregs = 0;
2570 cum->regno = 0;
2571 }
82a127a9
CM
2572 }
2573 }
b08de47e
MM
2574 return;
2575}
2576
2577/* Define where to put the arguments to a function.
2578 Value is zero to push the argument on the stack,
2579 or a hard register in which to store the argument.
2580
2581 MODE is the argument's machine mode.
2582 TYPE is the data type of the argument (as a tree).
2583 This is null for libcalls where that information may
2584 not be available.
2585 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2586 the preceding args and about the function being called.
2587 NAMED is nonzero if this argument is a named parameter
2588 (otherwise it is an extra parameter matching an ellipsis). */
2589
07933f72 2590rtx
b96a374d
AJ
2591function_arg (CUMULATIVE_ARGS *cum, /* current arg information */
2592 enum machine_mode mode, /* current arg mode */
2593 tree type, /* type of the argument or 0 if lib support */
2594 int named) /* != 0 for normal args, == 0 for ... args */
b08de47e
MM
2595{
2596 rtx ret = NULL_RTX;
5ac9118e
KG
2597 int bytes =
2598 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
b08de47e 2599 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
bcf17554 2600 static bool warnedsse, warnedmmx;
b08de47e 2601
5bdc5878 2602 /* Handle a hidden AL argument containing number of registers for varargs
53c17031
JH
2603 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
2604 any AL settings. */
32ee7d1d 2605 if (mode == VOIDmode)
b08de47e 2606 {
53c17031
JH
2607 if (TARGET_64BIT)
2608 return GEN_INT (cum->maybe_vaarg
2609 ? (cum->sse_nregs < 0
2610 ? SSE_REGPARM_MAX
2611 : cum->sse_regno)
2612 : -1);
2613 else
2614 return constm1_rtx;
b08de47e 2615 }
53c17031
JH
2616 if (TARGET_64BIT)
2617 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2618 &x86_64_int_parameter_registers [cum->regno],
2619 cum->sse_regno);
2620 else
2621 switch (mode)
2622 {
2623 /* For now, pass fp/complex values on the stack. */
2624 default:
2625 break;
2626
2627 case BLKmode:
8d454008
RH
2628 if (bytes < 0)
2629 break;
5efb1046 2630 /* FALLTHRU */
53c17031
JH
2631 case DImode:
2632 case SImode:
2633 case HImode:
2634 case QImode:
2635 if (words <= cum->nregs)
b96a374d
AJ
2636 {
2637 int regno = cum->regno;
2638
2639 /* Fastcall allocates the first two DWORD (SImode) or
2640 smaller arguments to ECX and EDX. */
2641 if (cum->fastcall)
2642 {
2643 if (mode == BLKmode || mode == DImode)
2644 break;
2645
2646 /* ECX not EAX is the first allocated register. */
2647 if (regno == 0)
e767b5be 2648 regno = 2;
b96a374d
AJ
2649 }
2650 ret = gen_rtx_REG (mode, regno);
2651 }
53c17031
JH
2652 break;
2653 case TImode:
bcf17554
JH
2654 case V16QImode:
2655 case V8HImode:
2656 case V4SImode:
2657 case V2DImode:
2658 case V4SFmode:
2659 case V2DFmode:
2660 if (!type || !AGGREGATE_TYPE_P (type))
2661 {
e1be55d0 2662 if (!TARGET_SSE && !warnedmmx && cum->warn_sse)
bcf17554
JH
2663 {
2664 warnedsse = true;
2665 warning ("SSE vector argument without SSE enabled "
2666 "changes the ABI");
2667 }
2668 if (cum->sse_nregs)
2669 ret = gen_rtx_REG (mode, cum->sse_regno + FIRST_SSE_REG);
2670 }
2671 break;
2672 case V8QImode:
2673 case V4HImode:
2674 case V2SImode:
2675 case V2SFmode:
2676 if (!type || !AGGREGATE_TYPE_P (type))
2677 {
e1be55d0 2678 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
bcf17554
JH
2679 {
2680 warnedmmx = true;
2681 warning ("MMX vector argument without MMX enabled "
2682 "changes the ABI");
2683 }
2684 if (cum->mmx_nregs)
2685 ret = gen_rtx_REG (mode, cum->mmx_regno + FIRST_MMX_REG);
2686 }
53c17031
JH
2687 break;
2688 }
b08de47e
MM
2689
2690 if (TARGET_DEBUG_ARG)
2691 {
2692 fprintf (stderr,
91ea38f9 2693 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
b08de47e
MM
2694 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2695
2696 if (ret)
91ea38f9 2697 print_simple_rtl (stderr, ret);
b08de47e
MM
2698 else
2699 fprintf (stderr, ", stack");
2700
2701 fprintf (stderr, " )\n");
2702 }
2703
2704 return ret;
2705}
53c17031 2706
09b2e78d
ZD
2707/* A C expression that indicates when an argument must be passed by
2708 reference. If nonzero for an argument, a copy of that argument is
2709 made in memory and a pointer to the argument is passed instead of
2710 the argument itself. The pointer is passed in whatever way is
2711 appropriate for passing a pointer to that type. */
2712
2713int
b96a374d
AJ
2714function_arg_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
2715 enum machine_mode mode ATTRIBUTE_UNUSED,
2716 tree type, int named ATTRIBUTE_UNUSED)
09b2e78d
ZD
2717{
2718 if (!TARGET_64BIT)
2719 return 0;
2720
2721 if (type && int_size_in_bytes (type) == -1)
2722 {
2723 if (TARGET_DEBUG_ARG)
2724 fprintf (stderr, "function_arg_pass_by_reference\n");
2725 return 1;
2726 }
2727
2728 return 0;
2729}
2730
8b978a57
JH
2731/* Return true when TYPE should be 128bit aligned for 32bit argument passing
2732 ABI */
2733static bool
b96a374d 2734contains_128bit_aligned_vector_p (tree type)
8b978a57
JH
2735{
2736 enum machine_mode mode = TYPE_MODE (type);
2737 if (SSE_REG_MODE_P (mode)
2738 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
2739 return true;
2740 if (TYPE_ALIGN (type) < 128)
2741 return false;
2742
2743 if (AGGREGATE_TYPE_P (type))
2744 {
2a43945f 2745 /* Walk the aggregates recursively. */
8b978a57
JH
2746 if (TREE_CODE (type) == RECORD_TYPE
2747 || TREE_CODE (type) == UNION_TYPE
2748 || TREE_CODE (type) == QUAL_UNION_TYPE)
2749 {
2750 tree field;
2751
2752 if (TYPE_BINFO (type) != NULL
2753 && TYPE_BINFO_BASETYPES (type) != NULL)
2754 {
2755 tree bases = TYPE_BINFO_BASETYPES (type);
2756 int n_bases = TREE_VEC_LENGTH (bases);
2757 int i;
2758
2759 for (i = 0; i < n_bases; ++i)
2760 {
2761 tree binfo = TREE_VEC_ELT (bases, i);
2762 tree type = BINFO_TYPE (binfo);
2763
2764 if (contains_128bit_aligned_vector_p (type))
2765 return true;
2766 }
2767 }
43f3a59d 2768 /* And now merge the fields of structure. */
8b978a57
JH
2769 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2770 {
2771 if (TREE_CODE (field) == FIELD_DECL
2772 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
2773 return true;
2774 }
2775 }
2776 /* Just for use if some languages passes arrays by value. */
2777 else if (TREE_CODE (type) == ARRAY_TYPE)
2778 {
2779 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
2780 return true;
2781 }
2782 else
2783 abort ();
2784 }
2785 return false;
2786}
2787
bb498ea3
AH
2788/* Gives the alignment boundary, in bits, of an argument with the
2789 specified mode and type. */
53c17031
JH
2790
2791int
b96a374d 2792ix86_function_arg_boundary (enum machine_mode mode, tree type)
53c17031
JH
2793{
2794 int align;
53c17031
JH
2795 if (type)
2796 align = TYPE_ALIGN (type);
2797 else
2798 align = GET_MODE_ALIGNMENT (mode);
2799 if (align < PARM_BOUNDARY)
2800 align = PARM_BOUNDARY;
8b978a57
JH
2801 if (!TARGET_64BIT)
2802 {
2803 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
2804 make an exception for SSE modes since these require 128bit
b96a374d 2805 alignment.
8b978a57
JH
2806
2807 The handling here differs from field_alignment. ICC aligns MMX
2808 arguments to 4 byte boundaries, while structure fields are aligned
2809 to 8 byte boundaries. */
2810 if (!type)
2811 {
2812 if (!SSE_REG_MODE_P (mode))
2813 align = PARM_BOUNDARY;
2814 }
2815 else
2816 {
2817 if (!contains_128bit_aligned_vector_p (type))
2818 align = PARM_BOUNDARY;
2819 }
8b978a57 2820 }
53c17031
JH
2821 if (align > 128)
2822 align = 128;
2823 return align;
2824}
2825
2826/* Return true if N is a possible register number of function value. */
2827bool
b96a374d 2828ix86_function_value_regno_p (int regno)
53c17031
JH
2829{
2830 if (!TARGET_64BIT)
2831 {
2832 return ((regno) == 0
2833 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2834 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2835 }
2836 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2837 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2838 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2839}
2840
2841/* Define how to find the value returned by a function.
2842 VALTYPE is the data type of the value (as a tree).
2843 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2844 otherwise, FUNC is 0. */
2845rtx
b96a374d 2846ix86_function_value (tree valtype)
53c17031
JH
2847{
2848 if (TARGET_64BIT)
2849 {
2850 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2851 REGPARM_MAX, SSE_REGPARM_MAX,
2852 x86_64_int_return_registers, 0);
d1f87653
KH
2853 /* For zero sized structures, construct_container return NULL, but we need
2854 to keep rest of compiler happy by returning meaningful value. */
53c17031
JH
2855 if (!ret)
2856 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2857 return ret;
2858 }
2859 else
b069de3b
SS
2860 return gen_rtx_REG (TYPE_MODE (valtype),
2861 ix86_value_regno (TYPE_MODE (valtype)));
53c17031
JH
2862}
2863
f5143c46 2864/* Return false iff type is returned in memory. */
53c17031 2865int
b96a374d 2866ix86_return_in_memory (tree type)
53c17031 2867{
a30b6839
RH
2868 int needed_intregs, needed_sseregs, size;
2869 enum machine_mode mode = TYPE_MODE (type);
2870
53c17031 2871 if (TARGET_64BIT)
a30b6839
RH
2872 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
2873
2874 if (mode == BLKmode)
2875 return 1;
2876
2877 size = int_size_in_bytes (type);
2878
2879 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
2880 return 0;
2881
2882 if (VECTOR_MODE_P (mode) || mode == TImode)
53c17031 2883 {
a30b6839
RH
2884 /* User-created vectors small enough to fit in EAX. */
2885 if (size < 8)
5e062767 2886 return 0;
a30b6839
RH
2887
2888 /* MMX/3dNow values are returned on the stack, since we've
2889 got to EMMS/FEMMS before returning. */
2890 if (size == 8)
53c17031 2891 return 1;
a30b6839
RH
2892
2893 /* SSE values are returned in XMM0. */
2894 /* ??? Except when it doesn't exist? We have a choice of
2895 either (1) being abi incompatible with a -march switch,
2896 or (2) generating an error here. Given no good solution,
2897 I think the safest thing is one warning. The user won't
43f3a59d 2898 be able to use -Werror, but.... */
a30b6839
RH
2899 if (size == 16)
2900 {
2901 static bool warned;
2902
2903 if (TARGET_SSE)
2904 return 0;
2905
2906 if (!warned)
2907 {
2908 warned = true;
2909 warning ("SSE vector return without SSE enabled "
2910 "changes the ABI");
2911 }
2912 return 1;
2913 }
53c17031 2914 }
a30b6839 2915
cf2348cb 2916 if (mode == XFmode)
a30b6839 2917 return 0;
f8a1ebc6 2918
a30b6839
RH
2919 if (size > 12)
2920 return 1;
2921 return 0;
53c17031
JH
2922}
2923
2924/* Define how to find the value returned by a library function
2925 assuming the value has mode MODE. */
2926rtx
b96a374d 2927ix86_libcall_value (enum machine_mode mode)
53c17031
JH
2928{
2929 if (TARGET_64BIT)
2930 {
2931 switch (mode)
2932 {
f8a1ebc6
JH
2933 case SFmode:
2934 case SCmode:
2935 case DFmode:
2936 case DCmode:
2937 return gen_rtx_REG (mode, FIRST_SSE_REG);
2938 case XFmode:
2939 case XCmode:
2940 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2941 case TFmode:
f8a1ebc6
JH
2942 case TCmode:
2943 return NULL;
2944 default:
2945 return gen_rtx_REG (mode, 0);
53c17031
JH
2946 }
2947 }
2948 else
f8a1ebc6 2949 return gen_rtx_REG (mode, ix86_value_regno (mode));
b069de3b
SS
2950}
2951
2952/* Given a mode, return the register to use for a return value. */
2953
2954static int
b96a374d 2955ix86_value_regno (enum machine_mode mode)
b069de3b 2956{
a30b6839 2957 /* Floating point return values in %st(0). */
b069de3b
SS
2958 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
2959 return FIRST_FLOAT_REG;
a30b6839
RH
2960 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
2961 we prevent this case when sse is not available. */
2962 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
b069de3b 2963 return FIRST_SSE_REG;
a30b6839 2964 /* Everything else in %eax. */
b069de3b 2965 return 0;
53c17031 2966}
ad919812
JH
2967\f
2968/* Create the va_list data type. */
53c17031 2969
c35d187f
RH
2970static tree
2971ix86_build_builtin_va_list (void)
ad919812
JH
2972{
2973 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
53c17031 2974
ad919812
JH
2975 /* For i386 we use plain pointer to argument area. */
2976 if (!TARGET_64BIT)
2977 return build_pointer_type (char_type_node);
2978
f1e639b1 2979 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
ad919812
JH
2980 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2981
fce5a9f2 2982 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
ad919812 2983 unsigned_type_node);
fce5a9f2 2984 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
ad919812
JH
2985 unsigned_type_node);
2986 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2987 ptr_type_node);
2988 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2989 ptr_type_node);
2990
2991 DECL_FIELD_CONTEXT (f_gpr) = record;
2992 DECL_FIELD_CONTEXT (f_fpr) = record;
2993 DECL_FIELD_CONTEXT (f_ovf) = record;
2994 DECL_FIELD_CONTEXT (f_sav) = record;
2995
2996 TREE_CHAIN (record) = type_decl;
2997 TYPE_NAME (record) = type_decl;
2998 TYPE_FIELDS (record) = f_gpr;
2999 TREE_CHAIN (f_gpr) = f_fpr;
3000 TREE_CHAIN (f_fpr) = f_ovf;
3001 TREE_CHAIN (f_ovf) = f_sav;
3002
3003 layout_type (record);
3004
3005 /* The correct type is an array type of one element. */
3006 return build_array_type (record, build_index_type (size_zero_node));
3007}
3008
a0524eb3 3009/* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
ad919812 3010
a0524eb3 3011static void
b96a374d
AJ
3012ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3013 tree type, int *pretend_size ATTRIBUTE_UNUSED,
3014 int no_rtl)
ad919812
JH
3015{
3016 CUMULATIVE_ARGS next_cum;
3017 rtx save_area = NULL_RTX, mem;
3018 rtx label;
3019 rtx label_ref;
3020 rtx tmp_reg;
3021 rtx nsse_reg;
3022 int set;
3023 tree fntype;
3024 int stdarg_p;
3025 int i;
3026
3027 if (!TARGET_64BIT)
3028 return;
3029
3030 /* Indicate to allocate space on the stack for varargs save area. */
3031 ix86_save_varrargs_registers = 1;
3032
5474eed5
JH
3033 cfun->stack_alignment_needed = 128;
3034
ad919812
JH
3035 fntype = TREE_TYPE (current_function_decl);
3036 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
3037 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
3038 != void_type_node));
3039
3040 /* For varargs, we do not want to skip the dummy va_dcl argument.
3041 For stdargs, we do want to skip the last named argument. */
3042 next_cum = *cum;
3043 if (stdarg_p)
3044 function_arg_advance (&next_cum, mode, type, 1);
3045
3046 if (!no_rtl)
3047 save_area = frame_pointer_rtx;
3048
3049 set = get_varargs_alias_set ();
3050
3051 for (i = next_cum.regno; i < ix86_regparm; i++)
3052 {
3053 mem = gen_rtx_MEM (Pmode,
3054 plus_constant (save_area, i * UNITS_PER_WORD));
0692acba 3055 set_mem_alias_set (mem, set);
ad919812
JH
3056 emit_move_insn (mem, gen_rtx_REG (Pmode,
3057 x86_64_int_parameter_registers[i]));
3058 }
3059
3060 if (next_cum.sse_nregs)
3061 {
3062 /* Now emit code to save SSE registers. The AX parameter contains number
d1f87653 3063 of SSE parameter registers used to call this function. We use
ad919812
JH
3064 sse_prologue_save insn template that produces computed jump across
3065 SSE saves. We need some preparation work to get this working. */
3066
3067 label = gen_label_rtx ();
3068 label_ref = gen_rtx_LABEL_REF (Pmode, label);
3069
3070 /* Compute address to jump to :
3071 label - 5*eax + nnamed_sse_arguments*5 */
3072 tmp_reg = gen_reg_rtx (Pmode);
3073 nsse_reg = gen_reg_rtx (Pmode);
3074 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
3075 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
44cf5b6a 3076 gen_rtx_MULT (Pmode, nsse_reg,
ad919812
JH
3077 GEN_INT (4))));
3078 if (next_cum.sse_regno)
3079 emit_move_insn
3080 (nsse_reg,
3081 gen_rtx_CONST (DImode,
3082 gen_rtx_PLUS (DImode,
3083 label_ref,
3084 GEN_INT (next_cum.sse_regno * 4))));
3085 else
3086 emit_move_insn (nsse_reg, label_ref);
3087 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
3088
3089 /* Compute address of memory block we save into. We always use pointer
3090 pointing 127 bytes after first byte to store - this is needed to keep
3091 instruction size limited by 4 bytes. */
3092 tmp_reg = gen_reg_rtx (Pmode);
8ac61af7
RK
3093 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3094 plus_constant (save_area,
3095 8 * REGPARM_MAX + 127)));
ad919812 3096 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
14f73b5a 3097 set_mem_alias_set (mem, set);
8ac61af7 3098 set_mem_align (mem, BITS_PER_WORD);
ad919812
JH
3099
3100 /* And finally do the dirty job! */
8ac61af7
RK
3101 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
3102 GEN_INT (next_cum.sse_regno), label));
ad919812
JH
3103 }
3104
3105}
3106
3107/* Implement va_start. */
3108
3109void
b96a374d 3110ix86_va_start (tree valist, rtx nextarg)
ad919812
JH
3111{
3112 HOST_WIDE_INT words, n_gpr, n_fpr;
3113 tree f_gpr, f_fpr, f_ovf, f_sav;
3114 tree gpr, fpr, ovf, sav, t;
3115
3116 /* Only 64bit target needs something special. */
3117 if (!TARGET_64BIT)
3118 {
e5faf155 3119 std_expand_builtin_va_start (valist, nextarg);
ad919812
JH
3120 return;
3121 }
3122
3123 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3124 f_fpr = TREE_CHAIN (f_gpr);
3125 f_ovf = TREE_CHAIN (f_fpr);
3126 f_sav = TREE_CHAIN (f_ovf);
3127
3128 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3129 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
3130 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
3131 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
3132 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
3133
3134 /* Count number of gp and fp argument registers used. */
3135 words = current_function_args_info.words;
3136 n_gpr = current_function_args_info.regno;
3137 n_fpr = current_function_args_info.sse_regno;
3138
3139 if (TARGET_DEBUG_ARG)
3140 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
b531087a 3141 (int) words, (int) n_gpr, (int) n_fpr);
ad919812
JH
3142
3143 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
3144 build_int_2 (n_gpr * 8, 0));
3145 TREE_SIDE_EFFECTS (t) = 1;
3146 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3147
3148 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
3149 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
3150 TREE_SIDE_EFFECTS (t) = 1;
3151 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3152
3153 /* Find the overflow area. */
3154 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
3155 if (words != 0)
3156 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
3157 build_int_2 (words * UNITS_PER_WORD, 0));
3158 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3159 TREE_SIDE_EFFECTS (t) = 1;
3160 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3161
3162 /* Find the register save area.
3163 Prologue of the function save it right above stack frame. */
3164 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
3165 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
3166 TREE_SIDE_EFFECTS (t) = 1;
3167 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3168}
3169
3170/* Implement va_arg. */
3171rtx
b96a374d 3172ix86_va_arg (tree valist, tree type)
ad919812 3173{
0139adca 3174 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
ad919812
JH
3175 tree f_gpr, f_fpr, f_ovf, f_sav;
3176 tree gpr, fpr, ovf, sav, t;
b932f770 3177 int size, rsize;
ad919812
JH
3178 rtx lab_false, lab_over = NULL_RTX;
3179 rtx addr_rtx, r;
3180 rtx container;
09b2e78d 3181 int indirect_p = 0;
ad919812
JH
3182
3183 /* Only 64bit target needs something special. */
3184 if (!TARGET_64BIT)
3185 {
3186 return std_expand_builtin_va_arg (valist, type);
3187 }
3188
3189 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3190 f_fpr = TREE_CHAIN (f_gpr);
3191 f_ovf = TREE_CHAIN (f_fpr);
3192 f_sav = TREE_CHAIN (f_ovf);
3193
3194 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3195 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
3196 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
3197 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
3198 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
3199
3200 size = int_size_in_bytes (type);
09b2e78d
ZD
3201 if (size == -1)
3202 {
3203 /* Passed by reference. */
3204 indirect_p = 1;
3205 type = build_pointer_type (type);
3206 size = int_size_in_bytes (type);
3207 }
ad919812
JH
3208 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3209
3210 container = construct_container (TYPE_MODE (type), type, 0,
3211 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
3212 /*
3213 * Pull the value out of the saved registers ...
3214 */
3215
3216 addr_rtx = gen_reg_rtx (Pmode);
3217
3218 if (container)
3219 {
3220 rtx int_addr_rtx, sse_addr_rtx;
3221 int needed_intregs, needed_sseregs;
3222 int need_temp;
3223
3224 lab_over = gen_label_rtx ();
3225 lab_false = gen_label_rtx ();
8bad7136 3226
ad919812
JH
3227 examine_argument (TYPE_MODE (type), type, 0,
3228 &needed_intregs, &needed_sseregs);
3229
3230
3231 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
3232 || TYPE_ALIGN (type) > 128);
3233
d1f87653 3234 /* In case we are passing structure, verify that it is consecutive block
ad919812
JH
3235 on the register save area. If not we need to do moves. */
3236 if (!need_temp && !REG_P (container))
3237 {
d1f87653 3238 /* Verify that all registers are strictly consecutive */
ad919812
JH
3239 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
3240 {
3241 int i;
3242
3243 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3244 {
3245 rtx slot = XVECEXP (container, 0, i);
b531087a 3246 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
ad919812
JH
3247 || INTVAL (XEXP (slot, 1)) != i * 16)
3248 need_temp = 1;
3249 }
3250 }
3251 else
3252 {
3253 int i;
3254
3255 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3256 {
3257 rtx slot = XVECEXP (container, 0, i);
b531087a 3258 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
ad919812
JH
3259 || INTVAL (XEXP (slot, 1)) != i * 8)
3260 need_temp = 1;
3261 }
3262 }
3263 }
3264 if (!need_temp)
3265 {
3266 int_addr_rtx = addr_rtx;
3267 sse_addr_rtx = addr_rtx;
3268 }
3269 else
3270 {
3271 int_addr_rtx = gen_reg_rtx (Pmode);
3272 sse_addr_rtx = gen_reg_rtx (Pmode);
3273 }
3274 /* First ensure that we fit completely in registers. */
3275 if (needed_intregs)
3276 {
3277 emit_cmp_and_jump_insns (expand_expr
3278 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
3279 GEN_INT ((REGPARM_MAX - needed_intregs +
3280 1) * 8), GE, const1_rtx, SImode,
d43e0b7d 3281 1, lab_false);
ad919812
JH
3282 }
3283 if (needed_sseregs)
3284 {
3285 emit_cmp_and_jump_insns (expand_expr
3286 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
3287 GEN_INT ((SSE_REGPARM_MAX -
3288 needed_sseregs + 1) * 16 +
3289 REGPARM_MAX * 8), GE, const1_rtx,
d43e0b7d 3290 SImode, 1, lab_false);
ad919812
JH
3291 }
3292
3293 /* Compute index to start of area used for integer regs. */
3294 if (needed_intregs)
3295 {
3296 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
3297 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
3298 if (r != int_addr_rtx)
3299 emit_move_insn (int_addr_rtx, r);
3300 }
3301 if (needed_sseregs)
3302 {
3303 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
3304 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
3305 if (r != sse_addr_rtx)
3306 emit_move_insn (sse_addr_rtx, r);
3307 }
3308 if (need_temp)
3309 {
3310 int i;
3311 rtx mem;
70642ee3 3312 rtx x;
ad919812 3313
b932f770 3314 /* Never use the memory itself, as it has the alias set. */
70642ee3
JH
3315 x = XEXP (assign_temp (type, 0, 1, 0), 0);
3316 mem = gen_rtx_MEM (BLKmode, x);
3317 force_operand (x, addr_rtx);
0692acba 3318 set_mem_alias_set (mem, get_varargs_alias_set ());
8ac61af7 3319 set_mem_align (mem, BITS_PER_UNIT);
b932f770 3320
ad919812
JH
3321 for (i = 0; i < XVECLEN (container, 0); i++)
3322 {
3323 rtx slot = XVECEXP (container, 0, i);
3324 rtx reg = XEXP (slot, 0);
3325 enum machine_mode mode = GET_MODE (reg);
3326 rtx src_addr;
3327 rtx src_mem;
3328 int src_offset;
3329 rtx dest_mem;
3330
3331 if (SSE_REGNO_P (REGNO (reg)))
3332 {
3333 src_addr = sse_addr_rtx;
3334 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
3335 }
3336 else
3337 {
3338 src_addr = int_addr_rtx;
3339 src_offset = REGNO (reg) * 8;
3340 }
3341 src_mem = gen_rtx_MEM (mode, src_addr);
0692acba 3342 set_mem_alias_set (src_mem, get_varargs_alias_set ());
ad919812
JH
3343 src_mem = adjust_address (src_mem, mode, src_offset);
3344 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
ad919812
JH
3345 emit_move_insn (dest_mem, src_mem);
3346 }
3347 }
3348
3349 if (needed_intregs)
3350 {
3351 t =
3352 build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
3353 build_int_2 (needed_intregs * 8, 0));
3354 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
3355 TREE_SIDE_EFFECTS (t) = 1;
3356 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3357 }
3358 if (needed_sseregs)
3359 {
3360 t =
3361 build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
3362 build_int_2 (needed_sseregs * 16, 0));
3363 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
3364 TREE_SIDE_EFFECTS (t) = 1;
3365 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3366 }
3367
3368 emit_jump_insn (gen_jump (lab_over));
3369 emit_barrier ();
3370 emit_label (lab_false);
3371 }
3372
3373 /* ... otherwise out of the overflow area. */
3374
3375 /* Care for on-stack alignment if needed. */
3376 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
3377 t = ovf;
3378 else
3379 {
3380 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
3381 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
3382 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
3383 }
3384 t = save_expr (t);
3385
3386 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
3387 if (r != addr_rtx)
3388 emit_move_insn (addr_rtx, r);
3389
3390 t =
3391 build (PLUS_EXPR, TREE_TYPE (t), t,
3392 build_int_2 (rsize * UNITS_PER_WORD, 0));
3393 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3394 TREE_SIDE_EFFECTS (t) = 1;
3395 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3396
3397 if (container)
3398 emit_label (lab_over);
3399
09b2e78d
ZD
3400 if (indirect_p)
3401 {
3402 r = gen_rtx_MEM (Pmode, addr_rtx);
3403 set_mem_alias_set (r, get_varargs_alias_set ());
3404 emit_move_insn (addr_rtx, r);
3405 }
3406
ad919812
JH
3407 return addr_rtx;
3408}
3409\f
c3c637e3
GS
3410/* Return nonzero if OP is either a i387 or SSE fp register. */
3411int
b96a374d 3412any_fp_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
c3c637e3
GS
3413{
3414 return ANY_FP_REG_P (op);
3415}
3416
3417/* Return nonzero if OP is an i387 fp register. */
3418int
b96a374d 3419fp_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
c3c637e3
GS
3420{
3421 return FP_REG_P (op);
3422}
3423
3424/* Return nonzero if OP is a non-fp register_operand. */
3425int
b96a374d 3426register_and_not_any_fp_reg_operand (rtx op, enum machine_mode mode)
c3c637e3
GS
3427{
3428 return register_operand (op, mode) && !ANY_FP_REG_P (op);
3429}
3430
40b982a9 3431/* Return nonzero if OP is a register operand other than an
c3c637e3
GS
3432 i387 fp register. */
3433int
b96a374d 3434register_and_not_fp_reg_operand (rtx op, enum machine_mode mode)
c3c637e3
GS
3435{
3436 return register_operand (op, mode) && !FP_REG_P (op);
3437}
3438
7dd4b4a3
JH
3439/* Return nonzero if OP is general operand representable on x86_64. */
3440
3441int
b96a374d 3442x86_64_general_operand (rtx op, enum machine_mode mode)
7dd4b4a3
JH
3443{
3444 if (!TARGET_64BIT)
3445 return general_operand (op, mode);
3446 if (nonimmediate_operand (op, mode))
3447 return 1;
c05dbe81 3448 return x86_64_sign_extended_value (op);
7dd4b4a3
JH
3449}
3450
3451/* Return nonzero if OP is general operand representable on x86_64
d6a7951f 3452 as either sign extended or zero extended constant. */
7dd4b4a3
JH
3453
3454int
b96a374d 3455x86_64_szext_general_operand (rtx op, enum machine_mode mode)
7dd4b4a3
JH
3456{
3457 if (!TARGET_64BIT)
3458 return general_operand (op, mode);
3459 if (nonimmediate_operand (op, mode))
3460 return 1;
c05dbe81 3461 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
7dd4b4a3
JH
3462}
3463
3464/* Return nonzero if OP is nonmemory operand representable on x86_64. */
3465
3466int
b96a374d 3467x86_64_nonmemory_operand (rtx op, enum machine_mode mode)
7dd4b4a3
JH
3468{
3469 if (!TARGET_64BIT)
3470 return nonmemory_operand (op, mode);
3471 if (register_operand (op, mode))
3472 return 1;
c05dbe81 3473 return x86_64_sign_extended_value (op);
7dd4b4a3
JH
3474}
3475
3476/* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
3477
3478int
b96a374d 3479x86_64_movabs_operand (rtx op, enum machine_mode mode)
7dd4b4a3
JH
3480{
3481 if (!TARGET_64BIT || !flag_pic)
3482 return nonmemory_operand (op, mode);
c05dbe81 3483 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
7dd4b4a3
JH
3484 return 1;
3485 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
3486 return 1;
3487 return 0;
3488}
3489
7e6dc358
JJ
3490/* Return nonzero if OPNUM's MEM should be matched
3491 in movabs* patterns. */
3492
3493int
3494ix86_check_movabs (rtx insn, int opnum)
3495{
3496 rtx set, mem;
3497
3498 set = PATTERN (insn);
3499 if (GET_CODE (set) == PARALLEL)
3500 set = XVECEXP (set, 0, 0);
3501 if (GET_CODE (set) != SET)
3502 abort ();
3503 mem = XEXP (set, opnum);
3504 while (GET_CODE (mem) == SUBREG)
3505 mem = SUBREG_REG (mem);
3506 if (GET_CODE (mem) != MEM)
3507 abort ();
3508 return (volatile_ok || !MEM_VOLATILE_P (mem));
3509}
3510
7dd4b4a3
JH
3511/* Return nonzero if OP is nonmemory operand representable on x86_64. */
3512
3513int
b96a374d 3514x86_64_szext_nonmemory_operand (rtx op, enum machine_mode mode)
7dd4b4a3
JH
3515{
3516 if (!TARGET_64BIT)
3517 return nonmemory_operand (op, mode);
3518 if (register_operand (op, mode))
3519 return 1;
c05dbe81 3520 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
7dd4b4a3
JH
3521}
3522
3523/* Return nonzero if OP is immediate operand representable on x86_64. */
3524
3525int
b96a374d 3526x86_64_immediate_operand (rtx op, enum machine_mode mode)
7dd4b4a3
JH
3527{
3528 if (!TARGET_64BIT)
3529 return immediate_operand (op, mode);
c05dbe81 3530 return x86_64_sign_extended_value (op);
7dd4b4a3
JH
3531}
3532
3533/* Return nonzero if OP is immediate operand representable on x86_64. */
3534
3535int
b96a374d 3536x86_64_zext_immediate_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7dd4b4a3
JH
3537{
3538 return x86_64_zero_extended_value (op);
3539}
3540
794a292d
JJ
3541/* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
3542 for shift & compare patterns, as shifting by 0 does not change flags),
3543 else return zero. */
3544
3545int
b96a374d 3546const_int_1_31_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
794a292d
JJ
3547{
3548 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31);
3549}
3550
e075ae69
RH
3551/* Returns 1 if OP is either a symbol reference or a sum of a symbol
3552 reference and a constant. */
b08de47e
MM
3553
3554int
8d531ab9 3555symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
2a2ab3f9 3556{
e075ae69 3557 switch (GET_CODE (op))
2a2ab3f9 3558 {
e075ae69
RH
3559 case SYMBOL_REF:
3560 case LABEL_REF:
3561 return 1;
3562
3563 case CONST:
3564 op = XEXP (op, 0);
3565 if (GET_CODE (op) == SYMBOL_REF
3566 || GET_CODE (op) == LABEL_REF
3567 || (GET_CODE (op) == UNSPEC
8ee41eaf
RH
3568 && (XINT (op, 1) == UNSPEC_GOT
3569 || XINT (op, 1) == UNSPEC_GOTOFF
3570 || XINT (op, 1) == UNSPEC_GOTPCREL)))
e075ae69
RH
3571 return 1;
3572 if (GET_CODE (op) != PLUS
3573 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3574 return 0;
3575
3576 op = XEXP (op, 0);
3577 if (GET_CODE (op) == SYMBOL_REF
3578 || GET_CODE (op) == LABEL_REF)
3579 return 1;
3580 /* Only @GOTOFF gets offsets. */
3581 if (GET_CODE (op) != UNSPEC
8ee41eaf 3582 || XINT (op, 1) != UNSPEC_GOTOFF)
e075ae69
RH
3583 return 0;
3584
3585 op = XVECEXP (op, 0, 0);
3586 if (GET_CODE (op) == SYMBOL_REF
3587 || GET_CODE (op) == LABEL_REF)
3588 return 1;
3589 return 0;
3590
3591 default:
3592 return 0;
2a2ab3f9
JVA
3593 }
3594}
2a2ab3f9 3595
e075ae69 3596/* Return true if the operand contains a @GOT or @GOTOFF reference. */
3b3c6a3f 3597
e075ae69 3598int
8d531ab9 3599pic_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
e075ae69 3600{
6eb791fc
JH
3601 if (GET_CODE (op) != CONST)
3602 return 0;
3603 op = XEXP (op, 0);
3604 if (TARGET_64BIT)
3605 {
a0c8285b
JH
3606 if (GET_CODE (op) == UNSPEC
3607 && XINT (op, 1) == UNSPEC_GOTPCREL)
3608 return 1;
3609 if (GET_CODE (op) == PLUS
fdacb904
JH
3610 && GET_CODE (XEXP (op, 0)) == UNSPEC
3611 && XINT (XEXP (op, 0), 1) == UNSPEC_GOTPCREL)
6eb791fc
JH
3612 return 1;
3613 }
fce5a9f2 3614 else
2a2ab3f9 3615 {
e075ae69
RH
3616 if (GET_CODE (op) == UNSPEC)
3617 return 1;
3618 if (GET_CODE (op) != PLUS
3619 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3620 return 0;
3621 op = XEXP (op, 0);
3622 if (GET_CODE (op) == UNSPEC)
3623 return 1;
2a2ab3f9 3624 }
e075ae69 3625 return 0;
2a2ab3f9 3626}
2a2ab3f9 3627
623fe810
RH
3628/* Return true if OP is a symbolic operand that resolves locally. */
3629
3630static int
b96a374d 3631local_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
623fe810 3632{
623fe810
RH
3633 if (GET_CODE (op) == CONST
3634 && GET_CODE (XEXP (op, 0)) == PLUS
c05dbe81 3635 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
623fe810
RH
3636 op = XEXP (XEXP (op, 0), 0);
3637
8bfb45f8
JJ
3638 if (GET_CODE (op) == LABEL_REF)
3639 return 1;
3640
623fe810
RH
3641 if (GET_CODE (op) != SYMBOL_REF)
3642 return 0;
3643
2ae5ae57 3644 if (SYMBOL_REF_LOCAL_P (op))
623fe810
RH
3645 return 1;
3646
3647 /* There is, however, a not insubstantial body of code in the rest of
fce5a9f2 3648 the compiler that assumes it can just stick the results of
623fe810
RH
3649 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
3650 /* ??? This is a hack. Should update the body of the compiler to
fb49053f 3651 always create a DECL an invoke targetm.encode_section_info. */
623fe810
RH
3652 if (strncmp (XSTR (op, 0), internal_label_prefix,
3653 internal_label_prefix_len) == 0)
3654 return 1;
3655
3656 return 0;
3657}
3658
2ae5ae57 3659/* Test for various thread-local symbols. */
f996902d
RH
3660
3661int
8d531ab9 3662tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
f996902d 3663{
f996902d
RH
3664 if (GET_CODE (op) != SYMBOL_REF)
3665 return 0;
2ae5ae57 3666 return SYMBOL_REF_TLS_MODEL (op);
f996902d
RH
3667}
3668
2ae5ae57 3669static inline int
b96a374d 3670tls_symbolic_operand_1 (rtx op, enum tls_model kind)
f996902d 3671{
f996902d
RH
3672 if (GET_CODE (op) != SYMBOL_REF)
3673 return 0;
2ae5ae57 3674 return SYMBOL_REF_TLS_MODEL (op) == kind;
f996902d
RH
3675}
3676
3677int
8d531ab9 3678global_dynamic_symbolic_operand (rtx op,
b96a374d 3679 enum machine_mode mode ATTRIBUTE_UNUSED)
f996902d
RH
3680{
3681 return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
3682}
3683
3684int
8d531ab9 3685local_dynamic_symbolic_operand (rtx op,
b96a374d 3686 enum machine_mode mode ATTRIBUTE_UNUSED)
f996902d
RH
3687{
3688 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
3689}
3690
3691int
8d531ab9 3692initial_exec_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
f996902d
RH
3693{
3694 return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
3695}
3696
3697int
8d531ab9 3698local_exec_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
f996902d
RH
3699{
3700 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
3701}
3702
28d52ffb
RH
3703/* Test for a valid operand for a call instruction. Don't allow the
3704 arg pointer register or virtual regs since they may decay into
3705 reg + const, which the patterns can't handle. */
2a2ab3f9 3706
e075ae69 3707int
b96a374d 3708call_insn_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
e075ae69 3709{
e075ae69
RH
3710 /* Disallow indirect through a virtual register. This leads to
3711 compiler aborts when trying to eliminate them. */
3712 if (GET_CODE (op) == REG
3713 && (op == arg_pointer_rtx
564d80f4 3714 || op == frame_pointer_rtx
e075ae69
RH
3715 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3716 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3717 return 0;
2a2ab3f9 3718
28d52ffb
RH
3719 /* Disallow `call 1234'. Due to varying assembler lameness this
3720 gets either rejected or translated to `call .+1234'. */
3721 if (GET_CODE (op) == CONST_INT)
3722 return 0;
3723
cbbf65e0
RH
3724 /* Explicitly allow SYMBOL_REF even if pic. */
3725 if (GET_CODE (op) == SYMBOL_REF)
e075ae69 3726 return 1;
2a2ab3f9 3727
cbbf65e0
RH
3728 /* Otherwise we can allow any general_operand in the address. */
3729 return general_operand (op, Pmode);
e075ae69 3730}
79325812 3731
4977bab6
ZW
3732/* Test for a valid operand for a call instruction. Don't allow the
3733 arg pointer register or virtual regs since they may decay into
3734 reg + const, which the patterns can't handle. */
3735
3736int
b96a374d 3737sibcall_insn_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4977bab6
ZW
3738{
3739 /* Disallow indirect through a virtual register. This leads to
3740 compiler aborts when trying to eliminate them. */
3741 if (GET_CODE (op) == REG
3742 && (op == arg_pointer_rtx
3743 || op == frame_pointer_rtx
3744 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3745 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3746 return 0;
3747
3748 /* Explicitly allow SYMBOL_REF even if pic. */
3749 if (GET_CODE (op) == SYMBOL_REF)
3750 return 1;
3751
3752 /* Otherwise we can only allow register operands. */
3753 return register_operand (op, Pmode);
3754}
3755
e075ae69 3756int
b96a374d 3757constant_call_address_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
e075ae69 3758{
eaf19aba
JJ
3759 if (GET_CODE (op) == CONST
3760 && GET_CODE (XEXP (op, 0)) == PLUS
3761 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3762 op = XEXP (XEXP (op, 0), 0);
e1ff012c 3763 return GET_CODE (op) == SYMBOL_REF;
e075ae69 3764}
2a2ab3f9 3765
e075ae69 3766/* Match exactly zero and one. */
e9a25f70 3767
0f290768 3768int
8d531ab9 3769const0_operand (rtx op, enum machine_mode mode)
e075ae69
RH
3770{
3771 return op == CONST0_RTX (mode);
3772}
e9a25f70 3773
0f290768 3774int
8d531ab9 3775const1_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
e075ae69
RH
3776{
3777 return op == const1_rtx;
3778}
2a2ab3f9 3779
e075ae69 3780/* Match 2, 4, or 8. Used for leal multiplicands. */
e9a25f70 3781
e075ae69 3782int
8d531ab9 3783const248_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
e075ae69
RH
3784{
3785 return (GET_CODE (op) == CONST_INT
3786 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
3787}
e9a25f70 3788
ebe75517 3789int
8d531ab9 3790const_0_to_3_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
ebe75517
JH
3791{
3792 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 4);
3793}
3794
3795int
8d531ab9 3796const_0_to_7_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
ebe75517
JH
3797{
3798 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 8);
3799}
3800
3801int
8d531ab9 3802const_0_to_15_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
ebe75517
JH
3803{
3804 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 16);
3805}
3806
3807int
8d531ab9 3808const_0_to_255_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
ebe75517
JH
3809{
3810 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 256);
3811}
3812
3813
d1f87653 3814/* True if this is a constant appropriate for an increment or decrement. */
81fd0956 3815
e075ae69 3816int
8d531ab9 3817incdec_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
e075ae69 3818{
f5143c46 3819 /* On Pentium4, the inc and dec operations causes extra dependency on flag
b4e89e2d 3820 registers, since carry flag is not set. */
89c43c0a 3821 if ((TARGET_PENTIUM4 || TARGET_NOCONA) && !optimize_size)
b4e89e2d 3822 return 0;
2b1c08f5 3823 return op == const1_rtx || op == constm1_rtx;
e075ae69 3824}
2a2ab3f9 3825
371bc54b
JH
3826/* Return nonzero if OP is acceptable as operand of DImode shift
3827 expander. */
3828
3829int
b96a374d 3830shiftdi_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
371bc54b
JH
3831{
3832 if (TARGET_64BIT)
3833 return nonimmediate_operand (op, mode);
3834 else
3835 return register_operand (op, mode);
3836}
3837
0f290768 3838/* Return false if this is the stack pointer, or any other fake
e075ae69
RH
3839 register eliminable to the stack pointer. Otherwise, this is
3840 a register operand.
2a2ab3f9 3841
e075ae69
RH
3842 This is used to prevent esp from being used as an index reg.
3843 Which would only happen in pathological cases. */
5f1ec3e6 3844
e075ae69 3845int
8d531ab9 3846reg_no_sp_operand (rtx op, enum machine_mode mode)
e075ae69
RH
3847{
3848 rtx t = op;
3849 if (GET_CODE (t) == SUBREG)
3850 t = SUBREG_REG (t);
564d80f4 3851 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
e075ae69 3852 return 0;
2a2ab3f9 3853
e075ae69 3854 return register_operand (op, mode);
2a2ab3f9 3855}
b840bfb0 3856
915119a5 3857int
8d531ab9 3858mmx_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
915119a5
BS
3859{
3860 return MMX_REG_P (op);
3861}
3862
2c5a510c
RH
3863/* Return false if this is any eliminable register. Otherwise
3864 general_operand. */
3865
3866int
8d531ab9 3867general_no_elim_operand (rtx op, enum machine_mode mode)
2c5a510c
RH
3868{
3869 rtx t = op;
3870 if (GET_CODE (t) == SUBREG)
3871 t = SUBREG_REG (t);
3872 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3873 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3874 || t == virtual_stack_dynamic_rtx)
3875 return 0;
1020a5ab
RH
3876 if (REG_P (t)
3877 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3878 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3879 return 0;
2c5a510c
RH
3880
3881 return general_operand (op, mode);
3882}
3883
3884/* Return false if this is any eliminable register. Otherwise
3885 register_operand or const_int. */
3886
3887int
8d531ab9 3888nonmemory_no_elim_operand (rtx op, enum machine_mode mode)
2c5a510c
RH
3889{
3890 rtx t = op;
3891 if (GET_CODE (t) == SUBREG)
3892 t = SUBREG_REG (t);
3893 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3894 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3895 || t == virtual_stack_dynamic_rtx)
3896 return 0;
3897
3898 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3899}
3900
7ec70495
JH
3901/* Return false if this is any eliminable register or stack register,
3902 otherwise work like register_operand. */
3903
3904int
8d531ab9 3905index_register_operand (rtx op, enum machine_mode mode)
7ec70495
JH
3906{
3907 rtx t = op;
3908 if (GET_CODE (t) == SUBREG)
3909 t = SUBREG_REG (t);
3910 if (!REG_P (t))
3911 return 0;
3912 if (t == arg_pointer_rtx
3913 || t == frame_pointer_rtx
3914 || t == virtual_incoming_args_rtx
3915 || t == virtual_stack_vars_rtx
3916 || t == virtual_stack_dynamic_rtx
3917 || REGNO (t) == STACK_POINTER_REGNUM)
3918 return 0;
3919
3920 return general_operand (op, mode);
3921}
3922
e075ae69 3923/* Return true if op is a Q_REGS class register. */
b840bfb0 3924
e075ae69 3925int
8d531ab9 3926q_regs_operand (rtx op, enum machine_mode mode)
b840bfb0 3927{
e075ae69
RH
3928 if (mode != VOIDmode && GET_MODE (op) != mode)
3929 return 0;
3930 if (GET_CODE (op) == SUBREG)
3931 op = SUBREG_REG (op);
7799175f 3932 return ANY_QI_REG_P (op);
0f290768 3933}
b840bfb0 3934
4977bab6
ZW
3935/* Return true if op is an flags register. */
3936
3937int
8d531ab9 3938flags_reg_operand (rtx op, enum machine_mode mode)
4977bab6
ZW
3939{
3940 if (mode != VOIDmode && GET_MODE (op) != mode)
3941 return 0;
3942 return REG_P (op) && REGNO (op) == FLAGS_REG && GET_MODE (op) != VOIDmode;
3943}
3944
e075ae69 3945/* Return true if op is a NON_Q_REGS class register. */
b840bfb0 3946
e075ae69 3947int
8d531ab9 3948non_q_regs_operand (rtx op, enum machine_mode mode)
e075ae69
RH
3949{
3950 if (mode != VOIDmode && GET_MODE (op) != mode)
3951 return 0;
3952 if (GET_CODE (op) == SUBREG)
3953 op = SUBREG_REG (op);
3954 return NON_QI_REG_P (op);
0f290768 3955}
b840bfb0 3956
4977bab6 3957int
b96a374d
AJ
3958zero_extended_scalar_load_operand (rtx op,
3959 enum machine_mode mode ATTRIBUTE_UNUSED)
4977bab6
ZW
3960{
3961 unsigned n_elts;
3962 if (GET_CODE (op) != MEM)
3963 return 0;
3964 op = maybe_get_pool_constant (op);
3965 if (!op)
3966 return 0;
3967 if (GET_CODE (op) != CONST_VECTOR)
3968 return 0;
3969 n_elts =
3970 (GET_MODE_SIZE (GET_MODE (op)) /
3971 GET_MODE_SIZE (GET_MODE_INNER (GET_MODE (op))));
3972 for (n_elts--; n_elts > 0; n_elts--)
3973 {
3974 rtx elt = CONST_VECTOR_ELT (op, n_elts);
3975 if (elt != CONST0_RTX (GET_MODE_INNER (GET_MODE (op))))
3976 return 0;
3977 }
3978 return 1;
3979}
3980
fdc4b40b
JH
3981/* Return 1 when OP is operand acceptable for standard SSE move. */
3982int
b96a374d 3983vector_move_operand (rtx op, enum machine_mode mode)
fdc4b40b
JH
3984{
3985 if (nonimmediate_operand (op, mode))
3986 return 1;
3987 if (GET_MODE (op) != mode && mode != VOIDmode)
3988 return 0;
3989 return (op == CONST0_RTX (GET_MODE (op)));
3990}
3991
74dc3e94
RH
3992/* Return true if op if a valid address, and does not contain
3993 a segment override. */
3994
3995int
8d531ab9 3996no_seg_address_operand (rtx op, enum machine_mode mode)
74dc3e94
RH
3997{
3998 struct ix86_address parts;
3999
4000 if (! address_operand (op, mode))
4001 return 0;
4002
4003 if (! ix86_decompose_address (op, &parts))
4004 abort ();
4005
4006 return parts.seg == SEG_DEFAULT;
4007}
4008
915119a5
BS
4009/* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
4010 insns. */
4011int
b96a374d 4012sse_comparison_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
915119a5
BS
4013{
4014 enum rtx_code code = GET_CODE (op);
a46d1d38
JH
4015 switch (code)
4016 {
4017 /* Operations supported directly. */
4018 case EQ:
4019 case LT:
4020 case LE:
4021 case UNORDERED:
4022 case NE:
4023 case UNGE:
4024 case UNGT:
4025 case ORDERED:
4026 return 1;
4027 /* These are equivalent to ones above in non-IEEE comparisons. */
4028 case UNEQ:
4029 case UNLT:
4030 case UNLE:
4031 case LTGT:
4032 case GE:
4033 case GT:
4034 return !TARGET_IEEE_FP;
4035 default:
4036 return 0;
4037 }
915119a5 4038}
9076b9c1 4039/* Return 1 if OP is a valid comparison operator in valid mode. */
e075ae69 4040int
8d531ab9 4041ix86_comparison_operator (rtx op, enum machine_mode mode)
e075ae69 4042{
9076b9c1 4043 enum machine_mode inmode;
9a915772 4044 enum rtx_code code = GET_CODE (op);
3a3677ff
RH
4045 if (mode != VOIDmode && GET_MODE (op) != mode)
4046 return 0;
ec8e098d 4047 if (!COMPARISON_P (op))
9a915772
JH
4048 return 0;
4049 inmode = GET_MODE (XEXP (op, 0));
4050
4051 if (inmode == CCFPmode || inmode == CCFPUmode)
4052 {
4053 enum rtx_code second_code, bypass_code;
4054 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
4055 return (bypass_code == NIL && second_code == NIL);
4056 }
4057 switch (code)
3a3677ff
RH
4058 {
4059 case EQ: case NE:
3a3677ff 4060 return 1;
9076b9c1 4061 case LT: case GE:
7e08e190 4062 if (inmode == CCmode || inmode == CCGCmode
9076b9c1
JH
4063 || inmode == CCGOCmode || inmode == CCNOmode)
4064 return 1;
4065 return 0;
7e08e190 4066 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
7e08e190 4067 if (inmode == CCmode)
9076b9c1
JH
4068 return 1;
4069 return 0;
4070 case GT: case LE:
7e08e190 4071 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
9076b9c1
JH
4072 return 1;
4073 return 0;
3a3677ff
RH
4074 default:
4075 return 0;
4076 }
4077}
4078
e6e81735
JH
4079/* Return 1 if OP is a valid comparison operator testing carry flag
4080 to be set. */
4081int
8d531ab9 4082ix86_carry_flag_operator (rtx op, enum machine_mode mode)
e6e81735
JH
4083{
4084 enum machine_mode inmode;
4085 enum rtx_code code = GET_CODE (op);
4086
4087 if (mode != VOIDmode && GET_MODE (op) != mode)
4088 return 0;
ec8e098d 4089 if (!COMPARISON_P (op))
e6e81735
JH
4090 return 0;
4091 inmode = GET_MODE (XEXP (op, 0));
4092 if (GET_CODE (XEXP (op, 0)) != REG
4093 || REGNO (XEXP (op, 0)) != 17
4094 || XEXP (op, 1) != const0_rtx)
4095 return 0;
4096
4097 if (inmode == CCFPmode || inmode == CCFPUmode)
4098 {
4099 enum rtx_code second_code, bypass_code;
4100
4101 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
4102 if (bypass_code != NIL || second_code != NIL)
4103 return 0;
4104 code = ix86_fp_compare_code_to_integer (code);
4105 }
4106 else if (inmode != CCmode)
4107 return 0;
4108 return code == LTU;
4109}
4110
9076b9c1 4111/* Return 1 if OP is a comparison operator that can be issued by fcmov. */
3a3677ff 4112
9076b9c1 4113int
8d531ab9 4114fcmov_comparison_operator (rtx op, enum machine_mode mode)
3a3677ff 4115{
b62d22a2 4116 enum machine_mode inmode;
9a915772 4117 enum rtx_code code = GET_CODE (op);
e6e81735 4118
3a3677ff
RH
4119 if (mode != VOIDmode && GET_MODE (op) != mode)
4120 return 0;
ec8e098d 4121 if (!COMPARISON_P (op))
9a915772
JH
4122 return 0;
4123 inmode = GET_MODE (XEXP (op, 0));
4124 if (inmode == CCFPmode || inmode == CCFPUmode)
3a3677ff 4125 {
9a915772 4126 enum rtx_code second_code, bypass_code;
e6e81735 4127
9a915772
JH
4128 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
4129 if (bypass_code != NIL || second_code != NIL)
4130 return 0;
4131 code = ix86_fp_compare_code_to_integer (code);
4132 }
4133 /* i387 supports just limited amount of conditional codes. */
4134 switch (code)
4135 {
4136 case LTU: case GTU: case LEU: case GEU:
4137 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
9076b9c1
JH
4138 return 1;
4139 return 0;
9a915772
JH
4140 case ORDERED: case UNORDERED:
4141 case EQ: case NE:
4142 return 1;
3a3677ff
RH
4143 default:
4144 return 0;
4145 }
e075ae69 4146}
b840bfb0 4147
e9e80858
JH
4148/* Return 1 if OP is a binary operator that can be promoted to wider mode. */
4149
4150int
8d531ab9 4151promotable_binary_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
e9e80858
JH
4152{
4153 switch (GET_CODE (op))
4154 {
4155 case MULT:
4156 /* Modern CPUs have same latency for HImode and SImode multiply,
4157 but 386 and 486 do HImode multiply faster. */
9e555526 4158 return ix86_tune > PROCESSOR_I486;
e9e80858
JH
4159 case PLUS:
4160 case AND:
4161 case IOR:
4162 case XOR:
4163 case ASHIFT:
4164 return 1;
4165 default:
4166 return 0;
4167 }
4168}
4169
e075ae69
RH
4170/* Nearly general operand, but accept any const_double, since we wish
4171 to be able to drop them into memory rather than have them get pulled
4172 into registers. */
b840bfb0 4173
2a2ab3f9 4174int
8d531ab9 4175cmp_fp_expander_operand (rtx op, enum machine_mode mode)
2a2ab3f9 4176{
e075ae69 4177 if (mode != VOIDmode && mode != GET_MODE (op))
0b6b2900 4178 return 0;
e075ae69 4179 if (GET_CODE (op) == CONST_DOUBLE)
2a2ab3f9 4180 return 1;
e075ae69 4181 return general_operand (op, mode);
2a2ab3f9
JVA
4182}
4183
e075ae69 4184/* Match an SI or HImode register for a zero_extract. */
2a2ab3f9
JVA
4185
4186int
8d531ab9 4187ext_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
2a2ab3f9 4188{
3522082b 4189 int regno;
0d7d98ee
JH
4190 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
4191 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
e075ae69 4192 return 0;
3522082b
JH
4193
4194 if (!register_operand (op, VOIDmode))
4195 return 0;
4196
d1f87653 4197 /* Be careful to accept only registers having upper parts. */
3522082b
JH
4198 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
4199 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
e075ae69
RH
4200}
4201
4202/* Return 1 if this is a valid binary floating-point operation.
0f290768 4203 OP is the expression matched, and MODE is its mode. */
e075ae69
RH
4204
4205int
8d531ab9 4206binary_fp_operator (rtx op, enum machine_mode mode)
e075ae69
RH
4207{
4208 if (mode != VOIDmode && mode != GET_MODE (op))
4209 return 0;
4210
2a2ab3f9
JVA
4211 switch (GET_CODE (op))
4212 {
e075ae69
RH
4213 case PLUS:
4214 case MINUS:
4215 case MULT:
4216 case DIV:
4217 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
e9a25f70 4218
2a2ab3f9
JVA
4219 default:
4220 return 0;
4221 }
4222}
fee2770d 4223
e075ae69 4224int
8d531ab9 4225mult_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
e075ae69
RH
4226{
4227 return GET_CODE (op) == MULT;
4228}
4229
4230int
8d531ab9 4231div_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
e075ae69
RH
4232{
4233 return GET_CODE (op) == DIV;
4234}
0a726ef1
JL
4235
4236int
b96a374d 4237arith_or_logical_operator (rtx op, enum machine_mode mode)
0a726ef1 4238{
e075ae69 4239 return ((mode == VOIDmode || GET_MODE (op) == mode)
ec8e098d 4240 && ARITHMETIC_P (op));
0a726ef1
JL
4241}
4242
e075ae69 4243/* Returns 1 if OP is memory operand with a displacement. */
fee2770d
RS
4244
4245int
8d531ab9 4246memory_displacement_operand (rtx op, enum machine_mode mode)
4f2c8ebb 4247{
e075ae69 4248 struct ix86_address parts;
e9a25f70 4249
e075ae69
RH
4250 if (! memory_operand (op, mode))
4251 return 0;
4252
4253 if (! ix86_decompose_address (XEXP (op, 0), &parts))
4254 abort ();
4255
4256 return parts.disp != NULL_RTX;
4f2c8ebb
RS
4257}
4258
16189740 4259/* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
e075ae69
RH
4260 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
4261
4262 ??? It seems likely that this will only work because cmpsi is an
4263 expander, and no actual insns use this. */
4f2c8ebb
RS
4264
4265int
b96a374d 4266cmpsi_operand (rtx op, enum machine_mode mode)
fee2770d 4267{
b9b2c339 4268 if (nonimmediate_operand (op, mode))
e075ae69
RH
4269 return 1;
4270
4271 if (GET_CODE (op) == AND
4272 && GET_MODE (op) == SImode
4273 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
4274 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
4275 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
4276 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
4277 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
4278 && GET_CODE (XEXP (op, 1)) == CONST_INT)
fee2770d 4279 return 1;
e9a25f70 4280
fee2770d
RS
4281 return 0;
4282}
d784886d 4283
e075ae69
RH
4284/* Returns 1 if OP is memory operand that can not be represented by the
4285 modRM array. */
d784886d
RK
4286
4287int
8d531ab9 4288long_memory_operand (rtx op, enum machine_mode mode)
d784886d 4289{
e075ae69 4290 if (! memory_operand (op, mode))
d784886d
RK
4291 return 0;
4292
e075ae69 4293 return memory_address_length (op) != 0;
d784886d 4294}
2247f6ed
JH
4295
4296/* Return nonzero if the rtx is known aligned. */
4297
4298int
b96a374d 4299aligned_operand (rtx op, enum machine_mode mode)
2247f6ed
JH
4300{
4301 struct ix86_address parts;
4302
4303 if (!general_operand (op, mode))
4304 return 0;
4305
0f290768 4306 /* Registers and immediate operands are always "aligned". */
2247f6ed
JH
4307 if (GET_CODE (op) != MEM)
4308 return 1;
4309
0f290768 4310 /* Don't even try to do any aligned optimizations with volatiles. */
2247f6ed
JH
4311 if (MEM_VOLATILE_P (op))
4312 return 0;
4313
4314 op = XEXP (op, 0);
4315
4316 /* Pushes and pops are only valid on the stack pointer. */
4317 if (GET_CODE (op) == PRE_DEC
4318 || GET_CODE (op) == POST_INC)
4319 return 1;
4320
4321 /* Decode the address. */
4322 if (! ix86_decompose_address (op, &parts))
4323 abort ();
4324
4325 /* Look for some component that isn't known to be aligned. */
4326 if (parts.index)
4327 {
4328 if (parts.scale < 4
bdb429a5 4329 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
2247f6ed
JH
4330 return 0;
4331 }
4332 if (parts.base)
4333 {
bdb429a5 4334 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
2247f6ed
JH
4335 return 0;
4336 }
4337 if (parts.disp)
4338 {
4339 if (GET_CODE (parts.disp) != CONST_INT
4340 || (INTVAL (parts.disp) & 3) != 0)
4341 return 0;
4342 }
4343
4344 /* Didn't find one -- this must be an aligned address. */
4345 return 1;
4346}
e075ae69 4347\f
881b2a96
RS
4348/* Initialize the table of extra 80387 mathematical constants. */
4349
4350static void
b96a374d 4351init_ext_80387_constants (void)
881b2a96
RS
4352{
4353 static const char * cst[5] =
4354 {
4355 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4356 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4357 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4358 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4359 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4360 };
4361 int i;
4362
4363 for (i = 0; i < 5; i++)
4364 {
4365 real_from_string (&ext_80387_constants_table[i], cst[i]);
4366 /* Ensure each constant is rounded to XFmode precision. */
1f48e56d 4367 real_convert (&ext_80387_constants_table[i],
f8a1ebc6 4368 XFmode, &ext_80387_constants_table[i]);
881b2a96
RS
4369 }
4370
4371 ext_80387_constants_init = 1;
4372}
4373
e075ae69 4374/* Return true if the constant is something that can be loaded with
881b2a96 4375 a special instruction. */
57dbca5e
BS
4376
4377int
b96a374d 4378standard_80387_constant_p (rtx x)
57dbca5e 4379{
2b04e52b 4380 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
e075ae69 4381 return -1;
881b2a96 4382
2b04e52b
JH
4383 if (x == CONST0_RTX (GET_MODE (x)))
4384 return 1;
4385 if (x == CONST1_RTX (GET_MODE (x)))
4386 return 2;
881b2a96 4387
22cc69c4
RS
4388 /* For XFmode constants, try to find a special 80387 instruction when
4389 optimizing for size or on those CPUs that benefit from them. */
f8a1ebc6 4390 if (GET_MODE (x) == XFmode
22cc69c4 4391 && (optimize_size || x86_ext_80387_constants & TUNEMASK))
881b2a96
RS
4392 {
4393 REAL_VALUE_TYPE r;
4394 int i;
4395
4396 if (! ext_80387_constants_init)
4397 init_ext_80387_constants ();
4398
4399 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4400 for (i = 0; i < 5; i++)
4401 if (real_identical (&r, &ext_80387_constants_table[i]))
4402 return i + 3;
4403 }
4404
e075ae69 4405 return 0;
57dbca5e
BS
4406}
4407
881b2a96
RS
4408/* Return the opcode of the special instruction to be used to load
4409 the constant X. */
4410
4411const char *
b96a374d 4412standard_80387_constant_opcode (rtx x)
881b2a96
RS
4413{
4414 switch (standard_80387_constant_p (x))
4415 {
b96a374d 4416 case 1:
881b2a96
RS
4417 return "fldz";
4418 case 2:
4419 return "fld1";
b96a374d 4420 case 3:
881b2a96
RS
4421 return "fldlg2";
4422 case 4:
4423 return "fldln2";
b96a374d 4424 case 5:
881b2a96
RS
4425 return "fldl2e";
4426 case 6:
4427 return "fldl2t";
b96a374d 4428 case 7:
881b2a96
RS
4429 return "fldpi";
4430 }
4431 abort ();
4432}
4433
4434/* Return the CONST_DOUBLE representing the 80387 constant that is
4435 loaded by the specified special instruction. The argument IDX
4436 matches the return value from standard_80387_constant_p. */
4437
4438rtx
b96a374d 4439standard_80387_constant_rtx (int idx)
881b2a96
RS
4440{
4441 int i;
4442
4443 if (! ext_80387_constants_init)
4444 init_ext_80387_constants ();
4445
4446 switch (idx)
4447 {
4448 case 3:
4449 case 4:
4450 case 5:
4451 case 6:
4452 case 7:
4453 i = idx - 3;
4454 break;
4455
4456 default:
4457 abort ();
4458 }
4459
1f48e56d 4460 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
f8a1ebc6 4461 XFmode);
881b2a96
RS
4462}
4463
2b04e52b
JH
4464/* Return 1 if X is FP constant we can load to SSE register w/o using memory.
4465 */
4466int
b96a374d 4467standard_sse_constant_p (rtx x)
2b04e52b 4468{
0e67d460
JH
4469 if (x == const0_rtx)
4470 return 1;
2b04e52b
JH
4471 return (x == CONST0_RTX (GET_MODE (x)));
4472}
4473
2a2ab3f9
JVA
4474/* Returns 1 if OP contains a symbol reference */
4475
4476int
b96a374d 4477symbolic_reference_mentioned_p (rtx op)
2a2ab3f9 4478{
8d531ab9
KH
4479 const char *fmt;
4480 int i;
2a2ab3f9
JVA
4481
4482 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4483 return 1;
4484
4485 fmt = GET_RTX_FORMAT (GET_CODE (op));
4486 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4487 {
4488 if (fmt[i] == 'E')
4489 {
8d531ab9 4490 int j;
2a2ab3f9
JVA
4491
4492 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4493 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4494 return 1;
4495 }
e9a25f70 4496
2a2ab3f9
JVA
4497 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4498 return 1;
4499 }
4500
4501 return 0;
4502}
e075ae69
RH
4503
4504/* Return 1 if it is appropriate to emit `ret' instructions in the
4505 body of a function. Do this only if the epilogue is simple, needing a
4506 couple of insns. Prior to reloading, we can't tell how many registers
4507 must be saved, so return 0 then. Return 0 if there is no frame
4508 marker to de-allocate.
4509
4510 If NON_SAVING_SETJMP is defined and true, then it is not possible
4511 for the epilogue to be simple, so return 0. This is a special case
4512 since NON_SAVING_SETJMP will not cause regs_ever_live to change
4513 until final, but jump_optimize may need to know sooner if a
4514 `return' is OK. */
32b5b1aa
SC
4515
4516int
b96a374d 4517ix86_can_use_return_insn_p (void)
32b5b1aa 4518{
4dd2ac2c 4519 struct ix86_frame frame;
9a7372d6 4520
e075ae69
RH
4521#ifdef NON_SAVING_SETJMP
4522 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
4523 return 0;
4524#endif
9a7372d6
RH
4525
4526 if (! reload_completed || frame_pointer_needed)
4527 return 0;
32b5b1aa 4528
9a7372d6
RH
4529 /* Don't allow more than 32 pop, since that's all we can do
4530 with one instruction. */
4531 if (current_function_pops_args
4532 && current_function_args_size >= 32768)
e075ae69 4533 return 0;
32b5b1aa 4534
4dd2ac2c
JH
4535 ix86_compute_frame_layout (&frame);
4536 return frame.to_allocate == 0 && frame.nregs == 0;
e075ae69 4537}
6189a572
JH
4538\f
4539/* Return 1 if VALUE can be stored in the sign extended immediate field. */
4540int
b96a374d 4541x86_64_sign_extended_value (rtx value)
6189a572
JH
4542{
4543 switch (GET_CODE (value))
4544 {
4545 /* CONST_DOUBLES never match, since HOST_BITS_PER_WIDE_INT is known
4546 to be at least 32 and this all acceptable constants are
4547 represented as CONST_INT. */
4548 case CONST_INT:
4549 if (HOST_BITS_PER_WIDE_INT == 32)
4550 return 1;
4551 else
4552 {
4553 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
fa9f36a1 4554 return trunc_int_for_mode (val, SImode) == val;
6189a572
JH
4555 }
4556 break;
4557
75d38379
JJ
4558 /* For certain code models, the symbolic references are known to fit.
4559 in CM_SMALL_PIC model we know it fits if it is local to the shared
4560 library. Don't count TLS SYMBOL_REFs here, since they should fit
4561 only if inside of UNSPEC handled below. */
6189a572 4562 case SYMBOL_REF:
d7222e38
JH
4563 /* TLS symbols are not constant. */
4564 if (tls_symbolic_operand (value, Pmode))
4565 return false;
c05dbe81 4566 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL);
6189a572
JH
4567
4568 /* For certain code models, the code is near as well. */
4569 case LABEL_REF:
c05dbe81
JH
4570 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM
4571 || ix86_cmodel == CM_KERNEL);
6189a572
JH
4572
4573 /* We also may accept the offsetted memory references in certain special
4574 cases. */
4575 case CONST:
75d38379
JJ
4576 if (GET_CODE (XEXP (value, 0)) == UNSPEC)
4577 switch (XINT (XEXP (value, 0), 1))
4578 {
4579 case UNSPEC_GOTPCREL:
4580 case UNSPEC_DTPOFF:
4581 case UNSPEC_GOTNTPOFF:
4582 case UNSPEC_NTPOFF:
4583 return 1;
4584 default:
4585 break;
4586 }
4587 if (GET_CODE (XEXP (value, 0)) == PLUS)
6189a572
JH
4588 {
4589 rtx op1 = XEXP (XEXP (value, 0), 0);
4590 rtx op2 = XEXP (XEXP (value, 0), 1);
4591 HOST_WIDE_INT offset;
4592
4593 if (ix86_cmodel == CM_LARGE)
4594 return 0;
4595 if (GET_CODE (op2) != CONST_INT)
4596 return 0;
4597 offset = trunc_int_for_mode (INTVAL (op2), DImode);
4598 switch (GET_CODE (op1))
4599 {
4600 case SYMBOL_REF:
75d38379 4601 /* For CM_SMALL assume that latest object is 16MB before
6189a572
JH
4602 end of 31bits boundary. We may also accept pretty
4603 large negative constants knowing that all objects are
4604 in the positive half of address space. */
4605 if (ix86_cmodel == CM_SMALL
75d38379 4606 && offset < 16*1024*1024
6189a572
JH
4607 && trunc_int_for_mode (offset, SImode) == offset)
4608 return 1;
4609 /* For CM_KERNEL we know that all object resist in the
4610 negative half of 32bits address space. We may not
4611 accept negative offsets, since they may be just off
d6a7951f 4612 and we may accept pretty large positive ones. */
6189a572
JH
4613 if (ix86_cmodel == CM_KERNEL
4614 && offset > 0
4615 && trunc_int_for_mode (offset, SImode) == offset)
4616 return 1;
4617 break;
4618 case LABEL_REF:
4619 /* These conditions are similar to SYMBOL_REF ones, just the
4620 constraints for code models differ. */
c05dbe81 4621 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
75d38379 4622 && offset < 16*1024*1024
6189a572
JH
4623 && trunc_int_for_mode (offset, SImode) == offset)
4624 return 1;
4625 if (ix86_cmodel == CM_KERNEL
4626 && offset > 0
4627 && trunc_int_for_mode (offset, SImode) == offset)
4628 return 1;
4629 break;
75d38379
JJ
4630 case UNSPEC:
4631 switch (XINT (op1, 1))
4632 {
4633 case UNSPEC_DTPOFF:
4634 case UNSPEC_NTPOFF:
4635 if (offset > 0
4636 && trunc_int_for_mode (offset, SImode) == offset)
4637 return 1;
4638 }
4639 break;
6189a572
JH
4640 default:
4641 return 0;
4642 }
4643 }
4644 return 0;
4645 default:
4646 return 0;
4647 }
4648}
4649
4650/* Return 1 if VALUE can be stored in the zero extended immediate field. */
4651int
b96a374d 4652x86_64_zero_extended_value (rtx value)
6189a572
JH
4653{
4654 switch (GET_CODE (value))
4655 {
4656 case CONST_DOUBLE:
4657 if (HOST_BITS_PER_WIDE_INT == 32)
4658 return (GET_MODE (value) == VOIDmode
4659 && !CONST_DOUBLE_HIGH (value));
4660 else
4661 return 0;
4662 case CONST_INT:
4663 if (HOST_BITS_PER_WIDE_INT == 32)
4664 return INTVAL (value) >= 0;
4665 else
b531087a 4666 return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
6189a572
JH
4667 break;
4668
4669 /* For certain code models, the symbolic references are known to fit. */
4670 case SYMBOL_REF:
d7222e38
JH
4671 /* TLS symbols are not constant. */
4672 if (tls_symbolic_operand (value, Pmode))
4673 return false;
6189a572
JH
4674 return ix86_cmodel == CM_SMALL;
4675
4676 /* For certain code models, the code is near as well. */
4677 case LABEL_REF:
4678 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
4679
4680 /* We also may accept the offsetted memory references in certain special
4681 cases. */
4682 case CONST:
4683 if (GET_CODE (XEXP (value, 0)) == PLUS)
4684 {
4685 rtx op1 = XEXP (XEXP (value, 0), 0);
4686 rtx op2 = XEXP (XEXP (value, 0), 1);
4687
4688 if (ix86_cmodel == CM_LARGE)
4689 return 0;
4690 switch (GET_CODE (op1))
4691 {
4692 case SYMBOL_REF:
4693 return 0;
d6a7951f 4694 /* For small code model we may accept pretty large positive
6189a572
JH
4695 offsets, since one bit is available for free. Negative
4696 offsets are limited by the size of NULL pointer area
4697 specified by the ABI. */
4698 if (ix86_cmodel == CM_SMALL
4699 && GET_CODE (op2) == CONST_INT
4700 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4701 && (trunc_int_for_mode (INTVAL (op2), SImode)
4702 == INTVAL (op2)))
4703 return 1;
4704 /* ??? For the kernel, we may accept adjustment of
4705 -0x10000000, since we know that it will just convert
d6a7951f 4706 negative address space to positive, but perhaps this
6189a572
JH
4707 is not worthwhile. */
4708 break;
4709 case LABEL_REF:
4710 /* These conditions are similar to SYMBOL_REF ones, just the
4711 constraints for code models differ. */
4712 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4713 && GET_CODE (op2) == CONST_INT
4714 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4715 && (trunc_int_for_mode (INTVAL (op2), SImode)
4716 == INTVAL (op2)))
4717 return 1;
4718 break;
4719 default:
4720 return 0;
4721 }
4722 }
4723 return 0;
4724 default:
4725 return 0;
4726 }
4727}
6fca22eb
RH
4728
4729/* Value should be nonzero if functions must have frame pointers.
4730 Zero means the frame pointer need not be set up (and parms may
4731 be accessed via the stack pointer) in functions that seem suitable. */
4732
4733int
b96a374d 4734ix86_frame_pointer_required (void)
6fca22eb
RH
4735{
4736 /* If we accessed previous frames, then the generated code expects
4737 to be able to access the saved ebp value in our frame. */
4738 if (cfun->machine->accesses_prev_frame)
4739 return 1;
a4f31c00 4740
6fca22eb
RH
4741 /* Several x86 os'es need a frame pointer for other reasons,
4742 usually pertaining to setjmp. */
4743 if (SUBTARGET_FRAME_POINTER_REQUIRED)
4744 return 1;
4745
4746 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4747 the frame pointer by default. Turn it back on now if we've not
4748 got a leaf function. */
a7943381 4749 if (TARGET_OMIT_LEAF_FRAME_POINTER
55ba61f3
JH
4750 && (!current_function_is_leaf))
4751 return 1;
4752
4753 if (current_function_profile)
6fca22eb
RH
4754 return 1;
4755
4756 return 0;
4757}
4758
4759/* Record that the current function accesses previous call frames. */
4760
4761void
b96a374d 4762ix86_setup_frame_addresses (void)
6fca22eb
RH
4763{
4764 cfun->machine->accesses_prev_frame = 1;
4765}
e075ae69 4766\f
145aacc2
RH
4767#if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
4768# define USE_HIDDEN_LINKONCE 1
4769#else
4770# define USE_HIDDEN_LINKONCE 0
4771#endif
4772
bd09bdeb 4773static int pic_labels_used;
e9a25f70 4774
145aacc2
RH
4775/* Fills in the label name that should be used for a pc thunk for
4776 the given register. */
4777
4778static void
b96a374d 4779get_pc_thunk_name (char name[32], unsigned int regno)
145aacc2
RH
4780{
4781 if (USE_HIDDEN_LINKONCE)
4782 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4783 else
4784 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
4785}
4786
4787
e075ae69
RH
4788/* This function generates code for -fpic that loads %ebx with
4789 the return address of the caller and then returns. */
4790
4791void
b96a374d 4792ix86_file_end (void)
e075ae69
RH
4793{
4794 rtx xops[2];
bd09bdeb 4795 int regno;
32b5b1aa 4796
bd09bdeb 4797 for (regno = 0; regno < 8; ++regno)
7c262518 4798 {
145aacc2
RH
4799 char name[32];
4800
bd09bdeb
RH
4801 if (! ((pic_labels_used >> regno) & 1))
4802 continue;
4803
145aacc2 4804 get_pc_thunk_name (name, regno);
bd09bdeb 4805
145aacc2
RH
4806 if (USE_HIDDEN_LINKONCE)
4807 {
4808 tree decl;
4809
4810 decl = build_decl (FUNCTION_DECL, get_identifier (name),
4811 error_mark_node);
4812 TREE_PUBLIC (decl) = 1;
4813 TREE_STATIC (decl) = 1;
4814 DECL_ONE_ONLY (decl) = 1;
4815
4816 (*targetm.asm_out.unique_section) (decl, 0);
4817 named_section (decl, NULL, 0);
4818
a5fe455b
ZW
4819 (*targetm.asm_out.globalize_label) (asm_out_file, name);
4820 fputs ("\t.hidden\t", asm_out_file);
4821 assemble_name (asm_out_file, name);
4822 fputc ('\n', asm_out_file);
4823 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
145aacc2
RH
4824 }
4825 else
4826 {
4827 text_section ();
a5fe455b 4828 ASM_OUTPUT_LABEL (asm_out_file, name);
145aacc2 4829 }
bd09bdeb
RH
4830
4831 xops[0] = gen_rtx_REG (SImode, regno);
4832 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4833 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4834 output_asm_insn ("ret", xops);
7c262518 4835 }
3edc56a9 4836
a5fe455b
ZW
4837 if (NEED_INDICATE_EXEC_STACK)
4838 file_end_indicate_exec_stack ();
32b5b1aa 4839}
32b5b1aa 4840
c8c03509 4841/* Emit code for the SET_GOT patterns. */
32b5b1aa 4842
c8c03509 4843const char *
b96a374d 4844output_set_got (rtx dest)
c8c03509
RH
4845{
4846 rtx xops[3];
0d7d98ee 4847
c8c03509 4848 xops[0] = dest;
5fc0e5df 4849 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
32b5b1aa 4850
c8c03509 4851 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
32b5b1aa 4852 {
c8c03509
RH
4853 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
4854
4855 if (!flag_pic)
4856 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4857 else
4858 output_asm_insn ("call\t%a2", xops);
4859
b069de3b
SS
4860#if TARGET_MACHO
4861 /* Output the "canonical" label name ("Lxx$pb") here too. This
4862 is what will be referred to by the Mach-O PIC subsystem. */
4863 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4864#endif
4977bab6 4865 (*targetm.asm_out.internal_label) (asm_out_file, "L",
c8c03509
RH
4866 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4867
4868 if (flag_pic)
4869 output_asm_insn ("pop{l}\t%0", xops);
32b5b1aa 4870 }
e075ae69 4871 else
e5cb57e8 4872 {
145aacc2
RH
4873 char name[32];
4874 get_pc_thunk_name (name, REGNO (dest));
bd09bdeb 4875 pic_labels_used |= 1 << REGNO (dest);
f996902d 4876
145aacc2 4877 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
c8c03509
RH
4878 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4879 output_asm_insn ("call\t%X2", xops);
e5cb57e8 4880 }
e5cb57e8 4881
c8c03509
RH
4882 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4883 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
b069de3b 4884 else if (!TARGET_MACHO)
8e9fadc3 4885 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
79325812 4886
c8c03509 4887 return "";
e9a25f70 4888}
8dfe5673 4889
0d7d98ee 4890/* Generate an "push" pattern for input ARG. */
e9a25f70 4891
e075ae69 4892static rtx
b96a374d 4893gen_push (rtx arg)
e9a25f70 4894{
c5c76735 4895 return gen_rtx_SET (VOIDmode,
0d7d98ee
JH
4896 gen_rtx_MEM (Pmode,
4897 gen_rtx_PRE_DEC (Pmode,
c5c76735
JL
4898 stack_pointer_rtx)),
4899 arg);
e9a25f70
JL
4900}
4901
bd09bdeb
RH
4902/* Return >= 0 if there is an unused call-clobbered register available
4903 for the entire function. */
4904
4905static unsigned int
b96a374d 4906ix86_select_alt_pic_regnum (void)
bd09bdeb
RH
4907{
4908 if (current_function_is_leaf && !current_function_profile)
4909 {
4910 int i;
4911 for (i = 2; i >= 0; --i)
4912 if (!regs_ever_live[i])
4913 return i;
4914 }
4915
4916 return INVALID_REGNUM;
4917}
fce5a9f2 4918
4dd2ac2c
JH
4919/* Return 1 if we need to save REGNO. */
4920static int
b96a374d 4921ix86_save_reg (unsigned int regno, int maybe_eh_return)
1020a5ab 4922{
bd09bdeb
RH
4923 if (pic_offset_table_rtx
4924 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4925 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
66edd3b4 4926 || current_function_profile
8c38a24f
MM
4927 || current_function_calls_eh_return
4928 || current_function_uses_const_pool))
bd09bdeb
RH
4929 {
4930 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4931 return 0;
4932 return 1;
4933 }
1020a5ab
RH
4934
4935 if (current_function_calls_eh_return && maybe_eh_return)
4936 {
4937 unsigned i;
4938 for (i = 0; ; i++)
4939 {
b531087a 4940 unsigned test = EH_RETURN_DATA_REGNO (i);
1020a5ab
RH
4941 if (test == INVALID_REGNUM)
4942 break;
9b690711 4943 if (test == regno)
1020a5ab
RH
4944 return 1;
4945 }
4946 }
4dd2ac2c 4947
1020a5ab
RH
4948 return (regs_ever_live[regno]
4949 && !call_used_regs[regno]
4950 && !fixed_regs[regno]
4951 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4dd2ac2c
JH
4952}
4953
0903fcab
JH
4954/* Return number of registers to be saved on the stack. */
4955
4956static int
b96a374d 4957ix86_nsaved_regs (void)
0903fcab
JH
4958{
4959 int nregs = 0;
0903fcab
JH
4960 int regno;
4961
4dd2ac2c 4962 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
1020a5ab 4963 if (ix86_save_reg (regno, true))
4dd2ac2c 4964 nregs++;
0903fcab
JH
4965 return nregs;
4966}
4967
4968/* Return the offset between two registers, one to be eliminated, and the other
4969 its replacement, at the start of a routine. */
4970
4971HOST_WIDE_INT
b96a374d 4972ix86_initial_elimination_offset (int from, int to)
0903fcab 4973{
4dd2ac2c
JH
4974 struct ix86_frame frame;
4975 ix86_compute_frame_layout (&frame);
564d80f4
JH
4976
4977 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4dd2ac2c 4978 return frame.hard_frame_pointer_offset;
564d80f4
JH
4979 else if (from == FRAME_POINTER_REGNUM
4980 && to == HARD_FRAME_POINTER_REGNUM)
4dd2ac2c 4981 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
0903fcab
JH
4982 else
4983 {
564d80f4
JH
4984 if (to != STACK_POINTER_REGNUM)
4985 abort ();
4986 else if (from == ARG_POINTER_REGNUM)
4dd2ac2c 4987 return frame.stack_pointer_offset;
564d80f4
JH
4988 else if (from != FRAME_POINTER_REGNUM)
4989 abort ();
0903fcab 4990 else
4dd2ac2c 4991 return frame.stack_pointer_offset - frame.frame_pointer_offset;
0903fcab
JH
4992 }
4993}
4994
4dd2ac2c 4995/* Fill structure ix86_frame about frame of currently computed function. */
0f290768 4996
4dd2ac2c 4997static void
b96a374d 4998ix86_compute_frame_layout (struct ix86_frame *frame)
65954bd8 4999{
65954bd8 5000 HOST_WIDE_INT total_size;
564d80f4 5001 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
b19ee4bd 5002 HOST_WIDE_INT offset;
44affdae 5003 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4dd2ac2c 5004 HOST_WIDE_INT size = get_frame_size ();
65954bd8 5005
4dd2ac2c 5006 frame->nregs = ix86_nsaved_regs ();
564d80f4 5007 total_size = size;
65954bd8 5008
d7394366
JH
5009 /* During reload iteration the amount of registers saved can change.
5010 Recompute the value as needed. Do not recompute when amount of registers
5011 didn't change as reload does mutiple calls to the function and does not
5012 expect the decision to change within single iteration. */
5013 if (!optimize_size
5014 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
d9b40e8d
JH
5015 {
5016 int count = frame->nregs;
5017
d7394366 5018 cfun->machine->use_fast_prologue_epilogue_nregs = count;
d9b40e8d
JH
5019 /* The fast prologue uses move instead of push to save registers. This
5020 is significantly longer, but also executes faster as modern hardware
5021 can execute the moves in parallel, but can't do that for push/pop.
b96a374d 5022
d9b40e8d
JH
5023 Be careful about choosing what prologue to emit: When function takes
5024 many instructions to execute we may use slow version as well as in
5025 case function is known to be outside hot spot (this is known with
5026 feedback only). Weight the size of function by number of registers
5027 to save as it is cheap to use one or two push instructions but very
5028 slow to use many of them. */
5029 if (count)
5030 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
5031 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
5032 || (flag_branch_probabilities
5033 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
5034 cfun->machine->use_fast_prologue_epilogue = false;
5035 else
5036 cfun->machine->use_fast_prologue_epilogue
5037 = !expensive_function_p (count);
5038 }
5039 if (TARGET_PROLOGUE_USING_MOVE
5040 && cfun->machine->use_fast_prologue_epilogue)
5041 frame->save_regs_using_mov = true;
5042 else
5043 frame->save_regs_using_mov = false;
5044
5045
9ba81eaa 5046 /* Skip return address and saved base pointer. */
4dd2ac2c
JH
5047 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
5048
5049 frame->hard_frame_pointer_offset = offset;
564d80f4 5050
fcbfaa65
RK
5051 /* Do some sanity checking of stack_alignment_needed and
5052 preferred_alignment, since i386 port is the only using those features
f710504c 5053 that may break easily. */
564d80f4 5054
44affdae
JH
5055 if (size && !stack_alignment_needed)
5056 abort ();
44affdae
JH
5057 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
5058 abort ();
5059 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
5060 abort ();
5061 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
5062 abort ();
564d80f4 5063
4dd2ac2c
JH
5064 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5065 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
564d80f4 5066
4dd2ac2c
JH
5067 /* Register save area */
5068 offset += frame->nregs * UNITS_PER_WORD;
65954bd8 5069
8362f420
JH
5070 /* Va-arg area */
5071 if (ix86_save_varrargs_registers)
5072 {
5073 offset += X86_64_VARARGS_SIZE;
5074 frame->va_arg_size = X86_64_VARARGS_SIZE;
5075 }
5076 else
5077 frame->va_arg_size = 0;
5078
4dd2ac2c
JH
5079 /* Align start of frame for local function. */
5080 frame->padding1 = ((offset + stack_alignment_needed - 1)
5081 & -stack_alignment_needed) - offset;
f73ad30e 5082
4dd2ac2c 5083 offset += frame->padding1;
65954bd8 5084
4dd2ac2c
JH
5085 /* Frame pointer points here. */
5086 frame->frame_pointer_offset = offset;
54ff41b7 5087
4dd2ac2c 5088 offset += size;
65954bd8 5089
0b7ae565 5090 /* Add outgoing arguments area. Can be skipped if we eliminated
965514bd
JH
5091 all the function calls as dead code.
5092 Skipping is however impossible when function calls alloca. Alloca
5093 expander assumes that last current_function_outgoing_args_size
5094 of stack frame are unused. */
5095 if (ACCUMULATE_OUTGOING_ARGS
5096 && (!current_function_is_leaf || current_function_calls_alloca))
4dd2ac2c
JH
5097 {
5098 offset += current_function_outgoing_args_size;
5099 frame->outgoing_arguments_size = current_function_outgoing_args_size;
5100 }
5101 else
5102 frame->outgoing_arguments_size = 0;
564d80f4 5103
002ff5bc
RH
5104 /* Align stack boundary. Only needed if we're calling another function
5105 or using alloca. */
5106 if (!current_function_is_leaf || current_function_calls_alloca)
0b7ae565
RH
5107 frame->padding2 = ((offset + preferred_alignment - 1)
5108 & -preferred_alignment) - offset;
5109 else
5110 frame->padding2 = 0;
4dd2ac2c
JH
5111
5112 offset += frame->padding2;
5113
5114 /* We've reached end of stack frame. */
5115 frame->stack_pointer_offset = offset;
5116
5117 /* Size prologue needs to allocate. */
5118 frame->to_allocate =
5119 (size + frame->padding1 + frame->padding2
8362f420 5120 + frame->outgoing_arguments_size + frame->va_arg_size);
4dd2ac2c 5121
b19ee4bd
JJ
5122 if ((!frame->to_allocate && frame->nregs <= 1)
5123 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
d9b40e8d
JH
5124 frame->save_regs_using_mov = false;
5125
a5b378d6 5126 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
8362f420
JH
5127 && current_function_is_leaf)
5128 {
5129 frame->red_zone_size = frame->to_allocate;
d9b40e8d
JH
5130 if (frame->save_regs_using_mov)
5131 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
8362f420
JH
5132 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5133 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5134 }
5135 else
5136 frame->red_zone_size = 0;
5137 frame->to_allocate -= frame->red_zone_size;
5138 frame->stack_pointer_offset -= frame->red_zone_size;
4dd2ac2c
JH
5139#if 0
5140 fprintf (stderr, "nregs: %i\n", frame->nregs);
5141 fprintf (stderr, "size: %i\n", size);
5142 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
5143 fprintf (stderr, "padding1: %i\n", frame->padding1);
8362f420 5144 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
4dd2ac2c
JH
5145 fprintf (stderr, "padding2: %i\n", frame->padding2);
5146 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
8362f420 5147 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
4dd2ac2c
JH
5148 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
5149 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
5150 frame->hard_frame_pointer_offset);
5151 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
5152#endif
65954bd8
JL
5153}
5154
0903fcab
JH
5155/* Emit code to save registers in the prologue. */
5156
5157static void
b96a374d 5158ix86_emit_save_regs (void)
0903fcab 5159{
8d531ab9 5160 int regno;
0903fcab 5161 rtx insn;
0903fcab 5162
4dd2ac2c 5163 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
1020a5ab 5164 if (ix86_save_reg (regno, true))
0903fcab 5165 {
0d7d98ee 5166 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
0903fcab
JH
5167 RTX_FRAME_RELATED_P (insn) = 1;
5168 }
5169}
5170
c6036a37
JH
5171/* Emit code to save registers using MOV insns. First register
5172 is restored from POINTER + OFFSET. */
5173static void
b96a374d 5174ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
c6036a37
JH
5175{
5176 int regno;
5177 rtx insn;
5178
5179 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5180 if (ix86_save_reg (regno, true))
5181 {
b72f00af
RK
5182 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5183 Pmode, offset),
c6036a37
JH
5184 gen_rtx_REG (Pmode, regno));
5185 RTX_FRAME_RELATED_P (insn) = 1;
5186 offset += UNITS_PER_WORD;
5187 }
5188}
5189
839a4992 5190/* Expand prologue or epilogue stack adjustment.
b19ee4bd
JJ
5191 The pattern exist to put a dependency on all ebp-based memory accesses.
5192 STYLE should be negative if instructions should be marked as frame related,
5193 zero if %r11 register is live and cannot be freely used and positive
5194 otherwise. */
5195
5196static void
5197pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5198{
5199 rtx insn;
5200
5201 if (! TARGET_64BIT)
5202 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5203 else if (x86_64_immediate_operand (offset, DImode))
5204 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5205 else
5206 {
5207 rtx r11;
5208 /* r11 is used by indirect sibcall return as well, set before the
5209 epilogue and used after the epilogue. ATM indirect sibcall
5210 shouldn't be used together with huge frame sizes in one
5211 function because of the frame_size check in sibcall.c. */
5212 if (style == 0)
5213 abort ();
5214 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5215 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5216 if (style < 0)
5217 RTX_FRAME_RELATED_P (insn) = 1;
5218 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5219 offset));
5220 }
5221 if (style < 0)
5222 RTX_FRAME_RELATED_P (insn) = 1;
5223}
5224
0f290768 5225/* Expand the prologue into a bunch of separate insns. */
e075ae69
RH
5226
5227void
b96a374d 5228ix86_expand_prologue (void)
2a2ab3f9 5229{
564d80f4 5230 rtx insn;
bd09bdeb 5231 bool pic_reg_used;
4dd2ac2c 5232 struct ix86_frame frame;
c6036a37 5233 HOST_WIDE_INT allocate;
4dd2ac2c 5234
4977bab6 5235 ix86_compute_frame_layout (&frame);
79325812 5236
e075ae69
RH
5237 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5238 slower on all targets. Also sdb doesn't like it. */
e9a25f70 5239
2a2ab3f9
JVA
5240 if (frame_pointer_needed)
5241 {
564d80f4 5242 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
e075ae69 5243 RTX_FRAME_RELATED_P (insn) = 1;
e9a25f70 5244
564d80f4 5245 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
e075ae69 5246 RTX_FRAME_RELATED_P (insn) = 1;
2a2ab3f9
JVA
5247 }
5248
c6036a37 5249 allocate = frame.to_allocate;
c6036a37 5250
d9b40e8d 5251 if (!frame.save_regs_using_mov)
c6036a37
JH
5252 ix86_emit_save_regs ();
5253 else
5254 allocate += frame.nregs * UNITS_PER_WORD;
564d80f4 5255
d9b40e8d
JH
5256 /* When using red zone we may start register saving before allocating
5257 the stack frame saving one cycle of the prologue. */
5258 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5259 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5260 : stack_pointer_rtx,
5261 -frame.nregs * UNITS_PER_WORD);
5262
c6036a37 5263 if (allocate == 0)
8dfe5673 5264 ;
e323735c 5265 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
b19ee4bd
JJ
5266 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5267 GEN_INT (-allocate), -1);
79325812 5268 else
8dfe5673 5269 {
fe9f516f
RH
5270 /* Only valid for Win32. */
5271 rtx eax = gen_rtx_REG (SImode, 0);
5272 bool eax_live = ix86_eax_live_at_start_p ();
e9a25f70 5273
8362f420 5274 if (TARGET_64BIT)
b1177d69 5275 abort ();
e075ae69 5276
fe9f516f
RH
5277 if (eax_live)
5278 {
5279 emit_insn (gen_push (eax));
5280 allocate -= 4;
5281 }
5282
5283 insn = emit_move_insn (eax, GEN_INT (allocate));
b1177d69 5284 RTX_FRAME_RELATED_P (insn) = 1;
98417968 5285
b1177d69
KC
5286 insn = emit_insn (gen_allocate_stack_worker (eax));
5287 RTX_FRAME_RELATED_P (insn) = 1;
fe9f516f
RH
5288
5289 if (eax_live)
5290 {
5291 rtx t = plus_constant (stack_pointer_rtx, allocate);
5292 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
5293 }
e075ae69 5294 }
fe9f516f 5295
d9b40e8d 5296 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
c6036a37
JH
5297 {
5298 if (!frame_pointer_needed || !frame.to_allocate)
5299 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
5300 else
5301 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
5302 -frame.nregs * UNITS_PER_WORD);
5303 }
e9a25f70 5304
bd09bdeb
RH
5305 pic_reg_used = false;
5306 if (pic_offset_table_rtx
5307 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5308 || current_function_profile))
5309 {
5310 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
5311
5312 if (alt_pic_reg_used != INVALID_REGNUM)
5313 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
5314
5315 pic_reg_used = true;
5316 }
5317
e9a25f70 5318 if (pic_reg_used)
c8c03509
RH
5319 {
5320 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
5321
66edd3b4
RH
5322 /* Even with accurate pre-reload life analysis, we can wind up
5323 deleting all references to the pic register after reload.
5324 Consider if cross-jumping unifies two sides of a branch
d1f87653 5325 controlled by a comparison vs the only read from a global.
66edd3b4
RH
5326 In which case, allow the set_got to be deleted, though we're
5327 too late to do anything about the ebx save in the prologue. */
c8c03509
RH
5328 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5329 }
77a989d1 5330
66edd3b4
RH
5331 /* Prevent function calls from be scheduled before the call to mcount.
5332 In the pic_reg_used case, make sure that the got load isn't deleted. */
5333 if (current_function_profile)
5334 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
77a989d1
SC
5335}
5336
da2d1d3a
JH
5337/* Emit code to restore saved registers using MOV insns. First register
5338 is restored from POINTER + OFFSET. */
5339static void
72613dfa
JH
5340ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
5341 int maybe_eh_return)
da2d1d3a
JH
5342{
5343 int regno;
72613dfa 5344 rtx base_address = gen_rtx_MEM (Pmode, pointer);
da2d1d3a 5345
4dd2ac2c 5346 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
1020a5ab 5347 if (ix86_save_reg (regno, maybe_eh_return))
da2d1d3a 5348 {
72613dfa
JH
5349 /* Ensure that adjust_address won't be forced to produce pointer
5350 out of range allowed by x86-64 instruction set. */
5351 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
5352 {
5353 rtx r11;
5354
5355 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5356 emit_move_insn (r11, GEN_INT (offset));
5357 emit_insn (gen_adddi3 (r11, r11, pointer));
5358 base_address = gen_rtx_MEM (Pmode, r11);
5359 offset = 0;
5360 }
4dd2ac2c 5361 emit_move_insn (gen_rtx_REG (Pmode, regno),
72613dfa 5362 adjust_address (base_address, Pmode, offset));
4dd2ac2c 5363 offset += UNITS_PER_WORD;
da2d1d3a
JH
5364 }
5365}
5366
/* Restore function stack, frame, and registers.

   STYLE encodes the epilogue context: 0 for sibcall epilogues (no
   return instruction is emitted), 2 for eh_return epilogues (the
   EH_RETURN_STACKADJ_RTX adjustment is applied to the stack pointer),
   anything else for a normal epilogue.  */

void
ix86_expand_epilogue (int style)
{
  int regno;
  int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
  struct ix86_frame frame;
  HOST_WIDE_INT offset;

  ix86_compute_frame_layout (&frame);

  /* Calculate start of saved registers relative to ebp.  Special care
     must be taken for the normal return case of a function using
     eh_return: the eax and edx registers are marked as saved, but not
     restored along this path.  */
  offset = frame.nregs;
  if (current_function_calls_eh_return && style != 2)
    offset -= 2;
  offset *= -UNITS_PER_WORD;

  /* If we're only restoring one register and sp is not valid then
     using a move instruction to restore the register since it's
     less work than reloading sp and popping the register.

     The default code result in stack adjustment using add/lea instruction,
     while this code results in LEAVE instruction (or discrete equivalent),
     so it is profitable in some other cases as well.  Especially when there
     are no registers to restore.  We also use this code when TARGET_USE_LEAVE
     and there is exactly one register to pop.  This heuristic may need some
     tuning in future.  */
  if ((!sp_valid && frame.nregs <= 1)
      || (TARGET_EPILOGUE_USING_MOVE
	  && cfun->machine->use_fast_prologue_epilogue
	  && (frame.nregs > 1 || frame.to_allocate))
      || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
      || (frame_pointer_needed && TARGET_USE_LEAVE
	  && cfun->machine->use_fast_prologue_epilogue
	  && frame.nregs == 1)
      || current_function_calls_eh_return)
    {
      /* Restore registers.  We can use ebp or esp to address the memory
	 locations.  If both are available, default to ebp, since offsets
	 are known to be small.  Only exception is esp pointing directly to the
	 end of block of saved registers, where we may simplify addressing
	 mode.  */

      if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
	ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
					  frame.to_allocate, style == 2);
      else
	ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
					  offset, style == 2);

      /* eh_return epilogues need %ecx added to the stack pointer.  */
      if (style == 2)
	{
	  rtx tmp, sa = EH_RETURN_STACKADJ_RTX;

	  if (frame_pointer_needed)
	    {
	      /* Compute the new stack pointer (frame pointer + adjust +
		 one word for the saved frame pointer), reload the frame
		 pointer, then adjust the stack.  */
	      tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
	      tmp = plus_constant (tmp, UNITS_PER_WORD);
	      emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));

	      tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
	      emit_move_insn (hard_frame_pointer_rtx, tmp);

	      pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
					 const0_rtx, style);
	    }
	  else
	    {
	      tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
	      tmp = plus_constant (tmp, (frame.to_allocate
					 + frame.nregs * UNITS_PER_WORD));
	      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
	    }
	}
      else if (!frame_pointer_needed)
	pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				   GEN_INT (frame.to_allocate
					    + frame.nregs * UNITS_PER_WORD),
				   style);
      /* If not an i386, mov & pop is faster than "leave".  */
      else if (TARGET_USE_LEAVE || optimize_size
	       || !cfun->machine->use_fast_prologue_epilogue)
	emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
      else
	{
	  pro_epilogue_adjust_stack (stack_pointer_rtx,
				     hard_frame_pointer_rtx,
				     const0_rtx, style);
	  if (TARGET_64BIT)
	    emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
	  else
	    emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
	}
    }
  else
    {
      /* First step is to deallocate the stack frame so that we can
	 pop the registers.  */
      if (!sp_valid)
	{
	  if (!frame_pointer_needed)
	    abort ();
	  pro_epilogue_adjust_stack (stack_pointer_rtx,
				     hard_frame_pointer_rtx,
				     GEN_INT (offset), style);
	}
      else if (frame.to_allocate)
	pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				   GEN_INT (frame.to_allocate), style);

      /* Pop each saved register off the stack in register-number order.  */
      for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
	if (ix86_save_reg (regno, false))
	  {
	    if (TARGET_64BIT)
	      emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
	    else
	      emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
	  }
      if (frame_pointer_needed)
	{
	  /* Leave results in shorter dependency chains on CPUs that are
	     able to grok it fast.  */
	  if (TARGET_USE_LEAVE)
	    emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
	  else if (TARGET_64BIT)
	    emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
	  else
	    emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
	}
    }

  /* Sibcall epilogues don't want a return instruction.  */
  if (style == 0)
    return;

  if (current_function_pops_args && current_function_args_size)
    {
      rtx popc = GEN_INT (current_function_pops_args);

      /* i386 can only pop 64K bytes.  If asked to pop more, pop
	 return address, do explicit add, and jump indirectly to the
	 caller.  */

      if (current_function_pops_args >= 65536)
	{
	  rtx ecx = gen_rtx_REG (SImode, 2);

	  /* There is no "pascal" calling convention in 64bit ABI.  */
	  if (TARGET_64BIT)
	    abort ();

	  emit_insn (gen_popsi1 (ecx));
	  emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
	  emit_jump_insn (gen_return_indirect_internal (ecx));
	}
      else
	emit_jump_insn (gen_return_pop_internal (popc));
    }
  else
    emit_jump_insn (gen_return_internal ());
}
bd09bdeb
RH
5533
5534/* Reset from the function's potential modifications. */
5535
5536static void
b96a374d
AJ
5537ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
5538 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
bd09bdeb
RH
5539{
5540 if (pic_offset_table_rtx)
5541 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
5542}
e075ae69
RH
5543\f
/* Extract the parts of an RTL expression that is a valid memory address
   for an instruction.  Return 0 if the structure of the address is
   grossly off.  Return -1 if the address contains ASHIFT, so it is not
   strictly valid, but still used for computing length of lea instruction.
   On success fill *OUT with the base, index, displacement, scale and
   segment parts.  */

static int
ix86_decompose_address (rtx addr, struct ix86_address *out)
{
  rtx base = NULL_RTX;
  rtx index = NULL_RTX;
  rtx disp = NULL_RTX;
  HOST_WIDE_INT scale = 1;
  rtx scale_rtx = NULL_RTX;
  int retval = 1;
  enum ix86_address_seg seg = SEG_DEFAULT;

  if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
    base = addr;
  else if (GET_CODE (addr) == PLUS)
    {
      /* Flatten a left-nested chain of at most four PLUS addends into
	 ADDENDS, then classify each addend into base/index/scale/disp/
	 segment.  More than four addends cannot form a valid address.  */
      rtx addends[4], op;
      int n = 0, i;

      op = addr;
      do
	{
	  if (n >= 4)
	    return 0;
	  addends[n++] = XEXP (op, 1);
	  op = XEXP (op, 0);
	}
      while (GET_CODE (op) == PLUS);
      if (n >= 4)
	return 0;
      addends[n] = op;

      for (i = n; i >= 0; --i)
	{
	  op = addends[i];
	  switch (GET_CODE (op))
	    {
	    case MULT:
	      if (index)
		return 0;
	      index = XEXP (op, 0);
	      scale_rtx = XEXP (op, 1);
	      break;

	    case UNSPEC:
	      /* A thread-pointer UNSPEC selects the %fs/%gs segment
		 when direct TLS segment references are enabled.  */
	      if (XINT (op, 1) == UNSPEC_TP
		  && TARGET_TLS_DIRECT_SEG_REFS
		  && seg == SEG_DEFAULT)
		seg = TARGET_64BIT ? SEG_FS : SEG_GS;
	      else
		return 0;
	      break;

	    case REG:
	    case SUBREG:
	      /* First register seen becomes the base, second the index.  */
	      if (!base)
		base = op;
	      else if (!index)
		index = op;
	      else
		return 0;
	      break;

	    case CONST:
	    case CONST_INT:
	    case SYMBOL_REF:
	    case LABEL_REF:
	      if (disp)
		return 0;
	      disp = op;
	      break;

	    default:
	      return 0;
	    }
	}
    }
  else if (GET_CODE (addr) == MULT)
    {
      index = XEXP (addr, 0);		/* index*scale */
      scale_rtx = XEXP (addr, 1);
    }
  else if (GET_CODE (addr) == ASHIFT)
    {
      rtx tmp;

      /* We're called for lea too, which implements ashift on occasion.  */
      index = XEXP (addr, 0);
      tmp = XEXP (addr, 1);
      if (GET_CODE (tmp) != CONST_INT)
	return 0;
      scale = INTVAL (tmp);
      if ((unsigned HOST_WIDE_INT) scale > 3)
	return 0;
      scale = 1 << scale;
      retval = -1;
    }
  else
    disp = addr;			/* displacement */

  /* Extract the integral value of scale.  */
  if (scale_rtx)
    {
      if (GET_CODE (scale_rtx) != CONST_INT)
	return 0;
      scale = INTVAL (scale_rtx);
    }

  /* Allow arg pointer and stack pointer as index if there is not scaling.
     Swap them into the base slot, since they cannot be encoded as index.  */
  if (base && index && scale == 1
      && (index == arg_pointer_rtx
	  || index == frame_pointer_rtx
	  || (REG_P (index) && REGNO (index) == STACK_POINTER_REGNUM)))
    {
      rtx tmp = base;
      base = index;
      index = tmp;
    }

  /* Special case: %ebp cannot be encoded as a base without a displacement.  */
  if ((base == hard_frame_pointer_rtx
       || base == frame_pointer_rtx
       || base == arg_pointer_rtx) && !disp)
    disp = const0_rtx;

  /* Special case: on K6, [%esi] makes the instruction vector decoded.
     Avoid this by transforming to [%esi+0].  */
  if (ix86_tune == PROCESSOR_K6 && !optimize_size
      && base && !index && !disp
      && REG_P (base)
      && REGNO_REG_CLASS (REGNO (base)) == SIREG)
    disp = const0_rtx;

  /* Special case: encode reg+reg instead of reg*2.  */
  if (!base && index && scale && scale == 2)
    base = index, scale = 1;

  /* Special case: scaling cannot be encoded without base or displacement.  */
  if (!base && !disp && index && scale != 1)
    disp = const0_rtx;

  out->base = base;
  out->index = index;
  out->disp = disp;
  out->scale = scale;
  out->seg = seg;

  return retval;
}
01329426
JH
5697\f
5698/* Return cost of the memory address x.
5699 For i386, it is better to use a complex address than let gcc copy
5700 the address into a reg and make a new pseudo. But not if the address
5701 requires to two regs - that would mean more pseudos with longer
5702 lifetimes. */
dcefdf67 5703static int
b96a374d 5704ix86_address_cost (rtx x)
01329426
JH
5705{
5706 struct ix86_address parts;
5707 int cost = 1;
3b3c6a3f 5708
01329426
JH
5709 if (!ix86_decompose_address (x, &parts))
5710 abort ();
5711
5712 /* More complex memory references are better. */
5713 if (parts.disp && parts.disp != const0_rtx)
5714 cost--;
74dc3e94
RH
5715 if (parts.seg != SEG_DEFAULT)
5716 cost--;
01329426
JH
5717
5718 /* Attempt to minimize number of registers in the address. */
5719 if ((parts.base
5720 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
5721 || (parts.index
5722 && (!REG_P (parts.index)
5723 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
5724 cost++;
5725
5726 if (parts.base
5727 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
5728 && parts.index
5729 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
5730 && parts.base != parts.index)
5731 cost++;
5732
5733 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
5734 since it's predecode logic can't detect the length of instructions
5735 and it degenerates to vector decoded. Increase cost of such
5736 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
0f290768 5737 to split such addresses or even refuse such addresses at all.
01329426
JH
5738
5739 Following addressing modes are affected:
5740 [base+scale*index]
5741 [scale*index+disp]
5742 [base+index]
0f290768 5743
01329426
JH
5744 The first and last case may be avoidable by explicitly coding the zero in
5745 memory address, but I don't have AMD-K6 machine handy to check this
5746 theory. */
5747
5748 if (TARGET_K6
5749 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
5750 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
5751 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
5752 cost += 10;
0f290768 5753
01329426
JH
5754 return cost;
5755}
5756\f
b949ea8b
JW
5757/* If X is a machine specific address (i.e. a symbol or label being
5758 referenced as a displacement from the GOT implemented using an
5759 UNSPEC), then return the base term. Otherwise return X. */
5760
5761rtx
b96a374d 5762ix86_find_base_term (rtx x)
b949ea8b
JW
5763{
5764 rtx term;
5765
6eb791fc
JH
5766 if (TARGET_64BIT)
5767 {
5768 if (GET_CODE (x) != CONST)
5769 return x;
5770 term = XEXP (x, 0);
5771 if (GET_CODE (term) == PLUS
5772 && (GET_CODE (XEXP (term, 1)) == CONST_INT
5773 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
5774 term = XEXP (term, 0);
5775 if (GET_CODE (term) != UNSPEC
8ee41eaf 5776 || XINT (term, 1) != UNSPEC_GOTPCREL)
6eb791fc
JH
5777 return x;
5778
5779 term = XVECEXP (term, 0, 0);
5780
5781 if (GET_CODE (term) != SYMBOL_REF
5782 && GET_CODE (term) != LABEL_REF)
5783 return x;
5784
5785 return term;
5786 }
5787
69bd9368 5788 term = ix86_delegitimize_address (x);
b949ea8b
JW
5789
5790 if (GET_CODE (term) != SYMBOL_REF
5791 && GET_CODE (term) != LABEL_REF)
5792 return x;
5793
5794 return term;
5795}
5796\f
f996902d
RH
/* Determine if a given RTX is a valid constant.  We already know this
   satisfies CONSTANT_P.  Returns false only for TLS-related forms that
   must not be treated as link-time constants.  */

bool
legitimate_constant_p (rtx x)
{
  rtx inner;

  switch (GET_CODE (x))
    {
    case SYMBOL_REF:
      /* TLS symbols are not constant.  */
      if (tls_symbolic_operand (x, Pmode))
	return false;
      break;

    case CONST:
      inner = XEXP (x, 0);

      /* Offsets of TLS symbols are never valid.
	 Discourage CSE from creating them.  */
      if (GET_CODE (inner) == PLUS
	  && tls_symbolic_operand (XEXP (inner, 0), Pmode))
	return false;

      /* Strip a constant integer offset so the UNSPEC check below sees
	 the underlying term.  */
      if (GET_CODE (inner) == PLUS)
	{
	  if (GET_CODE (XEXP (inner, 1)) != CONST_INT)
	    return false;
	  inner = XEXP (inner, 0);
	}

      /* Only some unspecs are valid as "constants".  */
      if (GET_CODE (inner) == UNSPEC)
	switch (XINT (inner, 1))
	  {
	  case UNSPEC_TPOFF:
	  case UNSPEC_NTPOFF:
	    return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
	  case UNSPEC_DTPOFF:
	    return local_dynamic_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
	  default:
	    return false;
	  }
      break;

    default:
      break;
    }

  /* Otherwise we handle everything else in the move patterns.  */
  return true;
}
5850
3a04ff64
RH
5851/* Determine if it's legal to put X into the constant pool. This
5852 is not possible for the address of thread-local symbols, which
5853 is checked above. */
5854
5855static bool
b96a374d 5856ix86_cannot_force_const_mem (rtx x)
3a04ff64
RH
5857{
5858 return !legitimate_constant_p (x);
5859}
5860
f996902d
RH
5861/* Determine if a given RTX is a valid constant address. */
5862
5863bool
b96a374d 5864constant_address_p (rtx x)
f996902d 5865{
a94f136b 5866 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
f996902d
RH
5867}
5868
/* Nonzero if the constant value X is a legitimate general operand
   when generating PIC code.  It is given that flag_pic is on and
   that X satisfies CONSTANT_P or is a CONST_DOUBLE.  */

bool
legitimate_pic_operand_p (rtx x)
{
  rtx inner;

  switch (GET_CODE (x))
    {
    case CONST:
      inner = XEXP (x, 0);

      /* Only some unspecs are valid as "constants".  */
      if (GET_CODE (inner) == UNSPEC)
	switch (XINT (inner, 1))
	  {
	  case UNSPEC_TPOFF:
	    return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
	  default:
	    return false;
	  }
      /* A CONST that is not an UNSPEC is checked as a displacement,
	 same as a plain symbol or label.  */
      /* FALLTHRU */

    case SYMBOL_REF:
    case LABEL_REF:
      return legitimate_pic_address_disp_p (x);

    default:
      return true;
    }
}
5902
e075ae69
RH
/* Determine if a given CONST RTX is a valid memory displacement
   in PIC mode.  Nonzero means DISP may be used directly.  */

int
legitimate_pic_address_disp_p (rtx disp)
{
  bool saw_plus;

  /* In 64bit mode we can allow direct addresses of symbols and labels
     when they are not dynamic symbols.  */
  if (TARGET_64BIT)
    {
      /* TLS references should always be enclosed in UNSPEC.  */
      if (tls_symbolic_operand (disp, GET_MODE (disp)))
	return 0;
      if (GET_CODE (disp) == SYMBOL_REF
	  && ix86_cmodel == CM_SMALL_PIC
	  && SYMBOL_REF_LOCAL_P (disp))
	return 1;
      if (GET_CODE (disp) == LABEL_REF)
	return 1;
      if (GET_CODE (disp) == CONST
	  && GET_CODE (XEXP (disp, 0)) == PLUS)
	{
	  rtx op0 = XEXP (XEXP (disp, 0), 0);
	  rtx op1 = XEXP (XEXP (disp, 0), 1);

	  /* TLS references should always be enclosed in UNSPEC.  */
	  if (tls_symbolic_operand (op0, GET_MODE (op0)))
	    return 0;
	  /* Local symbol or label plus an offset within +/-16MB.  */
	  if (((GET_CODE (op0) == SYMBOL_REF
		&& ix86_cmodel == CM_SMALL_PIC
		&& SYMBOL_REF_LOCAL_P (op0))
	       || GET_CODE (op0) == LABEL_REF)
	      && GET_CODE (op1) == CONST_INT
	      && INTVAL (op1) < 16*1024*1024
	      && INTVAL (op1) >= -16*1024*1024)
	    return 1;
	}
    }
  if (GET_CODE (disp) != CONST)
    return 0;
  disp = XEXP (disp, 0);

  if (TARGET_64BIT)
    {
      /* We are unsafe to allow PLUS expressions.  This limit allowed distance
	 of GOT tables.  We should not need these anyway.  */
      if (GET_CODE (disp) != UNSPEC
	  || XINT (disp, 1) != UNSPEC_GOTPCREL)
	return 0;

      if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
	  && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
	return 0;
      return 1;
    }

  /* 32-bit: strip an outer constant offset, remembering that we saw it,
     since some unspecs below are invalid with an offset.  */
  saw_plus = false;
  if (GET_CODE (disp) == PLUS)
    {
      if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
	return 0;
      disp = XEXP (disp, 0);
      saw_plus = true;
    }

  /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O.  */
  if (TARGET_MACHO && GET_CODE (disp) == MINUS)
    {
      if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
	  || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
	if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
	  {
	    const char *sym_name = XSTR (XEXP (disp, 1), 0);
	    if (! strcmp (sym_name, "<pic base>"))
	      return 1;
	  }
    }

  if (GET_CODE (disp) != UNSPEC)
    return 0;

  switch (XINT (disp, 1))
    {
    case UNSPEC_GOT:
      if (saw_plus)
	return false;
      return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
    case UNSPEC_GOTOFF:
      if (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
	  || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
	return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
      return false;
    case UNSPEC_GOTTPOFF:
    case UNSPEC_GOTNTPOFF:
    case UNSPEC_INDNTPOFF:
      if (saw_plus)
	return false;
      return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
    case UNSPEC_NTPOFF:
      return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
    case UNSPEC_DTPOFF:
      return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
    }

  return 0;
}
6011
e075ae69
RH
/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
   memory address for an instruction.  The MODE argument is the machine mode
   for the MEM expression that wants to use this address.

   It only recognizes address in canonical form.  LEGITIMIZE_ADDRESS should
   convert common non-canonical forms to canonical form so that they will
   be recognized.

   Returns TRUE if ADDR is valid, FALSE otherwise; when TARGET_DEBUG_ADDR
   is set, the reason for a rejection is printed to stderr.  */

int
legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
{
  struct ix86_address parts;
  rtx base, index, disp;
  HOST_WIDE_INT scale;
  const char *reason = NULL;
  rtx reason_rtx = NULL_RTX;

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr,
	       "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
	       GET_MODE_NAME (mode), strict);
      debug_rtx (addr);
    }

  if (ix86_decompose_address (addr, &parts) <= 0)
    {
      reason = "decomposition failed";
      goto report_error;
    }

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;

  /* Validate base register.

     Don't allow SUBREG's here, it can lead to spill failures when the base
     is one word out of a two word structure, which is represented internally
     as a DImode int.  */

  if (base)
    {
      reason_rtx = base;

      if (GET_CODE (base) != REG)
	{
	  reason = "base is not a register";
	  goto report_error;
	}

      if (GET_MODE (base) != Pmode)
	{
	  reason = "base is not in Pmode";
	  goto report_error;
	}

      if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
	  || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
	{
	  reason = "base is not valid";
	  goto report_error;
	}
    }

  /* Validate index register.

     Don't allow SUBREG's here, it can lead to spill failures when the index
     is one word out of a two word structure, which is represented internally
     as a DImode int.  */

  if (index)
    {
      reason_rtx = index;

      if (GET_CODE (index) != REG)
	{
	  reason = "index is not a register";
	  goto report_error;
	}

      if (GET_MODE (index) != Pmode)
	{
	  reason = "index is not in Pmode";
	  goto report_error;
	}

      if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
	  || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
	{
	  reason = "index is not valid";
	  goto report_error;
	}
    }

  /* Validate scale factor.  */
  if (scale != 1)
    {
      reason_rtx = GEN_INT (scale);
      if (!index)
	{
	  reason = "scale without index";
	  goto report_error;
	}

      if (scale != 2 && scale != 4 && scale != 8)
	{
	  reason = "scale is not a valid multiplier";
	  goto report_error;
	}
    }

  /* Validate displacement.  */
  if (disp)
    {
      reason_rtx = disp;

      if (GET_CODE (disp) == CONST
	  && GET_CODE (XEXP (disp, 0)) == UNSPEC)
	switch (XINT (XEXP (disp, 0), 1))
	  {
	  case UNSPEC_GOT:
	  case UNSPEC_GOTOFF:
	  case UNSPEC_GOTPCREL:
	    if (!flag_pic)
	      abort ();
	    goto is_legitimate_pic;

	  case UNSPEC_GOTTPOFF:
	  case UNSPEC_GOTNTPOFF:
	  case UNSPEC_INDNTPOFF:
	  case UNSPEC_NTPOFF:
	  case UNSPEC_DTPOFF:
	    break;

	  default:
	    reason = "invalid address unspec";
	    goto report_error;
	  }

      else if (flag_pic && (SYMBOLIC_CONST (disp)
#if TARGET_MACHO
			    && !machopic_operand_p (disp)
#endif
			    ))
	{
	is_legitimate_pic:
	  if (TARGET_64BIT && (index || base))
	    {
	      /* foo@dtpoff(%rX) is ok.  */
	      if (GET_CODE (disp) != CONST
		  || GET_CODE (XEXP (disp, 0)) != PLUS
		  || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
		  || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
		  || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
		      && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
		{
		  reason = "non-constant pic memory reference";
		  goto report_error;
		}
	    }
	  else if (! legitimate_pic_address_disp_p (disp))
	    {
	      reason = "displacement is an invalid pic construct";
	      goto report_error;
	    }

	  /* This code used to verify that a symbolic pic displacement
	     includes the pic_offset_table_rtx register.

	     While this is good idea, unfortunately these constructs may
	     be created by "adds using lea" optimization for incorrect
	     code like:

	     int a;
	     int foo(int i)
	       {
	         return *(&a+i);
	       }

	     This code is nonsensical, but results in addressing
	     GOT table with pic_offset_table_rtx base.  We can't
	     just refuse it easily, since it gets matched by
	     "addsi3" pattern, that later gets split to lea in the
	     case output register differs from input.  While this
	     can be handled by separate addsi pattern for this case
	     that never results in lea, this seems to be easier and
	     correct fix for crash to disable this test.  */
	}
      else if (GET_CODE (disp) != LABEL_REF
	       && GET_CODE (disp) != CONST_INT
	       && (GET_CODE (disp) != CONST
		   || !legitimate_constant_p (disp))
	       && (GET_CODE (disp) != SYMBOL_REF
		   || !legitimate_constant_p (disp)))
	{
	  reason = "displacement is not constant";
	  goto report_error;
	}
      else if (TARGET_64BIT && !x86_64_sign_extended_value (disp))
	{
	  reason = "displacement is out of range";
	  goto report_error;
	}
    }

  /* Everything looks valid.  */
  if (TARGET_DEBUG_ADDR)
    fprintf (stderr, "Success.\n");
  return TRUE;

 report_error:
  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr, "Error: %s\n", reason);
      debug_rtx (reason_rtx);
    }
  return FALSE;
}
3b3c6a3f 6232\f
55efb413
JW
6233/* Return an unique alias set for the GOT. */
6234
0f290768 6235static HOST_WIDE_INT
b96a374d 6236ix86_GOT_alias_set (void)
55efb413 6237{
5bf0ebab
RH
6238 static HOST_WIDE_INT set = -1;
6239 if (set == -1)
6240 set = new_alias_set ();
6241 return set;
0f290768 6242}
55efb413 6243
3b3c6a3f
MM
/* Return a legitimate reference for ORIG (an address) using the
   register REG.  If REG is 0, a new pseudo is generated.

   There are two types of references that must be handled:

   1. Global data references must load the address from the GOT, via
      the PIC reg.  An insn is emitted to do this load, and the reg is
      returned.

   2. Static data references, constant pool addresses, and code labels
      compute the address as an offset from the GOT, whose base is in
      the PIC reg.  Static data objects have SYMBOL_FLAG_LOCAL set to
      differentiate them from global data objects.  The returned
      address is the PIC reg + an unspec constant.

   GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
   reg also appears in the address.  */

rtx
legitimize_pic_address (rtx orig, rtx reg)
{
  rtx addr = orig;
  rtx new = orig;
  rtx base;

#if TARGET_MACHO
  if (reg == 0)
    reg = gen_reg_rtx (Pmode);
  /* Use the generic Mach-O PIC machinery.  */
  return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
#endif

  if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
    new = addr;
  else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
    {
      /* This symbol may be referenced via a displacement from the PIC
	 base address (@GOTOFF).  */

      if (reload_in_progress)
	regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
      if (GET_CODE (addr) == CONST)
	addr = XEXP (addr, 0);
      if (GET_CODE (addr) == PLUS)
	{
	  /* Wrap only the symbolic part in the GOTOFF unspec; keep the
	     integer offset outside.  */
	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
	  new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
	}
      else
	new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
      new = gen_rtx_CONST (Pmode, new);
      new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);

      if (reg != 0)
	{
	  emit_move_insn (reg, new);
	  new = reg;
	}
    }
  else if (GET_CODE (addr) == SYMBOL_REF)
    {
      if (TARGET_64BIT)
	{
	  /* RIP-relative load through the GOT (@GOTPCREL).  */
	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
	  new = gen_rtx_CONST (Pmode, new);
	  new = gen_rtx_MEM (Pmode, new);
	  RTX_UNCHANGING_P (new) = 1;
	  set_mem_alias_set (new, ix86_GOT_alias_set ());

	  if (reg == 0)
	    reg = gen_reg_rtx (Pmode);
	  /* Use directly gen_movsi, otherwise the address is loaded
	     into register for CSE.  We don't want to CSE this addresses,
	     instead we CSE addresses from the GOT table, so skip this.  */
	  emit_insn (gen_movsi (reg, new));
	  new = reg;
	}
      else
	{
	  /* This symbol must be referenced via a load from the
	     Global Offset Table (@GOT).  */

	  if (reload_in_progress)
	    regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
	  new = gen_rtx_CONST (Pmode, new);
	  new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
	  new = gen_rtx_MEM (Pmode, new);
	  RTX_UNCHANGING_P (new) = 1;
	  set_mem_alias_set (new, ix86_GOT_alias_set ());

	  if (reg == 0)
	    reg = gen_reg_rtx (Pmode);
	  emit_move_insn (reg, new);
	  new = reg;
	}
    }
  else
    {
      if (GET_CODE (addr) == CONST)
	{
	  addr = XEXP (addr, 0);

	  /* We must match stuff we generate before.  Assume the only
	     unspecs that can get here are ours.  Not that we could do
	     anything with them anyway....  */
	  if (GET_CODE (addr) == UNSPEC
	      || (GET_CODE (addr) == PLUS
		  && GET_CODE (XEXP (addr, 0)) == UNSPEC))
	    return orig;
	  if (GET_CODE (addr) != PLUS)
	    abort ();
	}
      if (GET_CODE (addr) == PLUS)
	{
	  rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);

	  /* Check first to see if this is a constant offset from a @GOTOFF
	     symbol reference.  */
	  if (local_symbolic_operand (op0, Pmode)
	      && GET_CODE (op1) == CONST_INT)
	    {
	      if (!TARGET_64BIT)
		{
		  if (reload_in_progress)
		    regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
		  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
					UNSPEC_GOTOFF);
		  new = gen_rtx_PLUS (Pmode, new, op1);
		  new = gen_rtx_CONST (Pmode, new);
		  new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);

		  if (reg != 0)
		    {
		      emit_move_insn (reg, new);
		      new = reg;
		    }
		}
	      else
		{
		  /* 64-bit: only offsets outside +/-16MB need fixing;
		     force the large offset into a register.  */
		  if (INTVAL (op1) < -16*1024*1024
		      || INTVAL (op1) >= 16*1024*1024)
		    new = gen_rtx_PLUS (Pmode, op0, force_reg (Pmode, op1));
		}
	    }
	  else
	    {
	      /* Legitimize both operands recursively and recombine,
	         keeping any constant term outermost.  */
	      base = legitimize_pic_address (XEXP (addr, 0), reg);
	      new  = legitimize_pic_address (XEXP (addr, 1),
					     base == reg ? NULL_RTX : reg);

	      if (GET_CODE (new) == CONST_INT)
		new = plus_constant (base, INTVAL (new));
	      else
		{
		  if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
		    {
		      base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
		      new = XEXP (new, 1);
		    }
		  new = gen_rtx_PLUS (Pmode, base, new);
		}
	    }
	}
    }
  return new;
}
6411\f
74dc3e94 6412/* Load the thread pointer. If TO_REG is true, force it into a register. */
f996902d
RH
6413
6414static rtx
b96a374d 6415get_thread_pointer (int to_reg)
f996902d 6416{
74dc3e94 6417 rtx tp, reg, insn;
f996902d
RH
6418
6419 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
74dc3e94
RH
6420 if (!to_reg)
6421 return tp;
f996902d 6422
74dc3e94
RH
6423 reg = gen_reg_rtx (Pmode);
6424 insn = gen_rtx_SET (VOIDmode, reg, tp);
6425 insn = emit_insn (insn);
6426
6427 return reg;
6428}
6429
/* A subroutine of legitimize_address and ix86_expand_move.  FOR_MOV is
   false if we expect this to be used for a memory address and true if
   we expect to load the address into a register.  Returns an rtx for
   the address of thread-local variable X under access MODEL.  */

static rtx
legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
{
  rtx dest, base, off, pic;
  int type;

  switch (model)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      dest = gen_reg_rtx (Pmode);
      if (TARGET_64BIT)
	{
	  /* Register 0 is %rax, where the 64-bit pattern returns.  */
	  rtx rax = gen_rtx_REG (Pmode, 0), insns;

	  start_sequence ();
	  emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
	  insns = get_insns ();
	  end_sequence ();

	  /* Present the call sequence as a single libcall computing X,
	     so later passes may treat DEST as equivalent to X.  */
	  emit_libcall_block (insns, dest, rax, x);
	}
      else
	emit_insn (gen_tls_global_dynamic_32 (dest, x));
      break;

    case TLS_MODEL_LOCAL_DYNAMIC:
      base = gen_reg_rtx (Pmode);
      if (TARGET_64BIT)
	{
	  rtx rax = gen_rtx_REG (Pmode, 0), insns, note;

	  start_sequence ();
	  emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
	  insns = get_insns ();
	  end_sequence ();

	  /* Build an equivalence note from the tls_get_addr symbol so
	     identical base computations can be shared.  */
	  note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
	  note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
	  emit_libcall_block (insns, base, rax, note);
	}
      else
	emit_insn (gen_tls_local_dynamic_base_32 (base));

      /* The final address is the module base plus X's @DTPOFF offset;
	 the offset itself is a link-time constant.  */
      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
      off = gen_rtx_CONST (Pmode, off);

      return gen_rtx_PLUS (Pmode, base, off);

    case TLS_MODEL_INITIAL_EXEC:
      /* Pick the GOT relocation kind and, for 32-bit PIC, the register
	 needed to address the GOT.  */
      if (TARGET_64BIT)
	{
	  pic = NULL;
	  type = UNSPEC_GOTNTPOFF;
	}
      else if (flag_pic)
	{
	  if (reload_in_progress)
	    regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
	  pic = pic_offset_table_rtx;
	  type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
	}
      else if (!TARGET_GNU_TLS)
	{
	  /* Non-PIC, non-GNU: materialize a GOT pointer just for this.  */
	  pic = gen_reg_rtx (Pmode);
	  emit_insn (gen_set_got (pic));
	  type = UNSPEC_GOTTPOFF;
	}
      else
	{
	  pic = NULL;
	  type = UNSPEC_INDNTPOFF;
	}

      /* Load the thread-pointer-relative offset of X from the GOT.
	 The slot never changes, hence RTX_UNCHANGING_P.  */
      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
      off = gen_rtx_CONST (Pmode, off);
      if (pic)
	off = gen_rtx_PLUS (Pmode, pic, off);
      off = gen_rtx_MEM (Pmode, off);
      RTX_UNCHANGING_P (off) = 1;
      set_mem_alias_set (off, ix86_GOT_alias_set ());

      if (TARGET_64BIT || TARGET_GNU_TLS)
	{
	  base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
	  off = force_reg (Pmode, off);
	  return gen_rtx_PLUS (Pmode, base, off);
	}
      else
	{
	  /* Without GNU TLS relocs the address is thread pointer minus
	     the loaded offset.  */
	  base = get_thread_pointer (true);
	  dest = gen_reg_rtx (Pmode);
	  emit_insn (gen_subsi3 (dest, base, off));
	}
      break;

    case TLS_MODEL_LOCAL_EXEC:
      /* The offset is a link-time constant; no GOT access is needed.  */
      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
			    (TARGET_64BIT || TARGET_GNU_TLS)
			    ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
      off = gen_rtx_CONST (Pmode, off);

      if (TARGET_64BIT || TARGET_GNU_TLS)
	{
	  base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
	  return gen_rtx_PLUS (Pmode, base, off);
	}
      else
	{
	  base = get_thread_pointer (true);
	  dest = gen_reg_rtx (Pmode);
	  emit_insn (gen_subsi3 (dest, base, off));
	}
      break;

    default:
      abort ();
    }

  return dest;
}
fce5a9f2 6554
3b3c6a3f
MM
/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   MODE and WIN are passed so that this macro can use
   GO_IF_LEGITIMATE_ADDRESS.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the 80386, we handle X+REG by loading X into a register R and
   using R+REG.  R will go in a general reg and indexing will be used.
   However, if REG is a broken-out memory address or multiplication,
   nothing needs to be done because REG can certainly go in a general reg.

   When -fpic is used, special handling is needed for symbolic references.
   See comments by legitimize_pic_address in i386.c for details.  */

rtx
legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
{
  int changed = 0;
  unsigned log;

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
	       GET_MODE_NAME (mode));
      debug_rtx (x);
    }

  /* TLS symbols need their own expansion; LOG is the access model.  */
  log = tls_symbolic_operand (x, mode);
  if (log)
    return legitimize_tls_address (x, log, false);

  /* Symbolic constants under PIC go through the GOT machinery.  */
  if (flag_pic && SYMBOLIC_CONST (x))
    return legitimize_pic_address (x, 0);

  /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
  if (GET_CODE (x) == ASHIFT
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
    {
      changed = 1;
      x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
			GEN_INT (1 << log));
    }

  if (GET_CODE (x) == PLUS)
    {
      /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */

      if (GET_CODE (XEXP (x, 0)) == ASHIFT
	  && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
	  && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
	{
	  changed = 1;
	  XEXP (x, 0) = gen_rtx_MULT (Pmode,
				      force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
				      GEN_INT (1 << log));
	}

      if (GET_CODE (XEXP (x, 1)) == ASHIFT
	  && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
	  && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
	{
	  changed = 1;
	  XEXP (x, 1) = gen_rtx_MULT (Pmode,
				      force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
				      GEN_INT (1 << log));
	}

      /* Put multiply first if it isn't already.  */
      if (GET_CODE (XEXP (x, 1)) == MULT)
	{
	  rtx tmp = XEXP (x, 0);
	  XEXP (x, 0) = XEXP (x, 1);
	  XEXP (x, 1) = tmp;
	  changed = 1;
	}

      /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can be
	 created by virtual register instantiation, register elimination, and
	 similar optimizations.  */
      if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
	{
	  changed = 1;
	  x = gen_rtx_PLUS (Pmode,
			    gen_rtx_PLUS (Pmode, XEXP (x, 0),
					  XEXP (XEXP (x, 1), 0)),
			    XEXP (XEXP (x, 1), 1));
	}

      /* Canonicalize
	 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
      else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
	       && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	       && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
	       && CONSTANT_P (XEXP (x, 1)))
	{
	  rtx constant;
	  rtx other = NULL_RTX;

	  /* Exactly one of the two candidate positions must hold the
	     CONST_INT; the other becomes the symbolic remainder.  */
	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
	    {
	      constant = XEXP (x, 1);
	      other = XEXP (XEXP (XEXP (x, 0), 1), 1);
	    }
	  else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
	    {
	      constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
	      other = XEXP (x, 1);
	    }
	  else
	    constant = 0;

	  if (constant)
	    {
	      changed = 1;
	      x = gen_rtx_PLUS (Pmode,
				gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
					      XEXP (XEXP (XEXP (x, 0), 1), 0)),
				plus_constant (other, INTVAL (constant)));
	    }
	}

      if (changed && legitimate_address_p (mode, x, FALSE))
	return x;

      /* Force remaining multiplications out into registers.  */
      if (GET_CODE (XEXP (x, 0)) == MULT)
	{
	  changed = 1;
	  XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
	}

      if (GET_CODE (XEXP (x, 1)) == MULT)
	{
	  changed = 1;
	  XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
	}

      if (changed
	  && GET_CODE (XEXP (x, 1)) == REG
	  && GET_CODE (XEXP (x, 0)) == REG)
	return x;

      if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
	{
	  changed = 1;
	  x = legitimize_pic_address (x, 0);
	}

      if (changed && legitimate_address_p (mode, x, FALSE))
	return x;

      /* Last resort: force the non-register operand of the PLUS into a
	 fresh register so the result is reg+reg.  */
      if (GET_CODE (XEXP (x, 0)) == REG)
	{
	  rtx temp = gen_reg_rtx (Pmode);
	  rtx val = force_operand (XEXP (x, 1), temp);
	  if (val != temp)
	    emit_move_insn (temp, val);

	  XEXP (x, 1) = temp;
	  return x;
	}

      else if (GET_CODE (XEXP (x, 1)) == REG)
	{
	  rtx temp = gen_reg_rtx (Pmode);
	  rtx val = force_operand (XEXP (x, 0), temp);
	  if (val != temp)
	    emit_move_insn (temp, val);

	  XEXP (x, 0) = temp;
	  return x;
	}
    }

  return x;
}
2a2ab3f9
JVA
6740\f
/* Print an integer constant expression in assembler syntax.  Addition
   and subtraction are the only arithmetic that may appear in these
   expressions.  FILE is the stdio stream to write to, X is the rtx, and
   CODE is the operand print code from the output string.  */

static void
output_pic_addr_const (FILE *file, rtx x, int code)
{
  char buf[256];

  switch (GET_CODE (x))
    {
    case PC:
      /* "." names the current output location; only meaningful in PIC.  */
      if (flag_pic)
	putc ('.', file);
      else
	abort ();
      break;

    case SYMBOL_REF:
      assemble_name (file, XSTR (x, 0));
      /* Operand code 'P' requests a PLT call for non-local symbols.  */
      if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
	fputs ("@PLT", file);
      break;

    case LABEL_REF:
      x = XEXP (x, 0);
      /* FALLTHRU */
    case CODE_LABEL:
      ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
      assemble_name (asm_out_file, buf);
      break;

    case CONST_INT:
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
      break;

    case CONST:
      /* This used to output parentheses around the expression,
	 but that does not work on the 386 (either ATT or BSD assembler).  */
      output_pic_addr_const (file, XEXP (x, 0), code);
      break;

    case CONST_DOUBLE:
      if (GET_MODE (x) == VOIDmode)
	{
	  /* We can use %d if the number is <32 bits and positive.  */
	  if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
	    fprintf (file, "0x%lx%08lx",
		     (unsigned long) CONST_DOUBLE_HIGH (x),
		     (unsigned long) CONST_DOUBLE_LOW (x));
	  else
	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
	}
      else
	/* We can't handle floating point constants;
	   PRINT_OPERAND must handle them.  */
	output_operand_lossage ("floating constant misused");
      break;

    case PLUS:
      /* Some assemblers need integer constants to appear first.  */
      if (GET_CODE (XEXP (x, 0)) == CONST_INT)
	{
	  output_pic_addr_const (file, XEXP (x, 0), code);
	  putc ('+', file);
	  output_pic_addr_const (file, XEXP (x, 1), code);
	}
      else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
	{
	  output_pic_addr_const (file, XEXP (x, 1), code);
	  putc ('+', file);
	  output_pic_addr_const (file, XEXP (x, 0), code);
	}
      else
	abort ();
      break;

    case MINUS:
      /* Bracket the difference so the assembler computes it as a unit;
	 delimiter choice depends on the assembler dialect.  */
      if (!TARGET_MACHO)
	putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
      output_pic_addr_const (file, XEXP (x, 0), code);
      putc ('-', file);
      output_pic_addr_const (file, XEXP (x, 1), code);
      if (!TARGET_MACHO)
	putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
      break;

    case UNSPEC:
      if (XVECLEN (x, 0) != 1)
	abort ();
      /* Emit the wrapped operand followed by the relocation suffix
	 matching the unspec kind.  */
      output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
      switch (XINT (x, 1))
	{
	case UNSPEC_GOT:
	  fputs ("@GOT", file);
	  break;
	case UNSPEC_GOTOFF:
	  fputs ("@GOTOFF", file);
	  break;
	case UNSPEC_GOTPCREL:
	  fputs ("@GOTPCREL(%rip)", file);
	  break;
	case UNSPEC_GOTTPOFF:
	  /* FIXME: This might be @TPOFF in Sun ld too.  */
	  fputs ("@GOTTPOFF", file);
	  break;
	case UNSPEC_TPOFF:
	  fputs ("@TPOFF", file);
	  break;
	case UNSPEC_NTPOFF:
	  if (TARGET_64BIT)
	    fputs ("@TPOFF", file);
	  else
	    fputs ("@NTPOFF", file);
	  break;
	case UNSPEC_DTPOFF:
	  fputs ("@DTPOFF", file);
	  break;
	case UNSPEC_GOTNTPOFF:
	  if (TARGET_64BIT)
	    fputs ("@GOTTPOFF(%rip)", file);
	  else
	    fputs ("@GOTNTPOFF", file);
	  break;
	case UNSPEC_INDNTPOFF:
	  fputs ("@INDNTPOFF", file);
	  break;
	default:
	  output_operand_lossage ("invalid UNSPEC as operand");
	  break;
	}
      break;

    default:
      output_operand_lossage ("invalid expression as operand");
    }
}
1865dbb5 6879
0f290768 6880/* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
1865dbb5
JM
6881 We need to handle our special PIC relocations. */
6882
0f290768 6883void
b96a374d 6884i386_dwarf_output_addr_const (FILE *file, rtx x)
1865dbb5 6885{
14f73b5a 6886#ifdef ASM_QUAD
18b5b8d6 6887 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
14f73b5a
JH
6888#else
6889 if (TARGET_64BIT)
6890 abort ();
18b5b8d6 6891 fprintf (file, "%s", ASM_LONG);
14f73b5a 6892#endif
1865dbb5
JM
6893 if (flag_pic)
6894 output_pic_addr_const (file, x, '\0');
6895 else
6896 output_addr_const (file, x);
6897 fputc ('\n', file);
6898}
6899
b9203463
RH
6900/* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
6901 We need to emit DTP-relative relocations. */
6902
6903void
b96a374d 6904i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
b9203463 6905{
75d38379
JJ
6906 fputs (ASM_LONG, file);
6907 output_addr_const (file, x);
6908 fputs ("@DTPOFF", file);
b9203463
RH
6909 switch (size)
6910 {
6911 case 4:
b9203463
RH
6912 break;
6913 case 8:
75d38379 6914 fputs (", 0", file);
b9203463 6915 break;
b9203463
RH
6916 default:
6917 abort ();
6918 }
b9203463
RH
6919}
6920
1865dbb5
JM
/* In the name of slightly smaller debug output, and to cater to
   general assembler losage, recognize PIC+GOTOFF and turn it back
   into a direct symbol reference.  Returns ORIG_X unchanged when no
   known PIC pattern is matched.  */

static rtx
ix86_delegitimize_address (rtx orig_x)
{
  rtx x = orig_x, y;

  /* Strip an outer MEM so we can inspect the address itself.  */
  if (GET_CODE (x) == MEM)
    x = XEXP (x, 0);

  if (TARGET_64BIT)
    {
      /* 64-bit form: a MEM of (const (unspec [sym] GOTPCREL)).  */
      if (GET_CODE (x) != CONST
	  || GET_CODE (XEXP (x, 0)) != UNSPEC
	  || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
	  || GET_CODE (orig_x) != MEM)
	return orig_x;
      return XVECEXP (XEXP (x, 0), 0, 0);
    }

  /* 32-bit forms are (plus <pic-part> (const ...)).  */
  if (GET_CODE (x) != PLUS
      || GET_CODE (XEXP (x, 1)) != CONST)
    return orig_x;

  if (GET_CODE (XEXP (x, 0)) == REG
      && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
    /* %ebx + GOT/GOTOFF */
    y = NULL;
  else if (GET_CODE (XEXP (x, 0)) == PLUS)
    {
      /* %ebx + %reg * scale + GOT/GOTOFF */
      y = XEXP (x, 0);
      if (GET_CODE (XEXP (y, 0)) == REG
	  && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
	y = XEXP (y, 1);
      else if (GET_CODE (XEXP (y, 1)) == REG
	       && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
	y = XEXP (y, 0);
      else
	return orig_x;
      /* Y must be the index part: a register, a MULT or an ASHIFT.  */
      if (GET_CODE (y) != REG
	  && GET_CODE (y) != MULT
	  && GET_CODE (y) != ASHIFT)
	return orig_x;
    }
  else
    return orig_x;

  /* Look inside the CONST.  A @GOT reference is valid only as a memory
     load; a @GOTOFF reference only as a plain address.  */
  x = XEXP (XEXP (x, 1), 0);
  if (GET_CODE (x) == UNSPEC
      && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
	  || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
    {
      if (y)
	return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
      return XVECEXP (x, 0, 0);
    }

  /* Same patterns with an additive constant offset.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == UNSPEC
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
	  || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
	      && GET_CODE (orig_x) != MEM)))
    {
      x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
      if (y)
	return gen_rtx_PLUS (Pmode, y, x);
      return x;
    }

  return orig_x;
}
2a2ab3f9 6996\f
/* Print to FILE the instruction suffix ("e", "ne", "g", ...) matching
   comparison CODE under flags mode MODE.  If REVERSE is nonzero, print
   the suffix for the reversed condition.  FP is nonzero when emitting
   for an fcmov-style instruction, which makes a few unsigned suffixes
   use the alternate spelling (see ??? comments below).  */

static void
put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
		    int fp, FILE *file)
{
  const char *suffix;

  if (mode == CCFPmode || mode == CCFPUmode)
    {
      /* FP comparisons needing extra insns can't be a single suffix;
	 they must have been split before reaching this point.  */
      enum rtx_code second_code, bypass_code;
      ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
      if (bypass_code != NIL || second_code != NIL)
	abort ();
      code = ix86_fp_compare_code_to_integer (code);
      mode = CCmode;
    }
  if (reverse)
    code = reverse_condition (code);

  switch (code)
    {
    case EQ:
      suffix = "e";
      break;
    case NE:
      suffix = "ne";
      break;
    case GT:
      /* Signed comparisons need full or sign-preserving flags.  */
      if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
	abort ();
      suffix = "g";
      break;
    case GTU:
      /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
	 Those same assemblers have the same but opposite losage on cmov.  */
      if (mode != CCmode)
	abort ();
      suffix = fp ? "nbe" : "a";
      break;
    case LT:
      if (mode == CCNOmode || mode == CCGOCmode)
	suffix = "s";
      else if (mode == CCmode || mode == CCGCmode)
	suffix = "l";
      else
	abort ();
      break;
    case LTU:
      if (mode != CCmode)
	abort ();
      suffix = "b";
      break;
    case GE:
      if (mode == CCNOmode || mode == CCGOCmode)
	suffix = "ns";
      else if (mode == CCmode || mode == CCGCmode)
	suffix = "ge";
      else
	abort ();
      break;
    case GEU:
      /* ??? As above.  */
      if (mode != CCmode)
	abort ();
      suffix = fp ? "nb" : "ae";
      break;
    case LE:
      if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
	abort ();
      suffix = "le";
      break;
    case LEU:
      if (mode != CCmode)
	abort ();
      suffix = "be";
      break;
    case UNORDERED:
      suffix = fp ? "u" : "p";
      break;
    case ORDERED:
      suffix = fp ? "nu" : "np";
      break;
    default:
      abort ();
    }
  fputs (suffix, file);
}
7083
/* Print the name of register X to FILE based on its machine mode and number.
   If CODE is 'w', pretend the mode is HImode.
   If CODE is 'b', pretend the mode is QImode.
   If CODE is 'k', pretend the mode is SImode.
   If CODE is 'q', pretend the mode is DImode.
   If CODE is 'h', pretend the reg is the `high' byte register.
   If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.  */

void
print_reg (rtx x, int code, FILE *file)
{
  /* These internal registers must never appear in assembler output.  */
  if (REGNO (x) == ARG_POINTER_REGNUM
      || REGNO (x) == FRAME_POINTER_REGNUM
      || REGNO (x) == FLAGS_REG
      || REGNO (x) == FPSR_REG)
    abort ();

  if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
    putc ('%', file);

  /* Reduce CODE to an operand size in bytes, reusing 0 for the `high'
     byte register and 3 for the `st' form.  */
  if (code == 'w' || MMX_REG_P (x))
    code = 2;
  else if (code == 'b')
    code = 1;
  else if (code == 'k')
    code = 4;
  else if (code == 'q')
    code = 8;
  else if (code == 'y')
    code = 3;
  else if (code == 'h')
    code = 0;
  else
    code = GET_MODE_SIZE (GET_MODE (x));

  /* Irritatingly, AMD extended registers use different naming convention
     from the normal registers.  */
  if (REX_INT_REG_P (x))
    {
      if (!TARGET_64BIT)
	abort ();
      switch (code)
	{
	case 0:
	  error ("extended registers have no high halves");
	  break;
	case 1:
	  fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
	  break;
	case 2:
	  fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
	  break;
	case 4:
	  fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
	  break;
	case 8:
	  fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
	  break;
	default:
	  error ("unsupported operand size for extended register");
	  break;
	}
      return;
    }
  switch (code)
    {
    case 3:
      if (STACK_TOP_P (x))
	{
	  fputs ("st(0)", file);
	  break;
	}
      /* FALLTHRU */
    case 8:
    case 4:
    case 12:
      /* Wider integer registers get an 'e' prefix ('r' for 64-bit).  */
      if (! ANY_FP_REG_P (x))
	putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
      /* FALLTHRU */
    case 16:
    case 2:
    normal:
      fputs (hi_reg_name[REGNO (x)], file);
      break;
    case 1:
      /* Registers past the table have no QImode name of their own.  */
      if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
	goto normal;
      fputs (qi_reg_name[REGNO (x)], file);
      break;
    case 0:
      if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
	goto normal;
      fputs (qi_high_reg_name[REGNO (x)], file);
      break;
    default:
      abort ();
    }
}
7182
f996902d
RH
7183/* Locate some local-dynamic symbol still in use by this function
7184 so that we can print its name in some tls_local_dynamic_base
7185 pattern. */
7186
7187static const char *
b96a374d 7188get_some_local_dynamic_name (void)
f996902d
RH
7189{
7190 rtx insn;
7191
7192 if (cfun->machine->some_ld_name)
7193 return cfun->machine->some_ld_name;
7194
7195 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
7196 if (INSN_P (insn)
7197 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
7198 return cfun->machine->some_ld_name;
7199
7200 abort ();
7201}
7202
7203static int
b96a374d 7204get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
f996902d
RH
7205{
7206 rtx x = *px;
7207
7208 if (GET_CODE (x) == SYMBOL_REF
7209 && local_dynamic_symbolic_operand (x, Pmode))
7210 {
7211 cfun->machine->some_ld_name = XSTR (x, 0);
7212 return 1;
7213 }
7214
7215 return 0;
7216}
7217
2a2ab3f9 7218/* Meaning of CODE:
fe25fea3 7219 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
e5cb57e8 7220 C -- print opcode suffix for set/cmov insn.
fe25fea3 7221 c -- like C, but print reversed condition
ef6257cd 7222 F,f -- likewise, but for floating-point.
f6f5dff2
RO
7223 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
7224 otherwise nothing
2a2ab3f9
JVA
7225 R -- print the prefix for register names.
7226 z -- print the opcode suffix for the size of the current operand.
7227 * -- print a star (in certain assembler syntax)
fb204271 7228 A -- print an absolute memory reference.
2a2ab3f9 7229 w -- print the operand as if it's a "word" (HImode) even if it isn't.
2d49677f
SC
7230 s -- print a shift double count, followed by the assemblers argument
7231 delimiter.
fe25fea3
SC
7232 b -- print the QImode name of the register for the indicated operand.
7233 %b0 would print %al if operands[0] is reg 0.
7234 w -- likewise, print the HImode name of the register.
7235 k -- likewise, print the SImode name of the register.
3f3f2124 7236 q -- likewise, print the DImode name of the register.
ef6257cd
JH
7237 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
7238 y -- print "st(0)" instead of "st" as a register.
a46d1d38 7239 D -- print condition for SSE cmp instruction.
ef6257cd
JH
7240 P -- if PIC, print an @PLT suffix.
7241 X -- don't print any sort of PIC '@' suffix for a symbol.
f996902d 7242 & -- print some in-use local-dynamic symbol name.
a46d1d38 7243 */
2a2ab3f9
JVA
7244
7245void
b96a374d 7246print_operand (FILE *file, rtx x, int code)
2a2ab3f9
JVA
7247{
7248 if (code)
7249 {
7250 switch (code)
7251 {
7252 case '*':
80f33d06 7253 if (ASSEMBLER_DIALECT == ASM_ATT)
2a2ab3f9
JVA
7254 putc ('*', file);
7255 return;
7256
f996902d
RH
7257 case '&':
7258 assemble_name (file, get_some_local_dynamic_name ());
7259 return;
7260
fb204271 7261 case 'A':
80f33d06 7262 if (ASSEMBLER_DIALECT == ASM_ATT)
fb204271 7263 putc ('*', file);
80f33d06 7264 else if (ASSEMBLER_DIALECT == ASM_INTEL)
fb204271
DN
7265 {
7266 /* Intel syntax. For absolute addresses, registers should not
7267 be surrounded by braces. */
7268 if (GET_CODE (x) != REG)
7269 {
7270 putc ('[', file);
7271 PRINT_OPERAND (file, x, 0);
7272 putc (']', file);
7273 return;
7274 }
7275 }
80f33d06
GS
7276 else
7277 abort ();
fb204271
DN
7278
7279 PRINT_OPERAND (file, x, 0);
7280 return;
7281
7282
2a2ab3f9 7283 case 'L':
80f33d06 7284 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 7285 putc ('l', file);
2a2ab3f9
JVA
7286 return;
7287
7288 case 'W':
80f33d06 7289 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 7290 putc ('w', file);
2a2ab3f9
JVA
7291 return;
7292
7293 case 'B':
80f33d06 7294 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 7295 putc ('b', file);
2a2ab3f9
JVA
7296 return;
7297
7298 case 'Q':
80f33d06 7299 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 7300 putc ('l', file);
2a2ab3f9
JVA
7301 return;
7302
7303 case 'S':
80f33d06 7304 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 7305 putc ('s', file);
2a2ab3f9
JVA
7306 return;
7307
5f1ec3e6 7308 case 'T':
80f33d06 7309 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 7310 putc ('t', file);
5f1ec3e6
JVA
7311 return;
7312
2a2ab3f9
JVA
7313 case 'z':
7314 /* 387 opcodes don't get size suffixes if the operands are
0f290768 7315 registers. */
2a2ab3f9
JVA
7316 if (STACK_REG_P (x))
7317 return;
7318
831c4e87
KC
7319 /* Likewise if using Intel opcodes. */
7320 if (ASSEMBLER_DIALECT == ASM_INTEL)
7321 return;
7322
7323 /* This is the size of op from size of operand. */
2a2ab3f9
JVA
7324 switch (GET_MODE_SIZE (GET_MODE (x)))
7325 {
2a2ab3f9 7326 case 2:
155d8a47
JW
7327#ifdef HAVE_GAS_FILDS_FISTS
7328 putc ('s', file);
7329#endif
2a2ab3f9
JVA
7330 return;
7331
7332 case 4:
7333 if (GET_MODE (x) == SFmode)
7334 {
e075ae69 7335 putc ('s', file);
2a2ab3f9
JVA
7336 return;
7337 }
7338 else
e075ae69 7339 putc ('l', file);
2a2ab3f9
JVA
7340 return;
7341
5f1ec3e6 7342 case 12:
2b589241 7343 case 16:
e075ae69
RH
7344 putc ('t', file);
7345 return;
5f1ec3e6 7346
2a2ab3f9
JVA
7347 case 8:
7348 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
56c0e8fa
JVA
7349 {
7350#ifdef GAS_MNEMONICS
e075ae69 7351 putc ('q', file);
56c0e8fa 7352#else
e075ae69
RH
7353 putc ('l', file);
7354 putc ('l', file);
56c0e8fa
JVA
7355#endif
7356 }
e075ae69
RH
7357 else
7358 putc ('l', file);
2a2ab3f9 7359 return;
155d8a47
JW
7360
7361 default:
7362 abort ();
2a2ab3f9 7363 }
4af3895e
JVA
7364
7365 case 'b':
7366 case 'w':
7367 case 'k':
3f3f2124 7368 case 'q':
4af3895e
JVA
7369 case 'h':
7370 case 'y':
5cb6195d 7371 case 'X':
e075ae69 7372 case 'P':
4af3895e
JVA
7373 break;
7374
2d49677f
SC
7375 case 's':
7376 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
7377 {
7378 PRINT_OPERAND (file, x, 0);
e075ae69 7379 putc (',', file);
2d49677f 7380 }
a269a03c
JC
7381 return;
7382
a46d1d38
JH
7383 case 'D':
7384 /* Little bit of braindamage here. The SSE compare instructions
7385 does use completely different names for the comparisons that the
7386 fp conditional moves. */
7387 switch (GET_CODE (x))
7388 {
7389 case EQ:
7390 case UNEQ:
7391 fputs ("eq", file);
7392 break;
7393 case LT:
7394 case UNLT:
7395 fputs ("lt", file);
7396 break;
7397 case LE:
7398 case UNLE:
7399 fputs ("le", file);
7400 break;
7401 case UNORDERED:
7402 fputs ("unord", file);
7403 break;
7404 case NE:
7405 case LTGT:
7406 fputs ("neq", file);
7407 break;
7408 case UNGE:
7409 case GE:
7410 fputs ("nlt", file);
7411 break;
7412 case UNGT:
7413 case GT:
7414 fputs ("nle", file);
7415 break;
7416 case ORDERED:
7417 fputs ("ord", file);
7418 break;
7419 default:
7420 abort ();
7421 break;
7422 }
7423 return;
048b1c95 7424 case 'O':
f6f5dff2 7425#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
048b1c95
JJ
7426 if (ASSEMBLER_DIALECT == ASM_ATT)
7427 {
7428 switch (GET_MODE (x))
7429 {
7430 case HImode: putc ('w', file); break;
7431 case SImode:
7432 case SFmode: putc ('l', file); break;
7433 case DImode:
7434 case DFmode: putc ('q', file); break;
7435 default: abort ();
7436 }
7437 putc ('.', file);
7438 }
7439#endif
7440 return;
1853aadd 7441 case 'C':
e075ae69 7442 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
1853aadd 7443 return;
fe25fea3 7444 case 'F':
f6f5dff2 7445#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
048b1c95
JJ
7446 if (ASSEMBLER_DIALECT == ASM_ATT)
7447 putc ('.', file);
7448#endif
e075ae69 7449 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
fe25fea3
SC
7450 return;
7451
e9a25f70 7452 /* Like above, but reverse condition */
e075ae69 7453 case 'c':
fce5a9f2 7454 /* Check to see if argument to %c is really a constant
c1d5afc4 7455 and not a condition code which needs to be reversed. */
ec8e098d 7456 if (!COMPARISON_P (x))
c1d5afc4
CR
7457 {
7458 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
7459 return;
7460 }
e075ae69
RH
7461 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
7462 return;
fe25fea3 7463 case 'f':
f6f5dff2 7464#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
048b1c95
JJ
7465 if (ASSEMBLER_DIALECT == ASM_ATT)
7466 putc ('.', file);
7467#endif
e075ae69 7468 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
1853aadd 7469 return;
ef6257cd
JH
7470 case '+':
7471 {
7472 rtx x;
e5cb57e8 7473
ef6257cd
JH
7474 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
7475 return;
a4f31c00 7476
ef6257cd
JH
7477 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
7478 if (x)
7479 {
7480 int pred_val = INTVAL (XEXP (x, 0));
7481
7482 if (pred_val < REG_BR_PROB_BASE * 45 / 100
7483 || pred_val > REG_BR_PROB_BASE * 55 / 100)
7484 {
7485 int taken = pred_val > REG_BR_PROB_BASE / 2;
7486 int cputaken = final_forward_branch_p (current_output_insn) == 0;
7487
7488 /* Emit hints only in the case default branch prediction
d1f87653 7489 heuristics would fail. */
ef6257cd
JH
7490 if (taken != cputaken)
7491 {
7492 /* We use 3e (DS) prefix for taken branches and
7493 2e (CS) prefix for not taken branches. */
7494 if (taken)
7495 fputs ("ds ; ", file);
7496 else
7497 fputs ("cs ; ", file);
7498 }
7499 }
7500 }
7501 return;
7502 }
4af3895e 7503 default:
a52453cc 7504 output_operand_lossage ("invalid operand code `%c'", code);
2a2ab3f9
JVA
7505 }
7506 }
e9a25f70 7507
2a2ab3f9 7508 if (GET_CODE (x) == REG)
a55f4481 7509 print_reg (x, code, file);
e9a25f70 7510
2a2ab3f9
JVA
7511 else if (GET_CODE (x) == MEM)
7512 {
e075ae69 7513 /* No `byte ptr' prefix for call instructions. */
80f33d06 7514 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
2a2ab3f9 7515 {
69ddee61 7516 const char * size;
e075ae69
RH
7517 switch (GET_MODE_SIZE (GET_MODE (x)))
7518 {
7519 case 1: size = "BYTE"; break;
7520 case 2: size = "WORD"; break;
7521 case 4: size = "DWORD"; break;
7522 case 8: size = "QWORD"; break;
7523 case 12: size = "XWORD"; break;
a7180f70 7524 case 16: size = "XMMWORD"; break;
e075ae69 7525 default:
564d80f4 7526 abort ();
e075ae69 7527 }
fb204271
DN
7528
7529 /* Check for explicit size override (codes 'b', 'w' and 'k') */
7530 if (code == 'b')
7531 size = "BYTE";
7532 else if (code == 'w')
7533 size = "WORD";
7534 else if (code == 'k')
7535 size = "DWORD";
7536
e075ae69
RH
7537 fputs (size, file);
7538 fputs (" PTR ", file);
2a2ab3f9 7539 }
e075ae69
RH
7540
7541 x = XEXP (x, 0);
0d7d98ee 7542 /* Avoid (%rip) for call operands. */
d10f5ecf 7543 if (CONSTANT_ADDRESS_P (x) && code == 'P'
0d7d98ee
JH
7544 && GET_CODE (x) != CONST_INT)
7545 output_addr_const (file, x);
c8b94768
RH
7546 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
7547 output_operand_lossage ("invalid constraints for operand");
2a2ab3f9 7548 else
e075ae69 7549 output_address (x);
2a2ab3f9 7550 }
e9a25f70 7551
2a2ab3f9
JVA
7552 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
7553 {
e9a25f70
JL
7554 REAL_VALUE_TYPE r;
7555 long l;
7556
5f1ec3e6
JVA
7557 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7558 REAL_VALUE_TO_TARGET_SINGLE (r, l);
e075ae69 7559
80f33d06 7560 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 7561 putc ('$', file);
781f4ec1 7562 fprintf (file, "0x%08lx", l);
5f1ec3e6 7563 }
e9a25f70 7564
74dc3e94
RH
7565 /* These float cases don't actually occur as immediate operands. */
7566 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
5f1ec3e6 7567 {
e9a25f70
JL
7568 char dstr[30];
7569
da6eec72 7570 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
5f1ec3e6 7571 fprintf (file, "%s", dstr);
2a2ab3f9 7572 }
e9a25f70 7573
2b589241 7574 else if (GET_CODE (x) == CONST_DOUBLE
f8a1ebc6 7575 && GET_MODE (x) == XFmode)
2a2ab3f9 7576 {
e9a25f70
JL
7577 char dstr[30];
7578
da6eec72 7579 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
5f1ec3e6 7580 fprintf (file, "%s", dstr);
2a2ab3f9 7581 }
f996902d 7582
79325812 7583 else
2a2ab3f9 7584 {
4af3895e 7585 if (code != 'P')
2a2ab3f9 7586 {
695dac07 7587 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
e075ae69 7588 {
80f33d06 7589 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69
RH
7590 putc ('$', file);
7591 }
2a2ab3f9
JVA
7592 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
7593 || GET_CODE (x) == LABEL_REF)
e075ae69 7594 {
80f33d06 7595 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69
RH
7596 putc ('$', file);
7597 else
7598 fputs ("OFFSET FLAT:", file);
7599 }
2a2ab3f9 7600 }
e075ae69
RH
7601 if (GET_CODE (x) == CONST_INT)
7602 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7603 else if (flag_pic)
2a2ab3f9
JVA
7604 output_pic_addr_const (file, x, code);
7605 else
7606 output_addr_const (file, x);
7607 }
7608}
7609\f
/* Print a memory operand whose address is ADDR.  */

void
print_operand_address (FILE *file, rtx addr)
{
  struct ix86_address parts;
  rtx base, index, disp;
  int scale;

  /* Break the address into base register, index register, displacement,
     scale factor and segment override.  Anything we cannot decompose is
     a bug in the caller.  */
  if (! ix86_decompose_address (addr, &parts))
    abort ();

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;

  /* Emit an explicit segment-override prefix for %fs/%gs addressing
     (used by TLS).  The '%' is suppressed when user labels already
     carry a prefix character.  */
  switch (parts.seg)
    {
    case SEG_DEFAULT:
      break;
    case SEG_FS:
    case SEG_GS:
      if (USER_LABEL_PREFIX[0] == 0)
	putc ('%', file);
      fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
      break;
    default:
      abort ();
    }

  if (!base && !index)
    {
      /* Displacement only requires special attention.  */

      if (GET_CODE (disp) == CONST_INT)
	{
	  /* Intel syntax needs an explicit "ds:" so a bare constant is
	     not mistaken for an immediate.  */
	  if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
	    {
	      if (USER_LABEL_PREFIX[0] == 0)
		putc ('%', file);
	      fputs ("ds:", file);
	    }
	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
	}
      else if (flag_pic)
	output_pic_addr_const (file, disp, 0);
      else
	output_addr_const (file, disp);

      /* Use one byte shorter RIP relative addressing for 64bit mode.
	 This is only valid for symbols/labels (possibly plus a constant
	 offset), and never for TLS symbols, whose relocations are
	 handled elsewhere.  */
      if (TARGET_64BIT
	  && ((GET_CODE (disp) == SYMBOL_REF
	       && ! tls_symbolic_operand (disp, GET_MODE (disp)))
	      || GET_CODE (disp) == LABEL_REF
	      || (GET_CODE (disp) == CONST
		  && GET_CODE (XEXP (disp, 0)) == PLUS
		  && (GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
		      || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
		  && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)))
	fputs ("(%rip)", file);
    }
  else
    {
      /* AT&T syntax: disp(base,index,scale).  */
      if (ASSEMBLER_DIALECT == ASM_ATT)
	{
	  if (disp)
	    {
	      if (flag_pic)
		output_pic_addr_const (file, disp, 0);
	      else if (GET_CODE (disp) == LABEL_REF)
		output_asm_label (disp);
	      else
		output_addr_const (file, disp);
	    }

	  putc ('(', file);
	  if (base)
	    print_reg (base, 0, file);
	  if (index)
	    {
	      putc (',', file);
	      print_reg (index, 0, file);
	      if (scale != 1)
		fprintf (file, ",%d", scale);
	    }
	  putc (')', file);
	}
      else
	{
	  /* Intel syntax: sym[base+offset+index*scale].  */
	  rtx offset = NULL_RTX;

	  if (disp)
	    {
	      /* Pull out the offset of a symbol; print any symbol itself.  */
	      if (GET_CODE (disp) == CONST
		  && GET_CODE (XEXP (disp, 0)) == PLUS
		  && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
		{
		  offset = XEXP (XEXP (disp, 0), 1);
		  disp = gen_rtx_CONST (VOIDmode,
					XEXP (XEXP (disp, 0), 0));
		}

	      if (flag_pic)
		output_pic_addr_const (file, disp, 0);
	      else if (GET_CODE (disp) == LABEL_REF)
		output_asm_label (disp);
	      else if (GET_CODE (disp) == CONST_INT)
		offset = disp;
	      else
		output_addr_const (file, disp);
	    }

	  putc ('[', file);
	  if (base)
	    {
	      print_reg (base, 0, file);
	      if (offset)
		{
		  /* Negative offsets bring their own '-' sign.  */
		  if (INTVAL (offset) >= 0)
		    putc ('+', file);
		  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
		}
	    }
	  else if (offset)
	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
	  else
	    /* No base and no offset: emit an explicit zero displacement.  */
	    putc ('0', file);

	  if (index)
	    {
	      putc ('+', file);
	      print_reg (index, 0, file);
	      if (scale != 1)
		fprintf (file, "*%d", scale);
	    }
	  putc (']', file);
	}
    }
}
f996902d
RH
7751
/* Target hook for printing UNSPEC address constants that
   output_addr_const itself cannot handle.  Recognizes the i386 TLS
   unspecs and prints the operand followed by the matching relocation
   suffix.  Returns false for any other rtx so the caller can report
   the failure.  */

bool
output_addr_const_extra (FILE *file, rtx x)
{
  rtx op;

  if (GET_CODE (x) != UNSPEC)
    return false;

  op = XVECEXP (x, 0, 0);
  switch (XINT (x, 1))
    {
    case UNSPEC_GOTTPOFF:
      output_addr_const (file, op);
      /* FIXME: This might be @TPOFF in Sun ld.  */
      fputs ("@GOTTPOFF", file);
      break;
    case UNSPEC_TPOFF:
      output_addr_const (file, op);
      fputs ("@TPOFF", file);
      break;
    case UNSPEC_NTPOFF:
      output_addr_const (file, op);
      /* 64-bit uses the @TPOFF spelling for the negated offset.  */
      if (TARGET_64BIT)
	fputs ("@TPOFF", file);
      else
	fputs ("@NTPOFF", file);
      break;
    case UNSPEC_DTPOFF:
      output_addr_const (file, op);
      fputs ("@DTPOFF", file);
      break;
    case UNSPEC_GOTNTPOFF:
      output_addr_const (file, op);
      /* 64-bit GOT entries are RIP-relative.  */
      if (TARGET_64BIT)
	fputs ("@GOTTPOFF(%rip)", file);
      else
	fputs ("@GOTNTPOFF", file);
      break;
    case UNSPEC_INDNTPOFF:
      output_addr_const (file, op);
      fputs ("@INDNTPOFF", file);
      break;

    default:
      return false;
    }

  return true;
}
2a2ab3f9
JVA
7801\f
7802/* Split one or more DImode RTL references into pairs of SImode
7803 references. The RTL can be REG, offsettable MEM, integer constant, or
7804 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7805 split and "num" is its length. lo_half and hi_half are output arrays
0f290768 7806 that parallel "operands". */
2a2ab3f9
JVA
7807
7808void
b96a374d 7809split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
2a2ab3f9
JVA
7810{
7811 while (num--)
7812 {
57dbca5e 7813 rtx op = operands[num];
b932f770
JH
7814
7815 /* simplify_subreg refuse to split volatile memory addresses,
7816 but we still have to handle it. */
7817 if (GET_CODE (op) == MEM)
2a2ab3f9 7818 {
f4ef873c 7819 lo_half[num] = adjust_address (op, SImode, 0);
b72f00af 7820 hi_half[num] = adjust_address (op, SImode, 4);
2a2ab3f9
JVA
7821 }
7822 else
b932f770 7823 {
38ca929b
JH
7824 lo_half[num] = simplify_gen_subreg (SImode, op,
7825 GET_MODE (op) == VOIDmode
7826 ? DImode : GET_MODE (op), 0);
7827 hi_half[num] = simplify_gen_subreg (SImode, op,
7828 GET_MODE (op) == VOIDmode
7829 ? DImode : GET_MODE (op), 4);
b932f770 7830 }
2a2ab3f9
JVA
7831 }
7832}
44cf5b6a
JH
/* Split one or more TImode RTL references into pairs of DImode
   references.  The RTL can be REG, offsettable MEM, integer constant, or
   CONST_DOUBLE.  "operands" is a pointer to an array of TImode RTL to
   split and "num" is its length.  lo_half and hi_half are output arrays
   that parallel "operands".  */

void
split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
{
  while (num--)
    {
      rtx op = operands[num];

      /* simplify_subreg refuses to split volatile memory addresses, but we
	 still have to handle it.  */
      if (GET_CODE (op) == MEM)
	{
	  lo_half[num] = adjust_address (op, DImode, 0);
	  hi_half[num] = adjust_address (op, DImode, 8);
	}
      else
	{
	  lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
	  hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
	}
    }
}
2a2ab3f9 7860\f
2a2ab3f9
JVA
/* Output code to perform a 387 binary operation in INSN, one of PLUS,
   MINUS, MULT or DIV.  OPERANDS are the insn operands, where operands[3]
   is the expression of the binary operation.  The output may either be
   emitted here, or returned to the caller, like all output_* functions.

   There is no guarantee that the operands are the same mode, as they
   might be within FLOAT or FLOAT_EXTEND expressions.  */

#ifndef SYSV386_COMPAT
/* Set to 1 for compatibility with brain-damaged assemblers.  No-one
   wants to fix the assemblers because that causes incompatibility
   with gcc.  No-one wants to fix gcc because that causes
   incompatibility with assemblers...  You can use the option of
   -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way.  */
#define SYSV386_COMPAT 1
#endif

const char *
output_387_binary_op (rtx insn, rtx *operands)
{
  /* Returned template is built here; callers must consume it before
     the next call.  */
  static char buf[30];
  const char *p;
  const char *ssep;
  int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);

#ifdef ENABLE_CHECKING
  /* Even if we do not want to check the inputs, this documents input
     constraints.  Which helps in understanding the following code.  */
  if (STACK_REG_P (operands[0])
      && ((REG_P (operands[1])
	   && REGNO (operands[0]) == REGNO (operands[1])
	   && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
	  || (REG_P (operands[2])
	      && REGNO (operands[0]) == REGNO (operands[2])
	      && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
      && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
    ; /* ok */
  else if (!is_sse)
    abort ();
#endif

  /* Pick the x87 mnemonic root (fiXXX for integer memory operands)
     and the SSE mnemonic root.  */
  switch (GET_CODE (operands[3]))
    {
    case PLUS:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fiadd";
      else
	p = "fadd";
      ssep = "add";
      break;

    case MINUS:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fisub";
      else
	p = "fsub";
      ssep = "sub";
      break;

    case MULT:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fimul";
      else
	p = "fmul";
      ssep = "mul";
      break;

    case DIV:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fidiv";
      else
	p = "fdiv";
      ssep = "div";
      break;

    default:
      abort ();
    }

  /* SSE case is simple: scalar op with an ss/sd suffix by mode.  */
  if (is_sse)
    {
      strcpy (buf, ssep);
      if (GET_MODE (operands[0]) == SFmode)
	strcat (buf, "ss\t{%2, %0|%0, %2}");
      else
	strcat (buf, "sd\t{%2, %0|%0, %2}");
      return buf;
    }
  strcpy (buf, p);

  /* x87 case: choose the operand/popping variant.  */
  switch (GET_CODE (operands[3]))
    {
    case MULT:
    case PLUS:
      /* Commutative: canonicalize so operands[0] == operands[1].  */
      if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
	{
	  rtx temp = operands[2];
	  operands[2] = operands[1];
	  operands[1] = temp;
	}

      /* know operands[0] == operands[1].  */

      if (GET_CODE (operands[2]) == MEM)
	{
	  p = "%z2\t%2";
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
	{
	  if (STACK_TOP_P (operands[0]))
	    /* How is it that we are storing to a dead operand[2]?
	       Well, presumably operands[1] is dead too.  We can't
	       store the result to st(0) as st(0) gets popped on this
	       instruction.  Instead store to operands[2] (which I
	       think has to be st(1)).  st(1) will be popped later.
	       gcc <= 2.8.1 didn't have this check and generated
	       assembly code that the Unixware assembler rejected.  */
	    p = "p\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
	  else
	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
	  break;
	}

      if (STACK_TOP_P (operands[0]))
	p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
      else
	p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
      break;

    case MINUS:
    case DIV:
      /* Non-commutative: the reversed forms fsubr/fdivr ('r' variants)
	 are needed when the memory or stack operand order is swapped.  */
      if (GET_CODE (operands[1]) == MEM)
	{
	  p = "r%z1\t%1";
	  break;
	}

      if (GET_CODE (operands[2]) == MEM)
	{
	  p = "%z2\t%2";
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
	{
#if SYSV386_COMPAT
	  /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
	     derived assemblers, confusingly reverse the direction of
	     the operation for fsub{r} and fdiv{r} when the
	     destination register is not st(0).  The Intel assembler
	     doesn't have this brain damage.  Read !SYSV386_COMPAT to
	     figure out what the hardware really does.  */
	  if (STACK_TOP_P (operands[0]))
	    p = "{p\t%0, %2|rp\t%2, %0}";
	  else
	    p = "{rp\t%2, %0|p\t%0, %2}";
#else
	  if (STACK_TOP_P (operands[0]))
	    /* As above for fmul/fadd, we can't store to st(0).  */
	    p = "rp\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
	  else
	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
#endif
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
	{
#if SYSV386_COMPAT
	  if (STACK_TOP_P (operands[0]))
	    p = "{rp\t%0, %1|p\t%1, %0}";
	  else
	    p = "{p\t%1, %0|rp\t%0, %1}";
#else
	  if (STACK_TOP_P (operands[0]))
	    p = "p\t{%0, %1|%1, %0}";	/* st(1) = st(1) op st(0); pop */
	  else
	    p = "rp\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2); pop */
#endif
	  break;
	}

      if (STACK_TOP_P (operands[0]))
	{
	  if (STACK_TOP_P (operands[1]))
	    p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
	  else
	    p = "r\t{%y1, %0|%0, %y1}";	/* st(0) = st(r1) op st(0) */
	  break;
	}
      else if (STACK_TOP_P (operands[1]))
	{
#if SYSV386_COMPAT
	  p = "{\t%1, %0|r\t%0, %1}";
#else
	  p = "r\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2) */
#endif
	}
      else
	{
#if SYSV386_COMPAT
	  p = "{r\t%2, %0|\t%0, %2}";
#else
	  p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
#endif
	}
      break;

    default:
      abort ();
    }

  strcat (buf, p);
  return buf;
}
e075ae69 8082
/* Output code to initialize control word copies used by
   trunc?f?i patterns.  NORMAL is set to current control word, while ROUND_DOWN
   is set to control word rounding downwards.  */
void
emit_i387_cw_initialization (rtx normal, rtx round_down)
{
  rtx reg = gen_reg_rtx (HImode);

  /* Store the current FPU control word into NORMAL, then build the
     modified copy in a fresh pseudo.  */
  emit_insn (gen_x86_fnstcw_1 (normal));
  emit_move_insn (reg, normal);
  /* Set both rounding-control bits (bits 10-11 = 0xc00).  The insv
     form writes 0xc into that bit-field directly and avoids a
     partial-register stall; it has no 64-bit pattern, hence the
     TARGET_64BIT check.  NOTE(review): RC=11 is round-toward-zero
     (truncation), despite the ROUND_DOWN parameter name — presumably
     named for the trunc?f?i patterns; confirm against i386.md.  */
  if (!TARGET_PARTIAL_REG_STALL && !optimize_size
      && !TARGET_64BIT)
    emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
  else
    emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
  emit_move_insn (round_down, reg);
}
8100
/* Output code for INSN to convert a float to a signed int.  OPERANDS
   are the insn operands.  The output may be [HSD]Imode and the input
   operand may be [SDX]Fmode.  Operands 2 and 3 are the saved and the
   truncating control-word copies set up by
   emit_i387_cw_initialization.  */

const char *
output_fix_trunc (rtx insn, rtx *operands)
{
  int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
  int dimode_p = GET_MODE (operands[0]) == DImode;

  /* Jump through a hoop or two for DImode, since the hardware has no
     non-popping instruction.  We used to do this a different way, but
     that was somewhat fragile and broke with post-reload splitters.  */
  if (dimode_p && !stack_top_dies)
    output_asm_insn ("fld\t%y1", operands);

  if (!STACK_TOP_P (operands[1]))
    abort ();

  if (GET_CODE (operands[0]) != MEM)
    abort ();

  /* Switch to the truncating control word, store, then restore.  */
  output_asm_insn ("fldcw\t%3", operands);
  if (stack_top_dies || dimode_p)
    output_asm_insn ("fistp%z0\t%0", operands);
  else
    output_asm_insn ("fist%z0\t%0", operands);
  output_asm_insn ("fldcw\t%2", operands);

  return "";
}
cda749b1 8132
e075ae69
RH
/* Output code for INSN to compare OPERANDS.  EFLAGS_P is 1 when fcomi
   should be used and 2 when fnstsw should be used.  UNORDERED_P is true
   when fucom should be used.  */

const char *
output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
{
  int stack_top_dies;
  rtx cmp_op0 = operands[0];
  rtx cmp_op1 = operands[1];
  int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);

  /* In the fnstsw form operand 0 is the status-word destination, so
     the values being compared shift up by one.  */
  if (eflags_p == 2)
    {
      cmp_op0 = cmp_op1;
      cmp_op1 = operands[2];
    }
  /* SSE compares go straight to EFLAGS via [u]comiss/[u]comisd.  */
  if (is_sse)
    {
      if (GET_MODE (operands[0]) == SFmode)
	if (unordered_p)
	  return "ucomiss\t{%1, %0|%0, %1}";
	else
	  return "comiss\t{%1, %0|%0, %1}";
      else
	if (unordered_p)
	  return "ucomisd\t{%1, %0|%0, %1}";
	else
	  return "comisd\t{%1, %0|%0, %1}";
    }

  if (! STACK_TOP_P (cmp_op0))
    abort ();

  stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;

  if (STACK_REG_P (cmp_op1)
      && stack_top_dies
      && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
      && REGNO (cmp_op1) != FIRST_STACK_REG)
    {
      /* If both the top of the 387 stack dies, and the other operand
	 is also a stack register that dies, then this must be a
	 `fcompp' float compare */

      if (eflags_p == 1)
	{
	  /* There is no double popping fcomi variant.  Fortunately,
	     eflags is immune from the fstp's cc clobbering.  */
	  if (unordered_p)
	    output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
	  else
	    output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
	  return "fstp\t%y0";
	}
      else
	{
	  if (eflags_p == 2)
	    {
	      if (unordered_p)
		return "fucompp\n\tfnstsw\t%0";
	      else
		return "fcompp\n\tfnstsw\t%0";
	    }
	  else
	    {
	      if (unordered_p)
		return "fucompp";
	      else
		return "fcompp";
	    }
	}
    }
  else
    {
      /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies.
	 NULL slots are combinations that cannot occur; hitting one
	 aborts below.  */

      static const char * const alt[24] =
      {
	"fcom%z1\t%y1",
	"fcomp%z1\t%y1",
	"fucom%z1\t%y1",
	"fucomp%z1\t%y1",

	"ficom%z1\t%y1",
	"ficomp%z1\t%y1",
	NULL,
	NULL,

	"fcomi\t{%y1, %0|%0, %y1}",
	"fcomip\t{%y1, %0|%0, %y1}",
	"fucomi\t{%y1, %0|%0, %y1}",
	"fucomip\t{%y1, %0|%0, %y1}",

	NULL,
	NULL,
	NULL,
	NULL,

	"fcom%z2\t%y2\n\tfnstsw\t%0",
	"fcomp%z2\t%y2\n\tfnstsw\t%0",
	"fucom%z2\t%y2\n\tfnstsw\t%0",
	"fucomp%z2\t%y2\n\tfnstsw\t%0",

	"ficom%z2\t%y2\n\tfnstsw\t%0",
	"ficomp%z2\t%y2\n\tfnstsw\t%0",
	NULL,
	NULL
      };

      int mask;
      const char *ret;

      mask = eflags_p << 3;
      mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
      mask |= unordered_p << 1;
      mask |= stack_top_dies;

      if (mask >= 24)
	abort ();
      ret = alt[mask];
      if (ret == NULL)
	abort ();

      return ret;
    }
}
2a2ab3f9 8260
f88c65f7 8261void
b96a374d 8262ix86_output_addr_vec_elt (FILE *file, int value)
f88c65f7
RH
8263{
8264 const char *directive = ASM_LONG;
8265
8266 if (TARGET_64BIT)
8267 {
8268#ifdef ASM_QUAD
8269 directive = ASM_QUAD;
8270#else
8271 abort ();
8272#endif
8273 }
8274
8275 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
8276}
8277
8278void
b96a374d 8279ix86_output_addr_diff_elt (FILE *file, int value, int rel)
f88c65f7
RH
8280{
8281 if (TARGET_64BIT)
74411039 8282 fprintf (file, "%s%s%d-%s%d\n",
f88c65f7
RH
8283 ASM_LONG, LPREFIX, value, LPREFIX, rel);
8284 else if (HAVE_AS_GOTOFF_IN_DATA)
8285 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
b069de3b
SS
8286#if TARGET_MACHO
8287 else if (TARGET_MACHO)
86ecdfb6
AP
8288 {
8289 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
8290 machopic_output_function_base_name (file);
8291 fprintf(file, "\n");
8292 }
b069de3b 8293#endif
f88c65f7 8294 else
5fc0e5df
KW
8295 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
8296 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
f88c65f7 8297}
32b5b1aa 8298\f
a8bac9ab
RH
8299/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
8300 for the target. */
8301
8302void
b96a374d 8303ix86_expand_clear (rtx dest)
a8bac9ab
RH
8304{
8305 rtx tmp;
8306
8307 /* We play register width games, which are only valid after reload. */
8308 if (!reload_completed)
8309 abort ();
8310
8311 /* Avoid HImode and its attendant prefix byte. */
8312 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
8313 dest = gen_rtx_REG (SImode, REGNO (dest));
8314
8315 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
8316
8317 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
8318 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
8319 {
8320 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
8321 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8322 }
8323
8324 emit_insn (tmp);
8325}
8326
f996902d
RH
8327/* X is an unchanging MEM. If it is a constant pool reference, return
8328 the constant pool rtx, else NULL. */
8329
8330static rtx
b96a374d 8331maybe_get_pool_constant (rtx x)
f996902d 8332{
69bd9368 8333 x = ix86_delegitimize_address (XEXP (x, 0));
f996902d
RH
8334
8335 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8336 return get_pool_constant (x);
8337
8338 return NULL_RTX;
8339}
8340
/* Expand a scalar move of mode MODE from operands[1] to operands[0],
   legitimizing TLS, PIC and constant operands as required, and emit
   the resulting SET insn(s).  */

void
ix86_expand_move (enum machine_mode mode, rtx operands[])
{
  int strict = (reload_in_progress || reload_completed);
  rtx op0, op1;
  enum tls_model model;

  op0 = operands[0];
  op1 = operands[1];

  /* TLS symbols need their access sequence materialized first.  If
     legitimization already produced the destination, we are done.  */
  model = tls_symbolic_operand (op1, Pmode);
  if (model)
    {
      op1 = legitimize_tls_address (op1, model, true);
      op1 = force_operand (op1, op0);
      if (op1 == op0)
	return;
    }

  if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
    {
#if TARGET_MACHO
      /* Darwin PIC: route symbol references through the machopic
	 indirection machinery.  */
      if (MACHOPIC_PURE)
	{
	  rtx temp = ((reload_in_progress
		       || ((op0 && GET_CODE (op0) == REG)
			   && mode == Pmode))
		      ? op0 : gen_reg_rtx (Pmode));
	  op1 = machopic_indirect_data_reference (op1, temp);
	  op1 = machopic_legitimize_pic_address (op1, mode,
						 temp == op1 ? 0 : temp);
	}
      else if (MACHOPIC_INDIRECT)
	op1 = machopic_indirect_data_reference (op1, 0);
      if (op0 == op1)
	return;
#else
      if (GET_CODE (op0) == MEM)
	op1 = force_reg (Pmode, op1);
      else
	{
	  rtx temp = op0;
	  if (GET_CODE (temp) != REG)
	    temp = gen_reg_rtx (Pmode);
	  temp = legitimize_pic_address (op1, temp);
	  if (temp == op0)
	    return;
	  op1 = temp;
	}
#endif /* TARGET_MACHO */
    }
  else
    {
      /* mem->mem moves need the source in a register, except for a
	 push whose size matches PUSH_ROUNDING (which the push pattern
	 can handle directly).  */
      if (GET_CODE (op0) == MEM
	  && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
	      || !push_operand (op0, mode))
	  && GET_CODE (op1) == MEM)
	op1 = force_reg (mode, op1);

      if (push_operand (op0, mode)
	  && ! general_no_elim_operand (op1, mode))
	op1 = copy_to_mode_reg (mode, op1);

      /* Force large constants in 64bit compilation into register
	 to get them CSEed.  */
      if (TARGET_64BIT && mode == DImode
	  && immediate_operand (op1, mode)
	  && !x86_64_zero_extended_value (op1)
	  && !register_operand (op0, mode)
	  && optimize && !reload_completed && !reload_in_progress)
	op1 = copy_to_mode_reg (mode, op1);

      if (FLOAT_MODE_P (mode))
	{
	  /* If we are loading a floating point constant to a register,
	     force the value to memory now, since we'll get better code
	     out the back end.  */

	  if (strict)
	    ;
	  else if (GET_CODE (op1) == CONST_DOUBLE)
	    {
	      op1 = validize_mem (force_const_mem (mode, op1));
	      if (!register_operand (op0, mode))
		{
		  rtx temp = gen_reg_rtx (mode);
		  emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
		  emit_move_insn (op0, temp);
		  return;
		}
	    }
	}
    }

  emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
}
e9a25f70 8437
/* Expand a vector-mode move of OPERANDS in mode MODE.

   Legitimizes the operands so a single SET insn can be emitted:
   non-zero vector constants are spilled to the constant pool, and a
   move where neither operand is a register gets an intermediate
   register when new pseudos may still be created.  */

void
ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
{
  /* Force constants other than zero into memory.  We do not know how
     the instructions used to build constants modify the upper 64 bits
     of the register, once we have that information we may be able
     to handle some of them more efficiently.  */
  if ((reload_in_progress | reload_completed) == 0
      && register_operand (operands[0], mode)
      && CONSTANT_P (operands[1]) && operands[1] != CONST0_RTX (mode))
    operands[1] = validize_mem (force_const_mem (mode, operands[1]));

  /* Make operand1 a register if it isn't already.
     NOTE(review): the guard also requires operands[0] not to be a
     register, i.e. this handles the mem/nonreg-to-mem case.  */
  if (!no_new_pseudos
      && !register_operand (operands[0], mode)
      && !register_operand (operands[1], mode))
    {
      rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
      emit_move_insn (operands[0], temp);
      return;
    }

  emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
}
e37af218 8462
/* Attempt to expand a binary operator.  Make the expansion closer to the
   actual machine, then just general_operand, which will allow 3 separate
   memory references (one output, two input) in a single insn.

   CODE is the operation, MODE the mode of all three operands, and
   OPERANDS is { dest, src1, src2 }.  The emitted pattern clobbers the
   flags register, except during reload where only PLUS is accepted
   and is emitted bare.  */

void
ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
                             rtx operands[])
{
  /* matching_memory: 0 = dest does not match a source,
     1 = dest matches src1, 2 = dest matches src2.  */
  int matching_memory;
  rtx src1, src2, dst, op, clob;

  dst = operands[0];
  src1 = operands[1];
  src2 = operands[2];

  /* Recognize <var1> = <value> <op> <var1> for commutative operators */
  if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
      && (rtx_equal_p (dst, src2)
          || immediate_operand (src1, mode)))
    {
      rtx temp = src1;
      src1 = src2;
      src2 = temp;
    }

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  matching_memory = 0;
  if (GET_CODE (dst) == MEM)
    {
      if (rtx_equal_p (dst, src1))
        matching_memory = 1;
      else if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
               && rtx_equal_p (dst, src2))
        matching_memory = 2;
      else
        dst = gen_reg_rtx (mode);
    }

  /* Both source operands cannot be in memory.  */
  if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
    {
      if (matching_memory != 2)
        src2 = force_reg (mode, src2);
      else
        src1 = force_reg (mode, src1);
    }

  /* If the operation is not commutable, source 1 cannot be a constant
     or non-matching memory.  */
  if ((CONSTANT_P (src1)
       || (!matching_memory && GET_CODE (src1) == MEM))
      && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
    src1 = force_reg (mode, src1);

  /* If optimizing, copy to regs to improve CSE */
  if (optimize && ! no_new_pseudos)
    {
      if (GET_CODE (dst) == MEM)
        dst = gen_reg_rtx (mode);
      if (GET_CODE (src1) == MEM)
        src1 = force_reg (mode, src1);
      if (GET_CODE (src2) == MEM)
        src2 = force_reg (mode, src2);
    }

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
  if (reload_in_progress)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
         it doesn't want to clobber it.  We can only do this with PLUS.  */
      if (code != PLUS)
        abort ();
      emit_insn (op);
    }
  else
    {
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}
8550
8551/* Return TRUE or FALSE depending on whether the binary operator meets the
8552 appropriate constraints. */
8553
8554int
b96a374d
AJ
8555ix86_binary_operator_ok (enum rtx_code code,
8556 enum machine_mode mode ATTRIBUTE_UNUSED,
8557 rtx operands[3])
e075ae69
RH
8558{
8559 /* Both source operands cannot be in memory. */
8560 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
8561 return 0;
8562 /* If the operation is not commutable, source 1 cannot be a constant. */
ec8e098d 8563 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
e075ae69
RH
8564 return 0;
8565 /* If the destination is memory, we must have a matching source operand. */
8566 if (GET_CODE (operands[0]) == MEM
8567 && ! (rtx_equal_p (operands[0], operands[1])
ec8e098d 8568 || (GET_RTX_CLASS (code) == RTX_COMM_ARITH
e075ae69
RH
8569 && rtx_equal_p (operands[0], operands[2]))))
8570 return 0;
06a964de 8571 /* If the operation is not commutable and the source 1 is memory, we must
d6a7951f 8572 have a matching destination. */
06a964de 8573 if (GET_CODE (operands[1]) == MEM
ec8e098d 8574 && GET_RTX_CLASS (code) != RTX_COMM_ARITH
06a964de
JH
8575 && ! rtx_equal_p (operands[0], operands[1]))
8576 return 0;
e075ae69
RH
8577 return 1;
8578}
8579
/* Attempt to expand a unary operator.  Make the expansion closer to the
   actual machine, then just general_operand, which will allow 2 separate
   memory references (one output, one input) in a single insn.

   CODE is the operation, MODE the operand mode, OPERANDS is
   { dest, src }.  All codes except NOT are emitted with a flags-register
   clobber; NOT is emitted bare and is the only code accepted during
   reload.  */

void
ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
                            rtx operands[])
{
  /* matching_memory: nonzero when dest is memory and equals src.  */
  int matching_memory;
  rtx src, dst, op, clob;

  dst = operands[0];
  src = operands[1];

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  matching_memory = 0;
  if (GET_CODE (dst) == MEM)
    {
      if (rtx_equal_p (dst, src))
        matching_memory = 1;
      else
        dst = gen_reg_rtx (mode);
    }

  /* When source operand is memory, destination must match.  */
  if (!matching_memory && GET_CODE (src) == MEM)
    src = force_reg (mode, src);

  /* If optimizing, copy to regs to improve CSE */
  if (optimize && ! no_new_pseudos)
    {
      if (GET_CODE (dst) == MEM)
        dst = gen_reg_rtx (mode);
      if (GET_CODE (src) == MEM)
        src = force_reg (mode, src);
    }

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
  if (reload_in_progress || code == NOT)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
         it doesn't want to clobber it.  */
      if (code != NOT)
        abort ();
      emit_insn (op);
    }
  else
    {
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}
8639
8640/* Return TRUE or FALSE depending on whether the unary operator meets the
8641 appropriate constraints. */
8642
8643int
b96a374d
AJ
8644ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
8645 enum machine_mode mode ATTRIBUTE_UNUSED,
8646 rtx operands[2] ATTRIBUTE_UNUSED)
e075ae69 8647{
06a964de
JH
8648 /* If one of operands is memory, source and destination must match. */
8649 if ((GET_CODE (operands[0]) == MEM
8650 || GET_CODE (operands[1]) == MEM)
8651 && ! rtx_equal_p (operands[0], operands[1]))
8652 return FALSE;
e075ae69
RH
8653 return TRUE;
8654}
8655
/* Return TRUE or FALSE depending on whether the first SET in INSN
   has source and destination with matching CC modes, and that the
   CC mode is at least as constrained as REQ_MODE.

   The fall-through cascade below encodes that an insn setting a
   stronger CC mode satisfies requests for the weaker ones:
   CCmode > CCGCmode > CCGOCmode > CCZmode.  */

int
ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
{
  rtx set;
  enum machine_mode set_mode;

  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  if (GET_CODE (set) != SET)
    abort ();
  if (GET_CODE (SET_SRC (set)) != COMPARE)
    abort ();

  set_mode = GET_MODE (SET_DEST (set));
  switch (set_mode)
    {
    case CCNOmode:
      /* CCNOmode only satisfies itself, or a plain CCmode request
         when comparing against zero.  */
      if (req_mode != CCNOmode
          && (req_mode != CCmode
              || XEXP (SET_SRC (set), 1) != const0_rtx))
        return 0;
      break;
    case CCmode:
      if (req_mode == CCGCmode)
        return 0;
      /* FALLTHRU */
    case CCGCmode:
      if (req_mode == CCGOCmode || req_mode == CCNOmode)
        return 0;
      /* FALLTHRU */
    case CCGOCmode:
      if (req_mode == CCZmode)
        return 0;
      /* FALLTHRU */
    case CCZmode:
      break;

    default:
      abort ();
    }

  /* The COMPARE itself must carry the same CC mode as the destination.  */
  return (GET_MODE (SET_SRC (set)) == set_mode);
}
8704
/* Generate insn patterns to do an integer compare of OPERANDS.
   Emits a SET of the flags register from (compare OP0 OP1) and
   returns the CODE test rtx to place in the flags user.  */

static rtx
ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
{
  enum machine_mode cmpmode;
  rtx tmp, flags;

  /* Pick the least constrained CC mode that still captures CODE.  */
  cmpmode = SELECT_CC_MODE (code, op0, op1);
  flags = gen_rtx_REG (cmpmode, FLAGS_REG);

  /* This is very simple, but making the interface the same as in the
     FP case makes the rest of the code easier.  */
  tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
  emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
}
8725
3a3677ff
RH
8726/* Figure out whether to use ordered or unordered fp comparisons.
8727 Return the appropriate mode to use. */
e075ae69 8728
b1cdafbb 8729enum machine_mode
b96a374d 8730ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
e075ae69 8731{
9e7adcb3
JH
8732 /* ??? In order to make all comparisons reversible, we do all comparisons
8733 non-trapping when compiling for IEEE. Once gcc is able to distinguish
8734 all forms trapping and nontrapping comparisons, we can make inequality
8735 comparisons trapping again, since it results in better code when using
8736 FCOM based compares. */
8737 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
3a3677ff
RH
8738}
8739
/* Return the CC mode a comparison CODE of OP0 against OP1 should use.
   FP operands are delegated to ix86_fp_compare_mode; integer codes pick
   the weakest mode whose flags still decide CODE.  */

enum machine_mode
ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
{
  if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
    return ix86_fp_compare_mode (code);
  switch (code)
    {
      /* Only zero flag is needed.  */
    case EQ:			/* ZF=0 */
    case NE:			/* ZF!=0 */
      return CCZmode;
      /* Codes needing carry flag.  */
    case GEU:			/* CF=0 */
    case GTU:			/* CF=0 & ZF=0 */
    case LTU:			/* CF=1 */
    case LEU:			/* CF=1 | ZF=1 */
      return CCmode;
      /* Codes possibly doable only with sign flag when
         comparing against zero.  */
    case GE:			/* SF=OF   or   SF=0 */
    case LT:			/* SF<>OF  or   SF=1 */
      if (op1 == const0_rtx)
        return CCGOCmode;
      else
        /* For other cases Carry flag is not required.  */
        return CCGCmode;
      /* Codes doable only with sign flag when comparing
         against zero, but we miss jump instruction for it
         so we need to use relational tests against overflow
         that thus needs to be zero.  */
    case GT:			/* ZF=0 & SF=OF */
    case LE:			/* ZF=1 | SF<>OF */
      if (op1 == const0_rtx)
        return CCNOmode;
      else
        return CCGCmode;
      /* strcmp pattern do (use flags) and combine may ask us for proper
         mode.  */
    case USE:
      return CCmode;
    default:
      abort ();
    }
}
8784
e129d93a
ILT
8785/* Return the fixed registers used for condition codes. */
8786
8787static bool
8788ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
8789{
8790 *p1 = FLAGS_REG;
8791 *p2 = FPSR_REG;
8792 return true;
8793}
8794
/* If two condition code modes are compatible, return a condition code
   mode which is compatible with both.  Otherwise, return
   VOIDmode.

   CCGCmode/CCGOCmode merge to CCGCmode; any other mix of the integer
   CC modes merges to plain CCmode; the FP modes are only compatible
   with themselves.  */

static enum machine_mode
ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
{
  if (m1 == m2)
    return m1;

  if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
    return VOIDmode;

  if ((m1 == CCGCmode && m2 == CCGOCmode)
      || (m1 == CCGOCmode && m2 == CCGCmode))
    return CCGCmode;

  switch (m1)
    {
    default:
      abort ();

    case CCmode:
    case CCGCmode:
    case CCGOCmode:
    case CCNOmode:
    case CCZmode:
      switch (m2)
        {
        default:
          return VOIDmode;

        case CCmode:
        case CCGCmode:
        case CCGOCmode:
        case CCNOmode:
        case CCZmode:
          return CCmode;
        }

    case CCFPmode:
    case CCFPUmode:
      /* These are only compatible with themselves, which we already
         checked above.  */
      return VOIDmode;
    }
}
8842
/* Return true if we should use an FCOMI instruction for this fp comparison.  */

int
ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
{
  /* FCOMI wins when its cost equals the overall best comparison cost,
     for either CODE itself or its operand-swapped form.  */
  enum rtx_code swapped_code = swap_condition (code);
  return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
          || (ix86_fp_comparison_cost (swapped_code)
              == ix86_fp_comparison_fcomi_cost (swapped_code)));
}
8853
/* Swap, force into registers, or otherwise massage the two operands
   to a fp comparison.  The operands are updated in place; the new
   comparison code is returned.

   CODE is the requested comparison; *POP0 and *POP1 are the operands
   and are rewritten to forms the FP compare patterns accept.  */

static enum rtx_code
ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
{
  enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
  rtx op0 = *pop0, op1 = *pop1;
  enum machine_mode op_mode = GET_MODE (op0);
  int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);

  /* All of the unordered compare instructions only work on registers.
     The same is true of the XFmode compare instructions.  The same is
     true of the fcomi compare instructions.  */

  if (!is_sse
      && (fpcmp_mode == CCFPUmode
          || op_mode == XFmode
          || ix86_use_fcomi_compare (code)))
    {
      op0 = force_reg (op_mode, op0);
      op1 = force_reg (op_mode, op1);
    }
  else
    {
      /* %%% We only allow op1 in memory; op0 must be st(0).  So swap
         things around if they appear profitable, otherwise force op0
         into a register.  */

      if (standard_80387_constant_p (op0) == 0
          || (GET_CODE (op0) == MEM
              && ! (standard_80387_constant_p (op1) == 0
                    || GET_CODE (op1) == MEM)))
        {
          rtx tmp;
          tmp = op0, op0 = op1, op1 = tmp;
          code = swap_condition (code);
        }

      if (GET_CODE (op0) != REG)
        op0 = force_reg (op_mode, op0);

      if (CONSTANT_P (op1))
        {
          /* Loadable 387 constants (0.0, 1.0, ...) go to a register;
             anything else is spilled to the constant pool.  */
          if (standard_80387_constant_p (op1))
            op1 = force_reg (op_mode, op1);
          else
            op1 = validize_mem (force_const_mem (op_mode, op1));
        }
    }

  /* Try to rearrange the comparison to make it cheaper.  */
  if (ix86_fp_comparison_cost (code)
      > ix86_fp_comparison_cost (swap_condition (code))
      && (GET_CODE (op1) == REG || !no_new_pseudos))
    {
      rtx tmp;
      tmp = op0, op0 = op1, op1 = tmp;
      code = swap_condition (code);
      if (GET_CODE (op0) != REG)
        op0 = force_reg (op_mode, op0);
    }

  *pop0 = op0;
  *pop1 = op1;
  return code;
}
8922
c0c102a9
JH
8923/* Convert comparison codes we use to represent FP comparison to integer
8924 code that will result in proper branch. Return UNKNOWN if no such code
8925 is available. */
8926static enum rtx_code
b96a374d 8927ix86_fp_compare_code_to_integer (enum rtx_code code)
c0c102a9
JH
8928{
8929 switch (code)
8930 {
8931 case GT:
8932 return GTU;
8933 case GE:
8934 return GEU;
8935 case ORDERED:
8936 case UNORDERED:
8937 return code;
8938 break;
8939 case UNEQ:
8940 return EQ;
8941 break;
8942 case UNLT:
8943 return LTU;
8944 break;
8945 case UNLE:
8946 return LEU;
8947 break;
8948 case LTGT:
8949 return NE;
8950 break;
8951 default:
8952 return UNKNOWN;
8953 }
8954}
8955
/* Split comparison code CODE into comparisons we can do using branch
   instructions.  BYPASS_CODE is comparison code for branch that will
   branch around FIRST_CODE and SECOND_CODE.  If some of branches
   is not required, set value to NIL.
   We never require more than two branches.  */
static void
ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
                          enum rtx_code *first_code,
                          enum rtx_code *second_code)
{
  *first_code = code;
  *bypass_code = NIL;
  *second_code = NIL;

  /* The fcomi comparison sets flags as follows:

       cmp    ZF PF CF
       >      0  0  0
       <      0  0  1
       =      1  0  0
       un     1  1  1 */

  switch (code)
    {
    case GT:			/* GTU - CF=0 & ZF=0 */
    case GE:			/* GEU - CF=0 */
    case ORDERED:		/* PF=0 */
    case UNORDERED:		/* PF=1 */
    case UNEQ:			/* EQ - ZF=1 */
    case UNLT:			/* LTU - CF=1 */
    case UNLE:			/* LEU - CF=1 | ZF=1 */
    case LTGT:			/* EQ - ZF=0 */
      /* Directly testable with a single branch.  */
      break;
    case LT:			/* LTU - CF=1 - fails on unordered */
      *first_code = UNLT;
      *bypass_code = UNORDERED;
      break;
    case LE:			/* LEU - CF=1 | ZF=1 - fails on unordered */
      *first_code = UNLE;
      *bypass_code = UNORDERED;
      break;
    case EQ:			/* EQ - ZF=1 - fails on unordered */
      *first_code = UNEQ;
      *bypass_code = UNORDERED;
      break;
    case NE:			/* NE - ZF=0 - fails on unordered */
      *first_code = LTGT;
      *second_code = UNORDERED;
      break;
    case UNGE:			/* GEU - CF=0 - fails on unordered */
      *first_code = GE;
      *second_code = UNORDERED;
      break;
    case UNGT:			/* GTU - CF=0 & ZF=0 - fails on unordered */
      *first_code = GT;
      *second_code = UNORDERED;
      break;
    default:
      abort ();
    }
  /* Without IEEE semantics NaNs need not be honored, so one branch
     always suffices.  */
  if (!TARGET_IEEE_FP)
    {
      *second_code = NIL;
      *bypass_code = NIL;
    }
}
9022
9e7adcb3 9023/* Return cost of comparison done fcom + arithmetics operations on AX.
5bdc5878 9024 All following functions do use number of instructions as a cost metrics.
9e7adcb3
JH
9025 In future this should be tweaked to compute bytes for optimize_size and
9026 take into account performance of various instructions on various CPUs. */
9027static int
b96a374d 9028ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
9e7adcb3
JH
9029{
9030 if (!TARGET_IEEE_FP)
9031 return 4;
9032 /* The cost of code output by ix86_expand_fp_compare. */
9033 switch (code)
9034 {
9035 case UNLE:
9036 case UNLT:
9037 case LTGT:
9038 case GT:
9039 case GE:
9040 case UNORDERED:
9041 case ORDERED:
9042 case UNEQ:
9043 return 4;
9044 break;
9045 case LT:
9046 case NE:
9047 case EQ:
9048 case UNGE:
9049 return 5;
9050 break;
9051 case LE:
9052 case UNGT:
9053 return 6;
9054 break;
9055 default:
9056 abort ();
9057 }
9058}
9059
/* Return cost of comparison done using fcomi operation.
   See ix86_fp_comparison_arithmetics_cost for the metrics.  */
static int
ix86_fp_comparison_fcomi_cost (enum rtx_code code)
{
  enum rtx_code bypass_code, first_code, second_code;
  /* Return arbitrarily high cost when instruction is not supported - this
     prevents gcc from using it.  */
  if (!TARGET_CMOVE)
    return 1024;
  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
  /* Base cost 2 (fcomi + jump), plus 1 when an extra branch is needed.  */
  return (bypass_code != NIL || second_code != NIL) + 2;
}
9073
/* Return cost of comparison done using sahf operation.
   See ix86_fp_comparison_arithmetics_cost for the metrics.  */
static int
ix86_fp_comparison_sahf_cost (enum rtx_code code)
{
  enum rtx_code bypass_code, first_code, second_code;
  /* Return arbitrarily high cost when instruction is not preferred - this
     avoids gcc from using it.  */
  if (!TARGET_USE_SAHF && !optimize_size)
    return 1024;
  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
  /* Base cost 3 (fnstsw + sahf + jump), plus 1 for an extra branch.  */
  return (bypass_code != NIL || second_code != NIL) + 3;
}
9087
9088/* Compute cost of the comparison done using any method.
9089 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9090static int
b96a374d 9091ix86_fp_comparison_cost (enum rtx_code code)
9e7adcb3
JH
9092{
9093 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
9094 int min;
9095
9096 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
9097 sahf_cost = ix86_fp_comparison_sahf_cost (code);
9098
9099 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
9100 if (min > sahf_cost)
9101 min = sahf_cost;
9102 if (min > fcomi_cost)
9103 min = fcomi_cost;
9104 return min;
9105}
c0c102a9 9106
/* Generate insn patterns to do a floating point compare of OPERANDS.

   CODE compares OP0 against OP1.  SCRATCH, when non-NULL, is an HImode
   register to receive the FNSTSW status word; a fresh pseudo is used
   when it is NULL.  If the chosen sequence needs extra jumps,
   *SECOND_TEST and *BYPASS_TEST receive the additional flag tests
   (NULL_RTX otherwise).  Returns the comparison rtx to feed the flags
   user (bcc, scc or cmov).  */

static rtx
ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
                        rtx *second_test, rtx *bypass_test)
{
  enum machine_mode fpcmp_mode, intcmp_mode;
  rtx tmp, tmp2;
  int cost = ix86_fp_comparison_cost (code);
  enum rtx_code bypass_code, first_code, second_code;

  fpcmp_mode = ix86_fp_compare_mode (code);
  code = ix86_prepare_fp_compare_args (code, &op0, &op1);

  if (second_test)
    *second_test = NULL_RTX;
  if (bypass_test)
    *bypass_test = NULL_RTX;

  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);

  /* Do fcomi/sahf based test when profitable.  */
  if ((bypass_code == NIL || bypass_test)
      && (second_code == NIL || second_test)
      && ix86_fp_comparison_arithmetics_cost (code) > cost)
    {
      if (TARGET_CMOVE)
        {
          /* fcomi: compare directly into the flags register.  */
          tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
          tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
                             tmp);
          emit_insn (tmp);
        }
      else
        {
          /* fnstsw + sahf: copy the FPU status word into AH and load
             it into the flags.  */
          tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
          tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
          if (!scratch)
            scratch = gen_reg_rtx (HImode);
          emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
          emit_insn (gen_x86_sahf_1 (scratch));
        }

      /* The FP codes work out to act like unsigned.  */
      intcmp_mode = fpcmp_mode;
      code = first_code;
      if (bypass_code != NIL)
        *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
                                       gen_rtx_REG (intcmp_mode, FLAGS_REG),
                                       const0_rtx);
      if (second_code != NIL)
        *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
                                       gen_rtx_REG (intcmp_mode, FLAGS_REG),
                                       const0_rtx);
    }
  else
    {
      /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first.  */
      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
      tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
      if (!scratch)
        scratch = gen_reg_rtx (HImode);
      emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));

      /* In the unordered case, we have to check C2 for NaN's, which
         doesn't happen to work out to anything nice combination-wise.
         So do some bit twiddling on the value we've got in AH to come
         up with an appropriate set of condition codes.

         The masks pick FPU status bits out of AH; per the fcomi flags
         table in ix86_fp_comparison_codes, 0x01/0x04/0x40 correspond to
         the CF/PF/ZF images and 0x45 is their union.  */

      intcmp_mode = CCNOmode;
      switch (code)
        {
        case GT:
        case UNGT:
          if (code == GT || !TARGET_IEEE_FP)
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
              code = EQ;
            }
          else
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
              emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
              intcmp_mode = CCmode;
              code = GEU;
            }
          break;
        case LT:
        case UNLT:
          if (code == LT && TARGET_IEEE_FP)
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
              intcmp_mode = CCmode;
              code = EQ;
            }
          else
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
              code = NE;
            }
          break;
        case GE:
        case UNGE:
          if (code == GE || !TARGET_IEEE_FP)
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
              code = EQ;
            }
          else
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
                                             GEN_INT (0x01)));
              code = NE;
            }
          break;
        case LE:
        case UNLE:
          if (code == LE && TARGET_IEEE_FP)
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
              emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
              intcmp_mode = CCmode;
              code = LTU;
            }
          else
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
              code = NE;
            }
          break;
        case EQ:
        case UNEQ:
          if (code == EQ && TARGET_IEEE_FP)
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
              intcmp_mode = CCmode;
              code = EQ;
            }
          else
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
              code = NE;
              break;
            }
          break;
        case NE:
        case LTGT:
          if (code == NE && TARGET_IEEE_FP)
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
                                             GEN_INT (0x40)));
              code = NE;
            }
          else
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
              code = EQ;
            }
          break;

        case UNORDERED:
          emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
          code = NE;
          break;
        case ORDERED:
          emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
          code = EQ;
          break;

        default:
          abort ();
        }
    }

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode,
                         gen_rtx_REG (intcmp_mode, FLAGS_REG),
                         const0_rtx);
}
9293
/* Expand comparison CODE between the saved comparison operands
   ix86_compare_op0 and ix86_compare_op1, dispatching to the FP or
   integer expander by operand mode.  *SECOND_TEST and *BYPASS_TEST,
   when non-NULL, receive any extra FP flag tests (NULL_RTX if unused).
   Returns the flags test rtx.  */

rtx
ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
{
  rtx op0, op1, ret;
  op0 = ix86_compare_op0;
  op1 = ix86_compare_op1;

  if (second_test)
    *second_test = NULL_RTX;
  if (bypass_test)
    *bypass_test = NULL_RTX;

  if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
    ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
                                  second_test, bypass_test);
  else
    ret = ix86_expand_int_compare (code, op0, op1);

  return ret;
}
9314
03598dea
JH
9315/* Return true if the CODE will result in nontrivial jump sequence. */
9316bool
b96a374d 9317ix86_fp_jump_nontrivial_p (enum rtx_code code)
03598dea
JH
9318{
9319 enum rtx_code bypass_code, first_code, second_code;
9320 if (!TARGET_CMOVE)
9321 return true;
9322 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9323 return bypass_code != NIL || second_code != NIL;
9324}
9325
/* Expand a conditional branch: jump to LABEL when comparison CODE holds
   for the operands saved in ix86_compare_op0/ix86_compare_op1.

   Integer modes emit a single compare + conditional jump.  FP modes
   either expand immediately (single-jump case with TARGET_CMOVE) or
   emit a compound insn split later.  DImode on 32-bit targets is
   decomposed into SImode compares of the high and low words.  */

void
ix86_expand_branch (enum rtx_code code, rtx label)
{
  rtx tmp;

  switch (GET_MODE (ix86_compare_op0))
    {
    case QImode:
    case HImode:
    case SImode:
      simple:
      tmp = ix86_expand_compare (code, NULL, NULL);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
                                  gen_rtx_LABEL_REF (VOIDmode, label),
                                  pc_rtx);
      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
      return;

    case SFmode:
    case DFmode:
    case XFmode:
      {
        rtvec vec;
        int use_fcomi;
        enum rtx_code bypass_code, first_code, second_code;

        code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
                                             &ix86_compare_op1);

        ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);

        /* Check whether we will use the natural sequence with one jump.  If
           so, we can expand jump early.  Otherwise delay expansion by
           creating compound insn to not confuse optimizers.  */
        if (bypass_code == NIL && second_code == NIL
            && TARGET_CMOVE)
          {
            ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
                                  gen_rtx_LABEL_REF (VOIDmode, label),
                                  pc_rtx, NULL_RTX);
          }
        else
          {
            tmp = gen_rtx_fmt_ee (code, VOIDmode,
                                  ix86_compare_op0, ix86_compare_op1);
            tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
                                        gen_rtx_LABEL_REF (VOIDmode, label),
                                        pc_rtx);
            tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);

            /* Clobber the FP status/flags regs (17, 18), plus an HImode
               scratch for fnstsw when fcomi will not be used.  */
            use_fcomi = ix86_use_fcomi_compare (code);
            vec = rtvec_alloc (3 + !use_fcomi);
            RTVEC_ELT (vec, 0) = tmp;
            RTVEC_ELT (vec, 1)
              = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
            RTVEC_ELT (vec, 2)
              = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
            if (! use_fcomi)
              RTVEC_ELT (vec, 3)
                = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));

            emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
          }
        return;
      }

    case DImode:
      if (TARGET_64BIT)
        goto simple;
      /* Expand DImode branch into multiple compare+branch.  */
      {
        rtx lo[2], hi[2], label2;
        enum rtx_code code1, code2, code3;

        /* Keep any constant in op1 so the word-wise compares below see
           constants on a known side.  */
        if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
          {
            tmp = ix86_compare_op0;
            ix86_compare_op0 = ix86_compare_op1;
            ix86_compare_op1 = tmp;
            code = swap_condition (code);
          }
        split_di (&ix86_compare_op0, 1, lo+0, hi+0);
        split_di (&ix86_compare_op1, 1, lo+1, hi+1);

        /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
           avoid two branches.  This costs one extra insn, so disable when
           optimizing for size.  */

        if ((code == EQ || code == NE)
            && (!optimize_size
                || hi[1] == const0_rtx || lo[1] == const0_rtx))
          {
            rtx xor0, xor1;

            xor1 = hi[0];
            if (hi[1] != const0_rtx)
              xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
                                   NULL_RTX, 0, OPTAB_WIDEN);

            xor0 = lo[0];
            if (lo[1] != const0_rtx)
              xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
                                   NULL_RTX, 0, OPTAB_WIDEN);

            tmp = expand_binop (SImode, ior_optab, xor1, xor0,
                                NULL_RTX, 0, OPTAB_WIDEN);

            /* Recurse as an SImode compare of the OR result against 0.  */
            ix86_compare_op0 = tmp;
            ix86_compare_op1 = const0_rtx;
            ix86_expand_branch (code, label);
            return;
          }

        /* Otherwise, if we are doing less-than or greater-or-equal-than,
           op1 is a constant and the low word is zero, then we can just
           examine the high word.  */

        if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
          switch (code)
            {
            case LT: case LTU: case GE: case GEU:
              ix86_compare_op0 = hi[0];
              ix86_compare_op1 = hi[1];
              ix86_expand_branch (code, label);
              return;
            default:
              break;
            }

        /* Otherwise, we need two or three jumps.  */

        label2 = gen_label_rtx ();

        code1 = code;
        code2 = swap_condition (code);
        code3 = unsigned_condition (code);

        switch (code)
          {
          case LT: case GT: case LTU: case GTU:
            break;

          case LE:   code1 = LT;  code2 = GT;  break;
          case GE:   code1 = GT;  code2 = LT;  break;
          case LEU:  code1 = LTU; code2 = GTU; break;
          case GEU:  code1 = GTU; code2 = LTU; break;

          case EQ:   code1 = NIL; code2 = NE;  break;
          case NE:   code2 = NIL;              break;

          default:
            abort ();
          }

        /*
         * a < b =>
         *    if (hi(a) < hi(b)) goto true;
         *    if (hi(a) > hi(b)) goto false;
         *    if (lo(a) < lo(b)) goto true;
         *  false:
         */

        ix86_compare_op0 = hi[0];
        ix86_compare_op1 = hi[1];

        if (code1 != NIL)
          ix86_expand_branch (code1, label);
        if (code2 != NIL)
          ix86_expand_branch (code2, label2);

        /* The low-word compare is always unsigned (code3).  */
        ix86_compare_op0 = lo[0];
        ix86_compare_op1 = lo[1];
        ix86_expand_branch (code3, label);

        if (code2 != NIL)
          emit_label (label2);
        return;
      }

    default:
      abort ();
    }
}
e075ae69 9509
9e7adcb3
JH
9510/* Split branch based on floating point condition. */
9511void
b96a374d
AJ
9512ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
9513 rtx target1, rtx target2, rtx tmp)
9e7adcb3
JH
9514{
9515 rtx second, bypass;
9516 rtx label = NULL_RTX;
03598dea 9517 rtx condition;
6b24c259
JH
9518 int bypass_probability = -1, second_probability = -1, probability = -1;
9519 rtx i;
9e7adcb3
JH
9520
9521 if (target2 != pc_rtx)
9522 {
9523 rtx tmp = target2;
9524 code = reverse_condition_maybe_unordered (code);
9525 target2 = target1;
9526 target1 = tmp;
9527 }
9528
9529 condition = ix86_expand_fp_compare (code, op1, op2,
9530 tmp, &second, &bypass);
6b24c259
JH
9531
9532 if (split_branch_probability >= 0)
9533 {
9534 /* Distribute the probabilities across the jumps.
9535 Assume the BYPASS and SECOND to be always test
9536 for UNORDERED. */
9537 probability = split_branch_probability;
9538
d6a7951f 9539 /* Value of 1 is low enough to make no need for probability
6b24c259
JH
9540 to be updated. Later we may run some experiments and see
9541 if unordered values are more frequent in practice. */
9542 if (bypass)
9543 bypass_probability = 1;
9544 if (second)
9545 second_probability = 1;
9546 }
9e7adcb3
JH
9547 if (bypass != NULL_RTX)
9548 {
9549 label = gen_label_rtx ();
6b24c259
JH
9550 i = emit_jump_insn (gen_rtx_SET
9551 (VOIDmode, pc_rtx,
9552 gen_rtx_IF_THEN_ELSE (VOIDmode,
9553 bypass,
9554 gen_rtx_LABEL_REF (VOIDmode,
9555 label),
9556 pc_rtx)));
9557 if (bypass_probability >= 0)
9558 REG_NOTES (i)
9559 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9560 GEN_INT (bypass_probability),
9561 REG_NOTES (i));
9562 }
9563 i = emit_jump_insn (gen_rtx_SET
9e7adcb3
JH
9564 (VOIDmode, pc_rtx,
9565 gen_rtx_IF_THEN_ELSE (VOIDmode,
6b24c259
JH
9566 condition, target1, target2)));
9567 if (probability >= 0)
9568 REG_NOTES (i)
9569 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9570 GEN_INT (probability),
9571 REG_NOTES (i));
9572 if (second != NULL_RTX)
9e7adcb3 9573 {
6b24c259
JH
9574 i = emit_jump_insn (gen_rtx_SET
9575 (VOIDmode, pc_rtx,
9576 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
9577 target2)));
9578 if (second_probability >= 0)
9579 REG_NOTES (i)
9580 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9581 GEN_INT (second_probability),
9582 REG_NOTES (i));
9e7adcb3 9583 }
9e7adcb3
JH
9584 if (label != NULL_RTX)
9585 emit_label (label);
9586}
9587
32b5b1aa 9588int
b96a374d 9589ix86_expand_setcc (enum rtx_code code, rtx dest)
32b5b1aa 9590{
3a627503 9591 rtx ret, tmp, tmpreg, equiv;
a1b8572c 9592 rtx second_test, bypass_test;
e075ae69 9593
885a70fd
JH
9594 if (GET_MODE (ix86_compare_op0) == DImode
9595 && !TARGET_64BIT)
e075ae69
RH
9596 return 0; /* FAIL */
9597
b932f770
JH
9598 if (GET_MODE (dest) != QImode)
9599 abort ();
e075ae69 9600
a1b8572c 9601 ret = ix86_expand_compare (code, &second_test, &bypass_test);
e075ae69
RH
9602 PUT_MODE (ret, QImode);
9603
9604 tmp = dest;
a1b8572c 9605 tmpreg = dest;
32b5b1aa 9606
e075ae69 9607 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
a1b8572c
JH
9608 if (bypass_test || second_test)
9609 {
9610 rtx test = second_test;
9611 int bypass = 0;
9612 rtx tmp2 = gen_reg_rtx (QImode);
9613 if (bypass_test)
9614 {
9615 if (second_test)
b531087a 9616 abort ();
a1b8572c
JH
9617 test = bypass_test;
9618 bypass = 1;
9619 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
9620 }
9621 PUT_MODE (test, QImode);
9622 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
9623
9624 if (bypass)
9625 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
9626 else
9627 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
9628 }
e075ae69 9629
3a627503
RS
9630 /* Attach a REG_EQUAL note describing the comparison result. */
9631 equiv = simplify_gen_relational (code, QImode,
9632 GET_MODE (ix86_compare_op0),
9633 ix86_compare_op0, ix86_compare_op1);
9634 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
9635
e075ae69 9636 return 1; /* DONE */
32b5b1aa 9637}
e075ae69 9638
c35d187f
RH
9639/* Expand comparison setting or clearing carry flag. Return true when
9640 successful and set pop for the operation. */
9641static bool
b96a374d 9642ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
4977bab6
ZW
9643{
9644 enum machine_mode mode =
9645 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
9646
9647 /* Do not handle DImode compares that go trought special path. Also we can't
43f3a59d 9648 deal with FP compares yet. This is possible to add. */
e6e81735
JH
9649 if ((mode == DImode && !TARGET_64BIT))
9650 return false;
9651 if (FLOAT_MODE_P (mode))
9652 {
9653 rtx second_test = NULL, bypass_test = NULL;
9654 rtx compare_op, compare_seq;
9655
9656 /* Shortcut: following common codes never translate into carry flag compares. */
9657 if (code == EQ || code == NE || code == UNEQ || code == LTGT
9658 || code == ORDERED || code == UNORDERED)
9659 return false;
9660
9661 /* These comparisons require zero flag; swap operands so they won't. */
9662 if ((code == GT || code == UNLE || code == LE || code == UNGT)
9663 && !TARGET_IEEE_FP)
9664 {
9665 rtx tmp = op0;
9666 op0 = op1;
9667 op1 = tmp;
9668 code = swap_condition (code);
9669 }
9670
c51e6d85
KH
9671 /* Try to expand the comparison and verify that we end up with carry flag
9672 based comparison. This is fails to be true only when we decide to expand
9673 comparison using arithmetic that is not too common scenario. */
e6e81735
JH
9674 start_sequence ();
9675 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9676 &second_test, &bypass_test);
9677 compare_seq = get_insns ();
9678 end_sequence ();
9679
9680 if (second_test || bypass_test)
9681 return false;
9682 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9683 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9684 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
9685 else
9686 code = GET_CODE (compare_op);
9687 if (code != LTU && code != GEU)
9688 return false;
9689 emit_insn (compare_seq);
9690 *pop = compare_op;
9691 return true;
9692 }
9693 if (!INTEGRAL_MODE_P (mode))
4977bab6
ZW
9694 return false;
9695 switch (code)
9696 {
9697 case LTU:
9698 case GEU:
9699 break;
9700
9701 /* Convert a==0 into (unsigned)a<1. */
9702 case EQ:
9703 case NE:
9704 if (op1 != const0_rtx)
9705 return false;
9706 op1 = const1_rtx;
9707 code = (code == EQ ? LTU : GEU);
9708 break;
9709
9710 /* Convert a>b into b<a or a>=b-1. */
9711 case GTU:
9712 case LEU:
9713 if (GET_CODE (op1) == CONST_INT)
9714 {
9715 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
9716 /* Bail out on overflow. We still can swap operands but that
43f3a59d 9717 would force loading of the constant into register. */
4977bab6
ZW
9718 if (op1 == const0_rtx
9719 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
9720 return false;
9721 code = (code == GTU ? GEU : LTU);
9722 }
9723 else
9724 {
9725 rtx tmp = op1;
9726 op1 = op0;
9727 op0 = tmp;
9728 code = (code == GTU ? LTU : GEU);
9729 }
9730 break;
9731
ccea753c 9732 /* Convert a>=0 into (unsigned)a<0x80000000. */
4977bab6
ZW
9733 case LT:
9734 case GE:
9735 if (mode == DImode || op1 != const0_rtx)
9736 return false;
ccea753c 9737 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
4977bab6
ZW
9738 code = (code == LT ? GEU : LTU);
9739 break;
9740 case LE:
9741 case GT:
9742 if (mode == DImode || op1 != constm1_rtx)
9743 return false;
ccea753c 9744 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
4977bab6
ZW
9745 code = (code == LE ? GEU : LTU);
9746 break;
9747
9748 default:
9749 return false;
9750 }
ebe75517
JH
9751 /* Swapping operands may cause constant to appear as first operand. */
9752 if (!nonimmediate_operand (op0, VOIDmode))
9753 {
9754 if (no_new_pseudos)
9755 return false;
9756 op0 = force_reg (mode, op0);
9757 }
4977bab6
ZW
9758 ix86_compare_op0 = op0;
9759 ix86_compare_op1 = op1;
9760 *pop = ix86_expand_compare (code, NULL, NULL);
9761 if (GET_CODE (*pop) != LTU && GET_CODE (*pop) != GEU)
9762 abort ();
9763 return true;
9764}
9765
32b5b1aa 9766int
b96a374d 9767ix86_expand_int_movcc (rtx operands[])
32b5b1aa 9768{
e075ae69
RH
9769 enum rtx_code code = GET_CODE (operands[1]), compare_code;
9770 rtx compare_seq, compare_op;
a1b8572c 9771 rtx second_test, bypass_test;
635559ab 9772 enum machine_mode mode = GET_MODE (operands[0]);
4977bab6 9773 bool sign_bit_compare_p = false;;
3a3677ff 9774
e075ae69 9775 start_sequence ();
a1b8572c 9776 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
2f937369 9777 compare_seq = get_insns ();
e075ae69
RH
9778 end_sequence ();
9779
9780 compare_code = GET_CODE (compare_op);
9781
4977bab6
ZW
9782 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
9783 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
9784 sign_bit_compare_p = true;
9785
e075ae69
RH
9786 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
9787 HImode insns, we'd be swallowed in word prefix ops. */
9788
4977bab6 9789 if ((mode != HImode || TARGET_FAST_PREFIX)
635559ab 9790 && (mode != DImode || TARGET_64BIT)
0f290768 9791 && GET_CODE (operands[2]) == CONST_INT
e075ae69
RH
9792 && GET_CODE (operands[3]) == CONST_INT)
9793 {
9794 rtx out = operands[0];
9795 HOST_WIDE_INT ct = INTVAL (operands[2]);
9796 HOST_WIDE_INT cf = INTVAL (operands[3]);
9797 HOST_WIDE_INT diff;
9798
4977bab6
ZW
9799 diff = ct - cf;
9800 /* Sign bit compares are better done using shifts than we do by using
b96a374d 9801 sbb. */
4977bab6
ZW
9802 if (sign_bit_compare_p
9803 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
9804 ix86_compare_op1, &compare_op))
e075ae69 9805 {
e075ae69
RH
9806 /* Detect overlap between destination and compare sources. */
9807 rtx tmp = out;
9808
4977bab6 9809 if (!sign_bit_compare_p)
36583fea 9810 {
e6e81735
JH
9811 bool fpcmp = false;
9812
4977bab6
ZW
9813 compare_code = GET_CODE (compare_op);
9814
e6e81735
JH
9815 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9816 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9817 {
9818 fpcmp = true;
9819 compare_code = ix86_fp_compare_code_to_integer (compare_code);
9820 }
9821
4977bab6
ZW
9822 /* To simplify rest of code, restrict to the GEU case. */
9823 if (compare_code == LTU)
9824 {
9825 HOST_WIDE_INT tmp = ct;
9826 ct = cf;
9827 cf = tmp;
9828 compare_code = reverse_condition (compare_code);
9829 code = reverse_condition (code);
9830 }
e6e81735
JH
9831 else
9832 {
9833 if (fpcmp)
9834 PUT_CODE (compare_op,
9835 reverse_condition_maybe_unordered
9836 (GET_CODE (compare_op)));
9837 else
9838 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
9839 }
4977bab6 9840 diff = ct - cf;
36583fea 9841
4977bab6
ZW
9842 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
9843 || reg_overlap_mentioned_p (out, ix86_compare_op1))
9844 tmp = gen_reg_rtx (mode);
e075ae69 9845
4977bab6 9846 if (mode == DImode)
e6e81735 9847 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
4977bab6 9848 else
e6e81735 9849 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
4977bab6 9850 }
14f73b5a 9851 else
4977bab6
ZW
9852 {
9853 if (code == GT || code == GE)
9854 code = reverse_condition (code);
9855 else
9856 {
9857 HOST_WIDE_INT tmp = ct;
9858 ct = cf;
9859 cf = tmp;
5fb48685 9860 diff = ct - cf;
4977bab6
ZW
9861 }
9862 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
9863 ix86_compare_op1, VOIDmode, 0, -1);
9864 }
e075ae69 9865
36583fea
JH
9866 if (diff == 1)
9867 {
9868 /*
9869 * cmpl op0,op1
9870 * sbbl dest,dest
9871 * [addl dest, ct]
9872 *
9873 * Size 5 - 8.
9874 */
9875 if (ct)
b96a374d 9876 tmp = expand_simple_binop (mode, PLUS,
635559ab 9877 tmp, GEN_INT (ct),
4977bab6 9878 copy_rtx (tmp), 1, OPTAB_DIRECT);
36583fea
JH
9879 }
9880 else if (cf == -1)
9881 {
9882 /*
9883 * cmpl op0,op1
9884 * sbbl dest,dest
9885 * orl $ct, dest
9886 *
9887 * Size 8.
9888 */
635559ab
JH
9889 tmp = expand_simple_binop (mode, IOR,
9890 tmp, GEN_INT (ct),
4977bab6 9891 copy_rtx (tmp), 1, OPTAB_DIRECT);
36583fea
JH
9892 }
9893 else if (diff == -1 && ct)
9894 {
9895 /*
9896 * cmpl op0,op1
9897 * sbbl dest,dest
06ec023f 9898 * notl dest
36583fea
JH
9899 * [addl dest, cf]
9900 *
9901 * Size 8 - 11.
9902 */
4977bab6 9903 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
635559ab 9904 if (cf)
b96a374d 9905 tmp = expand_simple_binop (mode, PLUS,
4977bab6
ZW
9906 copy_rtx (tmp), GEN_INT (cf),
9907 copy_rtx (tmp), 1, OPTAB_DIRECT);
36583fea
JH
9908 }
9909 else
9910 {
9911 /*
9912 * cmpl op0,op1
9913 * sbbl dest,dest
06ec023f 9914 * [notl dest]
36583fea
JH
9915 * andl cf - ct, dest
9916 * [addl dest, ct]
9917 *
9918 * Size 8 - 11.
9919 */
06ec023f
RB
9920
9921 if (cf == 0)
9922 {
9923 cf = ct;
9924 ct = 0;
4977bab6 9925 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
06ec023f
RB
9926 }
9927
635559ab 9928 tmp = expand_simple_binop (mode, AND,
4977bab6 9929 copy_rtx (tmp),
d8bf17f9 9930 gen_int_mode (cf - ct, mode),
4977bab6 9931 copy_rtx (tmp), 1, OPTAB_DIRECT);
635559ab 9932 if (ct)
b96a374d 9933 tmp = expand_simple_binop (mode, PLUS,
4977bab6
ZW
9934 copy_rtx (tmp), GEN_INT (ct),
9935 copy_rtx (tmp), 1, OPTAB_DIRECT);
36583fea 9936 }
e075ae69 9937
4977bab6
ZW
9938 if (!rtx_equal_p (tmp, out))
9939 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
e075ae69
RH
9940
9941 return 1; /* DONE */
9942 }
9943
e075ae69
RH
9944 if (diff < 0)
9945 {
9946 HOST_WIDE_INT tmp;
9947 tmp = ct, ct = cf, cf = tmp;
9948 diff = -diff;
734dba19
JH
9949 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9950 {
9951 /* We may be reversing unordered compare to normal compare, that
9952 is not valid in general (we may convert non-trapping condition
9953 to trapping one), however on i386 we currently emit all
9954 comparisons unordered. */
9955 compare_code = reverse_condition_maybe_unordered (compare_code);
9956 code = reverse_condition_maybe_unordered (code);
9957 }
9958 else
9959 {
9960 compare_code = reverse_condition (compare_code);
9961 code = reverse_condition (code);
9962 }
e075ae69 9963 }
0f2a3457
JJ
9964
9965 compare_code = NIL;
9966 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
9967 && GET_CODE (ix86_compare_op1) == CONST_INT)
9968 {
9969 if (ix86_compare_op1 == const0_rtx
9970 && (code == LT || code == GE))
9971 compare_code = code;
9972 else if (ix86_compare_op1 == constm1_rtx)
9973 {
9974 if (code == LE)
9975 compare_code = LT;
9976 else if (code == GT)
9977 compare_code = GE;
9978 }
9979 }
9980
9981 /* Optimize dest = (op0 < 0) ? -1 : cf. */
9982 if (compare_code != NIL
9983 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
9984 && (cf == -1 || ct == -1))
9985 {
9986 /* If lea code below could be used, only optimize
9987 if it results in a 2 insn sequence. */
9988
9989 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
9990 || diff == 3 || diff == 5 || diff == 9)
9991 || (compare_code == LT && ct == -1)
9992 || (compare_code == GE && cf == -1))
9993 {
9994 /*
9995 * notl op1 (if necessary)
9996 * sarl $31, op1
9997 * orl cf, op1
9998 */
9999 if (ct != -1)
10000 {
10001 cf = ct;
b96a374d 10002 ct = -1;
0f2a3457
JJ
10003 code = reverse_condition (code);
10004 }
10005
10006 out = emit_store_flag (out, code, ix86_compare_op0,
10007 ix86_compare_op1, VOIDmode, 0, -1);
10008
10009 out = expand_simple_binop (mode, IOR,
10010 out, GEN_INT (cf),
10011 out, 1, OPTAB_DIRECT);
10012 if (out != operands[0])
10013 emit_move_insn (operands[0], out);
10014
10015 return 1; /* DONE */
10016 }
10017 }
10018
4977bab6 10019
635559ab
JH
10020 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
10021 || diff == 3 || diff == 5 || diff == 9)
4977bab6 10022 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
c05dbe81 10023 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
e075ae69
RH
10024 {
10025 /*
10026 * xorl dest,dest
10027 * cmpl op1,op2
10028 * setcc dest
10029 * lea cf(dest*(ct-cf)),dest
10030 *
10031 * Size 14.
10032 *
10033 * This also catches the degenerate setcc-only case.
10034 */
10035
10036 rtx tmp;
10037 int nops;
10038
10039 out = emit_store_flag (out, code, ix86_compare_op0,
10040 ix86_compare_op1, VOIDmode, 0, 1);
10041
10042 nops = 0;
97f51ac4
RB
10043 /* On x86_64 the lea instruction operates on Pmode, so we need
10044 to get arithmetics done in proper mode to match. */
e075ae69 10045 if (diff == 1)
068f5dea 10046 tmp = copy_rtx (out);
e075ae69
RH
10047 else
10048 {
885a70fd 10049 rtx out1;
068f5dea 10050 out1 = copy_rtx (out);
635559ab 10051 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
e075ae69
RH
10052 nops++;
10053 if (diff & 1)
10054 {
635559ab 10055 tmp = gen_rtx_PLUS (mode, tmp, out1);
e075ae69
RH
10056 nops++;
10057 }
10058 }
10059 if (cf != 0)
10060 {
635559ab 10061 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
e075ae69
RH
10062 nops++;
10063 }
4977bab6 10064 if (!rtx_equal_p (tmp, out))
e075ae69 10065 {
14f73b5a 10066 if (nops == 1)
a5cf80f0 10067 out = force_operand (tmp, copy_rtx (out));
e075ae69 10068 else
4977bab6 10069 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
e075ae69 10070 }
4977bab6 10071 if (!rtx_equal_p (out, operands[0]))
1985ef90 10072 emit_move_insn (operands[0], copy_rtx (out));
e075ae69
RH
10073
10074 return 1; /* DONE */
10075 }
10076
10077 /*
10078 * General case: Jumpful:
10079 * xorl dest,dest cmpl op1, op2
10080 * cmpl op1, op2 movl ct, dest
10081 * setcc dest jcc 1f
10082 * decl dest movl cf, dest
10083 * andl (cf-ct),dest 1:
10084 * addl ct,dest
0f290768 10085 *
e075ae69
RH
10086 * Size 20. Size 14.
10087 *
10088 * This is reasonably steep, but branch mispredict costs are
10089 * high on modern cpus, so consider failing only if optimizing
10090 * for space.
e075ae69
RH
10091 */
10092
4977bab6
ZW
10093 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
10094 && BRANCH_COST >= 2)
e075ae69 10095 {
97f51ac4 10096 if (cf == 0)
e075ae69 10097 {
97f51ac4
RB
10098 cf = ct;
10099 ct = 0;
734dba19 10100 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
0f2a3457
JJ
10101 /* We may be reversing unordered compare to normal compare,
10102 that is not valid in general (we may convert non-trapping
10103 condition to trapping one), however on i386 we currently
10104 emit all comparisons unordered. */
10105 code = reverse_condition_maybe_unordered (code);
10106 else
10107 {
10108 code = reverse_condition (code);
10109 if (compare_code != NIL)
10110 compare_code = reverse_condition (compare_code);
10111 }
10112 }
10113
10114 if (compare_code != NIL)
10115 {
10116 /* notl op1 (if needed)
10117 sarl $31, op1
10118 andl (cf-ct), op1
b96a374d 10119 addl ct, op1
0f2a3457
JJ
10120
10121 For x < 0 (resp. x <= -1) there will be no notl,
10122 so if possible swap the constants to get rid of the
10123 complement.
10124 True/false will be -1/0 while code below (store flag
10125 followed by decrement) is 0/-1, so the constants need
10126 to be exchanged once more. */
10127
10128 if (compare_code == GE || !cf)
734dba19 10129 {
b96a374d 10130 code = reverse_condition (code);
0f2a3457 10131 compare_code = LT;
734dba19
JH
10132 }
10133 else
10134 {
0f2a3457 10135 HOST_WIDE_INT tmp = cf;
b96a374d 10136 cf = ct;
0f2a3457 10137 ct = tmp;
734dba19 10138 }
0f2a3457
JJ
10139
10140 out = emit_store_flag (out, code, ix86_compare_op0,
10141 ix86_compare_op1, VOIDmode, 0, -1);
e075ae69 10142 }
0f2a3457
JJ
10143 else
10144 {
10145 out = emit_store_flag (out, code, ix86_compare_op0,
10146 ix86_compare_op1, VOIDmode, 0, 1);
e075ae69 10147
4977bab6
ZW
10148 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
10149 copy_rtx (out), 1, OPTAB_DIRECT);
0f2a3457 10150 }
e075ae69 10151
4977bab6 10152 out = expand_simple_binop (mode, AND, copy_rtx (out),
d8bf17f9 10153 gen_int_mode (cf - ct, mode),
4977bab6 10154 copy_rtx (out), 1, OPTAB_DIRECT);
97f51ac4 10155 if (ct)
4977bab6
ZW
10156 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
10157 copy_rtx (out), 1, OPTAB_DIRECT);
10158 if (!rtx_equal_p (out, operands[0]))
10159 emit_move_insn (operands[0], copy_rtx (out));
e075ae69
RH
10160
10161 return 1; /* DONE */
10162 }
10163 }
10164
4977bab6 10165 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
e075ae69
RH
10166 {
10167 /* Try a few things more with specific constants and a variable. */
10168
78a0d70c 10169 optab op;
e075ae69
RH
10170 rtx var, orig_out, out, tmp;
10171
4977bab6 10172 if (BRANCH_COST <= 2)
e075ae69
RH
10173 return 0; /* FAIL */
10174
0f290768 10175 /* If one of the two operands is an interesting constant, load a
e075ae69 10176 constant with the above and mask it in with a logical operation. */
0f290768 10177
e075ae69
RH
10178 if (GET_CODE (operands[2]) == CONST_INT)
10179 {
10180 var = operands[3];
4977bab6 10181 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
e075ae69 10182 operands[3] = constm1_rtx, op = and_optab;
4977bab6 10183 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
e075ae69 10184 operands[3] = const0_rtx, op = ior_optab;
78a0d70c
ZW
10185 else
10186 return 0; /* FAIL */
e075ae69
RH
10187 }
10188 else if (GET_CODE (operands[3]) == CONST_INT)
10189 {
10190 var = operands[2];
4977bab6 10191 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
e075ae69 10192 operands[2] = constm1_rtx, op = and_optab;
4977bab6 10193 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
e075ae69 10194 operands[2] = const0_rtx, op = ior_optab;
78a0d70c
ZW
10195 else
10196 return 0; /* FAIL */
e075ae69 10197 }
78a0d70c 10198 else
e075ae69
RH
10199 return 0; /* FAIL */
10200
10201 orig_out = operands[0];
635559ab 10202 tmp = gen_reg_rtx (mode);
e075ae69
RH
10203 operands[0] = tmp;
10204
10205 /* Recurse to get the constant loaded. */
10206 if (ix86_expand_int_movcc (operands) == 0)
10207 return 0; /* FAIL */
10208
10209 /* Mask in the interesting variable. */
635559ab 10210 out = expand_binop (mode, op, var, tmp, orig_out, 0,
e075ae69 10211 OPTAB_WIDEN);
4977bab6
ZW
10212 if (!rtx_equal_p (out, orig_out))
10213 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
e075ae69
RH
10214
10215 return 1; /* DONE */
10216 }
10217
10218 /*
10219 * For comparison with above,
10220 *
10221 * movl cf,dest
10222 * movl ct,tmp
10223 * cmpl op1,op2
10224 * cmovcc tmp,dest
10225 *
10226 * Size 15.
10227 */
10228
635559ab
JH
10229 if (! nonimmediate_operand (operands[2], mode))
10230 operands[2] = force_reg (mode, operands[2]);
10231 if (! nonimmediate_operand (operands[3], mode))
10232 operands[3] = force_reg (mode, operands[3]);
e075ae69 10233
a1b8572c
JH
10234 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10235 {
635559ab 10236 rtx tmp = gen_reg_rtx (mode);
a1b8572c
JH
10237 emit_move_insn (tmp, operands[3]);
10238 operands[3] = tmp;
10239 }
10240 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10241 {
635559ab 10242 rtx tmp = gen_reg_rtx (mode);
a1b8572c
JH
10243 emit_move_insn (tmp, operands[2]);
10244 operands[2] = tmp;
10245 }
4977bab6 10246
c9682caf 10247 if (! register_operand (operands[2], VOIDmode)
b96a374d 10248 && (mode == QImode
4977bab6 10249 || ! register_operand (operands[3], VOIDmode)))
635559ab 10250 operands[2] = force_reg (mode, operands[2]);
a1b8572c 10251
4977bab6
ZW
10252 if (mode == QImode
10253 && ! register_operand (operands[3], VOIDmode))
10254 operands[3] = force_reg (mode, operands[3]);
10255
e075ae69
RH
10256 emit_insn (compare_seq);
10257 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
635559ab 10258 gen_rtx_IF_THEN_ELSE (mode,
e075ae69
RH
10259 compare_op, operands[2],
10260 operands[3])));
a1b8572c 10261 if (bypass_test)
4977bab6 10262 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
635559ab 10263 gen_rtx_IF_THEN_ELSE (mode,
a1b8572c 10264 bypass_test,
4977bab6
ZW
10265 copy_rtx (operands[3]),
10266 copy_rtx (operands[0]))));
a1b8572c 10267 if (second_test)
4977bab6 10268 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
635559ab 10269 gen_rtx_IF_THEN_ELSE (mode,
a1b8572c 10270 second_test,
4977bab6
ZW
10271 copy_rtx (operands[2]),
10272 copy_rtx (operands[0]))));
e075ae69
RH
10273
10274 return 1; /* DONE */
e9a25f70 10275}
e075ae69 10276
32b5b1aa 10277int
b96a374d 10278ix86_expand_fp_movcc (rtx operands[])
32b5b1aa 10279{
e075ae69 10280 enum rtx_code code;
e075ae69 10281 rtx tmp;
a1b8572c 10282 rtx compare_op, second_test, bypass_test;
32b5b1aa 10283
0073023d
JH
10284 /* For SF/DFmode conditional moves based on comparisons
10285 in same mode, we may want to use SSE min/max instructions. */
965f5423
JH
10286 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
10287 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
0073023d 10288 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
fa9f36a1
JH
10289 /* The SSE comparisons does not support the LTGT/UNEQ pair. */
10290 && (!TARGET_IEEE_FP
10291 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
0073023d
JH
10292 /* We may be called from the post-reload splitter. */
10293 && (!REG_P (operands[0])
10294 || SSE_REG_P (operands[0])
52a661a6 10295 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
0073023d
JH
10296 {
10297 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
10298 code = GET_CODE (operands[1]);
10299
10300 /* See if we have (cross) match between comparison operands and
10301 conditional move operands. */
10302 if (rtx_equal_p (operands[2], op1))
10303 {
10304 rtx tmp = op0;
10305 op0 = op1;
10306 op1 = tmp;
10307 code = reverse_condition_maybe_unordered (code);
10308 }
10309 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
10310 {
10311 /* Check for min operation. */
4977bab6 10312 if (code == LT || code == UNLE)
0073023d 10313 {
4977bab6
ZW
10314 if (code == UNLE)
10315 {
10316 rtx tmp = op0;
10317 op0 = op1;
10318 op1 = tmp;
10319 }
0073023d
JH
10320 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10321 if (memory_operand (op0, VOIDmode))
10322 op0 = force_reg (GET_MODE (operands[0]), op0);
10323 if (GET_MODE (operands[0]) == SFmode)
10324 emit_insn (gen_minsf3 (operands[0], op0, op1));
10325 else
10326 emit_insn (gen_mindf3 (operands[0], op0, op1));
10327 return 1;
10328 }
10329 /* Check for max operation. */
4977bab6 10330 if (code == GT || code == UNGE)
0073023d 10331 {
4977bab6
ZW
10332 if (code == UNGE)
10333 {
10334 rtx tmp = op0;
10335 op0 = op1;
10336 op1 = tmp;
10337 }
0073023d
JH
10338 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10339 if (memory_operand (op0, VOIDmode))
10340 op0 = force_reg (GET_MODE (operands[0]), op0);
10341 if (GET_MODE (operands[0]) == SFmode)
10342 emit_insn (gen_maxsf3 (operands[0], op0, op1));
10343 else
10344 emit_insn (gen_maxdf3 (operands[0], op0, op1));
10345 return 1;
10346 }
10347 }
10348 /* Manage condition to be sse_comparison_operator. In case we are
10349 in non-ieee mode, try to canonicalize the destination operand
10350 to be first in the comparison - this helps reload to avoid extra
10351 moves. */
10352 if (!sse_comparison_operator (operands[1], VOIDmode)
10353 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
10354 {
10355 rtx tmp = ix86_compare_op0;
10356 ix86_compare_op0 = ix86_compare_op1;
10357 ix86_compare_op1 = tmp;
10358 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
10359 VOIDmode, ix86_compare_op0,
10360 ix86_compare_op1);
10361 }
d1f87653 10362 /* Similarly try to manage result to be first operand of conditional
fa9f36a1
JH
10363 move. We also don't support the NE comparison on SSE, so try to
10364 avoid it. */
037f20f1
JH
10365 if ((rtx_equal_p (operands[0], operands[3])
10366 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
10367 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
0073023d
JH
10368 {
10369 rtx tmp = operands[2];
10370 operands[2] = operands[3];
92d0fb09 10371 operands[3] = tmp;
0073023d
JH
10372 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
10373 (GET_CODE (operands[1])),
10374 VOIDmode, ix86_compare_op0,
10375 ix86_compare_op1);
10376 }
10377 if (GET_MODE (operands[0]) == SFmode)
10378 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
10379 operands[2], operands[3],
10380 ix86_compare_op0, ix86_compare_op1));
10381 else
10382 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
10383 operands[2], operands[3],
10384 ix86_compare_op0, ix86_compare_op1));
10385 return 1;
10386 }
10387
e075ae69 10388 /* The floating point conditional move instructions don't directly
0f290768 10389 support conditions resulting from a signed integer comparison. */
32b5b1aa 10390
e075ae69 10391 code = GET_CODE (operands[1]);
a1b8572c 10392 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9e7adcb3
JH
10393
10394 /* The floating point conditional move instructions don't directly
10395 support signed integer comparisons. */
10396
a1b8572c 10397 if (!fcmov_comparison_operator (compare_op, VOIDmode))
e075ae69 10398 {
a1b8572c 10399 if (second_test != NULL || bypass_test != NULL)
b531087a 10400 abort ();
e075ae69 10401 tmp = gen_reg_rtx (QImode);
3a3677ff 10402 ix86_expand_setcc (code, tmp);
e075ae69
RH
10403 code = NE;
10404 ix86_compare_op0 = tmp;
10405 ix86_compare_op1 = const0_rtx;
a1b8572c
JH
10406 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10407 }
10408 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10409 {
10410 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10411 emit_move_insn (tmp, operands[3]);
10412 operands[3] = tmp;
10413 }
10414 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10415 {
10416 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10417 emit_move_insn (tmp, operands[2]);
10418 operands[2] = tmp;
e075ae69 10419 }
e9a25f70 10420
e075ae69
RH
10421 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10422 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9e7adcb3 10423 compare_op,
e075ae69
RH
10424 operands[2],
10425 operands[3])));
a1b8572c
JH
10426 if (bypass_test)
10427 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10428 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10429 bypass_test,
10430 operands[3],
10431 operands[0])));
10432 if (second_test)
10433 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10434 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10435 second_test,
10436 operands[2],
10437 operands[0])));
32b5b1aa 10438
e075ae69 10439 return 1;
32b5b1aa
SC
10440}
10441
7b52eede
JH
10442/* Expand conditional increment or decrement using adb/sbb instructions.
10443 The default case using setcc followed by the conditional move can be
10444 done by generic code. */
10445int
b96a374d 10446ix86_expand_int_addcc (rtx operands[])
7b52eede
JH
10447{
10448 enum rtx_code code = GET_CODE (operands[1]);
10449 rtx compare_op;
10450 rtx val = const0_rtx;
e6e81735 10451 bool fpcmp = false;
e6e81735 10452 enum machine_mode mode = GET_MODE (operands[0]);
7b52eede
JH
10453
10454 if (operands[3] != const1_rtx
10455 && operands[3] != constm1_rtx)
10456 return 0;
10457 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
10458 ix86_compare_op1, &compare_op))
10459 return 0;
e6e81735
JH
10460 code = GET_CODE (compare_op);
10461
10462 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10463 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10464 {
10465 fpcmp = true;
10466 code = ix86_fp_compare_code_to_integer (code);
10467 }
10468
10469 if (code != LTU)
10470 {
10471 val = constm1_rtx;
10472 if (fpcmp)
10473 PUT_CODE (compare_op,
10474 reverse_condition_maybe_unordered
10475 (GET_CODE (compare_op)));
10476 else
10477 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
10478 }
10479 PUT_MODE (compare_op, mode);
10480
10481 /* Construct either adc or sbb insn. */
10482 if ((code == LTU) == (operands[3] == constm1_rtx))
7b52eede
JH
10483 {
10484 switch (GET_MODE (operands[0]))
10485 {
10486 case QImode:
e6e81735 10487 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
7b52eede
JH
10488 break;
10489 case HImode:
e6e81735 10490 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
7b52eede
JH
10491 break;
10492 case SImode:
e6e81735 10493 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
7b52eede
JH
10494 break;
10495 case DImode:
e6e81735 10496 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
7b52eede
JH
10497 break;
10498 default:
10499 abort ();
10500 }
10501 }
10502 else
10503 {
10504 switch (GET_MODE (operands[0]))
10505 {
10506 case QImode:
e6e81735 10507 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
7b52eede
JH
10508 break;
10509 case HImode:
e6e81735 10510 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
7b52eede
JH
10511 break;
10512 case SImode:
e6e81735 10513 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
7b52eede
JH
10514 break;
10515 case DImode:
e6e81735 10516 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
7b52eede
JH
10517 break;
10518 default:
10519 abort ();
10520 }
10521 }
10522 return 1; /* DONE */
10523}
10524
10525
2450a057
JH
/* Split operands 0 and 1 into SImode parts.  Similar to split_di, but
   works for floating point parameters and nonoffsetable memories.
   For pushes, it returns just stack offsets; the values will be saved
   in the right order.  Maximally three parts are generated.

   OPERAND is the value to split, PARTS receives the pieces (word-sized:
   4 bytes each on 32-bit targets, 8 bytes each on 64-bit targets) and
   MODE is the mode of the value.  Returns the number of parts (2 or 3).  */

static int
ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
{
  int size;

  /* Number of word-sized pieces; XFmode on 32-bit is forced to 3 parts.  */
  if (!TARGET_64BIT)
    size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
  else
    size = (GET_MODE_SIZE (mode) + 4) / 8;

  /* MMX registers are never split here.  */
  if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
    abort ();
  if (size < 2 || size > 3)
    abort ();

  /* Optimize constant pool reference to immediates.  This is used by fp
     moves, that force all constants to memory to allow combining.  */
  if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
    {
      rtx tmp = maybe_get_pool_constant (operand);
      if (tmp)
	operand = tmp;
    }

  if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
    {
      /* The only non-offsetable memories we handle are pushes.  All
	 three parts alias the same push expression; the caller emits
	 them in the right order.  */
      if (! push_operand (operand, VOIDmode))
	abort ();

      operand = copy_rtx (operand);
      PUT_MODE (operand, Pmode);
      parts[0] = parts[1] = parts[2] = operand;
    }
  else if (!TARGET_64BIT)
    {
      if (mode == DImode)
	split_di (&operand, 1, &parts[0], &parts[1]);
      else
	{
	  if (REG_P (operand))
	    {
	      /* Hard-register splitting is only valid after reload.  */
	      if (!reload_completed)
		abort ();
	      parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
	      parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
	      if (size == 3)
		parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
	    }
	  else if (offsettable_memref_p (operand))
	    {
	      /* Address the pieces at offsets 0, 4 and (if present) 8.  */
	      operand = adjust_address (operand, SImode, 0);
	      parts[0] = operand;
	      parts[1] = adjust_address (operand, SImode, 4);
	      if (size == 3)
		parts[2] = adjust_address (operand, SImode, 8);
	    }
	  else if (GET_CODE (operand) == CONST_DOUBLE)
	    {
	      /* Convert the FP constant to its target representation and
		 hand back the 32-bit words as immediates.  */
	      REAL_VALUE_TYPE r;
	      long l[4];

	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
	      switch (mode)
		{
		case XFmode:
		  REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
		  parts[2] = gen_int_mode (l[2], SImode);
		  break;
		case DFmode:
		  REAL_VALUE_TO_TARGET_DOUBLE (r, l);
		  break;
		default:
		  abort ();
		}
	      parts[1] = gen_int_mode (l[1], SImode);
	      parts[0] = gen_int_mode (l[0], SImode);
	    }
	  else
	    abort ();
	}
    }
  else
    {
      if (mode == TImode)
	split_ti (&operand, 1, &parts[0], &parts[1]);
      if (mode == XFmode || mode == TFmode)
	{
	  /* The upper part of an XFmode value occupies only 32 bits.  */
	  enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
	  if (REG_P (operand))
	    {
	      if (!reload_completed)
		abort ();
	      parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
	      parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
	    }
	  else if (offsettable_memref_p (operand))
	    {
	      operand = adjust_address (operand, DImode, 0);
	      parts[0] = operand;
	      parts[1] = adjust_address (operand, upper_mode, 8);
	    }
	  else if (GET_CODE (operand) == CONST_DOUBLE)
	    {
	      REAL_VALUE_TYPE r;
	      long l[3];

	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
	      real_to_target (l, &r, mode);
	      /* Do not use shift by 32 to avoid warning on 32bit systems.  */
	      if (HOST_BITS_PER_WIDE_INT >= 64)
	        parts[0]
		  = gen_int_mode
		      ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
		       + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
		       DImode);
	      else
	        parts[0] = immed_double_const (l[0], l[1], DImode);
	      if (upper_mode == SImode)
	        parts[1] = gen_int_mode (l[2], SImode);
	      else if (HOST_BITS_PER_WIDE_INT >= 64)
	        parts[1]
		  = gen_int_mode
		      ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
		       + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
		       DImode);
	      else
	        parts[1] = immed_double_const (l[2], l[3], DImode);
	    }
	  else
	    abort ();
	}
    }

  return size;
}
10667
/* Emit insns to perform a move or push of DI, DF, and XF values.
   The destination parts are stored into operands 2-4 and the source
   parts into operands 5-7, in the order that avoids clobbering, before
   the final moves are emitted from them.  (NOTE(review): an older
   comment described a bool return, but the function is void and always
   emits all required insns.)  */

void
ix86_split_long_move (rtx operands[])
{
  rtx part[2][3];		/* part[0][] = dest parts, part[1][] = src parts.  */
  int nparts;
  int push = 0;
  int collisions = 0;
  enum machine_mode mode = GET_MODE (operands[0]);

  /* The DFmode expanders may ask us to move double.
     For 64bit target this is single move.  By hiding the fact
     here we simplify i386.md splitters.  */
  if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
    {
      /* Optimize constant pool reference to immediates.  This is used by
	 fp moves, that force all constants to memory to allow combining.  */

      if (GET_CODE (operands[1]) == MEM
	  && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
	  && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
	operands[1] = get_pool_constant (XEXP (operands[1], 0));
      if (push_operand (operands[0], VOIDmode))
	{
	  operands[0] = copy_rtx (operands[0]);
	  PUT_MODE (operands[0], Pmode);
	}
      else
	operands[0] = gen_lowpart (DImode, operands[0]);
      operands[1] = gen_lowpart (DImode, operands[1]);
      emit_move_insn (operands[0], operands[1]);
      return;
    }

  /* The only non-offsettable memory we handle is push.  */
  if (push_operand (operands[0], VOIDmode))
    push = 1;
  else if (GET_CODE (operands[0]) == MEM
	   && ! offsettable_memref_p (operands[0]))
    abort ();

  nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
  ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));

  /* When emitting push, take care for source operands on the stack.  */
  if (push && GET_CODE (operands[1]) == MEM
      && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
    {
      /* Re-address each source part through the address of the next
	 higher part, since earlier pushes shift the stack pointer.  */
      if (nparts == 3)
	part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
				     XEXP (part[1][2], 0));
      part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
				   XEXP (part[1][1], 0));
    }

  /* We need to do copy in the right order in case an address register
     of the source overlaps the destination.  */
  if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
    {
      if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
	collisions++;
      if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
	collisions++;
      if (nparts == 3
	  && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
	collisions++;

      /* Collision in the middle part can be handled by reordering.  */
      if (collisions == 1 && nparts == 3
	  && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
	{
	  rtx tmp;
	  tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
	  tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
	}

      /* If there are more collisions, we can't handle it by reordering.
	 Do an lea to the last part and use only one colliding move.  */
      else if (collisions > 1)
	{
	  rtx base;

	  collisions = 1;

	  base = part[0][nparts - 1];

	  /* Handle the case when the last part isn't valid for lea.
	     Happens in 64-bit mode storing the 12-byte XFmode.  */
	  if (GET_MODE (base) != Pmode)
	    base = gen_rtx_REG (Pmode, REGNO (base));

	  emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
	  part[1][0] = replace_equiv_address (part[1][0], base);
	  part[1][1] = replace_equiv_address (part[1][1],
					      plus_constant (base, UNITS_PER_WORD));
	  if (nparts == 3)
	    part[1][2] = replace_equiv_address (part[1][2],
						plus_constant (base, 8));
	}
    }

  if (push)
    {
      if (!TARGET_64BIT)
	{
	  if (nparts == 3)
	    {
	      /* Keep the stack 4-byte padded for 12-byte XFmode pushes.  */
	      if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
		emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
	      emit_move_insn (part[0][2], part[1][2]);
	    }
	}
      else
	{
	  /* In 64bit mode we don't have 32bit push available.  In case this is
	     register, it is OK - we will just use larger counterpart.  We also
	     retype memory - these comes from attempt to avoid REX prefix on
	     moving of second half of TFmode value.  */
	  if (GET_MODE (part[1][1]) == SImode)
	    {
	      if (GET_CODE (part[1][1]) == MEM)
		part[1][1] = adjust_address (part[1][1], DImode, 0);
	      else if (REG_P (part[1][1]))
		part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
	      else
		abort ();
	      if (GET_MODE (part[1][0]) == SImode)
		part[1][0] = part[1][1];
	    }
	}
      /* Pushes are emitted highest part first.  */
      emit_move_insn (part[0][1], part[1][1]);
      emit_move_insn (part[0][0], part[1][0]);
      return;
    }

  /* Choose correct order to not overwrite the source before it is copied.  */
  if ((REG_P (part[0][0])
       && REG_P (part[1][1])
       && (REGNO (part[0][0]) == REGNO (part[1][1])
	   || (nparts == 3
	       && REGNO (part[0][0]) == REGNO (part[1][2]))))
      || (collisions > 0
	  && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
    {
      /* Copy highest part first.  */
      if (nparts == 3)
	{
	  operands[2] = part[0][2];
	  operands[3] = part[0][1];
	  operands[4] = part[0][0];
	  operands[5] = part[1][2];
	  operands[6] = part[1][1];
	  operands[7] = part[1][0];
	}
      else
	{
	  operands[2] = part[0][1];
	  operands[3] = part[0][0];
	  operands[5] = part[1][1];
	  operands[6] = part[1][0];
	}
    }
  else
    {
      /* Copy lowest part first.  */
      if (nparts == 3)
	{
	  operands[2] = part[0][0];
	  operands[3] = part[0][1];
	  operands[4] = part[0][2];
	  operands[5] = part[1][0];
	  operands[6] = part[1][1];
	  operands[7] = part[1][2];
	}
      else
	{
	  operands[2] = part[0][0];
	  operands[3] = part[0][1];
	  operands[5] = part[1][0];
	  operands[6] = part[1][1];
	}
    }
  emit_move_insn (operands[2], operands[5]);
  emit_move_insn (operands[3], operands[6]);
  if (nparts == 3)
    emit_move_insn (operands[4], operands[7]);

  return;
}
32b5b1aa 10859
e075ae69 10860void
b96a374d 10861ix86_split_ashldi (rtx *operands, rtx scratch)
32b5b1aa 10862{
e075ae69
RH
10863 rtx low[2], high[2];
10864 int count;
b985a30f 10865
e075ae69
RH
10866 if (GET_CODE (operands[2]) == CONST_INT)
10867 {
10868 split_di (operands, 2, low, high);
10869 count = INTVAL (operands[2]) & 63;
32b5b1aa 10870
e075ae69
RH
10871 if (count >= 32)
10872 {
10873 emit_move_insn (high[0], low[1]);
10874 emit_move_insn (low[0], const0_rtx);
b985a30f 10875
e075ae69
RH
10876 if (count > 32)
10877 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
10878 }
10879 else
10880 {
10881 if (!rtx_equal_p (operands[0], operands[1]))
10882 emit_move_insn (operands[0], operands[1]);
10883 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
10884 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
10885 }
10886 }
10887 else
10888 {
10889 if (!rtx_equal_p (operands[0], operands[1]))
10890 emit_move_insn (operands[0], operands[1]);
b985a30f 10891
e075ae69 10892 split_di (operands, 1, low, high);
b985a30f 10893
e075ae69
RH
10894 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
10895 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
32b5b1aa 10896
fe577e58 10897 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
e075ae69 10898 {
fe577e58 10899 if (! no_new_pseudos)
e075ae69
RH
10900 scratch = force_reg (SImode, const0_rtx);
10901 else
10902 emit_move_insn (scratch, const0_rtx);
10903
10904 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
10905 scratch));
10906 }
10907 else
10908 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
10909 }
e9a25f70 10910}
32b5b1aa 10911
e075ae69 10912void
b96a374d 10913ix86_split_ashrdi (rtx *operands, rtx scratch)
32b5b1aa 10914{
e075ae69
RH
10915 rtx low[2], high[2];
10916 int count;
32b5b1aa 10917
e075ae69
RH
10918 if (GET_CODE (operands[2]) == CONST_INT)
10919 {
10920 split_di (operands, 2, low, high);
10921 count = INTVAL (operands[2]) & 63;
32b5b1aa 10922
8937b6a2
RS
10923 if (count == 63)
10924 {
10925 emit_move_insn (high[0], high[1]);
10926 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10927 emit_move_insn (low[0], high[0]);
10928
10929 }
10930 else if (count >= 32)
e075ae69
RH
10931 {
10932 emit_move_insn (low[0], high[1]);
32b5b1aa 10933
e075ae69
RH
10934 if (! reload_completed)
10935 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
10936 else
10937 {
10938 emit_move_insn (high[0], low[0]);
10939 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10940 }
10941
10942 if (count > 32)
10943 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
10944 }
10945 else
10946 {
10947 if (!rtx_equal_p (operands[0], operands[1]))
10948 emit_move_insn (operands[0], operands[1]);
10949 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10950 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
10951 }
10952 }
10953 else
32b5b1aa 10954 {
e075ae69
RH
10955 if (!rtx_equal_p (operands[0], operands[1]))
10956 emit_move_insn (operands[0], operands[1]);
10957
10958 split_di (operands, 1, low, high);
10959
10960 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10961 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
10962
fe577e58 10963 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
e075ae69 10964 {
fe577e58 10965 if (! no_new_pseudos)
e075ae69
RH
10966 scratch = gen_reg_rtx (SImode);
10967 emit_move_insn (scratch, high[0]);
10968 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
10969 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10970 scratch));
10971 }
10972 else
10973 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
32b5b1aa 10974 }
e075ae69 10975}
32b5b1aa 10976
e075ae69 10977void
b96a374d 10978ix86_split_lshrdi (rtx *operands, rtx scratch)
e075ae69
RH
10979{
10980 rtx low[2], high[2];
10981 int count;
32b5b1aa 10982
e075ae69 10983 if (GET_CODE (operands[2]) == CONST_INT)
32b5b1aa 10984 {
e075ae69
RH
10985 split_di (operands, 2, low, high);
10986 count = INTVAL (operands[2]) & 63;
10987
10988 if (count >= 32)
c7271385 10989 {
e075ae69
RH
10990 emit_move_insn (low[0], high[1]);
10991 emit_move_insn (high[0], const0_rtx);
32b5b1aa 10992
e075ae69
RH
10993 if (count > 32)
10994 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
10995 }
10996 else
10997 {
10998 if (!rtx_equal_p (operands[0], operands[1]))
10999 emit_move_insn (operands[0], operands[1]);
11000 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
11001 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
11002 }
32b5b1aa 11003 }
e075ae69
RH
11004 else
11005 {
11006 if (!rtx_equal_p (operands[0], operands[1]))
11007 emit_move_insn (operands[0], operands[1]);
32b5b1aa 11008
e075ae69
RH
11009 split_di (operands, 1, low, high);
11010
11011 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
11012 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
11013
11014 /* Heh. By reversing the arguments, we can reuse this pattern. */
fe577e58 11015 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
e075ae69 11016 {
fe577e58 11017 if (! no_new_pseudos)
e075ae69
RH
11018 scratch = force_reg (SImode, const0_rtx);
11019 else
11020 emit_move_insn (scratch, const0_rtx);
11021
11022 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
11023 scratch));
11024 }
11025 else
11026 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
11027 }
32b5b1aa 11028}
3f803cd9 11029
0407c02b 11030/* Helper function for the string operations below. Dest VARIABLE whether
0945b39d
JH
11031 it is aligned to VALUE bytes. If true, jump to the label. */
11032static rtx
b96a374d 11033ix86_expand_aligntest (rtx variable, int value)
0945b39d
JH
11034{
11035 rtx label = gen_label_rtx ();
11036 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
11037 if (GET_MODE (variable) == DImode)
11038 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
11039 else
11040 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
11041 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
d43e0b7d 11042 1, label);
0945b39d
JH
11043 return label;
11044}
11045
11046/* Adjust COUNTER by the VALUE. */
11047static void
b96a374d 11048ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
0945b39d
JH
11049{
11050 if (GET_MODE (countreg) == DImode)
11051 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
11052 else
11053 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
11054}
11055
11056/* Zero extend possibly SImode EXP to Pmode register. */
d24b3457 11057rtx
b96a374d 11058ix86_zero_extend_to_Pmode (rtx exp)
0945b39d
JH
11059{
11060 rtx r;
11061 if (GET_MODE (exp) == VOIDmode)
11062 return force_reg (Pmode, exp);
11063 if (GET_MODE (exp) == Pmode)
11064 return copy_to_mode_reg (Pmode, exp);
11065 r = gen_reg_rtx (Pmode);
11066 emit_insn (gen_zero_extendsidi2 (r, exp));
11067 return r;
11068}
11069
/* Expand string move (memcpy) operation.  Use i386 string operations when
   profitable.  expand_clrstr contains similar code.

   DST and SRC are the MEM operands, COUNT_EXP the number of bytes to
   copy and ALIGN_EXP the alignment that may be assumed (a CONST_INT
   when known).  Returns 1 when an inline expansion was emitted, 0 when
   the caller should fall back to a library call.  */
int
ix86_expand_movstr (rtx dst, rtx src, rtx count_exp, rtx align_exp)
{
  rtx srcreg, destreg, countreg, srcexp, destexp;
  enum machine_mode counter_mode;
  HOST_WIDE_INT align = 0;
  unsigned HOST_WIDE_INT count = 0;

  if (GET_CODE (align_exp) == CONST_INT)
    align = INTVAL (align_exp);

  /* Can't use any of this if the user has appropriated esi or edi.  */
  if (global_regs[4] || global_regs[5])
    return 0;

  /* This simple hack avoids all inlining code and simplifies code below.  */
  if (!TARGET_ALIGN_STRINGOPS)
    align = 64;

  if (GET_CODE (count_exp) == CONST_INT)
    {
      count = INTVAL (count_exp);
      if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
	return 0;
    }

  /* Figure out proper mode for counter.  For 32bits it is always SImode,
     for 64bits use SImode when possible, otherwise DImode.
     Set count to number of bytes copied when known at compile time.  */
  if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
      || x86_64_zero_extended_value (count_exp))
    counter_mode = SImode;
  else
    counter_mode = DImode;

  /* Paranoia: the two branches above are exhaustive.  */
  if (counter_mode != SImode && counter_mode != DImode)
    abort ();

  /* Force the addresses into registers and rewrite the MEMs to use them.  */
  destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
  if (destreg != XEXP (dst, 0))
    dst = replace_equiv_address_nv (dst, destreg);
  srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
  if (srcreg != XEXP (src, 0))
    src = replace_equiv_address_nv (src, srcreg);

  /* When optimizing for size emit simple rep ; movsb instruction for
     counts not divisible by 4.  */

  if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
    {
      emit_insn (gen_cld ());
      countreg = ix86_zero_extend_to_Pmode (count_exp);
      destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
      srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
      emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
			      destexp, srcexp));
    }

  /* For constant aligned (or small unaligned) copies use rep movsl
     followed by code copying the rest.  For PentiumPro ensure 8 byte
     alignment to allow rep movsl acceleration.  */

  else if (count != 0
	   && (align >= 8
	       || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
	       || optimize_size || count < (unsigned int) 64))
    {
      unsigned HOST_WIDE_INT offset = 0;
      /* Word size of the rep move: 8 bytes on 64-bit unless -Os.  */
      int size = TARGET_64BIT && !optimize_size ? 8 : 4;
      rtx srcmem, dstmem;

      emit_insn (gen_cld ());
      /* Bulk of the copy as full words via rep mov.  */
      if (count & ~(size - 1))
	{
	  countreg = copy_to_mode_reg (counter_mode,
				       GEN_INT ((count >> (size == 4 ? 2 : 3))
						& (TARGET_64BIT ? -1 : 0x3fffffff)));
	  countreg = ix86_zero_extend_to_Pmode (countreg);

	  destexp = gen_rtx_ASHIFT (Pmode, countreg,
				    GEN_INT (size == 4 ? 2 : 3));
	  srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
	  destexp = gen_rtx_PLUS (Pmode, destexp, destreg);

	  emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
				  countreg, destexp, srcexp));
	  offset = count & ~(size - 1);
	}
      /* Copy the remaining 4-, 2- and 1-byte tails explicitly.  */
      if (size == 8 && (count & 0x04))
	{
	  srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
						 offset);
	  dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
						 offset);
	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
	  offset += 4;
	}
      if (count & 0x02)
	{
	  srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
						 offset);
	  dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
						 offset);
	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
	  offset += 2;
	}
      if (count & 0x01)
	{
	  srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
						 offset);
	  dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
						 offset);
	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
	}
    }
  /* The generic code based on the glibc implementation:
     - align destination to 4 bytes (8 byte alignment is used for PentiumPro
     allowing accelerated copying there)
     - copy the data using rep movsl
     - copy the rest.  */
  else
    {
      rtx countreg2;
      rtx label = NULL;
      rtx srcmem, dstmem;
      int desired_alignment = (TARGET_PENTIUMPRO
			       && (count == 0 || count >= (unsigned int) 260)
			       ? 8 : UNITS_PER_WORD);
      /* Get rid of MEM_OFFSETs, they won't be accurate.  */
      dst = change_address (dst, BLKmode, destreg);
      src = change_address (src, BLKmode, srcreg);

      /* In case we don't know anything about the alignment, default to
	 library version, since it is usually equally fast and result in
	 shorter code.

	 Also emit call when we know that the count is large and call overhead
	 will not be important.  */
      if (!TARGET_INLINE_ALL_STRINGOPS
	  && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
	return 0;

      if (TARGET_SINGLE_STRINGOP)
	emit_insn (gen_cld ());

      countreg2 = gen_reg_rtx (Pmode);
      countreg = copy_to_mode_reg (counter_mode, count_exp);

      /* We don't use loops to align destination and to copy parts smaller
	 than 4 bytes, because gcc is able to optimize such code better (in
	 the case the destination or the count really is aligned, gcc is often
	 able to predict the branches) and also it is friendlier to the
	 hardware branch prediction.

	 Using loops is beneficial for generic case, because we can
	 handle small counts using the loops.  Many CPUs (such as Athlon)
	 have large REP prefix setup costs.

	 This is quite costly.  Maybe we can revisit this decision later or
	 add some customizability to this code.  */

      /* Small runtime counts skip the alignment code entirely.  */
      if (count == 0 && align < desired_alignment)
	{
	  label = gen_label_rtx ();
	  emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
				   LEU, 0, counter_mode, 1, label);
	}
      /* Align the destination one byte/halfword/word at a time; each
	 aligntest label is taken when that alignment already holds.  */
      if (align <= 1)
	{
	  rtx label = ix86_expand_aligntest (destreg, 1);
	  srcmem = change_address (src, QImode, srcreg);
	  dstmem = change_address (dst, QImode, destreg);
	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
	  ix86_adjust_counter (countreg, 1);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align <= 2)
	{
	  rtx label = ix86_expand_aligntest (destreg, 2);
	  srcmem = change_address (src, HImode, srcreg);
	  dstmem = change_address (dst, HImode, destreg);
	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
	  ix86_adjust_counter (countreg, 2);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align <= 4 && desired_alignment > 4)
	{
	  rtx label = ix86_expand_aligntest (destreg, 4);
	  srcmem = change_address (src, SImode, srcreg);
	  dstmem = change_address (dst, SImode, destreg);
	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
	  ix86_adjust_counter (countreg, 4);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}

      if (label && desired_alignment > 4 && !TARGET_64BIT)
	{
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	  label = NULL_RTX;
	}
      if (!TARGET_SINGLE_STRINGOP)
	emit_insn (gen_cld ());
      /* Word-count for the rep mov: bytes shifted right by 3 (64-bit)
	 or 2 (32-bit).  */
      if (TARGET_64BIT)
	{
	  emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
				  GEN_INT (3)));
	  destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
	}
      else
	{
	  emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
	  destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
	}
      srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
      destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
      emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
			      countreg2, destexp, srcexp));

      if (label)
	{
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      /* Copy the remaining sub-word tail, testing the count bits at
	 runtime when the compile-time count is unknown.  */
      if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
	{
	  srcmem = change_address (src, SImode, srcreg);
	  dstmem = change_address (dst, SImode, destreg);
	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
	}
      if ((align <= 4 || count == 0) && TARGET_64BIT)
	{
	  rtx label = ix86_expand_aligntest (countreg, 4);
	  srcmem = change_address (src, SImode, srcreg);
	  dstmem = change_address (dst, SImode, destreg);
	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align > 2 && count != 0 && (count & 2))
	{
	  srcmem = change_address (src, HImode, srcreg);
	  dstmem = change_address (dst, HImode, destreg);
	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
	}
      if (align <= 2 || count == 0)
	{
	  rtx label = ix86_expand_aligntest (countreg, 2);
	  srcmem = change_address (src, HImode, srcreg);
	  dstmem = change_address (dst, HImode, destreg);
	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align > 1 && count != 0 && (count & 1))
	{
	  srcmem = change_address (src, QImode, srcreg);
	  dstmem = change_address (dst, QImode, destreg);
	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
	}
      if (align <= 1 || count == 0)
	{
	  rtx label = ix86_expand_aligntest (countreg, 1);
	  srcmem = change_address (src, QImode, srcreg);
	  dstmem = change_address (dst, QImode, destreg);
	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
    }

  return 1;
}
11348
/* Expand string clear operation (bzero).  Use i386 string operations when
   profitable.  expand_movstr contains similar code.

   DST is the destination BLKmode MEM, COUNT_EXP the byte count and
   ALIGN_EXP the known alignment (each may or may not be a CONST_INT).
   Returns 1 when the clear was expanded inline, 0 to tell the caller to
   fall back to a library call.  */
int
ix86_expand_clrstr (rtx dst, rtx count_exp, rtx align_exp)
{
  rtx destreg, zeroreg, countreg, destexp;
  enum machine_mode counter_mode;
  HOST_WIDE_INT align = 0;
  unsigned HOST_WIDE_INT count = 0;

  if (GET_CODE (align_exp) == CONST_INT)
    align = INTVAL (align_exp);

  /* Can't use any of this if the user has appropriated esi.  */
  if (global_regs[4])
    return 0;

  /* This simple hack avoids all inlining code and simplifies code below.  */
  if (!TARGET_ALIGN_STRINGOPS)
    align = 32;

  if (GET_CODE (count_exp) == CONST_INT)
    {
      count = INTVAL (count_exp);
      /* Large constant counts are better served by the library call
	 unless the user asked for everything to be inlined.  */
      if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
	return 0;
    }
  /* Figure out proper mode for counter.  For 32bits it is always SImode,
     for 64bits use SImode when possible, otherwise DImode.
     Set count to number of bytes copied when known at compile time.  */
  if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
      || x86_64_zero_extended_value (count_exp))
    counter_mode = SImode;
  else
    counter_mode = DImode;

  destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
  if (destreg != XEXP (dst, 0))
    dst = replace_equiv_address_nv (dst, destreg);

  emit_insn (gen_cld ());

  /* When optimizing for size emit simple rep ; movsb instruction for
     counts not divisible by 4.  */

  if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
    {
      countreg = ix86_zero_extend_to_Pmode (count_exp);
      zeroreg = copy_to_mode_reg (QImode, const0_rtx);
      /* destexp is the expected final value of the destination pointer,
	 consumed by the rep_stos pattern.  */
      destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
      emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
    }
  /* Constant-count path: a word/quadword "rep stos" for the bulk plus
     explicit stores for the remaining 1..7 tail bytes.  */
  else if (count != 0
	   && (align >= 8
	       || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
	       || optimize_size || count < (unsigned int) 64))
    {
      /* Store granularity: 8 bytes on 64-bit (unless -Os), else 4.  */
      int size = TARGET_64BIT && !optimize_size ? 8 : 4;
      unsigned HOST_WIDE_INT offset = 0;

      zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
      if (count & ~(size - 1))
	{
	  countreg = copy_to_mode_reg (counter_mode,
				       GEN_INT ((count >> (size == 4 ? 2 : 3))
						& (TARGET_64BIT ? -1 : 0x3fffffff)));
	  countreg = ix86_zero_extend_to_Pmode (countreg);
	  destexp = gen_rtx_ASHIFT (Pmode, countreg, GEN_INT (size == 4 ? 2 : 3));
	  destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
	  emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
	  offset = count & ~(size - 1);
	}
      /* Tail: store 4, 2 and 1 byte pieces as required by the low bits
	 of COUNT, tracking the running byte OFFSET into DST.  */
      if (size == 8 && (count & 0x04))
	{
	  rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
						  offset);
	  emit_insn (gen_strset (destreg, mem,
				 gen_rtx_SUBREG (SImode, zeroreg, 0)));
	  offset += 4;
	}
      if (count & 0x02)
	{
	  rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
						  offset);
	  emit_insn (gen_strset (destreg, mem,
				 gen_rtx_SUBREG (HImode, zeroreg, 0)));
	  offset += 2;
	}
      if (count & 0x01)
	{
	  rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
						  offset);
	  emit_insn (gen_strset (destreg, mem,
				 gen_rtx_SUBREG (QImode, zeroreg, 0)));
	}
    }
  /* Variable count and/or insufficient known alignment: align the
     destination byte-by-byte under runtime tests, do the bulk with
     rep stos, then clear the tail under runtime tests as well.  */
  else
    {
      rtx countreg2;
      rtx label = NULL;
      /* Compute desired alignment of the string operation.  */
      int desired_alignment = (TARGET_PENTIUMPRO
			       && (count == 0 || count >= (unsigned int) 260)
			       ? 8 : UNITS_PER_WORD);

      /* In case we don't know anything about the alignment, default to
	 library version, since it is usually equally fast and result in
	 shorter code.

	 Also emit call when we know that the count is large and call overhead
	 will not be important.  */
      if (!TARGET_INLINE_ALL_STRINGOPS
	  && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
	return 0;

      if (TARGET_SINGLE_STRINGOP)
	emit_insn (gen_cld ());

      countreg2 = gen_reg_rtx (Pmode);
      countreg = copy_to_mode_reg (counter_mode, count_exp);
      zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
      /* Get rid of MEM_OFFSET, it won't be accurate.  */
      dst = change_address (dst, BLKmode, destreg);

      /* Counts possibly smaller than the alignment prologue skip it.  */
      if (count == 0 && align < desired_alignment)
	{
	  label = gen_label_rtx ();
	  emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
				   LEU, 0, counter_mode, 1, label);
	}
      /* Alignment prologue: one store + counter adjustment per power of
	 two, each guarded by an ix86_expand_aligntest branch.  */
      if (align <= 1)
	{
	  rtx label = ix86_expand_aligntest (destreg, 1);
	  emit_insn (gen_strset (destreg, dst,
				 gen_rtx_SUBREG (QImode, zeroreg, 0)));
	  ix86_adjust_counter (countreg, 1);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align <= 2)
	{
	  rtx label = ix86_expand_aligntest (destreg, 2);
	  emit_insn (gen_strset (destreg, dst,
				 gen_rtx_SUBREG (HImode, zeroreg, 0)));
	  ix86_adjust_counter (countreg, 2);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align <= 4 && desired_alignment > 4)
	{
	  rtx label = ix86_expand_aligntest (destreg, 4);
	  emit_insn (gen_strset (destreg, dst,
				 (TARGET_64BIT
				  ? gen_rtx_SUBREG (SImode, zeroreg, 0)
				  : zeroreg)));
	  ix86_adjust_counter (countreg, 4);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}

      if (label && desired_alignment > 4 && !TARGET_64BIT)
	{
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	  label = NULL_RTX;
	}

      if (!TARGET_SINGLE_STRINGOP)
	emit_insn (gen_cld ());
      /* Convert the byte count into a word/quadword count for rep stos.  */
      if (TARGET_64BIT)
	{
	  emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
				  GEN_INT (3)));
	  destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
	}
      else
	{
	  emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
	  destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
	}
      destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
      emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));

      if (label)
	{
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}

      /* Tail: clear leftover 4/2/1 bytes.  Compile-time known counts use
	 unconditional stores; unknown counts test bits of COUNTREG.  */
      if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
	emit_insn (gen_strset (destreg, dst,
			       gen_rtx_SUBREG (SImode, zeroreg, 0)));
      if (TARGET_64BIT && (align <= 4 || count == 0))
	{
	  rtx label = ix86_expand_aligntest (countreg, 4);
	  emit_insn (gen_strset (destreg, dst,
				 gen_rtx_SUBREG (SImode, zeroreg, 0)));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align > 2 && count != 0 && (count & 2))
	emit_insn (gen_strset (destreg, dst,
			       gen_rtx_SUBREG (HImode, zeroreg, 0)));
      if (align <= 2 || count == 0)
	{
	  rtx label = ix86_expand_aligntest (countreg, 2);
	  emit_insn (gen_strset (destreg, dst,
				 gen_rtx_SUBREG (HImode, zeroreg, 0)));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align > 1 && count != 0 && (count & 1))
	emit_insn (gen_strset (destreg, dst,
			       gen_rtx_SUBREG (QImode, zeroreg, 0)));
      if (align <= 1 || count == 0)
	{
	  rtx label = ix86_expand_aligntest (countreg, 1);
	  emit_insn (gen_strset (destreg, dst,
				 gen_rtx_SUBREG (QImode, zeroreg, 0)));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
    }
  return 1;
}
4e44c1ef 11574
0945b39d
JH
/* Expand strlen.

   OUT receives the length, SRC is the string MEM, EOSCHAR the terminator
   (const0_rtx for plain strlen) and ALIGN the known alignment.  Returns 1
   when code was emitted, 0 to fall back to the generic expander.  */
int
ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
{
  rtx addr, scratch1, scratch2, scratch3, scratch4;

  /* The generic case of strlen expander is long.  Avoid it's
     expanding unless TARGET_INLINE_ALL_STRINGOPS.  */

  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
      && !TARGET_INLINE_ALL_STRINGOPS
      && !optimize_size
      && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
    return 0;

  addr = force_reg (Pmode, XEXP (src, 0));
  scratch1 = gen_reg_rtx (Pmode);

  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
      && !optimize_size)
    {
      /* Unrolled word-at-a-time scan for NUL.  */

      /* Well it seems that some optimizer does not combine a call like
	 foo(strlen(bar), strlen(bar));
	 when the move and the subtraction is done here.  It does calculate
	 the length just once when these instructions are done inside of
	 output_strlen_unroll().  But I think since &bar[strlen(bar)] is
	 often used and I use one fewer register for the lifetime of
	 output_strlen_unroll() this is better.  */

      emit_move_insn (out, addr);

      ix86_expand_strlensi_unroll_1 (out, src, align);

      /* strlensi_unroll_1 returns the address of the zero at the end of
	 the string, like memchr(), so compute the length by subtracting
	 the start address.  */
      if (TARGET_64BIT)
	emit_insn (gen_subdi3 (out, out, addr));
      else
	emit_insn (gen_subsi3 (out, out, addr));
    }
  else
    {
      /* repnz; scasb path: scan with the string-scan insn, then turn the
	 (negative, decremented) count left in scratch1 into a length.  */
      rtx unspec;
      scratch2 = gen_reg_rtx (Pmode);
      scratch3 = gen_reg_rtx (Pmode);
      scratch4 = force_reg (Pmode, constm1_rtx);

      emit_move_insn (scratch3, addr);
      eoschar = force_reg (QImode, eoschar);

      emit_insn (gen_cld ());
      src = replace_equiv_address_nv (src, scratch3);

      /* If .md starts supporting :P, this can be done in .md.  */
      unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
						 scratch4), UNSPEC_SCAS);
      emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
      /* out = ~scratch1 - 1, i.e. the number of bytes scanned before
	 the terminator.  */
      if (TARGET_64BIT)
	{
	  emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
	  emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
	}
      else
	{
	  emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
	  emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
	}
    }
  return 1;
}
11646
/* Expand the appropriate insns for doing strlen if not just doing
   repnz; scasb

   out = result, initialized with the start address
   align_rtx = alignment of the address.
   scratch = scratch register, initialized with the startaddress when
	not aligned, otherwise undefined

   This is just the body.  It needs the initializations mentioned above and
   some address computing at the end.  These things are done in i386.md.

   On exit OUT holds the address of the terminating zero byte (the caller,
   ix86_expand_strlen, subtracts the start address to get the length).  */

static void
ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
{
  int align;
  rtx tmp;
  rtx align_2_label = NULL_RTX;
  rtx align_3_label = NULL_RTX;
  rtx align_4_label = gen_label_rtx ();
  rtx end_0_label = gen_label_rtx ();
  rtx mem;
  rtx tmpreg = gen_reg_rtx (SImode);
  rtx scratch = gen_reg_rtx (SImode);
  rtx cmp;

  align = 0;
  if (GET_CODE (align_rtx) == CONST_INT)
    align = INTVAL (align_rtx);

  /* Loop to check 1..3 bytes for null to get an aligned pointer.  */

  /* Is there a known alignment and is it less than 4?  */
  if (align < 4)
    {
      rtx scratch1 = gen_reg_rtx (Pmode);
      emit_move_insn (scratch1, out);
      /* Is there a known alignment and is it not 2? */
      if (align != 2)
	{
	  align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
	  align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */

	  /* Leave just the 3 lower bits.  */
	  align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
				    NULL_RTX, 0, OPTAB_WIDEN);

	  /* Dispatch on (address & 3): 0 -> already aligned, 2 -> two
	     bytes to check, 3 -> one byte, 1 -> fall through (three).  */
	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
				   Pmode, 1, align_4_label);
	  emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
				   Pmode, 1, align_2_label);
	  emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
				   Pmode, 1, align_3_label);
	}
      else
	{
	  /* Since the alignment is 2, we have to check 2 or 0 bytes;
	     check if is aligned to 4 - byte.  */

	  align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
				    NULL_RTX, 0, OPTAB_WIDEN);

	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
				   Pmode, 1, align_4_label);
	}

      /* QImode MEM addressed through OUT; OUT is bumped as we scan, so
	 this same MEM re-reads the current byte at each test below.  */
      mem = change_address (src, QImode, out);

      /* Now compare the bytes.  */

      /* Compare the first n unaligned byte on a byte per byte basis.  */
      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
			       QImode, 1, end_0_label);

      /* Increment the address.  */
      if (TARGET_64BIT)
	emit_insn (gen_adddi3 (out, out, const1_rtx));
      else
	emit_insn (gen_addsi3 (out, out, const1_rtx));

      /* Not needed with an alignment of 2 */
      if (align != 2)
	{
	  emit_label (align_2_label);

	  emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
				   end_0_label);

	  if (TARGET_64BIT)
	    emit_insn (gen_adddi3 (out, out, const1_rtx));
	  else
	    emit_insn (gen_addsi3 (out, out, const1_rtx));

	  emit_label (align_3_label);
	}

      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
			       end_0_label);

      if (TARGET_64BIT)
	emit_insn (gen_adddi3 (out, out, const1_rtx));
      else
	emit_insn (gen_addsi3 (out, out, const1_rtx));
    }

  /* Generate loop to check 4 bytes at a time.  It is not a good idea to
     align this loop.  It gives only huge programs, but does not help to
     speed up.  */
  emit_label (align_4_label);

  mem = change_address (src, SImode, out);
  emit_move_insn (scratch, mem);
  if (TARGET_64BIT)
    emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
  else
    emit_insn (gen_addsi3 (out, out, GEN_INT (4)));

  /* This formula yields a nonzero result iff one of the bytes is zero.
     This saves three branches inside loop and many cycles.  */

  /* tmpreg = (word - 0x01010101) & ~word & 0x80808080 -- the classic
     haszero bit trick; a set high bit marks a zero byte.  */
  emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
  emit_insn (gen_one_cmplsi2 (scratch, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg,
			 gen_int_mode (0x80808080, SImode)));
  emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
			   align_4_label);

  /* A zero byte was found in the word; locate it.  OUT currently points
     one word past the word containing it.  */
  if (TARGET_CMOVE)
    {
       rtx reg = gen_reg_rtx (SImode);
       rtx reg2 = gen_reg_rtx (Pmode);
       emit_move_insn (reg, tmpreg);
       emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));

       /* If zero is not in the first two bytes, move two bytes forward.  */
       emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
       tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
       tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
       emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
			       gen_rtx_IF_THEN_ELSE (SImode, tmp,
						     reg,
						     tmpreg)));
       /* Emit lea manually to avoid clobbering of flags.  */
       emit_insn (gen_rtx_SET (SImode, reg2,
			       gen_rtx_PLUS (Pmode, out, const2_rtx)));

       tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
       tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
       emit_insn (gen_rtx_SET (VOIDmode, out,
			       gen_rtx_IF_THEN_ELSE (Pmode, tmp,
						     reg2,
						     out)));

    }
  else
    {
       rtx end_2_label = gen_label_rtx ();
       /* Is zero in the first two bytes? */

       emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
       tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
       tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
       tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
			    gen_rtx_LABEL_REF (VOIDmode, end_2_label),
			    pc_rtx);
       tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
       JUMP_LABEL (tmp) = end_2_label;

       /* Not in the first two.  Move two bytes forward.  */
       emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
       if (TARGET_64BIT)
	 emit_insn (gen_adddi3 (out, out, const2_rtx));
       else
	 emit_insn (gen_addsi3 (out, out, const2_rtx));

       emit_label (end_2_label);

    }

  /* Avoid branch in fixing the byte.  */
  /* Doubling the marker byte sets the carry flag iff the zero byte is
     the earlier of the remaining pair; subtract-with-borrow of 3 then
     lands OUT exactly on the zero byte.  Register 17 is the flags
     register (CCmode).  */
  tmpreg = gen_lowpart (QImode, tmpreg);
  emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
  cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
  if (TARGET_64BIT)
    emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
  else
    emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));

  emit_label (end_0_label);
}
0e07aff3
RH
11837
11838void
0f901c4c
SH
11839ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
11840 rtx callarg2 ATTRIBUTE_UNUSED,
b96a374d 11841 rtx pop, int sibcall)
0e07aff3
RH
11842{
11843 rtx use = NULL, call;
11844
11845 if (pop == const0_rtx)
11846 pop = NULL;
11847 if (TARGET_64BIT && pop)
11848 abort ();
11849
b069de3b
SS
11850#if TARGET_MACHO
11851 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
11852 fnaddr = machopic_indirect_call_target (fnaddr);
11853#else
0e07aff3
RH
11854 /* Static functions and indirect calls don't need the pic register. */
11855 if (! TARGET_64BIT && flag_pic
11856 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
12969f45 11857 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
66edd3b4 11858 use_reg (&use, pic_offset_table_rtx);
0e07aff3
RH
11859
11860 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
11861 {
11862 rtx al = gen_rtx_REG (QImode, 0);
11863 emit_move_insn (al, callarg2);
11864 use_reg (&use, al);
11865 }
b069de3b 11866#endif /* TARGET_MACHO */
0e07aff3
RH
11867
11868 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
11869 {
11870 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11871 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11872 }
4977bab6
ZW
11873 if (sibcall && TARGET_64BIT
11874 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
11875 {
11876 rtx addr;
11877 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
b19ee4bd 11878 fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
4977bab6
ZW
11879 emit_move_insn (fnaddr, addr);
11880 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11881 }
0e07aff3
RH
11882
11883 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
11884 if (retval)
11885 call = gen_rtx_SET (VOIDmode, retval, call);
11886 if (pop)
11887 {
11888 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
11889 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
11890 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
11891 }
11892
11893 call = emit_call_insn (call);
11894 if (use)
11895 CALL_INSN_FUNCTION_USAGE (call) = use;
11896}
fce5a9f2 11897
e075ae69 11898\f
e075ae69
RH
11899/* Clear stack slot assignments remembered from previous functions.
11900 This is called from INIT_EXPANDERS once before RTL is emitted for each
11901 function. */
11902
e2500fed 11903static struct machine_function *
b96a374d 11904ix86_init_machine_status (void)
37b15744 11905{
d7394366
JH
11906 struct machine_function *f;
11907
11908 f = ggc_alloc_cleared (sizeof (struct machine_function));
11909 f->use_fast_prologue_epilogue_nregs = -1;
8330e2c6
AJ
11910
11911 return f;
1526a060
BS
11912}
11913
e075ae69
RH
11914/* Return a MEM corresponding to a stack slot with mode MODE.
11915 Allocate a new slot if necessary.
11916
11917 The RTL for a function can have several slots available: N is
11918 which slot to use. */
11919
11920rtx
b96a374d 11921assign_386_stack_local (enum machine_mode mode, int n)
e075ae69 11922{
ddb0ae00
ZW
11923 struct stack_local_entry *s;
11924
e075ae69
RH
11925 if (n < 0 || n >= MAX_386_STACK_LOCALS)
11926 abort ();
11927
ddb0ae00
ZW
11928 for (s = ix86_stack_locals; s; s = s->next)
11929 if (s->mode == mode && s->n == n)
11930 return s->rtl;
11931
11932 s = (struct stack_local_entry *)
11933 ggc_alloc (sizeof (struct stack_local_entry));
11934 s->n = n;
11935 s->mode = mode;
11936 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
e075ae69 11937
ddb0ae00
ZW
11938 s->next = ix86_stack_locals;
11939 ix86_stack_locals = s;
11940 return s->rtl;
e075ae69 11941}
f996902d
RH
11942
11943/* Construct the SYMBOL_REF for the tls_get_addr function. */
11944
e2500fed 11945static GTY(()) rtx ix86_tls_symbol;
f996902d 11946rtx
b96a374d 11947ix86_tls_get_addr (void)
f996902d 11948{
f996902d 11949
e2500fed 11950 if (!ix86_tls_symbol)
f996902d 11951 {
75d38379
JJ
11952 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
11953 (TARGET_GNU_TLS && !TARGET_64BIT)
11954 ? "___tls_get_addr"
11955 : "__tls_get_addr");
f996902d
RH
11956 }
11957
e2500fed 11958 return ix86_tls_symbol;
f996902d 11959}
e075ae69
RH
11960\f
/* Calculate the length of the memory address in the instruction
   encoding.  Does not include the one-byte modrm, opcode, or prefix.

   Returns the number of extra bytes (SIB and/or displacement) that the
   address ADDR contributes, or 0 for auto-modify addresses.  */

static int
memory_address_length (rtx addr)
{
  struct ix86_address parts;
  rtx base, index, disp;
  int len;

  /* Auto-modify addresses encode as plain string/stack ops: no extra
     address bytes.  */
  if (GET_CODE (addr) == PRE_DEC
      || GET_CODE (addr) == POST_INC
      || GET_CODE (addr) == PRE_MODIFY
      || GET_CODE (addr) == POST_MODIFY)
    return 0;

  if (! ix86_decompose_address (addr, &parts))
    abort ();

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  len = 0;

  /* Rule of thumb:
       - esp as the base always wants an index,
       - ebp as the base always wants a displacement.  */

  /* Register Indirect.  */
  if (base && !index && !disp)
    {
      /* esp (for its index) and ebp (for its displacement) need
	 the two-byte modrm form.  */
      if (addr == stack_pointer_rtx
	  || addr == arg_pointer_rtx
	  || addr == frame_pointer_rtx
	  || addr == hard_frame_pointer_rtx)
	len = 1;
    }

  /* Direct Addressing.  */
  else if (disp && !base && !index)
    len = 4;

  else
    {
      /* Find the length of the displacement constant.  */
      if (disp)
	{
	  /* Small constants with a base fit in a one-byte (sign
	     extended) displacement; everything else takes four.  */
	  if (GET_CODE (disp) == CONST_INT
	      && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K')
	      && base)
	    len = 1;
	  else
	    len = 4;
	}
      /* ebp always wants a displacement.  */
      else if (base == hard_frame_pointer_rtx)
	len = 1;

      /* An index requires the two-byte modrm form....  */
      if (index
	  /* ...like esp, which always wants an index.  */
	  || base == stack_pointer_rtx
	  || base == arg_pointer_rtx
	  || base == frame_pointer_rtx)
	len += 1;
    }

  return len;
}
79325812 12032
5bf0ebab
RH
12033/* Compute default value for "length_immediate" attribute. When SHORTFORM
12034 is set, expect that insn have 8bit immediate alternative. */
e075ae69 12035int
b96a374d 12036ix86_attr_length_immediate_default (rtx insn, int shortform)
e075ae69 12037{
6ef67412
JH
12038 int len = 0;
12039 int i;
6c698a6d 12040 extract_insn_cached (insn);
6ef67412
JH
12041 for (i = recog_data.n_operands - 1; i >= 0; --i)
12042 if (CONSTANT_P (recog_data.operand[i]))
3071fab5 12043 {
6ef67412 12044 if (len)
3071fab5 12045 abort ();
6ef67412
JH
12046 if (shortform
12047 && GET_CODE (recog_data.operand[i]) == CONST_INT
12048 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
12049 len = 1;
12050 else
12051 {
12052 switch (get_attr_mode (insn))
12053 {
12054 case MODE_QI:
12055 len+=1;
12056 break;
12057 case MODE_HI:
12058 len+=2;
12059 break;
12060 case MODE_SI:
12061 len+=4;
12062 break;
14f73b5a
JH
12063 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
12064 case MODE_DI:
12065 len+=4;
12066 break;
6ef67412 12067 default:
c725bd79 12068 fatal_insn ("unknown insn mode", insn);
6ef67412
JH
12069 }
12070 }
3071fab5 12071 }
6ef67412
JH
12072 return len;
12073}
12074/* Compute default value for "length_address" attribute. */
12075int
b96a374d 12076ix86_attr_length_address_default (rtx insn)
6ef67412
JH
12077{
12078 int i;
9b73c90a
EB
12079
12080 if (get_attr_type (insn) == TYPE_LEA)
12081 {
12082 rtx set = PATTERN (insn);
12083 if (GET_CODE (set) == SET)
12084 ;
12085 else if (GET_CODE (set) == PARALLEL
12086 && GET_CODE (XVECEXP (set, 0, 0)) == SET)
12087 set = XVECEXP (set, 0, 0);
12088 else
12089 {
12090#ifdef ENABLE_CHECKING
12091 abort ();
12092#endif
12093 return 0;
12094 }
12095
12096 return memory_address_length (SET_SRC (set));
12097 }
12098
6c698a6d 12099 extract_insn_cached (insn);
1ccbefce
RH
12100 for (i = recog_data.n_operands - 1; i >= 0; --i)
12101 if (GET_CODE (recog_data.operand[i]) == MEM)
e075ae69 12102 {
6ef67412 12103 return memory_address_length (XEXP (recog_data.operand[i], 0));
e075ae69
RH
12104 break;
12105 }
6ef67412 12106 return 0;
3f803cd9 12107}
e075ae69
RH
12108\f
12109/* Return the maximum number of instructions a cpu can issue. */
b657fc39 12110
c237e94a 12111static int
b96a374d 12112ix86_issue_rate (void)
b657fc39 12113{
9e555526 12114 switch (ix86_tune)
b657fc39 12115 {
e075ae69
RH
12116 case PROCESSOR_PENTIUM:
12117 case PROCESSOR_K6:
12118 return 2;
79325812 12119
e075ae69 12120 case PROCESSOR_PENTIUMPRO:
b4e89e2d
JH
12121 case PROCESSOR_PENTIUM4:
12122 case PROCESSOR_ATHLON:
4977bab6 12123 case PROCESSOR_K8:
89c43c0a 12124 case PROCESSOR_NOCONA:
e075ae69 12125 return 3;
b657fc39 12126
b657fc39 12127 default:
e075ae69 12128 return 1;
b657fc39 12129 }
b657fc39
L
12130}
12131
e075ae69
RH
12132/* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
12133 by DEP_INSN and nothing set by DEP_INSN. */
b657fc39 12134
e075ae69 12135static int
b96a374d 12136ix86_flags_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
e075ae69
RH
12137{
12138 rtx set, set2;
b657fc39 12139
e075ae69
RH
12140 /* Simplify the test for uninteresting insns. */
12141 if (insn_type != TYPE_SETCC
12142 && insn_type != TYPE_ICMOV
12143 && insn_type != TYPE_FCMOV
12144 && insn_type != TYPE_IBR)
12145 return 0;
b657fc39 12146
e075ae69
RH
12147 if ((set = single_set (dep_insn)) != 0)
12148 {
12149 set = SET_DEST (set);
12150 set2 = NULL_RTX;
12151 }
12152 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
12153 && XVECLEN (PATTERN (dep_insn), 0) == 2
12154 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
12155 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
12156 {
12157 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
12158 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
12159 }
78a0d70c
ZW
12160 else
12161 return 0;
b657fc39 12162
78a0d70c
ZW
12163 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
12164 return 0;
b657fc39 12165
f5143c46 12166 /* This test is true if the dependent insn reads the flags but
78a0d70c
ZW
12167 not any other potentially set register. */
12168 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
12169 return 0;
12170
12171 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
12172 return 0;
12173
12174 return 1;
e075ae69 12175}
b657fc39 12176
e075ae69
RH
12177/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
12178 address with operands set by DEP_INSN. */
12179
12180static int
b96a374d 12181ix86_agi_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
e075ae69
RH
12182{
12183 rtx addr;
12184
6ad48e84
JH
12185 if (insn_type == TYPE_LEA
12186 && TARGET_PENTIUM)
5fbdde42
RH
12187 {
12188 addr = PATTERN (insn);
12189 if (GET_CODE (addr) == SET)
12190 ;
12191 else if (GET_CODE (addr) == PARALLEL
12192 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
12193 addr = XVECEXP (addr, 0, 0);
12194 else
12195 abort ();
12196 addr = SET_SRC (addr);
12197 }
e075ae69
RH
12198 else
12199 {
12200 int i;
6c698a6d 12201 extract_insn_cached (insn);
1ccbefce
RH
12202 for (i = recog_data.n_operands - 1; i >= 0; --i)
12203 if (GET_CODE (recog_data.operand[i]) == MEM)
e075ae69 12204 {
1ccbefce 12205 addr = XEXP (recog_data.operand[i], 0);
e075ae69
RH
12206 goto found;
12207 }
12208 return 0;
12209 found:;
b657fc39
L
12210 }
12211
e075ae69 12212 return modified_in_p (addr, dep_insn);
b657fc39 12213}
a269a03c 12214
c237e94a 12215static int
b96a374d 12216ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
a269a03c 12217{
e075ae69 12218 enum attr_type insn_type, dep_insn_type;
6ad48e84 12219 enum attr_memory memory, dep_memory;
e075ae69 12220 rtx set, set2;
9b00189f 12221 int dep_insn_code_number;
a269a03c 12222
d1f87653 12223 /* Anti and output dependencies have zero cost on all CPUs. */
e075ae69 12224 if (REG_NOTE_KIND (link) != 0)
309ada50 12225 return 0;
a269a03c 12226
9b00189f
JH
12227 dep_insn_code_number = recog_memoized (dep_insn);
12228
e075ae69 12229 /* If we can't recognize the insns, we can't really do anything. */
9b00189f 12230 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
e075ae69 12231 return cost;
a269a03c 12232
1c71e60e
JH
12233 insn_type = get_attr_type (insn);
12234 dep_insn_type = get_attr_type (dep_insn);
9b00189f 12235
9e555526 12236 switch (ix86_tune)
a269a03c
JC
12237 {
12238 case PROCESSOR_PENTIUM:
e075ae69
RH
12239 /* Address Generation Interlock adds a cycle of latency. */
12240 if (ix86_agi_dependant (insn, dep_insn, insn_type))
12241 cost += 1;
12242
12243 /* ??? Compares pair with jump/setcc. */
12244 if (ix86_flags_dependant (insn, dep_insn, insn_type))
12245 cost = 0;
12246
d1f87653 12247 /* Floating point stores require value to be ready one cycle earlier. */
0f290768 12248 if (insn_type == TYPE_FMOV
e075ae69
RH
12249 && get_attr_memory (insn) == MEMORY_STORE
12250 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12251 cost += 1;
12252 break;
a269a03c 12253
e075ae69 12254 case PROCESSOR_PENTIUMPRO:
6ad48e84
JH
12255 memory = get_attr_memory (insn);
12256 dep_memory = get_attr_memory (dep_insn);
12257
0f290768 12258 /* Since we can't represent delayed latencies of load+operation,
e075ae69
RH
12259 increase the cost here for non-imov insns. */
12260 if (dep_insn_type != TYPE_IMOV
6ad48e84
JH
12261 && dep_insn_type != TYPE_FMOV
12262 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
e075ae69
RH
12263 cost += 1;
12264
12265 /* INT->FP conversion is expensive. */
12266 if (get_attr_fp_int_src (dep_insn))
12267 cost += 5;
12268
12269 /* There is one cycle extra latency between an FP op and a store. */
12270 if (insn_type == TYPE_FMOV
12271 && (set = single_set (dep_insn)) != NULL_RTX
12272 && (set2 = single_set (insn)) != NULL_RTX
12273 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
12274 && GET_CODE (SET_DEST (set2)) == MEM)
12275 cost += 1;
6ad48e84
JH
12276
12277 /* Show ability of reorder buffer to hide latency of load by executing
12278 in parallel with previous instruction in case
12279 previous instruction is not needed to compute the address. */
12280 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12281 && !ix86_agi_dependant (insn, dep_insn, insn_type))
b96a374d 12282 {
6ad48e84
JH
12283 /* Claim moves to take one cycle, as core can issue one load
12284 at time and the next load can start cycle later. */
12285 if (dep_insn_type == TYPE_IMOV
12286 || dep_insn_type == TYPE_FMOV)
12287 cost = 1;
12288 else if (cost > 1)
12289 cost--;
12290 }
e075ae69 12291 break;
a269a03c 12292
e075ae69 12293 case PROCESSOR_K6:
6ad48e84
JH
12294 memory = get_attr_memory (insn);
12295 dep_memory = get_attr_memory (dep_insn);
e075ae69
RH
12296 /* The esp dependency is resolved before the instruction is really
12297 finished. */
12298 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
12299 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
12300 return 1;
a269a03c 12301
0f290768 12302 /* Since we can't represent delayed latencies of load+operation,
e075ae69 12303 increase the cost here for non-imov insns. */
6ad48e84 12304 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
e075ae69
RH
12305 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
12306
12307 /* INT->FP conversion is expensive. */
12308 if (get_attr_fp_int_src (dep_insn))
12309 cost += 5;
6ad48e84
JH
12310
12311 /* Show ability of reorder buffer to hide latency of load by executing
12312 in parallel with previous instruction in case
12313 previous instruction is not needed to compute the address. */
12314 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12315 && !ix86_agi_dependant (insn, dep_insn, insn_type))
b96a374d 12316 {
6ad48e84
JH
12317 /* Claim moves to take one cycle, as core can issue one load
12318 at time and the next load can start cycle later. */
12319 if (dep_insn_type == TYPE_IMOV
12320 || dep_insn_type == TYPE_FMOV)
12321 cost = 1;
12322 else if (cost > 2)
12323 cost -= 2;
12324 else
12325 cost = 1;
12326 }
a14003ee 12327 break;
e075ae69 12328
309ada50 12329 case PROCESSOR_ATHLON:
4977bab6 12330 case PROCESSOR_K8:
6ad48e84
JH
12331 memory = get_attr_memory (insn);
12332 dep_memory = get_attr_memory (dep_insn);
12333
6ad48e84
JH
12334 /* Show ability of reorder buffer to hide latency of load by executing
12335 in parallel with previous instruction in case
12336 previous instruction is not needed to compute the address. */
12337 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12338 && !ix86_agi_dependant (insn, dep_insn, insn_type))
b96a374d 12339 {
26f74aa3
JH
12340 enum attr_unit unit = get_attr_unit (insn);
12341 int loadcost = 3;
12342
12343 /* Because of the difference between the length of integer and
12344 floating unit pipeline preparation stages, the memory operands
b96a374d 12345 for floating point are cheaper.
26f74aa3 12346
c51e6d85 12347 ??? For Athlon it the difference is most probably 2. */
26f74aa3
JH
12348 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
12349 loadcost = 3;
12350 else
12351 loadcost = TARGET_ATHLON ? 2 : 0;
12352
12353 if (cost >= loadcost)
12354 cost -= loadcost;
6ad48e84
JH
12355 else
12356 cost = 0;
12357 }
309ada50 12358
a269a03c 12359 default:
a269a03c
JC
12360 break;
12361 }
12362
12363 return cost;
12364}
0a726ef1 12365
9b690711 12366static int
b96a374d 12367ia32_use_dfa_pipeline_interface (void)
9b690711 12368{
56bab446
SB
12369 if (TARGET_PENTIUM
12370 || TARGET_PENTIUMPRO
12371 || TARGET_ATHLON_K8)
9b690711
RH
12372 return 1;
12373 return 0;
12374}
12375
12376/* How many alternative schedules to try. This should be as wide as the
12377 scheduling freedom in the DFA, but no wider. Making this value too
12378 large results extra work for the scheduler. */
12379
12380static int
b96a374d 12381ia32_multipass_dfa_lookahead (void)
9b690711 12382{
9e555526 12383 if (ix86_tune == PROCESSOR_PENTIUM)
9b690711 12384 return 2;
56bab446
SB
12385
12386 if (ix86_tune == PROCESSOR_PENTIUMPRO)
12387 return 1;
12388
9b690711 12389 else
56bab446 12390 return 0;
9b690711
RH
12391}
12392
0e4970d7 12393\f
a7180f70
BS
12394/* Compute the alignment given to a constant that is being placed in memory.
12395 EXP is the constant and ALIGN is the alignment that the object would
12396 ordinarily have.
12397 The value of this function is used instead of that alignment to align
12398 the object. */
12399
12400int
b96a374d 12401ix86_constant_alignment (tree exp, int align)
a7180f70
BS
12402{
12403 if (TREE_CODE (exp) == REAL_CST)
12404 {
12405 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
12406 return 64;
12407 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
12408 return 128;
12409 }
4137ba7a
JJ
12410 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
12411 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
12412 return BITS_PER_WORD;
a7180f70
BS
12413
12414 return align;
12415}
12416
/* Compute the alignment for a static variable.
   TYPE is the data type, and ALIGN is the alignment that
   the object would ordinarily have.  The value of this function is used
   instead of that alignment to align the object.

   The checks are ordered; each returns as soon as a larger alignment
   than ALIGN is justified.  All sizes/alignments below are in bits
   (e.g. the 128 threshold paired with the "16 bytes" comment).  */

int
ix86_data_alignment (tree type, int align)
{
  /* Large aggregates (>= 256 bits, or with a nonzero high size word)
     are aligned to 256 bits.  */
  if (AGGREGATE_TYPE_P (type)
      && TYPE_SIZE (type)
      && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
      && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
	  || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
    return 256;

  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to 16byte boundary.  */
  if (TARGET_64BIT)
    {
      if (AGGREGATE_TYPE_P (type)
	  && TYPE_SIZE (type)
	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	  && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
	      || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
	return 128;
    }

  /* For arrays, complex values, the first field of a record/union, and
     scalar real/vector/integer types: DFmode data gets 64-bit alignment
     and modes satisfying ALIGN_MODE_128 get 128-bit alignment.  */
  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {

      if (TYPE_MODE (type) == DCmode && align < 64)
	return 64;
      if (TYPE_MODE (type) == XCmode && align < 128)
	return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
	    || TREE_CODE (type) == UNION_TYPE
	    || TREE_CODE (type) == QUAL_UNION_TYPE)
	   && TYPE_FIELDS (type))
    {
      /* Only the first field's mode is inspected here.  */
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
	   || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
	return 128;
    }

  return align;
}
12480
/* Compute the alignment for a local variable.
   TYPE is the data type, and ALIGN is the alignment that
   the object would ordinarily have.  The value of this macro is used
   instead of that alignment to align the object.

   Mirrors the type-based checks of ix86_data_alignment, without the
   256-bit aggregate rule.  */

int
ix86_local_alignment (tree type, int align)
{
  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to 16byte boundary.
     NOTE(review): the threshold below is 16, whereas the analogous
     check in ix86_data_alignment uses 128 (bits = 16 bytes).  As
     written this fires for any aggregate of at least 16 *bits* --
     confirm whether the constant is intentional.  */
  if (TARGET_64BIT)
    {
      if (AGGREGATE_TYPE_P (type)
	  && TYPE_SIZE (type)
	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	  && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
	      || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
	return 128;
    }
  /* DFmode data gets 64-bit alignment; modes satisfying ALIGN_MODE_128
     get 128-bit alignment.  Same per-type dispatch as
     ix86_data_alignment.  */
  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      if (TYPE_MODE (type) == DCmode && align < 64)
	return 64;
      if (TYPE_MODE (type) == XCmode && align < 128)
	return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
	    || TREE_CODE (type) == UNION_TYPE
	    || TREE_CODE (type) == QUAL_UNION_TYPE)
	   && TYPE_FIELDS (type))
    {
      /* Only the first field's mode is inspected here.  */
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
	   || TREE_CODE (type) == INTEGER_TYPE)
    {

      if (TYPE_MODE (type) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
	return 128;
    }
  return align;
}
0ed08620
JH
12535\f
/* Emit RTL insns to initialize the variable parts of a trampoline.
   FNADDR is an RTX for the address of the function's pure code.
   CXT is an RTX for the static chain value for the function.

   The bytes stored below are x86 machine code; multi-byte immediates
   written via HImode stores land little-endian, so e.g. 0xbb49 emits
   the byte sequence 49 bb.  */
void
x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
{
  if (!TARGET_64BIT)
    {
      /* 32-bit layout (10 bytes total):
	   offset 0: b9 imm32   mov $cxt, %ecx   (static chain in %ecx)
	   offset 5: e9 rel32   jmp fnaddr       (PC-relative)  */
      /* Compute offset from the end of the jmp to the target function.  */
      rtx disp = expand_binop (SImode, sub_optab, fnaddr,
			       plus_constant (tramp, 10),
			       NULL_RTX, 1, OPTAB_DIRECT);
      emit_move_insn (gen_rtx_MEM (QImode, tramp),
		      gen_int_mode (0xb9, QImode));
      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
		      gen_int_mode (0xe9, QImode));
      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
    }
  else
    {
      int offset = 0;
      /* Try to load address using shorter movl instead of movabs.
         We may want to support movq for kernel mode, but kernel does not use
         trampolines at the moment.  */
      if (x86_64_zero_extended_value (fnaddr))
	{
	  /* 41 bb imm32: movl $fnaddr, %r11d (zero-extends into %r11).  */
	  fnaddr = copy_to_mode_reg (DImode, fnaddr);
	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
			  gen_int_mode (0xbb41, HImode));
	  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
			  gen_lowpart (SImode, fnaddr));
	  offset += 6;
	}
      else
	{
	  /* 49 bb imm64: movabs $fnaddr, %r11.  */
	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
			  gen_int_mode (0xbb49, HImode));
	  emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
			  fnaddr);
	  offset += 10;
	}
      /* Load static chain using movabs to r10.  (49 ba imm64)  */
      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
		      gen_int_mode (0xba49, HImode));
      emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
		      cxt);
      offset += 10;
      /* Jump to the r11 */
      /* 49 ff e3: jmp *%r11.  */
      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
		      gen_int_mode (0xff49, HImode));
      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
		      gen_int_mode (0xe3, QImode));
      offset += 3;
      /* Sanity check: the emitted code must fit the trampoline area.  */
      if (offset > TRAMPOLINE_SIZE)
	abort ();
    }

#ifdef TRANSFER_FROM_TRAMPOLINE
  /* Some targets must mark the trampoline's stack page executable.  */
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
		     LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
#endif
}
eeb06b1b 12599\f
/* Register a target-specific builtin function NAME with function type
   TYPE and builtin code CODE, but only when MASK intersects the current
   target_flags; builtins tagged MASK_64BIT are additionally suppressed
   unless compiling for a 64-bit target.  */
#define def_builtin(MASK, NAME, TYPE, CODE)			\
do {								\
  if ((MASK) & target_flags					\
      && (!((MASK) & MASK_64BIT) || TARGET_64BIT))		\
    builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD,	\
		      NULL, NULL_TREE);				\
} while (0)
bd793c65 12607
/* Description of one ix86 builtin: which target_flags bits enable it,
   the insn pattern implementing it, its user-visible name (0 when the
   builtin is registered by hand elsewhere), and its IX86_BUILTIN_*
   code.  For comparison builtins, COMPARISON is the rtx comparison
   code to use; FLAG appears to request swapped operands (e.g. the
   "cmpgt" entries reuse the LT code with flag 1) -- confirm against
   the expanders that consume these tables.  */
struct builtin_description
{
  const unsigned int mask;
  const enum insn_code icode;
  const char *const name;
  const enum ix86_builtins code;
  const enum rtx_code comparison;
  const unsigned int flag;
};
12617
/* Scalar ordered/unordered compare builtins implemented via the SSE
   COMISS/UCOMISS and SSE2 COMISD/UCOMISD insn patterns.  Each entry
   maps a __builtin_ia32_* name to the (u)comi pattern and the rtx
   comparison code used to interpret its result.
   NOTE(review): the eq/neq entries use the unordered comparison codes
   UNEQ/LTGT rather than EQ/NE -- confirm this matches the intended
   NaN semantics of the (u)comi instructions.  */
static const struct builtin_description bdesc_comi[] =
{
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
};
12645
8b60264b 12646static const struct builtin_description bdesc_2arg[] =
bd793c65
BS
12647{
12648 /* SSE */
37f22004
L
12649 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
12650 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
12651 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
12652 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
12653 { MASK_SSE, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
12654 { MASK_SSE, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
12655 { MASK_SSE, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
12656 { MASK_SSE, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
12657
12658 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
12659 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
12660 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
12661 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
12662 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
12663 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
12664 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
12665 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
12666 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
12667 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
12668 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
12669 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
12670 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
12671 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
12672 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
12673 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
12674 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
12675 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
12676 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
12677 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
12678
12679 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
12680 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
12681 { MASK_SSE, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
12682 { MASK_SSE, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
12683
12684 { MASK_SSE, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
12685 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
12686 { MASK_SSE, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
12687 { MASK_SSE, CODE_FOR_sse_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
12688
12689 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
12690 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
12691 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
12692 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
12693 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
bd793c65
BS
12694
12695 /* MMX */
eeb06b1b
BS
12696 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
12697 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
12698 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
d50672ef 12699 { MASK_MMX, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
eeb06b1b
BS
12700 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
12701 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
12702 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
d50672ef 12703 { MASK_MMX, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
eeb06b1b
BS
12704
12705 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
12706 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
12707 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
12708 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
12709 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
12710 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
12711 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
12712 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
12713
12714 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
12715 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
37f22004 12716 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
eeb06b1b
BS
12717
12718 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
12719 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
12720 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
12721 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
12722
37f22004
L
12723 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
12724 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
eeb06b1b
BS
12725
12726 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
12727 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
12728 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
12729 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
12730 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
12731 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
12732
37f22004
L
12733 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
12734 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
12735 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
12736 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
eeb06b1b
BS
12737
12738 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
12739 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
12740 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
12741 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
12742 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
12743 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
bd793c65
BS
12744
12745 /* Special. */
eeb06b1b
BS
12746 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
12747 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
12748 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
12749
37f22004
L
12750 { MASK_SSE, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
12751 { MASK_SSE, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
12752 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
eeb06b1b
BS
12753
12754 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
12755 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
12756 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
12757 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
12758 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
12759 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
12760
12761 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
12762 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
12763 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
12764 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
12765 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
12766 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
12767
12768 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
12769 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
12770 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
12771 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
12772
37f22004 12773 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
fbe5eb6d
BS
12774 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
12775
12776 /* SSE2 */
12777 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
12778 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
12779 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
12780 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
12781 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
12782 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
12783 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
12784 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
12785
12786 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
12787 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
12788 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
12789 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
12790 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
12791 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
12792 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
12793 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
12794 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
12795 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
12796 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
12797 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
12798 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
12799 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
12800 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
fbe5eb6d
BS
12801 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
12802 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
12803 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
12804 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
fbe5eb6d
BS
12805 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
12806
12807 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
12808 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
12809 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
12810 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
12811
1877be45
JH
12812 { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
12813 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
12814 { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
12815 { MASK_SSE2, CODE_FOR_sse2_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
fbe5eb6d
BS
12816
12817 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
12818 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
12819 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
12820
12821 /* SSE2 MMX */
12822 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
12823 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
12824 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
d50672ef 12825 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
fbe5eb6d
BS
12826 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
12827 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
12828 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
d50672ef 12829 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
fbe5eb6d
BS
12830
12831 { MASK_MMX, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
12832 { MASK_MMX, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
12833 { MASK_MMX, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
12834 { MASK_MMX, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
12835 { MASK_MMX, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
12836 { MASK_MMX, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
12837 { MASK_MMX, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
12838 { MASK_MMX, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
12839
12840 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
12841 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
12842 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
12843 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },
12844
916b60b7
BS
12845 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
12846 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
12847 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
12848 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
fbe5eb6d
BS
12849
12850 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
12851 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
12852
12853 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
12854 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
12855 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
12856 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
12857 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
12858 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
12859
12860 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
12861 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
12862 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
12863 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
12864
12865 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
12866 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
12867 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
077084dd 12868 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
fbe5eb6d
BS
12869 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
12870 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
12871 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
f02e1358 12872 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
fbe5eb6d 12873
916b60b7
BS
12874 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
12875 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
12876 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
12877
12878 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
12879 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
12880
12881 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
12882 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
12883 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
12884 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
12885 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
12886 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
12887
12888 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
12889 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
12890 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
12891 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
12892 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
12893 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
12894
12895 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
12896 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
12897 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
12898 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
12899
12900 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
12901
fbe5eb6d 12902 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
37f22004 12903 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
fbe5eb6d 12904 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
22c7c85e
L
12905 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
12906
9e200aaf
KC
12907 /* SSE3 MMX */
12908 { MASK_SSE3, CODE_FOR_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
12909 { MASK_SSE3, CODE_FOR_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
12910 { MASK_SSE3, CODE_FOR_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
12911 { MASK_SSE3, CODE_FOR_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
12912 { MASK_SSE3, CODE_FOR_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
12913 { MASK_SSE3, CODE_FOR_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
bd793c65
BS
12914};
12915
8b60264b 12916static const struct builtin_description bdesc_1arg[] =
bd793c65 12917{
37f22004
L
12918 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
12919 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
fbe5eb6d 12920
37f22004
L
12921 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
12922 { MASK_SSE, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
12923 { MASK_SSE, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
fbe5eb6d 12924
37f22004
L
12925 { MASK_SSE, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
12926 { MASK_SSE, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
12927 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
12928 { MASK_SSE, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
12929 { MASK_SSE, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
12930 { MASK_SSE | MASK_64BIT, CODE_FOR_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
fbe5eb6d
BS
12931
12932 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
12933 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
12934 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
f02e1358 12935 { MASK_SSE2, CODE_FOR_sse2_movdq2q, 0, IX86_BUILTIN_MOVDQ2Q, 0, 0 },
fbe5eb6d
BS
12936
12937 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
12938
12939 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
12940 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
bd793c65 12941
fbe5eb6d
BS
12942 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
12943 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
12944 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
12945 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
12946 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
bd793c65 12947
fbe5eb6d 12948 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
bd793c65 12949
fbe5eb6d
BS
12950 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
12951 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
37f22004
L
12952 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
12953 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
fbe5eb6d
BS
12954
12955 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
12956 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
f02e1358
JH
12957 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
12958
22c7c85e
L
12959 { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 },
12960
9e200aaf
KC
12961 /* SSE3 */
12962 { MASK_SSE3, CODE_FOR_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
12963 { MASK_SSE3, CODE_FOR_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
12964 { MASK_SSE3, CODE_FOR_movddup, 0, IX86_BUILTIN_MOVDDUP, 0, 0 }
bd793c65
BS
12965};
12966
f6155fda 12967void
b96a374d 12968ix86_init_builtins (void)
f6155fda
SS
12969{
12970 if (TARGET_MMX)
12971 ix86_init_mmx_sse_builtins ();
12972}
12973
/* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
   is zero.  Otherwise, if TARGET_SSE is not set, only expand the MMX
   builtins.  */
e37af218 12977static void
b96a374d 12978ix86_init_mmx_sse_builtins (void)
bd793c65 12979{
8b60264b 12980 const struct builtin_description * d;
77ebd435 12981 size_t i;
bd793c65 12982
4a5eab38
PB
12983 tree V16QI_type_node = build_vector_type_for_mode (intQI_type_node, V16QImode);
12984 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
12985 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
12986 tree V2DI_type_node = build_vector_type_for_mode (intDI_type_node, V2DImode);
12987 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
12988 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
12989 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
12990 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
12991 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
12992 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
12993
bd793c65 12994 tree pchar_type_node = build_pointer_type (char_type_node);
068f5dea
JH
12995 tree pcchar_type_node = build_pointer_type (
12996 build_type_variant (char_type_node, 1, 0));
bd793c65 12997 tree pfloat_type_node = build_pointer_type (float_type_node);
068f5dea
JH
12998 tree pcfloat_type_node = build_pointer_type (
12999 build_type_variant (float_type_node, 1, 0));
bd793c65 13000 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
916b60b7 13001 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
bd793c65
BS
13002 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
13003
13004 /* Comparisons. */
13005 tree int_ftype_v4sf_v4sf
b4de2f7d
AH
13006 = build_function_type_list (integer_type_node,
13007 V4SF_type_node, V4SF_type_node, NULL_TREE);
bd793c65 13008 tree v4si_ftype_v4sf_v4sf
b4de2f7d
AH
13009 = build_function_type_list (V4SI_type_node,
13010 V4SF_type_node, V4SF_type_node, NULL_TREE);
bd793c65 13011 /* MMX/SSE/integer conversions. */
bd793c65 13012 tree int_ftype_v4sf
b4de2f7d
AH
13013 = build_function_type_list (integer_type_node,
13014 V4SF_type_node, NULL_TREE);
453ee231
JH
13015 tree int64_ftype_v4sf
13016 = build_function_type_list (long_long_integer_type_node,
13017 V4SF_type_node, NULL_TREE);
bd793c65 13018 tree int_ftype_v8qi
b4de2f7d 13019 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
bd793c65 13020 tree v4sf_ftype_v4sf_int
b4de2f7d
AH
13021 = build_function_type_list (V4SF_type_node,
13022 V4SF_type_node, integer_type_node, NULL_TREE);
453ee231
JH
13023 tree v4sf_ftype_v4sf_int64
13024 = build_function_type_list (V4SF_type_node,
13025 V4SF_type_node, long_long_integer_type_node,
13026 NULL_TREE);
bd793c65 13027 tree v4sf_ftype_v4sf_v2si
b4de2f7d
AH
13028 = build_function_type_list (V4SF_type_node,
13029 V4SF_type_node, V2SI_type_node, NULL_TREE);
bd793c65 13030 tree int_ftype_v4hi_int
b4de2f7d
AH
13031 = build_function_type_list (integer_type_node,
13032 V4HI_type_node, integer_type_node, NULL_TREE);
bd793c65 13033 tree v4hi_ftype_v4hi_int_int
e7a60f56 13034 = build_function_type_list (V4HI_type_node, V4HI_type_node,
b4de2f7d
AH
13035 integer_type_node, integer_type_node,
13036 NULL_TREE);
bd793c65
BS
13037 /* Miscellaneous. */
13038 tree v8qi_ftype_v4hi_v4hi
b4de2f7d
AH
13039 = build_function_type_list (V8QI_type_node,
13040 V4HI_type_node, V4HI_type_node, NULL_TREE);
bd793c65 13041 tree v4hi_ftype_v2si_v2si
b4de2f7d
AH
13042 = build_function_type_list (V4HI_type_node,
13043 V2SI_type_node, V2SI_type_node, NULL_TREE);
bd793c65 13044 tree v4sf_ftype_v4sf_v4sf_int
b4de2f7d
AH
13045 = build_function_type_list (V4SF_type_node,
13046 V4SF_type_node, V4SF_type_node,
13047 integer_type_node, NULL_TREE);
bd793c65 13048 tree v2si_ftype_v4hi_v4hi
b4de2f7d
AH
13049 = build_function_type_list (V2SI_type_node,
13050 V4HI_type_node, V4HI_type_node, NULL_TREE);
bd793c65 13051 tree v4hi_ftype_v4hi_int
b4de2f7d 13052 = build_function_type_list (V4HI_type_node,
e7a60f56 13053 V4HI_type_node, integer_type_node, NULL_TREE);
bd793c65 13054 tree v4hi_ftype_v4hi_di
b4de2f7d
AH
13055 = build_function_type_list (V4HI_type_node,
13056 V4HI_type_node, long_long_unsigned_type_node,
13057 NULL_TREE);
bd793c65 13058 tree v2si_ftype_v2si_di
b4de2f7d
AH
13059 = build_function_type_list (V2SI_type_node,
13060 V2SI_type_node, long_long_unsigned_type_node,
13061 NULL_TREE);
bd793c65 13062 tree void_ftype_void
b4de2f7d 13063 = build_function_type (void_type_node, void_list_node);
bd793c65 13064 tree void_ftype_unsigned
b4de2f7d 13065 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
22c7c85e
L
13066 tree void_ftype_unsigned_unsigned
13067 = build_function_type_list (void_type_node, unsigned_type_node,
13068 unsigned_type_node, NULL_TREE);
13069 tree void_ftype_pcvoid_unsigned_unsigned
13070 = build_function_type_list (void_type_node, const_ptr_type_node,
13071 unsigned_type_node, unsigned_type_node,
13072 NULL_TREE);
bd793c65 13073 tree unsigned_ftype_void
b4de2f7d 13074 = build_function_type (unsigned_type_node, void_list_node);
bd793c65 13075 tree di_ftype_void
b4de2f7d 13076 = build_function_type (long_long_unsigned_type_node, void_list_node);
e37af218 13077 tree v4sf_ftype_void
b4de2f7d 13078 = build_function_type (V4SF_type_node, void_list_node);
bd793c65 13079 tree v2si_ftype_v4sf
b4de2f7d 13080 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
bd793c65 13081 /* Loads/stores. */
bd793c65 13082 tree void_ftype_v8qi_v8qi_pchar
b4de2f7d
AH
13083 = build_function_type_list (void_type_node,
13084 V8QI_type_node, V8QI_type_node,
13085 pchar_type_node, NULL_TREE);
068f5dea
JH
13086 tree v4sf_ftype_pcfloat
13087 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
bd793c65
BS
13088 /* @@@ the type is bogus */
13089 tree v4sf_ftype_v4sf_pv2si
b4de2f7d 13090 = build_function_type_list (V4SF_type_node,
f8ca7923 13091 V4SF_type_node, pv2si_type_node, NULL_TREE);
1255c85c 13092 tree void_ftype_pv2si_v4sf
b4de2f7d 13093 = build_function_type_list (void_type_node,
f8ca7923 13094 pv2si_type_node, V4SF_type_node, NULL_TREE);
bd793c65 13095 tree void_ftype_pfloat_v4sf
b4de2f7d
AH
13096 = build_function_type_list (void_type_node,
13097 pfloat_type_node, V4SF_type_node, NULL_TREE);
bd793c65 13098 tree void_ftype_pdi_di
b4de2f7d
AH
13099 = build_function_type_list (void_type_node,
13100 pdi_type_node, long_long_unsigned_type_node,
13101 NULL_TREE);
916b60b7 13102 tree void_ftype_pv2di_v2di
b4de2f7d
AH
13103 = build_function_type_list (void_type_node,
13104 pv2di_type_node, V2DI_type_node, NULL_TREE);
bd793c65
BS
13105 /* Normal vector unops. */
13106 tree v4sf_ftype_v4sf
b4de2f7d 13107 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
0f290768 13108
bd793c65
BS
13109 /* Normal vector binops. */
13110 tree v4sf_ftype_v4sf_v4sf
b4de2f7d
AH
13111 = build_function_type_list (V4SF_type_node,
13112 V4SF_type_node, V4SF_type_node, NULL_TREE);
bd793c65 13113 tree v8qi_ftype_v8qi_v8qi
b4de2f7d
AH
13114 = build_function_type_list (V8QI_type_node,
13115 V8QI_type_node, V8QI_type_node, NULL_TREE);
bd793c65 13116 tree v4hi_ftype_v4hi_v4hi
b4de2f7d
AH
13117 = build_function_type_list (V4HI_type_node,
13118 V4HI_type_node, V4HI_type_node, NULL_TREE);
bd793c65 13119 tree v2si_ftype_v2si_v2si
b4de2f7d
AH
13120 = build_function_type_list (V2SI_type_node,
13121 V2SI_type_node, V2SI_type_node, NULL_TREE);
bd793c65 13122 tree di_ftype_di_di
b4de2f7d
AH
13123 = build_function_type_list (long_long_unsigned_type_node,
13124 long_long_unsigned_type_node,
13125 long_long_unsigned_type_node, NULL_TREE);
bd793c65 13126
47f339cf 13127 tree v2si_ftype_v2sf
ae3aa00d 13128 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
47f339cf 13129 tree v2sf_ftype_v2si
b4de2f7d 13130 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
47f339cf 13131 tree v2si_ftype_v2si
b4de2f7d 13132 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
47f339cf 13133 tree v2sf_ftype_v2sf
b4de2f7d 13134 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
47f339cf 13135 tree v2sf_ftype_v2sf_v2sf
b4de2f7d
AH
13136 = build_function_type_list (V2SF_type_node,
13137 V2SF_type_node, V2SF_type_node, NULL_TREE);
47f339cf 13138 tree v2si_ftype_v2sf_v2sf
b4de2f7d
AH
13139 = build_function_type_list (V2SI_type_node,
13140 V2SF_type_node, V2SF_type_node, NULL_TREE);
fbe5eb6d 13141 tree pint_type_node = build_pointer_type (integer_type_node);
068f5dea
JH
13142 tree pcint_type_node = build_pointer_type (
13143 build_type_variant (integer_type_node, 1, 0));
fbe5eb6d 13144 tree pdouble_type_node = build_pointer_type (double_type_node);
068f5dea
JH
13145 tree pcdouble_type_node = build_pointer_type (
13146 build_type_variant (double_type_node, 1, 0));
fbe5eb6d 13147 tree int_ftype_v2df_v2df
b4de2f7d
AH
13148 = build_function_type_list (integer_type_node,
13149 V2DF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d
BS
13150
13151 tree ti_ftype_void
b4de2f7d 13152 = build_function_type (intTI_type_node, void_list_node);
f02e1358
JH
13153 tree v2di_ftype_void
13154 = build_function_type (V2DI_type_node, void_list_node);
fbe5eb6d 13155 tree ti_ftype_ti_ti
b4de2f7d
AH
13156 = build_function_type_list (intTI_type_node,
13157 intTI_type_node, intTI_type_node, NULL_TREE);
068f5dea
JH
13158 tree void_ftype_pcvoid
13159 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
fbe5eb6d 13160 tree v2di_ftype_di
b4de2f7d
AH
13161 = build_function_type_list (V2DI_type_node,
13162 long_long_unsigned_type_node, NULL_TREE);
f02e1358
JH
13163 tree di_ftype_v2di
13164 = build_function_type_list (long_long_unsigned_type_node,
13165 V2DI_type_node, NULL_TREE);
fbe5eb6d 13166 tree v4sf_ftype_v4si
b4de2f7d 13167 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
fbe5eb6d 13168 tree v4si_ftype_v4sf
b4de2f7d 13169 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
fbe5eb6d 13170 tree v2df_ftype_v4si
b4de2f7d 13171 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
fbe5eb6d 13172 tree v4si_ftype_v2df
b4de2f7d 13173 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 13174 tree v2si_ftype_v2df
b4de2f7d 13175 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 13176 tree v4sf_ftype_v2df
b4de2f7d 13177 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 13178 tree v2df_ftype_v2si
b4de2f7d 13179 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
fbe5eb6d 13180 tree v2df_ftype_v4sf
b4de2f7d 13181 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
fbe5eb6d 13182 tree int_ftype_v2df
b4de2f7d 13183 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
453ee231
JH
13184 tree int64_ftype_v2df
13185 = build_function_type_list (long_long_integer_type_node,
b96a374d 13186 V2DF_type_node, NULL_TREE);
fbe5eb6d 13187 tree v2df_ftype_v2df_int
b4de2f7d
AH
13188 = build_function_type_list (V2DF_type_node,
13189 V2DF_type_node, integer_type_node, NULL_TREE);
453ee231
JH
13190 tree v2df_ftype_v2df_int64
13191 = build_function_type_list (V2DF_type_node,
13192 V2DF_type_node, long_long_integer_type_node,
13193 NULL_TREE);
fbe5eb6d 13194 tree v4sf_ftype_v4sf_v2df
b4de2f7d
AH
13195 = build_function_type_list (V4SF_type_node,
13196 V4SF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 13197 tree v2df_ftype_v2df_v4sf
b4de2f7d
AH
13198 = build_function_type_list (V2DF_type_node,
13199 V2DF_type_node, V4SF_type_node, NULL_TREE);
fbe5eb6d 13200 tree v2df_ftype_v2df_v2df_int
b4de2f7d
AH
13201 = build_function_type_list (V2DF_type_node,
13202 V2DF_type_node, V2DF_type_node,
13203 integer_type_node,
13204 NULL_TREE);
fbe5eb6d 13205 tree v2df_ftype_v2df_pv2si
b4de2f7d
AH
13206 = build_function_type_list (V2DF_type_node,
13207 V2DF_type_node, pv2si_type_node, NULL_TREE);
fbe5eb6d 13208 tree void_ftype_pv2si_v2df
b4de2f7d
AH
13209 = build_function_type_list (void_type_node,
13210 pv2si_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 13211 tree void_ftype_pdouble_v2df
b4de2f7d
AH
13212 = build_function_type_list (void_type_node,
13213 pdouble_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 13214 tree void_ftype_pint_int
b4de2f7d
AH
13215 = build_function_type_list (void_type_node,
13216 pint_type_node, integer_type_node, NULL_TREE);
fbe5eb6d 13217 tree void_ftype_v16qi_v16qi_pchar
b4de2f7d
AH
13218 = build_function_type_list (void_type_node,
13219 V16QI_type_node, V16QI_type_node,
13220 pchar_type_node, NULL_TREE);
068f5dea
JH
13221 tree v2df_ftype_pcdouble
13222 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
fbe5eb6d 13223 tree v2df_ftype_v2df_v2df
b4de2f7d
AH
13224 = build_function_type_list (V2DF_type_node,
13225 V2DF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 13226 tree v16qi_ftype_v16qi_v16qi
b4de2f7d
AH
13227 = build_function_type_list (V16QI_type_node,
13228 V16QI_type_node, V16QI_type_node, NULL_TREE);
fbe5eb6d 13229 tree v8hi_ftype_v8hi_v8hi
b4de2f7d
AH
13230 = build_function_type_list (V8HI_type_node,
13231 V8HI_type_node, V8HI_type_node, NULL_TREE);
fbe5eb6d 13232 tree v4si_ftype_v4si_v4si
b4de2f7d
AH
13233 = build_function_type_list (V4SI_type_node,
13234 V4SI_type_node, V4SI_type_node, NULL_TREE);
fbe5eb6d 13235 tree v2di_ftype_v2di_v2di
b4de2f7d
AH
13236 = build_function_type_list (V2DI_type_node,
13237 V2DI_type_node, V2DI_type_node, NULL_TREE);
fbe5eb6d 13238 tree v2di_ftype_v2df_v2df
b4de2f7d
AH
13239 = build_function_type_list (V2DI_type_node,
13240 V2DF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 13241 tree v2df_ftype_v2df
b4de2f7d 13242 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 13243 tree v2df_ftype_double
b4de2f7d 13244 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
fbe5eb6d 13245 tree v2df_ftype_double_double
b4de2f7d
AH
13246 = build_function_type_list (V2DF_type_node,
13247 double_type_node, double_type_node, NULL_TREE);
fbe5eb6d 13248 tree int_ftype_v8hi_int
b4de2f7d
AH
13249 = build_function_type_list (integer_type_node,
13250 V8HI_type_node, integer_type_node, NULL_TREE);
fbe5eb6d 13251 tree v8hi_ftype_v8hi_int_int
b4de2f7d
AH
13252 = build_function_type_list (V8HI_type_node,
13253 V8HI_type_node, integer_type_node,
13254 integer_type_node, NULL_TREE);
916b60b7 13255 tree v2di_ftype_v2di_int
b4de2f7d
AH
13256 = build_function_type_list (V2DI_type_node,
13257 V2DI_type_node, integer_type_node, NULL_TREE);
fbe5eb6d 13258 tree v4si_ftype_v4si_int
b4de2f7d
AH
13259 = build_function_type_list (V4SI_type_node,
13260 V4SI_type_node, integer_type_node, NULL_TREE);
fbe5eb6d 13261 tree v8hi_ftype_v8hi_int
b4de2f7d
AH
13262 = build_function_type_list (V8HI_type_node,
13263 V8HI_type_node, integer_type_node, NULL_TREE);
916b60b7 13264 tree v8hi_ftype_v8hi_v2di
b4de2f7d
AH
13265 = build_function_type_list (V8HI_type_node,
13266 V8HI_type_node, V2DI_type_node, NULL_TREE);
916b60b7 13267 tree v4si_ftype_v4si_v2di
b4de2f7d
AH
13268 = build_function_type_list (V4SI_type_node,
13269 V4SI_type_node, V2DI_type_node, NULL_TREE);
916b60b7 13270 tree v4si_ftype_v8hi_v8hi
b4de2f7d
AH
13271 = build_function_type_list (V4SI_type_node,
13272 V8HI_type_node, V8HI_type_node, NULL_TREE);
916b60b7 13273 tree di_ftype_v8qi_v8qi
b4de2f7d
AH
13274 = build_function_type_list (long_long_unsigned_type_node,
13275 V8QI_type_node, V8QI_type_node, NULL_TREE);
916b60b7 13276 tree v2di_ftype_v16qi_v16qi
b4de2f7d
AH
13277 = build_function_type_list (V2DI_type_node,
13278 V16QI_type_node, V16QI_type_node, NULL_TREE);
916b60b7 13279 tree int_ftype_v16qi
b4de2f7d 13280 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
068f5dea
JH
13281 tree v16qi_ftype_pcchar
13282 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
f02e1358
JH
13283 tree void_ftype_pchar_v16qi
13284 = build_function_type_list (void_type_node,
13285 pchar_type_node, V16QI_type_node, NULL_TREE);
068f5dea
JH
13286 tree v4si_ftype_pcint
13287 = build_function_type_list (V4SI_type_node, pcint_type_node, NULL_TREE);
13288 tree void_ftype_pcint_v4si
f02e1358 13289 = build_function_type_list (void_type_node,
068f5dea 13290 pcint_type_node, V4SI_type_node, NULL_TREE);
f02e1358
JH
13291 tree v2di_ftype_v2di
13292 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
47f339cf 13293
f8a1ebc6
JH
13294 tree float80_type;
13295 tree float128_type;
13296
13297 /* The __float80 type. */
13298 if (TYPE_MODE (long_double_type_node) == XFmode)
13299 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
13300 "__float80");
13301 else
13302 {
13303 /* The __float80 type. */
13304 float80_type = make_node (REAL_TYPE);
13305 TYPE_PRECISION (float80_type) = 96;
13306 layout_type (float80_type);
13307 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
13308 }
13309
13310 float128_type = make_node (REAL_TYPE);
13311 TYPE_PRECISION (float128_type) = 128;
13312 layout_type (float128_type);
13313 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
13314
bd793c65
BS
13315 /* Add all builtins that are more or less simple operations on two
13316 operands. */
ca7558fc 13317 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
bd793c65
BS
13318 {
13319 /* Use one of the operands; the target can have a different mode for
13320 mask-generating compares. */
13321 enum machine_mode mode;
13322 tree type;
13323
13324 if (d->name == 0)
13325 continue;
13326 mode = insn_data[d->icode].operand[1].mode;
13327
bd793c65
BS
13328 switch (mode)
13329 {
fbe5eb6d
BS
13330 case V16QImode:
13331 type = v16qi_ftype_v16qi_v16qi;
13332 break;
13333 case V8HImode:
13334 type = v8hi_ftype_v8hi_v8hi;
13335 break;
13336 case V4SImode:
13337 type = v4si_ftype_v4si_v4si;
13338 break;
13339 case V2DImode:
13340 type = v2di_ftype_v2di_v2di;
13341 break;
13342 case V2DFmode:
13343 type = v2df_ftype_v2df_v2df;
13344 break;
13345 case TImode:
13346 type = ti_ftype_ti_ti;
13347 break;
bd793c65
BS
13348 case V4SFmode:
13349 type = v4sf_ftype_v4sf_v4sf;
13350 break;
13351 case V8QImode:
13352 type = v8qi_ftype_v8qi_v8qi;
13353 break;
13354 case V4HImode:
13355 type = v4hi_ftype_v4hi_v4hi;
13356 break;
13357 case V2SImode:
13358 type = v2si_ftype_v2si_v2si;
13359 break;
bd793c65
BS
13360 case DImode:
13361 type = di_ftype_di_di;
13362 break;
13363
13364 default:
13365 abort ();
13366 }
0f290768 13367
bd793c65
BS
13368 /* Override for comparisons. */
13369 if (d->icode == CODE_FOR_maskcmpv4sf3
13370 || d->icode == CODE_FOR_maskncmpv4sf3
13371 || d->icode == CODE_FOR_vmmaskcmpv4sf3
13372 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
13373 type = v4si_ftype_v4sf_v4sf;
13374
fbe5eb6d
BS
13375 if (d->icode == CODE_FOR_maskcmpv2df3
13376 || d->icode == CODE_FOR_maskncmpv2df3
13377 || d->icode == CODE_FOR_vmmaskcmpv2df3
13378 || d->icode == CODE_FOR_vmmaskncmpv2df3)
13379 type = v2di_ftype_v2df_v2df;
13380
eeb06b1b 13381 def_builtin (d->mask, d->name, type, d->code);
bd793c65
BS
13382 }
13383
13384 /* Add the remaining MMX insns with somewhat more complicated types. */
eeb06b1b
BS
13385 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
13386 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
eeb06b1b
BS
13387 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
13388 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
13389 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
13390
13391 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
13392 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
13393 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
13394
13395 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
13396 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
13397
13398 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
13399 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
0f290768 13400
bd793c65 13401 /* comi/ucomi insns. */
ca7558fc 13402 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
fbe5eb6d
BS
13403 if (d->mask == MASK_SSE2)
13404 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
13405 else
13406 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
bd793c65 13407
1255c85c
BS
13408 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
13409 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
13410 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
bd793c65 13411
37f22004
L
13412 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
13413 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
13414 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
13415 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
13416 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
13417 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
13418 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
13419 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
13420 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
13421 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
13422 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
13423
13424 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
13425 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
13426
13427 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
13428
13429 def_builtin (MASK_SSE, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADAPS);
13430 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
13431 def_builtin (MASK_SSE, "__builtin_ia32_loadss", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADSS);
13432 def_builtin (MASK_SSE, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
13433 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
13434 def_builtin (MASK_SSE, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
13435
13436 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
13437 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
13438 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
13439 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
13440
13441 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
13442 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
13443 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
13444 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
13445
13446 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
13447
13448 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
13449
13450 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
13451 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
13452 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
13453 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
13454 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
13455 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
13456
13457 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
bd793c65 13458
47f339cf
BS
13459 /* Original 3DNow! */
13460 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
13461 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
13462 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
13463 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
13464 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
13465 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
13466 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
13467 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
13468 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
13469 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
13470 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
13471 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
13472 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
13473 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
13474 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
13475 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
13476 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
13477 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
13478 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
13479 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
47f339cf
BS
13480
13481 /* 3DNow! extension as used in the Athlon CPU. */
13482 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
13483 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
13484 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
13485 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
13486 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
13487 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
13488
37f22004 13489 def_builtin (MASK_SSE, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
fbe5eb6d
BS
13490
13491 /* SSE2 */
13492 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
13493 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
13494
13495 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
13496 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
f02e1358 13497 def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q);
fbe5eb6d 13498
068f5dea
JH
13499 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADAPD);
13500 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
13501 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADSD);
fbe5eb6d
BS
13502 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
13503 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
13504 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
13505
13506 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
13507 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
13508 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
13509 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
13510
13511 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
916b60b7 13512 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
fbe5eb6d
BS
13513 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
13514 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
916b60b7 13515 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
fbe5eb6d
BS
13516
13517 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
13518 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
13519 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
916b60b7 13520 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
fbe5eb6d
BS
13521
13522 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
13523 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
13524
13525 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
13526
13527 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
916b60b7 13528 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
fbe5eb6d
BS
13529
13530 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
13531 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
13532 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
13533 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
13534 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
13535
13536 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
13537
13538 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
13539 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
37f22004
L
13540 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
13541 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
fbe5eb6d
BS
13542
13543 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
13544 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
13545 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
13546
13547 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
37f22004 13548 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
fbe5eb6d
BS
13549 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
13550 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
13551
13552 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
13553 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
13554 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
068f5dea
JH
13555 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pcdouble, IX86_BUILTIN_LOADPD1);
13556 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADRPD);
fbe5eb6d
BS
13557 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
13558 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
13559
068f5dea 13560 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
fbe5eb6d
BS
13561 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
13562 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
916b60b7 13563
068f5dea
JH
13564 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQA);
13565 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
13566 def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pcint, IX86_BUILTIN_LOADD);
f02e1358
JH
13567 def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA);
13568 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
068f5dea 13569 def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pcint_v4si, IX86_BUILTIN_STORED);
f02e1358
JH
13570 def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ);
13571
37f22004 13572 def_builtin (MASK_SSE, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI);
f02e1358 13573
916b60b7
BS
13574 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
13575 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
13576 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
13577
13578 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
13579 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
13580 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
13581
13582 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
13583 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
13584
ab3146fd 13585 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
916b60b7
BS
13586 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
13587 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
13588 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
13589
ab3146fd 13590 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
916b60b7
BS
13591 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
13592 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
13593 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
13594
13595 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
13596 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
13597
13598 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
22c7c85e
L
13599
13600 /* Prescott New Instructions. */
9e200aaf 13601 def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
22c7c85e
L
13602 void_ftype_pcvoid_unsigned_unsigned,
13603 IX86_BUILTIN_MONITOR);
9e200aaf 13604 def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
22c7c85e
L
13605 void_ftype_unsigned_unsigned,
13606 IX86_BUILTIN_MWAIT);
9e200aaf 13607 def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
22c7c85e
L
13608 v4sf_ftype_v4sf,
13609 IX86_BUILTIN_MOVSHDUP);
9e200aaf 13610 def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
22c7c85e
L
13611 v4sf_ftype_v4sf,
13612 IX86_BUILTIN_MOVSLDUP);
9e200aaf 13613 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
22c7c85e 13614 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
9e200aaf 13615 def_builtin (MASK_SSE3, "__builtin_ia32_loadddup",
22c7c85e 13616 v2df_ftype_pcdouble, IX86_BUILTIN_LOADDDUP);
9e200aaf 13617 def_builtin (MASK_SSE3, "__builtin_ia32_movddup",
22c7c85e 13618 v2df_ftype_v2df, IX86_BUILTIN_MOVDDUP);
bd793c65
BS
13619}
13620
13621/* Errors in the source file can cause expand_expr to return const0_rtx
13622 where we expect a vector. To avoid crashing, use one of the vector
13623 clear instructions. */
13624static rtx
b96a374d 13625safe_vector_operand (rtx x, enum machine_mode mode)
bd793c65
BS
13626{
13627 if (x != const0_rtx)
13628 return x;
13629 x = gen_reg_rtx (mode);
13630
47f339cf 13631 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
bd793c65
BS
13632 emit_insn (gen_mmx_clrdi (mode == DImode ? x
13633 : gen_rtx_SUBREG (DImode, x, 0)));
13634 else
e37af218 13635 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
4977bab6
ZW
13636 : gen_rtx_SUBREG (V4SFmode, x, 0),
13637 CONST0_RTX (V4SFmode)));
bd793c65
BS
13638 return x;
13639}
13640
13641/* Subroutine of ix86_expand_builtin to take care of binop insns. */
13642
13643static rtx
b96a374d 13644ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
bd793c65
BS
13645{
13646 rtx pat;
13647 tree arg0 = TREE_VALUE (arglist);
13648 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13649 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13650 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13651 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13652 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13653 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
13654
13655 if (VECTOR_MODE_P (mode0))
13656 op0 = safe_vector_operand (op0, mode0);
13657 if (VECTOR_MODE_P (mode1))
13658 op1 = safe_vector_operand (op1, mode1);
13659
13660 if (! target
13661 || GET_MODE (target) != tmode
13662 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13663 target = gen_reg_rtx (tmode);
13664
d9deed68
JH
13665 if (GET_MODE (op1) == SImode && mode1 == TImode)
13666 {
13667 rtx x = gen_reg_rtx (V4SImode);
13668 emit_insn (gen_sse2_loadd (x, op1));
13669 op1 = gen_lowpart (TImode, x);
13670 }
13671
bd793c65
BS
13672 /* In case the insn wants input operands in modes different from
13673 the result, abort. */
ebe75517
JH
13674 if ((GET_MODE (op0) != mode0 && GET_MODE (op0) != VOIDmode)
13675 || (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode))
bd793c65
BS
13676 abort ();
13677
13678 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13679 op0 = copy_to_mode_reg (mode0, op0);
13680 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13681 op1 = copy_to_mode_reg (mode1, op1);
13682
59bef189
RH
13683 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
13684 yet one of the two must not be a memory. This is normally enforced
13685 by expanders, but we didn't bother to create one here. */
13686 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
13687 op0 = copy_to_mode_reg (mode0, op0);
13688
bd793c65
BS
13689 pat = GEN_FCN (icode) (target, op0, op1);
13690 if (! pat)
13691 return 0;
13692 emit_insn (pat);
13693 return target;
13694}
13695
13696/* Subroutine of ix86_expand_builtin to take care of stores. */
13697
13698static rtx
b96a374d 13699ix86_expand_store_builtin (enum insn_code icode, tree arglist)
bd793c65
BS
13700{
13701 rtx pat;
13702 tree arg0 = TREE_VALUE (arglist);
13703 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13704 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13705 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13706 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
13707 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
13708
13709 if (VECTOR_MODE_P (mode1))
13710 op1 = safe_vector_operand (op1, mode1);
13711
13712 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
7f0e57bd 13713 op1 = copy_to_mode_reg (mode1, op1);
59bef189 13714
bd793c65
BS
13715 pat = GEN_FCN (icode) (op0, op1);
13716 if (pat)
13717 emit_insn (pat);
13718 return 0;
13719}
13720
13721/* Subroutine of ix86_expand_builtin to take care of unop insns. */
13722
13723static rtx
b96a374d
AJ
13724ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
13725 rtx target, int do_load)
bd793c65
BS
13726{
13727 rtx pat;
13728 tree arg0 = TREE_VALUE (arglist);
13729 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13730 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13731 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13732
13733 if (! target
13734 || GET_MODE (target) != tmode
13735 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13736 target = gen_reg_rtx (tmode);
13737 if (do_load)
13738 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13739 else
13740 {
13741 if (VECTOR_MODE_P (mode0))
13742 op0 = safe_vector_operand (op0, mode0);
13743
13744 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13745 op0 = copy_to_mode_reg (mode0, op0);
13746 }
13747
13748 pat = GEN_FCN (icode) (target, op0);
13749 if (! pat)
13750 return 0;
13751 emit_insn (pat);
13752 return target;
13753}
13754
13755/* Subroutine of ix86_expand_builtin to take care of three special unop insns:
13756 sqrtss, rsqrtss, rcpss. */
13757
13758static rtx
b96a374d 13759ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
bd793c65
BS
13760{
13761 rtx pat;
13762 tree arg0 = TREE_VALUE (arglist);
59bef189 13763 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
bd793c65
BS
13764 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13765 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13766
13767 if (! target
13768 || GET_MODE (target) != tmode
13769 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13770 target = gen_reg_rtx (tmode);
13771
13772 if (VECTOR_MODE_P (mode0))
13773 op0 = safe_vector_operand (op0, mode0);
13774
13775 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13776 op0 = copy_to_mode_reg (mode0, op0);
fce5a9f2 13777
59bef189
RH
13778 op1 = op0;
13779 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
13780 op1 = copy_to_mode_reg (mode0, op1);
fce5a9f2 13781
59bef189 13782 pat = GEN_FCN (icode) (target, op0, op1);
bd793c65
BS
13783 if (! pat)
13784 return 0;
13785 emit_insn (pat);
13786 return target;
13787}
13788
13789/* Subroutine of ix86_expand_builtin to take care of comparison insns. */
13790
13791static rtx
b96a374d
AJ
13792ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
13793 rtx target)
bd793c65
BS
13794{
13795 rtx pat;
13796 tree arg0 = TREE_VALUE (arglist);
13797 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13798 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13799 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13800 rtx op2;
13801 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
13802 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
13803 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
13804 enum rtx_code comparison = d->comparison;
13805
13806 if (VECTOR_MODE_P (mode0))
13807 op0 = safe_vector_operand (op0, mode0);
13808 if (VECTOR_MODE_P (mode1))
13809 op1 = safe_vector_operand (op1, mode1);
13810
13811 /* Swap operands if we have a comparison that isn't available in
13812 hardware. */
13813 if (d->flag)
13814 {
21e1b5f1
BS
13815 rtx tmp = gen_reg_rtx (mode1);
13816 emit_move_insn (tmp, op1);
bd793c65 13817 op1 = op0;
21e1b5f1 13818 op0 = tmp;
bd793c65 13819 }
21e1b5f1
BS
13820
13821 if (! target
13822 || GET_MODE (target) != tmode
13823 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
bd793c65
BS
13824 target = gen_reg_rtx (tmode);
13825
13826 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
13827 op0 = copy_to_mode_reg (mode0, op0);
13828 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
13829 op1 = copy_to_mode_reg (mode1, op1);
13830
13831 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13832 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
13833 if (! pat)
13834 return 0;
13835 emit_insn (pat);
13836 return target;
13837}
13838
13839/* Subroutine of ix86_expand_builtin to take care of comi insns. */
13840
13841static rtx
b96a374d
AJ
13842ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
13843 rtx target)
bd793c65
BS
13844{
13845 rtx pat;
13846 tree arg0 = TREE_VALUE (arglist);
13847 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13848 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13849 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13850 rtx op2;
13851 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
13852 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
13853 enum rtx_code comparison = d->comparison;
13854
13855 if (VECTOR_MODE_P (mode0))
13856 op0 = safe_vector_operand (op0, mode0);
13857 if (VECTOR_MODE_P (mode1))
13858 op1 = safe_vector_operand (op1, mode1);
13859
13860 /* Swap operands if we have a comparison that isn't available in
13861 hardware. */
13862 if (d->flag)
13863 {
13864 rtx tmp = op1;
13865 op1 = op0;
13866 op0 = tmp;
bd793c65
BS
13867 }
13868
13869 target = gen_reg_rtx (SImode);
13870 emit_move_insn (target, const0_rtx);
13871 target = gen_rtx_SUBREG (QImode, target, 0);
13872
13873 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
13874 op0 = copy_to_mode_reg (mode0, op0);
13875 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
13876 op1 = copy_to_mode_reg (mode1, op1);
13877
13878 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
1194ca05 13879 pat = GEN_FCN (d->icode) (op0, op1);
bd793c65
BS
13880 if (! pat)
13881 return 0;
13882 emit_insn (pat);
29628f27
BS
13883 emit_insn (gen_rtx_SET (VOIDmode,
13884 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
13885 gen_rtx_fmt_ee (comparison, QImode,
1194ca05 13886 SET_DEST (pat),
29628f27 13887 const0_rtx)));
bd793c65 13888
6f1a6c5b 13889 return SUBREG_REG (target);
bd793c65
BS
13890}
13891
13892/* Expand an expression EXP that calls a built-in function,
13893 with result going to TARGET if that's convenient
13894 (and in mode MODE if that's convenient).
13895 SUBTARGET may be used as the target for computing one of EXP's operands.
13896 IGNORE is nonzero if the value is to be ignored. */
13897
13898rtx
b96a374d
AJ
13899ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
13900 enum machine_mode mode ATTRIBUTE_UNUSED,
13901 int ignore ATTRIBUTE_UNUSED)
bd793c65 13902{
8b60264b 13903 const struct builtin_description *d;
77ebd435 13904 size_t i;
bd793c65
BS
13905 enum insn_code icode;
13906 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
13907 tree arglist = TREE_OPERAND (exp, 1);
e37af218 13908 tree arg0, arg1, arg2;
bd793c65
BS
13909 rtx op0, op1, op2, pat;
13910 enum machine_mode tmode, mode0, mode1, mode2;
8752c357 13911 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
bd793c65
BS
13912
13913 switch (fcode)
13914 {
13915 case IX86_BUILTIN_EMMS:
13916 emit_insn (gen_emms ());
13917 return 0;
13918
13919 case IX86_BUILTIN_SFENCE:
13920 emit_insn (gen_sfence ());
13921 return 0;
13922
bd793c65 13923 case IX86_BUILTIN_PEXTRW:
fbe5eb6d
BS
13924 case IX86_BUILTIN_PEXTRW128:
13925 icode = (fcode == IX86_BUILTIN_PEXTRW
13926 ? CODE_FOR_mmx_pextrw
13927 : CODE_FOR_sse2_pextrw);
bd793c65
BS
13928 arg0 = TREE_VALUE (arglist);
13929 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13930 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13931 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13932 tmode = insn_data[icode].operand[0].mode;
13933 mode0 = insn_data[icode].operand[1].mode;
13934 mode1 = insn_data[icode].operand[2].mode;
13935
13936 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13937 op0 = copy_to_mode_reg (mode0, op0);
13938 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13939 {
ebe75517
JH
13940 error ("selector must be an integer constant in the range 0..%i",
13941 fcode == IX86_BUILTIN_PEXTRW ? 3:7);
6f1a6c5b 13942 return gen_reg_rtx (tmode);
bd793c65
BS
13943 }
13944 if (target == 0
13945 || GET_MODE (target) != tmode
13946 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13947 target = gen_reg_rtx (tmode);
13948 pat = GEN_FCN (icode) (target, op0, op1);
13949 if (! pat)
13950 return 0;
13951 emit_insn (pat);
13952 return target;
13953
13954 case IX86_BUILTIN_PINSRW:
fbe5eb6d
BS
13955 case IX86_BUILTIN_PINSRW128:
13956 icode = (fcode == IX86_BUILTIN_PINSRW
13957 ? CODE_FOR_mmx_pinsrw
13958 : CODE_FOR_sse2_pinsrw);
bd793c65
BS
13959 arg0 = TREE_VALUE (arglist);
13960 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13961 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13962 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13963 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13964 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13965 tmode = insn_data[icode].operand[0].mode;
13966 mode0 = insn_data[icode].operand[1].mode;
13967 mode1 = insn_data[icode].operand[2].mode;
13968 mode2 = insn_data[icode].operand[3].mode;
13969
13970 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13971 op0 = copy_to_mode_reg (mode0, op0);
13972 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13973 op1 = copy_to_mode_reg (mode1, op1);
13974 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13975 {
ebe75517
JH
13976 error ("selector must be an integer constant in the range 0..%i",
13977 fcode == IX86_BUILTIN_PINSRW ? 15:255);
bd793c65
BS
13978 return const0_rtx;
13979 }
13980 if (target == 0
13981 || GET_MODE (target) != tmode
13982 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13983 target = gen_reg_rtx (tmode);
13984 pat = GEN_FCN (icode) (target, op0, op1, op2);
13985 if (! pat)
13986 return 0;
13987 emit_insn (pat);
13988 return target;
13989
13990 case IX86_BUILTIN_MASKMOVQ:
077084dd 13991 case IX86_BUILTIN_MASKMOVDQU:
fbe5eb6d
BS
13992 icode = (fcode == IX86_BUILTIN_MASKMOVQ
13993 ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
f8ca7923
JH
13994 : (TARGET_64BIT ? CODE_FOR_sse2_maskmovdqu_rex64
13995 : CODE_FOR_sse2_maskmovdqu));
bd793c65
BS
13996 /* Note the arg order is different from the operand order. */
13997 arg1 = TREE_VALUE (arglist);
13998 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
13999 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14000 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14001 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14002 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14003 mode0 = insn_data[icode].operand[0].mode;
14004 mode1 = insn_data[icode].operand[1].mode;
14005 mode2 = insn_data[icode].operand[2].mode;
14006
5c464583 14007 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
bd793c65
BS
14008 op0 = copy_to_mode_reg (mode0, op0);
14009 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
14010 op1 = copy_to_mode_reg (mode1, op1);
14011 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
14012 op2 = copy_to_mode_reg (mode2, op2);
14013 pat = GEN_FCN (icode) (op0, op1, op2);
14014 if (! pat)
14015 return 0;
14016 emit_insn (pat);
14017 return 0;
14018
14019 case IX86_BUILTIN_SQRTSS:
14020 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
14021 case IX86_BUILTIN_RSQRTSS:
14022 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
14023 case IX86_BUILTIN_RCPSS:
14024 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
14025
14026 case IX86_BUILTIN_LOADAPS:
14027 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
14028
14029 case IX86_BUILTIN_LOADUPS:
14030 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
14031
14032 case IX86_BUILTIN_STOREAPS:
e37af218 14033 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
f02e1358 14034
bd793c65 14035 case IX86_BUILTIN_STOREUPS:
e37af218 14036 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
bd793c65
BS
14037
14038 case IX86_BUILTIN_LOADSS:
14039 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
14040
14041 case IX86_BUILTIN_STORESS:
e37af218 14042 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
bd793c65 14043
0f290768 14044 case IX86_BUILTIN_LOADHPS:
bd793c65 14045 case IX86_BUILTIN_LOADLPS:
fbe5eb6d
BS
14046 case IX86_BUILTIN_LOADHPD:
14047 case IX86_BUILTIN_LOADLPD:
14048 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
14049 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
14050 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
997404de 14051 : CODE_FOR_sse2_movsd);
bd793c65
BS
14052 arg0 = TREE_VALUE (arglist);
14053 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14054 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14055 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14056 tmode = insn_data[icode].operand[0].mode;
14057 mode0 = insn_data[icode].operand[1].mode;
14058 mode1 = insn_data[icode].operand[2].mode;
14059
14060 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14061 op0 = copy_to_mode_reg (mode0, op0);
14062 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
14063 if (target == 0
14064 || GET_MODE (target) != tmode
14065 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14066 target = gen_reg_rtx (tmode);
14067 pat = GEN_FCN (icode) (target, op0, op1);
14068 if (! pat)
14069 return 0;
14070 emit_insn (pat);
14071 return target;
0f290768 14072
bd793c65
BS
14073 case IX86_BUILTIN_STOREHPS:
14074 case IX86_BUILTIN_STORELPS:
fbe5eb6d
BS
14075 case IX86_BUILTIN_STOREHPD:
14076 case IX86_BUILTIN_STORELPD:
14077 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
14078 : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
14079 : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
997404de 14080 : CODE_FOR_sse2_movsd);
bd793c65
BS
14081 arg0 = TREE_VALUE (arglist);
14082 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14083 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14084 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14085 mode0 = insn_data[icode].operand[1].mode;
14086 mode1 = insn_data[icode].operand[2].mode;
14087
14088 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
14089 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14090 op1 = copy_to_mode_reg (mode1, op1);
14091
14092 pat = GEN_FCN (icode) (op0, op0, op1);
14093 if (! pat)
14094 return 0;
14095 emit_insn (pat);
14096 return 0;
14097
14098 case IX86_BUILTIN_MOVNTPS:
e37af218 14099 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
bd793c65 14100 case IX86_BUILTIN_MOVNTQ:
e37af218 14101 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
bd793c65
BS
14102
14103 case IX86_BUILTIN_LDMXCSR:
14104 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
14105 target = assign_386_stack_local (SImode, 0);
14106 emit_move_insn (target, op0);
14107 emit_insn (gen_ldmxcsr (target));
14108 return 0;
14109
14110 case IX86_BUILTIN_STMXCSR:
14111 target = assign_386_stack_local (SImode, 0);
14112 emit_insn (gen_stmxcsr (target));
14113 return copy_to_mode_reg (SImode, target);
14114
bd793c65 14115 case IX86_BUILTIN_SHUFPS:
fbe5eb6d
BS
14116 case IX86_BUILTIN_SHUFPD:
14117 icode = (fcode == IX86_BUILTIN_SHUFPS
14118 ? CODE_FOR_sse_shufps
14119 : CODE_FOR_sse2_shufpd);
bd793c65
BS
14120 arg0 = TREE_VALUE (arglist);
14121 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14122 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14123 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14124 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14125 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14126 tmode = insn_data[icode].operand[0].mode;
14127 mode0 = insn_data[icode].operand[1].mode;
14128 mode1 = insn_data[icode].operand[2].mode;
14129 mode2 = insn_data[icode].operand[3].mode;
14130
14131 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14132 op0 = copy_to_mode_reg (mode0, op0);
14133 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14134 op1 = copy_to_mode_reg (mode1, op1);
14135 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14136 {
14137 /* @@@ better error message */
14138 error ("mask must be an immediate");
6f1a6c5b 14139 return gen_reg_rtx (tmode);
bd793c65
BS
14140 }
14141 if (target == 0
14142 || GET_MODE (target) != tmode
14143 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14144 target = gen_reg_rtx (tmode);
14145 pat = GEN_FCN (icode) (target, op0, op1, op2);
14146 if (! pat)
14147 return 0;
14148 emit_insn (pat);
14149 return target;
14150
14151 case IX86_BUILTIN_PSHUFW:
fbe5eb6d
BS
14152 case IX86_BUILTIN_PSHUFD:
14153 case IX86_BUILTIN_PSHUFHW:
14154 case IX86_BUILTIN_PSHUFLW:
14155 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
14156 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
14157 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
14158 : CODE_FOR_mmx_pshufw);
bd793c65
BS
14159 arg0 = TREE_VALUE (arglist);
14160 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14161 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14162 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14163 tmode = insn_data[icode].operand[0].mode;
29628f27
BS
14164 mode1 = insn_data[icode].operand[1].mode;
14165 mode2 = insn_data[icode].operand[2].mode;
bd793c65 14166
29628f27
BS
14167 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14168 op0 = copy_to_mode_reg (mode1, op0);
14169 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
bd793c65
BS
14170 {
14171 /* @@@ better error message */
14172 error ("mask must be an immediate");
14173 return const0_rtx;
14174 }
14175 if (target == 0
14176 || GET_MODE (target) != tmode
14177 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14178 target = gen_reg_rtx (tmode);
29628f27 14179 pat = GEN_FCN (icode) (target, op0, op1);
bd793c65
BS
14180 if (! pat)
14181 return 0;
14182 emit_insn (pat);
14183 return target;
14184
ab3146fd
ZD
14185 case IX86_BUILTIN_PSLLDQI128:
14186 case IX86_BUILTIN_PSRLDQI128:
14187 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
14188 : CODE_FOR_sse2_lshrti3);
14189 arg0 = TREE_VALUE (arglist);
14190 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14191 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14192 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14193 tmode = insn_data[icode].operand[0].mode;
14194 mode1 = insn_data[icode].operand[1].mode;
14195 mode2 = insn_data[icode].operand[2].mode;
14196
14197 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14198 {
14199 op0 = copy_to_reg (op0);
14200 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
14201 }
14202 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14203 {
14204 error ("shift must be an immediate");
14205 return const0_rtx;
14206 }
14207 target = gen_reg_rtx (V2DImode);
14208 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
14209 if (! pat)
14210 return 0;
14211 emit_insn (pat);
14212 return target;
14213
47f339cf
BS
14214 case IX86_BUILTIN_FEMMS:
14215 emit_insn (gen_femms ());
14216 return NULL_RTX;
14217
14218 case IX86_BUILTIN_PAVGUSB:
14219 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
14220
14221 case IX86_BUILTIN_PF2ID:
14222 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
14223
14224 case IX86_BUILTIN_PFACC:
14225 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
14226
14227 case IX86_BUILTIN_PFADD:
14228 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
14229
14230 case IX86_BUILTIN_PFCMPEQ:
14231 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
14232
14233 case IX86_BUILTIN_PFCMPGE:
14234 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
14235
14236 case IX86_BUILTIN_PFCMPGT:
14237 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
14238
14239 case IX86_BUILTIN_PFMAX:
14240 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
14241
14242 case IX86_BUILTIN_PFMIN:
14243 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
14244
14245 case IX86_BUILTIN_PFMUL:
14246 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
14247
14248 case IX86_BUILTIN_PFRCP:
14249 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
14250
14251 case IX86_BUILTIN_PFRCPIT1:
14252 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
14253
14254 case IX86_BUILTIN_PFRCPIT2:
14255 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
14256
14257 case IX86_BUILTIN_PFRSQIT1:
14258 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
14259
14260 case IX86_BUILTIN_PFRSQRT:
14261 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
14262
14263 case IX86_BUILTIN_PFSUB:
14264 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
14265
14266 case IX86_BUILTIN_PFSUBR:
14267 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
14268
14269 case IX86_BUILTIN_PI2FD:
14270 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
14271
14272 case IX86_BUILTIN_PMULHRW:
14273 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
14274
47f339cf
BS
14275 case IX86_BUILTIN_PF2IW:
14276 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
14277
14278 case IX86_BUILTIN_PFNACC:
14279 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
14280
14281 case IX86_BUILTIN_PFPNACC:
14282 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
14283
14284 case IX86_BUILTIN_PI2FW:
14285 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
14286
14287 case IX86_BUILTIN_PSWAPDSI:
14288 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
14289
14290 case IX86_BUILTIN_PSWAPDSF:
14291 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
14292
e37af218
RH
14293 case IX86_BUILTIN_SSE_ZERO:
14294 target = gen_reg_rtx (V4SFmode);
4977bab6 14295 emit_insn (gen_sse_clrv4sf (target, CONST0_RTX (V4SFmode)));
bd793c65
BS
14296 return target;
14297
bd793c65
BS
14298 case IX86_BUILTIN_MMX_ZERO:
14299 target = gen_reg_rtx (DImode);
14300 emit_insn (gen_mmx_clrdi (target));
14301 return target;
14302
f02e1358
JH
14303 case IX86_BUILTIN_CLRTI:
14304 target = gen_reg_rtx (V2DImode);
14305 emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode, target, V2DImode, 0)));
14306 return target;
14307
14308
fbe5eb6d
BS
14309 case IX86_BUILTIN_SQRTSD:
14310 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
14311 case IX86_BUILTIN_LOADAPD:
14312 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
14313 case IX86_BUILTIN_LOADUPD:
14314 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
14315
14316 case IX86_BUILTIN_STOREAPD:
14317 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14318 case IX86_BUILTIN_STOREUPD:
14319 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
14320
14321 case IX86_BUILTIN_LOADSD:
14322 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
14323
14324 case IX86_BUILTIN_STORESD:
14325 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
14326
14327 case IX86_BUILTIN_SETPD1:
14328 target = assign_386_stack_local (DFmode, 0);
14329 arg0 = TREE_VALUE (arglist);
14330 emit_move_insn (adjust_address (target, DFmode, 0),
14331 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14332 op0 = gen_reg_rtx (V2DFmode);
14333 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
60c81c89 14334 emit_insn (gen_sse2_shufpd (op0, op0, op0, const0_rtx));
fbe5eb6d
BS
14335 return op0;
14336
14337 case IX86_BUILTIN_SETPD:
14338 target = assign_386_stack_local (V2DFmode, 0);
14339 arg0 = TREE_VALUE (arglist);
14340 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14341 emit_move_insn (adjust_address (target, DFmode, 0),
14342 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14343 emit_move_insn (adjust_address (target, DFmode, 8),
14344 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
14345 op0 = gen_reg_rtx (V2DFmode);
14346 emit_insn (gen_sse2_movapd (op0, target));
14347 return op0;
14348
14349 case IX86_BUILTIN_LOADRPD:
14350 target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
14351 gen_reg_rtx (V2DFmode), 1);
60c81c89 14352 emit_insn (gen_sse2_shufpd (target, target, target, const1_rtx));
fbe5eb6d
BS
14353 return target;
14354
14355 case IX86_BUILTIN_LOADPD1:
14356 target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
14357 gen_reg_rtx (V2DFmode), 1);
14358 emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
14359 return target;
14360
14361 case IX86_BUILTIN_STOREPD1:
14362 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14363 case IX86_BUILTIN_STORERPD:
14364 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14365
48126a97
JH
14366 case IX86_BUILTIN_CLRPD:
14367 target = gen_reg_rtx (V2DFmode);
14368 emit_insn (gen_sse_clrv2df (target));
14369 return target;
14370
fbe5eb6d
BS
14371 case IX86_BUILTIN_MFENCE:
14372 emit_insn (gen_sse2_mfence ());
14373 return 0;
14374 case IX86_BUILTIN_LFENCE:
14375 emit_insn (gen_sse2_lfence ());
14376 return 0;
14377
14378 case IX86_BUILTIN_CLFLUSH:
14379 arg0 = TREE_VALUE (arglist);
14380 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14381 icode = CODE_FOR_sse2_clflush;
1194ca05
JH
14382 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
14383 op0 = copy_to_mode_reg (Pmode, op0);
fbe5eb6d
BS
14384
14385 emit_insn (gen_sse2_clflush (op0));
14386 return 0;
14387
14388 case IX86_BUILTIN_MOVNTPD:
14389 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
14390 case IX86_BUILTIN_MOVNTDQ:
916b60b7 14391 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
fbe5eb6d
BS
14392 case IX86_BUILTIN_MOVNTI:
14393 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
14394
f02e1358
JH
14395 case IX86_BUILTIN_LOADDQA:
14396 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa, arglist, target, 1);
14397 case IX86_BUILTIN_LOADDQU:
14398 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
14399 case IX86_BUILTIN_LOADD:
14400 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd, arglist, target, 1);
14401
14402 case IX86_BUILTIN_STOREDQA:
14403 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa, arglist);
14404 case IX86_BUILTIN_STOREDQU:
14405 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
14406 case IX86_BUILTIN_STORED:
14407 return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist);
14408
22c7c85e
L
14409 case IX86_BUILTIN_MONITOR:
14410 arg0 = TREE_VALUE (arglist);
14411 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14412 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14413 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14414 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14415 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14416 if (!REG_P (op0))
14417 op0 = copy_to_mode_reg (SImode, op0);
14418 if (!REG_P (op1))
14419 op1 = copy_to_mode_reg (SImode, op1);
14420 if (!REG_P (op2))
14421 op2 = copy_to_mode_reg (SImode, op2);
14422 emit_insn (gen_monitor (op0, op1, op2));
14423 return 0;
14424
14425 case IX86_BUILTIN_MWAIT:
14426 arg0 = TREE_VALUE (arglist);
14427 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14428 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14429 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14430 if (!REG_P (op0))
14431 op0 = copy_to_mode_reg (SImode, op0);
14432 if (!REG_P (op1))
14433 op1 = copy_to_mode_reg (SImode, op1);
14434 emit_insn (gen_mwait (op0, op1));
14435 return 0;
14436
14437 case IX86_BUILTIN_LOADDDUP:
14438 return ix86_expand_unop_builtin (CODE_FOR_loadddup, arglist, target, 1);
14439
14440 case IX86_BUILTIN_LDDQU:
14441 return ix86_expand_unop_builtin (CODE_FOR_lddqu, arglist, target,
14442 1);
14443
bd793c65
BS
14444 default:
14445 break;
14446 }
14447
ca7558fc 14448 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
bd793c65
BS
14449 if (d->code == fcode)
14450 {
14451 /* Compares are treated specially. */
14452 if (d->icode == CODE_FOR_maskcmpv4sf3
14453 || d->icode == CODE_FOR_vmmaskcmpv4sf3
14454 || d->icode == CODE_FOR_maskncmpv4sf3
fbe5eb6d
BS
14455 || d->icode == CODE_FOR_vmmaskncmpv4sf3
14456 || d->icode == CODE_FOR_maskcmpv2df3
14457 || d->icode == CODE_FOR_vmmaskcmpv2df3
14458 || d->icode == CODE_FOR_maskncmpv2df3
14459 || d->icode == CODE_FOR_vmmaskncmpv2df3)
bd793c65
BS
14460 return ix86_expand_sse_compare (d, arglist, target);
14461
14462 return ix86_expand_binop_builtin (d->icode, arglist, target);
14463 }
14464
ca7558fc 14465 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
bd793c65
BS
14466 if (d->code == fcode)
14467 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
0f290768 14468
ca7558fc 14469 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
bd793c65
BS
14470 if (d->code == fcode)
14471 return ix86_expand_sse_comi (d, arglist, target);
0f290768 14472
bd793c65
BS
14473 /* @@@ Should really do something sensible here. */
14474 return 0;
bd793c65 14475}
4211a8fb
JH
14476
14477/* Store OPERAND to the memory after reload is completed. This means
f710504c 14478 that we can't easily use assign_stack_local. */
4211a8fb 14479rtx
b96a374d 14480ix86_force_to_memory (enum machine_mode mode, rtx operand)
4211a8fb 14481{
898d374d 14482 rtx result;
4211a8fb
JH
14483 if (!reload_completed)
14484 abort ();
a5b378d6 14485 if (TARGET_RED_ZONE)
898d374d
JH
14486 {
14487 result = gen_rtx_MEM (mode,
14488 gen_rtx_PLUS (Pmode,
14489 stack_pointer_rtx,
14490 GEN_INT (-RED_ZONE_SIZE)));
14491 emit_move_insn (result, operand);
14492 }
a5b378d6 14493 else if (!TARGET_RED_ZONE && TARGET_64BIT)
4211a8fb 14494 {
898d374d 14495 switch (mode)
4211a8fb 14496 {
898d374d
JH
14497 case HImode:
14498 case SImode:
14499 operand = gen_lowpart (DImode, operand);
5efb1046 14500 /* FALLTHRU */
898d374d 14501 case DImode:
4211a8fb 14502 emit_insn (
898d374d
JH
14503 gen_rtx_SET (VOIDmode,
14504 gen_rtx_MEM (DImode,
14505 gen_rtx_PRE_DEC (DImode,
14506 stack_pointer_rtx)),
14507 operand));
14508 break;
14509 default:
14510 abort ();
14511 }
14512 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14513 }
14514 else
14515 {
14516 switch (mode)
14517 {
14518 case DImode:
14519 {
14520 rtx operands[2];
14521 split_di (&operand, 1, operands, operands + 1);
14522 emit_insn (
14523 gen_rtx_SET (VOIDmode,
14524 gen_rtx_MEM (SImode,
14525 gen_rtx_PRE_DEC (Pmode,
14526 stack_pointer_rtx)),
14527 operands[1]));
14528 emit_insn (
14529 gen_rtx_SET (VOIDmode,
14530 gen_rtx_MEM (SImode,
14531 gen_rtx_PRE_DEC (Pmode,
14532 stack_pointer_rtx)),
14533 operands[0]));
14534 }
14535 break;
14536 case HImode:
14537 /* It is better to store HImodes as SImodes. */
14538 if (!TARGET_PARTIAL_REG_STALL)
14539 operand = gen_lowpart (SImode, operand);
5efb1046 14540 /* FALLTHRU */
898d374d 14541 case SImode:
4211a8fb 14542 emit_insn (
898d374d
JH
14543 gen_rtx_SET (VOIDmode,
14544 gen_rtx_MEM (GET_MODE (operand),
14545 gen_rtx_PRE_DEC (SImode,
14546 stack_pointer_rtx)),
14547 operand));
14548 break;
14549 default:
14550 abort ();
4211a8fb 14551 }
898d374d 14552 result = gen_rtx_MEM (mode, stack_pointer_rtx);
4211a8fb 14553 }
898d374d 14554 return result;
4211a8fb
JH
14555}
14556
14557/* Free operand from the memory. */
14558void
b96a374d 14559ix86_free_from_memory (enum machine_mode mode)
4211a8fb 14560{
a5b378d6 14561 if (!TARGET_RED_ZONE)
898d374d
JH
14562 {
14563 int size;
14564
14565 if (mode == DImode || TARGET_64BIT)
14566 size = 8;
14567 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
14568 size = 2;
14569 else
14570 size = 4;
14571 /* Use LEA to deallocate stack space. In peephole2 it will be converted
14572 to pop or add instruction if registers are available. */
14573 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
14574 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14575 GEN_INT (size))));
14576 }
4211a8fb 14577}
a946dd00 14578
f84aa48a
JH
14579/* Put float CONST_DOUBLE in the constant pool instead of fp regs.
14580 QImode must go into class Q_REGS.
14581 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
892a2d68 14582 movdf to do mem-to-mem moves through integer regs. */
f84aa48a 14583enum reg_class
b96a374d 14584ix86_preferred_reload_class (rtx x, enum reg_class class)
f84aa48a 14585{
1877be45
JH
14586 if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x)))
14587 return NO_REGS;
f84aa48a
JH
14588 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
14589 {
14590 /* SSE can't load any constant directly yet. */
14591 if (SSE_CLASS_P (class))
14592 return NO_REGS;
14593 /* Floats can load 0 and 1. */
14594 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
14595 {
14596 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
14597 if (MAYBE_SSE_CLASS_P (class))
14598 return (reg_class_subset_p (class, GENERAL_REGS)
14599 ? GENERAL_REGS : FLOAT_REGS);
14600 else
14601 return class;
14602 }
14603 /* General regs can load everything. */
14604 if (reg_class_subset_p (class, GENERAL_REGS))
14605 return GENERAL_REGS;
14606 /* In case we haven't resolved FLOAT or SSE yet, give up. */
14607 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
14608 return NO_REGS;
14609 }
14610 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
14611 return NO_REGS;
14612 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
14613 return Q_REGS;
14614 return class;
14615}
14616
14617/* If we are copying between general and FP registers, we need a memory
14618 location. The same is true for SSE and MMX registers.
14619
14620 The macro can't work reliably when one of the CLASSES is class containing
14621 registers from multiple units (SSE, MMX, integer). We avoid this by never
14622 combining those units in single alternative in the machine description.
14623 Ensure that this constraint holds to avoid unexpected surprises.
14624
14625 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
14626 enforce these sanity checks. */
14627int
b96a374d
AJ
14628ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
14629 enum machine_mode mode, int strict)
f84aa48a
JH
14630{
14631 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
14632 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
14633 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
14634 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
14635 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
14636 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
14637 {
14638 if (strict)
14639 abort ();
14640 else
14641 return 1;
14642 }
14643 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
8f62128d
JH
14644 || ((SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
14645 || MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
14646 && ((mode != SImode && (mode != DImode || !TARGET_64BIT))
14647 || (!TARGET_INTER_UNIT_MOVES && !optimize_size))));
f84aa48a
JH
14648}
14649/* Return the cost of moving data from a register in class CLASS1 to
a4f31c00 14650 one in class CLASS2.
f84aa48a
JH
14651
14652 It is not required that the cost always equal 2 when FROM is the same as TO;
14653 on some machines it is expensive to move between registers if they are not
14654 general registers. */
14655int
b96a374d
AJ
14656ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
14657 enum reg_class class2)
f84aa48a
JH
14658{
14659 /* In case we require secondary memory, compute cost of the store followed
b96a374d 14660 by load. In order to avoid bad register allocation choices, we need
d631b80a
RH
14661 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
14662
f84aa48a
JH
14663 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
14664 {
d631b80a
RH
14665 int cost = 1;
14666
14667 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
14668 MEMORY_MOVE_COST (mode, class1, 1));
14669 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
14670 MEMORY_MOVE_COST (mode, class2, 1));
b96a374d 14671
d631b80a
RH
14672 /* In case of copying from general_purpose_register we may emit multiple
14673 stores followed by single load causing memory size mismatch stall.
d1f87653 14674 Count this as arbitrarily high cost of 20. */
62415523 14675 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
d631b80a
RH
14676 cost += 20;
14677
14678 /* In the case of FP/MMX moves, the registers actually overlap, and we
14679 have to switch modes in order to treat them differently. */
14680 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
14681 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
14682 cost += 20;
14683
14684 return cost;
f84aa48a 14685 }
d631b80a 14686
92d0fb09 14687 /* Moves between SSE/MMX and integer unit are expensive. */
62415523
JH
14688 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
14689 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
fa79946e
JH
14690 return ix86_cost->mmxsse_to_integer;
14691 if (MAYBE_FLOAT_CLASS_P (class1))
14692 return ix86_cost->fp_move;
14693 if (MAYBE_SSE_CLASS_P (class1))
14694 return ix86_cost->sse_move;
14695 if (MAYBE_MMX_CLASS_P (class1))
14696 return ix86_cost->mmx_move;
f84aa48a
JH
14697 return 2;
14698}
14699
a946dd00
JH
14700/* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
14701int
b96a374d 14702ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
a946dd00
JH
14703{
14704 /* Flags and only flags can only hold CCmode values. */
14705 if (CC_REGNO_P (regno))
14706 return GET_MODE_CLASS (mode) == MODE_CC;
14707 if (GET_MODE_CLASS (mode) == MODE_CC
14708 || GET_MODE_CLASS (mode) == MODE_RANDOM
14709 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
14710 return 0;
14711 if (FP_REGNO_P (regno))
14712 return VALID_FP_MODE_P (mode);
14713 if (SSE_REGNO_P (regno))
a67a3220 14714 return (TARGET_SSE ? VALID_SSE_REG_MODE (mode) : 0);
a946dd00 14715 if (MMX_REGNO_P (regno))
a67a3220
JH
14716 return (TARGET_MMX
14717 ? VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode) : 0);
a946dd00
JH
14718 /* We handle both integer and floats in the general purpose registers.
14719 In future we should be able to handle vector modes as well. */
14720 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
14721 return 0;
14722 /* Take care for QImode values - they can be in non-QI regs, but then
14723 they do cause partial register stalls. */
d2836273 14724 if (regno < 4 || mode != QImode || TARGET_64BIT)
a946dd00
JH
14725 return 1;
14726 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
14727}
fa79946e
JH
14728
14729/* Return the cost of moving data of mode M between a
14730 register and memory. A value of 2 is the default; this cost is
14731 relative to those in `REGISTER_MOVE_COST'.
14732
14733 If moving between registers and memory is more expensive than
14734 between two registers, you should define this macro to express the
a4f31c00
AJ
14735 relative cost.
14736
fa79946e
JH
14737 Model also increased moving costs of QImode registers in non
14738 Q_REGS classes.
14739 */
14740int
b96a374d 14741ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
fa79946e
JH
14742{
14743 if (FLOAT_CLASS_P (class))
14744 {
14745 int index;
14746 switch (mode)
14747 {
14748 case SFmode:
14749 index = 0;
14750 break;
14751 case DFmode:
14752 index = 1;
14753 break;
14754 case XFmode:
fa79946e
JH
14755 index = 2;
14756 break;
14757 default:
14758 return 100;
14759 }
14760 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
14761 }
14762 if (SSE_CLASS_P (class))
14763 {
14764 int index;
14765 switch (GET_MODE_SIZE (mode))
14766 {
14767 case 4:
14768 index = 0;
14769 break;
14770 case 8:
14771 index = 1;
14772 break;
14773 case 16:
14774 index = 2;
14775 break;
14776 default:
14777 return 100;
14778 }
14779 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
14780 }
14781 if (MMX_CLASS_P (class))
14782 {
14783 int index;
14784 switch (GET_MODE_SIZE (mode))
14785 {
14786 case 4:
14787 index = 0;
14788 break;
14789 case 8:
14790 index = 1;
14791 break;
14792 default:
14793 return 100;
14794 }
14795 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
14796 }
14797 switch (GET_MODE_SIZE (mode))
14798 {
14799 case 1:
14800 if (in)
14801 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
14802 : ix86_cost->movzbl_load);
14803 else
14804 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
14805 : ix86_cost->int_store[0] + 4);
14806 break;
14807 case 2:
14808 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
14809 default:
14810 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
14811 if (mode == TFmode)
14812 mode = XFmode;
3bb7e126 14813 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
bce75972
VM
14814 * (((int) GET_MODE_SIZE (mode)
14815 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
fa79946e
JH
14816 }
14817}
0ecf09f9 14818
3c50106f
RH
14819/* Compute a (partial) cost for rtx X. Return true if the complete
14820 cost has been computed, and false if subexpressions should be
14821 scanned. In either case, *TOTAL contains the cost result. */
14822
14823static bool
b96a374d 14824ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
3c50106f
RH
14825{
14826 enum machine_mode mode = GET_MODE (x);
14827
14828 switch (code)
14829 {
14830 case CONST_INT:
14831 case CONST:
14832 case LABEL_REF:
14833 case SYMBOL_REF:
14834 if (TARGET_64BIT && !x86_64_sign_extended_value (x))
14835 *total = 3;
14836 else if (TARGET_64BIT && !x86_64_zero_extended_value (x))
14837 *total = 2;
3504dad3
JH
14838 else if (flag_pic && SYMBOLIC_CONST (x)
14839 && (!TARGET_64BIT
14840 || (!GET_CODE (x) != LABEL_REF
14841 && (GET_CODE (x) != SYMBOL_REF
12969f45 14842 || !SYMBOL_REF_LOCAL_P (x)))))
3c50106f
RH
14843 *total = 1;
14844 else
14845 *total = 0;
14846 return true;
14847
14848 case CONST_DOUBLE:
14849 if (mode == VOIDmode)
14850 *total = 0;
14851 else
14852 switch (standard_80387_constant_p (x))
14853 {
14854 case 1: /* 0.0 */
14855 *total = 1;
14856 break;
881b2a96 14857 default: /* Other constants */
3c50106f
RH
14858 *total = 2;
14859 break;
881b2a96
RS
14860 case 0:
14861 case -1:
3c50106f
RH
14862 /* Start with (MEM (SYMBOL_REF)), since that's where
14863 it'll probably end up. Add a penalty for size. */
14864 *total = (COSTS_N_INSNS (1)
3504dad3 14865 + (flag_pic != 0 && !TARGET_64BIT)
3c50106f
RH
14866 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
14867 break;
14868 }
14869 return true;
14870
14871 case ZERO_EXTEND:
14872 /* The zero extensions is often completely free on x86_64, so make
14873 it as cheap as possible. */
14874 if (TARGET_64BIT && mode == DImode
14875 && GET_MODE (XEXP (x, 0)) == SImode)
14876 *total = 1;
14877 else if (TARGET_ZERO_EXTEND_WITH_AND)
14878 *total = COSTS_N_INSNS (ix86_cost->add);
14879 else
14880 *total = COSTS_N_INSNS (ix86_cost->movzx);
14881 return false;
14882
14883 case SIGN_EXTEND:
14884 *total = COSTS_N_INSNS (ix86_cost->movsx);
14885 return false;
14886
14887 case ASHIFT:
14888 if (GET_CODE (XEXP (x, 1)) == CONST_INT
14889 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
14890 {
14891 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
14892 if (value == 1)
14893 {
14894 *total = COSTS_N_INSNS (ix86_cost->add);
14895 return false;
14896 }
14897 if ((value == 2 || value == 3)
14898 && !TARGET_DECOMPOSE_LEA
14899 && ix86_cost->lea <= ix86_cost->shift_const)
14900 {
14901 *total = COSTS_N_INSNS (ix86_cost->lea);
14902 return false;
14903 }
14904 }
5efb1046 14905 /* FALLTHRU */
3c50106f
RH
14906
14907 case ROTATE:
14908 case ASHIFTRT:
14909 case LSHIFTRT:
14910 case ROTATERT:
14911 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
14912 {
14913 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14914 {
14915 if (INTVAL (XEXP (x, 1)) > 32)
14916 *total = COSTS_N_INSNS(ix86_cost->shift_const + 2);
14917 else
14918 *total = COSTS_N_INSNS(ix86_cost->shift_const * 2);
14919 }
14920 else
14921 {
14922 if (GET_CODE (XEXP (x, 1)) == AND)
14923 *total = COSTS_N_INSNS(ix86_cost->shift_var * 2);
14924 else
14925 *total = COSTS_N_INSNS(ix86_cost->shift_var * 6 + 2);
14926 }
14927 }
14928 else
14929 {
14930 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14931 *total = COSTS_N_INSNS (ix86_cost->shift_const);
14932 else
14933 *total = COSTS_N_INSNS (ix86_cost->shift_var);
14934 }
14935 return false;
14936
14937 case MULT:
14938 if (FLOAT_MODE_P (mode))
3c50106f 14939 {
4a5eab38
PB
14940 *total = COSTS_N_INSNS (ix86_cost->fmul);
14941 return false;
3c50106f
RH
14942 }
14943 else
14944 {
4a5eab38
PB
14945 rtx op0 = XEXP (x, 0);
14946 rtx op1 = XEXP (x, 1);
14947 int nbits;
14948 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14949 {
14950 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
14951 for (nbits = 0; value != 0; value &= value - 1)
14952 nbits++;
14953 }
14954 else
14955 /* This is arbitrary. */
14956 nbits = 7;
14957
14958 /* Compute costs correctly for widening multiplication. */
14959 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op1) == ZERO_EXTEND)
14960 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
14961 == GET_MODE_SIZE (mode))
14962 {
14963 int is_mulwiden = 0;
14964 enum machine_mode inner_mode = GET_MODE (op0);
14965
14966 if (GET_CODE (op0) == GET_CODE (op1))
14967 is_mulwiden = 1, op1 = XEXP (op1, 0);
14968 else if (GET_CODE (op1) == CONST_INT)
14969 {
14970 if (GET_CODE (op0) == SIGN_EXTEND)
14971 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
14972 == INTVAL (op1);
14973 else
14974 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
14975 }
14976
14977 if (is_mulwiden)
14978 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
14979 }
14980
14981 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
14982 + nbits * ix86_cost->mult_bit)
14983 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code);
14984
14985 return true;
3c50106f 14986 }
3c50106f
RH
14987
14988 case DIV:
14989 case UDIV:
14990 case MOD:
14991 case UMOD:
14992 if (FLOAT_MODE_P (mode))
14993 *total = COSTS_N_INSNS (ix86_cost->fdiv);
14994 else
14995 *total = COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)]);
14996 return false;
14997
14998 case PLUS:
14999 if (FLOAT_MODE_P (mode))
15000 *total = COSTS_N_INSNS (ix86_cost->fadd);
15001 else if (!TARGET_DECOMPOSE_LEA
15002 && GET_MODE_CLASS (mode) == MODE_INT
15003 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
15004 {
15005 if (GET_CODE (XEXP (x, 0)) == PLUS
15006 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
15007 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
15008 && CONSTANT_P (XEXP (x, 1)))
15009 {
15010 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
15011 if (val == 2 || val == 4 || val == 8)
15012 {
15013 *total = COSTS_N_INSNS (ix86_cost->lea);
15014 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
15015 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
15016 outer_code);
15017 *total += rtx_cost (XEXP (x, 1), outer_code);
15018 return true;
15019 }
15020 }
15021 else if (GET_CODE (XEXP (x, 0)) == MULT
15022 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
15023 {
15024 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
15025 if (val == 2 || val == 4 || val == 8)
15026 {
15027 *total = COSTS_N_INSNS (ix86_cost->lea);
15028 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
15029 *total += rtx_cost (XEXP (x, 1), outer_code);
15030 return true;
15031 }
15032 }
15033 else if (GET_CODE (XEXP (x, 0)) == PLUS)
15034 {
15035 *total = COSTS_N_INSNS (ix86_cost->lea);
15036 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
15037 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
15038 *total += rtx_cost (XEXP (x, 1), outer_code);
15039 return true;
15040 }
15041 }
5efb1046 15042 /* FALLTHRU */
3c50106f
RH
15043
15044 case MINUS:
15045 if (FLOAT_MODE_P (mode))
15046 {
15047 *total = COSTS_N_INSNS (ix86_cost->fadd);
15048 return false;
15049 }
5efb1046 15050 /* FALLTHRU */
3c50106f
RH
15051
15052 case AND:
15053 case IOR:
15054 case XOR:
15055 if (!TARGET_64BIT && mode == DImode)
15056 {
15057 *total = (COSTS_N_INSNS (ix86_cost->add) * 2
15058 + (rtx_cost (XEXP (x, 0), outer_code)
15059 << (GET_MODE (XEXP (x, 0)) != DImode))
15060 + (rtx_cost (XEXP (x, 1), outer_code)
b96a374d 15061 << (GET_MODE (XEXP (x, 1)) != DImode)));
3c50106f
RH
15062 return true;
15063 }
5efb1046 15064 /* FALLTHRU */
3c50106f
RH
15065
15066 case NEG:
15067 if (FLOAT_MODE_P (mode))
15068 {
15069 *total = COSTS_N_INSNS (ix86_cost->fchs);
15070 return false;
15071 }
5efb1046 15072 /* FALLTHRU */
3c50106f
RH
15073
15074 case NOT:
15075 if (!TARGET_64BIT && mode == DImode)
15076 *total = COSTS_N_INSNS (ix86_cost->add * 2);
15077 else
15078 *total = COSTS_N_INSNS (ix86_cost->add);
15079 return false;
15080
15081 case FLOAT_EXTEND:
15082 if (!TARGET_SSE_MATH || !VALID_SSE_REG_MODE (mode))
15083 *total = 0;
15084 return false;
15085
15086 case ABS:
15087 if (FLOAT_MODE_P (mode))
15088 *total = COSTS_N_INSNS (ix86_cost->fabs);
15089 return false;
15090
15091 case SQRT:
15092 if (FLOAT_MODE_P (mode))
15093 *total = COSTS_N_INSNS (ix86_cost->fsqrt);
15094 return false;
15095
74dc3e94
RH
15096 case UNSPEC:
15097 if (XINT (x, 1) == UNSPEC_TP)
15098 *total = 0;
15099 return false;
15100
3c50106f
RH
15101 default:
15102 return false;
15103 }
15104}
15105
#if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
/* Register SYMBOL as a static constructor on SVR3-style targets:
   push its address in the .init section so the runtime startup code
   can pop and invoke it.  PRIORITY is not supported here.  */
static void
ix86_svr3_asm_out_constructor (rtx symbol, int priority ATTRIBUTE_UNUSED)
{
  init_section ();
  fprintf (asm_out_file, "\tpushl $");
  assemble_name (asm_out_file, XSTR (symbol, 0));
  fprintf (asm_out_file, "\n");
}
#endif
162f023b 15116
b069de3b
SS
#if TARGET_MACHO

/* Counter used to generate unique local labels (LPC$n / Ln$lz) for
   each stub emitted in this translation unit.  */
static int current_machopic_label_num;

/* Given a symbol name and its associated stub, write out the
   definition of the stub.  */

void
machopic_output_stub (FILE *file, const char *symb, const char *stub)
{
  unsigned int length;
  char *binder_name, *symbol_name, lazy_ptr_name[32];
  int label = ++current_machopic_label_num;

  /* Lose our funky encoding stuff so it doesn't contaminate the stub.  */
  symb = (*targetm.strip_name_encoding) (symb);

  /* Build the binder and symbol names in stack buffers; the extra 32
     bytes leave room for the decoration the GEN_* macros append.  */
  length = strlen (stub);
  binder_name = alloca (length + 32);
  GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);

  length = strlen (symb);
  symbol_name = alloca (length + 32);
  GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);

  sprintf (lazy_ptr_name, "L%d$lz", label);

  /* PIC stubs must derive a PC via call/pop before they can address
     the lazy pointer; non-PIC stubs jump through it directly.  */
  if (MACHOPIC_PURE)
    machopic_picsymbol_stub_section ();
  else
    machopic_symbol_stub_section ();

  fprintf (file, "%s:\n", stub);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);

  if (MACHOPIC_PURE)
    {
      /* Materialize the PC in %eax, load the lazy pointer, and jump.  */
      fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
      fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
      fprintf (file, "\tjmp %%edx\n");
    }
  else
    fprintf (file, "\tjmp *%s\n", lazy_ptr_name);

  /* The binder: push the lazy pointer's address and enter dyld, which
     resolves the real symbol and patches the lazy pointer.  */
  fprintf (file, "%s:\n", binder_name);

  if (MACHOPIC_PURE)
    {
      fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
      fprintf (file, "\tpushl %%eax\n");
    }
  else
    fprintf (file, "\t pushl $%s\n", lazy_ptr_name);

  fprintf (file, "\tjmp dyld_stub_binding_helper\n");

  /* Emit the lazy pointer itself, initially pointing at the binder so
     the first call triggers resolution.  */
  machopic_lazy_symbol_ptr_section ();
  fprintf (file, "%s:\n", lazy_ptr_name);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
  fprintf (file, "\t.long %s\n", binder_name);
}
#endif /* TARGET_MACHO */
15179
162f023b
JH
15180/* Order the registers for register allocator. */
15181
15182void
b96a374d 15183x86_order_regs_for_local_alloc (void)
162f023b
JH
15184{
15185 int pos = 0;
15186 int i;
15187
15188 /* First allocate the local general purpose registers. */
15189 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15190 if (GENERAL_REGNO_P (i) && call_used_regs[i])
15191 reg_alloc_order [pos++] = i;
15192
15193 /* Global general purpose registers. */
15194 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15195 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
15196 reg_alloc_order [pos++] = i;
15197
15198 /* x87 registers come first in case we are doing FP math
15199 using them. */
15200 if (!TARGET_SSE_MATH)
15201 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15202 reg_alloc_order [pos++] = i;
fce5a9f2 15203
162f023b
JH
15204 /* SSE registers. */
15205 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
15206 reg_alloc_order [pos++] = i;
15207 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
15208 reg_alloc_order [pos++] = i;
15209
d1f87653 15210 /* x87 registers. */
162f023b
JH
15211 if (TARGET_SSE_MATH)
15212 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15213 reg_alloc_order [pos++] = i;
15214
15215 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
15216 reg_alloc_order [pos++] = i;
15217
15218 /* Initialize the rest of array as we do not allocate some registers
15219 at all. */
15220 while (pos < FIRST_PSEUDO_REGISTER)
15221 reg_alloc_order [pos++] = 0;
15222}
194734e9 15223
4977bab6
ZW
15224#ifndef TARGET_USE_MS_BITFIELD_LAYOUT
15225#define TARGET_USE_MS_BITFIELD_LAYOUT 0
15226#endif
15227
fe77449a
DR
15228/* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
15229 struct attribute_spec.handler. */
15230static tree
b96a374d
AJ
15231ix86_handle_struct_attribute (tree *node, tree name,
15232 tree args ATTRIBUTE_UNUSED,
15233 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
fe77449a
DR
15234{
15235 tree *type = NULL;
15236 if (DECL_P (*node))
15237 {
15238 if (TREE_CODE (*node) == TYPE_DECL)
15239 type = &TREE_TYPE (*node);
15240 }
15241 else
15242 type = node;
15243
15244 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
15245 || TREE_CODE (*type) == UNION_TYPE)))
15246 {
15247 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
15248 *no_add_attrs = true;
15249 }
15250
15251 else if ((is_attribute_p ("ms_struct", name)
15252 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
15253 || ((is_attribute_p ("gcc_struct", name)
15254 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
15255 {
15256 warning ("`%s' incompatible attribute ignored",
15257 IDENTIFIER_POINTER (name));
15258 *no_add_attrs = true;
15259 }
15260
15261 return NULL_TREE;
15262}
15263
4977bab6 15264static bool
b96a374d 15265ix86_ms_bitfield_layout_p (tree record_type)
4977bab6 15266{
fe77449a 15267 return (TARGET_USE_MS_BITFIELD_LAYOUT &&
021bad8e 15268 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
fe77449a 15269 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
4977bab6
ZW
15270}
15271
483ab821
MM
/* Returns an expression indicating where the this parameter is
   located on entry to the FUNCTION.  */

static rtx
x86_this_parameter (tree function)
{
  tree type = TREE_TYPE (function);

  if (TARGET_64BIT)
    {
      /* On 64-bit, `this' is always in a register: the first integer
	 argument register, or the second when a hidden aggregate-return
	 pointer occupies the first.  */
      int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
      return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
    }

  if (ix86_function_regparm (type, function) > 0)
    {
      tree parm;

      parm = TYPE_ARG_TYPES (type);
      /* Figure out whether or not the function has a variable number of
	 arguments.  */
      for (; parm; parm = TREE_CHAIN (parm))
	if (TREE_VALUE (parm) == void_type_node)
	  break;
      /* If not, the this parameter is in the first argument.  */
      if (parm)
	{
	  /* regparm puts the first argument in EAX; fastcall uses ECX.  */
	  int regno = 0;
	  if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
	    regno = 2;
	  return gen_rtx_REG (SImode, regno);
	}
    }

  /* Otherwise `this' lives on the stack: one word past the return
     address, or two when a hidden aggregate-return pointer comes
     first.  */
  if (aggregate_value_p (TREE_TYPE (type), type))
    return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
  else
    return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
}
15311
3961e8fe
RH
15312/* Determine whether x86_output_mi_thunk can succeed. */
15313
15314static bool
b96a374d
AJ
15315x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
15316 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
15317 HOST_WIDE_INT vcall_offset, tree function)
3961e8fe
RH
15318{
15319 /* 64-bit can handle anything. */
15320 if (TARGET_64BIT)
15321 return true;
15322
15323 /* For 32-bit, everything's fine if we have one free register. */
e767b5be 15324 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
3961e8fe
RH
15325 return true;
15326
15327 /* Need a free register for vcall_offset. */
15328 if (vcall_offset)
15329 return false;
15330
15331 /* Need a free register for GOT references. */
15332 if (flag_pic && !(*targetm.binds_local_p) (function))
15333 return false;
15334
15335 /* Otherwise ok. */
15336 return true;
15337}
15338
/* Output the assembler code for a thunk function.  THUNK_DECL is the
   declaration for the thunk function itself, FUNCTION is the decl for
   the target function.  DELTA is an immediate constant offset to be
   added to THIS.  If VCALL_OFFSET is nonzero, the word at
   *(*this + vcall_offset) should be added to THIS.  */

static void
x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
		     tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
		     HOST_WIDE_INT vcall_offset, tree function)
{
  rtx xops[3];
  rtx this = x86_this_parameter (function);
  rtx this_reg, tmp;

  /* If VCALL_OFFSET, we'll need THIS in a register.  Might as well
     pull it in now and let DELTA benefit.  */
  if (REG_P (this))
    this_reg = this;
  else if (vcall_offset)
    {
      /* Put the this parameter into %eax.  */
      xops[0] = this;
      xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
      output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
    }
  else
    this_reg = NULL_RTX;

  /* Adjust the this parameter by a fixed constant.  */
  if (delta)
    {
      xops[0] = GEN_INT (delta);
      xops[1] = this_reg ? this_reg : this;
      if (TARGET_64BIT)
	{
	  if (!x86_64_general_operand (xops[0], DImode))
	    {
	      /* DELTA does not fit in a sign-extended 32-bit immediate;
		 stage it through R10 (call-clobbered, not used for
		 argument passing).  */
	      tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
	      xops[1] = tmp;
	      output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
	      xops[0] = tmp;
	      xops[1] = this;
	    }
	  output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
	}
      else
	output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
    }

  /* Adjust the this parameter by a value stored in the vtable.  */
  if (vcall_offset)
    {
      if (TARGET_64BIT)
	tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
      else
	{
	  /* Scratch register: ECX normally, but fastcall passes an
	     argument in ECX, so use EAX there instead.  */
	  int tmp_regno = 2 /* ECX */;
	  if (lookup_attribute ("fastcall",
				TYPE_ATTRIBUTES (TREE_TYPE (function))))
	    tmp_regno = 0 /* EAX */;
	  tmp = gen_rtx_REG (SImode, tmp_regno);
	}

      /* Load the vtable pointer (*this) into the scratch register.  */
      xops[0] = gen_rtx_MEM (Pmode, this_reg);
      xops[1] = tmp;
      if (TARGET_64BIT)
	output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
      else
	output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);

      /* Adjust the this parameter.  */
      xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
      if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
	{
	  /* VCALL_OFFSET is too large for a displacement; compute the
	     address with R11 as an index instead.  */
	  rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
	  xops[0] = GEN_INT (vcall_offset);
	  xops[1] = tmp2;
	  output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
	  xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
	}
      xops[1] = this_reg;
      if (TARGET_64BIT)
	output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
      else
	output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
    }

  /* If necessary, drop THIS back to its stack slot.  */
  if (this_reg && this_reg != this)
    {
      xops[0] = this_reg;
      xops[1] = this;
      output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
    }

  /* Finally, tail-call the real target function.  */
  xops[0] = XEXP (DECL_RTL (function), 0);
  if (TARGET_64BIT)
    {
      if (!flag_pic || (*targetm.binds_local_p) (function))
	output_asm_insn ("jmp\t%P0", xops);
      else
	{
	  /* Non-local 64-bit PIC target: jump through its GOT entry.  */
	  tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
	  tmp = gen_rtx_CONST (Pmode, tmp);
	  tmp = gen_rtx_MEM (QImode, tmp);
	  xops[0] = tmp;
	  output_asm_insn ("jmp\t%A0", xops);
	}
    }
  else
    {
      if (!flag_pic || (*targetm.binds_local_p) (function))
	output_asm_insn ("jmp\t%P0", xops);
      else
#if TARGET_MACHO
	if (TARGET_MACHO)
	  {
	    /* Darwin PIC: jump via the machopic lazy stub.  */
	    const char *ip = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (function));
	    tmp = gen_rtx_SYMBOL_REF (Pmode, machopic_stub_name (ip));
	    tmp = gen_rtx_MEM (QImode, tmp);
	    xops[0] = tmp;
	    output_asm_insn ("jmp\t%0", xops);
	  }
	else
#endif /* TARGET_MACHO */
	{
	  /* ELF PIC: materialize the GOT pointer in ECX (free at this
	     point) and jump through the function's GOT slot.  */
	  tmp = gen_rtx_REG (SImode, 2 /* ECX */);
	  output_set_got (tmp);

	  xops[1] = tmp;
	  output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
	  output_asm_insn ("jmp\t{*}%1", xops);
	}
    }
}
e2500fed 15475
1bc7c5b6 15476static void
b96a374d 15477x86_file_start (void)
1bc7c5b6
ZW
15478{
15479 default_file_start ();
15480 if (X86_FILE_START_VERSION_DIRECTIVE)
15481 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
15482 if (X86_FILE_START_FLTUSED)
15483 fputs ("\t.global\t__fltused\n", asm_out_file);
15484 if (ix86_asm_dialect == ASM_INTEL)
15485 fputs ("\t.intel_syntax\n", asm_out_file);
15486}
15487
e932b21b 15488int
b96a374d 15489x86_field_alignment (tree field, int computed)
e932b21b
JH
15490{
15491 enum machine_mode mode;
ad9335eb
JJ
15492 tree type = TREE_TYPE (field);
15493
15494 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
e932b21b 15495 return computed;
ad9335eb
JJ
15496 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
15497 ? get_inner_array_type (type) : type);
39e3a681
JJ
15498 if (mode == DFmode || mode == DCmode
15499 || GET_MODE_CLASS (mode) == MODE_INT
15500 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
e932b21b
JH
15501 return MIN (32, computed);
15502 return computed;
15503}
15504
a5fa1ecd
JH
/* Output assembler code to FILE to increment profiler label # LABELNO
   for profiling a function entry.  LABELNO is only referenced when
   profile counters are enabled (NO_PROFILE_COUNTERS undefined).  */
void
x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
{
  if (TARGET_64BIT)
    if (flag_pic)
      {
	/* 64-bit PIC: pass the counter address in %r11 and call
	   mcount through the GOT.  */
#ifndef NO_PROFILE_COUNTERS
	/* NOTE(review): the bare '@' before '(%rip)' looks like a
	   truncated relocation suffix -- confirm against the assembler
	   output this is expected to produce.  */
	fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
#endif
	fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
      }
    else
      {
	/* 64-bit non-PIC: absolute counter address, direct call.  */
#ifndef NO_PROFILE_COUNTERS
	fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
#endif
	fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
      }
  else if (flag_pic)
    {
      /* 32-bit PIC: address the counter via the GOT base in %ebx.  */
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
	       LPREFIX, labelno, PROFILE_COUNT_REGISTER);
#endif
      fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
    }
  else
    {
      /* 32-bit non-PIC: absolute counter address, direct call.  */
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
	       PROFILE_COUNT_REGISTER);
#endif
      fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
    }
}
15542
d2c49530
JH
/* We don't have exact information about the insn sizes, but we may assume
   quite safely that we are informed about all 1 byte insns and memory
   address sizes.  This is enough to eliminate unnecessary padding in
   99% of cases.  Returns a conservative lower bound, in bytes, on the
   encoded size of INSN.  */

static int
min_insn_size (rtx insn)
{
  int l = 0;

  if (!INSN_P (insn) || !active_insn_p (insn))
    return 0;

  /* Discard alignments we've emit and jump instructions.  */
  if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
      && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
    return 0;
  if (GET_CODE (insn) == JUMP_INSN
      && (GET_CODE (PATTERN (insn)) == ADDR_VEC
	  || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
    return 0;

  /* Important case - calls are always 5 bytes.
     It is common to have many calls in the row.  */
  if (GET_CODE (insn) == CALL_INSN
      && symbolic_reference_mentioned_p (PATTERN (insn))
      && !SIBLING_CALL_P (insn))
    return 5;
  if (get_attr_length (insn) <= 1)
    return 1;

  /* For normal instructions we may rely on the sizes of addresses
     and the presence of symbol to require 4 bytes of encoding.
     This is not the case for jumps where references are PC relative.  */
  if (GET_CODE (insn) != JUMP_INSN)
    {
      l = get_attr_length_address (insn);
      if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
	l = 4;
    }
  /* Assume at least one byte of opcode on top of the address bytes;
     with no address information, assume two bytes total.  */
  if (l)
    return 1+l;
  else
    return 2;
}
15588
/* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
   window.  Insert padding so no 16-byte window holds four jumps.  */

static void
ix86_avoid_jump_misspredicts (void)
{
  rtx insn, start = get_insns ();
  int nbytes = 0, njumps = 0;
  int isjump = 0;

  /* Look for all minimal intervals of instructions containing 4 jumps.
     The intervals are bounded by START and INSN.  NBYTES is the total
     size of instructions in the interval including INSN and not including
     START.  When the NBYTES is smaller than 16 bytes, it is possible
     that the end of START and INSN ends up in the same 16byte page.

     The smallest offset in the page INSN can start is the case where START
     ends on the offset 0.  Offset of INSN is then NBYTES - sizeof (INSN).
     We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
     */
  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {

      nbytes += min_insn_size (insn);
      if (dump_file)
	fprintf(dump_file, "Insn %i estimated to %i bytes\n",
		INSN_UID (insn), min_insn_size (insn));
      /* Only jump and call instructions count toward the window.  */
      if ((GET_CODE (insn) == JUMP_INSN
	   && GET_CODE (PATTERN (insn)) != ADDR_VEC
	   && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
	  || GET_CODE (insn) == CALL_INSN)
	njumps++;
      else
	continue;

      /* Shrink the window from the left until it holds at most three
	 jumps; ISJUMP records whether the last dropped insn was one.  */
      while (njumps > 3)
	{
	  start = NEXT_INSN (start);
	  if ((GET_CODE (start) == JUMP_INSN
	       && GET_CODE (PATTERN (start)) != ADDR_VEC
	       && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
	      || GET_CODE (start) == CALL_INSN)
	    njumps--, isjump = 1;
	  else
	    isjump = 0;
	  nbytes -= min_insn_size (start);
	}
      if (njumps < 0)
	abort ();
      if (dump_file)
	fprintf (dump_file, "Interval %i to %i has %i bytes\n",
		 INSN_UID (start), INSN_UID (insn), nbytes);

      /* Four jumps could share a 16-byte page: pad before INSN so it
	 starts beyond the page holding the previous three.  */
      if (njumps == 3 && isjump && nbytes < 16)
	{
	  int padsize = 15 - nbytes + min_insn_size (insn);

	  if (dump_file)
	    fprintf (dump_file, "Padding insn %i by %i bytes!\n",
		     INSN_UID (insn), padsize);
	  emit_insn_before (gen_align (GEN_INT (padsize)), insn);
	}
    }
}
15653
/* AMD Athlon works faster
   when RET is not destination of conditional jump or directly preceded
   by other jump instruction.  We avoid the penalty by inserting NOP just
   before the RET instructions in such cases.  */
static void
ix86_pad_returns (void)
{
  edge e;

  /* Inspect every block that can reach the function exit.  */
  for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
    {
      basic_block bb = e->src;
      rtx ret = BB_END (bb);
      rtx prev;
      bool replace = false;

      if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
	  || !maybe_hot_bb_p (bb))
	continue;
      /* Find the nearest preceding active insn or label.  */
      for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
	if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
	  break;
      if (prev && GET_CODE (prev) == CODE_LABEL)
	{
	  /* The RET is a jump target: pad if any non-fallthru edge
	     with nonzero frequency branches here.  */
	  edge e;
	  for (e = bb->pred; e; e = e->pred_next)
	    if (EDGE_FREQUENCY (e) && e->src->index >= 0
		&& !(e->flags & EDGE_FALLTHRU))
	      replace = true;
	}
      if (!replace)
	{
	  /* Pad when the RET directly follows a conditional jump or a
	     call.  */
	  prev = prev_active_insn (ret);
	  if (prev
	      && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
		  || GET_CODE (prev) == CALL_INSN))
	    replace = true;
	  /* Empty functions get branch mispredict even when the jump destination
	     is not visible to us.  */
	  if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
	    replace = true;
	}
      /* Swap the plain RET for the padded (rep-prefixed) variant.  */
      if (replace)
	{
	  emit_insn_before (gen_return_internal_long (), ret);
	  delete_insn (ret);
	}
    }
}
15703
15704/* Implement machine specific optimizations. We implement padding of returns
15705 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
15706static void
15707ix86_reorg (void)
15708{
15709 if (TARGET_ATHLON_K8 && optimize && !optimize_size)
15710 ix86_pad_returns ();
15711 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
15712 ix86_avoid_jump_misspredicts ();
2a500b9e
JH
15713}
15714
4977bab6
ZW
15715/* Return nonzero when QImode register that must be represented via REX prefix
15716 is used. */
15717bool
b96a374d 15718x86_extended_QIreg_mentioned_p (rtx insn)
4977bab6
ZW
15719{
15720 int i;
15721 extract_insn_cached (insn);
15722 for (i = 0; i < recog_data.n_operands; i++)
15723 if (REG_P (recog_data.operand[i])
15724 && REGNO (recog_data.operand[i]) >= 4)
15725 return true;
15726 return false;
15727}
15728
15729/* Return nonzero when P points to register encoded via REX prefix.
15730 Called via for_each_rtx. */
15731static int
b96a374d 15732extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
4977bab6
ZW
15733{
15734 unsigned int regno;
15735 if (!REG_P (*p))
15736 return 0;
15737 regno = REGNO (*p);
15738 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
15739}
15740
15741/* Return true when INSN mentions register that must be encoded using REX
15742 prefix. */
15743bool
b96a374d 15744x86_extended_reg_mentioned_p (rtx insn)
4977bab6
ZW
15745{
15746 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
15747}
15748
/* Generate an unsigned DImode/SImode to FP conversion.  This is the same code
   optabs would emit if we didn't have TFmode patterns.
   OPERANDS[0] is the FP destination, OPERANDS[1] the unsigned source.  */

void
x86_emit_floatuns (rtx operands[2])
{
  rtx neglab, donelab, i0, i1, f0, in, out;
  enum machine_mode mode, inmode;

  inmode = GET_MODE (operands[1]);
  if (inmode != SImode
      && inmode != DImode)
    abort ();

  out = operands[0];
  in = force_reg (inmode, operands[1]);
  mode = GET_MODE (out);
  neglab = gen_label_rtx ();
  donelab = gen_label_rtx ();
  i1 = gen_reg_rtx (Pmode);
  f0 = gen_reg_rtx (mode);

  /* If the sign bit is clear, a plain signed conversion is exact.  */
  emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);

  emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
  emit_jump_insn (gen_jump (donelab));
  emit_barrier ();

  /* Sign bit set: halve the value, folding the discarded low bit back
     into bit 0 so the final doubling rounds the same way the direct
     conversion would, then convert and double.  */
  emit_label (neglab);

  i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
  i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
  i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
  expand_float (f0, i0, 0);
  emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));

  emit_label (donelab);
}
15787
dafc5b82
JH
15788/* Return if we do not know how to pass TYPE solely in registers. */
15789bool
b96a374d 15790ix86_must_pass_in_stack (enum machine_mode mode, tree type)
dafc5b82
JH
15791{
15792 if (default_must_pass_in_stack (mode, type))
15793 return true;
15794 return (!TARGET_64BIT && type && mode == TImode);
15795}
15796
997404de
JH
/* Initialize vector TARGET via VALS.  Only V2DF and V4SF targets are
   handled by the general (all-variable) path.  */
void
ix86_expand_vector_init (rtx target, rtx vals)
{
  enum machine_mode mode = GET_MODE (target);
  int elt_size = GET_MODE_SIZE (GET_MODE_INNER (mode));
  int n_elts = (GET_MODE_SIZE (mode) / elt_size);
  int i;

  /* Find the highest-numbered non-constant element; I < 0 means the
     whole vector is constant.  */
  for (i = n_elts - 1; i >= 0; i--)
    if (GET_CODE (XVECEXP (vals, 0, i)) != CONST_INT
	&& GET_CODE (XVECEXP (vals, 0, i)) != CONST_DOUBLE)
      break;

  /* Few special cases first...
     ... constants are best loaded from constant pool.  */
  if (i < 0)
    {
      emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
      return;
    }

  /* ... values where only first field is non-constant are best loaded
     from the pool and overwritten via move later.  */
  if (!i)
    {
      rtx op = simplify_gen_subreg (mode, XVECEXP (vals, 0, 0),
				    GET_MODE_INNER (mode), 0);

      op = force_reg (mode, op);
      /* Load the vector with element 0 zeroed, then merge the variable
	 scalar into element 0 with movsd/movss.  */
      XVECEXP (vals, 0, 0) = CONST0_RTX (GET_MODE_INNER (mode));
      emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
      switch (GET_MODE (target))
	{
	case V2DFmode:
	  emit_insn (gen_sse2_movsd (target, target, op));
	  break;
	case V4SFmode:
	  emit_insn (gen_sse_movss (target, target, op));
	  break;
	default:
	  break;
	}
      return;
    }

  /* And the busy sequence doing rotations.  */
  switch (GET_MODE (target))
    {
    case V2DFmode:
      {
	rtx vecop0 =
	  simplify_gen_subreg (V2DFmode, XVECEXP (vals, 0, 0), DFmode, 0);
	rtx vecop1 =
	  simplify_gen_subreg (V2DFmode, XVECEXP (vals, 0, 1), DFmode, 0);

	vecop0 = force_reg (V2DFmode, vecop0);
	vecop1 = force_reg (V2DFmode, vecop1);
	/* unpcklpd combines the low doubles of both operands.  */
	emit_insn (gen_sse2_unpcklpd (target, vecop0, vecop1));
      }
      break;
    case V4SFmode:
      {
	rtx vecop0 =
	  simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 0), SFmode, 0);
	rtx vecop1 =
	  simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 1), SFmode, 0);
	rtx vecop2 =
	  simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 2), SFmode, 0);
	rtx vecop3 =
	  simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 3), SFmode, 0);
	rtx tmp1 = gen_reg_rtx (V4SFmode);
	rtx tmp2 = gen_reg_rtx (V4SFmode);

	vecop0 = force_reg (V4SFmode, vecop0);
	vecop1 = force_reg (V4SFmode, vecop1);
	vecop2 = force_reg (V4SFmode, vecop2);
	vecop3 = force_reg (V4SFmode, vecop3);
	/* Interleave odd and even elements, then interleave the
	   results, leaving {0,1,2,3} in TARGET.  */
	emit_insn (gen_sse_unpcklps (tmp1, vecop1, vecop3));
	emit_insn (gen_sse_unpcklps (tmp2, vecop0, vecop2));
	emit_insn (gen_sse_unpcklps (target, tmp2, tmp1));
      }
      break;
    default:
      abort ();
    }
}
15884
67dfe110
KH
15885/* Worker function for TARGET_MD_ASM_CLOBBERS.
15886
15887 We do this in the new i386 backend to maintain source compatibility
15888 with the old cc0-based compiler. */
15889
15890static tree
15891ix86_md_asm_clobbers (tree clobbers)
15892{
15893 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
15894 clobbers);
15895 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
15896 clobbers);
15897 clobbers = tree_cons (NULL_TREE, build_string (7, "dirflag"),
15898 clobbers);
15899 return clobbers;
15900}
15901
3c5cb3e4
KH
15902/* Worker function for REVERSE_CONDITION. */
15903
15904enum rtx_code
15905ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
15906{
15907 return (mode != CCFPmode && mode != CCFPUmode
15908 ? reverse_condition (code)
15909 : reverse_condition_maybe_unordered (code));
15910}
15911
5ea9cb6e
RS
15912/* Output code to perform an x87 FP register move, from OPERANDS[1]
15913 to OPERANDS[0]. */
15914
15915const char *
15916output_387_reg_move (rtx insn, rtx *operands)
15917{
15918 if (REG_P (operands[1])
15919 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
15920 {
15921 if (REGNO (operands[0]) == FIRST_STACK_REG
15922 && TARGET_USE_FFREEP)
15923 return "ffreep\t%y0";
15924 return "fstp\t%y0";
15925 }
15926 if (STACK_TOP_P (operands[0]))
15927 return "fld%z1\t%y1";
15928 return "fst\t%y0";
15929}
15930
5ae27cfa
UB
/* Output code to perform a conditional jump to LABEL, if C2 flag in
   FP status register is set.  */

void
ix86_emit_fp_unordered_jump (rtx label)
{
  rtx reg = gen_reg_rtx (HImode);
  rtx temp;

  /* Copy the FP status word into AX and transfer it into EFLAGS.  */
  emit_insn (gen_x86_fnstsw_1 (reg));
  emit_insn (gen_x86_sahf_1 (reg));

  /* Emit a conditional branch taken when the flags signal unordered.  */
  temp = gen_rtx_REG (CCmode, FLAGS_REG);
  temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
  temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
			       gen_rtx_LABEL_REF (VOIDmode, label),
			       pc_rtx);
  temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
  emit_jump_insn (temp);
}
15951
c2fcfa4f
UB
/* Output code to perform a log1p XFmode calculation: OP0 = log1p (OP1),
   using the x87 fyl2xp1/fyl2x instructions.  */

void ix86_emit_i387_log1p (rtx op0, rtx op1)
{
  rtx label1 = gen_label_rtx ();
  rtx label2 = gen_label_rtx ();

  rtx tmp = gen_reg_rtx (XFmode);
  rtx tmp2 = gen_reg_rtx (XFmode);

  /* Compare |op1| against 0.29289... (== 1 - sqrt(2)/2, the documented
     input bound for fyl2xp1) to select the accurate formula.  */
  emit_insn (gen_absxf2 (tmp, op1));
  emit_insn (gen_cmpxf (tmp,
    CONST_DOUBLE_FROM_REAL_VALUE (
       REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
       XFmode)));
  emit_jump_insn (gen_bge (label1));

  /* Small |x|: log1p(x) = ln(2) * log2(x + 1) via fyl2xp1, which is
     precise for arguments near zero.  */
  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
  emit_insn (gen_fyl2xp1_xf3 (op0, tmp2, op1));
  emit_jump (label2);

  /* Large |x|: form 1 + x explicitly and use fyl2x.  */
  emit_label (label1);
  emit_move_insn (tmp, CONST1_RTX (XFmode));
  emit_insn (gen_addxf3 (tmp, op1, tmp));
  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
  emit_insn (gen_fyl2x_xf3 (op0, tmp2, tmp));

  emit_label (label2);
}
15981
e2500fed 15982#include "gt-i386.h"
This page took 4.239436 seconds and 5 git commands to generate.