/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "flags.h"
#include "except.h"
#include "function.h"
#include "recog.h"
#include "expr.h"
#include "optabs.h"
#include "toplev.h"
#include "basic-block.h"
#include "ggc.h"
#include "target.h"
#include "target-def.h"
#include "langhooks.h"
#include "cgraph.h"
#include "tree-gimple.h"
/* Stack-probe limit; -1 means "no limit" unless the target header
   overrides it.  */
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif

/* Return index of given mode in mult and division cost tables.
   QImode..DImode map to 0..3; anything else (e.g. FP modes) uses
   slot 4.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
63
/* Processor costs (relative to an add) */
static const
struct processor_costs size_cost = {	/* costs for tuning for size */
  2,					/* cost of an add instruction */
  3,					/* cost of a lea instruction */
  2,					/* variable shift costs */
  3,					/* constant shift costs */
  {3, 3, 3, 3, 5},			/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  {3, 3, 3, 3, 5},			/* cost of a divide/mod */
  3,					/* cost of movsx */
  3,					/* cost of movzx */
  0,					/* "large" insn */
  2,					/* MOVE_RATIO */
  2,					/* cost for loading QImode using movzbl */
  {2, 2, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 2, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 2},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {2, 2, 2},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  3,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {3, 3},				/* cost of storing MMX registers
					   in SImode and DImode */
  3,					/* cost of moving SSE register */
  {3, 3, 3},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {3, 3, 3},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  1,					/* Branch cost */
  2,					/* cost of FADD and FSUB insns.  */
  2,					/* cost of FMUL instruction.  */
  2,					/* cost of FDIV instruction.  */
  2,					/* cost of FABS instruction.  */
  2,					/* cost of FCHS instruction.  */
  2,					/* cost of FSQRT instruction.  */
};
229b303a 108
/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = {	/* 386 specific costs */
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  3,					/* variable shift costs */
  2,					/* constant shift costs */
  {6, 6, 6, 6, 6},			/* cost of starting a multiply */
  1,					/* cost of multiply per each bit set */
  {23, 23, 23, 23, 23},			/* cost of a divide/mod */
  3,					/* cost of movsx */
  2,					/* cost of movzx */
  15,					/* "large" insn */
  3,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {8, 8, 8},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {8, 8, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  1,					/* Branch cost */
  23,					/* cost of FADD and FSUB insns.  */
  27,					/* cost of FMUL instruction.  */
  88,					/* cost of FDIV instruction.  */
  22,					/* cost of FABS instruction.  */
  24,					/* cost of FCHS instruction.  */
  122,					/* cost of FSQRT instruction.  */
};
153
static const
struct processor_costs i486_cost = {	/* 486 specific costs */
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  3,					/* variable shift costs */
  2,					/* constant shift costs */
  {12, 12, 12, 12, 12},			/* cost of starting a multiply */
  1,					/* cost of multiply per each bit set */
  {40, 40, 40, 40, 40},			/* cost of a divide/mod */
  3,					/* cost of movsx */
  2,					/* cost of movzx */
  15,					/* "large" insn */
  3,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {8, 8, 8},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {8, 8, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  1,					/* Branch cost */
  8,					/* cost of FADD and FSUB insns.  */
  16,					/* cost of FMUL instruction.  */
  73,					/* cost of FDIV instruction.  */
  3,					/* cost of FABS instruction.  */
  3,					/* cost of FCHS instruction.  */
  83,					/* cost of FSQRT instruction.  */
};
197
static const
struct processor_costs pentium_cost = {
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  4,					/* variable shift costs */
  1,					/* constant shift costs */
  {11, 11, 11, 11, 11},			/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  {25, 25, 25, 25, 25},			/* cost of a divide/mod */
  3,					/* cost of movsx */
  2,					/* cost of movzx */
  8,					/* "large" insn */
  6,					/* MOVE_RATIO */
  6,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  8,					/* cost of moving MMX register */
  {8, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {8, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  2,					/* Branch cost */
  3,					/* cost of FADD and FSUB insns.  */
  3,					/* cost of FMUL instruction.  */
  39,					/* cost of FDIV instruction.  */
  1,					/* cost of FABS instruction.  */
  1,					/* cost of FCHS instruction.  */
  70,					/* cost of FSQRT instruction.  */
};
241
static const
struct processor_costs pentiumpro_cost = {
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  1,					/* variable shift costs */
  1,					/* constant shift costs */
  {4, 4, 4, 4, 4},			/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  {17, 17, 17, 17, 17},			/* cost of a divide/mod */
  1,					/* cost of movsx */
  1,					/* cost of movzx */
  8,					/* "large" insn */
  6,					/* MOVE_RATIO */
  2,					/* cost for loading QImode using movzbl */
  {4, 4, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 2, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {2, 2, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  32,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  2,					/* Branch cost */
  3,					/* cost of FADD and FSUB insns.  */
  5,					/* cost of FMUL instruction.  */
  56,					/* cost of FDIV instruction.  */
  2,					/* cost of FABS instruction.  */
  2,					/* cost of FCHS instruction.  */
  56,					/* cost of FSQRT instruction.  */
};
285
static const
struct processor_costs k6_cost = {
  1,					/* cost of an add instruction */
  2,					/* cost of a lea instruction */
  1,					/* variable shift costs */
  1,					/* constant shift costs */
  {3, 3, 3, 3, 3},			/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  {18, 18, 18, 18, 18},			/* cost of a divide/mod */
  2,					/* cost of movsx */
  2,					/* cost of movzx */
  8,					/* "large" insn */
  4,					/* MOVE_RATIO */
  3,					/* cost for loading QImode using movzbl */
  {4, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 3, 2},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {6, 6, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 4},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {2, 2, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  6,					/* MMX or SSE register to integer */
  32,					/* size of prefetch block */
  1,					/* number of parallel prefetches */
  1,					/* Branch cost */
  2,					/* cost of FADD and FSUB insns.  */
  2,					/* cost of FMUL instruction.  */
  56,					/* cost of FDIV instruction.  */
  2,					/* cost of FABS instruction.  */
  2,					/* cost of FCHS instruction.  */
  56,					/* cost of FSQRT instruction.  */
};
329
static const
struct processor_costs athlon_cost = {
  1,					/* cost of an add instruction */
  2,					/* cost of a lea instruction */
  1,					/* variable shift costs */
  1,					/* constant shift costs */
  {5, 5, 5, 5, 5},			/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  {18, 26, 42, 74, 74},			/* cost of a divide/mod */
  1,					/* cost of movsx */
  1,					/* cost of movzx */
  8,					/* "large" insn */
  9,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 4},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 4, 6},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  2,					/* Branch cost */
  4,					/* cost of FADD and FSUB insns.  */
  4,					/* cost of FMUL instruction.  */
  24,					/* cost of FDIV instruction.  */
  2,					/* cost of FABS instruction.  */
  2,					/* cost of FCHS instruction.  */
  35,					/* cost of FSQRT instruction.  */
};
373
static const
struct processor_costs k8_cost = {
  1,					/* cost of an add instruction */
  2,					/* cost of a lea instruction */
  1,					/* variable shift costs */
  1,					/* constant shift costs */
  {3, 4, 3, 4, 5},			/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  {18, 26, 42, 74, 74},			/* cost of a divide/mod */
  1,					/* cost of movsx */
  1,					/* cost of movzx */
  8,					/* "large" insn */
  9,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 3, 6},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  2,					/* Branch cost */
  4,					/* cost of FADD and FSUB insns.  */
  4,					/* cost of FMUL instruction.  */
  19,					/* cost of FDIV instruction.  */
  2,					/* cost of FABS instruction.  */
  2,					/* cost of FCHS instruction.  */
  35,					/* cost of FSQRT instruction.  */
};
417
static const
struct processor_costs pentium4_cost = {
  1,					/* cost of an add instruction */
  3,					/* cost of a lea instruction */
  4,					/* variable shift costs */
  4,					/* constant shift costs */
  {15, 15, 15, 15, 15},			/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  {56, 56, 56, 56, 56},			/* cost of a divide/mod */
  1,					/* cost of movsx */
  1,					/* cost of movzx */
  16,					/* "large" insn */
  6,					/* MOVE_RATIO */
  2,					/* cost for loading QImode using movzbl */
  {4, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 3, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  12,					/* cost of moving SSE register */
  {12, 12, 12},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  10,					/* MMX or SSE register to integer */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  2,					/* Branch cost */
  5,					/* cost of FADD and FSUB insns.  */
  7,					/* cost of FMUL instruction.  */
  43,					/* cost of FDIV instruction.  */
  2,					/* cost of FABS instruction.  */
  2,					/* cost of FCHS instruction.  */
  43,					/* cost of FSQRT instruction.  */
};
461
static const
struct processor_costs nocona_cost = {
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  1,					/* variable shift costs */
  1,					/* constant shift costs */
  {10, 10, 10, 10, 10},			/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  {66, 66, 66, 66, 66},			/* cost of a divide/mod */
  1,					/* cost of movsx */
  1,					/* cost of movzx */
  16,					/* "large" insn */
  9,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {4, 4, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {4, 4, 4},				/* cost of storing integer registers */
  3,					/* cost of reg,reg fld/fst */
  {12, 12, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 4},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  6,					/* cost of moving MMX register */
  {12, 12},				/* cost of loading MMX registers
					   in SImode and DImode */
  {12, 12},				/* cost of storing MMX registers
					   in SImode and DImode */
  6,					/* cost of moving SSE register */
  {12, 12, 12},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {12, 12, 12},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  8,					/* MMX or SSE register to integer */
  128,					/* size of prefetch block */
  8,					/* number of parallel prefetches */
  1,					/* Branch cost */
  6,					/* cost of FADD and FSUB insns.  */
  8,					/* cost of FMUL instruction.  */
  40,					/* cost of FDIV instruction.  */
  3,					/* cost of FABS instruction.  */
  3,					/* cost of FCHS instruction.  */
  44,					/* cost of FSQRT instruction.  */
};
505
/* Cost table currently in effect; overridden by override_options
   according to -mtune.  Defaults to Pentium tuning.  */
const struct processor_costs *ix86_cost = &pentium_cost;
32b5b1aa 507
/* Processor feature/optimization bitmasks.  Each mask selects the set
   of -mtune targets for which a given transformation is profitable.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_K6 (1<<PROCESSOR_K6)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
#define m_NOCONA (1<<PROCESSOR_NOCONA)

const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
const int x86_zero_extend_with_and = m_486 | m_PENT;
const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA /* m_386 | m_K6 */;
const int x86_double_with_add = ~m_386;
const int x86_use_bit_test = m_386;
const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
const int x86_3dnow_a = m_ATHLON_K8;
const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
const int x86_branch_hints = m_PENT4 | m_NOCONA;
const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4 | m_NOCONA;
const int x86_partial_reg_stall = m_PPRO;
const int x86_use_loop = m_K6;
const int x86_use_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
const int x86_use_mov0 = m_K6;
const int x86_use_cltd = ~(m_PENT | m_K6);
const int x86_read_modify_write = ~m_PENT;
const int x86_read_modify = ~(m_PENT | m_PPRO);
const int x86_split_long_moves = m_PPRO;
const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
const int x86_qimode_math = ~(0);
const int x86_promote_qi_regs = 0;
const int x86_himode_math = ~(m_PPRO);
const int x86_promote_hi_regs = m_PPRO;
const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA;
const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA;
const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4 | m_NOCONA;
const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4 | m_NOCONA;
const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO);
const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA;
const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA;
const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO;
const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO;
const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO;
const int x86_decompose_lea = m_PENT4 | m_NOCONA;
const int x86_shift1 = ~m_486;
const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO;
/* Set for machines where the type and dependencies are resolved on SSE
   register parts instead of whole registers, so we may maintain just
   lower part of scalar values in proper format leaving the upper part
   undefined.  */
const int x86_sse_partial_regs = m_ATHLON_K8;
/* Athlon optimizes partial-register FPS special case, thus avoiding the
   need for extra instructions beforehand */
const int x86_sse_partial_regs_for_cvtsd2ss = 0;
const int x86_sse_typeless_stores = m_ATHLON_K8;
const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
const int x86_use_ffreep = m_ATHLON_K8;
const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
const int x86_inter_unit_moves = ~(m_ATHLON_K8);
const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_NOCONA | m_PPRO;
/* Some CPU cores are not able to predict more than 4 branch instructions in
   the 16 byte window.  */
const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
a269a03c 577
/* In case the average insn count for single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 20

/* Names for 8 (low), 8 (high), and 16-bit registers, respectively.  */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */

enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  /* ax, dx, cx, bx */
  AREG, DREG, CREG, BREG,
  /* si, di, bp, sp */
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  /* FP registers */
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* arg pointer */
  NON_Q_REGS,
  /* flags, fpsr, dirflag, frame */
  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  MMX_REGS, MMX_REGS,
  /* extended integer registers r8-r15 (64-bit only) */
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  /* extended SSE registers xmm8-xmm15 (64-bit only) */
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
};
c572e5ba 613
/* The "default" register map used in 32bit mode.  */

int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 4, 5,		/* general regs */
  12, 13, 14, 15, 16, 17, 18, 19,	/* fp regs */
  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
};
626
/* Integer argument registers in the x86-64 calling convention, in
   argument order (System V AMD64 ABI).  */
static int const x86_64_int_parameter_registers[6] =
{
  5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
  FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
};

/* Integer registers used to return values (gcc regno 1 is RDX per the
   register maps above; the original comment mislabeled it as RDI).  */
static int const x86_64_int_return_registers[4] =
{
  0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
};
53c17031 637
/* The "default" register map used in 64bit mode.  */
int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 1, 2, 3, 4, 5, 6, 7,		/* general regs */
  33, 34, 35, 36, 37, 38, 39, 40,	/* fp regs */
  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  17, 18, 19, 20, 21, 22, 23, 24,	/* SSE */
  41, 42, 43, 44, 45, 46, 47, 48,	/* MMX */
  8, 9, 10, 11, 12, 13, 14, 15,	/* extended integer registers */
  25, 26, 27, 28, 29, 30, 31, 32,	/* extended SSE registers */
};
649
/* Define the register numbers to be used in Dwarf debugging information.
   The SVR4 reference port C compiler uses the following register numbers
   in its Dwarf output code:
	0 for %eax (gcc regno = 0)
	1 for %ecx (gcc regno = 2)
	2 for %edx (gcc regno = 1)
	3 for %ebx (gcc regno = 3)
	4 for %esp (gcc regno = 7)
	5 for %ebp (gcc regno = 6)
	6 for %esi (gcc regno = 4)
	7 for %edi (gcc regno = 5)
   The following three DWARF register numbers are never generated by
   the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
   believes these numbers have these meanings.
	8  for %eip    (no gcc equivalent)
	9  for %eflags (gcc regno = 17)
	10 for %trapno (no gcc equivalent)
   It is not at all clear how we should number the FP stack registers
   for the x86 architecture.  If the version of SDB on x86/svr4 were
   a bit less brain dead with respect to floating-point then we would
   have a precedent to follow with respect to DWARF register numbers
   for x86 FP registers, but the SDB on x86/svr4 is so completely
   broken with respect to FP registers that it is hardly worth thinking
   of it as something to strive for compatibility with.
   The version of x86/svr4 SDB I have at the moment does (partially)
   seem to believe that DWARF register number 11 is associated with
   the x86 register %st(0), but that's about all.  Higher DWARF
   register numbers don't seem to be associated with anything in
   particular, and even for DWARF regno 11, SDB only seems to under-
   stand that it should say that a variable lives in %st(0) (when
   asked via an `=' command) if we said it was in DWARF regno 11,
   but SDB still prints garbage when asked for the value of the
   variable in question (via a `/' command).
   (Also note that the labels SDB prints for various FP stack regs
   when doing an `x' command are all wrong.)
   Note that these problems generally don't affect the native SVR4
   C compiler because it doesn't allow the use of -O with -g and
   because when it is *not* optimizing, it allocates a memory
   location for each floating-point variable, and the memory
   location is what gets described in the DWARF AT_location
   attribute for the variable in question.
   Regardless of the severe mental illness of the x86/svr4 SDB, we
   do something sensible here and we use the following DWARF
   register numbers.  Note that these are all stack-top-relative
   numbers.
	11 for %st(0) (gcc regno = 8)
	12 for %st(1) (gcc regno = 9)
	13 for %st(2) (gcc regno = 10)
	14 for %st(3) (gcc regno = 11)
	15 for %st(4) (gcc regno = 12)
	16 for %st(5) (gcc regno = 13)
	17 for %st(6) (gcc regno = 14)
	18 for %st(7) (gcc regno = 15)
*/
int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 5, 4,		/* general regs */
  11, 12, 13, 14, 15, 16, 17, 18,	/* fp regs */
  -1, 9, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE registers */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
};
714
c572e5ba
JVA
715/* Test and compare insns in i386.md store the information needed to
716 generate branch and scc insns here. */
717
07933f72
GS
718rtx ix86_compare_op0 = NULL_RTX;
719rtx ix86_compare_op1 = NULL_RTX;
f5316dfe 720
7a2e09f4 721#define MAX_386_STACK_LOCALS 3
8362f420
JH
722/* Size of the register save area. */
723#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
36edd3cc
BS
724
/* Define the structure for the machine field in struct function.  */

/* One node in a linked list of lazily-created stack slots.  The
   GTY(()) marker registers the type with GCC's garbage collector /
   PCH machinery.  */
struct stack_local_entry
{
  unsigned short mode;			/* machine mode of the slot */
  unsigned short n;			/* slot index within this mode */
  rtx rtl;				/* RTL referring to the slot */
  struct stack_local_entry *next;	/* next entry in the list */
};
734
4dd2ac2c
JH
/* Structure describing stack frame layout.
   Stack grows downward:

   [arguments]
					      <- ARG_POINTER
   saved pc

   saved frame pointer if frame_pointer_needed
					      <- HARD_FRAME_POINTER
   [saved regs]

   [padding1]          \
			)
   [va_arg registers]  (
			> to_allocate	      <- FRAME_POINTER
   [frame]	       (
			)
   [padding2]	       /
  */
struct ix86_frame
{
  int nregs;			/* number of call-saved registers saved */
  int padding1;			/* bytes of padding before va_arg area */
  int va_arg_size;		/* size of the va_arg register save area */
  HOST_WIDE_INT frame;		/* size of the local frame proper */
  int padding2;			/* bytes of padding after the frame */
  int outgoing_arguments_size;
  int red_zone_size;		/* bytes usable below sp (x86-64 red zone) */

  /* Total bytes the prologue must allocate (see diagram above).  */
  HOST_WIDE_INT to_allocate;
  /* The offsets relative to ARG_POINTER.  */
  HOST_WIDE_INT frame_pointer_offset;
  HOST_WIDE_INT hard_frame_pointer_offset;
  HOST_WIDE_INT stack_pointer_offset;

  /* When save_regs_using_mov is set, emit prologue using
     move instead of push instructions.  */
  bool save_regs_using_mov;
};
774
c93e80a5
JH
/* Used to enable/disable debugging features.  */
const char *ix86_debug_arg_string, *ix86_debug_addr_string;
/* Code model option as passed by user.  */
const char *ix86_cmodel_string;
/* Parsed value.  */
enum cmodel ix86_cmodel;
/* Asm dialect.  */
const char *ix86_asm_string;
enum asm_dialect ix86_asm_dialect = ASM_ATT;
/* TLS dialect.  */
const char *ix86_tls_dialect_string;
enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;

/* Which unit we are generating floating point math for.  */
enum fpmath_unit ix86_fpmath;

/* Which cpu are we scheduling for.  */
enum processor_type ix86_tune;
/* Which instruction set architecture to use.  */
enum processor_type ix86_arch;

/* Strings to hold which cpu and instruction set architecture to use.  */
const char *ix86_tune_string;		/* for -mtune=<xxx> */
const char *ix86_arch_string;		/* for -march=<xxx> */
const char *ix86_fpmath_string;		/* for -mfpmath=<xxx> */

/* # of registers to use to pass arguments.  */
const char *ix86_regparm_string;

/* true if sse prefetch instruction is not NOOP.  */
int x86_prefetch_sse;

/* ix86_regparm_string as a number.  */
int ix86_regparm;

/* Alignment to use for loops and jumps:  */

/* Power of two alignment for loops.  */
const char *ix86_align_loops_string;

/* Power of two alignment for non-loop jumps.  */
const char *ix86_align_jumps_string;

/* Power of two alignment for stack boundary in bytes.  */
const char *ix86_preferred_stack_boundary_string;

/* Preferred alignment for stack boundary in bits.  */
unsigned int ix86_preferred_stack_boundary;

/* Branch cost: values 1-5, see jump.c.  */
int ix86_branch_cost;
const char *ix86_branch_cost_string;

/* Power of two alignment for functions.  */
const char *ix86_align_funcs_string;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
static char internal_label_prefix[16];
static int internal_label_prefix_len;
e075ae69 834\f
b96a374d
AJ
/* Forward declarations of static helpers defined later in this file:
   symbolic-operand predicates, PIC/TLS address handling, prologue and
   epilogue emission, scheduling hooks, builtin expansion, and target
   hook implementations.  */
static int local_symbolic_operand (rtx, enum machine_mode);
static int tls_symbolic_operand_1 (rtx, enum tls_model);
static void output_pic_addr_const (FILE *, rtx, int);
static void put_condition_code (enum rtx_code, enum machine_mode,
				int, int, FILE *);
static const char *get_some_local_dynamic_name (void);
static int get_some_local_dynamic_name_1 (rtx *, void *);
static rtx maybe_get_pool_constant (rtx);
static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
						   rtx *);
static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
						   enum machine_mode);
static rtx get_thread_pointer (int);
static rtx legitimize_tls_address (rtx, enum tls_model, int);
static void get_pc_thunk_name (char [32], unsigned int);
static rtx gen_push (rtx);
static int memory_address_length (rtx addr);
static int ix86_flags_dependant (rtx, rtx, enum attr_type);
static int ix86_agi_dependant (rtx, rtx, enum attr_type);
static struct machine_function * ix86_init_machine_status (void);
static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
static int ix86_nsaved_regs (void);
static void ix86_emit_save_regs (void);
static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
static HOST_WIDE_INT ix86_GOT_alias_set (void);
static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
static rtx ix86_expand_aligntest (rtx, int);
static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
static int ix86_issue_rate (void);
static int ix86_adjust_cost (rtx, rtx, rtx, int);
static int ia32_multipass_dfa_lookahead (void);
static void ix86_init_mmx_sse_builtins (void);
static rtx x86_this_parameter (tree);
static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
				 HOST_WIDE_INT, tree);
static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
static void x86_file_start (void);
static void ix86_reorg (void);
static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
static tree ix86_build_builtin_va_list (void);
static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
					 tree, int *, int);
static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
e075ae69
RH
882
/* Decomposed form of an x86 effective address:
   BASE + INDEX*SCALE + DISP, with an optional segment override.
   Filled in by ix86_decompose_address (declared below).  */
struct ix86_address
{
  rtx base, index, disp;		/* address components */
  HOST_WIDE_INT scale;			/* multiplier applied to INDEX */
  enum ix86_address_seg { SEG_DEFAULT, SEG_FS, SEG_GS } seg;
};
b08de47e 889
b96a374d
AJ
/* Address analysis and costing helpers.  */
static int ix86_decompose_address (rtx, struct ix86_address *);
static int ix86_address_cost (rtx);
static bool ix86_cannot_force_const_mem (rtx);
static rtx ix86_delegitimize_address (rtx);

struct builtin_description;
/* SSE/MMX builtin expanders, FP comparison expansion and cost model,
   frame-layout queries, and attribute / ABI hook implementations.  */
static rtx ix86_expand_sse_comi (const struct builtin_description *,
				 tree, rtx);
static rtx ix86_expand_sse_compare (const struct builtin_description *,
				    tree, rtx);
static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
static rtx ix86_expand_store_builtin (enum insn_code, tree);
static rtx safe_vector_operand (rtx, enum machine_mode);
static enum rtx_code ix86_fp_compare_code_to_integer (enum rtx_code);
static void ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *,
				      enum rtx_code *, enum rtx_code *);
static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
static int ix86_fp_comparison_cost (enum rtx_code code);
static unsigned int ix86_select_alt_pic_regnum (void);
static int ix86_save_reg (unsigned int, int);
static void ix86_compute_frame_layout (struct ix86_frame *);
static int ix86_comp_type_attributes (tree, tree);
static int ix86_function_regparm (tree, tree);
const struct attribute_spec ix86_attribute_table[];
static bool ix86_function_ok_for_sibcall (tree, tree);
static tree ix86_handle_cdecl_attribute (tree *, tree, tree, int, bool *);
static tree ix86_handle_regparm_attribute (tree *, tree, tree, int, bool *);
static int ix86_value_regno (enum machine_mode);
static bool contains_128bit_aligned_vector_p (tree);
static rtx ix86_struct_value_rtx (tree, int);
static bool ix86_ms_bitfield_layout_p (tree);
static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
static int extended_reg_mentioned_1 (rtx *, void *);
static bool ix86_rtx_costs (rtx, int, int, int *);
static int min_insn_size (rtx);
static tree ix86_md_asm_clobbers (tree clobbers);
static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type);
static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
				    tree, bool);

#if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
static void ix86_svr3_asm_out_constructor (rtx, int);
#endif
e56feed6 938
53c17031
JH
/* Register class used for passing given 64bit part of the argument.
   These represent classes as documented by the PS ABI, with the exception
   of SSESF, SSEDF classes, that are basically SSE class, just gcc will
   use SF or DFmode move instead of DImode to avoid reformatting penalties.

   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
   whenever possible (upper half does contain padding).
 */
enum x86_64_reg_class
  {
    X86_64_NO_CLASS,
    X86_64_INTEGER_CLASS,
    X86_64_INTEGERSI_CLASS,
    X86_64_SSE_CLASS,
    X86_64_SSESF_CLASS,
    X86_64_SSEDF_CLASS,
    X86_64_SSEUP_CLASS,
    X86_64_X87_CLASS,
    X86_64_X87UP_CLASS,
    X86_64_MEMORY_CLASS
  };
/* Human-readable names for the classes above, in the same order
   (used by the -mdebug-arg diagnostics).  */
static const char * const x86_64_reg_class_name[] =
  {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};

/* Maximum number of 64-bit chunks (hence classes) one argument may span.  */
#define MAX_CLASSES 4
static int classify_argument (enum machine_mode, tree,
			      enum x86_64_reg_class [MAX_CLASSES], int);
static int examine_argument (enum machine_mode, tree, int, int *, int *);
static rtx construct_container (enum machine_mode, tree, int, int, int,
				const int *, int);
static enum x86_64_reg_class merge_classes (enum x86_64_reg_class,
					    enum x86_64_reg_class);

/* Table of constants used by fldpi, fldln2, etc....  */
static REAL_VALUE_TYPE ext_80387_constants_table [5];
static bool ext_80387_constants_init = 0;	/* set once the table is filled */
static void init_ext_80387_constants (void);
672a6f42
NB
976\f
/* Initialize the GCC target structure: point each target hook at the
   i386 implementation declared above, then instantiate targetm.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
# undef TARGET_MERGE_DECL_ATTRIBUTES
# define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue

/* x86 assemblers do not parenthesize expressions.  */
#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
#ifdef ASM_QUAD
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
#endif

/* Unaligned data directives are the same as the aligned ones.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  ia32_multipass_dfa_lookahead

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif
#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START x86_file_start

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ix86_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST ix86_address_cost

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
#undef TARGET_CC_MODES_COMPATIBLE
#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list

#undef TARGET_MD_ASM_CLOBBERS
#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers

#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif

struct gcc_target targetm = TARGET_INITIALIZER;
89c43c0a 1089
e075ae69 1090\f
67c2b45f
JS
/* The svr4 ABI for the i386 says that records and unions are returned
   in memory.  Subtargets may override this with their own default.  */
#ifndef DEFAULT_PCC_STRUCT_RETURN
#define DEFAULT_PCC_STRUCT_RETURN 1
#endif
1096
f5316dfe
MM
1097/* Sometimes certain combinations of command options do not make
1098 sense on a particular target machine. You can define a macro
1099 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1100 defined, is executed once just after all the command options have
1101 been parsed.
1102
1103 Don't use this macro to turn on various extra optimizations for
1104 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1105
1106void
b96a374d 1107override_options (void)
f5316dfe 1108{
400500c4 1109 int i;
3326f410
DJ
1110 int ix86_tune_defaulted = 0;
1111
e075ae69
RH
1112 /* Comes from final.c -- no real reason to change it. */
1113#define MAX_CODE_ALIGN 16
f5316dfe 1114
c8c5cb99
SC
1115 static struct ptt
1116 {
8b60264b
KG
1117 const struct processor_costs *cost; /* Processor costs */
1118 const int target_enable; /* Target flags to enable. */
1119 const int target_disable; /* Target flags to disable. */
1120 const int align_loop; /* Default alignments. */
2cca7283 1121 const int align_loop_max_skip;
8b60264b 1122 const int align_jump;
2cca7283 1123 const int align_jump_max_skip;
8b60264b 1124 const int align_func;
e075ae69 1125 }
0f290768 1126 const processor_target_table[PROCESSOR_max] =
e075ae69 1127 {
4977bab6
ZW
1128 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1129 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1130 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1131 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1132 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1133 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1134 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
89c43c0a
VM
1135 {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
1136 {&nocona_cost, 0, 0, 0, 0, 0, 0, 0}
e075ae69
RH
1137 };
1138
f4365627 1139 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
e075ae69
RH
1140 static struct pta
1141 {
8b60264b
KG
1142 const char *const name; /* processor name or nickname. */
1143 const enum processor_type processor;
0dd0e980
JH
1144 const enum pta_flags
1145 {
1146 PTA_SSE = 1,
1147 PTA_SSE2 = 2,
5bbeea44
JH
1148 PTA_SSE3 = 4,
1149 PTA_MMX = 8,
1150 PTA_PREFETCH_SSE = 16,
1151 PTA_3DNOW = 32,
4977bab6
ZW
1152 PTA_3DNOW_A = 64,
1153 PTA_64BIT = 128
0dd0e980 1154 } flags;
e075ae69 1155 }
0f290768 1156 const processor_alias_table[] =
e075ae69 1157 {
0dd0e980
JH
1158 {"i386", PROCESSOR_I386, 0},
1159 {"i486", PROCESSOR_I486, 0},
1160 {"i586", PROCESSOR_PENTIUM, 0},
1161 {"pentium", PROCESSOR_PENTIUM, 0},
1162 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
3780101d
JG
1163 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1164 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1165 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
3462df62 1166 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
0dd0e980
JH
1167 {"i686", PROCESSOR_PENTIUMPRO, 0},
1168 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1169 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
f4365627 1170 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
5bbeea44
JH
1171 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1172 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1173 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1174 | PTA_MMX | PTA_PREFETCH_SSE},
1175 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1176 | PTA_MMX | PTA_PREFETCH_SSE},
89c43c0a
VM
1177 {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
1178 | PTA_MMX | PTA_PREFETCH_SSE},
1179 {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
5bbeea44 1180 | PTA_MMX | PTA_PREFETCH_SSE},
0dd0e980
JH
1181 {"k6", PROCESSOR_K6, PTA_MMX},
1182 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1183 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
f4365627 1184 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
0dd0e980 1185 | PTA_3DNOW_A},
f4365627 1186 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
0dd0e980 1187 | PTA_3DNOW | PTA_3DNOW_A},
f4365627 1188 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
0dd0e980 1189 | PTA_3DNOW_A | PTA_SSE},
f4365627 1190 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
0dd0e980 1191 | PTA_3DNOW_A | PTA_SSE},
f4365627 1192 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
0dd0e980 1193 | PTA_3DNOW_A | PTA_SSE},
3fec9fa9
JJ
1194 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1195 | PTA_SSE | PTA_SSE2 },
4977bab6
ZW
1196 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1197 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
9a609388
JH
1198 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1199 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1200 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1201 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1202 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1203 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
3af4bd89 1204 };
c8c5cb99 1205
ca7558fc 1206 int const pta_size = ARRAY_SIZE (processor_alias_table);
c8c5cb99 1207
41ed2237 1208 /* Set the default values for switches whose default depends on TARGET_64BIT
d1f87653 1209 in case they weren't overwritten by command line options. */
55ba61f3
JH
1210 if (TARGET_64BIT)
1211 {
1212 if (flag_omit_frame_pointer == 2)
1213 flag_omit_frame_pointer = 1;
1214 if (flag_asynchronous_unwind_tables == 2)
1215 flag_asynchronous_unwind_tables = 1;
1216 if (flag_pcc_struct_return == 2)
1217 flag_pcc_struct_return = 0;
1218 }
1219 else
1220 {
1221 if (flag_omit_frame_pointer == 2)
1222 flag_omit_frame_pointer = 0;
1223 if (flag_asynchronous_unwind_tables == 2)
1224 flag_asynchronous_unwind_tables = 0;
1225 if (flag_pcc_struct_return == 2)
7c712dcc 1226 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
55ba61f3
JH
1227 }
1228
f5316dfe
MM
1229#ifdef SUBTARGET_OVERRIDE_OPTIONS
1230 SUBTARGET_OVERRIDE_OPTIONS;
1231#endif
1232
9e555526
RH
1233 if (!ix86_tune_string && ix86_arch_string)
1234 ix86_tune_string = ix86_arch_string;
1235 if (!ix86_tune_string)
3326f410
DJ
1236 {
1237 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1238 ix86_tune_defaulted = 1;
1239 }
f4365627 1240 if (!ix86_arch_string)
3fec9fa9 1241 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
e075ae69 1242
6189a572
JH
1243 if (ix86_cmodel_string != 0)
1244 {
1245 if (!strcmp (ix86_cmodel_string, "small"))
1246 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1247 else if (flag_pic)
c725bd79 1248 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
6189a572
JH
1249 else if (!strcmp (ix86_cmodel_string, "32"))
1250 ix86_cmodel = CM_32;
1251 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1252 ix86_cmodel = CM_KERNEL;
1253 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
1254 ix86_cmodel = CM_MEDIUM;
1255 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1256 ix86_cmodel = CM_LARGE;
1257 else
1258 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1259 }
1260 else
1261 {
1262 ix86_cmodel = CM_32;
1263 if (TARGET_64BIT)
1264 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1265 }
c93e80a5
JH
1266 if (ix86_asm_string != 0)
1267 {
1268 if (!strcmp (ix86_asm_string, "intel"))
1269 ix86_asm_dialect = ASM_INTEL;
1270 else if (!strcmp (ix86_asm_string, "att"))
1271 ix86_asm_dialect = ASM_ATT;
1272 else
1273 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1274 }
6189a572 1275 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
c725bd79 1276 error ("code model `%s' not supported in the %s bit mode",
6189a572
JH
1277 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1278 if (ix86_cmodel == CM_LARGE)
c725bd79 1279 sorry ("code model `large' not supported yet");
0c2dc519 1280 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
c725bd79 1281 sorry ("%i-bit mode not compiled in",
0c2dc519 1282 (target_flags & MASK_64BIT) ? 64 : 32);
6189a572 1283
f4365627
JH
1284 for (i = 0; i < pta_size; i++)
1285 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1286 {
1287 ix86_arch = processor_alias_table[i].processor;
1288 /* Default cpu tuning to the architecture. */
9e555526 1289 ix86_tune = ix86_arch;
f4365627 1290 if (processor_alias_table[i].flags & PTA_MMX
9ef1b13a 1291 && !(target_flags_explicit & MASK_MMX))
f4365627
JH
1292 target_flags |= MASK_MMX;
1293 if (processor_alias_table[i].flags & PTA_3DNOW
9ef1b13a 1294 && !(target_flags_explicit & MASK_3DNOW))
f4365627
JH
1295 target_flags |= MASK_3DNOW;
1296 if (processor_alias_table[i].flags & PTA_3DNOW_A
9ef1b13a 1297 && !(target_flags_explicit & MASK_3DNOW_A))
f4365627
JH
1298 target_flags |= MASK_3DNOW_A;
1299 if (processor_alias_table[i].flags & PTA_SSE
9ef1b13a 1300 && !(target_flags_explicit & MASK_SSE))
f4365627
JH
1301 target_flags |= MASK_SSE;
1302 if (processor_alias_table[i].flags & PTA_SSE2
9ef1b13a 1303 && !(target_flags_explicit & MASK_SSE2))
f4365627 1304 target_flags |= MASK_SSE2;
5bbeea44
JH
1305 if (processor_alias_table[i].flags & PTA_SSE3
1306 && !(target_flags_explicit & MASK_SSE3))
1307 target_flags |= MASK_SSE3;
f4365627
JH
1308 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1309 x86_prefetch_sse = true;
4977bab6 1310 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
3326f410
DJ
1311 {
1312 if (ix86_tune_defaulted)
1313 {
1314 ix86_tune_string = "x86-64";
1315 for (i = 0; i < pta_size; i++)
1316 if (! strcmp (ix86_tune_string,
1317 processor_alias_table[i].name))
1318 break;
1319 ix86_tune = processor_alias_table[i].processor;
1320 }
1321 else
1322 error ("CPU you selected does not support x86-64 "
1323 "instruction set");
1324 }
f4365627
JH
1325 break;
1326 }
400500c4 1327
f4365627
JH
1328 if (i == pta_size)
1329 error ("bad value (%s) for -march= switch", ix86_arch_string);
e075ae69 1330
f4365627 1331 for (i = 0; i < pta_size; i++)
9e555526 1332 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
f4365627 1333 {
9e555526 1334 ix86_tune = processor_alias_table[i].processor;
4977bab6
ZW
1335 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1336 error ("CPU you selected does not support x86-64 instruction set");
c618c6ec
JJ
1337
1338 /* Intel CPUs have always interpreted SSE prefetch instructions as
1339 NOPs; so, we can enable SSE prefetch instructions even when
1340 -mtune (rather than -march) points us to a processor that has them.
1341 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
1342 higher processors. */
1343 if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
1344 x86_prefetch_sse = true;
f4365627
JH
1345 break;
1346 }
f4365627 1347 if (i == pta_size)
9e555526 1348 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
e075ae69 1349
2ab0437e
JH
1350 if (optimize_size)
1351 ix86_cost = &size_cost;
1352 else
9e555526
RH
1353 ix86_cost = processor_target_table[ix86_tune].cost;
1354 target_flags |= processor_target_table[ix86_tune].target_enable;
1355 target_flags &= ~processor_target_table[ix86_tune].target_disable;
e075ae69 1356
36edd3cc
BS
1357 /* Arrange to set up i386_stack_locals for all functions. */
1358 init_machine_status = ix86_init_machine_status;
fce5a9f2 1359
0f290768 1360 /* Validate -mregparm= value. */
e075ae69 1361 if (ix86_regparm_string)
b08de47e 1362 {
400500c4
RK
1363 i = atoi (ix86_regparm_string);
1364 if (i < 0 || i > REGPARM_MAX)
1365 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1366 else
1367 ix86_regparm = i;
b08de47e 1368 }
0d7d98ee
JH
1369 else
1370 if (TARGET_64BIT)
1371 ix86_regparm = REGPARM_MAX;
b08de47e 1372
3e18fdf6 1373 /* If the user has provided any of the -malign-* options,
a4f31c00 1374 warn and use that value only if -falign-* is not set.
3e18fdf6 1375 Remove this code in GCC 3.2 or later. */
e075ae69 1376 if (ix86_align_loops_string)
b08de47e 1377 {
3e18fdf6
GK
1378 warning ("-malign-loops is obsolete, use -falign-loops");
1379 if (align_loops == 0)
1380 {
1381 i = atoi (ix86_align_loops_string);
1382 if (i < 0 || i > MAX_CODE_ALIGN)
1383 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1384 else
1385 align_loops = 1 << i;
1386 }
b08de47e 1387 }
3af4bd89 1388
e075ae69 1389 if (ix86_align_jumps_string)
b08de47e 1390 {
3e18fdf6
GK
1391 warning ("-malign-jumps is obsolete, use -falign-jumps");
1392 if (align_jumps == 0)
1393 {
1394 i = atoi (ix86_align_jumps_string);
1395 if (i < 0 || i > MAX_CODE_ALIGN)
1396 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1397 else
1398 align_jumps = 1 << i;
1399 }
b08de47e 1400 }
b08de47e 1401
e075ae69 1402 if (ix86_align_funcs_string)
b08de47e 1403 {
3e18fdf6
GK
1404 warning ("-malign-functions is obsolete, use -falign-functions");
1405 if (align_functions == 0)
1406 {
1407 i = atoi (ix86_align_funcs_string);
1408 if (i < 0 || i > MAX_CODE_ALIGN)
1409 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1410 else
1411 align_functions = 1 << i;
1412 }
b08de47e 1413 }
3af4bd89 1414
3e18fdf6 1415 /* Default align_* from the processor table. */
3e18fdf6 1416 if (align_loops == 0)
2cca7283 1417 {
9e555526
RH
1418 align_loops = processor_target_table[ix86_tune].align_loop;
1419 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
2cca7283 1420 }
3e18fdf6 1421 if (align_jumps == 0)
2cca7283 1422 {
9e555526
RH
1423 align_jumps = processor_target_table[ix86_tune].align_jump;
1424 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
2cca7283 1425 }
3e18fdf6 1426 if (align_functions == 0)
2cca7283 1427 {
9e555526 1428 align_functions = processor_target_table[ix86_tune].align_func;
2cca7283 1429 }
3e18fdf6 1430
e4c0478d 1431 /* Validate -mpreferred-stack-boundary= value, or provide default.
fbb83b43
AO
1432 The default of 128 bits is for Pentium III's SSE __m128, but we
1433 don't want additional code to keep the stack aligned when
1434 optimizing for code size. */
1435 ix86_preferred_stack_boundary = (optimize_size
ef49d42e 1436 ? TARGET_64BIT ? 128 : 32
fbb83b43 1437 : 128);
e075ae69 1438 if (ix86_preferred_stack_boundary_string)
3af4bd89 1439 {
400500c4 1440 i = atoi (ix86_preferred_stack_boundary_string);
ef49d42e 1441 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
c6257c5d 1442 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
ef49d42e 1443 TARGET_64BIT ? 4 : 2);
400500c4
RK
1444 else
1445 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
3af4bd89 1446 }
77a989d1 1447
0f290768 1448 /* Validate -mbranch-cost= value, or provide default. */
9e555526 1449 ix86_branch_cost = processor_target_table[ix86_tune].cost->branch_cost;
e075ae69 1450 if (ix86_branch_cost_string)
804a8ee0 1451 {
400500c4
RK
1452 i = atoi (ix86_branch_cost_string);
1453 if (i < 0 || i > 5)
1454 error ("-mbranch-cost=%d is not between 0 and 5", i);
1455 else
1456 ix86_branch_cost = i;
804a8ee0 1457 }
804a8ee0 1458
f996902d
RH
1459 if (ix86_tls_dialect_string)
1460 {
1461 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1462 ix86_tls_dialect = TLS_DIALECT_GNU;
1463 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1464 ix86_tls_dialect = TLS_DIALECT_SUN;
1465 else
1466 error ("bad value (%s) for -mtls-dialect= switch",
1467 ix86_tls_dialect_string);
1468 }
1469
e9a25f70
JL
1470 /* Keep nonleaf frame pointers. */
1471 if (TARGET_OMIT_LEAF_FRAME_POINTER)
77a989d1 1472 flag_omit_frame_pointer = 1;
e075ae69
RH
1473
1474 /* If we're doing fast math, we don't care about comparison order
1475 wrt NaNs. This lets us use a shorter comparison sequence. */
de6c5979 1476 if (flag_unsafe_math_optimizations)
e075ae69
RH
1477 target_flags &= ~MASK_IEEE_FP;
1478
30c99a84
RH
1479 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1480 since the insns won't need emulation. */
1481 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1482 target_flags &= ~MASK_NO_FANCY_MATH_387;
1483
9e200aaf
KC
1484 /* Turn on SSE2 builtins for -msse3. */
1485 if (TARGET_SSE3)
22c7c85e
L
1486 target_flags |= MASK_SSE2;
1487
1488 /* Turn on SSE builtins for -msse2. */
1489 if (TARGET_SSE2)
1490 target_flags |= MASK_SSE;
1491
14f73b5a
JH
1492 if (TARGET_64BIT)
1493 {
1494 if (TARGET_ALIGN_DOUBLE)
c725bd79 1495 error ("-malign-double makes no sense in the 64bit mode");
14f73b5a 1496 if (TARGET_RTD)
c725bd79 1497 error ("-mrtd calling convention not supported in the 64bit mode");
14f73b5a 1498 /* Enable by default the SSE and MMX builtins. */
965f5423
JH
1499 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1500 ix86_fpmath = FPMATH_SSE;
14f73b5a 1501 }
965f5423 1502 else
a5b378d6
JH
1503 {
1504 ix86_fpmath = FPMATH_387;
      /* The i386 ABI does not specify a red zone.  It still makes sense to
	 use one when the programmer takes care to keep the stack from being
	 destroyed.  */
1507 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
1508 target_flags |= MASK_NO_RED_ZONE;
1509 }
965f5423
JH
1510
1511 if (ix86_fpmath_string != 0)
1512 {
1513 if (! strcmp (ix86_fpmath_string, "387"))
1514 ix86_fpmath = FPMATH_387;
1515 else if (! strcmp (ix86_fpmath_string, "sse"))
1516 {
1517 if (!TARGET_SSE)
1518 {
1519 warning ("SSE instruction set disabled, using 387 arithmetics");
1520 ix86_fpmath = FPMATH_387;
1521 }
1522 else
1523 ix86_fpmath = FPMATH_SSE;
1524 }
1525 else if (! strcmp (ix86_fpmath_string, "387,sse")
1526 || ! strcmp (ix86_fpmath_string, "sse,387"))
1527 {
1528 if (!TARGET_SSE)
1529 {
1530 warning ("SSE instruction set disabled, using 387 arithmetics");
1531 ix86_fpmath = FPMATH_387;
1532 }
1533 else if (!TARGET_80387)
1534 {
1535 warning ("387 instruction set disabled, using SSE arithmetics");
1536 ix86_fpmath = FPMATH_SSE;
1537 }
1538 else
1539 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1540 }
fce5a9f2 1541 else
965f5423
JH
1542 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1543 }
14f73b5a 1544
a7180f70
BS
1545 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1546 on by -msse. */
1547 if (TARGET_SSE)
e37af218
RH
1548 {
1549 target_flags |= MASK_MMX;
1550 x86_prefetch_sse = true;
1551 }
c6036a37 1552
47f339cf
BS
1553 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
1554 if (TARGET_3DNOW)
1555 {
1556 target_flags |= MASK_MMX;
d1f87653 1557 /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
47f339cf
BS
1558 extensions it adds. */
1559 if (x86_3dnow_a & (1 << ix86_arch))
1560 target_flags |= MASK_3DNOW_A;
1561 }
9e555526 1562 if ((x86_accumulate_outgoing_args & TUNEMASK)
9ef1b13a 1563 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
c6036a37
JH
1564 && !optimize_size)
1565 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
623fe810
RH
1566
1567 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1568 {
1569 char *p;
1570 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1571 p = strchr (internal_label_prefix, 'X');
1572 internal_label_prefix_len = p - internal_label_prefix;
1573 *p = '\0';
1574 }
f5316dfe
MM
1575}
1576\f
32b5b1aa 1577void
b96a374d 1578optimization_options (int level, int size ATTRIBUTE_UNUSED)
32b5b1aa 1579{
e9a25f70
JL
1580 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1581 make the problem with not enough registers even worse. */
32b5b1aa
SC
1582#ifdef INSN_SCHEDULING
1583 if (level > 1)
1584 flag_schedule_insns = 0;
1585#endif
55ba61f3
JH
1586
1587 /* The default values of these switches depend on the TARGET_64BIT
1588 that is not known at this moment. Mark these values with 2 and
1589 let user the to override these. In case there is no command line option
1590 specifying them, we will set the defaults in override_options. */
1591 if (optimize >= 1)
1592 flag_omit_frame_pointer = 2;
1593 flag_pcc_struct_return = 2;
1594 flag_asynchronous_unwind_tables = 2;
32b5b1aa 1595}
b08de47e 1596\f
/* Table of valid machine attributes.  Scanned by the generic attribute
   machinery; each handler validates placement and argument(s).  */
const struct attribute_spec ix86_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  /* Stdcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "stdcall",   0, 0, false, true,  true,  ix86_handle_cdecl_attribute },
  /* Fastcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "fastcall",  0, 0, false, true,  true,  ix86_handle_cdecl_attribute },
  /* Cdecl attribute says the callee is a normal C declaration */
  { "cdecl",     0, 0, false, true,  true,  ix86_handle_cdecl_attribute },
  /* Regparm attribute specifies how many integer arguments are to be
     passed in registers.  Takes exactly one argument.  */
  { "regparm",   1, 1, false, true,  true,  ix86_handle_regparm_attribute },
#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
  /* Windows DLL import/export and shared-section attributes; only
     present on subtargets that define the macro.  */
  { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
  { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
  { "shared",    0, 0, true,  false, false, ix86_handle_shared_attribute },
#endif
  /* Select MS- or GCC-compatible struct layout for a type.  */
  { "ms_struct",  0, 0, false, false, false, ix86_handle_struct_attribute },
  { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
#ifdef SUBTARGET_ATTRIBUTE_TABLE
  SUBTARGET_ATTRIBUTE_TABLE,
#endif
  /* Sentinel marking the end of the table.  */
  { NULL,        0, 0, false, false, false, NULL }
};
1624
5fbf0217
EB
1625/* Decide whether we can make a sibling call to a function. DECL is the
1626 declaration of the function being targeted by the call and EXP is the
1627 CALL_EXPR representing the call. */
4977bab6
ZW
1628
1629static bool
b96a374d 1630ix86_function_ok_for_sibcall (tree decl, tree exp)
4977bab6
ZW
1631{
1632 /* If we are generating position-independent code, we cannot sibcall
1633 optimize any indirect call, or a direct call to a global function,
1634 as the PLT requires %ebx be live. */
1635 if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
1636 return false;
1637
1638 /* If we are returning floats on the 80387 register stack, we cannot
1639 make a sibcall from a function that doesn't return a float to a
5fbf0217
EB
1640 function that does or, conversely, from a function that does return
1641 a float to a function that doesn't; the necessary stack adjustment
1642 would not be executed. */
4977bab6 1643 if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp)))
5fbf0217 1644 != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)))))
4977bab6
ZW
1645 return false;
1646
1647 /* If this call is indirect, we'll need to be able to use a call-clobbered
b96a374d 1648 register for the address of the target function. Make sure that all
4977bab6
ZW
1649 such registers are not used for passing parameters. */
1650 if (!decl && !TARGET_64BIT)
1651 {
e767b5be 1652 tree type;
4977bab6
ZW
1653
1654 /* We're looking at the CALL_EXPR, we need the type of the function. */
1655 type = TREE_OPERAND (exp, 0); /* pointer expression */
1656 type = TREE_TYPE (type); /* pointer type */
1657 type = TREE_TYPE (type); /* function type */
1658
e767b5be 1659 if (ix86_function_regparm (type, NULL) >= 3)
4977bab6
ZW
1660 {
1661 /* ??? Need to count the actual number of registers to be used,
1662 not the possible number of registers. Fix later. */
1663 return false;
1664 }
1665 }
1666
1667 /* Otherwise okay. That also includes certain types of indirect calls. */
1668 return true;
1669}
1670
e91f04de 1671/* Handle a "cdecl", "stdcall", or "fastcall" attribute;
91d231cb
JM
1672 arguments as in struct attribute_spec.handler. */
1673static tree
b96a374d
AJ
1674ix86_handle_cdecl_attribute (tree *node, tree name,
1675 tree args ATTRIBUTE_UNUSED,
1676 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
91d231cb
JM
1677{
1678 if (TREE_CODE (*node) != FUNCTION_TYPE
1679 && TREE_CODE (*node) != METHOD_TYPE
1680 && TREE_CODE (*node) != FIELD_DECL
1681 && TREE_CODE (*node) != TYPE_DECL)
b08de47e 1682 {
91d231cb
JM
1683 warning ("`%s' attribute only applies to functions",
1684 IDENTIFIER_POINTER (name));
1685 *no_add_attrs = true;
1686 }
e91f04de
CH
1687 else
1688 {
1689 if (is_attribute_p ("fastcall", name))
1690 {
1691 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
1692 {
1693 error ("fastcall and stdcall attributes are not compatible");
1694 }
1695 else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
1696 {
1697 error ("fastcall and regparm attributes are not compatible");
1698 }
1699 }
1700 else if (is_attribute_p ("stdcall", name))
1701 {
1702 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1703 {
1704 error ("fastcall and stdcall attributes are not compatible");
1705 }
1706 }
1707 }
b08de47e 1708
91d231cb
JM
1709 if (TARGET_64BIT)
1710 {
1711 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1712 *no_add_attrs = true;
1713 }
b08de47e 1714
91d231cb
JM
1715 return NULL_TREE;
1716}
b08de47e 1717
91d231cb
JM
1718/* Handle a "regparm" attribute;
1719 arguments as in struct attribute_spec.handler. */
1720static tree
b96a374d
AJ
1721ix86_handle_regparm_attribute (tree *node, tree name, tree args,
1722 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
91d231cb
JM
1723{
1724 if (TREE_CODE (*node) != FUNCTION_TYPE
1725 && TREE_CODE (*node) != METHOD_TYPE
1726 && TREE_CODE (*node) != FIELD_DECL
1727 && TREE_CODE (*node) != TYPE_DECL)
1728 {
1729 warning ("`%s' attribute only applies to functions",
1730 IDENTIFIER_POINTER (name));
1731 *no_add_attrs = true;
1732 }
1733 else
1734 {
1735 tree cst;
b08de47e 1736
91d231cb
JM
1737 cst = TREE_VALUE (args);
1738 if (TREE_CODE (cst) != INTEGER_CST)
1739 {
1740 warning ("`%s' attribute requires an integer constant argument",
1741 IDENTIFIER_POINTER (name));
1742 *no_add_attrs = true;
1743 }
1744 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1745 {
1746 warning ("argument to `%s' attribute larger than %d",
1747 IDENTIFIER_POINTER (name), REGPARM_MAX);
1748 *no_add_attrs = true;
1749 }
e91f04de
CH
1750
1751 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
e767b5be
JH
1752 {
1753 error ("fastcall and regparm attributes are not compatible");
1754 }
b08de47e
MM
1755 }
1756
91d231cb 1757 return NULL_TREE;
b08de47e
MM
1758}
1759
1760/* Return 0 if the attributes for two types are incompatible, 1 if they
1761 are compatible, and 2 if they are nearly compatible (which causes a
1762 warning to be generated). */
1763
8d8e52be 1764static int
b96a374d 1765ix86_comp_type_attributes (tree type1, tree type2)
b08de47e 1766{
0f290768 1767 /* Check for mismatch of non-default calling convention. */
27c38fbe 1768 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
afcfe58c
MM
1769
1770 if (TREE_CODE (type1) != FUNCTION_TYPE)
1771 return 1;
1772
b96a374d 1773 /* Check for mismatched fastcall types */
e91f04de
CH
1774 if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
1775 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
b96a374d 1776 return 0;
e91f04de 1777
afcfe58c 1778 /* Check for mismatched return types (cdecl vs stdcall). */
6093f019
RH
1779 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1780 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
265d94ac
MM
1781 return 0;
1782 if (ix86_function_regparm (type1, NULL)
1783 != ix86_function_regparm (type2, NULL))
afcfe58c 1784 return 0;
b08de47e
MM
1785 return 1;
1786}
b08de47e 1787\f
e767b5be
JH
1788/* Return the regparm value for a fuctio with the indicated TYPE and DECL.
1789 DECL may be NULL when calling function indirectly
839a4992 1790 or considering a libcall. */
483ab821
MM
1791
1792static int
e767b5be 1793ix86_function_regparm (tree type, tree decl)
483ab821
MM
1794{
1795 tree attr;
e767b5be
JH
1796 int regparm = ix86_regparm;
1797 bool user_convention = false;
483ab821 1798
e767b5be
JH
1799 if (!TARGET_64BIT)
1800 {
1801 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1802 if (attr)
1803 {
1804 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1805 user_convention = true;
1806 }
1807
1808 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
1809 {
1810 regparm = 2;
1811 user_convention = true;
1812 }
1813
1814 /* Use register calling convention for local functions when possible. */
1815 if (!TARGET_64BIT && !user_convention && decl
cb0bc263 1816 && flag_unit_at_a_time && !profile_flag)
e767b5be
JH
1817 {
1818 struct cgraph_local_info *i = cgraph_local_info (decl);
1819 if (i && i->local)
1820 {
1821 /* We can't use regparm(3) for nested functions as these use
1822 static chain pointer in third argument. */
1823 if (DECL_CONTEXT (decl) && !DECL_NO_STATIC_CHAIN (decl))
1824 regparm = 2;
1825 else
1826 regparm = 3;
1827 }
1828 }
1829 }
1830 return regparm;
483ab821
MM
1831}
1832
/* Return true if EAX is live at the start of the function.  Used by
   ix86_expand_prologue to determine if we need special help before
   calling allocate_stack_worker.  */

static bool
ix86_eax_live_at_start_p (void)
{
  /* Cheat.  Don't bother working forward from ix86_function_regparm
     to the function type to whether an actual argument is located in
     eax.  Instead just look at cfg info, which is still close enough
     to correct at this point.  This gives false positives for broken
     functions that might use uninitialized data that happens to be
     allocated in eax, but who cares?  */
  /* Hard register 0 is AX in this backend's numbering -- we test its
     membership in the live-out set of the entry block.  */
  return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->global_live_at_end, 0);
}
1848
b08de47e
MM
1849/* Value is the number of bytes of arguments automatically
1850 popped when returning from a subroutine call.
1851 FUNDECL is the declaration node of the function (as a tree),
1852 FUNTYPE is the data type of the function (as a tree),
1853 or for a library call it is an identifier node for the subroutine name.
1854 SIZE is the number of bytes of arguments passed on the stack.
1855
1856 On the 80386, the RTD insn may be used to pop them if the number
1857 of args is fixed, but if the number is variable then the caller
1858 must pop them all. RTD can't be used for library calls now
1859 because the library is compiled with the Unix compiler.
1860 Use of RTD is a selectable option, since it is incompatible with
1861 standard Unix calling sequences. If the option is not selected,
1862 the caller must always pop the args.
1863
1864 The attribute stdcall is equivalent to RTD on a per module basis. */
1865
1866int
b96a374d 1867ix86_return_pops_args (tree fundecl, tree funtype, int size)
79325812 1868{
3345ee7d 1869 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
b08de47e 1870
43f3a59d 1871 /* Cdecl functions override -mrtd, and never pop the stack. */
e9a25f70 1872 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
79325812 1873
43f3a59d
KH
1874 /* Stdcall and fastcall functions will pop the stack if not
1875 variable args. */
e91f04de
CH
1876 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
1877 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
698cdd84 1878 rtd = 1;
79325812 1879
698cdd84
SC
1880 if (rtd
1881 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
e9a25f70
JL
1882 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1883 == void_type_node)))
698cdd84
SC
1884 return size;
1885 }
79325812 1886
232b8f52 1887 /* Lose any fake structure return argument if it is passed on the stack. */
61f71b34 1888 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
0d7d98ee 1889 && !TARGET_64BIT)
232b8f52 1890 {
e767b5be 1891 int nregs = ix86_function_regparm (funtype, fundecl);
232b8f52
JJ
1892
1893 if (!nregs)
1894 return GET_MODE_SIZE (Pmode);
1895 }
1896
1897 return 0;
b08de47e 1898}
b08de47e
MM
1899\f
1900/* Argument support functions. */
1901
53c17031
JH
1902/* Return true when register may be used to pass function parameters. */
1903bool
b96a374d 1904ix86_function_arg_regno_p (int regno)
53c17031
JH
1905{
1906 int i;
1907 if (!TARGET_64BIT)
0333394e
JJ
1908 return (regno < REGPARM_MAX
1909 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
53c17031
JH
1910 if (SSE_REGNO_P (regno) && TARGET_SSE)
1911 return true;
1912 /* RAX is used as hidden argument to va_arg functions. */
1913 if (!regno)
1914 return true;
1915 for (i = 0; i < REGPARM_MAX; i++)
1916 if (regno == x86_64_int_parameter_registers[i])
1917 return true;
1918 return false;
1919}
1920
fe984136
RH
1921/* Return if we do not know how to pass TYPE solely in registers. */
1922
1923static bool
1924ix86_must_pass_in_stack (enum machine_mode mode, tree type)
1925{
1926 if (must_pass_in_stack_var_size_or_pad (mode, type))
1927 return true;
1928 return (!TARGET_64BIT && type && mode == TImode);
1929}
1930
/* Initialize a variable CUM of type CUMULATIVE_ARGS
   for a call to a function whose data type is FNTYPE.
   For a library call, FNTYPE is 0.  FNDECL, when available, lets
   ix86_function_regparm consult attributes and local-call info.  */

void
init_cumulative_args (CUMULATIVE_ARGS *cum,  /* Argument info to initialize */
		      tree fntype,	/* tree ptr for function decl */
		      rtx libname,	/* SYMBOL_REF of library name or 0 */
		      tree fndecl)
{
  static CUMULATIVE_ARGS zero_cum;
  tree param, next_param;

  if (TARGET_DEBUG_ARG)
    {
      fprintf (stderr, "\ninit_cumulative_args (");
      if (fntype)
	fprintf (stderr, "fntype code = %s, ret code = %s",
		 tree_code_name[(int) TREE_CODE (fntype)],
		 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
      else
	fprintf (stderr, "no fntype");

      if (libname)
	fprintf (stderr, ", libname = %s", XSTR (libname, 0));
    }

  /* Start from an all-zero state, then fill in register budgets.  */
  *cum = zero_cum;

  /* Set up the number of registers to use for passing arguments.  */
  if (fntype)
    cum->nregs = ix86_function_regparm (fntype, fndecl);
  else
    cum->nregs = ix86_regparm;
  if (TARGET_SSE)
    cum->sse_nregs = SSE_REGPARM_MAX;
  if (TARGET_MMX)
    cum->mmx_nregs = MMX_REGPARM_MAX;
  cum->warn_sse = true;
  cum->warn_mmx = true;
  cum->maybe_vaarg = false;

  /* Use ecx and edx registers if function has fastcall attribute.  */
  if (fntype && !TARGET_64BIT)
    {
      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
	{
	  cum->nregs = 2;
	  cum->fastcall = 1;
	}
    }

  /* Determine if this function has variable arguments.  This is
     indicated by the last argument being 'void_type_node' if there
     are no variable arguments.  If there are variable arguments, then
     we won't pass anything in registers in 32-bit mode.  */

  if (cum->nregs || cum->mmx_nregs || cum->sse_nregs)
    {
      for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
	   param != 0; param = next_param)
	{
	  next_param = TREE_CHAIN (param);
	  /* A parameter list not terminated by void_type_node means the
	     function is variadic.  */
	  if (next_param == 0 && TREE_VALUE (param) != void_type_node)
	    {
	      if (!TARGET_64BIT)
		{
		  cum->nregs = 0;
		  cum->sse_nregs = 0;
		  cum->mmx_nregs = 0;
		  cum->warn_sse = 0;
		  cum->warn_mmx = 0;
		  cum->fastcall = 0;
		}
	      cum->maybe_vaarg = true;
	    }
	}
    }
  /* No type info at all (indirect libcall) or an unprototyped function
     might also be variadic.  */
  if ((!fntype && !libname)
      || (fntype && !TYPE_ARG_TYPES (fntype)))
    cum->maybe_vaarg = 1;

  if (TARGET_DEBUG_ARG)
    fprintf (stderr, ", nregs=%d )\n", cum->nregs);

  return;
}
2018
/* x86-64 register passing implementation.  See the x86-64 psABI for
   details.  The goal of this code is to classify each 8-byte chunk of an
   incoming argument by register class and assign registers accordingly.  */
2022
2023/* Return the union class of CLASS1 and CLASS2.
2024 See the x86-64 PS ABI for details. */
2025
2026static enum x86_64_reg_class
b96a374d 2027merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
53c17031
JH
2028{
2029 /* Rule #1: If both classes are equal, this is the resulting class. */
2030 if (class1 == class2)
2031 return class1;
2032
2033 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
2034 the other class. */
2035 if (class1 == X86_64_NO_CLASS)
2036 return class2;
2037 if (class2 == X86_64_NO_CLASS)
2038 return class1;
2039
2040 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
2041 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
2042 return X86_64_MEMORY_CLASS;
2043
2044 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
2045 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
2046 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
2047 return X86_64_INTEGERSI_CLASS;
2048 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
2049 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
2050 return X86_64_INTEGER_CLASS;
2051
2052 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
2053 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
2054 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
2055 return X86_64_MEMORY_CLASS;
2056
2057 /* Rule #6: Otherwise class SSE is used. */
2058 return X86_64_SSE_CLASS;
2059}
2060
/* Classify the argument of type TYPE and mode MODE.
   CLASSES will be filled by the register class used to pass each word
   of the operand.  The number of words is returned.  In case the parameter
   should be passed in memory, 0 is returned.  As a special case for zero
   sized containers, classes[0] will be NO_CLASS and 1 is returned.

   BIT_OFFSET is used internally for handling records and specifies offset
   of the offset in bits modulo 256 to avoid overflow cases.

   See the x86-64 PS ABI for details.
*/

static int
classify_argument (enum machine_mode mode, tree type,
		   enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
{
  /* Size in bytes: for BLKmode use the type's size (may be -1 for
     variable-sized types), otherwise the mode's size.  */
  HOST_WIDE_INT bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  /* Number of 8-byte words covered, accounting for the sub-word start
     offset within the first eightbyte.  */
  int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  /* Variable sized entities are always passed/returned in memory.  */
  if (bytes < 0)
    return 0;

  if (mode != VOIDmode
      && targetm.calls.must_pass_in_stack (mode, type))
    return 0;

  if (type && AGGREGATE_TYPE_P (type))
    {
      int i;
      tree field;
      enum x86_64_reg_class subclasses[MAX_CLASSES];

      /* On x86-64 we pass structures larger than 16 bytes on the stack.  */
      if (bytes > 16)
	return 0;

      for (i = 0; i < words; i++)
	classes[i] = X86_64_NO_CLASS;

      /* Zero sized arrays or structures are NO_CLASS.  We return 0 to
	 signalize memory class, so handle it as special case.  */
      if (!words)
	{
	  classes[0] = X86_64_NO_CLASS;
	  return 1;
	}

      /* Classify each field of record and merge classes.  */
      if (TREE_CODE (type) == RECORD_TYPE)
	{
	  /* For classes first merge in the field of the subclasses
	     (i.e. C++ base classes recorded in the binfo).  */
	  if (TYPE_BINFO (type))
	    {
	      tree binfo, base_binfo;
	      int i;

	      for (binfo = TYPE_BINFO (type), i = 0;
		   BINFO_BASE_ITERATE (binfo, i, base_binfo); i++)
		{
		  int num;
		  int offset = tree_low_cst (BINFO_OFFSET (base_binfo), 0) * 8;
		  tree type = BINFO_TYPE (base_binfo);

		  num = classify_argument (TYPE_MODE (type),
					   type, subclasses,
					   (offset + bit_offset) % 256);
		  if (!num)
		    return 0;
		  for (i = 0; i < num; i++)
		    {
		      int pos = (offset + (bit_offset % 64)) / 8 / 8;
		      classes[i + pos] =
			merge_classes (subclasses[i], classes[i + pos]);
		    }
		}
	    }
	  /* And now merge the fields of structure.  */
	  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
	    {
	      if (TREE_CODE (field) == FIELD_DECL)
		{
		  int num;

		  /* Bitfields are always classified as integer.  Handle them
		     early, since later code would consider them to be
		     misaligned integers.  */
		  if (DECL_BIT_FIELD (field))
		    {
		      /* Mark every eightbyte the bitfield overlaps as
			 INTEGER.  */
		      for (i = int_bit_position (field) / 8 / 8;
			   i < (int_bit_position (field)
				+ tree_low_cst (DECL_SIZE (field), 0)
				+ 63) / 8 / 8; i++)
			classes[i] =
			  merge_classes (X86_64_INTEGER_CLASS,
					 classes[i]);
		    }
		  else
		    {
		      num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
					       TREE_TYPE (field), subclasses,
					       (int_bit_position (field)
						+ bit_offset) % 256);
		      if (!num)
			return 0;
		      for (i = 0; i < num; i++)
			{
			  int pos =
			    (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
			  classes[i + pos] =
			    merge_classes (subclasses[i], classes[i + pos]);
			}
		    }
		}
	    }
	}
      /* Arrays are handled as small records.  */
      else if (TREE_CODE (type) == ARRAY_TYPE)
	{
	  int num;
	  num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
				   TREE_TYPE (type), subclasses, bit_offset);
	  if (!num)
	    return 0;

	  /* The partial classes are now full classes.  */
	  if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
	    subclasses[0] = X86_64_SSE_CLASS;
	  if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
	    subclasses[0] = X86_64_INTEGER_CLASS;

	  /* Replicate the element classification across all words.  */
	  for (i = 0; i < words; i++)
	    classes[i] = subclasses[i % num];
	}
      /* Unions are similar to RECORD_TYPE but offset is always 0.  */
      else if (TREE_CODE (type) == UNION_TYPE
	       || TREE_CODE (type) == QUAL_UNION_TYPE)
	{
	  /* For classes first merge in the field of the subclasses.  */
	  if (TYPE_BINFO (type))
	    {
	      tree binfo, base_binfo;
	      int i;

	      for (binfo = TYPE_BINFO (type), i = 0;
		   BINFO_BASE_ITERATE (binfo, i, base_binfo); i++)
		{
		  int num;
		  int offset = tree_low_cst (BINFO_OFFSET (base_binfo), 0) * 8;
		  tree type = BINFO_TYPE (base_binfo);

		  num = classify_argument (TYPE_MODE (type),
					   type, subclasses,
					   (offset + (bit_offset % 64)) % 256);
		  if (!num)
		    return 0;
		  for (i = 0; i < num; i++)
		    {
		      int pos = (offset + (bit_offset % 64)) / 8 / 8;
		      classes[i + pos] =
			merge_classes (subclasses[i], classes[i + pos]);
		    }
		}
	    }
	  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
	    {
	      if (TREE_CODE (field) == FIELD_DECL)
		{
		  int num;
		  num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
					   TREE_TYPE (field), subclasses,
					   bit_offset);
		  if (!num)
		    return 0;
		  for (i = 0; i < num; i++)
		    classes[i] = merge_classes (subclasses[i], classes[i]);
		}
	    }
	}
      /* Pascal-style SET_TYPE: classify by size alone.  */
      else if (TREE_CODE (type) == SET_TYPE)
	{
	  if (bytes <= 4)
	    {
	      classes[0] = X86_64_INTEGERSI_CLASS;
	      return 1;
	    }
	  else if (bytes <= 8)
	    {
	      classes[0] = X86_64_INTEGER_CLASS;
	      return 1;
	    }
	  else if (bytes <= 12)
	    {
	      classes[0] = X86_64_INTEGER_CLASS;
	      classes[1] = X86_64_INTEGERSI_CLASS;
	      return 2;
	    }
	  else
	    {
	      classes[0] = X86_64_INTEGER_CLASS;
	      classes[1] = X86_64_INTEGER_CLASS;
	      return 2;
	    }
	}
      else
	abort ();

      /* Final merger cleanup.  */
      for (i = 0; i < words; i++)
	{
	  /* If one class is MEMORY, everything should be passed in
	     memory.  */
	  if (classes[i] == X86_64_MEMORY_CLASS)
	    return 0;

	  /* The X86_64_SSEUP_CLASS should be always preceded by
	     X86_64_SSE_CLASS.  */
	  if (classes[i] == X86_64_SSEUP_CLASS
	      && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
	    classes[i] = X86_64_SSE_CLASS;

	  /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS.  */
	  if (classes[i] == X86_64_X87UP_CLASS
	      && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
	    classes[i] = X86_64_SSE_CLASS;
	}
      return words;
    }

  /* Compute alignment needed.  We align all types to natural boundaries with
     exception of XFmode that is aligned to 64bits.  */
  if (mode != VOIDmode && mode != BLKmode)
    {
      int mode_alignment = GET_MODE_BITSIZE (mode);

      if (mode == XFmode)
	mode_alignment = 128;
      else if (mode == XCmode)
	mode_alignment = 256;
      if (COMPLEX_MODE_P (mode))
	mode_alignment /= 2;
      /* Misaligned fields are always returned in memory.  */
      if (bit_offset % mode_alignment)
	return 0;
    }

  /* for V1xx modes, just use the base mode */
  if (VECTOR_MODE_P (mode)
      && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
    mode = GET_MODE_INNER (mode);

  /* Classification of atomic types.  */
  switch (mode)
    {
    case DImode:
    case SImode:
    case HImode:
    case QImode:
    case CSImode:
    case CHImode:
    case CQImode:
      /* Integers fitting entirely in the low 32 bits of an eightbyte
	 get the SI variant.  */
      if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
	classes[0] = X86_64_INTEGERSI_CLASS;
      else
	classes[0] = X86_64_INTEGER_CLASS;
      return 1;
    case CDImode:
    case TImode:
      classes[0] = classes[1] = X86_64_INTEGER_CLASS;
      return 2;
    case CTImode:
      /* 32-byte complex integer: memory.  */
      return 0;
    case SFmode:
      if (!(bit_offset % 64))
	classes[0] = X86_64_SSESF_CLASS;
      else
	classes[0] = X86_64_SSE_CLASS;
      return 1;
    case DFmode:
      classes[0] = X86_64_SSEDF_CLASS;
      return 1;
    case XFmode:
      classes[0] = X86_64_X87_CLASS;
      classes[1] = X86_64_X87UP_CLASS;
      return 2;
    case TFmode:
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      return 2;
    case SCmode:
      classes[0] = X86_64_SSE_CLASS;
      return 1;
    case DCmode:
      classes[0] = X86_64_SSEDF_CLASS;
      classes[1] = X86_64_SSEDF_CLASS;
      return 2;
    case XCmode:
    case TCmode:
      /* These modes are larger than 16 bytes.  */
      return 0;
    case V4SFmode:
    case V4SImode:
    case V16QImode:
    case V8HImode:
    case V2DFmode:
    case V2DImode:
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      return 2;
    case V2SFmode:
    case V2SImode:
    case V4HImode:
    case V8QImode:
      classes[0] = X86_64_SSE_CLASS;
      return 1;
    case BLKmode:
    case VOIDmode:
      return 0;
    default:
      /* Remaining vector modes with integer elements are classified as
	 one or two INTEGER words; classes[1] is only consumed by the
	 caller when two words are returned.  */
      if (VECTOR_MODE_P (mode))
	{
	  if (bytes > 16)
	    return 0;
	  if (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT)
	    {
	      if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
		classes[0] = X86_64_INTEGERSI_CLASS;
	      else
		classes[0] = X86_64_INTEGER_CLASS;
	      classes[1] = X86_64_INTEGER_CLASS;
	      return 1 + (bytes > 8);
	    }
	}
      abort ();
    }
}
2398
2399/* Examine the argument and return set number of register required in each
f5143c46 2400 class. Return 0 iff parameter should be passed in memory. */
53c17031 2401static int
b96a374d
AJ
2402examine_argument (enum machine_mode mode, tree type, int in_return,
2403 int *int_nregs, int *sse_nregs)
53c17031
JH
2404{
2405 enum x86_64_reg_class class[MAX_CLASSES];
2406 int n = classify_argument (mode, type, class, 0);
2407
2408 *int_nregs = 0;
2409 *sse_nregs = 0;
2410 if (!n)
2411 return 0;
2412 for (n--; n >= 0; n--)
2413 switch (class[n])
2414 {
2415 case X86_64_INTEGER_CLASS:
2416 case X86_64_INTEGERSI_CLASS:
2417 (*int_nregs)++;
2418 break;
2419 case X86_64_SSE_CLASS:
2420 case X86_64_SSESF_CLASS:
2421 case X86_64_SSEDF_CLASS:
2422 (*sse_nregs)++;
2423 break;
2424 case X86_64_NO_CLASS:
2425 case X86_64_SSEUP_CLASS:
2426 break;
2427 case X86_64_X87_CLASS:
2428 case X86_64_X87UP_CLASS:
2429 if (!in_return)
2430 return 0;
2431 break;
2432 case X86_64_MEMORY_CLASS:
2433 abort ();
2434 }
2435 return 1;
2436}
/* Construct container for the argument used by GCC interface.  See
   FUNCTION_ARG for the detailed description.

   MODE/TYPE describe the argument; IN_RETURN is nonzero when building a
   return-value container.  NINTREGS/NSSEREGS are the registers still
   available, INTREG points at the next integer register numbers to use,
   and SSE_REGNO is the index of the next SSE register.  Returns a REG,
   a PARALLEL describing the multi-register split, or NULL when the value
   goes in memory (or is zero-sized).  */
static rtx
construct_container (enum machine_mode mode, tree type, int in_return,
		     int nintregs, int nsseregs, const int * intreg,
		     int sse_regno)
{
  enum machine_mode tmpmode;
  int bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  enum x86_64_reg_class class[MAX_CLASSES];
  int n;
  int i;
  int nexps = 0;
  int needed_sseregs, needed_intregs;
  rtx exp[MAX_CLASSES];
  rtx ret;

  n = classify_argument (mode, type, class, 0);
  if (TARGET_DEBUG_ARG)
    {
      if (!n)
	fprintf (stderr, "Memory class\n");
      else
	{
	  fprintf (stderr, "Classes:");
	  for (i = 0; i < n; i++)
	    {
	      fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
	    }
	  fprintf (stderr, "\n");
	}
    }
  /* Memory class, or not enough free registers: pass in memory.  */
  if (!n)
    return NULL;
  if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
    return NULL;
  if (needed_intregs > nintregs || needed_sseregs > nsseregs)
    return NULL;

  /* First construct simple cases.  Avoid SCmode, since we want to use
     single register to pass this type.  */
  if (n == 1 && mode != SCmode)
    switch (class[0])
      {
      case X86_64_INTEGER_CLASS:
      case X86_64_INTEGERSI_CLASS:
	return gen_rtx_REG (mode, intreg[0]);
      case X86_64_SSE_CLASS:
      case X86_64_SSESF_CLASS:
      case X86_64_SSEDF_CLASS:
	return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
      case X86_64_X87_CLASS:
	return gen_rtx_REG (mode, FIRST_STACK_REG);
      case X86_64_NO_CLASS:
	/* Zero sized array, struct or class.  */
	return NULL;
      default:
	abort ();
      }
  /* A full 16-byte SSE value in a single XMM register.  */
  if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
      && mode != BLKmode)
    return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
  /* An 80-bit long double in st(0).  */
  if (n == 2
      && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
    return gen_rtx_REG (XFmode, FIRST_STACK_REG);
  /* A 128-bit integer/TF value in a consecutive register pair.  */
  if (n == 2 && class[0] == X86_64_INTEGER_CLASS
      && class[1] == X86_64_INTEGER_CLASS
      && (mode == CDImode || mode == TImode || mode == TFmode)
      && intreg[0] + 1 == intreg[1])
    return gen_rtx_REG (mode, intreg[0]);
  /* A complex long double in the x87 stack.  */
  if (n == 4
      && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
      && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS
      && mode != BLKmode)
    return gen_rtx_REG (XCmode, FIRST_STACK_REG);

  /* Otherwise figure out the entries of the PARALLEL.  */
  for (i = 0; i < n; i++)
    {
      switch (class[i])
	{
	  case X86_64_NO_CLASS:
	    break;
	  case X86_64_INTEGER_CLASS:
	  case X86_64_INTEGERSI_CLASS:
	    /* Merge TImodes on aligned occasions here too.  */
	    if (i * 8 + 8 > bytes)
	      tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
	    else if (class[i] == X86_64_INTEGERSI_CLASS)
	      tmpmode = SImode;
	    else
	      tmpmode = DImode;
	    /* We've requested 24 bytes we don't have mode for.  Use DImode.  */
	    if (tmpmode == BLKmode)
	      tmpmode = DImode;
	    exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
					       gen_rtx_REG (tmpmode, *intreg),
					       GEN_INT (i*8));
	    intreg++;
	    break;
	  case X86_64_SSESF_CLASS:
	    exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
					       gen_rtx_REG (SFmode,
							    SSE_REGNO (sse_regno)),
					       GEN_INT (i*8));
	    sse_regno++;
	    break;
	  case X86_64_SSEDF_CLASS:
	    exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
					       gen_rtx_REG (DFmode,
							    SSE_REGNO (sse_regno)),
					       GEN_INT (i*8));
	    sse_regno++;
	    break;
	  case X86_64_SSE_CLASS:
	    /* If the next eightbyte is SSEUP, this SSE register carries a
	       full 16-byte value; consume the extra slot below.  */
	    if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
	      tmpmode = TImode;
	    else
	      tmpmode = DImode;
	    exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
					       gen_rtx_REG (tmpmode,
							    SSE_REGNO (sse_regno)),
					       GEN_INT (i*8));
	    if (tmpmode == TImode)
	      i++;
	    sse_regno++;
	    break;
	  default:
	    abort ();
	}
    }
  ret =  gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
  for (i = 0; i < nexps; i++)
    XVECEXP (ret, 0, i) = exp [i];
  return ret;
}
2574
b08de47e
MM
2575/* Update the data in CUM to advance over an argument
2576 of mode MODE and data type TYPE.
2577 (TYPE is null for libcalls where that information may not be available.) */
2578
2579void
b96a374d
AJ
2580function_arg_advance (CUMULATIVE_ARGS *cum, /* current arg information */
2581 enum machine_mode mode, /* current arg mode */
2582 tree type, /* type of the argument or 0 if lib support */
2583 int named) /* whether or not the argument was named */
b08de47e 2584{
5ac9118e
KG
2585 int bytes =
2586 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
b08de47e
MM
2587 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2588
2589 if (TARGET_DEBUG_ARG)
2590 fprintf (stderr,
bcf17554
JH
2591 "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, mode=%s, named=%d)\n\n",
2592 words, cum->words, cum->nregs, cum->sse_nregs, GET_MODE_NAME (mode), named);
53c17031 2593 if (TARGET_64BIT)
b08de47e 2594 {
53c17031
JH
2595 int int_nregs, sse_nregs;
2596 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2597 cum->words += words;
2598 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
82a127a9 2599 {
53c17031
JH
2600 cum->nregs -= int_nregs;
2601 cum->sse_nregs -= sse_nregs;
2602 cum->regno += int_nregs;
2603 cum->sse_regno += sse_nregs;
82a127a9 2604 }
53c17031
JH
2605 else
2606 cum->words += words;
b08de47e 2607 }
a4f31c00 2608 else
82a127a9 2609 {
bcf17554
JH
2610 if (TARGET_SSE && SSE_REG_MODE_P (mode)
2611 && (!type || !AGGREGATE_TYPE_P (type)))
53c17031
JH
2612 {
2613 cum->sse_words += words;
2614 cum->sse_nregs -= 1;
2615 cum->sse_regno += 1;
2616 if (cum->sse_nregs <= 0)
2617 {
2618 cum->sse_nregs = 0;
2619 cum->sse_regno = 0;
2620 }
2621 }
bcf17554
JH
2622 else if (TARGET_MMX && MMX_REG_MODE_P (mode)
2623 && (!type || !AGGREGATE_TYPE_P (type)))
2624 {
2625 cum->mmx_words += words;
2626 cum->mmx_nregs -= 1;
2627 cum->mmx_regno += 1;
2628 if (cum->mmx_nregs <= 0)
2629 {
2630 cum->mmx_nregs = 0;
2631 cum->mmx_regno = 0;
2632 }
2633 }
53c17031 2634 else
82a127a9 2635 {
53c17031
JH
2636 cum->words += words;
2637 cum->nregs -= words;
2638 cum->regno += words;
2639
2640 if (cum->nregs <= 0)
2641 {
2642 cum->nregs = 0;
2643 cum->regno = 0;
2644 }
82a127a9
CM
2645 }
2646 }
b08de47e
MM
2647 return;
2648}
2649
/* Define where to put the arguments to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   MODE is the argument's machine mode.
   TYPE is the data type of the argument (as a tree).
   This is null for libcalls where that information may
   not be available.
   CUM is a variable of type CUMULATIVE_ARGS which gives info about
   the preceding args and about the function being called.
   NAMED is nonzero if this argument is a named parameter
   (otherwise it is an extra parameter matching an ellipsis).  */

rtx
function_arg (CUMULATIVE_ARGS *cum,	/* current arg information */
	      enum machine_mode mode,	/* current arg mode */
	      tree type,	/* type of the argument or 0 if lib support */
	      int named)	/* != 0 for normal args, == 0 for ... args */
{
  rtx ret = NULL_RTX;
  int bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
  /* Each ABI-changing diagnostic is emitted at most once per compilation.  */
  static bool warnedsse, warnedmmx;

  /* To simplify the code below, represent vector types with a vector mode
     even if MMX/SSE are not active.  */
  if (type
      && TREE_CODE (type) == VECTOR_TYPE
      && (bytes == 8 || bytes == 16)
      && GET_MODE_CLASS (TYPE_MODE (type)) != MODE_VECTOR_INT
      && GET_MODE_CLASS (TYPE_MODE (type)) != MODE_VECTOR_FLOAT)
    {
      enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
      mode = TREE_CODE (TREE_TYPE (type)) == REAL_TYPE
	     ? MIN_MODE_VECTOR_FLOAT : MIN_MODE_VECTOR_INT;

      /* Get the mode which has this inner mode and number of units.  */
      while (GET_MODE_NUNITS (mode) != TYPE_VECTOR_SUBPARTS (type)
	     || GET_MODE_INNER (mode) != innermode)
	{
	  mode = GET_MODE_WIDER_MODE (mode);
	  if (mode == VOIDmode)
	    abort ();
	}
    }

  /* Handle a hidden AL argument containing number of registers for varargs
     x86-64 functions.  For i386 ABI just return constm1_rtx to avoid
     any AL settings.  */
  if (mode == VOIDmode)
    {
      if (TARGET_64BIT)
	return GEN_INT (cum->maybe_vaarg
			? (cum->sse_nregs < 0
			   ? SSE_REGPARM_MAX
			   : cum->sse_regno)
			: -1);
      else
	return constm1_rtx;
    }
  if (TARGET_64BIT)
    /* x86-64: the psABI classification decides the register container.  */
    ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
			       &x86_64_int_parameter_registers [cum->regno],
			       cum->sse_regno);
  else
    switch (mode)
      {
	/* For now, pass fp/complex values on the stack.  */
      default:
	break;

      case BLKmode:
	if (bytes < 0)
	  break;
	/* FALLTHRU */
      case DImode:
      case SImode:
      case HImode:
      case QImode:
	if (words <= cum->nregs)
	  {
	    int regno = cum->regno;

	    /* Fastcall allocates the first two DWORD (SImode) or
	       smaller arguments to ECX and EDX.  */
	    if (cum->fastcall)
	      {
		if (mode == BLKmode || mode == DImode)
		  break;

		/* ECX not EAX is the first allocated register.  */
		if (regno == 0)
		  regno = 2;
	      }
	    ret = gen_rtx_REG (mode, regno);
	  }
	break;
      case TImode:
      case V16QImode:
      case V8HImode:
      case V4SImode:
      case V2DImode:
      case V4SFmode:
      case V2DFmode:
	/* 16-byte SSE vectors: warn once if SSE is off (ABI changes),
	   otherwise use the next XMM register if one is free.  */
	if (!type || !AGGREGATE_TYPE_P (type))
	  {
	    if (!TARGET_SSE && !warnedsse && cum->warn_sse)
	      {
		warnedsse = true;
		warning ("SSE vector argument without SSE enabled "
			 "changes the ABI");
	      }
	    if (cum->sse_nregs)
	      ret = gen_rtx_REG (mode, cum->sse_regno + FIRST_SSE_REG);
	  }
	break;
      case V8QImode:
      case V4HImode:
      case V2SImode:
      case V2SFmode:
	/* 8-byte MMX vectors: analogous warning and register choice.  */
	if (!type || !AGGREGATE_TYPE_P (type))
	  {
	    if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
	      {
		warnedmmx = true;
		warning ("MMX vector argument without MMX enabled "
			 "changes the ABI");
	      }
	    if (cum->mmx_nregs)
	      ret = gen_rtx_REG (mode, cum->mmx_regno + FIRST_MMX_REG);
	  }
	break;
      }

  if (TARGET_DEBUG_ARG)
    {
      fprintf (stderr,
	       "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
	       words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);

      if (ret)
	print_simple_rtl (stderr, ret);
      else
	fprintf (stderr, ", stack");

      fprintf (stderr, " )\n");
    }

  return ret;
}
53c17031 2801
09b2e78d
ZD
2802/* A C expression that indicates when an argument must be passed by
2803 reference. If nonzero for an argument, a copy of that argument is
2804 made in memory and a pointer to the argument is passed instead of
2805 the argument itself. The pointer is passed in whatever way is
2806 appropriate for passing a pointer to that type. */
2807
8cd5a4e0
RH
2808static bool
2809ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
2810 enum machine_mode mode ATTRIBUTE_UNUSED,
2811 tree type, bool named ATTRIBUTE_UNUSED)
09b2e78d
ZD
2812{
2813 if (!TARGET_64BIT)
2814 return 0;
2815
2816 if (type && int_size_in_bytes (type) == -1)
2817 {
2818 if (TARGET_DEBUG_ARG)
2819 fprintf (stderr, "function_arg_pass_by_reference\n");
2820 return 1;
2821 }
2822
2823 return 0;
2824}
2825
/* Return true when TYPE should be 128bit aligned for 32bit argument passing
   ABI.  Only called if TARGET_SSE.

   Recursively walks aggregates (including C++ base classes via BINFO) and
   arrays; leaf SSE-mode members with (effective) 128-bit alignment make the
   whole type 128-bit aligned.  */
static bool
contains_128bit_aligned_vector_p (tree type)
{
  enum machine_mode mode = TYPE_MODE (type);
  /* A direct SSE-mode type counts unless the user explicitly lowered
     its alignment below 128 bits.  */
  if (SSE_REG_MODE_P (mode)
      && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
    return true;
  /* Anything with less than 128-bit alignment cannot contain a
     128-bit-aligned vector.  */
  if (TYPE_ALIGN (type) < 128)
    return false;

  if (AGGREGATE_TYPE_P (type))
    {
      /* Walk the aggregates recursively.  */
      if (TREE_CODE (type) == RECORD_TYPE
	  || TREE_CODE (type) == UNION_TYPE
	  || TREE_CODE (type) == QUAL_UNION_TYPE)
	{
	  tree field;

	  /* C++ classes: check all base classes first.  */
	  if (TYPE_BINFO (type))
	    {
	      tree binfo, base_binfo;
	      int i;

	      for (binfo = TYPE_BINFO (type), i = 0;
		   BINFO_BASE_ITERATE (binfo, i, base_binfo); i++)
		if (contains_128bit_aligned_vector_p (BINFO_TYPE (base_binfo)))
		  return true;
	    }
	  /* And now merge the fields of structure.  */
	  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
	    {
	      if (TREE_CODE (field) == FIELD_DECL
		  && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
		return true;
	    }
	}
      /* Just for use if some languages passes arrays by value.  */
      else if (TREE_CODE (type) == ARRAY_TYPE)
	{
	  if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
	    return true;
	}
      else
	abort ();
    }
  return false;
}
2876
bb498ea3
AH
2877/* Gives the alignment boundary, in bits, of an argument with the
2878 specified mode and type. */
53c17031
JH
2879
2880int
b96a374d 2881ix86_function_arg_boundary (enum machine_mode mode, tree type)
53c17031
JH
2882{
2883 int align;
53c17031
JH
2884 if (type)
2885 align = TYPE_ALIGN (type);
2886 else
2887 align = GET_MODE_ALIGNMENT (mode);
2888 if (align < PARM_BOUNDARY)
2889 align = PARM_BOUNDARY;
8b978a57
JH
2890 if (!TARGET_64BIT)
2891 {
2892 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
2893 make an exception for SSE modes since these require 128bit
b96a374d 2894 alignment.
8b978a57
JH
2895
2896 The handling here differs from field_alignment. ICC aligns MMX
2897 arguments to 4 byte boundaries, while structure fields are aligned
2898 to 8 byte boundaries. */
78fbfc4b
JB
2899 if (!TARGET_SSE)
2900 align = PARM_BOUNDARY;
2901 else if (!type)
8b978a57
JH
2902 {
2903 if (!SSE_REG_MODE_P (mode))
2904 align = PARM_BOUNDARY;
2905 }
2906 else
2907 {
2908 if (!contains_128bit_aligned_vector_p (type))
2909 align = PARM_BOUNDARY;
2910 }
8b978a57 2911 }
53c17031
JH
2912 if (align > 128)
2913 align = 128;
2914 return align;
2915}
2916
2917/* Return true if N is a possible register number of function value. */
2918bool
b96a374d 2919ix86_function_value_regno_p (int regno)
53c17031
JH
2920{
2921 if (!TARGET_64BIT)
2922 {
2923 return ((regno) == 0
2924 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2925 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2926 }
2927 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2928 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2929 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2930}
2931
2932/* Define how to find the value returned by a function.
2933 VALTYPE is the data type of the value (as a tree).
2934 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2935 otherwise, FUNC is 0. */
2936rtx
b96a374d 2937ix86_function_value (tree valtype)
53c17031
JH
2938{
2939 if (TARGET_64BIT)
2940 {
2941 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2942 REGPARM_MAX, SSE_REGPARM_MAX,
2943 x86_64_int_return_registers, 0);
d1f87653
KH
2944 /* For zero sized structures, construct_container return NULL, but we need
2945 to keep rest of compiler happy by returning meaningful value. */
53c17031
JH
2946 if (!ret)
2947 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2948 return ret;
2949 }
2950 else
b069de3b
SS
2951 return gen_rtx_REG (TYPE_MODE (valtype),
2952 ix86_value_regno (TYPE_MODE (valtype)));
53c17031
JH
2953}
2954
/* Return false iff type is returned in memory.

   64-bit: a type needs memory exactly when the psABI classification says
   so (examine_argument fails).
   32-bit: the decision is by mode and size — see the per-case comments.  */
int
ix86_return_in_memory (tree type)
{
  int needed_intregs, needed_sseregs, size;
  enum machine_mode mode = TYPE_MODE (type);

  if (TARGET_64BIT)
    return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);

  /* BLKmode aggregates always go in memory on 32-bit.  */
  if (mode == BLKmode)
    return 1;

  size = int_size_in_bytes (type);

  /* MS ABI compatibility: small aggregates (<= 8 bytes) in registers.  */
  if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
    return 0;

  if (VECTOR_MODE_P (mode) || mode == TImode)
    {
      /* User-created vectors small enough to fit in EAX.  */
      if (size < 8)
	return 0;

      /* MMX/3dNow values are returned on the stack, since we've
	 got to EMMS/FEMMS before returning.  */
      if (size == 8)
	return 1;

      /* SSE values are returned in XMM0, except when it doesn't exist.  */
      if (size == 16)
	return (TARGET_SSE ? 0 : 1);
    }

  /* Long double fits in st(0).  */
  if (mode == XFmode)
    return 0;

  /* Anything bigger than 12 bytes goes in memory.  */
  if (size > 12)
    return 1;
  return 0;
}
2996
0397ac35
RH
2997/* When returning SSE vector types, we have a choice of either
2998 (1) being abi incompatible with a -march switch, or
2999 (2) generating an error.
3000 Given no good solution, I think the safest thing is one warning.
3001 The user won't be able to use -Werror, but....
3002
3003 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
3004 called in response to actually generating a caller or callee that
3005 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
3006 via aggregate_value_p for general type probing from tree-ssa. */
3007
3008static rtx
3009ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
3010{
3011 static bool warned;
3012
3013 if (!TARGET_SSE && type && !warned)
3014 {
3015 /* Look at the return type of the function, not the function type. */
3016 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
3017
3018 if (mode == TImode
3019 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3020 {
3021 warned = true;
3022 warning ("SSE vector return without SSE enabled changes the ABI");
3023 }
3024 }
3025
3026 return NULL;
3027}
3028
53c17031
JH
3029/* Define how to find the value returned by a library function
3030 assuming the value has mode MODE. */
3031rtx
b96a374d 3032ix86_libcall_value (enum machine_mode mode)
53c17031
JH
3033{
3034 if (TARGET_64BIT)
3035 {
3036 switch (mode)
3037 {
f8a1ebc6
JH
3038 case SFmode:
3039 case SCmode:
3040 case DFmode:
3041 case DCmode:
9e9fb0ce 3042 case TFmode:
f8a1ebc6
JH
3043 return gen_rtx_REG (mode, FIRST_SSE_REG);
3044 case XFmode:
f8a1ebc6 3045 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
9e9fb0ce 3046 case XCmode:
f8a1ebc6
JH
3047 case TCmode:
3048 return NULL;
3049 default:
3050 return gen_rtx_REG (mode, 0);
53c17031
JH
3051 }
3052 }
3053 else
f8a1ebc6 3054 return gen_rtx_REG (mode, ix86_value_regno (mode));
b069de3b
SS
3055}
3056
3057/* Given a mode, return the register to use for a return value. */
3058
3059static int
b96a374d 3060ix86_value_regno (enum machine_mode mode)
b069de3b 3061{
a30b6839 3062 /* Floating point return values in %st(0). */
b069de3b
SS
3063 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
3064 return FIRST_FLOAT_REG;
a30b6839
RH
3065 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
3066 we prevent this case when sse is not available. */
3067 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
b069de3b 3068 return FIRST_SSE_REG;
a30b6839 3069 /* Everything else in %eax. */
b069de3b 3070 return 0;
53c17031 3071}
/* Create the va_list data type.

   For 32-bit targets va_list is a plain `char *'.  For x86-64 it is the
   psABI record { gp_offset, fp_offset, overflow_arg_area, reg_save_area },
   exposed as a one-element array so that va_list decays to a pointer.
   The field order here must match what ix86_va_start and the va_arg
   expansion below rely on.  */

static tree
ix86_build_builtin_va_list (void)
{
  tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;

  /* For i386 we use plain pointer to argument area.  */
  if (!TARGET_64BIT)
    return build_pointer_type (char_type_node);

  record = (*lang_hooks.types.make_type) (RECORD_TYPE);
  type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);

  f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
		      unsigned_type_node);
  f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
		      unsigned_type_node);
  f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
		      ptr_type_node);
  f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
		      ptr_type_node);

  DECL_FIELD_CONTEXT (f_gpr) = record;
  DECL_FIELD_CONTEXT (f_fpr) = record;
  DECL_FIELD_CONTEXT (f_ovf) = record;
  DECL_FIELD_CONTEXT (f_sav) = record;

  TREE_CHAIN (record) = type_decl;
  TYPE_NAME (record) = type_decl;
  TYPE_FIELDS (record) = f_gpr;
  TREE_CHAIN (f_gpr) = f_fpr;
  TREE_CHAIN (f_fpr) = f_ovf;
  TREE_CHAIN (f_ovf) = f_sav;

  layout_type (record);

  /* The correct type is an array type of one element.  */
  return build_array_type (record, build_index_type (size_zero_node));
}
3113
/* Worker function for TARGET_SETUP_INCOMING_VARARGS.

   On x86-64, spill the unnamed integer argument registers to the register
   save area, and emit the sse_prologue_save computed-jump sequence that
   stores only the SSE registers actually used (their count arrives in AL).
   A no-op for 32-bit targets.  NOTE(review): depends on the frame layout
   established elsewhere (frame_pointer_rtx as save area base) — verify
   against ix86_va_start.  */

static void
ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
			     tree type, int *pretend_size ATTRIBUTE_UNUSED,
			     int no_rtl)
{
  CUMULATIVE_ARGS next_cum;
  rtx save_area = NULL_RTX, mem;
  rtx label;
  rtx label_ref;
  rtx tmp_reg;
  rtx nsse_reg;
  int set;
  tree fntype;
  int stdarg_p;
  int i;

  if (!TARGET_64BIT)
    return;

  /* Indicate to allocate space on the stack for varargs save area.  */
  ix86_save_varrargs_registers = 1;

  /* The register save area must be 128-bit aligned for the SSE stores.  */
  cfun->stack_alignment_needed = 128;

  fntype = TREE_TYPE (current_function_decl);
  stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
	      && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
		  != void_type_node));

  /* For varargs, we do not want to skip the dummy va_dcl argument.
     For stdargs, we do want to skip the last named argument.  */
  next_cum = *cum;
  if (stdarg_p)
    function_arg_advance (&next_cum, mode, type, 1);

  if (!no_rtl)
    save_area = frame_pointer_rtx;

  set = get_varargs_alias_set ();

  /* Spill the remaining unnamed integer parameter registers.  */
  for (i = next_cum.regno; i < ix86_regparm; i++)
    {
      mem = gen_rtx_MEM (Pmode,
			 plus_constant (save_area, i * UNITS_PER_WORD));
      set_mem_alias_set (mem, set);
      emit_move_insn (mem, gen_rtx_REG (Pmode,
					x86_64_int_parameter_registers[i]));
    }

  if (next_cum.sse_nregs)
    {
      /* Now emit code to save SSE registers.  The AX parameter contains number
	 of SSE parameter registers used to call this function.  We use
	 sse_prologue_save insn template that produces computed jump across
	 SSE saves.  We need some preparation work to get this working.  */

      label = gen_label_rtx ();
      label_ref = gen_rtx_LABEL_REF (Pmode, label);

      /* Compute address to jump to :
	 label - 5*eax + nnamed_sse_arguments*5  */
      tmp_reg = gen_reg_rtx (Pmode);
      nsse_reg = gen_reg_rtx (Pmode);
      emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
      emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
			      gen_rtx_MULT (Pmode, nsse_reg,
					    GEN_INT (4))));
      if (next_cum.sse_regno)
	emit_move_insn
	  (nsse_reg,
	   gen_rtx_CONST (DImode,
			  gen_rtx_PLUS (DImode,
					label_ref,
					GEN_INT (next_cum.sse_regno * 4))));
      else
	emit_move_insn (nsse_reg, label_ref);
      emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));

      /* Compute address of memory block we save into.  We always use pointer
	 pointing 127 bytes after first byte to store - this is needed to keep
	 instruction size limited by 4 bytes.  */
      tmp_reg = gen_reg_rtx (Pmode);
      emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
			      plus_constant (save_area,
					     8 * REGPARM_MAX + 127)));
      mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
      set_mem_alias_set (mem, set);
      set_mem_align (mem, BITS_PER_WORD);

      /* And finally do the dirty job!  */
      emit_insn (gen_sse_prologue_save (mem, nsse_reg,
					GEN_INT (next_cum.sse_regno), label));
    }

}
3211
/* Implement va_start.

   VALIST is the va_list lvalue; NEXTARG is the standard hook argument
   (used only by the 32-bit fallback).  On x86-64, initialize the four
   __va_list_tag fields from the counts recorded in
   current_function_args_info: gp_offset/fp_offset index into the register
   save area, overflow_arg_area points past the named stack arguments, and
   reg_save_area points at the save area built by
   ix86_setup_incoming_varargs.  */

void
ix86_va_start (tree valist, rtx nextarg)
{
  HOST_WIDE_INT words, n_gpr, n_fpr;
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;

  /* Only 64bit target needs something special.  */
  if (!TARGET_64BIT)
    {
      std_expand_builtin_va_start (valist, nextarg);
      return;
    }

  f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
  f_fpr = TREE_CHAIN (f_gpr);
  f_ovf = TREE_CHAIN (f_fpr);
  f_sav = TREE_CHAIN (f_ovf);

  valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
  gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
  fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
  ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
  sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);

  /* Count number of gp and fp argument registers used.  */
  words = current_function_args_info.words;
  n_gpr = current_function_args_info.regno;
  n_fpr = current_function_args_info.sse_regno;

  if (TARGET_DEBUG_ARG)
    fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
	     (int) words, (int) n_gpr, (int) n_fpr);

  /* gp_offset = n_gpr * 8 (each GP register slot is 8 bytes).  */
  t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
	     build_int_2 (n_gpr * 8, 0));
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* fp_offset = n_fpr * 16 + 8*REGPARM_MAX (FP slots follow GP slots).  */
  t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
	     build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* Find the overflow area.  */
  t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
  if (words != 0)
    t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
	       build_int_2 (words * UNITS_PER_WORD, 0));
  t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* Find the register save area.
     Prologue of the function save it right above stack frame.  */
  t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
  t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
}
3274
3275/* Implement va_arg. */
cd3ce9b4 3276
23a60a04
JM
3277tree
3278ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
cd3ce9b4 3279{
cd3ce9b4
JM
3280 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
3281 tree f_gpr, f_fpr, f_ovf, f_sav;
3282 tree gpr, fpr, ovf, sav, t;
3283 int size, rsize;
3284 tree lab_false, lab_over = NULL_TREE;
3285 tree addr, t2;
3286 rtx container;
3287 int indirect_p = 0;
3288 tree ptrtype;
3289
3290 /* Only 64bit target needs something special. */
3291 if (!TARGET_64BIT)
23a60a04 3292 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
cd3ce9b4
JM
3293
3294 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3295 f_fpr = TREE_CHAIN (f_gpr);
3296 f_ovf = TREE_CHAIN (f_fpr);
3297 f_sav = TREE_CHAIN (f_ovf);
3298
3299 valist = build_fold_indirect_ref (valist);
44de5aeb
RK
3300 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
3301 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
3302 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
3303 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
cd3ce9b4 3304
08b0dc1b
RH
3305 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
3306 if (indirect_p)
3307 type = build_pointer_type (type);
cd3ce9b4 3308 size = int_size_in_bytes (type);
cd3ce9b4
JM
3309 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3310
3311 container = construct_container (TYPE_MODE (type), type, 0,
3312 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
3313 /*
3314 * Pull the value out of the saved registers ...
3315 */
3316
3317 addr = create_tmp_var (ptr_type_node, "addr");
3318 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
3319
3320 if (container)
3321 {
3322 int needed_intregs, needed_sseregs;
e52a6df5 3323 bool need_temp;
cd3ce9b4
JM
3324 tree int_addr, sse_addr;
3325
3326 lab_false = create_artificial_label ();
3327 lab_over = create_artificial_label ();
3328
3329 examine_argument (TYPE_MODE (type), type, 0,
3330 &needed_intregs, &needed_sseregs);
3331
e52a6df5
JB
3332 need_temp = (!REG_P (container)
3333 && ((needed_intregs && TYPE_ALIGN (type) > 64)
3334 || TYPE_ALIGN (type) > 128));
cd3ce9b4
JM
3335
3336 /* In case we are passing structure, verify that it is consecutive block
3337 on the register save area. If not we need to do moves. */
3338 if (!need_temp && !REG_P (container))
3339 {
3340 /* Verify that all registers are strictly consecutive */
3341 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
3342 {
3343 int i;
3344
3345 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3346 {
3347 rtx slot = XVECEXP (container, 0, i);
3348 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
3349 || INTVAL (XEXP (slot, 1)) != i * 16)
3350 need_temp = 1;
3351 }
3352 }
3353 else
3354 {
3355 int i;
3356
3357 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3358 {
3359 rtx slot = XVECEXP (container, 0, i);
3360 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
3361 || INTVAL (XEXP (slot, 1)) != i * 8)
3362 need_temp = 1;
3363 }
3364 }
3365 }
3366 if (!need_temp)
3367 {
3368 int_addr = addr;
3369 sse_addr = addr;
3370 }
3371 else
3372 {
3373 int_addr = create_tmp_var (ptr_type_node, "int_addr");
3374 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
3375 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
3376 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
3377 }
3378 /* First ensure that we fit completely in registers. */
3379 if (needed_intregs)
3380 {
3381 t = build_int_2 ((REGPARM_MAX - needed_intregs + 1) * 8, 0);
3382 TREE_TYPE (t) = TREE_TYPE (gpr);
3383 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
3384 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
3385 t = build (COND_EXPR, void_type_node, t, t2, NULL_TREE);
3386 gimplify_and_add (t, pre_p);
3387 }
3388 if (needed_sseregs)
3389 {
3390 t = build_int_2 ((SSE_REGPARM_MAX - needed_sseregs + 1) * 16
3391 + REGPARM_MAX * 8, 0);
3392 TREE_TYPE (t) = TREE_TYPE (fpr);
3393 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
3394 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
3395 t = build (COND_EXPR, void_type_node, t, t2, NULL_TREE);
3396 gimplify_and_add (t, pre_p);
3397 }
3398
3399 /* Compute index to start of area used for integer regs. */
3400 if (needed_intregs)
3401 {
3402 /* int_addr = gpr + sav; */
3403 t = build2 (PLUS_EXPR, ptr_type_node, sav, gpr);
3404 t = build2 (MODIFY_EXPR, void_type_node, int_addr, t);
3405 gimplify_and_add (t, pre_p);
3406 }
3407 if (needed_sseregs)
3408 {
3409 /* sse_addr = fpr + sav; */
3410 t = build2 (PLUS_EXPR, ptr_type_node, sav, fpr);
3411 t = build2 (MODIFY_EXPR, void_type_node, sse_addr, t);
3412 gimplify_and_add (t, pre_p);
3413 }
3414 if (need_temp)
3415 {
3416 int i;
3417 tree temp = create_tmp_var (type, "va_arg_tmp");
3418
3419 /* addr = &temp; */
3420 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
3421 t = build2 (MODIFY_EXPR, void_type_node, addr, t);
3422 gimplify_and_add (t, pre_p);
3423
3424 for (i = 0; i < XVECLEN (container, 0); i++)
3425 {
3426 rtx slot = XVECEXP (container, 0, i);
3427 rtx reg = XEXP (slot, 0);
3428 enum machine_mode mode = GET_MODE (reg);
3429 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
3430 tree addr_type = build_pointer_type (piece_type);
3431 tree src_addr, src;
3432 int src_offset;
3433 tree dest_addr, dest;
3434
3435 if (SSE_REGNO_P (REGNO (reg)))
3436 {
3437 src_addr = sse_addr;
3438 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
3439 }
3440 else
3441 {
3442 src_addr = int_addr;
3443 src_offset = REGNO (reg) * 8;
3444 }
23a60a04 3445 src_addr = fold_convert (addr_type, src_addr);
cd3ce9b4
JM
3446 src_addr = fold (build2 (PLUS_EXPR, addr_type, src_addr,
3447 size_int (src_offset)));
3448 src = build_fold_indirect_ref (src_addr);
3449
23a60a04 3450 dest_addr = fold_convert (addr_type, addr);
cd3ce9b4
JM
3451 dest_addr = fold (build2 (PLUS_EXPR, addr_type, dest_addr,
3452 size_int (INTVAL (XEXP (slot, 1)))));
3453 dest = build_fold_indirect_ref (dest_addr);
3454
3455 t = build2 (MODIFY_EXPR, void_type_node, dest, src);
3456 gimplify_and_add (t, pre_p);
3457 }
3458 }
3459
3460 if (needed_intregs)
3461 {
3462 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
3463 build_int_2 (needed_intregs * 8, 0));
3464 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
3465 gimplify_and_add (t, pre_p);
3466 }
3467 if (needed_sseregs)
3468 {
3469 t =
3470 build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
3471 build_int_2 (needed_sseregs * 16, 0));
3472 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
3473 gimplify_and_add (t, pre_p);
3474 }
3475
3476 t = build1 (GOTO_EXPR, void_type_node, lab_over);
3477 gimplify_and_add (t, pre_p);
3478
3479 t = build1 (LABEL_EXPR, void_type_node, lab_false);
3480 append_to_statement_list (t, pre_p);
3481 }
3482
3483 /* ... otherwise out of the overflow area. */
3484
3485 /* Care for on-stack alignment if needed. */
3486 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
3487 t = ovf;
3488 else
3489 {
3490 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
3491 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
3492 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
3493 }
3494 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
3495
3496 t2 = build2 (MODIFY_EXPR, void_type_node, addr, t);
3497 gimplify_and_add (t2, pre_p);
3498
3499 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
3500 build_int_2 (rsize * UNITS_PER_WORD, 0));
3501 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3502 gimplify_and_add (t, pre_p);
3503
3504 if (container)
3505 {
3506 t = build1 (LABEL_EXPR, void_type_node, lab_over);
3507 append_to_statement_list (t, pre_p);
3508 }
3509
3510 ptrtype = build_pointer_type (type);
23a60a04 3511 addr = fold_convert (ptrtype, addr);
cd3ce9b4
JM
3512
3513 if (indirect_p)
3514 addr = build_fold_indirect_ref (addr);
23a60a04 3515 return build_fold_indirect_ref (addr);
cd3ce9b4 3516}
ad919812 3517\f
c3c637e3
GS
3518/* Return nonzero if OP is either a i387 or SSE fp register. */
3519int
b96a374d 3520any_fp_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
c3c637e3
GS
3521{
3522 return ANY_FP_REG_P (op);
3523}
3524
3525/* Return nonzero if OP is an i387 fp register. */
3526int
b96a374d 3527fp_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
c3c637e3
GS
3528{
3529 return FP_REG_P (op);
3530}
3531
3532/* Return nonzero if OP is a non-fp register_operand. */
3533int
b96a374d 3534register_and_not_any_fp_reg_operand (rtx op, enum machine_mode mode)
c3c637e3
GS
3535{
3536 return register_operand (op, mode) && !ANY_FP_REG_P (op);
3537}
3538
40b982a9 3539/* Return nonzero if OP is a register operand other than an
c3c637e3
GS
3540 i387 fp register. */
3541int
b96a374d 3542register_and_not_fp_reg_operand (rtx op, enum machine_mode mode)
c3c637e3
GS
3543{
3544 return register_operand (op, mode) && !FP_REG_P (op);
3545}
3546
7dd4b4a3
JH
3547/* Return nonzero if OP is general operand representable on x86_64. */
3548
3549int
b96a374d 3550x86_64_general_operand (rtx op, enum machine_mode mode)
7dd4b4a3
JH
3551{
3552 if (!TARGET_64BIT)
3553 return general_operand (op, mode);
3554 if (nonimmediate_operand (op, mode))
3555 return 1;
c05dbe81 3556 return x86_64_sign_extended_value (op);
7dd4b4a3
JH
3557}
3558
3559/* Return nonzero if OP is general operand representable on x86_64
d6a7951f 3560 as either sign extended or zero extended constant. */
7dd4b4a3
JH
3561
3562int
b96a374d 3563x86_64_szext_general_operand (rtx op, enum machine_mode mode)
7dd4b4a3
JH
3564{
3565 if (!TARGET_64BIT)
3566 return general_operand (op, mode);
3567 if (nonimmediate_operand (op, mode))
3568 return 1;
c05dbe81 3569 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
7dd4b4a3
JH
3570}
3571
3572/* Return nonzero if OP is nonmemory operand representable on x86_64. */
3573
3574int
b96a374d 3575x86_64_nonmemory_operand (rtx op, enum machine_mode mode)
7dd4b4a3
JH
3576{
3577 if (!TARGET_64BIT)
3578 return nonmemory_operand (op, mode);
3579 if (register_operand (op, mode))
3580 return 1;
c05dbe81 3581 return x86_64_sign_extended_value (op);
7dd4b4a3
JH
3582}
3583
3584/* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
3585
3586int
b96a374d 3587x86_64_movabs_operand (rtx op, enum machine_mode mode)
7dd4b4a3
JH
3588{
3589 if (!TARGET_64BIT || !flag_pic)
3590 return nonmemory_operand (op, mode);
c05dbe81 3591 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
7dd4b4a3
JH
3592 return 1;
3593 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
3594 return 1;
3595 return 0;
3596}
3597
7e6dc358
JJ
3598/* Return nonzero if OPNUM's MEM should be matched
3599 in movabs* patterns. */
3600
3601int
3602ix86_check_movabs (rtx insn, int opnum)
3603{
3604 rtx set, mem;
3605
3606 set = PATTERN (insn);
3607 if (GET_CODE (set) == PARALLEL)
3608 set = XVECEXP (set, 0, 0);
3609 if (GET_CODE (set) != SET)
3610 abort ();
3611 mem = XEXP (set, opnum);
3612 while (GET_CODE (mem) == SUBREG)
3613 mem = SUBREG_REG (mem);
3614 if (GET_CODE (mem) != MEM)
3615 abort ();
3616 return (volatile_ok || !MEM_VOLATILE_P (mem));
3617}
3618
7dd4b4a3
JH
3619/* Return nonzero if OP is nonmemory operand representable on x86_64. */
3620
3621int
b96a374d 3622x86_64_szext_nonmemory_operand (rtx op, enum machine_mode mode)
7dd4b4a3
JH
3623{
3624 if (!TARGET_64BIT)
3625 return nonmemory_operand (op, mode);
3626 if (register_operand (op, mode))
3627 return 1;
c05dbe81 3628 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
7dd4b4a3
JH
3629}
3630
3631/* Return nonzero if OP is immediate operand representable on x86_64. */
3632
3633int
b96a374d 3634x86_64_immediate_operand (rtx op, enum machine_mode mode)
7dd4b4a3
JH
3635{
3636 if (!TARGET_64BIT)
3637 return immediate_operand (op, mode);
c05dbe81 3638 return x86_64_sign_extended_value (op);
7dd4b4a3
JH
3639}
3640
3641/* Return nonzero if OP is immediate operand representable on x86_64. */
3642
3643int
b96a374d 3644x86_64_zext_immediate_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7dd4b4a3
JH
3645{
3646 return x86_64_zero_extended_value (op);
3647}
3648
794a292d
JJ
3649/* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
3650 for shift & compare patterns, as shifting by 0 does not change flags),
3651 else return zero. */
3652
3653int
b96a374d 3654const_int_1_31_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
794a292d
JJ
3655{
3656 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31);
3657}
3658
e075ae69
RH
/* Returns 1 if OP is either a symbol reference or a sum of a symbol
   reference and a constant.  */

int
symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  switch (GET_CODE (op))
    {
    case SYMBOL_REF:
    case LABEL_REF:
      return 1;

    case CONST:
      /* Look inside the CONST wrapper.  Accept a bare symbol, label,
	 or one of the pic unspecs (@GOT, @GOTOFF, @GOTPCREL).  */
      op = XEXP (op, 0);
      if (GET_CODE (op) == SYMBOL_REF
	  || GET_CODE (op) == LABEL_REF
	  || (GET_CODE (op) == UNSPEC
	      && (XINT (op, 1) == UNSPEC_GOT
		  || XINT (op, 1) == UNSPEC_GOTOFF
		  || XINT (op, 1) == UNSPEC_GOTPCREL)))
	return 1;
      /* Otherwise only (plus SYM const_int) forms can qualify.  */
      if (GET_CODE (op) != PLUS
	  || GET_CODE (XEXP (op, 1)) != CONST_INT)
	return 0;

      op = XEXP (op, 0);
      if (GET_CODE (op) == SYMBOL_REF
	  || GET_CODE (op) == LABEL_REF)
	return 1;
      /* Only @GOTOFF gets offsets.  */
      if (GET_CODE (op) != UNSPEC
	  || XINT (op, 1) != UNSPEC_GOTOFF)
	return 0;

      /* Check the symbol wrapped inside the @GOTOFF unspec.  */
      op = XVECEXP (op, 0, 0);
      if (GET_CODE (op) == SYMBOL_REF
	  || GET_CODE (op) == LABEL_REF)
	return 1;
      return 0;

    default:
      return 0;
    }
}
2a2ab3f9 3703
/* Return true if the operand contains a @GOT or @GOTOFF reference.  */

int
pic_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  /* Pic references always live under a CONST wrapper.  */
  if (GET_CODE (op) != CONST)
    return 0;
  op = XEXP (op, 0);
  if (TARGET_64BIT)
    {
      /* In 64-bit mode only @GOTPCREL qualifies, either bare or with
	 an added constant offset.  */
      if (GET_CODE (op) == UNSPEC
	  && XINT (op, 1) == UNSPEC_GOTPCREL)
	return 1;
      if (GET_CODE (op) == PLUS
	  && GET_CODE (XEXP (op, 0)) == UNSPEC
	  && XINT (XEXP (op, 0), 1) == UNSPEC_GOTPCREL)
	return 1;
    }
  else
    {
      /* In 32-bit mode any unspec counts (e.g. @GOT, @GOTOFF),
	 possibly with a constant offset added.  */
      if (GET_CODE (op) == UNSPEC)
	return 1;
      if (GET_CODE (op) != PLUS
	  || GET_CODE (XEXP (op, 1)) != CONST_INT)
	return 0;
      op = XEXP (op, 0);
      if (GET_CODE (op) == UNSPEC)
	return 1;
    }
  return 0;
}
2a2ab3f9 3735
623fe810
RH
/* Return true if OP is a symbolic operand that resolves locally.  */

static int
local_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  /* Peel off a constant offset: (const (plus SYM INT)) -> SYM.  */
  if (GET_CODE (op) == CONST
      && GET_CODE (XEXP (op, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
    op = XEXP (XEXP (op, 0), 0);

  /* Labels always resolve within the current translation unit.  */
  if (GET_CODE (op) == LABEL_REF)
    return 1;

  if (GET_CODE (op) != SYMBOL_REF)
    return 0;

  if (SYMBOL_REF_LOCAL_P (op))
    return 1;

  /* There is, however, a not insubstantial body of code in the rest of
     the compiler that assumes it can just stick the results of
     ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done.  */
  /* ??? This is a hack.  Should update the body of the compiler to
     always create a DECL an invoke targetm.encode_section_info.  */
  if (strncmp (XSTR (op, 0), internal_label_prefix,
	       internal_label_prefix_len) == 0)
    return 1;

  return 0;
}
3766
2ae5ae57 3767/* Test for various thread-local symbols. */
f996902d
RH
3768
3769int
8d531ab9 3770tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
f996902d 3771{
f996902d
RH
3772 if (GET_CODE (op) != SYMBOL_REF)
3773 return 0;
2ae5ae57 3774 return SYMBOL_REF_TLS_MODEL (op);
f996902d
RH
3775}
3776
2ae5ae57 3777static inline int
b96a374d 3778tls_symbolic_operand_1 (rtx op, enum tls_model kind)
f996902d 3779{
f996902d
RH
3780 if (GET_CODE (op) != SYMBOL_REF)
3781 return 0;
2ae5ae57 3782 return SYMBOL_REF_TLS_MODEL (op) == kind;
f996902d
RH
3783}
3784
3785int
8d531ab9 3786global_dynamic_symbolic_operand (rtx op,
b96a374d 3787 enum machine_mode mode ATTRIBUTE_UNUSED)
f996902d
RH
3788{
3789 return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
3790}
3791
3792int
8d531ab9 3793local_dynamic_symbolic_operand (rtx op,
b96a374d 3794 enum machine_mode mode ATTRIBUTE_UNUSED)
f996902d
RH
3795{
3796 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
3797}
3798
3799int
8d531ab9 3800initial_exec_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
f996902d
RH
3801{
3802 return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
3803}
3804
3805int
8d531ab9 3806local_exec_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
f996902d
RH
3807{
3808 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
3809}
3810
28d52ffb
RH
/* Test for a valid operand for a call instruction.  Don't allow the
   arg pointer register or virtual regs since they may decay into
   reg + const, which the patterns can't handle.  */

int
call_insn_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  /* Disallow indirect through a virtual register.  This leads to
     compiler aborts when trying to eliminate them.  */
  if (GET_CODE (op) == REG
      && (op == arg_pointer_rtx
	  || op == frame_pointer_rtx
	  || (REGNO (op) >= FIRST_PSEUDO_REGISTER
	      && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
    return 0;

  /* Disallow `call 1234'.  Due to varying assembler lameness this
     gets either rejected or translated to `call .+1234'.  */
  if (GET_CODE (op) == CONST_INT)
    return 0;

  /* Explicitly allow SYMBOL_REF even if pic.  */
  if (GET_CODE (op) == SYMBOL_REF)
    return 1;

  /* Otherwise we can allow any general_operand in the address.  */
  return general_operand (op, Pmode);
}
79325812 3839
4977bab6
ZW
/* Test for a valid operand for a sibling call instruction.  Don't allow
   the arg pointer register or virtual regs since they may decay into
   reg + const, which the patterns can't handle.  */

int
sibcall_insn_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  /* Disallow indirect through a virtual register.  This leads to
     compiler aborts when trying to eliminate them.  */
  if (GET_CODE (op) == REG
      && (op == arg_pointer_rtx
	  || op == frame_pointer_rtx
	  || (REGNO (op) >= FIRST_PSEUDO_REGISTER
	      && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
    return 0;

  /* Explicitly allow SYMBOL_REF even if pic.  */
  if (GET_CODE (op) == SYMBOL_REF)
    return 1;

  /* Otherwise we can only allow register operands.  Unlike
     call_insn_operand, general operands (e.g. memory) are not
     accepted here.  */
  return register_operand (op, Pmode);
}
3863
e075ae69 3864int
b96a374d 3865constant_call_address_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
e075ae69 3866{
eaf19aba
JJ
3867 if (GET_CODE (op) == CONST
3868 && GET_CODE (XEXP (op, 0)) == PLUS
3869 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3870 op = XEXP (XEXP (op, 0), 0);
e1ff012c 3871 return GET_CODE (op) == SYMBOL_REF;
e075ae69 3872}
2a2ab3f9 3873
e075ae69 3874/* Match exactly zero and one. */
e9a25f70 3875
0f290768 3876int
8d531ab9 3877const0_operand (rtx op, enum machine_mode mode)
e075ae69
RH
3878{
3879 return op == CONST0_RTX (mode);
3880}
e9a25f70 3881
0f290768 3882int
8d531ab9 3883const1_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
e075ae69
RH
3884{
3885 return op == const1_rtx;
3886}
2a2ab3f9 3887
e075ae69 3888/* Match 2, 4, or 8. Used for leal multiplicands. */
e9a25f70 3889
e075ae69 3890int
8d531ab9 3891const248_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
e075ae69
RH
3892{
3893 return (GET_CODE (op) == CONST_INT
3894 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
3895}
e9a25f70 3896
ebe75517 3897int
8d531ab9 3898const_0_to_3_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
ebe75517
JH
3899{
3900 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 4);
3901}
3902
3903int
8d531ab9 3904const_0_to_7_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
ebe75517
JH
3905{
3906 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 8);
3907}
3908
3909int
8d531ab9 3910const_0_to_15_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
ebe75517
JH
3911{
3912 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 16);
3913}
3914
3915int
8d531ab9 3916const_0_to_255_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
ebe75517
JH
3917{
3918 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 256);
3919}
3920
3921
d1f87653 3922/* True if this is a constant appropriate for an increment or decrement. */
81fd0956 3923
e075ae69 3924int
8d531ab9 3925incdec_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
e075ae69 3926{
f5143c46 3927 /* On Pentium4, the inc and dec operations causes extra dependency on flag
b4e89e2d 3928 registers, since carry flag is not set. */
89c43c0a 3929 if ((TARGET_PENTIUM4 || TARGET_NOCONA) && !optimize_size)
b4e89e2d 3930 return 0;
2b1c08f5 3931 return op == const1_rtx || op == constm1_rtx;
e075ae69 3932}
2a2ab3f9 3933
371bc54b
JH
3934/* Return nonzero if OP is acceptable as operand of DImode shift
3935 expander. */
3936
3937int
b96a374d 3938shiftdi_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
371bc54b
JH
3939{
3940 if (TARGET_64BIT)
3941 return nonimmediate_operand (op, mode);
3942 else
3943 return register_operand (op, mode);
3944}
3945
0f290768 3946/* Return false if this is the stack pointer, or any other fake
e075ae69
RH
3947 register eliminable to the stack pointer. Otherwise, this is
3948 a register operand.
2a2ab3f9 3949
e075ae69
RH
3950 This is used to prevent esp from being used as an index reg.
3951 Which would only happen in pathological cases. */
5f1ec3e6 3952
e075ae69 3953int
8d531ab9 3954reg_no_sp_operand (rtx op, enum machine_mode mode)
e075ae69
RH
3955{
3956 rtx t = op;
3957 if (GET_CODE (t) == SUBREG)
3958 t = SUBREG_REG (t);
564d80f4 3959 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
e075ae69 3960 return 0;
2a2ab3f9 3961
e075ae69 3962 return register_operand (op, mode);
2a2ab3f9 3963}
b840bfb0 3964
915119a5 3965int
8d531ab9 3966mmx_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
915119a5
BS
3967{
3968 return MMX_REG_P (op);
3969}
3970
2c5a510c
RH
3971/* Return false if this is any eliminable register. Otherwise
3972 general_operand. */
3973
3974int
8d531ab9 3975general_no_elim_operand (rtx op, enum machine_mode mode)
2c5a510c
RH
3976{
3977 rtx t = op;
3978 if (GET_CODE (t) == SUBREG)
3979 t = SUBREG_REG (t);
3980 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3981 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3982 || t == virtual_stack_dynamic_rtx)
3983 return 0;
1020a5ab
RH
3984 if (REG_P (t)
3985 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3986 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3987 return 0;
2c5a510c
RH
3988
3989 return general_operand (op, mode);
3990}
3991
3992/* Return false if this is any eliminable register. Otherwise
3993 register_operand or const_int. */
3994
3995int
8d531ab9 3996nonmemory_no_elim_operand (rtx op, enum machine_mode mode)
2c5a510c
RH
3997{
3998 rtx t = op;
3999 if (GET_CODE (t) == SUBREG)
4000 t = SUBREG_REG (t);
4001 if (t == arg_pointer_rtx || t == frame_pointer_rtx
4002 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
4003 || t == virtual_stack_dynamic_rtx)
4004 return 0;
4005
4006 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
4007}
4008
7ec70495
JH
4009/* Return false if this is any eliminable register or stack register,
4010 otherwise work like register_operand. */
4011
4012int
8d531ab9 4013index_register_operand (rtx op, enum machine_mode mode)
7ec70495
JH
4014{
4015 rtx t = op;
4016 if (GET_CODE (t) == SUBREG)
4017 t = SUBREG_REG (t);
4018 if (!REG_P (t))
4019 return 0;
4020 if (t == arg_pointer_rtx
4021 || t == frame_pointer_rtx
4022 || t == virtual_incoming_args_rtx
4023 || t == virtual_stack_vars_rtx
4024 || t == virtual_stack_dynamic_rtx
4025 || REGNO (t) == STACK_POINTER_REGNUM)
4026 return 0;
4027
4028 return general_operand (op, mode);
4029}
4030
e075ae69 4031/* Return true if op is a Q_REGS class register. */
b840bfb0 4032
e075ae69 4033int
8d531ab9 4034q_regs_operand (rtx op, enum machine_mode mode)
b840bfb0 4035{
e075ae69
RH
4036 if (mode != VOIDmode && GET_MODE (op) != mode)
4037 return 0;
4038 if (GET_CODE (op) == SUBREG)
4039 op = SUBREG_REG (op);
7799175f 4040 return ANY_QI_REG_P (op);
0f290768 4041}
b840bfb0 4042
4977bab6
ZW
4043/* Return true if op is an flags register. */
4044
4045int
8d531ab9 4046flags_reg_operand (rtx op, enum machine_mode mode)
4977bab6
ZW
4047{
4048 if (mode != VOIDmode && GET_MODE (op) != mode)
4049 return 0;
4050 return REG_P (op) && REGNO (op) == FLAGS_REG && GET_MODE (op) != VOIDmode;
4051}
4052
e075ae69 4053/* Return true if op is a NON_Q_REGS class register. */
b840bfb0 4054
e075ae69 4055int
8d531ab9 4056non_q_regs_operand (rtx op, enum machine_mode mode)
e075ae69
RH
4057{
4058 if (mode != VOIDmode && GET_MODE (op) != mode)
4059 return 0;
4060 if (GET_CODE (op) == SUBREG)
4061 op = SUBREG_REG (op);
4062 return NON_QI_REG_P (op);
0f290768 4063}
b840bfb0 4064
/* Return 1 when OP is a constant-pool memory reference whose CONST_VECTOR
   value has all elements above element 0 equal to zero, i.e. a scalar
   load that implicitly zero-extends into the full vector.  */
int
zero_extended_scalar_load_operand (rtx op,
				   enum machine_mode mode ATTRIBUTE_UNUSED)
{
  unsigned n_elts;
  if (GET_CODE (op) != MEM)
    return 0;
  /* Resolve the MEM to the constant it loads, if any.  */
  op = maybe_get_pool_constant (op);
  if (!op)
    return 0;
  if (GET_CODE (op) != CONST_VECTOR)
    return 0;
  n_elts =
    (GET_MODE_SIZE (GET_MODE (op)) /
     GET_MODE_SIZE (GET_MODE_INNER (GET_MODE (op))));
  /* Check elements n_elts-1 down to 1; element 0 (the scalar actually
     loaded) is deliberately not examined.  */
  for (n_elts--; n_elts > 0; n_elts--)
    {
      rtx elt = CONST_VECTOR_ELT (op, n_elts);
      if (elt != CONST0_RTX (GET_MODE_INNER (GET_MODE (op))))
	return 0;
    }
  return 1;
}
4088
fdc4b40b
JH
4089/* Return 1 when OP is operand acceptable for standard SSE move. */
4090int
b96a374d 4091vector_move_operand (rtx op, enum machine_mode mode)
fdc4b40b
JH
4092{
4093 if (nonimmediate_operand (op, mode))
4094 return 1;
4095 if (GET_MODE (op) != mode && mode != VOIDmode)
4096 return 0;
4097 return (op == CONST0_RTX (GET_MODE (op)));
4098}
4099
74dc3e94
RH
4100/* Return true if op if a valid address, and does not contain
4101 a segment override. */
4102
4103int
8d531ab9 4104no_seg_address_operand (rtx op, enum machine_mode mode)
74dc3e94
RH
4105{
4106 struct ix86_address parts;
4107
4108 if (! address_operand (op, mode))
4109 return 0;
4110
4111 if (! ix86_decompose_address (op, &parts))
4112 abort ();
4113
4114 return parts.seg == SEG_DEFAULT;
4115}
4116
915119a5
BS
/* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
   insns.  */
int
sse_comparison_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  enum rtx_code code = GET_CODE (op);
  switch (code)
    {
    /* Operations supported directly.  */
    case EQ:
    case LT:
    case LE:
    case UNORDERED:
    case NE:
    case UNGE:
    case UNGT:
    case ORDERED:
      return 1;
    /* These are equivalent to ones above in non-IEEE comparisons.
       With -mieee-fp they must be rejected, since the substitution
       would not preserve NaN semantics.  */
    case UNEQ:
    case UNLT:
    case UNLE:
    case LTGT:
    case GE:
    case GT:
      return !TARGET_IEEE_FP;
    default:
      return 0;
    }
}
/* Return 1 if OP is a valid comparison operator in valid mode.  */
int
ix86_comparison_operator (rtx op, enum machine_mode mode)
{
  enum machine_mode inmode;
  enum rtx_code code = GET_CODE (op);
  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;
  if (!COMPARISON_P (op))
    return 0;
  /* The mode of the compared value determines which flag bits are
     valid, and therefore which comparison codes are representable.  */
  inmode = GET_MODE (XEXP (op, 0));

  if (inmode == CCFPmode || inmode == CCFPUmode)
    {
      /* FP comparisons are valid only when they need neither a bypass
	 jump nor a second jump.  */
      enum rtx_code second_code, bypass_code;
      ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
      return (bypass_code == NIL && second_code == NIL);
    }
  switch (code)
    {
    case EQ: case NE:
      return 1;
    case LT: case GE:
      /* Sign-flag comparisons work in all the integer CC modes.  */
      if (inmode == CCmode || inmode == CCGCmode
	  || inmode == CCGOCmode || inmode == CCNOmode)
	return 1;
      return 0;
    case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
      /* Unsigned comparisons need the carry flag, only valid in CCmode.  */
      if (inmode == CCmode)
	return 1;
      return 0;
    case GT: case LE:
      if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
	return 1;
      return 0;
    default:
      return 0;
    }
}
4186
e6e81735
JH
4187/* Return 1 if OP is a valid comparison operator testing carry flag
4188 to be set. */
4189int
8d531ab9 4190ix86_carry_flag_operator (rtx op, enum machine_mode mode)
e6e81735
JH
4191{
4192 enum machine_mode inmode;
4193 enum rtx_code code = GET_CODE (op);
4194
4195 if (mode != VOIDmode && GET_MODE (op) != mode)
4196 return 0;
ec8e098d 4197 if (!COMPARISON_P (op))
e6e81735
JH
4198 return 0;
4199 inmode = GET_MODE (XEXP (op, 0));
4200 if (GET_CODE (XEXP (op, 0)) != REG
4201 || REGNO (XEXP (op, 0)) != 17
4202 || XEXP (op, 1) != const0_rtx)
4203 return 0;
4204
4205 if (inmode == CCFPmode || inmode == CCFPUmode)
4206 {
4207 enum rtx_code second_code, bypass_code;
4208
4209 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
4210 if (bypass_code != NIL || second_code != NIL)
4211 return 0;
4212 code = ix86_fp_compare_code_to_integer (code);
4213 }
4214 else if (inmode != CCmode)
4215 return 0;
4216 return code == LTU;
4217}
4218
/* Return 1 if OP is a comparison operator that can be issued by fcmov.  */

int
fcmov_comparison_operator (rtx op, enum machine_mode mode)
{
  enum machine_mode inmode;
  enum rtx_code code = GET_CODE (op);

  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;
  if (!COMPARISON_P (op))
    return 0;
  inmode = GET_MODE (XEXP (op, 0));
  if (inmode == CCFPmode || inmode == CCFPUmode)
    {
      /* FP comparisons must need neither a bypass nor a second jump;
	 map the code onto the equivalent integer condition.  */
      enum rtx_code second_code, bypass_code;

      ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
      if (bypass_code != NIL || second_code != NIL)
	return 0;
      code = ix86_fp_compare_code_to_integer (code);
    }
  /* i387 supports just limited amount of conditional codes.  */
  switch (code)
    {
    case LTU: case GTU: case LEU: case GEU:
      /* Unsigned codes need the carry flag, present in these modes.  */
      if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
	return 1;
      return 0;
    case ORDERED: case UNORDERED:
    case EQ: case NE:
      return 1;
    default:
      return 0;
    }
}
b840bfb0 4255
e9e80858
JH
4256/* Return 1 if OP is a binary operator that can be promoted to wider mode. */
4257
4258int
8d531ab9 4259promotable_binary_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
e9e80858
JH
4260{
4261 switch (GET_CODE (op))
4262 {
4263 case MULT:
4264 /* Modern CPUs have same latency for HImode and SImode multiply,
4265 but 386 and 486 do HImode multiply faster. */
9e555526 4266 return ix86_tune > PROCESSOR_I486;
e9e80858
JH
4267 case PLUS:
4268 case AND:
4269 case IOR:
4270 case XOR:
4271 case ASHIFT:
4272 return 1;
4273 default:
4274 return 0;
4275 }
4276}
4277
e075ae69
RH
4278/* Nearly general operand, but accept any const_double, since we wish
4279 to be able to drop them into memory rather than have them get pulled
4280 into registers. */
b840bfb0 4281
2a2ab3f9 4282int
8d531ab9 4283cmp_fp_expander_operand (rtx op, enum machine_mode mode)
2a2ab3f9 4284{
e075ae69 4285 if (mode != VOIDmode && mode != GET_MODE (op))
0b6b2900 4286 return 0;
e075ae69 4287 if (GET_CODE (op) == CONST_DOUBLE)
2a2ab3f9 4288 return 1;
e075ae69 4289 return general_operand (op, mode);
2a2ab3f9
JVA
4290}
4291
/* Match an SI or HImode register for a zero_extract.  */

int
ext_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  int regno;
  /* DImode is additionally allowed in 64-bit mode only.  */
  if ((!TARGET_64BIT || GET_MODE (op) != DImode)
      && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
    return 0;

  if (!register_operand (op, VOIDmode))
    return 0;

  /* Be careful to accept only registers having upper parts.
     Hard regs below 4 are %eax..%ebx, which have %ah..%bh; pseudos
     (above LAST_VIRTUAL_REGISTER) are accepted as well.  */
  regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
  return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
}
4309
4310/* Return 1 if this is a valid binary floating-point operation.
0f290768 4311 OP is the expression matched, and MODE is its mode. */
e075ae69
RH
4312
4313int
8d531ab9 4314binary_fp_operator (rtx op, enum machine_mode mode)
e075ae69
RH
4315{
4316 if (mode != VOIDmode && mode != GET_MODE (op))
4317 return 0;
4318
2a2ab3f9
JVA
4319 switch (GET_CODE (op))
4320 {
e075ae69
RH
4321 case PLUS:
4322 case MINUS:
4323 case MULT:
4324 case DIV:
4325 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
e9a25f70 4326
2a2ab3f9
JVA
4327 default:
4328 return 0;
4329 }
4330}
fee2770d 4331
e075ae69 4332int
8d531ab9 4333mult_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
e075ae69
RH
4334{
4335 return GET_CODE (op) == MULT;
4336}
4337
4338int
8d531ab9 4339div_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
e075ae69
RH
4340{
4341 return GET_CODE (op) == DIV;
4342}
0a726ef1
JL
4343
4344int
b96a374d 4345arith_or_logical_operator (rtx op, enum machine_mode mode)
0a726ef1 4346{
e075ae69 4347 return ((mode == VOIDmode || GET_MODE (op) == mode)
ec8e098d 4348 && ARITHMETIC_P (op));
0a726ef1
JL
4349}
4350
e075ae69 4351/* Returns 1 if OP is memory operand with a displacement. */
fee2770d
RS
4352
4353int
8d531ab9 4354memory_displacement_operand (rtx op, enum machine_mode mode)
4f2c8ebb 4355{
e075ae69 4356 struct ix86_address parts;
e9a25f70 4357
e075ae69
RH
4358 if (! memory_operand (op, mode))
4359 return 0;
4360
4361 if (! ix86_decompose_address (XEXP (op, 0), &parts))
4362 abort ();
4363
4364 return parts.disp != NULL_RTX;
4f2c8ebb
RS
4365}
4366
/* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
   re-recognize the operand to avoid a copy_to_mode_reg that will fail.

   ??? It seems likely that this will only work because cmpsi is an
   expander, and no actual insns use this.  */

int
cmpsi_operand (rtx op, enum machine_mode mode)
{
  if (nonimmediate_operand (op, mode))
    return 1;

  /* Also accept (and (zero_extract X len=8 pos=8) const_int), the
     form used to test an 8-bit field at bit 8 (e.g. %ah/%bh).  */
  if (GET_CODE (op) == AND
      && GET_MODE (op) == SImode
      && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
      && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
      && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
      && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
      && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
      && GET_CODE (XEXP (op, 1)) == CONST_INT)
    return 1;

  return 0;
}
d784886d 4391
e075ae69
RH
4392/* Returns 1 if OP is memory operand that can not be represented by the
4393 modRM array. */
d784886d
RK
4394
4395int
8d531ab9 4396long_memory_operand (rtx op, enum machine_mode mode)
d784886d 4397{
e075ae69 4398 if (! memory_operand (op, mode))
d784886d
RK
4399 return 0;
4400
e075ae69 4401 return memory_address_length (op) != 0;
d784886d 4402}
2247f6ed
JH
4403
4404/* Return nonzero if the rtx is known aligned. */
4405
4406int
b96a374d 4407aligned_operand (rtx op, enum machine_mode mode)
2247f6ed
JH
4408{
4409 struct ix86_address parts;
4410
4411 if (!general_operand (op, mode))
4412 return 0;
4413
0f290768 4414 /* Registers and immediate operands are always "aligned". */
2247f6ed
JH
4415 if (GET_CODE (op) != MEM)
4416 return 1;
4417
0f290768 4418 /* Don't even try to do any aligned optimizations with volatiles. */
2247f6ed
JH
4419 if (MEM_VOLATILE_P (op))
4420 return 0;
4421
4422 op = XEXP (op, 0);
4423
4424 /* Pushes and pops are only valid on the stack pointer. */
4425 if (GET_CODE (op) == PRE_DEC
4426 || GET_CODE (op) == POST_INC)
4427 return 1;
4428
4429 /* Decode the address. */
4430 if (! ix86_decompose_address (op, &parts))
4431 abort ();
4432
4433 /* Look for some component that isn't known to be aligned. */
4434 if (parts.index)
4435 {
4436 if (parts.scale < 4
bdb429a5 4437 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
2247f6ed
JH
4438 return 0;
4439 }
4440 if (parts.base)
4441 {
bdb429a5 4442 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
2247f6ed
JH
4443 return 0;
4444 }
4445 if (parts.disp)
4446 {
4447 if (GET_CODE (parts.disp) != CONST_INT
4448 || (INTVAL (parts.disp) & 3) != 0)
4449 return 0;
4450 }
4451
4452 /* Didn't find one -- this must be an aligned address. */
4453 return 1;
4454}
e075ae69 4455\f
881b2a96
RS
4456/* Initialize the table of extra 80387 mathematical constants. */
4457
4458static void
b96a374d 4459init_ext_80387_constants (void)
881b2a96
RS
4460{
4461 static const char * cst[5] =
4462 {
4463 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4464 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4465 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4466 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4467 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4468 };
4469 int i;
4470
4471 for (i = 0; i < 5; i++)
4472 {
4473 real_from_string (&ext_80387_constants_table[i], cst[i]);
4474 /* Ensure each constant is rounded to XFmode precision. */
1f48e56d 4475 real_convert (&ext_80387_constants_table[i],
f8a1ebc6 4476 XFmode, &ext_80387_constants_table[i]);
881b2a96
RS
4477 }
4478
4479 ext_80387_constants_init = 1;
4480}
4481
e075ae69 4482/* Return true if the constant is something that can be loaded with
881b2a96 4483 a special instruction. */
57dbca5e
BS
4484
4485int
b96a374d 4486standard_80387_constant_p (rtx x)
57dbca5e 4487{
2b04e52b 4488 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
e075ae69 4489 return -1;
881b2a96 4490
2b04e52b
JH
4491 if (x == CONST0_RTX (GET_MODE (x)))
4492 return 1;
4493 if (x == CONST1_RTX (GET_MODE (x)))
4494 return 2;
881b2a96 4495
22cc69c4
RS
4496 /* For XFmode constants, try to find a special 80387 instruction when
4497 optimizing for size or on those CPUs that benefit from them. */
f8a1ebc6 4498 if (GET_MODE (x) == XFmode
22cc69c4 4499 && (optimize_size || x86_ext_80387_constants & TUNEMASK))
881b2a96
RS
4500 {
4501 REAL_VALUE_TYPE r;
4502 int i;
4503
4504 if (! ext_80387_constants_init)
4505 init_ext_80387_constants ();
4506
4507 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4508 for (i = 0; i < 5; i++)
4509 if (real_identical (&r, &ext_80387_constants_table[i]))
4510 return i + 3;
4511 }
4512
e075ae69 4513 return 0;
57dbca5e
BS
4514}
4515
881b2a96
RS
4516/* Return the opcode of the special instruction to be used to load
4517 the constant X. */
4518
4519const char *
b96a374d 4520standard_80387_constant_opcode (rtx x)
881b2a96
RS
4521{
4522 switch (standard_80387_constant_p (x))
4523 {
b96a374d 4524 case 1:
881b2a96
RS
4525 return "fldz";
4526 case 2:
4527 return "fld1";
b96a374d 4528 case 3:
881b2a96
RS
4529 return "fldlg2";
4530 case 4:
4531 return "fldln2";
b96a374d 4532 case 5:
881b2a96
RS
4533 return "fldl2e";
4534 case 6:
4535 return "fldl2t";
b96a374d 4536 case 7:
881b2a96
RS
4537 return "fldpi";
4538 }
4539 abort ();
4540}
4541
4542/* Return the CONST_DOUBLE representing the 80387 constant that is
4543 loaded by the specified special instruction. The argument IDX
4544 matches the return value from standard_80387_constant_p. */
4545
4546rtx
b96a374d 4547standard_80387_constant_rtx (int idx)
881b2a96
RS
4548{
4549 int i;
4550
4551 if (! ext_80387_constants_init)
4552 init_ext_80387_constants ();
4553
4554 switch (idx)
4555 {
4556 case 3:
4557 case 4:
4558 case 5:
4559 case 6:
4560 case 7:
4561 i = idx - 3;
4562 break;
4563
4564 default:
4565 abort ();
4566 }
4567
1f48e56d 4568 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
f8a1ebc6 4569 XFmode);
881b2a96
RS
4570}
4571
2b04e52b
JH
4572/* Return 1 if X is FP constant we can load to SSE register w/o using memory.
4573 */
4574int
b96a374d 4575standard_sse_constant_p (rtx x)
2b04e52b 4576{
0e67d460
JH
4577 if (x == const0_rtx)
4578 return 1;
2b04e52b
JH
4579 return (x == CONST0_RTX (GET_MODE (x)));
4580}
4581
2a2ab3f9
JVA
4582/* Returns 1 if OP contains a symbol reference */
4583
4584int
b96a374d 4585symbolic_reference_mentioned_p (rtx op)
2a2ab3f9 4586{
8d531ab9
KH
4587 const char *fmt;
4588 int i;
2a2ab3f9
JVA
4589
4590 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4591 return 1;
4592
4593 fmt = GET_RTX_FORMAT (GET_CODE (op));
4594 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4595 {
4596 if (fmt[i] == 'E')
4597 {
8d531ab9 4598 int j;
2a2ab3f9
JVA
4599
4600 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4601 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4602 return 1;
4603 }
e9a25f70 4604
2a2ab3f9
JVA
4605 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4606 return 1;
4607 }
4608
4609 return 0;
4610}
e075ae69
RH
4611
4612/* Return 1 if it is appropriate to emit `ret' instructions in the
4613 body of a function. Do this only if the epilogue is simple, needing a
4614 couple of insns. Prior to reloading, we can't tell how many registers
4615 must be saved, so return 0 then. Return 0 if there is no frame
4616 marker to de-allocate.
4617
4618 If NON_SAVING_SETJMP is defined and true, then it is not possible
4619 for the epilogue to be simple, so return 0. This is a special case
4620 since NON_SAVING_SETJMP will not cause regs_ever_live to change
4621 until final, but jump_optimize may need to know sooner if a
4622 `return' is OK. */
32b5b1aa
SC
4623
4624int
b96a374d 4625ix86_can_use_return_insn_p (void)
32b5b1aa 4626{
4dd2ac2c 4627 struct ix86_frame frame;
9a7372d6 4628
e075ae69
RH
4629#ifdef NON_SAVING_SETJMP
4630 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
4631 return 0;
4632#endif
9a7372d6
RH
4633
4634 if (! reload_completed || frame_pointer_needed)
4635 return 0;
32b5b1aa 4636
9a7372d6
RH
4637 /* Don't allow more than 32 pop, since that's all we can do
4638 with one instruction. */
4639 if (current_function_pops_args
4640 && current_function_args_size >= 32768)
e075ae69 4641 return 0;
32b5b1aa 4642
4dd2ac2c
JH
4643 ix86_compute_frame_layout (&frame);
4644 return frame.to_allocate == 0 && frame.nregs == 0;
e075ae69 4645}
6189a572
JH
4646\f
/* Return 1 if VALUE can be stored in the sign extended immediate field.
   Used for x86-64 instructions that take a 32-bit immediate which the
   hardware sign-extends to 64 bits.  The answer for symbolic operands
   depends on the code model (ix86_cmodel).  */
int
x86_64_sign_extended_value (rtx value)
{
  switch (GET_CODE (value))
    {
      /* CONST_DOUBLES never match, since HOST_BITS_PER_WIDE_INT is known
         to be at least 32 and this all acceptable constants are
	 represented as CONST_INT.  */
    case CONST_INT:
      if (HOST_BITS_PER_WIDE_INT == 32)
	return 1;
      else
	{
	  /* Truncate to DImode first, then check the value survives a
	     round-trip through SImode (i.e. fits in signed 32 bits).  */
	  HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
	  return trunc_int_for_mode (val, SImode) == val;
	}
      break;

      /* For certain code models, the symbolic references are known to fit.
	 in CM_SMALL_PIC model we know it fits if it is local to the shared
	 library.  Don't count TLS SYMBOL_REFs here, since they should fit
	 only if inside of UNSPEC handled below.  */
    case SYMBOL_REF:
      /* TLS symbols are not constant.  */
      if (tls_symbolic_operand (value, Pmode))
	return false;
      return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL);

      /* For certain code models, the code is near as well.  */
    case LABEL_REF:
      return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM
	      || ix86_cmodel == CM_KERNEL);

      /* We also may accept the offsetted memory references in certain special
         cases.  */
    case CONST:
      /* Bare TLS/GOT unspecs are known to fit in 32 bits.  */
      if (GET_CODE (XEXP (value, 0)) == UNSPEC)
	switch (XINT (XEXP (value, 0), 1))
	  {
	  case UNSPEC_GOTPCREL:
	  case UNSPEC_DTPOFF:
	  case UNSPEC_GOTNTPOFF:
	  case UNSPEC_NTPOFF:
	    return 1;
	  default:
	    break;
	  }
      /* (symbol_ref/label_ref/unspec + const_int) forms.  */
      if (GET_CODE (XEXP (value, 0)) == PLUS)
	{
	  rtx op1 = XEXP (XEXP (value, 0), 0);
	  rtx op2 = XEXP (XEXP (value, 0), 1);
	  HOST_WIDE_INT offset;

	  if (ix86_cmodel == CM_LARGE)
	    return 0;
	  if (GET_CODE (op2) != CONST_INT)
	    return 0;
	  offset = trunc_int_for_mode (INTVAL (op2), DImode);
	  switch (GET_CODE (op1))
	    {
	    case SYMBOL_REF:
	      /* For CM_SMALL assume that latest object is 16MB before
		 end of 31bits boundary.  We may also accept pretty
		 large negative constants knowing that all objects are
		 in the positive half of address space.  */
	      if (ix86_cmodel == CM_SMALL
		  && offset < 16*1024*1024
		  && trunc_int_for_mode (offset, SImode) == offset)
		return 1;
	      /* For CM_KERNEL we know that all object resist in the
		 negative half of 32bits address space.  We may not
		 accept negative offsets, since they may be just off
		 and we may accept pretty large positive ones.  */
	      if (ix86_cmodel == CM_KERNEL
		  && offset > 0
		  && trunc_int_for_mode (offset, SImode) == offset)
		return 1;
	      break;
	    case LABEL_REF:
	      /* These conditions are similar to SYMBOL_REF ones, just the
		 constraints for code models differ.  */
	      if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
		  && offset < 16*1024*1024
		  && trunc_int_for_mode (offset, SImode) == offset)
		return 1;
	      if (ix86_cmodel == CM_KERNEL
		  && offset > 0
		  && trunc_int_for_mode (offset, SImode) == offset)
		return 1;
	      break;
	    case UNSPEC:
	      /* TLS offsets must be positive and fit in 32 bits.  */
	      switch (XINT (op1, 1))
		{
		case UNSPEC_DTPOFF:
		case UNSPEC_NTPOFF:
		  if (offset > 0
		      && trunc_int_for_mode (offset, SImode) == offset)
		    return 1;
		}
	      break;
	    default:
	      return 0;
	    }
	}
      return 0;
    default:
      return 0;
    }
}
4757
/* Return 1 if VALUE can be stored in the zero extended immediate field.
   Used for x86-64 instructions whose 32-bit immediate is zero-extended
   to 64 bits, so only non-negative 32-bit values qualify.  */
int
x86_64_zero_extended_value (rtx value)
{
  switch (GET_CODE (value))
    {
    case CONST_DOUBLE:
      /* With 32-bit HOST_WIDE_INT, a VOIDmode CONST_DOUBLE carries a
	 64-bit integer; it fits iff the high half is zero.  */
      if (HOST_BITS_PER_WIDE_INT == 32)
	return (GET_MODE (value) == VOIDmode
		&& !CONST_DOUBLE_HIGH (value));
      else
	return 0;
    case CONST_INT:
      if (HOST_BITS_PER_WIDE_INT == 32)
	return INTVAL (value) >= 0;
      else
	return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
      break;

      /* For certain code models, the symbolic references are known to fit.  */
    case SYMBOL_REF:
      /* TLS symbols are not constant.  */
      if (tls_symbolic_operand (value, Pmode))
	return false;
      return ix86_cmodel == CM_SMALL;

      /* For certain code models, the code is near as well.  */
    case LABEL_REF:
      return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;

      /* We also may accept the offsetted memory references in certain special
         cases.  */
    case CONST:
      if (GET_CODE (XEXP (value, 0)) == PLUS)
	{
	  rtx op1 = XEXP (XEXP (value, 0), 0);
	  rtx op2 = XEXP (XEXP (value, 0), 1);

	  if (ix86_cmodel == CM_LARGE)
	    return 0;
	  switch (GET_CODE (op1))
	    {
	    case SYMBOL_REF:
	      return 0;
	      /* NOTE(review): everything below this `return 0' up to the
		 `break' is unreachable — symbol+offset is always rejected
		 here.  Presumably deliberate (symbol+offset is unsafe to
		 zero-extend), leaving the old code dead; confirm against
		 upstream history before cleaning up.  */
	      /* For small code model we may accept pretty large positive
		 offsets, since one bit is available for free.  Negative
		 offsets are limited by the size of NULL pointer area
		 specified by the ABI.  */
	      if (ix86_cmodel == CM_SMALL
		  && GET_CODE (op2) == CONST_INT
		  && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
		  && (trunc_int_for_mode (INTVAL (op2), SImode)
		      == INTVAL (op2)))
		return 1;
	      /* ??? For the kernel, we may accept adjustment of
		 -0x10000000, since we know that it will just convert
		 negative address space to positive, but perhaps this
		 is not worthwhile.  */
	      break;
	    case LABEL_REF:
	      /* These conditions are similar to SYMBOL_REF ones, just the
		 constraints for code models differ.  */
	      if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
		  && GET_CODE (op2) == CONST_INT
		  && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
		  && (trunc_int_for_mode (INTVAL (op2), SImode)
		      == INTVAL (op2)))
		return 1;
	      break;
	    default:
	      return 0;
	    }
	}
      return 0;
    default:
      return 0;
    }
}
6fca22eb
RH
4836
4837/* Value should be nonzero if functions must have frame pointers.
4838 Zero means the frame pointer need not be set up (and parms may
4839 be accessed via the stack pointer) in functions that seem suitable. */
4840
4841int
b96a374d 4842ix86_frame_pointer_required (void)
6fca22eb
RH
4843{
4844 /* If we accessed previous frames, then the generated code expects
4845 to be able to access the saved ebp value in our frame. */
4846 if (cfun->machine->accesses_prev_frame)
4847 return 1;
a4f31c00 4848
6fca22eb
RH
4849 /* Several x86 os'es need a frame pointer for other reasons,
4850 usually pertaining to setjmp. */
4851 if (SUBTARGET_FRAME_POINTER_REQUIRED)
4852 return 1;
4853
4854 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4855 the frame pointer by default. Turn it back on now if we've not
4856 got a leaf function. */
a7943381 4857 if (TARGET_OMIT_LEAF_FRAME_POINTER
55ba61f3
JH
4858 && (!current_function_is_leaf))
4859 return 1;
4860
4861 if (current_function_profile)
6fca22eb
RH
4862 return 1;
4863
4864 return 0;
4865}
4866
/* Record that the current function accesses previous call frames.
   This flag feeds ix86_frame_pointer_required, which then forces a
   frame pointer so the saved ebp chain can be walked.  */

void
ix86_setup_frame_addresses (void)
{
  cfun->machine->accesses_prev_frame = 1;
}
e075ae69 4874\f
145aacc2
RH
4875#if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
4876# define USE_HIDDEN_LINKONCE 1
4877#else
4878# define USE_HIDDEN_LINKONCE 0
4879#endif
4880
bd09bdeb 4881static int pic_labels_used;
e9a25f70 4882
145aacc2
RH
4883/* Fills in the label name that should be used for a pc thunk for
4884 the given register. */
4885
4886static void
b96a374d 4887get_pc_thunk_name (char name[32], unsigned int regno)
145aacc2
RH
4888{
4889 if (USE_HIDDEN_LINKONCE)
4890 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4891 else
4892 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
4893}
4894
4895
e075ae69
RH
/* This function generates code for -fpic that loads %ebx with
   the return address of the caller and then returns.
   Emitted at end-of-file: one thunk per register recorded in
   pic_labels_used, either as a hidden link-once function or a plain
   label in the text section.  */

void
ix86_file_end (void)
{
  rtx xops[2];
  int regno;

  /* pic_labels_used is a bitmask over the 8 general registers.  */
  for (regno = 0; regno < 8; ++regno)
    {
      char name[32];

      if (! ((pic_labels_used >> regno) & 1))
	continue;

      get_pc_thunk_name (name, regno);

      if (USE_HIDDEN_LINKONCE)
	{
	  tree decl;

	  /* Build a public, one-only FUNCTION_DECL so the thunk can be
	     merged across translation units by the linker.  */
	  decl = build_decl (FUNCTION_DECL, get_identifier (name),
			     error_mark_node);
	  TREE_PUBLIC (decl) = 1;
	  TREE_STATIC (decl) = 1;
	  DECL_ONE_ONLY (decl) = 1;

	  (*targetm.asm_out.unique_section) (decl, 0);
	  named_section (decl, NULL, 0);

	  (*targetm.asm_out.globalize_label) (asm_out_file, name);
	  fputs ("\t.hidden\t", asm_out_file);
	  assemble_name (asm_out_file, name);
	  fputc ('\n', asm_out_file);
	  ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
	}
      else
	{
	  text_section ();
	  ASM_OUTPUT_LABEL (asm_out_file, name);
	}

      /* Thunk body: load the return address (at the top of the stack)
	 into the target register, then return.  */
      xops[0] = gen_rtx_REG (SImode, regno);
      xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
      output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
      output_asm_insn ("ret", xops);
    }

  if (NEED_INDICATE_EXEC_STACK)
    file_end_indicate_exec_stack ();
}
32b5b1aa 4948
/* Emit code for the SET_GOT patterns: load the GOT base address into
   DEST.  Returns the (empty) template string expected by the insn
   output machinery; the real work is done via output_asm_insn.  */

const char *
output_set_got (rtx dest)
{
  rtx xops[3];

  xops[0] = dest;
  xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);

  if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
    {
      /* Classic call/pop sequence (or a plain move when not PIC).  */
      xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());

      if (!flag_pic)
	output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
      else
	output_asm_insn ("call\t%a2", xops);

#if TARGET_MACHO
      /* Output the "canonical" label name ("Lxx$pb") here too.  This
         is what will be referred to by the Mach-O PIC subsystem.  */
      ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
#endif
      (*targetm.asm_out.internal_label) (asm_out_file, "L",
				 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));

      if (flag_pic)
	output_asm_insn ("pop{l}\t%0", xops);
    }
  else
    {
      /* Deep-branch-prediction targets: call a per-register pc thunk
	 instead of a call/pop pair; record the register so ix86_file_end
	 emits the thunk.  */
      char name[32];
      get_pc_thunk_name (name, REGNO (dest));
      pic_labels_used |= 1 << REGNO (dest);

      xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
      xops[2] = gen_rtx_MEM (QImode, xops[2]);
      output_asm_insn ("call\t%X2", xops);
    }

  /* Finally add the GOT offset to the loaded pc value.  */
  if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
    output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
  else if (!TARGET_MACHO)
    output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);

  return "";
}
8dfe5673 4997
0d7d98ee 4998/* Generate an "push" pattern for input ARG. */
e9a25f70 4999
e075ae69 5000static rtx
b96a374d 5001gen_push (rtx arg)
e9a25f70 5002{
c5c76735 5003 return gen_rtx_SET (VOIDmode,
0d7d98ee
JH
5004 gen_rtx_MEM (Pmode,
5005 gen_rtx_PRE_DEC (Pmode,
c5c76735
JL
5006 stack_pointer_rtx)),
5007 arg);
e9a25f70
JL
5008}
5009
bd09bdeb
RH
5010/* Return >= 0 if there is an unused call-clobbered register available
5011 for the entire function. */
5012
5013static unsigned int
b96a374d 5014ix86_select_alt_pic_regnum (void)
bd09bdeb
RH
5015{
5016 if (current_function_is_leaf && !current_function_profile)
5017 {
5018 int i;
5019 for (i = 2; i >= 0; --i)
5020 if (!regs_ever_live[i])
5021 return i;
5022 }
5023
5024 return INVALID_REGNUM;
5025}
fce5a9f2 5026
4dd2ac2c
JH
5027/* Return 1 if we need to save REGNO. */
5028static int
b96a374d 5029ix86_save_reg (unsigned int regno, int maybe_eh_return)
1020a5ab 5030{
bd09bdeb
RH
5031 if (pic_offset_table_rtx
5032 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
5033 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
66edd3b4 5034 || current_function_profile
8c38a24f
MM
5035 || current_function_calls_eh_return
5036 || current_function_uses_const_pool))
bd09bdeb
RH
5037 {
5038 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
5039 return 0;
5040 return 1;
5041 }
1020a5ab
RH
5042
5043 if (current_function_calls_eh_return && maybe_eh_return)
5044 {
5045 unsigned i;
5046 for (i = 0; ; i++)
5047 {
b531087a 5048 unsigned test = EH_RETURN_DATA_REGNO (i);
1020a5ab
RH
5049 if (test == INVALID_REGNUM)
5050 break;
9b690711 5051 if (test == regno)
1020a5ab
RH
5052 return 1;
5053 }
5054 }
4dd2ac2c 5055
1020a5ab
RH
5056 return (regs_ever_live[regno]
5057 && !call_used_regs[regno]
5058 && !fixed_regs[regno]
5059 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4dd2ac2c
JH
5060}
5061
0903fcab
JH
5062/* Return number of registers to be saved on the stack. */
5063
5064static int
b96a374d 5065ix86_nsaved_regs (void)
0903fcab
JH
5066{
5067 int nregs = 0;
0903fcab
JH
5068 int regno;
5069
4dd2ac2c 5070 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
1020a5ab 5071 if (ix86_save_reg (regno, true))
4dd2ac2c 5072 nregs++;
0903fcab
JH
5073 return nregs;
5074}
5075
5076/* Return the offset between two registers, one to be eliminated, and the other
5077 its replacement, at the start of a routine. */
5078
5079HOST_WIDE_INT
b96a374d 5080ix86_initial_elimination_offset (int from, int to)
0903fcab 5081{
4dd2ac2c
JH
5082 struct ix86_frame frame;
5083 ix86_compute_frame_layout (&frame);
564d80f4
JH
5084
5085 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4dd2ac2c 5086 return frame.hard_frame_pointer_offset;
564d80f4
JH
5087 else if (from == FRAME_POINTER_REGNUM
5088 && to == HARD_FRAME_POINTER_REGNUM)
4dd2ac2c 5089 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
0903fcab
JH
5090 else
5091 {
564d80f4
JH
5092 if (to != STACK_POINTER_REGNUM)
5093 abort ();
5094 else if (from == ARG_POINTER_REGNUM)
4dd2ac2c 5095 return frame.stack_pointer_offset;
564d80f4
JH
5096 else if (from != FRAME_POINTER_REGNUM)
5097 abort ();
0903fcab 5098 else
4dd2ac2c 5099 return frame.stack_pointer_offset - frame.frame_pointer_offset;
0903fcab
JH
5100 }
5101}
5102
/* Fill structure ix86_frame about frame of currently computed function.
   Computes, in order: number of saved registers, the fast/slow prologue
   decision, the hard frame pointer / frame pointer / stack pointer
   offsets, padding for stack alignment, the total allocation size and
   the red zone adjustment (x86-64 leaf functions).  */

static void
ix86_compute_frame_layout (struct ix86_frame *frame)
{
  HOST_WIDE_INT total_size;
  unsigned int stack_alignment_needed;
  HOST_WIDE_INT offset;
  unsigned int preferred_alignment;
  HOST_WIDE_INT size = get_frame_size ();

  frame->nregs = ix86_nsaved_regs ();
  /* NOTE(review): total_size is assigned but never read below — looks
     like a leftover; confirm before removing.  */
  total_size = size;

  stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
  preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;

  /* During reload iteration the amount of registers saved can change.
     Recompute the value as needed.  Do not recompute when amount of registers
     didn't change as reload does mutiple calls to the function and does not
     expect the decision to change within single iteration.  */
  if (!optimize_size
      && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
    {
      int count = frame->nregs;

      cfun->machine->use_fast_prologue_epilogue_nregs = count;
      /* The fast prologue uses move instead of push to save registers.  This
         is significantly longer, but also executes faster as modern hardware
         can execute the moves in parallel, but can't do that for push/pop.

	 Be careful about choosing what prologue to emit:  When function takes
	 many instructions to execute we may use slow version as well as in
	 case function is known to be outside hot spot (this is known with
	 feedback only).  Weight the size of function by number of registers
	 to save as it is cheap to use one or two push instructions but very
	 slow to use many of them.  */
      if (count)
	count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
      if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
	  || (flag_branch_probabilities
	      && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
	cfun->machine->use_fast_prologue_epilogue = false;
      else
	cfun->machine->use_fast_prologue_epilogue
	  = !expensive_function_p (count);
    }
  if (TARGET_PROLOGUE_USING_MOVE
      && cfun->machine->use_fast_prologue_epilogue)
    frame->save_regs_using_mov = true;
  else
    frame->save_regs_using_mov = false;


  /* Skip return address and saved base pointer.  */
  offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;

  frame->hard_frame_pointer_offset = offset;

  /* Do some sanity checking of stack_alignment_needed and
     preferred_alignment, since i386 port is the only using those features
     that may break easily.  */

  if (size && !stack_alignment_needed)
    abort ();
  if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
    abort ();
  if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
    abort ();
  if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
    abort ();

  if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
    stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;

  /* Register save area */
  offset += frame->nregs * UNITS_PER_WORD;

  /* Va-arg area */
  if (ix86_save_varrargs_registers)
    {
      offset += X86_64_VARARGS_SIZE;
      frame->va_arg_size = X86_64_VARARGS_SIZE;
    }
  else
    frame->va_arg_size = 0;

  /* Align start of frame for local function.  */
  frame->padding1 = ((offset + stack_alignment_needed - 1)
		     & -stack_alignment_needed) - offset;

  offset += frame->padding1;

  /* Frame pointer points here.  */
  frame->frame_pointer_offset = offset;

  offset += size;

  /* Add outgoing arguments area.  Can be skipped if we eliminated
     all the function calls as dead code.
     Skipping is however impossible when function calls alloca.  Alloca
     expander assumes that last current_function_outgoing_args_size
     of stack frame are unused.  */
  if (ACCUMULATE_OUTGOING_ARGS
      && (!current_function_is_leaf || current_function_calls_alloca))
    {
      offset += current_function_outgoing_args_size;
      frame->outgoing_arguments_size = current_function_outgoing_args_size;
    }
  else
    frame->outgoing_arguments_size = 0;

  /* Align stack boundary.  Only needed if we're calling another function
     or using alloca.  */
  if (!current_function_is_leaf || current_function_calls_alloca)
    frame->padding2 = ((offset + preferred_alignment - 1)
		       & -preferred_alignment) - offset;
  else
    frame->padding2 = 0;

  offset += frame->padding2;

  /* We've reached end of stack frame.  */
  frame->stack_pointer_offset = offset;

  /* Size prologue needs to allocate.  */
  frame->to_allocate =
    (size + frame->padding1 + frame->padding2
     + frame->outgoing_arguments_size + frame->va_arg_size);

  /* Moves are pointless for a tiny frame, and impossible when the
     allocation does not fit a 32-bit displacement on 64-bit.  */
  if ((!frame->to_allocate && frame->nregs <= 1)
      || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
    frame->save_regs_using_mov = false;

  /* Leaf functions on red-zone targets may use the area below the
     stack pointer instead of allocating it.  */
  if (TARGET_RED_ZONE && current_function_sp_is_unchanging
      && current_function_is_leaf)
    {
      frame->red_zone_size = frame->to_allocate;
      if (frame->save_regs_using_mov)
	frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
      if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
	frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
    }
  else
    frame->red_zone_size = 0;
  frame->to_allocate -= frame->red_zone_size;
  frame->stack_pointer_offset -= frame->red_zone_size;
#if 0
  fprintf (stderr, "nregs: %i\n", frame->nregs);
  fprintf (stderr, "size: %i\n", size);
  fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
  fprintf (stderr, "padding1: %i\n", frame->padding1);
  fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
  fprintf (stderr, "padding2: %i\n", frame->padding2);
  fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
  fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
  fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
  fprintf (stderr, "hard_frame_pointer_offset: %i\n",
	   frame->hard_frame_pointer_offset);
  fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
#endif
}
5265
0903fcab
JH
5266/* Emit code to save registers in the prologue. */
5267
5268static void
b96a374d 5269ix86_emit_save_regs (void)
0903fcab 5270{
8d531ab9 5271 int regno;
0903fcab 5272 rtx insn;
0903fcab 5273
4dd2ac2c 5274 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
1020a5ab 5275 if (ix86_save_reg (regno, true))
0903fcab 5276 {
0d7d98ee 5277 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
0903fcab
JH
5278 RTX_FRAME_RELATED_P (insn) = 1;
5279 }
5280}
5281
c6036a37
JH
5282/* Emit code to save registers using MOV insns. First register
5283 is restored from POINTER + OFFSET. */
5284static void
b96a374d 5285ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
c6036a37
JH
5286{
5287 int regno;
5288 rtx insn;
5289
5290 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5291 if (ix86_save_reg (regno, true))
5292 {
b72f00af
RK
5293 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5294 Pmode, offset),
c6036a37
JH
5295 gen_rtx_REG (Pmode, regno));
5296 RTX_FRAME_RELATED_P (insn) = 1;
5297 offset += UNITS_PER_WORD;
5298 }
5299}
5300
/* Expand prologue or epilogue stack adjustment.
   The pattern exist to put a dependency on all ebp-based memory accesses.
   STYLE should be negative if instructions should be marked as frame related,
   zero if %r11 register is live and cannot be freely used and positive
   otherwise.  */

static void
pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
{
  rtx insn;

  if (! TARGET_64BIT)
    insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
  else if (x86_64_immediate_operand (offset, DImode))
    /* 64-bit, but the offset fits in a sign-extended immediate.  */
    insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
  else
    {
      /* Huge 64-bit offset: must be materialized in a scratch register
	 first; %r11 is the designated scratch.  */
      rtx r11;
      /* r11 is used by indirect sibcall return as well, set before the
	 epilogue and used after the epilogue.  ATM indirect sibcall
	 shouldn't be used together with huge frame sizes in one
	 function because of the frame_size check in sibcall.c.  */
      if (style == 0)
	abort ();
      r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
      insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
      if (style < 0)
	RTX_FRAME_RELATED_P (insn) = 1;
      insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
							       offset));
    }
  if (style < 0)
    RTX_FRAME_RELATED_P (insn) = 1;
}
5335
/* Expand the prologue into a bunch of separate insns: frame pointer
   setup (if needed), register saves, stack allocation, PIC register
   load, and a scheduling blockage when profiling.  */

void
ix86_expand_prologue (void)
{
  rtx insn;
  bool pic_reg_used;
  struct ix86_frame frame;
  HOST_WIDE_INT allocate;

  ix86_compute_frame_layout (&frame);

  /* Note: AT&T enter does NOT have reversed args.  Enter is probably
     slower on all targets.  Also sdb doesn't like it.  */

  if (frame_pointer_needed)
    {
      /* push %ebp; mov %esp, %ebp -- both frame related for unwind info.  */
      insn = emit_insn (gen_push (hard_frame_pointer_rtx));
      RTX_FRAME_RELATED_P (insn) = 1;

      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  allocate = frame.to_allocate;

  if (!frame.save_regs_using_mov)
    ix86_emit_save_regs ();
  else
    /* Registers will be stored with MOV below the allocated frame, so
       grow the allocation to cover them.  */
    allocate += frame.nregs * UNITS_PER_WORD;

  /* When using red zone we may start register saving before allocating
     the stack frame saving one cycle of the prologue.  */
  if (TARGET_RED_ZONE && frame.save_regs_using_mov)
    ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
				   : stack_pointer_rtx,
				   -frame.nregs * UNITS_PER_WORD);

  if (allocate == 0)
    ;
  else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
    /* Small or unprobed allocation: a plain stack pointer adjustment.  */
    pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
			       GEN_INT (-allocate), -1);
  else
    {
      /* Large allocation with stack probing: call the stack-allocation
	 worker with the size in %eax.  Only valid for Win32.  */
      rtx eax = gen_rtx_REG (SImode, 0);
      bool eax_live = ix86_eax_live_at_start_p ();

      if (TARGET_64BIT)
	abort ();

      if (eax_live)
	{
	  /* %eax carries an incoming value; preserve it around the call.  */
	  emit_insn (gen_push (eax));
	  allocate -= 4;
	}

      insn = emit_move_insn (eax, GEN_INT (allocate));
      RTX_FRAME_RELATED_P (insn) = 1;

      insn = emit_insn (gen_allocate_stack_worker (eax));
      RTX_FRAME_RELATED_P (insn) = 1;

      if (eax_live)
	{
	  /* Reload the saved %eax from its slot above the new frame.  */
	  rtx t = plus_constant (stack_pointer_rtx, allocate);
	  emit_move_insn (eax, gen_rtx_MEM (SImode, t));
	}
    }

  /* Without a red zone the MOV-based register saves go after the
     allocation.  */
  if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
    {
      if (!frame_pointer_needed || !frame.to_allocate)
	ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
      else
	ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
				       -frame.nregs * UNITS_PER_WORD);
    }

  /* Load the GOT pointer if this function needs the PIC register.  */
  pic_reg_used = false;
  if (pic_offset_table_rtx
      && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
	  || current_function_profile))
    {
      unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();

      if (alt_pic_reg_used != INVALID_REGNUM)
	REGNO (pic_offset_table_rtx) = alt_pic_reg_used;

      pic_reg_used = true;
    }

  if (pic_reg_used)
    {
      insn = emit_insn (gen_set_got (pic_offset_table_rtx));

      /* Even with accurate pre-reload life analysis, we can wind up
	 deleting all references to the pic register after reload.
	 Consider if cross-jumping unifies two sides of a branch
	 controlled by a comparison vs the only read from a global.
	 In which case, allow the set_got to be deleted, though we're
	 too late to do anything about the ebx save in the prologue.  */
      REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
    }

  /* Prevent function calls from being scheduled before the call to mcount.
     In the pic_reg_used case, make sure that the got load isn't deleted.  */
  if (current_function_profile)
    emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
}
5447
da2d1d3a
JH
/* Emit code to restore saved registers using MOV insns.  First register
   is restored from POINTER + OFFSET; subsequent ones from successive
   word slots.  MAYBE_EH_RETURN is forwarded to ix86_save_reg to select
   which registers count as saved on an eh_return path.  */
static void
ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
				  int maybe_eh_return)
{
  int regno;
  rtx base_address = gen_rtx_MEM (Pmode, pointer);

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (ix86_save_reg (regno, maybe_eh_return))
      {
	/* Ensure that adjust_address won't be forced to produce pointer
	   out of range allowed by x86-64 instruction set (displacements
	   must fit in a sign-extended 32-bit immediate).  Materialize
	   POINTER + OFFSET in r11 and restart offsets from zero.  */
	if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
	  {
	    rtx r11;

	    r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
	    emit_move_insn (r11, GEN_INT (offset));
	    emit_insn (gen_adddi3 (r11, r11, pointer));
	    base_address = gen_rtx_MEM (Pmode, r11);
	    offset = 0;
	  }
	emit_move_insn (gen_rtx_REG (Pmode, regno),
			adjust_address (base_address, Pmode, offset));
	offset += UNITS_PER_WORD;
      }
}
5477
/* Restore function stack, frame, and registers.

   STYLE selects the epilogue flavor: 0 for a sibcall epilogue (no
   return instruction is emitted), 2 for an eh_return epilogue (the
   stack pointer is additionally adjusted by EH_RETURN_STACKADJ_RTX),
   and any other positive value for a normal return.  */

void
ix86_expand_epilogue (int style)
{
  int regno;
  int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
  struct ix86_frame frame;
  HOST_WIDE_INT offset;

  ix86_compute_frame_layout (&frame);

  /* Calculate start of saved registers relative to ebp.  Special care
     must be taken for the normal return case of a function using
     eh_return: the eax and edx registers are marked as saved, but not
     restored along this path.  */
  offset = frame.nregs;
  if (current_function_calls_eh_return && style != 2)
    offset -= 2;
  offset *= -UNITS_PER_WORD;

  /* If we're only restoring one register and sp is not valid then
     using a move instruction to restore the register since it's
     less work than reloading sp and popping the register.

     The default code result in stack adjustment using add/lea instruction,
     while this code results in LEAVE instruction (or discrete equivalent),
     so it is profitable in some other cases as well.  Especially when there
     are no registers to restore.  We also use this code when TARGET_USE_LEAVE
     and there is exactly one register to pop.  This heuristic may need some
     tuning in future.  */
  if ((!sp_valid && frame.nregs <= 1)
      || (TARGET_EPILOGUE_USING_MOVE
	  && cfun->machine->use_fast_prologue_epilogue
	  && (frame.nregs > 1 || frame.to_allocate))
      || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
      || (frame_pointer_needed && TARGET_USE_LEAVE
	  && cfun->machine->use_fast_prologue_epilogue
	  && frame.nregs == 1)
      || current_function_calls_eh_return)
    {
      /* Restore registers.  We can use ebp or esp to address the memory
	 locations.  If both are available, default to ebp, since offsets
	 are known to be small.  Only exception is esp pointing directly to the
	 end of block of saved registers, where we may simplify addressing
	 mode.  */

      if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
	ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
					  frame.to_allocate, style == 2);
      else
	ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
					  offset, style == 2);

      /* eh_return epilogues need %ecx added to the stack pointer.  */
      if (style == 2)
	{
	  rtx tmp, sa = EH_RETURN_STACKADJ_RTX;

	  if (frame_pointer_needed)
	    {
	      /* Compute the new stack pointer from the frame pointer and
		 the EH stack adjustment, then reload the saved %ebp.  */
	      tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
	      tmp = plus_constant (tmp, UNITS_PER_WORD);
	      emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));

	      tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
	      emit_move_insn (hard_frame_pointer_rtx, tmp);

	      pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
					 const0_rtx, style);
	    }
	  else
	    {
	      tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
	      tmp = plus_constant (tmp, (frame.to_allocate
					 + frame.nregs * UNITS_PER_WORD));
	      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
	    }
	}
      else if (!frame_pointer_needed)
	pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				   GEN_INT (frame.to_allocate
					    + frame.nregs * UNITS_PER_WORD),
				   style);
      /* If not an i386, mov & pop is faster than "leave".  */
      else if (TARGET_USE_LEAVE || optimize_size
	       || !cfun->machine->use_fast_prologue_epilogue)
	emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
      else
	{
	  /* Discrete equivalent of leave: mov %ebp, %esp; pop %ebp.  */
	  pro_epilogue_adjust_stack (stack_pointer_rtx,
				     hard_frame_pointer_rtx,
				     const0_rtx, style);
	  if (TARGET_64BIT)
	    emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
	  else
	    emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
	}
    }
  else
    {
      /* First step is to deallocate the stack frame so that we can
	 pop the registers.  */
      if (!sp_valid)
	{
	  if (!frame_pointer_needed)
	    abort ();
	  pro_epilogue_adjust_stack (stack_pointer_rtx,
				     hard_frame_pointer_rtx,
				     GEN_INT (offset), style);
	}
      else if (frame.to_allocate)
	pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				   GEN_INT (frame.to_allocate), style);

      /* Pop every saved register back off the stack.  */
      for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
	if (ix86_save_reg (regno, false))
	  {
	    if (TARGET_64BIT)
	      emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
	    else
	      emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
	  }
      if (frame_pointer_needed)
	{
	  /* Leave results in shorter dependency chains on CPUs that are
	     able to grok it fast.  */
	  if (TARGET_USE_LEAVE)
	    emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
	  else if (TARGET_64BIT)
	    emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
	  else
	    emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
	}
    }

  /* Sibcall epilogues don't want a return instruction.  */
  if (style == 0)
    return;

  if (current_function_pops_args && current_function_args_size)
    {
      rtx popc = GEN_INT (current_function_pops_args);

      /* i386 can only pop 64K bytes.  If asked to pop more, pop
	 return address, do explicit add, and jump indirectly to the
	 caller.  */

      if (current_function_pops_args >= 65536)
	{
	  rtx ecx = gen_rtx_REG (SImode, 2);

	  /* There is no "pascal" calling convention in 64bit ABI.  */
	  if (TARGET_64BIT)
	    abort ();

	  emit_insn (gen_popsi1 (ecx));
	  emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
	  emit_jump_insn (gen_return_indirect_internal (ecx));
	}
      else
	emit_jump_insn (gen_return_pop_internal (popc));
    }
  else
    emit_jump_insn (gen_return_internal ());
}
bd09bdeb
RH
5644
5645/* Reset from the function's potential modifications. */
5646
5647static void
b96a374d
AJ
5648ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
5649 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
bd09bdeb
RH
5650{
5651 if (pic_offset_table_rtx)
5652 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
5653}
e075ae69
RH
5654\f
/* Extract the parts of an RTL expression that is a valid memory address
   for an instruction into *OUT (base, index, displacement, scale and
   segment).  Return 0 if the structure of the address is
   grossly off.  Return -1 if the address contains ASHIFT, so it is not
   strictly valid, but still used for computing length of lea instruction.  */

static int
ix86_decompose_address (rtx addr, struct ix86_address *out)
{
  rtx base = NULL_RTX;
  rtx index = NULL_RTX;
  rtx disp = NULL_RTX;
  HOST_WIDE_INT scale = 1;
  rtx scale_rtx = NULL_RTX;
  int retval = 1;
  enum ix86_address_seg seg = SEG_DEFAULT;

  if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
    base = addr;
  else if (GET_CODE (addr) == PLUS)
    {
      /* Flatten the (left-nested) PLUS chain into at most four addends,
	 then classify each one.  */
      rtx addends[4], op;
      int n = 0, i;

      op = addr;
      do
	{
	  if (n >= 4)
	    return 0;
	  addends[n++] = XEXP (op, 1);
	  op = XEXP (op, 0);
	}
      while (GET_CODE (op) == PLUS);
      if (n >= 4)
	return 0;
      addends[n] = op;

      for (i = n; i >= 0; --i)
	{
	  op = addends[i];
	  switch (GET_CODE (op))
	    {
	    case MULT:
	      /* index*scale; only one is allowed.  */
	      if (index)
		return 0;
	      index = XEXP (op, 0);
	      scale_rtx = XEXP (op, 1);
	      break;

	    case UNSPEC:
	      /* A thread-pointer unspec selects the %fs/%gs segment.  */
	      if (XINT (op, 1) == UNSPEC_TP
		  && TARGET_TLS_DIRECT_SEG_REFS
		  && seg == SEG_DEFAULT)
		seg = TARGET_64BIT ? SEG_FS : SEG_GS;
	      else
		return 0;
	      break;

	    case REG:
	    case SUBREG:
	      /* First register seen is the base, second the index.  */
	      if (!base)
		base = op;
	      else if (!index)
		index = op;
	      else
		return 0;
	      break;

	    case CONST:
	    case CONST_INT:
	    case SYMBOL_REF:
	    case LABEL_REF:
	      if (disp)
		return 0;
	      disp = op;
	      break;

	    default:
	      return 0;
	    }
	}
    }
  else if (GET_CODE (addr) == MULT)
    {
      index = XEXP (addr, 0);		/* index*scale */
      scale_rtx = XEXP (addr, 1);
    }
  else if (GET_CODE (addr) == ASHIFT)
    {
      rtx tmp;

      /* We're called for lea too, which implements ashift on occasion.  */
      index = XEXP (addr, 0);
      tmp = XEXP (addr, 1);
      if (GET_CODE (tmp) != CONST_INT)
	return 0;
      scale = INTVAL (tmp);
      if ((unsigned HOST_WIDE_INT) scale > 3)
	return 0;
      scale = 1 << scale;
      retval = -1;
    }
  else
    disp = addr;			/* displacement */

  /* Extract the integral value of scale.  */
  if (scale_rtx)
    {
      if (GET_CODE (scale_rtx) != CONST_INT)
	return 0;
      scale = INTVAL (scale_rtx);
    }

  /* Allow arg pointer and stack pointer as index if there is no scaling:
     swap them into the base position, which they can legally occupy.  */
  if (base && index && scale == 1
      && (index == arg_pointer_rtx
	  || index == frame_pointer_rtx
	  || (REG_P (index) && REGNO (index) == STACK_POINTER_REGNUM)))
    {
      rtx tmp = base;
      base = index;
      index = tmp;
    }

  /* Special case: %ebp cannot be encoded as a base without a displacement.  */
  if ((base == hard_frame_pointer_rtx
       || base == frame_pointer_rtx
       || base == arg_pointer_rtx) && !disp)
    disp = const0_rtx;

  /* Special case: on K6, [%esi] makes the instruction vector decoded.
     Avoid this by transforming to [%esi+0].  */
  if (ix86_tune == PROCESSOR_K6 && !optimize_size
      && base && !index && !disp
      && REG_P (base)
      && REGNO_REG_CLASS (REGNO (base)) == SIREG)
    disp = const0_rtx;

  /* Special case: encode reg+reg instead of reg*2.  */
  if (!base && index && scale && scale == 2)
    base = index, scale = 1;

  /* Special case: scaling cannot be encoded without base or displacement.  */
  if (!base && !disp && index && scale != 1)
    disp = const0_rtx;

  out->base = base;
  out->index = index;
  out->disp = disp;
  out->scale = scale;
  out->seg = seg;

  return retval;
}
01329426
JH
5808\f
5809/* Return cost of the memory address x.
5810 For i386, it is better to use a complex address than let gcc copy
5811 the address into a reg and make a new pseudo. But not if the address
5812 requires to two regs - that would mean more pseudos with longer
5813 lifetimes. */
dcefdf67 5814static int
b96a374d 5815ix86_address_cost (rtx x)
01329426
JH
5816{
5817 struct ix86_address parts;
5818 int cost = 1;
3b3c6a3f 5819
01329426
JH
5820 if (!ix86_decompose_address (x, &parts))
5821 abort ();
5822
5823 /* More complex memory references are better. */
5824 if (parts.disp && parts.disp != const0_rtx)
5825 cost--;
74dc3e94
RH
5826 if (parts.seg != SEG_DEFAULT)
5827 cost--;
01329426
JH
5828
5829 /* Attempt to minimize number of registers in the address. */
5830 if ((parts.base
5831 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
5832 || (parts.index
5833 && (!REG_P (parts.index)
5834 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
5835 cost++;
5836
5837 if (parts.base
5838 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
5839 && parts.index
5840 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
5841 && parts.base != parts.index)
5842 cost++;
5843
5844 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
5845 since it's predecode logic can't detect the length of instructions
5846 and it degenerates to vector decoded. Increase cost of such
5847 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
0f290768 5848 to split such addresses or even refuse such addresses at all.
01329426
JH
5849
5850 Following addressing modes are affected:
5851 [base+scale*index]
5852 [scale*index+disp]
5853 [base+index]
0f290768 5854
01329426
JH
5855 The first and last case may be avoidable by explicitly coding the zero in
5856 memory address, but I don't have AMD-K6 machine handy to check this
5857 theory. */
5858
5859 if (TARGET_K6
5860 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
5861 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
5862 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
5863 cost += 10;
0f290768 5864
01329426
JH
5865 return cost;
5866}
5867\f
b949ea8b
JW
/* If X is a machine specific address (i.e. a symbol or label being
   referenced as a displacement from the GOT implemented using an
   UNSPEC), then return the base term.  Otherwise return X.  */

rtx
ix86_find_base_term (rtx x)
{
  rtx term;

  if (TARGET_64BIT)
    {
      /* Look for (const (plus (unspec [sym] GOTPCREL) offset)) or
	 (const (unspec [sym] GOTPCREL)) and return the symbol/label.  */
      if (GET_CODE (x) != CONST)
	return x;
      term = XEXP (x, 0);
      if (GET_CODE (term) == PLUS
	  && (GET_CODE (XEXP (term, 1)) == CONST_INT
	      || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
	term = XEXP (term, 0);
      if (GET_CODE (term) != UNSPEC
	  || XINT (term, 1) != UNSPEC_GOTPCREL)
	return x;

      term = XVECEXP (term, 0, 0);

      if (GET_CODE (term) != SYMBOL_REF
	  && GET_CODE (term) != LABEL_REF)
	return x;

      return term;
    }

  /* 32-bit: strip the PIC wrapping, if any, and use the result only
     when it is actually a symbol or label.  */
  term = ix86_delegitimize_address (x);

  if (GET_CODE (term) != SYMBOL_REF
      && GET_CODE (term) != LABEL_REF)
    return x;

  return term;
}
5907\f
f996902d
RH
/* Determine if a given RTX is a valid constant.  We already know this
   satisfies CONSTANT_P.  TLS symbol addresses (and offsets from them)
   are the notable non-constants here; certain TLS unspecs are allowed.  */

bool
legitimate_constant_p (rtx x)
{
  rtx inner;

  switch (GET_CODE (x))
    {
    case SYMBOL_REF:
      /* TLS symbols are not constant.  */
      if (tls_symbolic_operand (x, Pmode))
	return false;
      break;

    case CONST:
      inner = XEXP (x, 0);

      /* Offsets of TLS symbols are never valid.
	 Discourage CSE from creating them.  */
      if (GET_CODE (inner) == PLUS
	  && tls_symbolic_operand (XEXP (inner, 0), Pmode))
	return false;

      /* Strip a constant offset so the unspec check below sees the
	 underlying term.  */
      if (GET_CODE (inner) == PLUS
	  || GET_CODE (inner) == MINUS)
	{
	  if (GET_CODE (XEXP (inner, 1)) != CONST_INT)
	    return false;
	  inner = XEXP (inner, 0);
	}

      /* Only some unspecs are valid as "constants".  */
      if (GET_CODE (inner) == UNSPEC)
	switch (XINT (inner, 1))
	  {
	  case UNSPEC_TPOFF:
	  case UNSPEC_NTPOFF:
	    return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
	  case UNSPEC_DTPOFF:
	    return local_dynamic_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
	  default:
	    return false;
	  }
      break;

    default:
      break;
    }

  /* Otherwise we handle everything else in the move patterns.  */
  return true;
}
5962
3a04ff64
RH
5963/* Determine if it's legal to put X into the constant pool. This
5964 is not possible for the address of thread-local symbols, which
5965 is checked above. */
5966
5967static bool
b96a374d 5968ix86_cannot_force_const_mem (rtx x)
3a04ff64
RH
5969{
5970 return !legitimate_constant_p (x);
5971}
5972
f996902d
RH
5973/* Determine if a given RTX is a valid constant address. */
5974
5975bool
b96a374d 5976constant_address_p (rtx x)
f996902d 5977{
a94f136b 5978 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
f996902d
RH
5979}
5980
5981/* Nonzero if the constant value X is a legitimate general operand
fce5a9f2 5982 when generating PIC code. It is given that flag_pic is on and
f996902d
RH
5983 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
5984
5985bool
b96a374d 5986legitimate_pic_operand_p (rtx x)
f996902d
RH
5987{
5988 rtx inner;
5989
5990 switch (GET_CODE (x))
5991 {
5992 case CONST:
5993 inner = XEXP (x, 0);
5994
5995 /* Only some unspecs are valid as "constants". */
5996 if (GET_CODE (inner) == UNSPEC)
5997 switch (XINT (inner, 1))
5998 {
5999 case UNSPEC_TPOFF:
6000 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
f996902d
RH
6001 default:
6002 return false;
6003 }
5efb1046 6004 /* FALLTHRU */
f996902d
RH
6005
6006 case SYMBOL_REF:
6007 case LABEL_REF:
6008 return legitimate_pic_address_disp_p (x);
6009
6010 default:
6011 return true;
6012 }
6013}
6014
e075ae69
RH
/* Determine if a given CONST RTX is a valid memory displacement
   in PIC mode.  Returns 1 if DISP is acceptable, 0 otherwise.  */

int
legitimate_pic_address_disp_p (rtx disp)
{
  bool saw_plus;

  /* In 64bit mode we can allow direct addresses of symbols and labels
     when they are not dynamic symbols.  */
  if (TARGET_64BIT)
    {
      /* TLS references should always be enclosed in UNSPEC.  */
      if (tls_symbolic_operand (disp, GET_MODE (disp)))
	return 0;
      if (GET_CODE (disp) == SYMBOL_REF
	  && ix86_cmodel == CM_SMALL_PIC
	  && SYMBOL_REF_LOCAL_P (disp))
	return 1;
      if (GET_CODE (disp) == LABEL_REF)
	return 1;
      /* A local symbol or label plus a small constant offset is also
	 directly addressable.  */
      if (GET_CODE (disp) == CONST
	  && GET_CODE (XEXP (disp, 0)) == PLUS)
	{
	  rtx op0 = XEXP (XEXP (disp, 0), 0);
	  rtx op1 = XEXP (XEXP (disp, 0), 1);

	  /* TLS references should always be enclosed in UNSPEC.  */
	  if (tls_symbolic_operand (op0, GET_MODE (op0)))
	    return 0;
	  if (((GET_CODE (op0) == SYMBOL_REF
		&& ix86_cmodel == CM_SMALL_PIC
		&& SYMBOL_REF_LOCAL_P (op0))
	       || GET_CODE (op0) == LABEL_REF)
	      && GET_CODE (op1) == CONST_INT
	      && INTVAL (op1) < 16*1024*1024
	      && INTVAL (op1) >= -16*1024*1024)
	    return 1;
	}
    }
  if (GET_CODE (disp) != CONST)
    return 0;
  disp = XEXP (disp, 0);

  if (TARGET_64BIT)
    {
      /* It is unsafe to allow PLUS expressions.  This limits allowed
	 distance of GOT tables.  We should not need these anyway.  */
      if (GET_CODE (disp) != UNSPEC
	  || XINT (disp, 1) != UNSPEC_GOTPCREL)
	return 0;

      if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
	  && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
	return 0;
      return 1;
    }

  /* 32-bit PIC: strip a constant offset, remembering we saw one, since
     some unspecs below are not valid with an offset.  */
  saw_plus = false;
  if (GET_CODE (disp) == PLUS)
    {
      if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
	return 0;
      disp = XEXP (disp, 0);
      saw_plus = true;
    }

  /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O.  */
  if (TARGET_MACHO && GET_CODE (disp) == MINUS)
    {
      if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
	  || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
	if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
	  {
	    const char *sym_name = XSTR (XEXP (disp, 1), 0);
	    if (! strcmp (sym_name, "<pic base>"))
	      return 1;
	  }
    }

  if (GET_CODE (disp) != UNSPEC)
    return 0;

  /* Classify the remaining GOT/TLS unspecs.  */
  switch (XINT (disp, 1))
    {
    case UNSPEC_GOT:
      if (saw_plus)
	return false;
      return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
    case UNSPEC_GOTOFF:
      if (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
	  || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
	return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
      return false;
    case UNSPEC_GOTTPOFF:
    case UNSPEC_GOTNTPOFF:
    case UNSPEC_INDNTPOFF:
      if (saw_plus)
	return false;
      return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
    case UNSPEC_NTPOFF:
      return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
    case UNSPEC_DTPOFF:
      return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
    }

  return 0;
}
6123
e075ae69
RH
/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
   memory address for an instruction.  The MODE argument is the machine mode
   for the MEM expression that wants to use this address.  STRICT nonzero
   requires base/index registers to satisfy the strict register-class
   checks; zero also accepts pseudo registers.

   It only recognizes address in canonical form.  LEGITIMIZE_ADDRESS should
   convert common non-canonical forms to canonical form so that they will
   be recognized.  */

int
legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
{
  struct ix86_address parts;
  rtx base, index, disp;
  HOST_WIDE_INT scale;
  const char *reason = NULL;
  rtx reason_rtx = NULL_RTX;

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr,
	       "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
	       GET_MODE_NAME (mode), strict);
      debug_rtx (addr);
    }

  if (ix86_decompose_address (addr, &parts) <= 0)
    {
      reason = "decomposition failed";
      goto report_error;
    }

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;

  /* Validate base register.

     Don't allow SUBREG's here, it can lead to spill failures when the base
     is one word out of a two word structure, which is represented internally
     as a DImode int.  */

  if (base)
    {
      reason_rtx = base;

      if (GET_CODE (base) != REG)
	{
	  reason = "base is not a register";
	  goto report_error;
	}

      if (GET_MODE (base) != Pmode)
	{
	  reason = "base is not in Pmode";
	  goto report_error;
	}

      if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
	  || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
	{
	  reason = "base is not valid";
	  goto report_error;
	}
    }

  /* Validate index register.

     Don't allow SUBREG's here, it can lead to spill failures when the index
     is one word out of a two word structure, which is represented internally
     as a DImode int.  */

  if (index)
    {
      reason_rtx = index;

      if (GET_CODE (index) != REG)
	{
	  reason = "index is not a register";
	  goto report_error;
	}

      if (GET_MODE (index) != Pmode)
	{
	  reason = "index is not in Pmode";
	  goto report_error;
	}

      if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
	  || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
	{
	  reason = "index is not valid";
	  goto report_error;
	}
    }

  /* Validate scale factor: must accompany an index and be one of the
     hardware-encodable multipliers.  */
  if (scale != 1)
    {
      reason_rtx = GEN_INT (scale);
      if (!index)
	{
	  reason = "scale without index";
	  goto report_error;
	}

      if (scale != 2 && scale != 4 && scale != 8)
	{
	  reason = "scale is not a valid multiplier";
	  goto report_error;
	}
    }

  /* Validate displacement.  */
  if (disp)
    {
      reason_rtx = disp;

      if (GET_CODE (disp) == CONST
	  && GET_CODE (XEXP (disp, 0)) == UNSPEC)
	switch (XINT (XEXP (disp, 0), 1))
	  {
	  case UNSPEC_GOT:
	  case UNSPEC_GOTOFF:
	  case UNSPEC_GOTPCREL:
	    /* GOT-relative displacements only arise with -fpic.  */
	    if (!flag_pic)
	      abort ();
	    goto is_legitimate_pic;

	  case UNSPEC_GOTTPOFF:
	  case UNSPEC_GOTNTPOFF:
	  case UNSPEC_INDNTPOFF:
	  case UNSPEC_NTPOFF:
	  case UNSPEC_DTPOFF:
	    /* TLS displacement unspecs are accepted here.  */
	    break;

	  default:
	    reason = "invalid address unspec";
	    goto report_error;
	  }

      else if (flag_pic && (SYMBOLIC_CONST (disp)
#if TARGET_MACHO
			    && !machopic_operand_p (disp)
#endif
			    ))
	{
	is_legitimate_pic:
	  if (TARGET_64BIT && (index || base))
	    {
	      /* foo@dtpoff(%rX) is ok.  */
	      if (GET_CODE (disp) != CONST
		  || GET_CODE (XEXP (disp, 0)) != PLUS
		  || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
		  || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
		  || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
		      && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
		{
		  reason = "non-constant pic memory reference";
		  goto report_error;
		}
	    }
	  else if (! legitimate_pic_address_disp_p (disp))
	    {
	      reason = "displacement is an invalid pic construct";
	      goto report_error;
	    }

	  /* This code used to verify that a symbolic pic displacement
	     includes the pic_offset_table_rtx register.

	     While this is good idea, unfortunately these constructs may
	     be created by "adds using lea" optimization for incorrect
	     code like:

	     int a;
	     int foo(int i)
	       {
	         return *(&a+i);
	       }

	     This code is nonsensical, but results in addressing
	     GOT table with pic_offset_table_rtx base.  We can't
	     just refuse it easily, since it gets matched by
	     "addsi3" pattern, that later gets split to lea in the
	     case output register differs from input.  While this
	     can be handled by separate addsi pattern for this case
	     that never results in lea, this seems to be easier and
	     correct fix for crash to disable this test.  */
	}
      else if (GET_CODE (disp) != LABEL_REF
	       && GET_CODE (disp) != CONST_INT
	       && (GET_CODE (disp) != CONST
		   || !legitimate_constant_p (disp))
	       && (GET_CODE (disp) != SYMBOL_REF
		   || !legitimate_constant_p (disp)))
	{
	  reason = "displacement is not constant";
	  goto report_error;
	}
      else if (TARGET_64BIT && !x86_64_sign_extended_value (disp))
	{
	  reason = "displacement is out of range";
	  goto report_error;
	}
    }

  /* Everything looks valid.  */
  if (TARGET_DEBUG_ADDR)
    fprintf (stderr, "Success.\n");
  return TRUE;

 report_error:
  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr, "Error: %s\n", reason);
      debug_rtx (reason_rtx);
    }
  return FALSE;
}
3b3c6a3f 6344\f
55efb413
JW
6345/* Return an unique alias set for the GOT. */
6346
0f290768 6347static HOST_WIDE_INT
b96a374d 6348ix86_GOT_alias_set (void)
55efb413 6349{
5bf0ebab
RH
6350 static HOST_WIDE_INT set = -1;
6351 if (set == -1)
6352 set = new_alias_set ();
6353 return set;
0f290768 6354}
55efb413 6355
3b3c6a3f
MM
/* Return a legitimate reference for ORIG (an address) using the
   register REG.  If REG is 0, a new pseudo is generated.

   There are two types of references that must be handled:

   1. Global data references must load the address from the GOT, via
      the PIC reg.  An insn is emitted to do this load, and the reg is
      returned.

   2. Static data references, constant pool addresses, and code labels
      compute the address as an offset from the GOT, whose base is in
      the PIC reg.  Static data objects have SYMBOL_FLAG_LOCAL set to
      differentiate them from global data objects.  The returned
      address is the PIC reg + an unspec constant.

   GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
   reg also appears in the address.  */
3b3c6a3f 6373
static rtx
legitimize_pic_address (rtx orig, rtx reg)
{
  rtx addr = orig;
  rtx new = orig;
  rtx base;

#if TARGET_MACHO
  if (reg == 0)
    reg = gen_reg_rtx (Pmode);
  /* Use the generic Mach-O PIC machinery.  */
  return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
#endif

  /* On 64-bit, RIP-relative displacements are already legitimate.  */
  if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
    new = addr;
  else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
    {
      /* This symbol may be referenced via a displacement from the PIC
	 base address (@GOTOFF).  */

      if (reload_in_progress)
	regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
      if (GET_CODE (addr) == CONST)
	addr = XEXP (addr, 0);
      if (GET_CODE (addr) == PLUS)
	{
	  /* (symbol + offset): wrap only the symbol in the GOTOFF unspec
	     and keep the constant offset outside it.  */
	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
	  new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
	}
      else
	new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
      new = gen_rtx_CONST (Pmode, new);
      new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);

      if (reg != 0)
	{
	  emit_move_insn (reg, new);
	  new = reg;
	}
    }
  else if (GET_CODE (addr) == SYMBOL_REF)
    {
      if (TARGET_64BIT)
	{
	  /* 64-bit global: load the address from the GOT, RIP-relative
	     (@GOTPCREL).  */
	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
	  new = gen_rtx_CONST (Pmode, new);
	  new = gen_rtx_MEM (Pmode, new);
	  RTX_UNCHANGING_P (new) = 1;
	  set_mem_alias_set (new, ix86_GOT_alias_set ());

	  if (reg == 0)
	    reg = gen_reg_rtx (Pmode);
	  /* Use directly gen_movsi, otherwise the address is loaded
	     into register for CSE.  We don't want to CSE this addresses,
	     instead we CSE addresses from the GOT table, so skip this.  */
	  emit_insn (gen_movsi (reg, new));
	  new = reg;
	}
      else
	{
	  /* This symbol must be referenced via a load from the
	     Global Offset Table (@GOT).  */

	  if (reload_in_progress)
	    regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
	  new = gen_rtx_CONST (Pmode, new);
	  new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
	  new = gen_rtx_MEM (Pmode, new);
	  RTX_UNCHANGING_P (new) = 1;
	  set_mem_alias_set (new, ix86_GOT_alias_set ());

	  if (reg == 0)
	    reg = gen_reg_rtx (Pmode);
	  emit_move_insn (reg, new);
	  new = reg;
	}
    }
  else
    {
      if (GET_CODE (addr) == CONST)
	{
	  addr = XEXP (addr, 0);

	  /* We must match stuff we generate before.  Assume the only
	     unspecs that can get here are ours.  Not that we could do
	     anything with them anyway....  */
	  if (GET_CODE (addr) == UNSPEC
	      || (GET_CODE (addr) == PLUS
		  && GET_CODE (XEXP (addr, 0)) == UNSPEC))
	    return orig;
	  if (GET_CODE (addr) != PLUS)
	    abort ();
	}
      if (GET_CODE (addr) == PLUS)
	{
	  rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);

	  /* Check first to see if this is a constant offset from a @GOTOFF
	     symbol reference.  */
	  if (local_symbolic_operand (op0, Pmode)
	      && GET_CODE (op1) == CONST_INT)
	    {
	      if (!TARGET_64BIT)
		{
		  if (reload_in_progress)
		    regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
		  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
					UNSPEC_GOTOFF);
		  new = gen_rtx_PLUS (Pmode, new, op1);
		  new = gen_rtx_CONST (Pmode, new);
		  new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);

		  if (reg != 0)
		    {
		      emit_move_insn (reg, new);
		      new = reg;
		    }
		}
	      else
		{
		  /* 64-bit: offsets outside +/-16MB cannot be folded into
		     the relocation; force the offset into a register.  */
		  if (INTVAL (op1) < -16*1024*1024
		      || INTVAL (op1) >= 16*1024*1024)
		    new = gen_rtx_PLUS (Pmode, op0, force_reg (Pmode, op1));
		}
	    }
	  else
	    {
	      /* Legitimize both operands recursively, reusing REG for the
		 second only if the first did not consume it.  */
	      base = legitimize_pic_address (XEXP (addr, 0), reg);
	      new  = legitimize_pic_address (XEXP (addr, 1),
					     base == reg ? NULL_RTX : reg);

	      if (GET_CODE (new) == CONST_INT)
		new = plus_constant (base, INTVAL (new));
	      else
		{
		  /* Re-associate so any trailing constant stays outermost.  */
		  if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
		    {
		      base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
		      new = XEXP (new, 1);
		    }
		  new = gen_rtx_PLUS (Pmode, base, new);
		}
	    }
	}
    }
  return new;
}
6523\f
74dc3e94 6524/* Load the thread pointer. If TO_REG is true, force it into a register. */
f996902d
RH
6525
6526static rtx
b96a374d 6527get_thread_pointer (int to_reg)
f996902d 6528{
74dc3e94 6529 rtx tp, reg, insn;
f996902d
RH
6530
6531 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
74dc3e94
RH
6532 if (!to_reg)
6533 return tp;
f996902d 6534
74dc3e94
RH
6535 reg = gen_reg_rtx (Pmode);
6536 insn = gen_rtx_SET (VOIDmode, reg, tp);
6537 insn = emit_insn (insn);
6538
6539 return reg;
6540}
6541
6542/* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
6543 false if we expect this to be used for a memory address and true if
6544 we expect to load the address into a register. */
6545
static rtx
legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
{
  rtx dest, base, off, pic;
  int type;

  switch (model)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      /* Call __tls_get_addr for the symbol itself.  */
      dest = gen_reg_rtx (Pmode);
      if (TARGET_64BIT)
	{
	  rtx rax = gen_rtx_REG (Pmode, 0), insns;

	  start_sequence ();
	  emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
	  insns = get_insns ();
	  end_sequence ();

	  emit_libcall_block (insns, dest, rax, x);
	}
      else
	emit_insn (gen_tls_global_dynamic_32 (dest, x));
      break;

    case TLS_MODEL_LOCAL_DYNAMIC:
      /* Call once for the module base, then add the symbol's
	 @DTPOFF offset as a constant.  */
      base = gen_reg_rtx (Pmode);
      if (TARGET_64BIT)
	{
	  rtx rax = gen_rtx_REG (Pmode, 0), insns, note;

	  start_sequence ();
	  emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
	  insns = get_insns ();
	  end_sequence ();

	  note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
	  note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
	  emit_libcall_block (insns, base, rax, note);
	}
      else
	emit_insn (gen_tls_local_dynamic_base_32 (base));

      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
      off = gen_rtx_CONST (Pmode, off);

      return gen_rtx_PLUS (Pmode, base, off);

    case TLS_MODEL_INITIAL_EXEC:
      /* Load the symbol's TP offset from the GOT; the unspec TYPE and
	 whether a PIC base is needed depend on the target and -fpic.  */
      if (TARGET_64BIT)
	{
	  pic = NULL;
	  type = UNSPEC_GOTNTPOFF;
	}
      else if (flag_pic)
	{
	  if (reload_in_progress)
	    regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
	  pic = pic_offset_table_rtx;
	  type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
	}
      else if (!TARGET_GNU_TLS)
	{
	  /* Sun-style TLS without PIC: materialize a GOT base first.  */
	  pic = gen_reg_rtx (Pmode);
	  emit_insn (gen_set_got (pic));
	  type = UNSPEC_GOTTPOFF;
	}
      else
	{
	  pic = NULL;
	  type = UNSPEC_INDNTPOFF;
	}

      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
      off = gen_rtx_CONST (Pmode, off);
      if (pic)
	off = gen_rtx_PLUS (Pmode, pic, off);
      off = gen_rtx_MEM (Pmode, off);
      RTX_UNCHANGING_P (off) = 1;
      set_mem_alias_set (off, ix86_GOT_alias_set ());

      if (TARGET_64BIT || TARGET_GNU_TLS)
	{
	  base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
	  off = force_reg (Pmode, off);
	  return gen_rtx_PLUS (Pmode, base, off);
	}
      else
	{
	  /* Sun TLS: the offset is subtracted from the thread pointer.  */
	  base = get_thread_pointer (true);
	  dest = gen_reg_rtx (Pmode);
	  emit_insn (gen_subsi3 (dest, base, off));
	}
      break;

    case TLS_MODEL_LOCAL_EXEC:
      /* The offset is a link-time constant (@NTPOFF/@TPOFF).  */
      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
			    (TARGET_64BIT || TARGET_GNU_TLS)
			    ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
      off = gen_rtx_CONST (Pmode, off);

      if (TARGET_64BIT || TARGET_GNU_TLS)
	{
	  base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
	  return gen_rtx_PLUS (Pmode, base, off);
	}
      else
	{
	  base = get_thread_pointer (true);
	  dest = gen_reg_rtx (Pmode);
	  emit_insn (gen_subsi3 (dest, base, off));
	}
      break;

    default:
      abort ();
    }

  return dest;
}
fce5a9f2 6666
3b3c6a3f
MM
6667/* Try machine-dependent ways of modifying an illegitimate address
6668 to be legitimate. If we find one, return the new, valid address.
6669 This macro is used in only one place: `memory_address' in explow.c.
6670
6671 OLDX is the address as it was before break_out_memory_refs was called.
6672 In some cases it is useful to look at this to decide what needs to be done.
6673
6674 MODE and WIN are passed so that this macro can use
6675 GO_IF_LEGITIMATE_ADDRESS.
6676
6677 It is always safe for this macro to do nothing. It exists to recognize
6678 opportunities to optimize the output.
6679
6680 For the 80386, we handle X+REG by loading X into a register R and
6681 using R+REG. R will go in a general reg and indexing will be used.
6682 However, if REG is a broken-out memory address or multiplication,
6683 nothing needs to be done because REG can certainly go in a general reg.
6684
6685 When -fpic is used, special handling is needed for symbolic references.
6686 See comments by legitimize_pic_address in i386.c for details. */
6687
rtx
legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
{
  int changed = 0;
  unsigned log;

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
	       GET_MODE_NAME (mode));
      debug_rtx (x);
    }

  /* TLS symbols (bare, or symbol+offset inside a CONST) get their own
     legitimization path.  */
  log = tls_symbolic_operand (x, mode);
  if (log)
    return legitimize_tls_address (x, log, false);
  if (GET_CODE (x) == CONST
      && GET_CODE (XEXP (x, 0)) == PLUS
      && (log = tls_symbolic_operand (XEXP (XEXP (x, 0), 0), Pmode)))
    {
      rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
      return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
    }

  if (flag_pic && SYMBOLIC_CONST (x))
    return legitimize_pic_address (x, 0);

  /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
  if (GET_CODE (x) == ASHIFT
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
    {
      changed = 1;
      x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
			GEN_INT (1 << log));
    }

  if (GET_CODE (x) == PLUS)
    {
      /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */

      if (GET_CODE (XEXP (x, 0)) == ASHIFT
	  && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
	  && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
	{
	  changed = 1;
	  XEXP (x, 0) = gen_rtx_MULT (Pmode,
				      force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
				      GEN_INT (1 << log));
	}

      if (GET_CODE (XEXP (x, 1)) == ASHIFT
	  && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
	  && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
	{
	  changed = 1;
	  XEXP (x, 1) = gen_rtx_MULT (Pmode,
				      force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
				      GEN_INT (1 << log));
	}

      /* Put multiply first if it isn't already.  */
      if (GET_CODE (XEXP (x, 1)) == MULT)
	{
	  rtx tmp = XEXP (x, 0);
	  XEXP (x, 0) = XEXP (x, 1);
	  XEXP (x, 1) = tmp;
	  changed = 1;
	}

      /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can be
	 created by virtual register instantiation, register elimination, and
	 similar optimizations.  */
      if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
	{
	  changed = 1;
	  x = gen_rtx_PLUS (Pmode,
			    gen_rtx_PLUS (Pmode, XEXP (x, 0),
					  XEXP (XEXP (x, 1), 0)),
			    XEXP (XEXP (x, 1), 1));
	}

      /* Canonicalize
	 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
      else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
	       && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	       && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
	       && CONSTANT_P (XEXP (x, 1)))
	{
	  rtx constant;
	  rtx other = NULL_RTX;

	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
	    {
	      constant = XEXP (x, 1);
	      other = XEXP (XEXP (XEXP (x, 0), 1), 1);
	    }
	  else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
	    {
	      constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
	      other = XEXP (x, 1);
	    }
	  else
	    constant = 0;

	  if (constant)
	    {
	      changed = 1;
	      x = gen_rtx_PLUS (Pmode,
				gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
					      XEXP (XEXP (XEXP (x, 0), 1), 0)),
				plus_constant (other, INTVAL (constant)));
	    }
	}

      /* After canonicalization, retest before doing anything heavier.  */
      if (changed && legitimate_address_p (mode, x, FALSE))
	return x;

      /* Force any remaining multiplies into registers.  */
      if (GET_CODE (XEXP (x, 0)) == MULT)
	{
	  changed = 1;
	  XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
	}

      if (GET_CODE (XEXP (x, 1)) == MULT)
	{
	  changed = 1;
	  XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
	}

      if (changed
	  && GET_CODE (XEXP (x, 1)) == REG
	  && GET_CODE (XEXP (x, 0)) == REG)
	return x;

      if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
	{
	  changed = 1;
	  x = legitimize_pic_address (x, 0);
	}

      if (changed && legitimate_address_p (mode, x, FALSE))
	return x;

      /* Last resort: force the non-register operand into a register.  */
      if (GET_CODE (XEXP (x, 0)) == REG)
	{
	  rtx temp = gen_reg_rtx (Pmode);
	  rtx val = force_operand (XEXP (x, 1), temp);
	  if (val != temp)
	    emit_move_insn (temp, val);

	  XEXP (x, 1) = temp;
	  return x;
	}

      else if (GET_CODE (XEXP (x, 1)) == REG)
	{
	  rtx temp = gen_reg_rtx (Pmode);
	  rtx val = force_operand (XEXP (x, 0), temp);
	  if (val != temp)
	    emit_move_insn (temp, val);

	  XEXP (x, 0) = temp;
	  return x;
	}
    }

  return x;
}
2a2ab3f9
JVA
6859\f
6860/* Print an integer constant expression in assembler syntax. Addition
6861 and subtraction are the only arithmetic that may appear in these
6862 expressions. FILE is the stdio stream to write to, X is the rtx, and
6863 CODE is the operand print code from the output string. */
6864
static void
output_pic_addr_const (FILE *file, rtx x, int code)
{
  char buf[256];

  switch (GET_CODE (x))
    {
    case PC:
      if (flag_pic)
	putc ('.', file);
      else
	abort ();
      break;

    case SYMBOL_REF:
      /* Mark the decl as referenced so that cgraph will output the function.  */
      if (SYMBOL_REF_DECL (x))
	mark_decl_referenced (SYMBOL_REF_DECL (x));

      assemble_name (file, XSTR (x, 0));
      if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
	fputs ("@PLT", file);
      break;

    case LABEL_REF:
      x = XEXP (x, 0);
      /* FALLTHRU */
    case CODE_LABEL:
      ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
      assemble_name (asm_out_file, buf);
      break;

    case CONST_INT:
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
      break;

    case CONST:
      /* This used to output parentheses around the expression,
	 but that does not work on the 386 (either ATT or BSD assembler).  */
      output_pic_addr_const (file, XEXP (x, 0), code);
      break;

    case CONST_DOUBLE:
      if (GET_MODE (x) == VOIDmode)
	{
	  /* We can use %d if the number is <32 bits and positive.  */
	  if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
	    fprintf (file, "0x%lx%08lx",
		     (unsigned long) CONST_DOUBLE_HIGH (x),
		     (unsigned long) CONST_DOUBLE_LOW (x));
	  else
	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
	}
      else
	/* We can't handle floating point constants;
	   PRINT_OPERAND must handle them.  */
	output_operand_lossage ("floating constant misused");
      break;

    case PLUS:
      /* Some assemblers need integer constants to appear first.  */
      if (GET_CODE (XEXP (x, 0)) == CONST_INT)
	{
	  output_pic_addr_const (file, XEXP (x, 0), code);
	  putc ('+', file);
	  output_pic_addr_const (file, XEXP (x, 1), code);
	}
      else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
	{
	  output_pic_addr_const (file, XEXP (x, 1), code);
	  putc ('+', file);
	  output_pic_addr_const (file, XEXP (x, 0), code);
	}
      else
	abort ();
      break;

    case MINUS:
      /* Non-Mach-O assemblers want the difference bracketed.  */
      if (!TARGET_MACHO)
	putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
      output_pic_addr_const (file, XEXP (x, 0), code);
      putc ('-', file);
      output_pic_addr_const (file, XEXP (x, 1), code);
      if (!TARGET_MACHO)
	putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
      break;

    case UNSPEC:
      /* Our PIC/TLS unspecs wrap exactly one operand; print it followed
	 by the matching relocation suffix.  */
      if (XVECLEN (x, 0) != 1)
	abort ();
      output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
      switch (XINT (x, 1))
	{
	case UNSPEC_GOT:
	  fputs ("@GOT", file);
	  break;
	case UNSPEC_GOTOFF:
	  fputs ("@GOTOFF", file);
	  break;
	case UNSPEC_GOTPCREL:
	  fputs ("@GOTPCREL(%rip)", file);
	  break;
	case UNSPEC_GOTTPOFF:
	  /* FIXME: This might be @TPOFF in Sun ld too.  */
	  fputs ("@GOTTPOFF", file);
	  break;
	case UNSPEC_TPOFF:
	  fputs ("@TPOFF", file);
	  break;
	case UNSPEC_NTPOFF:
	  if (TARGET_64BIT)
	    fputs ("@TPOFF", file);
	  else
	    fputs ("@NTPOFF", file);
	  break;
	case UNSPEC_DTPOFF:
	  fputs ("@DTPOFF", file);
	  break;
	case UNSPEC_GOTNTPOFF:
	  if (TARGET_64BIT)
	    fputs ("@GOTTPOFF(%rip)", file);
	  else
	    fputs ("@GOTNTPOFF", file);
	  break;
	case UNSPEC_INDNTPOFF:
	  fputs ("@INDNTPOFF", file);
	  break;
	default:
	  output_operand_lossage ("invalid UNSPEC as operand");
	  break;
	}
      break;

    default:
      output_operand_lossage ("invalid expression as operand");
    }
}
1865dbb5 7002
0f290768 7003/* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
1865dbb5
JM
7004 We need to handle our special PIC relocations. */
7005
0f290768 7006void
b96a374d 7007i386_dwarf_output_addr_const (FILE *file, rtx x)
1865dbb5 7008{
14f73b5a 7009#ifdef ASM_QUAD
18b5b8d6 7010 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
14f73b5a
JH
7011#else
7012 if (TARGET_64BIT)
7013 abort ();
18b5b8d6 7014 fprintf (file, "%s", ASM_LONG);
14f73b5a 7015#endif
1865dbb5
JM
7016 if (flag_pic)
7017 output_pic_addr_const (file, x, '\0');
7018 else
7019 output_addr_const (file, x);
7020 fputc ('\n', file);
7021}
7022
b9203463
RH
7023/* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
7024 We need to emit DTP-relative relocations. */
7025
7026void
b96a374d 7027i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
b9203463 7028{
75d38379
JJ
7029 fputs (ASM_LONG, file);
7030 output_addr_const (file, x);
7031 fputs ("@DTPOFF", file);
b9203463
RH
7032 switch (size)
7033 {
7034 case 4:
b9203463
RH
7035 break;
7036 case 8:
75d38379 7037 fputs (", 0", file);
b9203463 7038 break;
b9203463
RH
7039 default:
7040 abort ();
7041 }
b9203463
RH
7042}
7043
1865dbb5
JM
7044/* In the name of slightly smaller debug output, and to cater to
7045 general assembler losage, recognize PIC+GOTOFF and turn it back
7046 into a direct symbol reference. */
7047
static rtx
ix86_delegitimize_address (rtx orig_x)
{
  rtx x = orig_x, y;

  /* Strip the MEM wrapper; we match on the bare address.  */
  if (GET_CODE (x) == MEM)
    x = XEXP (x, 0);

  if (TARGET_64BIT)
    {
      /* Only (mem (const (unspec ... GOTPCREL))) is recognized.  */
      if (GET_CODE (x) != CONST
	  || GET_CODE (XEXP (x, 0)) != UNSPEC
	  || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
	  || GET_CODE (orig_x) != MEM)
	return orig_x;
      return XVECEXP (XEXP (x, 0), 0, 0);
    }

  if (GET_CODE (x) != PLUS
      || GET_CODE (XEXP (x, 1)) != CONST)
    return orig_x;

  /* Y collects whatever of the address survives besides the PIC
     register and the GOT/GOTOFF constant (e.g. an index term).  */
  if (GET_CODE (XEXP (x, 0)) == REG
      && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
    /* %ebx + GOT/GOTOFF */
    y = NULL;
  else if (GET_CODE (XEXP (x, 0)) == PLUS)
    {
      /* %ebx + %reg * scale + GOT/GOTOFF */
      y = XEXP (x, 0);
      if (GET_CODE (XEXP (y, 0)) == REG
	  && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
	y = XEXP (y, 1);
      else if (GET_CODE (XEXP (y, 1)) == REG
	       && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
	y = XEXP (y, 0);
      else
	return orig_x;
      if (GET_CODE (y) != REG
	  && GET_CODE (y) != MULT
	  && GET_CODE (y) != ASHIFT)
	return orig_x;
    }
  else
    return orig_x;

  /* Bare symbol: (unspec [sym] GOT/GOTOFF).  A @GOT reference is only
     meaningful through a MEM; a @GOTOFF only outside one.  */
  x = XEXP (XEXP (x, 1), 0);
  if (GET_CODE (x) == UNSPEC
      && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
	  || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
    {
      if (y)
	return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
      return XVECEXP (x, 0, 0);
    }

  /* Symbol plus constant offset: (plus (unspec [sym]) const_int).  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == UNSPEC
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
	  || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
	      && GET_CODE (orig_x) != MEM)))
    {
      x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
      if (y)
	return gen_rtx_PLUS (Pmode, y, x);
      return x;
    }

  return orig_x;
}
2a2ab3f9 7119\f
/* Print to FILE the condition-code suffix (e.g. "e", "ne", "g") for
   comparison CODE under condition-code mode MODE.  If REVERSE, print
   the suffix of the reversed condition.  FP selects the fcmov-style
   spellings for some unsigned/unordered conditions.  */

static void
put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
		    int fp, FILE *file)
{
  const char *suffix;

  if (mode == CCFPmode || mode == CCFPUmode)
    {
      /* FP comparisons must already be reduced to a single integer
	 condition — no bypass or secondary code may remain.  */
      enum rtx_code second_code, bypass_code;
      ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
      if (bypass_code != NIL || second_code != NIL)
	abort ();
      code = ix86_fp_compare_code_to_integer (code);
      mode = CCmode;
    }
  if (reverse)
    code = reverse_condition (code);

  switch (code)
    {
    case EQ:
      suffix = "e";
      break;
    case NE:
      suffix = "ne";
      break;
    case GT:
      if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
	abort ();
      suffix = "g";
      break;
    case GTU:
      /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
	 Those same assemblers have the same but opposite losage on cmov.  */
      if (mode != CCmode)
	abort ();
      suffix = fp ? "nbe" : "a";
      break;
    case LT:
      if (mode == CCNOmode || mode == CCGOCmode)
	suffix = "s";
      else if (mode == CCmode || mode == CCGCmode)
	suffix = "l";
      else
	abort ();
      break;
    case LTU:
      if (mode != CCmode)
	abort ();
      suffix = "b";
      break;
    case GE:
      if (mode == CCNOmode || mode == CCGOCmode)
	suffix = "ns";
      else if (mode == CCmode || mode == CCGCmode)
	suffix = "ge";
      else
	abort ();
      break;
    case GEU:
      /* ??? As above.  */
      if (mode != CCmode)
	abort ();
      suffix = fp ? "nb" : "ae";
      break;
    case LE:
      if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
	abort ();
      suffix = "le";
      break;
    case LEU:
      if (mode != CCmode)
	abort ();
      suffix = "be";
      break;
    case UNORDERED:
      suffix = fp ? "u" : "p";
      break;
    case ORDERED:
      suffix = fp ? "nu" : "np";
      break;
    default:
      abort ();
    }
  fputs (suffix, file);
}
7206
a55f4481
RK
7207/* Print the name of register X to FILE based on its machine mode and number.
7208 If CODE is 'w', pretend the mode is HImode.
7209 If CODE is 'b', pretend the mode is QImode.
7210 If CODE is 'k', pretend the mode is SImode.
7211 If CODE is 'q', pretend the mode is DImode.
7212 If CODE is 'h', pretend the reg is the `high' byte register.
7213 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
7214
void
print_reg (rtx x, int code, FILE *file)
{
  /* These registers never appear in user-visible assembly.  */
  if (REGNO (x) == ARG_POINTER_REGNUM
      || REGNO (x) == FRAME_POINTER_REGNUM
      || REGNO (x) == FLAGS_REG
      || REGNO (x) == FPSR_REG)
    abort ();

  if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
    putc ('%', file);

  /* Map the operand-print code to a size key: 0 = high byte, 1 = byte,
     2 = word, 3 = FP stack, 4 = dword, 8 = qword; otherwise take the
     size from the operand's mode.  */
  if (code == 'w' || MMX_REG_P (x))
    code = 2;
  else if (code == 'b')
    code = 1;
  else if (code == 'k')
    code = 4;
  else if (code == 'q')
    code = 8;
  else if (code == 'y')
    code = 3;
  else if (code == 'h')
    code = 0;
  else
    code = GET_MODE_SIZE (GET_MODE (x));

  /* Irritatingly, AMD extended registers use different naming convention
     from the normal registers.  */
  if (REX_INT_REG_P (x))
    {
      if (!TARGET_64BIT)
	abort ();
      switch (code)
	{
	case 0:
	  error ("extended registers have no high halves");
	  break;
	case 1:
	  fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
	  break;
	case 2:
	  fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
	  break;
	case 4:
	  fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
	  break;
	case 8:
	  fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
	  break;
	default:
	  error ("unsupported operand size for extended register");
	  break;
	}
      return;
    }
  switch (code)
    {
    case 3:
      /* Top of the FP stack prints as "st(0)"; other FP regs fall
	 through to the generic table.  */
      if (STACK_TOP_P (x))
	{
	  fputs ("st(0)", file);
	  break;
	}
      /* FALLTHRU */
    case 8:
    case 4:
    case 12:
      /* Integer regs of dword size and up get an 'e' or 'r' prefix.  */
      if (! ANY_FP_REG_P (x))
	putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
      /* FALLTHRU */
    case 16:
    case 2:
    normal:
      fputs (hi_reg_name[REGNO (x)], file);
      break;
    case 1:
      if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
	goto normal;
      fputs (qi_reg_name[REGNO (x)], file);
      break;
    case 0:
      if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
	goto normal;
      fputs (qi_high_reg_name[REGNO (x)], file);
      break;
    default:
      abort ();
    }
}
7305
f996902d
RH
7306/* Locate some local-dynamic symbol still in use by this function
7307 so that we can print its name in some tls_local_dynamic_base
7308 pattern. */
7309
7310static const char *
b96a374d 7311get_some_local_dynamic_name (void)
f996902d
RH
7312{
7313 rtx insn;
7314
7315 if (cfun->machine->some_ld_name)
7316 return cfun->machine->some_ld_name;
7317
7318 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
7319 if (INSN_P (insn)
7320 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
7321 return cfun->machine->some_ld_name;
7322
7323 abort ();
7324}
7325
7326static int
b96a374d 7327get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
f996902d
RH
7328{
7329 rtx x = *px;
7330
7331 if (GET_CODE (x) == SYMBOL_REF
7332 && local_dynamic_symbolic_operand (x, Pmode))
7333 {
7334 cfun->machine->some_ld_name = XSTR (x, 0);
7335 return 1;
7336 }
7337
7338 return 0;
7339}
7340
/* Meaning of CODE:
   L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
   C -- print opcode suffix for set/cmov insn.
   c -- like C, but print reversed condition
   F,f -- likewise, but for floating-point.
   O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
	otherwise nothing
   R -- print the prefix for register names.
   z -- print the opcode suffix for the size of the current operand.
   * -- print a star (in certain assembler syntax)
   A -- print an absolute memory reference.
   w -- print the operand as if it's a "word" (HImode) even if it isn't.
   s -- print a shift double count, followed by the assemblers argument
	delimiter.
   b -- print the QImode name of the register for the indicated operand.
	%b0 would print %al if operands[0] is reg 0.
   w -- likewise, print the HImode name of the register.
   k -- likewise, print the SImode name of the register.
   q -- likewise, print the DImode name of the register.
   h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
   y -- print "st(0)" instead of "st" as a register.
   D -- print condition for SSE cmp instruction.
   P -- if PIC, print an @PLT suffix.
   X -- don't print any sort of PIC '@' suffix for a symbol.
   & -- print some in-use local-dynamic symbol name.
 */

/* Print operand X to FILE, applying the modifier CODE described above.
   CODE == 0 means no modifier; the operand is printed according to its
   rtx form (register, memory, constant).  */

void
print_operand (FILE *file, rtx x, int code)
{
  if (code)
    {
      switch (code)
	{
	case '*':
	  /* AT&T syntax marks indirect jump/call targets with '*'.  */
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('*', file);
	  return;

	case '&':
	  assemble_name (file, get_some_local_dynamic_name ());
	  return;

	case 'A':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('*', file);
	  else if (ASSEMBLER_DIALECT == ASM_INTEL)
	    {
	      /* Intel syntax. For absolute addresses, registers should not
		 be surrounded by braces.  */
	      if (GET_CODE (x) != REG)
		{
		  putc ('[', file);
		  PRINT_OPERAND (file, x, 0);
		  putc (']', file);
		  return;
		}
	    }
	  else
	    abort ();

	  PRINT_OPERAND (file, x, 0);
	  return;


	/* Explicit size suffixes; emitted only for AT&T syntax, since
	   Intel syntax sizes the operand instead of the mnemonic.  */
	case 'L':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('l', file);
	  return;

	case 'W':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('w', file);
	  return;

	case 'B':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('b', file);
	  return;

	case 'Q':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('l', file);
	  return;

	case 'S':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('s', file);
	  return;

	case 'T':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('t', file);
	  return;

	case 'z':
	  /* 387 opcodes don't get size suffixes if the operands are
	     registers.  */
	  if (STACK_REG_P (x))
	    return;

	  /* Likewise if using Intel opcodes.  */
	  if (ASSEMBLER_DIALECT == ASM_INTEL)
	    return;

	  /* This is the size of op from size of operand.  */
	  switch (GET_MODE_SIZE (GET_MODE (x)))
	    {
	    case 2:
#ifdef HAVE_GAS_FILDS_FISTS
	      putc ('s', file);
#endif
	      return;

	    case 4:
	      if (GET_MODE (x) == SFmode)
		{
		  putc ('s', file);
		  return;
		}
	      else
		putc ('l', file);
	      return;

	    case 12:
	    case 16:
	      putc ('t', file);
	      return;

	    case 8:
	      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
		{
#ifdef GAS_MNEMONICS
		  putc ('q', file);
#else
		  /* Assemblers without 'q' take a double "ll" suffix.  */
		  putc ('l', file);
		  putc ('l', file);
#endif
		}
	      else
		putc ('l', file);
	      return;

	    default:
	      abort ();
	    }

	/* These size/register-name overrides are handled below after the
	   operand's rtx form is known; just fall out of the switch.  */
	case 'b':
	case 'w':
	case 'k':
	case 'q':
	case 'h':
	case 'y':
	case 'X':
	case 'P':
	  break;

	case 's':
	  if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
	    {
	      PRINT_OPERAND (file, x, 0);
	      putc (',', file);
	    }
	  return;

	case 'D':
	  /* Little bit of braindamage here.  The SSE compare instructions
	     does use completely different names for the comparisons that the
	     fp conditional moves.  */
	  switch (GET_CODE (x))
	    {
	    case EQ:
	    case UNEQ:
	      fputs ("eq", file);
	      break;
	    case LT:
	    case UNLT:
	      fputs ("lt", file);
	      break;
	    case LE:
	    case UNLE:
	      fputs ("le", file);
	      break;
	    case UNORDERED:
	      fputs ("unord", file);
	      break;
	    case NE:
	    case LTGT:
	      fputs ("neq", file);
	      break;
	    case UNGE:
	    case GE:
	      fputs ("nlt", file);
	      break;
	    case UNGT:
	    case GT:
	      fputs ("nle", file);
	      break;
	    case ORDERED:
	      fputs ("ord", file);
	      break;
	    default:
	      abort ();
	      break;
	    }
	  return;
	case 'O':
#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    {
	      switch (GET_MODE (x))
		{
		case HImode: putc ('w', file); break;
		case SImode:
		case SFmode: putc ('l', file); break;
		case DImode:
		case DFmode: putc ('q', file); break;
		default: abort ();
		}
	      putc ('.', file);
	    }
#endif
	  return;
	case 'C':
	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
	  return;
	case 'F':
#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('.', file);
#endif
	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
	  return;

	  /* Like above, but reverse condition */
	case 'c':
	  /* Check to see if argument to %c is really a constant
	     and not a condition code which needs to be reversed.  */
	  if (!COMPARISON_P (x))
	  {
	    output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
	    return;
	  }
	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
	  return;
	case 'f':
#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('.', file);
#endif
	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
	  return;
	case '+':
	  {
	    rtx x;

	    if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
	      return;

	    x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
	    if (x)
	      {
		int pred_val = INTVAL (XEXP (x, 0));

		if (pred_val < REG_BR_PROB_BASE * 45 / 100
		    || pred_val > REG_BR_PROB_BASE * 55 / 100)
		  {
		    int taken = pred_val > REG_BR_PROB_BASE / 2;
		    int cputaken = final_forward_branch_p (current_output_insn) == 0;

		    /* Emit hints only in the case default branch prediction
		       heuristics would fail.  */
		    if (taken != cputaken)
		      {
			/* We use 3e (DS) prefix for taken branches and
			   2e (CS) prefix for not taken branches.  */
			if (taken)
			  fputs ("ds ; ", file);
			else
			  fputs ("cs ; ", file);
		      }
		  }
	      }
	    return;
	  }
	default:
	    output_operand_lossage ("invalid operand code `%c'", code);
	}
    }

  /* No modifier (or a pass-through one): print by rtx form.  */
  if (GET_CODE (x) == REG)
    print_reg (x, code, file);

  else if (GET_CODE (x) == MEM)
    {
      /* No `byte ptr' prefix for call instructions.  */
      if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
	{
	  const char * size;
	  switch (GET_MODE_SIZE (GET_MODE (x)))
	    {
	    case 1: size = "BYTE"; break;
	    case 2: size = "WORD"; break;
	    case 4: size = "DWORD"; break;
	    case 8: size = "QWORD"; break;
	    case 12: size = "XWORD"; break;
	    case 16: size = "XMMWORD"; break;
	    default:
	      abort ();
	    }

	  /* Check for explicit size override (codes 'b', 'w' and 'k')  */
	  if (code == 'b')
	    size = "BYTE";
	  else if (code == 'w')
	    size = "WORD";
	  else if (code == 'k')
	    size = "DWORD";

	  fputs (size, file);
	  fputs (" PTR ", file);
	}

      x = XEXP (x, 0);
      /* Avoid (%rip) for call operands.  */
      if (CONSTANT_ADDRESS_P (x) && code == 'P'
	  && GET_CODE (x) != CONST_INT)
	output_addr_const (file, x);
      else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
	output_operand_lossage ("invalid constraints for operand");
      else
	output_address (x);
    }

  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
    {
      REAL_VALUE_TYPE r;
      long l;

      /* SFmode immediates are emitted as their 32-bit hex image.  */
      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
      REAL_VALUE_TO_TARGET_SINGLE (r, l);

      if (ASSEMBLER_DIALECT == ASM_ATT)
	putc ('$', file);
      fprintf (file, "0x%08lx", l);
    }

  /* These float cases don't actually occur as immediate operands.  */
  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
    {
      char dstr[30];

      real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
      fprintf (file, "%s", dstr);
    }

  else if (GET_CODE (x) == CONST_DOUBLE
	   && GET_MODE (x) == XFmode)
    {
      char dstr[30];

      real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
      fprintf (file, "%s", dstr);
    }

  else
    {
      /* Remaining constants: CONST_INT, SYMBOL_REF, LABEL_REF, CONST.
	 'P' suppresses the immediate marker (used for call targets).  */
      if (code != 'P')
	{
	  if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
	    {
	      if (ASSEMBLER_DIALECT == ASM_ATT)
		putc ('$', file);
	    }
	  else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
		   || GET_CODE (x) == LABEL_REF)
	    {
	      if (ASSEMBLER_DIALECT == ASM_ATT)
		putc ('$', file);
	      else
		fputs ("OFFSET FLAT:", file);
	    }
	}
      if (GET_CODE (x) == CONST_INT)
	fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
      else if (flag_pic)
	output_pic_addr_const (file, x, code);
      else
	output_addr_const (file, x);
    }
}
7732\f
/* Print a memory operand whose address is ADDR.  */

/* Decomposes ADDR into base/index/displacement/scale/segment via
   ix86_decompose_address, then prints it in either AT&T
   (disp(base,index,scale)) or Intel ([base+disp+index*scale]) form.  */

void
print_operand_address (FILE *file, rtx addr)
{
  struct ix86_address parts;
  rtx base, index, disp;
  int scale;

  if (! ix86_decompose_address (addr, &parts))
    abort ();

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;

  /* Emit a segment override prefix if one was decomposed.  */
  switch (parts.seg)
    {
    case SEG_DEFAULT:
      break;
    case SEG_FS:
    case SEG_GS:
      if (USER_LABEL_PREFIX[0] == 0)
	putc ('%', file);
      fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
      break;
    default:
      abort ();
    }

  if (!base && !index)
    {
      /* Displacement only requires special attention.  */

      if (GET_CODE (disp) == CONST_INT)
	{
	  if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
	    {
	      /* Intel syntax needs an explicit ds: for a bare constant
		 address so it is not mistaken for an immediate.  */
	      if (USER_LABEL_PREFIX[0] == 0)
		putc ('%', file);
	      fputs ("ds:", file);
	    }
	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
	}
      else if (flag_pic)
	output_pic_addr_const (file, disp, 0);
      else
	output_addr_const (file, disp);

      /* Use one byte shorter RIP relative addressing for 64bit mode.  */
      if (TARGET_64BIT
	  && ((GET_CODE (disp) == SYMBOL_REF
	       && ! tls_symbolic_operand (disp, GET_MODE (disp)))
	      || GET_CODE (disp) == LABEL_REF
	      || (GET_CODE (disp) == CONST
		  && GET_CODE (XEXP (disp, 0)) == PLUS
		  && (GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
		      || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
		  && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)))
	fputs ("(%rip)", file);
    }
  else
    {
      if (ASSEMBLER_DIALECT == ASM_ATT)
	{
	  /* AT&T form: disp(base,index,scale).  */
	  if (disp)
	    {
	      if (flag_pic)
		output_pic_addr_const (file, disp, 0);
	      else if (GET_CODE (disp) == LABEL_REF)
		output_asm_label (disp);
	      else
		output_addr_const (file, disp);
	    }

	  putc ('(', file);
	  if (base)
	    print_reg (base, 0, file);
	  if (index)
	    {
	      putc (',', file);
	      print_reg (index, 0, file);
	      if (scale != 1)
		fprintf (file, ",%d", scale);
	    }
	  putc (')', file);
	}
      else
	{
	  /* Intel form: [base+offset+index*scale].  OFFSET collects any
	     numeric part of the displacement; symbols print first.  */
	  rtx offset = NULL_RTX;

	  if (disp)
	    {
	      /* Pull out the offset of a symbol; print any symbol itself.  */
	      if (GET_CODE (disp) == CONST
		  && GET_CODE (XEXP (disp, 0)) == PLUS
		  && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
		{
		  offset = XEXP (XEXP (disp, 0), 1);
		  disp = gen_rtx_CONST (VOIDmode,
					XEXP (XEXP (disp, 0), 0));
		}

	      if (flag_pic)
		output_pic_addr_const (file, disp, 0);
	      else if (GET_CODE (disp) == LABEL_REF)
		output_asm_label (disp);
	      else if (GET_CODE (disp) == CONST_INT)
		offset = disp;
	      else
		output_addr_const (file, disp);
	    }

	  putc ('[', file);
	  if (base)
	    {
	      print_reg (base, 0, file);
	      if (offset)
		{
		  /* Negative offsets carry their own '-' sign.  */
		  if (INTVAL (offset) >= 0)
		    putc ('+', file);
		  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
		}
	    }
	  else if (offset)
	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
	  else
	    putc ('0', file);

	  if (index)
	    {
	      putc ('+', file);
	      print_reg (index, 0, file);
	      if (scale != 1)
		fprintf (file, "*%d", scale);
	    }
	  putc (']', file);
	}
    }
}
f996902d
RH
7874
7875bool
b96a374d 7876output_addr_const_extra (FILE *file, rtx x)
f996902d
RH
7877{
7878 rtx op;
7879
7880 if (GET_CODE (x) != UNSPEC)
7881 return false;
7882
7883 op = XVECEXP (x, 0, 0);
7884 switch (XINT (x, 1))
7885 {
7886 case UNSPEC_GOTTPOFF:
7887 output_addr_const (file, op);
dea73790 7888 /* FIXME: This might be @TPOFF in Sun ld. */
f996902d
RH
7889 fputs ("@GOTTPOFF", file);
7890 break;
7891 case UNSPEC_TPOFF:
7892 output_addr_const (file, op);
7893 fputs ("@TPOFF", file);
7894 break;
7895 case UNSPEC_NTPOFF:
7896 output_addr_const (file, op);
75d38379
JJ
7897 if (TARGET_64BIT)
7898 fputs ("@TPOFF", file);
7899 else
7900 fputs ("@NTPOFF", file);
f996902d
RH
7901 break;
7902 case UNSPEC_DTPOFF:
7903 output_addr_const (file, op);
7904 fputs ("@DTPOFF", file);
7905 break;
dea73790
JJ
7906 case UNSPEC_GOTNTPOFF:
7907 output_addr_const (file, op);
75d38379
JJ
7908 if (TARGET_64BIT)
7909 fputs ("@GOTTPOFF(%rip)", file);
7910 else
7911 fputs ("@GOTNTPOFF", file);
dea73790
JJ
7912 break;
7913 case UNSPEC_INDNTPOFF:
7914 output_addr_const (file, op);
7915 fputs ("@INDNTPOFF", file);
7916 break;
f996902d
RH
7917
7918 default:
7919 return false;
7920 }
7921
7922 return true;
7923}
2a2ab3f9
JVA
7924\f
7925/* Split one or more DImode RTL references into pairs of SImode
7926 references. The RTL can be REG, offsettable MEM, integer constant, or
7927 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7928 split and "num" is its length. lo_half and hi_half are output arrays
0f290768 7929 that parallel "operands". */
2a2ab3f9
JVA
7930
7931void
b96a374d 7932split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
2a2ab3f9
JVA
7933{
7934 while (num--)
7935 {
57dbca5e 7936 rtx op = operands[num];
b932f770
JH
7937
7938 /* simplify_subreg refuse to split volatile memory addresses,
7939 but we still have to handle it. */
7940 if (GET_CODE (op) == MEM)
2a2ab3f9 7941 {
f4ef873c 7942 lo_half[num] = adjust_address (op, SImode, 0);
b72f00af 7943 hi_half[num] = adjust_address (op, SImode, 4);
2a2ab3f9
JVA
7944 }
7945 else
b932f770 7946 {
38ca929b
JH
7947 lo_half[num] = simplify_gen_subreg (SImode, op,
7948 GET_MODE (op) == VOIDmode
7949 ? DImode : GET_MODE (op), 0);
7950 hi_half[num] = simplify_gen_subreg (SImode, op,
7951 GET_MODE (op) == VOIDmode
7952 ? DImode : GET_MODE (op), 4);
b932f770 7953 }
2a2ab3f9
JVA
7954 }
7955}
44cf5b6a
JH
7956/* Split one or more TImode RTL references into pairs of SImode
7957 references. The RTL can be REG, offsettable MEM, integer constant, or
7958 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7959 split and "num" is its length. lo_half and hi_half are output arrays
7960 that parallel "operands". */
7961
7962void
b96a374d 7963split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
44cf5b6a
JH
7964{
7965 while (num--)
7966 {
7967 rtx op = operands[num];
b932f770
JH
7968
7969 /* simplify_subreg refuse to split volatile memory addresses, but we
7970 still have to handle it. */
7971 if (GET_CODE (op) == MEM)
44cf5b6a
JH
7972 {
7973 lo_half[num] = adjust_address (op, DImode, 0);
7974 hi_half[num] = adjust_address (op, DImode, 8);
7975 }
7976 else
b932f770
JH
7977 {
7978 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
7979 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
7980 }
44cf5b6a
JH
7981 }
7982}
2a2ab3f9 7983\f
2a2ab3f9
JVA
/* Output code to perform a 387 binary operation in INSN, one of PLUS,
   MINUS, MULT or DIV.  OPERANDS are the insn operands, where operands[3]
   is the expression of the binary operation.  The output may either be
   emitted here, or returned to the caller, like all output_* functions.

   There is no guarantee that the operands are the same mode, as they
   might be within FLOAT or FLOAT_EXTEND expressions.  */

#ifndef SYSV386_COMPAT
/* Set to 1 for compatibility with brain-damaged assemblers.  No-one
   wants to fix the assemblers because that causes incompatibility
   with gcc.  No-one wants to fix gcc because that causes
   incompatibility with assemblers...  You can use the option of
   -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way.  */
#define SYSV386_COMPAT 1
#endif

const char *
output_387_binary_op (rtx insn, rtx *operands)
{
  /* BUF holds the assembled template; returned to the caller, so it
     must be static.  */
  static char buf[30];
  const char *p;
  const char *ssep;
  int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);

#ifdef ENABLE_CHECKING
  /* Even if we do not want to check the inputs, this documents input
     constraints.  Which helps in understanding the following code.  */
  if (STACK_REG_P (operands[0])
      && ((REG_P (operands[1])
	   && REGNO (operands[0]) == REGNO (operands[1])
	   && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
	  || (REG_P (operands[2])
	      && REGNO (operands[0]) == REGNO (operands[2])
	      && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
      && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
    ; /* ok */
  else if (!is_sse)
    abort ();
#endif

  /* Pick the base mnemonic: fi* forms for an integer operand, plain
     forms otherwise; SSEP is the scalar SSE spelling.  */
  switch (GET_CODE (operands[3]))
    {
    case PLUS:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fiadd";
      else
	p = "fadd";
      ssep = "add";
      break;

    case MINUS:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fisub";
      else
	p = "fsub";
      ssep = "sub";
      break;

    case MULT:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fimul";
      else
	p = "fmul";
      ssep = "mul";
      break;

    case DIV:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fidiv";
      else
	p = "fdiv";
      ssep = "div";
      break;

    default:
      abort ();
    }

  /* Scalar SSE: ss for SFmode, sd for DFmode; no stack games needed.  */
  if (is_sse)
   {
      strcpy (buf, ssep);
      if (GET_MODE (operands[0]) == SFmode)
	strcat (buf, "ss\t{%2, %0|%0, %2}");
      else
	strcat (buf, "sd\t{%2, %0|%0, %2}");
      return buf;
   }
  strcpy (buf, p);

  /* 387 stack form: choose the operand template (and pop variant) from
     which operand is memory, which is at st(0), and which dies.  */
  switch (GET_CODE (operands[3]))
    {
    case MULT:
    case PLUS:
      /* Commutative: canonicalize so the tied operand is operands[1].  */
      if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
	{
	  rtx temp = operands[2];
	  operands[2] = operands[1];
	  operands[1] = temp;
	}

      /* know operands[0] == operands[1].  */

      if (GET_CODE (operands[2]) == MEM)
	{
	  p = "%z2\t%2";
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
	{
	  if (STACK_TOP_P (operands[0]))
	    /* How is it that we are storing to a dead operand[2]?
	       Well, presumably operands[1] is dead too.  We can't
	       store the result to st(0) as st(0) gets popped on this
	       instruction.  Instead store to operands[2] (which I
	       think has to be st(1)).  st(1) will be popped later.
	       gcc <= 2.8.1 didn't have this check and generated
	       assembly code that the Unixware assembler rejected.  */
	    p = "p\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
	  else
	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
	  break;
	}

      if (STACK_TOP_P (operands[0]))
	p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
      else
	p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
      break;

    case MINUS:
    case DIV:
      /* Non-commutative: the reversed (r) forms are needed when the
	 memory or dying operand is on the left.  */
      if (GET_CODE (operands[1]) == MEM)
	{
	  p = "r%z1\t%1";
	  break;
	}

      if (GET_CODE (operands[2]) == MEM)
	{
	  p = "%z2\t%2";
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
	{
#if SYSV386_COMPAT
	  /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
	     derived assemblers, confusingly reverse the direction of
	     the operation for fsub{r} and fdiv{r} when the
	     destination register is not st(0).  The Intel assembler
	     doesn't have this brain damage.  Read !SYSV386_COMPAT to
	     figure out what the hardware really does.  */
	  if (STACK_TOP_P (operands[0]))
	    p = "{p\t%0, %2|rp\t%2, %0}";
	  else
	    p = "{rp\t%2, %0|p\t%0, %2}";
#else
	  if (STACK_TOP_P (operands[0]))
	    /* As above for fmul/fadd, we can't store to st(0).  */
	    p = "rp\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
	  else
	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
#endif
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
	{
#if SYSV386_COMPAT
	  if (STACK_TOP_P (operands[0]))
	    p = "{rp\t%0, %1|p\t%1, %0}";
	  else
	    p = "{p\t%1, %0|rp\t%0, %1}";
#else
	  if (STACK_TOP_P (operands[0]))
	    p = "p\t{%0, %1|%1, %0}";	/* st(1) = st(1) op st(0); pop */
	  else
	    p = "rp\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2); pop */
#endif
	  break;
	}

      if (STACK_TOP_P (operands[0]))
	{
	  if (STACK_TOP_P (operands[1]))
	    p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
	  else
	    p = "r\t{%y1, %0|%0, %y1}";	/* st(0) = st(r1) op st(0) */
	  break;
	}
      else if (STACK_TOP_P (operands[1]))
	{
#if SYSV386_COMPAT
	  p = "{\t%1, %0|r\t%0, %1}";
#else
	  p = "r\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2) */
#endif
	}
      else
	{
#if SYSV386_COMPAT
	  p = "{r\t%2, %0|\t%0, %2}";
#else
	  p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
#endif
	}
      break;

    default:
      abort ();
    }

  strcat (buf, p);
  return buf;
}
2a2ab3f9 8204}
e075ae69 8205
/* Output code to initialize control word copies used by
   trunc?f?i patterns.  NORMAL is set to current control word, while ROUND_DOWN
   is set to control word rounding downwards.  */
void
emit_i387_cw_initialization (rtx normal, rtx round_down)
{
  rtx reg = gen_reg_rtx (HImode);

  /* Store the current control word into NORMAL, copy it into REG, set
     the rounding-control bits (0xc00 = round toward zero / truncate),
     then store the modified word into ROUND_DOWN.  */
  emit_insn (gen_x86_fnstcw_1 (normal));
  emit_move_insn (reg, normal);
  if (!TARGET_PARTIAL_REG_STALL && !optimize_size
      && !TARGET_64BIT)
    /* Insert 0xc into bits 8..11 via movsi_insv_1 — cheaper than the
       16-bit OR on targets without partial-register stalls.  */
    emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
  else
    emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
  emit_move_insn (round_down, reg);
}
8223
/* Output code for INSN to convert a float to a signed int.  OPERANDS
   are the insn operands.  The output may be [HSD]Imode and the input
   operand may be [SDX]Fmode.  Emits its templates directly via
   output_asm_insn and returns the empty string.  */

const char *
output_fix_trunc (rtx insn, rtx *operands)
{
  int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
  int dimode_p = GET_MODE (operands[0]) == DImode;

  /* Jump through a hoop or two for DImode, since the hardware has no
     non-popping instruction.  We used to do this a different way, but
     that was somewhat fragile and broke with post-reload splitters.  */
  if (dimode_p && !stack_top_dies)
    output_asm_insn ("fld\t%y1", operands);

  if (!STACK_TOP_P (operands[1]))
    abort ();

  if (GET_CODE (operands[0]) != MEM)
    abort ();

  /* Switch to the truncating control word (operands[3]), store the
     integer (popping when the value dies or we duplicated it above),
     then restore the original control word (operands[2]).  */
  output_asm_insn ("fldcw\t%3", operands);
  if (stack_top_dies || dimode_p)
    output_asm_insn ("fistp%z0\t%0", operands);
  else
    output_asm_insn ("fist%z0\t%0", operands);
  output_asm_insn ("fldcw\t%2", operands);

  return "";
}
cda749b1 8255
e075ae69
RH
/* Output code for INSN to compare OPERANDS.  EFLAGS_P is 1 when fcomi
   should be used and 2 when fnstsw should be used.  UNORDERED_P is true
   when fucom should be used.  */

const char *
output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
{
  int stack_top_dies;
  rtx cmp_op0 = operands[0];
  rtx cmp_op1 = operands[1];
  int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);

  /* In the fnstsw form the compared values are operands[1]/[2];
     operands[0] is the status-word destination.  */
  if (eflags_p == 2)
    {
      cmp_op0 = cmp_op1;
      cmp_op1 = operands[2];
    }
  /* SSE compares are simple: [u]comiss/[u]comisd by mode.  */
  if (is_sse)
    {
      if (GET_MODE (operands[0]) == SFmode)
	if (unordered_p)
	  return "ucomiss\t{%1, %0|%0, %1}";
	else
	  return "comiss\t{%1, %0|%0, %1}";
      else
	if (unordered_p)
	  return "ucomisd\t{%1, %0|%0, %1}";
	else
	  return "comisd\t{%1, %0|%0, %1}";
    }

  if (! STACK_TOP_P (cmp_op0))
    abort ();

  stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;

  if (STACK_REG_P (cmp_op1)
      && stack_top_dies
      && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
      && REGNO (cmp_op1) != FIRST_STACK_REG)
    {
      /* If both the top of the 387 stack dies, and the other operand
	 is also a stack register that dies, then this must be a
	 `fcompp' float compare */

      if (eflags_p == 1)
	{
	  /* There is no double popping fcomi variant.  Fortunately,
	     eflags is immune from the fstp's cc clobbering.  */
	  if (unordered_p)
	    output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
	  else
	    output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
	  return TARGET_USE_FFREEP ? "ffreep\t%y0" : "fstp\t%y0";
	}
      else
	{
	  if (eflags_p == 2)
	    {
	      if (unordered_p)
		return "fucompp\n\tfnstsw\t%0";
	      else
		return "fcompp\n\tfnstsw\t%0";
	    }
	  else
	    {
	      if (unordered_p)
		return "fucompp";
	      else
		return "fcompp";
	    }
	}
    }
  else
    {
      /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies.  */

      /* Template table indexed by the 5-bit MASK built below; NULL
	 entries are combinations that cannot occur.  */
      static const char * const alt[24] =
      {
	"fcom%z1\t%y1",
	"fcomp%z1\t%y1",
	"fucom%z1\t%y1",
	"fucomp%z1\t%y1",

	"ficom%z1\t%y1",
	"ficomp%z1\t%y1",
	NULL,
	NULL,

	"fcomi\t{%y1, %0|%0, %y1}",
	"fcomip\t{%y1, %0|%0, %y1}",
	"fucomi\t{%y1, %0|%0, %y1}",
	"fucomip\t{%y1, %0|%0, %y1}",

	NULL,
	NULL,
	NULL,
	NULL,

	"fcom%z2\t%y2\n\tfnstsw\t%0",
	"fcomp%z2\t%y2\n\tfnstsw\t%0",
	"fucom%z2\t%y2\n\tfnstsw\t%0",
	"fucomp%z2\t%y2\n\tfnstsw\t%0",

	"ficom%z2\t%y2\n\tfnstsw\t%0",
	"ficomp%z2\t%y2\n\tfnstsw\t%0",
	NULL,
	NULL
      };

      int mask;
      const char *ret;

      mask  = eflags_p << 3;
      mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
      mask |= unordered_p << 1;
      mask |= stack_top_dies;

      if (mask >= 24)
	abort ();
      ret = alt[mask];
      if (ret == NULL)
	abort ();

      return ret;
    }
}
2a2ab3f9 8383
f88c65f7 8384void
b96a374d 8385ix86_output_addr_vec_elt (FILE *file, int value)
f88c65f7
RH
8386{
8387 const char *directive = ASM_LONG;
8388
8389 if (TARGET_64BIT)
8390 {
8391#ifdef ASM_QUAD
8392 directive = ASM_QUAD;
8393#else
8394 abort ();
8395#endif
8396 }
8397
8398 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
8399}
8400
/* Emit one element of an ADDR_DIFF_VEC (relative jump table): the
   difference between local label VALUE and the table base.  The exact
   form depends on target and assembler capabilities.  */

void
ix86_output_addr_diff_elt (FILE *file, int value, int rel)
{
  if (TARGET_64BIT)
    /* Plain label difference against REL.  */
    fprintf (file, "%s%s%d-%s%d\n",
	     ASM_LONG, LPREFIX, value, LPREFIX, rel);
  else if (HAVE_AS_GOTOFF_IN_DATA)
    fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
#if TARGET_MACHO
  else if (TARGET_MACHO)
    {
      /* Darwin: difference against the picbase symbol.  */
      fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
      machopic_output_function_base_name (file);
      fprintf(file, "\n");
    }
#endif
  else
    /* Fallback: express the GOTOFF as GOT symbol plus a label
       difference against the current location.  */
    asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
		 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
}
32b5b1aa 8421\f
a8bac9ab
RH
8422/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
8423 for the target. */
8424
8425void
b96a374d 8426ix86_expand_clear (rtx dest)
a8bac9ab
RH
8427{
8428 rtx tmp;
8429
8430 /* We play register width games, which are only valid after reload. */
8431 if (!reload_completed)
8432 abort ();
8433
8434 /* Avoid HImode and its attendant prefix byte. */
8435 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
8436 dest = gen_rtx_REG (SImode, REGNO (dest));
8437
8438 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
8439
8440 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
8441 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
8442 {
8443 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
8444 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8445 }
8446
8447 emit_insn (tmp);
8448}
8449
f996902d
RH
8450/* X is an unchanging MEM. If it is a constant pool reference, return
8451 the constant pool rtx, else NULL. */
8452
8453static rtx
b96a374d 8454maybe_get_pool_constant (rtx x)
f996902d 8455{
69bd9368 8456 x = ix86_delegitimize_address (XEXP (x, 0));
f996902d
RH
8457
8458 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8459 return get_pool_constant (x);
8460
8461 return NULL_RTX;
8462}
8463
/* Expand a scalar move of MODE from operands[1] to operands[0], massaging
   the operands (TLS, PIC, constant pool, push special cases) until a
   single SET insn is valid, then emit it.  */

void
ix86_expand_move (enum machine_mode mode, rtx operands[])
{
  int strict = (reload_in_progress || reload_completed);
  rtx op0, op1;
  enum tls_model model;

  op0 = operands[0];
  op1 = operands[1];

  /* TLS symbols need a model-specific legitimization sequence first.  */
  model = tls_symbolic_operand (op1, Pmode);
  if (model)
    {
      op1 = legitimize_tls_address (op1, model, true);
      op1 = force_operand (op1, op0);
      /* force_operand may already have stored into op0.  */
      if (op1 == op0)
	return;
    }

  if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
    {
#if TARGET_MACHO
      if (MACHOPIC_PURE)
	{
	  rtx temp = ((reload_in_progress
		       || ((op0 && GET_CODE (op0) == REG)
			   && mode == Pmode))
		      ? op0 : gen_reg_rtx (Pmode));
	  op1 = machopic_indirect_data_reference (op1, temp);
	  op1 = machopic_legitimize_pic_address (op1, mode,
						 temp == op1 ? 0 : temp);
	}
      else if (MACHOPIC_INDIRECT)
	op1 = machopic_indirect_data_reference (op1, 0);
      if (op0 == op1)
	return;
#else
      /* Symbolic PIC sources cannot go straight to memory.  */
      if (GET_CODE (op0) == MEM)
	op1 = force_reg (Pmode, op1);
      else
	op1 = legitimize_address (op1, op1, Pmode);
#endif /* TARGET_MACHO */
    }
  else
    {
      /* mem <- mem is only allowed for pushes of exact rounded size.  */
      if (GET_CODE (op0) == MEM
	  && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
	      || !push_operand (op0, mode))
	  && GET_CODE (op1) == MEM)
	op1 = force_reg (mode, op1);

      if (push_operand (op0, mode)
	  && ! general_no_elim_operand (op1, mode))
	op1 = copy_to_mode_reg (mode, op1);

      /* Force large constants in 64bit compilation into register
	 to get them CSEed.  */
      if (TARGET_64BIT && mode == DImode
	  && immediate_operand (op1, mode)
	  && !x86_64_zero_extended_value (op1)
	  && !register_operand (op0, mode)
	  && optimize && !reload_completed && !reload_in_progress)
	op1 = copy_to_mode_reg (mode, op1);

      if (FLOAT_MODE_P (mode))
	{
	  /* If we are loading a floating point constant to a register,
	     force the value to memory now, since we'll get better code
	     out the back end.  */

	  if (strict)
	    ;  /* During/after reload no new pseudos or pool entries.  */
	  else if (GET_CODE (op1) == CONST_DOUBLE)
	    {
	      op1 = validize_mem (force_const_mem (mode, op1));
	      if (!register_operand (op0, mode))
		{
		  /* Go via a temporary register for mem <- pool-mem.  */
		  rtx temp = gen_reg_rtx (mode);
		  emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
		  emit_move_insn (op0, temp);
		  return;
		}
	    }
	}
    }

  emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
}
e9a25f70 8552
/* Expand a vector-mode move from operands[1] to operands[0] and emit
   the resulting SET insn, legitimizing constants and mem/mem moves.  */

void
ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
{
  /* Force constants other than zero into memory.  We do not know how
     the instructions used to build constants modify the upper 64 bits
     of the register, once we have that information we may be able
     to handle some of them more efficiently.  */
  if ((reload_in_progress | reload_completed) == 0
      && register_operand (operands[0], mode)
      && CONSTANT_P (operands[1]) && operands[1] != CONST0_RTX (mode))
    operands[1] = validize_mem (force_const_mem (mode, operands[1]));

  /* Make operand1 a register if it isn't already.  */
  if (!no_new_pseudos
      && !register_operand (operands[0], mode)
      && !register_operand (operands[1], mode))
    {
      /* Neither side is a register (e.g. mem <- mem): go via a temp.  */
      rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
      emit_move_insn (operands[0], temp);
      return;
    }

  emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
}
e37af218 8577
/* Attempt to expand a binary operator.  Make the expansion closer to the
   actual machine, then just general_operand, which will allow 3 separate
   memory references (one output, two input) in a single insn.  Emits the
   insn (with a flags clobber except during reload) and a fixup move when
   the destination had to be redirected to a register.  */

void
ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
			     rtx operands[])
{
  int matching_memory;
  rtx src1, src2, dst, op, clob;

  dst = operands[0];
  src1 = operands[1];
  src2 = operands[2];

  /* Recognize <var1> = <value> <op> <var1> for commutative operators */
  if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
      && (rtx_equal_p (dst, src2)
	  || immediate_operand (src1, mode)))
    {
      rtx temp = src1;
      src1 = src2;
      src2 = temp;
    }

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  matching_memory = 0;
  if (GET_CODE (dst) == MEM)
    {
      if (rtx_equal_p (dst, src1))
	matching_memory = 1;
      else if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
	       && rtx_equal_p (dst, src2))
	matching_memory = 2;
      else
	dst = gen_reg_rtx (mode);
    }

  /* Both source operands cannot be in memory.  */
  if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
    {
      /* Keep the operand that matches the memory destination.  */
      if (matching_memory != 2)
	src2 = force_reg (mode, src2);
      else
	src1 = force_reg (mode, src1);
    }

  /* If the operation is not commutable, source 1 cannot be a constant
     or non-matching memory.  */
  if ((CONSTANT_P (src1)
       || (!matching_memory && GET_CODE (src1) == MEM))
      && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
    src1 = force_reg (mode, src1);

  /* If optimizing, copy to regs to improve CSE */
  if (optimize && ! no_new_pseudos)
    {
      if (GET_CODE (dst) == MEM)
	dst = gen_reg_rtx (mode);
      if (GET_CODE (src1) == MEM)
	src1 = force_reg (mode, src1);
      if (GET_CODE (src2) == MEM)
	src2 = force_reg (mode, src2);
    }

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
  if (reload_in_progress)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
         it doesn't want to clobber it.  We can only do this with PLUS.  */
      if (code != PLUS)
	abort ();
      emit_insn (op);
    }
  else
    {
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}
8665
8666/* Return TRUE or FALSE depending on whether the binary operator meets the
8667 appropriate constraints. */
8668
8669int
b96a374d
AJ
8670ix86_binary_operator_ok (enum rtx_code code,
8671 enum machine_mode mode ATTRIBUTE_UNUSED,
8672 rtx operands[3])
e075ae69
RH
8673{
8674 /* Both source operands cannot be in memory. */
8675 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
8676 return 0;
8677 /* If the operation is not commutable, source 1 cannot be a constant. */
ec8e098d 8678 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
e075ae69
RH
8679 return 0;
8680 /* If the destination is memory, we must have a matching source operand. */
8681 if (GET_CODE (operands[0]) == MEM
8682 && ! (rtx_equal_p (operands[0], operands[1])
ec8e098d 8683 || (GET_RTX_CLASS (code) == RTX_COMM_ARITH
e075ae69
RH
8684 && rtx_equal_p (operands[0], operands[2]))))
8685 return 0;
06a964de 8686 /* If the operation is not commutable and the source 1 is memory, we must
d6a7951f 8687 have a matching destination. */
06a964de 8688 if (GET_CODE (operands[1]) == MEM
ec8e098d 8689 && GET_RTX_CLASS (code) != RTX_COMM_ARITH
06a964de
JH
8690 && ! rtx_equal_p (operands[0], operands[1]))
8691 return 0;
e075ae69
RH
8692 return 1;
8693}
8694
/* Attempt to expand a unary operator.  Make the expansion closer to the
   actual machine, then just general_operand, which will allow 2 separate
   memory references (one output, one input) in a single insn.  Emits the
   insn (NOT never clobbers flags; everything else gets a flags clobber
   outside reload) and a fixup move when needed.  */

void
ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
			    rtx operands[])
{
  int matching_memory;
  rtx src, dst, op, clob;

  dst = operands[0];
  src = operands[1];

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  matching_memory = 0;
  if (GET_CODE (dst) == MEM)
    {
      if (rtx_equal_p (dst, src))
	matching_memory = 1;
      else
	dst = gen_reg_rtx (mode);
    }

  /* When source operand is memory, destination must match.  */
  if (!matching_memory && GET_CODE (src) == MEM)
    src = force_reg (mode, src);

  /* If optimizing, copy to regs to improve CSE */
  if (optimize && ! no_new_pseudos)
    {
      if (GET_CODE (dst) == MEM)
	dst = gen_reg_rtx (mode);
      if (GET_CODE (src) == MEM)
	src = force_reg (mode, src);
    }

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
  if (reload_in_progress || code == NOT)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
         it doesn't want to clobber it.  */
      if (code != NOT)
	abort ();
      emit_insn (op);
    }
  else
    {
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}
8754
8755/* Return TRUE or FALSE depending on whether the unary operator meets the
8756 appropriate constraints. */
8757
8758int
b96a374d
AJ
8759ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
8760 enum machine_mode mode ATTRIBUTE_UNUSED,
8761 rtx operands[2] ATTRIBUTE_UNUSED)
e075ae69 8762{
06a964de
JH
8763 /* If one of operands is memory, source and destination must match. */
8764 if ((GET_CODE (operands[0]) == MEM
8765 || GET_CODE (operands[1]) == MEM)
8766 && ! rtx_equal_p (operands[0], operands[1]))
8767 return FALSE;
e075ae69
RH
8768 return TRUE;
8769}
8770
/* Return TRUE or FALSE depending on whether the first SET in INSN
   has source and destination with matching CC modes, and that the
   CC mode is at least as constrained as REQ_MODE.  The fallthrough
   chain below encodes the lattice CCmode > CCGCmode > CCGOCmode >
   CCZmode: a mode accepts REQ_MODE unless REQ_MODE is strictly more
   constrained.  */

int
ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
{
  rtx set;
  enum machine_mode set_mode;

  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  if (GET_CODE (set) != SET)
    abort ();
  if (GET_CODE (SET_SRC (set)) != COMPARE)
    abort ();

  set_mode = GET_MODE (SET_DEST (set));
  switch (set_mode)
    {
    case CCNOmode:
      /* CCNOmode only matches itself, or CCmode of a compare with zero.  */
      if (req_mode != CCNOmode
	  && (req_mode != CCmode
	      || XEXP (SET_SRC (set), 1) != const0_rtx))
	return 0;
      break;
    case CCmode:
      if (req_mode == CCGCmode)
	return 0;
      /* FALLTHRU */
    case CCGCmode:
      if (req_mode == CCGOCmode || req_mode == CCNOmode)
	return 0;
      /* FALLTHRU */
    case CCGOCmode:
      if (req_mode == CCZmode)
	return 0;
      /* FALLTHRU */
    case CCZmode:
      break;

    default:
      abort ();
    }

  return (GET_MODE (SET_SRC (set)) == set_mode);
}
8819
e075ae69
RH
8820/* Generate insn patterns to do an integer compare of OPERANDS. */
8821
8822static rtx
b96a374d 8823ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
e075ae69
RH
8824{
8825 enum machine_mode cmpmode;
8826 rtx tmp, flags;
8827
8828 cmpmode = SELECT_CC_MODE (code, op0, op1);
8829 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
8830
8831 /* This is very simple, but making the interface the same as in the
8832 FP case makes the rest of the code easier. */
8833 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
8834 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
8835
8836 /* Return the test that should be put into the flags user, i.e.
8837 the bcc, scc, or cmov instruction. */
8838 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
8839}
8840
3a3677ff
RH
8841/* Figure out whether to use ordered or unordered fp comparisons.
8842 Return the appropriate mode to use. */
e075ae69 8843
b1cdafbb 8844enum machine_mode
b96a374d 8845ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
e075ae69 8846{
9e7adcb3
JH
8847 /* ??? In order to make all comparisons reversible, we do all comparisons
8848 non-trapping when compiling for IEEE. Once gcc is able to distinguish
8849 all forms trapping and nontrapping comparisons, we can make inequality
8850 comparisons trapping again, since it results in better code when using
8851 FCOM based compares. */
8852 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
3a3677ff
RH
8853}
8854
/* Select the least-constrained condition code mode able to represent the
   comparison CODE of OP0 with OP1, so that independent compares can share
   a flags-setting insn.  */

enum machine_mode
ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
{
  if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
    return ix86_fp_compare_mode (code);
  switch (code)
    {
      /* Only zero flag is needed.  */
    case EQ:			/* ZF=0 */
    case NE:			/* ZF!=0 */
      return CCZmode;
      /* Codes needing carry flag.  */
    case GEU:			/* CF=0 */
    case GTU:			/* CF=0 & ZF=0 */
    case LTU:			/* CF=1 */
    case LEU:			/* CF=1 | ZF=1 */
      return CCmode;
      /* Codes possibly doable only with sign flag when
         comparing against zero.  */
    case GE:			/* SF=OF   or   SF=0 */
    case LT:			/* SF<>OF  or   SF=1 */
      if (op1 == const0_rtx)
	return CCGOCmode;
      else
	/* For other cases Carry flag is not required.  */
	return CCGCmode;
      /* Codes doable only with sign flag when comparing
         against zero, but we miss jump instruction for it
         so we need to use relational tests against overflow
         that thus needs to be zero.  */
    case GT:			/* ZF=0 & SF=OF */
    case LE:			/* ZF=1 | SF<>OF */
      if (op1 == const0_rtx)
	return CCNOmode;
      else
	return CCGCmode;
      /* strcmp pattern do (use flags) and combine may ask us for proper
	 mode.  */
    case USE:
      return CCmode;
    default:
      abort ();
    }
}
8899
e129d93a
ILT
8900/* Return the fixed registers used for condition codes. */
8901
8902static bool
8903ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
8904{
8905 *p1 = FLAGS_REG;
8906 *p2 = FPSR_REG;
8907 return true;
8908}
8909
/* If two condition code modes are compatible, return a condition code
   mode which is compatible with both.  Otherwise, return
   VOIDmode.  */

static enum machine_mode
ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
{
  if (m1 == m2)
    return m1;

  if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
    return VOIDmode;

  /* CCGC and CCGO differ only in whether the carry flag is valid;
     their common superset is CCGC.  */
  if ((m1 == CCGCmode && m2 == CCGOCmode)
      || (m1 == CCGOCmode && m2 == CCGCmode))
    return CCGCmode;

  switch (m1)
    {
    default:
      abort ();

    case CCmode:
    case CCGCmode:
    case CCGOCmode:
    case CCNOmode:
    case CCZmode:
      switch (m2)
	{
	default:
	  return VOIDmode;

	case CCmode:
	case CCGCmode:
	case CCGOCmode:
	case CCNOmode:
	case CCZmode:
	  /* Any two distinct integer CC modes fall back to full CCmode.  */
	  return CCmode;
	}

    case CCFPmode:
    case CCFPUmode:
      /* These are only compatible with themselves, which we already
	 checked above.  */
      return VOIDmode;
    }
}
8957
3a3677ff
RH
8958/* Return true if we should use an FCOMI instruction for this fp comparison. */
8959
a940d8bd 8960int
b96a374d 8961ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
3a3677ff 8962{
9e7adcb3
JH
8963 enum rtx_code swapped_code = swap_condition (code);
8964 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
8965 || (ix86_fp_comparison_cost (swapped_code)
8966 == ix86_fp_comparison_fcomi_cost (swapped_code)));
3a3677ff
RH
8967}
8968
/* Swap, force into registers, or otherwise massage the two operands
   to a fp comparison.  The operands are updated in place; the new
   comparison code is returned.  */

static enum rtx_code
ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
{
  enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
  rtx op0 = *pop0, op1 = *pop1;
  enum machine_mode op_mode = GET_MODE (op0);
  int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);

  /* All of the unordered compare instructions only work on registers.
     The same is true of the XFmode compare instructions.  The same is
     true of the fcomi compare instructions.  */

  if (!is_sse
      && (fpcmp_mode == CCFPUmode
	  || op_mode == XFmode
	  || ix86_use_fcomi_compare (code)))
    {
      op0 = force_reg (op_mode, op0);
      op1 = force_reg (op_mode, op1);
    }
  else
    {
      /* %%% We only allow op1 in memory; op0 must be st(0).  So swap
	 things around if they appear profitable, otherwise force op0
	 into a register.  */

      if (standard_80387_constant_p (op0) == 0
	  || (GET_CODE (op0) == MEM
	      && ! (standard_80387_constant_p (op1) == 0
		    || GET_CODE (op1) == MEM)))
	{
	  rtx tmp;
	  tmp = op0, op0 = op1, op1 = tmp;
	  code = swap_condition (code);
	}

      if (GET_CODE (op0) != REG)
	op0 = force_reg (op_mode, op0);

      if (CONSTANT_P (op1))
	{
	  /* Loadable 387 constants (0.0, 1.0, ...) stay in a register;
	     everything else goes to the constant pool.  */
	  if (standard_80387_constant_p (op1))
	    op1 = force_reg (op_mode, op1);
	  else
	    op1 = validize_mem (force_const_mem (op_mode, op1));
	}
    }

  /* Try to rearrange the comparison to make it cheaper.  */
  if (ix86_fp_comparison_cost (code)
      > ix86_fp_comparison_cost (swap_condition (code))
      && (GET_CODE (op1) == REG || !no_new_pseudos))
    {
      rtx tmp;
      tmp = op0, op0 = op1, op1 = tmp;
      code = swap_condition (code);
      if (GET_CODE (op0) != REG)
	op0 = force_reg (op_mode, op0);
    }

  *pop0 = op0;
  *pop1 = op1;
  return code;
}
9037
c0c102a9
JH
9038/* Convert comparison codes we use to represent FP comparison to integer
9039 code that will result in proper branch. Return UNKNOWN if no such code
9040 is available. */
9041static enum rtx_code
b96a374d 9042ix86_fp_compare_code_to_integer (enum rtx_code code)
c0c102a9
JH
9043{
9044 switch (code)
9045 {
9046 case GT:
9047 return GTU;
9048 case GE:
9049 return GEU;
9050 case ORDERED:
9051 case UNORDERED:
9052 return code;
9053 break;
9054 case UNEQ:
9055 return EQ;
9056 break;
9057 case UNLT:
9058 return LTU;
9059 break;
9060 case UNLE:
9061 return LEU;
9062 break;
9063 case LTGT:
9064 return NE;
9065 break;
9066 default:
9067 return UNKNOWN;
9068 }
9069}
9070
/* Split comparison code CODE into comparisons we can do using branch
   instructions.  BYPASS_CODE is comparison code for branch that will
   branch around FIRST_CODE and SECOND_CODE.  If some of branches
   is not required, set value to NIL.
   We never require more than two branches.  */
static void
ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
			  enum rtx_code *first_code,
			  enum rtx_code *second_code)
{
  *first_code = code;
  *bypass_code = NIL;
  *second_code = NIL;

  /* The fcomi comparison sets flags as follows:

     cmp    ZF PF CF
     >      0  0  0
     <      0  0  1
     =      1  0  0
     un     1  1  1 */

  switch (code)
    {
      /* These codes map directly onto one flag condition.  */
    case GT:			/* GTU - CF=0 & ZF=0 */
    case GE:			/* GEU - CF=0 */
    case ORDERED:		/* PF=0 */
    case UNORDERED:		/* PF=1 */
    case UNEQ:			/* EQ - ZF=1 */
    case UNLT:			/* LTU - CF=1 */
    case UNLE:			/* LEU - CF=1 | ZF=1 */
    case LTGT:			/* EQ - ZF=0 */
      break;
      /* These would be wrong on unordered inputs; guard with a
	 bypass branch taken when the operands are unordered.  */
    case LT:			/* LTU - CF=1 - fails on unordered */
      *first_code = UNLT;
      *bypass_code = UNORDERED;
      break;
    case LE:			/* LEU - CF=1 | ZF=1 - fails on unordered */
      *first_code = UNLE;
      *bypass_code = UNORDERED;
      break;
    case EQ:			/* EQ - ZF=1 - fails on unordered */
      *first_code = UNEQ;
      *bypass_code = UNORDERED;
      break;
      /* These need a second branch that also accepts unordered.  */
    case NE:			/* NE - ZF=0 - fails on unordered */
      *first_code = LTGT;
      *second_code = UNORDERED;
      break;
    case UNGE:			/* GEU - CF=0 - fails on unordered */
      *first_code = GE;
      *second_code = UNORDERED;
      break;
    case UNGT:			/* GTU - CF=0 & ZF=0 - fails on unordered */
      *first_code = GT;
      *second_code = UNORDERED;
      break;
    default:
      abort ();
    }
  if (!TARGET_IEEE_FP)
    {
      /* Without IEEE conformance, unordered results need not be honored.  */
      *second_code = NIL;
      *bypass_code = NIL;
    }
}
9137
9e7adcb3 9138/* Return cost of comparison done fcom + arithmetics operations on AX.
5bdc5878 9139 All following functions do use number of instructions as a cost metrics.
9e7adcb3
JH
9140 In future this should be tweaked to compute bytes for optimize_size and
9141 take into account performance of various instructions on various CPUs. */
9142static int
b96a374d 9143ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
9e7adcb3
JH
9144{
9145 if (!TARGET_IEEE_FP)
9146 return 4;
9147 /* The cost of code output by ix86_expand_fp_compare. */
9148 switch (code)
9149 {
9150 case UNLE:
9151 case UNLT:
9152 case LTGT:
9153 case GT:
9154 case GE:
9155 case UNORDERED:
9156 case ORDERED:
9157 case UNEQ:
9158 return 4;
9159 break;
9160 case LT:
9161 case NE:
9162 case EQ:
9163 case UNGE:
9164 return 5;
9165 break;
9166 case LE:
9167 case UNGT:
9168 return 6;
9169 break;
9170 default:
9171 abort ();
9172 }
9173}
9174
9175/* Return cost of comparison done using fcomi operation.
9176 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9177static int
b96a374d 9178ix86_fp_comparison_fcomi_cost (enum rtx_code code)
9e7adcb3
JH
9179{
9180 enum rtx_code bypass_code, first_code, second_code;
d1f87653 9181 /* Return arbitrarily high cost when instruction is not supported - this
9e7adcb3
JH
9182 prevents gcc from using it. */
9183 if (!TARGET_CMOVE)
9184 return 1024;
9185 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9186 return (bypass_code != NIL || second_code != NIL) + 2;
9187}
9188
9189/* Return cost of comparison done using sahf operation.
9190 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9191static int
b96a374d 9192ix86_fp_comparison_sahf_cost (enum rtx_code code)
9e7adcb3
JH
9193{
9194 enum rtx_code bypass_code, first_code, second_code;
d1f87653 9195 /* Return arbitrarily high cost when instruction is not preferred - this
9e7adcb3
JH
9196 avoids gcc from using it. */
9197 if (!TARGET_USE_SAHF && !optimize_size)
9198 return 1024;
9199 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9200 return (bypass_code != NIL || second_code != NIL) + 3;
9201}
9202
9203/* Compute cost of the comparison done using any method.
9204 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9205static int
b96a374d 9206ix86_fp_comparison_cost (enum rtx_code code)
9e7adcb3
JH
9207{
9208 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
9209 int min;
9210
9211 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
9212 sahf_cost = ix86_fp_comparison_sahf_cost (code);
9213
9214 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
9215 if (min > sahf_cost)
9216 min = sahf_cost;
9217 if (min > fcomi_cost)
9218 min = fcomi_cost;
9219 return min;
9220}
c0c102a9 9221
/* Generate insn patterns to do a floating point compare of OPERANDS.
   Uses fcomi/sahf when profitable, otherwise fnstsw + bit tests on AH.
   SCRATCH, if non-NULL, is an HImode register for the status word.
   SECOND_TEST/BYPASS_TEST receive extra conditions per
   ix86_fp_comparison_codes.  Returns the condition rtx for the flags
   user (bcc, scc, or cmov).  */

static rtx
ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
			rtx *second_test, rtx *bypass_test)
{
  enum machine_mode fpcmp_mode, intcmp_mode;
  rtx tmp, tmp2;
  int cost = ix86_fp_comparison_cost (code);
  enum rtx_code bypass_code, first_code, second_code;

  fpcmp_mode = ix86_fp_compare_mode (code);
  code = ix86_prepare_fp_compare_args (code, &op0, &op1);

  if (second_test)
    *second_test = NULL_RTX;
  if (bypass_test)
    *bypass_test = NULL_RTX;

  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);

  /* Do fcomi/sahf based test when profitable.  */
  if ((bypass_code == NIL || bypass_test)
      && (second_code == NIL || second_test)
      && ix86_fp_comparison_arithmetics_cost (code) > cost)
    {
      if (TARGET_CMOVE)
	{
	  /* fcomi: compare straight into the flags register.  */
	  tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
	  tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
			     tmp);
	  emit_insn (tmp);
	}
      else
	{
	  /* fnstsw + sahf: copy the FP status word into the flags.  */
	  tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
	  tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
	  if (!scratch)
	    scratch = gen_reg_rtx (HImode);
	  emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
	  emit_insn (gen_x86_sahf_1 (scratch));
	}

      /* The FP codes work out to act like unsigned.  */
      intcmp_mode = fpcmp_mode;
      code = first_code;
      if (bypass_code != NIL)
	*bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
				       gen_rtx_REG (intcmp_mode, FLAGS_REG),
				       const0_rtx);
      if (second_code != NIL)
	*second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
				       gen_rtx_REG (intcmp_mode, FLAGS_REG),
				       const0_rtx);
    }
  else
    {
      /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first.  */
      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
      tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
      if (!scratch)
	scratch = gen_reg_rtx (HImode);
      emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));

      /* In the unordered case, we have to check C2 for NaN's, which
	 doesn't happen to work out to anything nice combination-wise.
	 So do some bit twiddling on the value we've got in AH to come
	 up with an appropriate set of condition codes.
	 Status word bits tested below: 0x01 = C0 (carry), 0x04 = C2,
	 0x40 = C3 (zero), 0x45 = C0|C2|C3.  */

      intcmp_mode = CCNOmode;
      switch (code)
	{
	case GT:
	case UNGT:
	  if (code == GT || !TARGET_IEEE_FP)
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
	      code = EQ;
	    }
	  else
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
	      intcmp_mode = CCmode;
	      code = GEU;
	    }
	  break;
	case LT:
	case UNLT:
	  if (code == LT && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
	      intcmp_mode = CCmode;
	      code = EQ;
	    }
	  else
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
	      code = NE;
	    }
	  break;
	case GE:
	case UNGE:
	  if (code == GE || !TARGET_IEEE_FP)
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
	      code = EQ;
	    }
	  else
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
					     GEN_INT (0x01)));
	      code = NE;
	    }
	  break;
	case LE:
	case UNLE:
	  if (code == LE && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
	      intcmp_mode = CCmode;
	      code = LTU;
	    }
	  else
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
	      code = NE;
	    }
	  break;
	case EQ:
	case UNEQ:
	  if (code == EQ && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
	      intcmp_mode = CCmode;
	      code = EQ;
	    }
	  else
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
	      code = NE;
	      break;
	    }
	  break;
	case NE:
	case LTGT:
	  if (code == NE && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
					     GEN_INT (0x40)));
	      code = NE;
	    }
	  else
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
	      code = EQ;
	    }
	  break;

	case UNORDERED:
	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
	  code = NE;
	  break;
	case ORDERED:
	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
	  code = EQ;
	  break;

	default:
	  abort ();
	}
    }

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode,
			 gen_rtx_REG (intcmp_mode, FLAGS_REG),
			 const0_rtx);
}
9408
9e3e266c 9409rtx
b96a374d 9410ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
e075ae69
RH
9411{
9412 rtx op0, op1, ret;
9413 op0 = ix86_compare_op0;
9414 op1 = ix86_compare_op1;
9415
a1b8572c
JH
9416 if (second_test)
9417 *second_test = NULL_RTX;
9418 if (bypass_test)
9419 *bypass_test = NULL_RTX;
9420
e075ae69 9421 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
bf71a4f8 9422 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
77ebd435 9423 second_test, bypass_test);
32b5b1aa 9424 else
e075ae69
RH
9425 ret = ix86_expand_int_compare (code, op0, op1);
9426
9427 return ret;
9428}
9429
03598dea
JH
9430/* Return true if the CODE will result in nontrivial jump sequence. */
9431bool
b96a374d 9432ix86_fp_jump_nontrivial_p (enum rtx_code code)
03598dea
JH
9433{
9434 enum rtx_code bypass_code, first_code, second_code;
9435 if (!TARGET_CMOVE)
9436 return true;
9437 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9438 return bypass_code != NIL || second_code != NIL;
9439}
9440
/* Expand a conditional branch to LABEL, taken when comparison CODE holds
   between the operands previously stashed in the globals
   ix86_compare_op0/ix86_compare_op1.

   Integer QI/HI/SImode (and DImode on 64-bit targets) compares expand to
   a single compare+jump.  FP compares either split immediately into the
   natural one-jump form or are kept as a compound insn for later
   splitting.  32-bit DImode compares are decomposed into word-sized
   compares and up to three jumps.

   NOTE(review): this function recursively re-enters itself after
   overwriting the ix86_compare_op0/1 globals; callers must not rely on
   those globals surviving the call.  */
void
ix86_expand_branch (enum rtx_code code, rtx label)
{
  rtx tmp;

  switch (GET_MODE (ix86_compare_op0))
    {
    case QImode:
    case HImode:
    case SImode:
      simple:
      /* Single compare and conditional jump.  */
      tmp = ix86_expand_compare (code, NULL, NULL);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				  gen_rtx_LABEL_REF (VOIDmode, label),
				  pc_rtx);
      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
      return;

    case SFmode:
    case DFmode:
    case XFmode:
      {
	rtvec vec;
	int use_fcomi;
	enum rtx_code bypass_code, first_code, second_code;

	code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
					     &ix86_compare_op1);

	ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);

	/* Check whether we will use the natural sequence with one jump.  If
	   so, we can expand jump early.  Otherwise delay expansion by
	   creating compound insn to not confuse optimizers.  */
	if (bypass_code == NIL && second_code == NIL
	    && TARGET_CMOVE)
	  {
	    ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
				  gen_rtx_LABEL_REF (VOIDmode, label),
				  pc_rtx, NULL_RTX);
	  }
	else
	  {
	    tmp = gen_rtx_fmt_ee (code, VOIDmode,
				  ix86_compare_op0, ix86_compare_op1);
	    tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
					gen_rtx_LABEL_REF (VOIDmode, label),
					pc_rtx);
	    tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);

	    /* The compound insn clobbers the FP status flag registers
	       (hard regs 18 and 17) and, when fcomi is not used, a
	       scratch HImode register for fnstsw.  */
	    use_fcomi = ix86_use_fcomi_compare (code);
	    vec = rtvec_alloc (3 + !use_fcomi);
	    RTVEC_ELT (vec, 0) = tmp;
	    RTVEC_ELT (vec, 1)
	      = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
	    RTVEC_ELT (vec, 2)
	      = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
	    if (! use_fcomi)
	      RTVEC_ELT (vec, 3)
		= gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));

	    emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
	  }
	return;
      }

    case DImode:
      if (TARGET_64BIT)
	goto simple;
      /* Expand DImode branch into multiple compare+branch.  */
      {
	rtx lo[2], hi[2], label2;
	enum rtx_code code1, code2, code3;

	/* Canonicalize: constant operand second.  */
	if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
	  {
	    tmp = ix86_compare_op0;
	    ix86_compare_op0 = ix86_compare_op1;
	    ix86_compare_op1 = tmp;
	    code = swap_condition (code);
	  }
	split_di (&ix86_compare_op0, 1, lo+0, hi+0);
	split_di (&ix86_compare_op1, 1, lo+1, hi+1);

	/* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
	   avoid two branches.  This costs one extra insn, so disable when
	   optimizing for size.  */

	if ((code == EQ || code == NE)
	    && (!optimize_size
		|| hi[1] == const0_rtx || lo[1] == const0_rtx))
	  {
	    rtx xor0, xor1;

	    xor1 = hi[0];
	    if (hi[1] != const0_rtx)
	      xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
				   NULL_RTX, 0, OPTAB_WIDEN);

	    xor0 = lo[0];
	    if (lo[1] != const0_rtx)
	      xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
				   NULL_RTX, 0, OPTAB_WIDEN);

	    tmp = expand_binop (SImode, ior_optab, xor1, xor0,
				NULL_RTX, 0, OPTAB_WIDEN);

	    /* Recurse with the SImode OR-of-XORs against zero.  */
	    ix86_compare_op0 = tmp;
	    ix86_compare_op1 = const0_rtx;
	    ix86_expand_branch (code, label);
	    return;
	  }

	/* Otherwise, if we are doing less-than or greater-or-equal-than,
	   op1 is a constant and the low word is zero, then we can just
	   examine the high word.  */

	if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
	  switch (code)
	    {
	    case LT: case LTU: case GE: case GEU:
	      ix86_compare_op0 = hi[0];
	      ix86_compare_op1 = hi[1];
	      ix86_expand_branch (code, label);
	      return;
	    default:
	      break;
	    }

	/* Otherwise, we need two or three jumps.  */

	label2 = gen_label_rtx ();

	code1 = code;
	code2 = swap_condition (code);
	code3 = unsigned_condition (code);

	/* code1 jumps to LABEL on the high word, code2 jumps to the
	   local fall-through label, code3 decides on the low word
	   (unsigned, since only magnitude matters there).  NIL marks a
	   jump that is not needed.  */
	switch (code)
	  {
	  case LT: case GT: case LTU: case GTU:
	    break;

	  case LE: code1 = LT; code2 = GT; break;
	  case GE: code1 = GT; code2 = LT; break;
	  case LEU: code1 = LTU; code2 = GTU; break;
	  case GEU: code1 = GTU; code2 = LTU; break;

	  case EQ: code1 = NIL; code2 = NE; break;
	  case NE: code2 = NIL; break;

	  default:
	    abort ();
	  }

	/*
	 * a < b =>
	 *    if (hi(a) < hi(b)) goto true;
	 *    if (hi(a) > hi(b)) goto false;
	 *    if (lo(a) < lo(b)) goto true;
	 *  false:
	 */

	ix86_compare_op0 = hi[0];
	ix86_compare_op1 = hi[1];

	if (code1 != NIL)
	  ix86_expand_branch (code1, label);
	if (code2 != NIL)
	  ix86_expand_branch (code2, label2);

	ix86_compare_op0 = lo[0];
	ix86_compare_op1 = lo[1];
	ix86_expand_branch (code3, label);

	if (code2 != NIL)
	  emit_label (label2);
	return;
      }

    default:
      abort ();
    }
}
e075ae69 9624
/* Split branch based on floating point condition.  Compares OP1 and OP2
   under CODE, jumping to TARGET1 when it holds and TARGET2 otherwise
   (one of the two is pc_rtx, i.e. fall through).  TMP is a scratch rtx
   passed through to ix86_expand_fp_compare.  Up to three jump insns are
   emitted: an optional bypass jump (around the main test), the main
   conditional jump, and an optional second jump — the extra jumps
   handle the auxiliary tests returned by ix86_expand_fp_compare.
   Branch probabilities from split_branch_probability are attached as
   REG_BR_PROB notes.  */
void
ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
		      rtx target1, rtx target2, rtx tmp)
{
  rtx second, bypass;
  rtx label = NULL_RTX;
  rtx condition;
  int bypass_probability = -1, second_probability = -1, probability = -1;
  rtx i;

  /* Canonicalize so that TARGET2 is the fall-through (pc_rtx) arm,
     reversing the condition if necessary.  */
  if (target2 != pc_rtx)
    {
      rtx tmp = target2;
      code = reverse_condition_maybe_unordered (code);
      target2 = target1;
      target1 = tmp;
    }

  condition = ix86_expand_fp_compare (code, op1, op2,
				      tmp, &second, &bypass);

  if (split_branch_probability >= 0)
    {
      /* Distribute the probabilities across the jumps.
	 Assume the BYPASS and SECOND to be always test
	 for UNORDERED.  */
      probability = split_branch_probability;

      /* Value of 1 is low enough to make no need for probability
	 to be updated.  Later we may run some experiments and see
	 if unordered values are more frequent in practice.  */
      if (bypass)
	bypass_probability = 1;
      if (second)
	second_probability = 1;
    }
  /* Bypass jump: skip the main conditional jump entirely when the
     bypass condition holds.  */
  if (bypass != NULL_RTX)
    {
      label = gen_label_rtx ();
      i = emit_jump_insn (gen_rtx_SET
			  (VOIDmode, pc_rtx,
			   gen_rtx_IF_THEN_ELSE (VOIDmode,
						 bypass,
						 gen_rtx_LABEL_REF (VOIDmode,
								    label),
						 pc_rtx)));
      if (bypass_probability >= 0)
	REG_NOTES (i)
	  = gen_rtx_EXPR_LIST (REG_BR_PROB,
			       GEN_INT (bypass_probability),
			       REG_NOTES (i));
    }
  /* The main conditional jump.  */
  i = emit_jump_insn (gen_rtx_SET
		      (VOIDmode, pc_rtx,
		       gen_rtx_IF_THEN_ELSE (VOIDmode,
					     condition, target1, target2)));
  if (probability >= 0)
    REG_NOTES (i)
      = gen_rtx_EXPR_LIST (REG_BR_PROB,
			   GEN_INT (probability),
			   REG_NOTES (i));
  /* Second jump: also branch to TARGET1 when the auxiliary condition
     holds.  */
  if (second != NULL_RTX)
    {
      i = emit_jump_insn (gen_rtx_SET
			  (VOIDmode, pc_rtx,
			   gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
						 target2)));
      if (second_probability >= 0)
	REG_NOTES (i)
	  = gen_rtx_EXPR_LIST (REG_BR_PROB,
			       GEN_INT (second_probability),
			       REG_NOTES (i));
    }
  if (label != NULL_RTX)
    emit_label (label);
}
9702
/* Expand a setcc of QImode register DEST under comparison CODE of the
   operands stashed in ix86_compare_op0/ix86_compare_op1.  Returns 1
   (DONE) on success, 0 (FAIL) when the pattern must fail — currently
   only for 32-bit DImode compares.  When the FP compare needs auxiliary
   tests, the partial results are combined with AND (bypass test, whose
   condition is destructively reversed) or OR (second test).  */
int
ix86_expand_setcc (enum rtx_code code, rtx dest)
{
  rtx ret, tmp, tmpreg, equiv;
  rtx second_test, bypass_test;

  /* DImode compares on 32-bit targets go through the branch splitter;
     punt here.  */
  if (GET_MODE (ix86_compare_op0) == DImode
      && !TARGET_64BIT)
    return 0; /* FAIL */

  if (GET_MODE (dest) != QImode)
    abort ();

  ret = ix86_expand_compare (code, &second_test, &bypass_test);
  PUT_MODE (ret, QImode);

  tmp = dest;
  tmpreg = dest;

  emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
  if (bypass_test || second_test)
    {
      rtx test = second_test;
      int bypass = 0;
      rtx tmp2 = gen_reg_rtx (QImode);
      if (bypass_test)
	{
	  /* ix86_expand_fp_compare never produces both at once.  */
	  if (second_test)
	    abort ();
	  test = bypass_test;
	  bypass = 1;
	  /* Note: mutates the test rtx in place.  */
	  PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
	}
      PUT_MODE (test, QImode);
      emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));

      /* Fold the auxiliary result into DEST.  */
      if (bypass)
	emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
      else
	emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
    }

  /* Attach a REG_EQUAL note describing the comparison result.  */
  equiv = simplify_gen_relational (code, QImode,
				   GET_MODE (ix86_compare_op0),
				   ix86_compare_op0, ix86_compare_op1);
  set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);

  return 1; /* DONE */
}
e075ae69 9753
c35d187f
RH
9754/* Expand comparison setting or clearing carry flag. Return true when
9755 successful and set pop for the operation. */
9756static bool
b96a374d 9757ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
4977bab6
ZW
9758{
9759 enum machine_mode mode =
9760 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
9761
9762 /* Do not handle DImode compares that go trought special path. Also we can't
43f3a59d 9763 deal with FP compares yet. This is possible to add. */
e6e81735
JH
9764 if ((mode == DImode && !TARGET_64BIT))
9765 return false;
9766 if (FLOAT_MODE_P (mode))
9767 {
9768 rtx second_test = NULL, bypass_test = NULL;
9769 rtx compare_op, compare_seq;
9770
9771 /* Shortcut: following common codes never translate into carry flag compares. */
9772 if (code == EQ || code == NE || code == UNEQ || code == LTGT
9773 || code == ORDERED || code == UNORDERED)
9774 return false;
9775
9776 /* These comparisons require zero flag; swap operands so they won't. */
9777 if ((code == GT || code == UNLE || code == LE || code == UNGT)
9778 && !TARGET_IEEE_FP)
9779 {
9780 rtx tmp = op0;
9781 op0 = op1;
9782 op1 = tmp;
9783 code = swap_condition (code);
9784 }
9785
c51e6d85
KH
9786 /* Try to expand the comparison and verify that we end up with carry flag
9787 based comparison. This is fails to be true only when we decide to expand
9788 comparison using arithmetic that is not too common scenario. */
e6e81735
JH
9789 start_sequence ();
9790 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9791 &second_test, &bypass_test);
9792 compare_seq = get_insns ();
9793 end_sequence ();
9794
9795 if (second_test || bypass_test)
9796 return false;
9797 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9798 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9799 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
9800 else
9801 code = GET_CODE (compare_op);
9802 if (code != LTU && code != GEU)
9803 return false;
9804 emit_insn (compare_seq);
9805 *pop = compare_op;
9806 return true;
9807 }
9808 if (!INTEGRAL_MODE_P (mode))
4977bab6
ZW
9809 return false;
9810 switch (code)
9811 {
9812 case LTU:
9813 case GEU:
9814 break;
9815
9816 /* Convert a==0 into (unsigned)a<1. */
9817 case EQ:
9818 case NE:
9819 if (op1 != const0_rtx)
9820 return false;
9821 op1 = const1_rtx;
9822 code = (code == EQ ? LTU : GEU);
9823 break;
9824
9825 /* Convert a>b into b<a or a>=b-1. */
9826 case GTU:
9827 case LEU:
9828 if (GET_CODE (op1) == CONST_INT)
9829 {
9830 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
9831 /* Bail out on overflow. We still can swap operands but that
43f3a59d 9832 would force loading of the constant into register. */
4977bab6
ZW
9833 if (op1 == const0_rtx
9834 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
9835 return false;
9836 code = (code == GTU ? GEU : LTU);
9837 }
9838 else
9839 {
9840 rtx tmp = op1;
9841 op1 = op0;
9842 op0 = tmp;
9843 code = (code == GTU ? LTU : GEU);
9844 }
9845 break;
9846
ccea753c 9847 /* Convert a>=0 into (unsigned)a<0x80000000. */
4977bab6
ZW
9848 case LT:
9849 case GE:
9850 if (mode == DImode || op1 != const0_rtx)
9851 return false;
ccea753c 9852 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
4977bab6
ZW
9853 code = (code == LT ? GEU : LTU);
9854 break;
9855 case LE:
9856 case GT:
9857 if (mode == DImode || op1 != constm1_rtx)
9858 return false;
ccea753c 9859 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
4977bab6
ZW
9860 code = (code == LE ? GEU : LTU);
9861 break;
9862
9863 default:
9864 return false;
9865 }
ebe75517
JH
9866 /* Swapping operands may cause constant to appear as first operand. */
9867 if (!nonimmediate_operand (op0, VOIDmode))
9868 {
9869 if (no_new_pseudos)
9870 return false;
9871 op0 = force_reg (mode, op0);
9872 }
4977bab6
ZW
9873 ix86_compare_op0 = op0;
9874 ix86_compare_op1 = op1;
9875 *pop = ix86_expand_compare (code, NULL, NULL);
9876 if (GET_CODE (*pop) != LTU && GET_CODE (*pop) != GEU)
9877 abort ();
9878 return true;
9879}
9880
32b5b1aa 9881int
b96a374d 9882ix86_expand_int_movcc (rtx operands[])
32b5b1aa 9883{
e075ae69
RH
9884 enum rtx_code code = GET_CODE (operands[1]), compare_code;
9885 rtx compare_seq, compare_op;
a1b8572c 9886 rtx second_test, bypass_test;
635559ab 9887 enum machine_mode mode = GET_MODE (operands[0]);
4977bab6 9888 bool sign_bit_compare_p = false;;
3a3677ff 9889
e075ae69 9890 start_sequence ();
a1b8572c 9891 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
2f937369 9892 compare_seq = get_insns ();
e075ae69
RH
9893 end_sequence ();
9894
9895 compare_code = GET_CODE (compare_op);
9896
4977bab6
ZW
9897 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
9898 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
9899 sign_bit_compare_p = true;
9900
e075ae69
RH
9901 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
9902 HImode insns, we'd be swallowed in word prefix ops. */
9903
4977bab6 9904 if ((mode != HImode || TARGET_FAST_PREFIX)
635559ab 9905 && (mode != DImode || TARGET_64BIT)
0f290768 9906 && GET_CODE (operands[2]) == CONST_INT
e075ae69
RH
9907 && GET_CODE (operands[3]) == CONST_INT)
9908 {
9909 rtx out = operands[0];
9910 HOST_WIDE_INT ct = INTVAL (operands[2]);
9911 HOST_WIDE_INT cf = INTVAL (operands[3]);
9912 HOST_WIDE_INT diff;
9913
4977bab6
ZW
9914 diff = ct - cf;
9915 /* Sign bit compares are better done using shifts than we do by using
b96a374d 9916 sbb. */
4977bab6
ZW
9917 if (sign_bit_compare_p
9918 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
9919 ix86_compare_op1, &compare_op))
e075ae69 9920 {
e075ae69
RH
9921 /* Detect overlap between destination and compare sources. */
9922 rtx tmp = out;
9923
4977bab6 9924 if (!sign_bit_compare_p)
36583fea 9925 {
e6e81735
JH
9926 bool fpcmp = false;
9927
4977bab6
ZW
9928 compare_code = GET_CODE (compare_op);
9929
e6e81735
JH
9930 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9931 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9932 {
9933 fpcmp = true;
9934 compare_code = ix86_fp_compare_code_to_integer (compare_code);
9935 }
9936
4977bab6
ZW
9937 /* To simplify rest of code, restrict to the GEU case. */
9938 if (compare_code == LTU)
9939 {
9940 HOST_WIDE_INT tmp = ct;
9941 ct = cf;
9942 cf = tmp;
9943 compare_code = reverse_condition (compare_code);
9944 code = reverse_condition (code);
9945 }
e6e81735
JH
9946 else
9947 {
9948 if (fpcmp)
9949 PUT_CODE (compare_op,
9950 reverse_condition_maybe_unordered
9951 (GET_CODE (compare_op)));
9952 else
9953 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
9954 }
4977bab6 9955 diff = ct - cf;
36583fea 9956
4977bab6
ZW
9957 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
9958 || reg_overlap_mentioned_p (out, ix86_compare_op1))
9959 tmp = gen_reg_rtx (mode);
e075ae69 9960
4977bab6 9961 if (mode == DImode)
e6e81735 9962 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
4977bab6 9963 else
e6e81735 9964 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
4977bab6 9965 }
14f73b5a 9966 else
4977bab6
ZW
9967 {
9968 if (code == GT || code == GE)
9969 code = reverse_condition (code);
9970 else
9971 {
9972 HOST_WIDE_INT tmp = ct;
9973 ct = cf;
9974 cf = tmp;
5fb48685 9975 diff = ct - cf;
4977bab6
ZW
9976 }
9977 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
9978 ix86_compare_op1, VOIDmode, 0, -1);
9979 }
e075ae69 9980
36583fea
JH
9981 if (diff == 1)
9982 {
9983 /*
9984 * cmpl op0,op1
9985 * sbbl dest,dest
9986 * [addl dest, ct]
9987 *
9988 * Size 5 - 8.
9989 */
9990 if (ct)
b96a374d 9991 tmp = expand_simple_binop (mode, PLUS,
635559ab 9992 tmp, GEN_INT (ct),
4977bab6 9993 copy_rtx (tmp), 1, OPTAB_DIRECT);
36583fea
JH
9994 }
9995 else if (cf == -1)
9996 {
9997 /*
9998 * cmpl op0,op1
9999 * sbbl dest,dest
10000 * orl $ct, dest
10001 *
10002 * Size 8.
10003 */
635559ab
JH
10004 tmp = expand_simple_binop (mode, IOR,
10005 tmp, GEN_INT (ct),
4977bab6 10006 copy_rtx (tmp), 1, OPTAB_DIRECT);
36583fea
JH
10007 }
10008 else if (diff == -1 && ct)
10009 {
10010 /*
10011 * cmpl op0,op1
10012 * sbbl dest,dest
06ec023f 10013 * notl dest
36583fea
JH
10014 * [addl dest, cf]
10015 *
10016 * Size 8 - 11.
10017 */
4977bab6 10018 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
635559ab 10019 if (cf)
b96a374d 10020 tmp = expand_simple_binop (mode, PLUS,
4977bab6
ZW
10021 copy_rtx (tmp), GEN_INT (cf),
10022 copy_rtx (tmp), 1, OPTAB_DIRECT);
36583fea
JH
10023 }
10024 else
10025 {
10026 /*
10027 * cmpl op0,op1
10028 * sbbl dest,dest
06ec023f 10029 * [notl dest]
36583fea
JH
10030 * andl cf - ct, dest
10031 * [addl dest, ct]
10032 *
10033 * Size 8 - 11.
10034 */
06ec023f
RB
10035
10036 if (cf == 0)
10037 {
10038 cf = ct;
10039 ct = 0;
4977bab6 10040 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
06ec023f
RB
10041 }
10042
635559ab 10043 tmp = expand_simple_binop (mode, AND,
4977bab6 10044 copy_rtx (tmp),
d8bf17f9 10045 gen_int_mode (cf - ct, mode),
4977bab6 10046 copy_rtx (tmp), 1, OPTAB_DIRECT);
635559ab 10047 if (ct)
b96a374d 10048 tmp = expand_simple_binop (mode, PLUS,
4977bab6
ZW
10049 copy_rtx (tmp), GEN_INT (ct),
10050 copy_rtx (tmp), 1, OPTAB_DIRECT);
36583fea 10051 }
e075ae69 10052
4977bab6
ZW
10053 if (!rtx_equal_p (tmp, out))
10054 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
e075ae69
RH
10055
10056 return 1; /* DONE */
10057 }
10058
e075ae69
RH
10059 if (diff < 0)
10060 {
10061 HOST_WIDE_INT tmp;
10062 tmp = ct, ct = cf, cf = tmp;
10063 diff = -diff;
734dba19
JH
10064 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
10065 {
10066 /* We may be reversing unordered compare to normal compare, that
10067 is not valid in general (we may convert non-trapping condition
10068 to trapping one), however on i386 we currently emit all
10069 comparisons unordered. */
10070 compare_code = reverse_condition_maybe_unordered (compare_code);
10071 code = reverse_condition_maybe_unordered (code);
10072 }
10073 else
10074 {
10075 compare_code = reverse_condition (compare_code);
10076 code = reverse_condition (code);
10077 }
e075ae69 10078 }
0f2a3457
JJ
10079
10080 compare_code = NIL;
10081 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
10082 && GET_CODE (ix86_compare_op1) == CONST_INT)
10083 {
10084 if (ix86_compare_op1 == const0_rtx
10085 && (code == LT || code == GE))
10086 compare_code = code;
10087 else if (ix86_compare_op1 == constm1_rtx)
10088 {
10089 if (code == LE)
10090 compare_code = LT;
10091 else if (code == GT)
10092 compare_code = GE;
10093 }
10094 }
10095
10096 /* Optimize dest = (op0 < 0) ? -1 : cf. */
10097 if (compare_code != NIL
10098 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
10099 && (cf == -1 || ct == -1))
10100 {
10101 /* If lea code below could be used, only optimize
10102 if it results in a 2 insn sequence. */
10103
10104 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
10105 || diff == 3 || diff == 5 || diff == 9)
10106 || (compare_code == LT && ct == -1)
10107 || (compare_code == GE && cf == -1))
10108 {
10109 /*
10110 * notl op1 (if necessary)
10111 * sarl $31, op1
10112 * orl cf, op1
10113 */
10114 if (ct != -1)
10115 {
10116 cf = ct;
b96a374d 10117 ct = -1;
0f2a3457
JJ
10118 code = reverse_condition (code);
10119 }
10120
10121 out = emit_store_flag (out, code, ix86_compare_op0,
10122 ix86_compare_op1, VOIDmode, 0, -1);
10123
10124 out = expand_simple_binop (mode, IOR,
10125 out, GEN_INT (cf),
10126 out, 1, OPTAB_DIRECT);
10127 if (out != operands[0])
10128 emit_move_insn (operands[0], out);
10129
10130 return 1; /* DONE */
10131 }
10132 }
10133
4977bab6 10134
635559ab
JH
10135 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
10136 || diff == 3 || diff == 5 || diff == 9)
4977bab6 10137 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
c05dbe81 10138 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
e075ae69
RH
10139 {
10140 /*
10141 * xorl dest,dest
10142 * cmpl op1,op2
10143 * setcc dest
10144 * lea cf(dest*(ct-cf)),dest
10145 *
10146 * Size 14.
10147 *
10148 * This also catches the degenerate setcc-only case.
10149 */
10150
10151 rtx tmp;
10152 int nops;
10153
10154 out = emit_store_flag (out, code, ix86_compare_op0,
10155 ix86_compare_op1, VOIDmode, 0, 1);
10156
10157 nops = 0;
97f51ac4
RB
10158 /* On x86_64 the lea instruction operates on Pmode, so we need
10159 to get arithmetics done in proper mode to match. */
e075ae69 10160 if (diff == 1)
068f5dea 10161 tmp = copy_rtx (out);
e075ae69
RH
10162 else
10163 {
885a70fd 10164 rtx out1;
068f5dea 10165 out1 = copy_rtx (out);
635559ab 10166 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
e075ae69
RH
10167 nops++;
10168 if (diff & 1)
10169 {
635559ab 10170 tmp = gen_rtx_PLUS (mode, tmp, out1);
e075ae69
RH
10171 nops++;
10172 }
10173 }
10174 if (cf != 0)
10175 {
635559ab 10176 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
e075ae69
RH
10177 nops++;
10178 }
4977bab6 10179 if (!rtx_equal_p (tmp, out))
e075ae69 10180 {
14f73b5a 10181 if (nops == 1)
a5cf80f0 10182 out = force_operand (tmp, copy_rtx (out));
e075ae69 10183 else
4977bab6 10184 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
e075ae69 10185 }
4977bab6 10186 if (!rtx_equal_p (out, operands[0]))
1985ef90 10187 emit_move_insn (operands[0], copy_rtx (out));
e075ae69
RH
10188
10189 return 1; /* DONE */
10190 }
10191
10192 /*
10193 * General case: Jumpful:
10194 * xorl dest,dest cmpl op1, op2
10195 * cmpl op1, op2 movl ct, dest
10196 * setcc dest jcc 1f
10197 * decl dest movl cf, dest
10198 * andl (cf-ct),dest 1:
10199 * addl ct,dest
0f290768 10200 *
e075ae69
RH
10201 * Size 20. Size 14.
10202 *
10203 * This is reasonably steep, but branch mispredict costs are
10204 * high on modern cpus, so consider failing only if optimizing
10205 * for space.
e075ae69
RH
10206 */
10207
4977bab6
ZW
10208 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
10209 && BRANCH_COST >= 2)
e075ae69 10210 {
97f51ac4 10211 if (cf == 0)
e075ae69 10212 {
97f51ac4
RB
10213 cf = ct;
10214 ct = 0;
734dba19 10215 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
0f2a3457
JJ
10216 /* We may be reversing unordered compare to normal compare,
10217 that is not valid in general (we may convert non-trapping
10218 condition to trapping one), however on i386 we currently
10219 emit all comparisons unordered. */
10220 code = reverse_condition_maybe_unordered (code);
10221 else
10222 {
10223 code = reverse_condition (code);
10224 if (compare_code != NIL)
10225 compare_code = reverse_condition (compare_code);
10226 }
10227 }
10228
10229 if (compare_code != NIL)
10230 {
10231 /* notl op1 (if needed)
10232 sarl $31, op1
10233 andl (cf-ct), op1
b96a374d 10234 addl ct, op1
0f2a3457
JJ
10235
10236 For x < 0 (resp. x <= -1) there will be no notl,
10237 so if possible swap the constants to get rid of the
10238 complement.
10239 True/false will be -1/0 while code below (store flag
10240 followed by decrement) is 0/-1, so the constants need
10241 to be exchanged once more. */
10242
10243 if (compare_code == GE || !cf)
734dba19 10244 {
b96a374d 10245 code = reverse_condition (code);
0f2a3457 10246 compare_code = LT;
734dba19
JH
10247 }
10248 else
10249 {
0f2a3457 10250 HOST_WIDE_INT tmp = cf;
b96a374d 10251 cf = ct;
0f2a3457 10252 ct = tmp;
734dba19 10253 }
0f2a3457
JJ
10254
10255 out = emit_store_flag (out, code, ix86_compare_op0,
10256 ix86_compare_op1, VOIDmode, 0, -1);
e075ae69 10257 }
0f2a3457
JJ
10258 else
10259 {
10260 out = emit_store_flag (out, code, ix86_compare_op0,
10261 ix86_compare_op1, VOIDmode, 0, 1);
e075ae69 10262
4977bab6
ZW
10263 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
10264 copy_rtx (out), 1, OPTAB_DIRECT);
0f2a3457 10265 }
e075ae69 10266
4977bab6 10267 out = expand_simple_binop (mode, AND, copy_rtx (out),
d8bf17f9 10268 gen_int_mode (cf - ct, mode),
4977bab6 10269 copy_rtx (out), 1, OPTAB_DIRECT);
97f51ac4 10270 if (ct)
4977bab6
ZW
10271 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
10272 copy_rtx (out), 1, OPTAB_DIRECT);
10273 if (!rtx_equal_p (out, operands[0]))
10274 emit_move_insn (operands[0], copy_rtx (out));
e075ae69
RH
10275
10276 return 1; /* DONE */
10277 }
10278 }
10279
4977bab6 10280 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
e075ae69
RH
10281 {
10282 /* Try a few things more with specific constants and a variable. */
10283
78a0d70c 10284 optab op;
e075ae69
RH
10285 rtx var, orig_out, out, tmp;
10286
4977bab6 10287 if (BRANCH_COST <= 2)
e075ae69
RH
10288 return 0; /* FAIL */
10289
0f290768 10290 /* If one of the two operands is an interesting constant, load a
e075ae69 10291 constant with the above and mask it in with a logical operation. */
0f290768 10292
e075ae69
RH
10293 if (GET_CODE (operands[2]) == CONST_INT)
10294 {
10295 var = operands[3];
4977bab6 10296 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
e075ae69 10297 operands[3] = constm1_rtx, op = and_optab;
4977bab6 10298 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
e075ae69 10299 operands[3] = const0_rtx, op = ior_optab;
78a0d70c
ZW
10300 else
10301 return 0; /* FAIL */
e075ae69
RH
10302 }
10303 else if (GET_CODE (operands[3]) == CONST_INT)
10304 {
10305 var = operands[2];
4977bab6 10306 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
e075ae69 10307 operands[2] = constm1_rtx, op = and_optab;
4977bab6 10308 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
e075ae69 10309 operands[2] = const0_rtx, op = ior_optab;
78a0d70c
ZW
10310 else
10311 return 0; /* FAIL */
e075ae69 10312 }
78a0d70c 10313 else
e075ae69
RH
10314 return 0; /* FAIL */
10315
10316 orig_out = operands[0];
635559ab 10317 tmp = gen_reg_rtx (mode);
e075ae69
RH
10318 operands[0] = tmp;
10319
10320 /* Recurse to get the constant loaded. */
10321 if (ix86_expand_int_movcc (operands) == 0)
10322 return 0; /* FAIL */
10323
10324 /* Mask in the interesting variable. */
635559ab 10325 out = expand_binop (mode, op, var, tmp, orig_out, 0,
e075ae69 10326 OPTAB_WIDEN);
4977bab6
ZW
10327 if (!rtx_equal_p (out, orig_out))
10328 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
e075ae69
RH
10329
10330 return 1; /* DONE */
10331 }
10332
10333 /*
10334 * For comparison with above,
10335 *
10336 * movl cf,dest
10337 * movl ct,tmp
10338 * cmpl op1,op2
10339 * cmovcc tmp,dest
10340 *
10341 * Size 15.
10342 */
10343
635559ab
JH
10344 if (! nonimmediate_operand (operands[2], mode))
10345 operands[2] = force_reg (mode, operands[2]);
10346 if (! nonimmediate_operand (operands[3], mode))
10347 operands[3] = force_reg (mode, operands[3]);
e075ae69 10348
a1b8572c
JH
10349 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10350 {
635559ab 10351 rtx tmp = gen_reg_rtx (mode);
a1b8572c
JH
10352 emit_move_insn (tmp, operands[3]);
10353 operands[3] = tmp;
10354 }
10355 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10356 {
635559ab 10357 rtx tmp = gen_reg_rtx (mode);
a1b8572c
JH
10358 emit_move_insn (tmp, operands[2]);
10359 operands[2] = tmp;
10360 }
4977bab6 10361
c9682caf 10362 if (! register_operand (operands[2], VOIDmode)
b96a374d 10363 && (mode == QImode
4977bab6 10364 || ! register_operand (operands[3], VOIDmode)))
635559ab 10365 operands[2] = force_reg (mode, operands[2]);
a1b8572c 10366
4977bab6
ZW
10367 if (mode == QImode
10368 && ! register_operand (operands[3], VOIDmode))
10369 operands[3] = force_reg (mode, operands[3]);
10370
e075ae69
RH
10371 emit_insn (compare_seq);
10372 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
635559ab 10373 gen_rtx_IF_THEN_ELSE (mode,
e075ae69
RH
10374 compare_op, operands[2],
10375 operands[3])));
a1b8572c 10376 if (bypass_test)
4977bab6 10377 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
635559ab 10378 gen_rtx_IF_THEN_ELSE (mode,
a1b8572c 10379 bypass_test,
4977bab6
ZW
10380 copy_rtx (operands[3]),
10381 copy_rtx (operands[0]))));
a1b8572c 10382 if (second_test)
4977bab6 10383 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
635559ab 10384 gen_rtx_IF_THEN_ELSE (mode,
a1b8572c 10385 second_test,
4977bab6
ZW
10386 copy_rtx (operands[2]),
10387 copy_rtx (operands[0]))));
e075ae69
RH
10388
10389 return 1; /* DONE */
e9a25f70 10390}
e075ae69 10391
32b5b1aa 10392int
b96a374d 10393ix86_expand_fp_movcc (rtx operands[])
32b5b1aa 10394{
e075ae69 10395 enum rtx_code code;
e075ae69 10396 rtx tmp;
a1b8572c 10397 rtx compare_op, second_test, bypass_test;
32b5b1aa 10398
0073023d
JH
10399 /* For SF/DFmode conditional moves based on comparisons
10400 in same mode, we may want to use SSE min/max instructions. */
965f5423
JH
10401 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
10402 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
0073023d 10403 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
fa9f36a1
JH
10404 /* The SSE comparisons does not support the LTGT/UNEQ pair. */
10405 && (!TARGET_IEEE_FP
10406 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
0073023d
JH
10407 /* We may be called from the post-reload splitter. */
10408 && (!REG_P (operands[0])
10409 || SSE_REG_P (operands[0])
52a661a6 10410 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
0073023d
JH
10411 {
10412 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
10413 code = GET_CODE (operands[1]);
10414
10415 /* See if we have (cross) match between comparison operands and
10416 conditional move operands. */
10417 if (rtx_equal_p (operands[2], op1))
10418 {
10419 rtx tmp = op0;
10420 op0 = op1;
10421 op1 = tmp;
10422 code = reverse_condition_maybe_unordered (code);
10423 }
10424 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
10425 {
10426 /* Check for min operation. */
4977bab6 10427 if (code == LT || code == UNLE)
0073023d 10428 {
4977bab6
ZW
10429 if (code == UNLE)
10430 {
10431 rtx tmp = op0;
10432 op0 = op1;
10433 op1 = tmp;
10434 }
0073023d
JH
10435 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10436 if (memory_operand (op0, VOIDmode))
10437 op0 = force_reg (GET_MODE (operands[0]), op0);
10438 if (GET_MODE (operands[0]) == SFmode)
10439 emit_insn (gen_minsf3 (operands[0], op0, op1));
10440 else
10441 emit_insn (gen_mindf3 (operands[0], op0, op1));
10442 return 1;
10443 }
10444 /* Check for max operation. */
4977bab6 10445 if (code == GT || code == UNGE)
0073023d 10446 {
4977bab6
ZW
10447 if (code == UNGE)
10448 {
10449 rtx tmp = op0;
10450 op0 = op1;
10451 op1 = tmp;
10452 }
0073023d
JH
10453 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10454 if (memory_operand (op0, VOIDmode))
10455 op0 = force_reg (GET_MODE (operands[0]), op0);
10456 if (GET_MODE (operands[0]) == SFmode)
10457 emit_insn (gen_maxsf3 (operands[0], op0, op1));
10458 else
10459 emit_insn (gen_maxdf3 (operands[0], op0, op1));
10460 return 1;
10461 }
10462 }
10463 /* Manage condition to be sse_comparison_operator. In case we are
10464 in non-ieee mode, try to canonicalize the destination operand
10465 to be first in the comparison - this helps reload to avoid extra
10466 moves. */
10467 if (!sse_comparison_operator (operands[1], VOIDmode)
10468 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
10469 {
10470 rtx tmp = ix86_compare_op0;
10471 ix86_compare_op0 = ix86_compare_op1;
10472 ix86_compare_op1 = tmp;
10473 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
10474 VOIDmode, ix86_compare_op0,
10475 ix86_compare_op1);
10476 }
d1f87653 10477 /* Similarly try to manage result to be first operand of conditional
fa9f36a1
JH
10478 move. We also don't support the NE comparison on SSE, so try to
10479 avoid it. */
037f20f1
JH
10480 if ((rtx_equal_p (operands[0], operands[3])
10481 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
10482 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
0073023d
JH
10483 {
10484 rtx tmp = operands[2];
10485 operands[2] = operands[3];
92d0fb09 10486 operands[3] = tmp;
0073023d
JH
10487 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
10488 (GET_CODE (operands[1])),
10489 VOIDmode, ix86_compare_op0,
10490 ix86_compare_op1);
10491 }
10492 if (GET_MODE (operands[0]) == SFmode)
10493 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
10494 operands[2], operands[3],
10495 ix86_compare_op0, ix86_compare_op1));
10496 else
10497 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
10498 operands[2], operands[3],
10499 ix86_compare_op0, ix86_compare_op1));
10500 return 1;
10501 }
10502
e075ae69 10503 /* The floating point conditional move instructions don't directly
0f290768 10504 support conditions resulting from a signed integer comparison. */
32b5b1aa 10505
e075ae69 10506 code = GET_CODE (operands[1]);
a1b8572c 10507 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9e7adcb3
JH
10508
10509 /* The floating point conditional move instructions don't directly
10510 support signed integer comparisons. */
10511
a1b8572c 10512 if (!fcmov_comparison_operator (compare_op, VOIDmode))
e075ae69 10513 {
a1b8572c 10514 if (second_test != NULL || bypass_test != NULL)
b531087a 10515 abort ();
e075ae69 10516 tmp = gen_reg_rtx (QImode);
3a3677ff 10517 ix86_expand_setcc (code, tmp);
e075ae69
RH
10518 code = NE;
10519 ix86_compare_op0 = tmp;
10520 ix86_compare_op1 = const0_rtx;
a1b8572c
JH
10521 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10522 }
10523 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10524 {
10525 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10526 emit_move_insn (tmp, operands[3]);
10527 operands[3] = tmp;
10528 }
10529 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10530 {
10531 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10532 emit_move_insn (tmp, operands[2]);
10533 operands[2] = tmp;
e075ae69 10534 }
e9a25f70 10535
e075ae69
RH
10536 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10537 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9e7adcb3 10538 compare_op,
e075ae69
RH
10539 operands[2],
10540 operands[3])));
a1b8572c
JH
10541 if (bypass_test)
10542 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10543 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10544 bypass_test,
10545 operands[3],
10546 operands[0])));
10547 if (second_test)
10548 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10549 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10550 second_test,
10551 operands[2],
10552 operands[0])));
32b5b1aa 10553
e075ae69 10554 return 1;
32b5b1aa
SC
10555}
10556
/* Expand conditional increment or decrement using adc/sbb instructions.
   The default case using setcc followed by the conditional move can be
   done by generic code.  operands[0] is the destination, operands[1]
   the comparison, operands[2] the value to adjust, and operands[3] must
   be const1_rtx or constm1_rtx.  Returns 1 ("DONE") on success, 0 to
   let the generic expander take over.  */
int
ix86_expand_int_addcc (rtx operands[])
{
  enum rtx_code code = GET_CODE (operands[1]);
  rtx compare_op;
  rtx val = const0_rtx;
  bool fpcmp = false;
  enum machine_mode mode = GET_MODE (operands[0]);

  /* Only +1/-1 adjustments map onto adc/sbb with a 0/-1 addend.  */
  if (operands[3] != const1_rtx
      && operands[3] != constm1_rtx)
    return 0;
  /* The comparison must be expressible as a carry-flag test (LTU/GEU).  */
  if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
				       ix86_compare_op1, &compare_op))
     return 0;
  code = GET_CODE (compare_op);

  if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
      || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
    {
      fpcmp = true;
      code = ix86_fp_compare_code_to_integer (code);
    }

  /* Canonicalize on LTU by reversing the condition in place when
     needed; val switches from 0 to -1 to compensate.  */
  if (code != LTU)
    {
      val = constm1_rtx;
      if (fpcmp)
	PUT_CODE (compare_op,
		  reverse_condition_maybe_unordered
		    (GET_CODE (compare_op)));
      else
	PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
    }
  PUT_MODE (compare_op, mode);

  /* Construct either adc or sbb insn.  */
  if ((code == LTU) == (operands[3] == constm1_rtx))
    {
      switch (GET_MODE (operands[0]))
	{
	  case QImode:
	    emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
	    break;
	  case HImode:
	    emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
	    break;
	  case SImode:
	    emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
	    break;
	  case DImode:
	    emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
	    break;
	  default:
	    abort ();
	}
    }
  else
    {
      switch (GET_MODE (operands[0]))
	{
	  case QImode:
	    emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
	    break;
	  case HImode:
	    emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
	    break;
	  case SImode:
	    emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
	    break;
	  case DImode:
	    emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
	    break;
	  default:
	    abort ();
	}
    }
  return 1; /* DONE */
}
10639
10640
/* Split operands 0 and 1 into SImode parts.  Similar to split_di, but
   works for floating point parameters and nonoffsetable memories.
   For pushes, it returns just stack offsets; the values will be saved
   in the right order.  Maximally three parts are generated.  Returns
   the number of parts (2 or 3).  */

static int
ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
{
  int size;

  /* Part count: on 32-bit targets parts are 4 bytes (XFmode counts as
     3 parts); on 64-bit targets parts are 8 bytes, rounding the 12-byte
     XFmode up to 2 parts.  */
  if (!TARGET_64BIT)
    size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
  else
    size = (GET_MODE_SIZE (mode) + 4) / 8;

  /* MMX registers cannot be split into SImode pieces.  */
  if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
    abort ();
  if (size < 2 || size > 3)
    abort ();

  /* Optimize constant pool reference to immediates.  This is used by fp
     moves, that force all constants to memory to allow combining.  */
  if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
    {
      rtx tmp = maybe_get_pool_constant (operand);
      if (tmp)
	operand = tmp;
    }

  if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
    {
      /* The only non-offsetable memories we handle are pushes.  */
      if (! push_operand (operand, VOIDmode))
	abort ();

      /* All parts alias the same push expression; the caller emits the
	 pushes in the right order.  */
      operand = copy_rtx (operand);
      PUT_MODE (operand, Pmode);
      parts[0] = parts[1] = parts[2] = operand;
    }
  else if (!TARGET_64BIT)
    {
      if (mode == DImode)
	split_di (&operand, 1, &parts[0], &parts[1]);
      else
	{
	  if (REG_P (operand))
	    {
	      /* Consecutive hard registers only exist after reload.  */
	      if (!reload_completed)
		abort ();
	      parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
	      parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
	      if (size == 3)
		parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
	    }
	  else if (offsettable_memref_p (operand))
	    {
	      operand = adjust_address (operand, SImode, 0);
	      parts[0] = operand;
	      parts[1] = adjust_address (operand, SImode, 4);
	      if (size == 3)
		parts[2] = adjust_address (operand, SImode, 8);
	    }
	  else if (GET_CODE (operand) == CONST_DOUBLE)
	    {
	      /* Decompose an FP constant into its target word images.  */
	      REAL_VALUE_TYPE r;
	      long l[4];

	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
	      switch (mode)
		{
		case XFmode:
		  REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
		  parts[2] = gen_int_mode (l[2], SImode);
		  break;
		case DFmode:
		  REAL_VALUE_TO_TARGET_DOUBLE (r, l);
		  break;
		default:
		  abort ();
		}
	      parts[1] = gen_int_mode (l[1], SImode);
	      parts[0] = gen_int_mode (l[0], SImode);
	    }
	  else
	    abort ();
	}
    }
  else
    {
      if (mode == TImode)
	split_ti (&operand, 1, &parts[0], &parts[1]);
      if (mode == XFmode || mode == TFmode)
	{
	  /* The upper half of XFmode is only 4 bytes wide.  */
	  enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
	  if (REG_P (operand))
	    {
	      if (!reload_completed)
		abort ();
	      parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
	      parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
	    }
	  else if (offsettable_memref_p (operand))
	    {
	      operand = adjust_address (operand, DImode, 0);
	      parts[0] = operand;
	      parts[1] = adjust_address (operand, upper_mode, 8);
	    }
	  else if (GET_CODE (operand) == CONST_DOUBLE)
	    {
	      REAL_VALUE_TYPE r;
	      long l[3];

	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
	      real_to_target (l, &r, mode);
	      /* Do not use shift by 32 to avoid warning on 32bit systems.  */
	      if (HOST_BITS_PER_WIDE_INT >= 64)
		parts[0]
		  = gen_int_mode
		      ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
		       + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
		       DImode);
	      else
		parts[0] = immed_double_const (l[0], l[1], DImode);
	      if (upper_mode == SImode)
		parts[1] = gen_int_mode (l[2], SImode);
	      else if (HOST_BITS_PER_WIDE_INT >= 64)
		parts[1]
		  = gen_int_mode
		      ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
		       + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
		       DImode);
	      else
		parts[1] = immed_double_const (l[2], l[3], DImode);
	    }
	  else
	    abort ();
	}
    }

  return size;
}
10782
/* Emit insns to perform a move or push of DI, DF, and XF values.
   operands[0] is the destination and operands[1] the source; both are
   split into word-sized parts and the moves are emitted here in an
   order that avoids clobbering the source early.  operands[2]-[7] are
   used as scratch slots: 2-4 receive the destination parts and 5-7 the
   source parts.  */

void
ix86_split_long_move (rtx operands[])
{
  rtx part[2][3];
  int nparts;
  int push = 0;
  int collisions = 0;
  enum machine_mode mode = GET_MODE (operands[0]);

  /* The DFmode expanders may ask us to move double.
     For 64bit target this is single move.  By hiding the fact
     here we simplify i386.md splitters.  */
  if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
    {
      /* Optimize constant pool reference to immediates.  This is used by
	 fp moves, that force all constants to memory to allow combining.  */

      if (GET_CODE (operands[1]) == MEM
	  && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
	  && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
	operands[1] = get_pool_constant (XEXP (operands[1], 0));
      if (push_operand (operands[0], VOIDmode))
	{
	  operands[0] = copy_rtx (operands[0]);
	  PUT_MODE (operands[0], Pmode);
	}
      else
	operands[0] = gen_lowpart (DImode, operands[0]);
      operands[1] = gen_lowpart (DImode, operands[1]);
      emit_move_insn (operands[0], operands[1]);
      return;
    }

  /* The only non-offsettable memory we handle is push.  */
  if (push_operand (operands[0], VOIDmode))
    push = 1;
  else if (GET_CODE (operands[0]) == MEM
	   && ! offsettable_memref_p (operands[0]))
    abort ();

  nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
  ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));

  /* When emitting push, take care for source operands on the stack.
     The earlier pushes move the stack pointer, so later source parts
     must be re-addressed relative to the previous part's address.  */
  if (push && GET_CODE (operands[1]) == MEM
      && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
    {
      if (nparts == 3)
	part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
				     XEXP (part[1][2], 0));
      part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
				   XEXP (part[1][1], 0));
    }

  /* We need to do copy in the right order in case an address register
     of the source overlaps the destination.  */
  if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
    {
      if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
	collisions++;
      if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
	collisions++;
      if (nparts == 3
	  && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
	collisions++;

      /* Collision in the middle part can be handled by reordering.  */
      if (collisions == 1 && nparts == 3
	  && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
	{
	  rtx tmp;
	  tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
	  tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
	}

      /* If there are more collisions, we can't handle it by reordering.
	 Do an lea to the last part and use only one colliding move.  */
      else if (collisions > 1)
	{
	  rtx base;

	  collisions = 1;

	  base = part[0][nparts - 1];

	  /* Handle the case when the last part isn't valid for lea.
	     Happens in 64-bit mode storing the 12-byte XFmode.  */
	  if (GET_MODE (base) != Pmode)
	    base = gen_rtx_REG (Pmode, REGNO (base));

	  emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
	  part[1][0] = replace_equiv_address (part[1][0], base);
	  part[1][1] = replace_equiv_address (part[1][1],
					      plus_constant (base, UNITS_PER_WORD));
	  if (nparts == 3)
	    part[1][2] = replace_equiv_address (part[1][2],
						plus_constant (base, 8));
	}
    }

  if (push)
    {
      if (!TARGET_64BIT)
	{
	  if (nparts == 3)
	    {
	      /* Keep the 16-byte stack slot of a pushed long double
		 aligned by pre-adjusting the stack pointer by 4.  */
	      if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
		emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
	      emit_move_insn (part[0][2], part[1][2]);
	    }
	}
      else
	{
	  /* In 64bit mode we don't have 32bit push available.  In case this is
	     register, it is OK - we will just use larger counterpart.  We also
	     retype memory - these comes from attempt to avoid REX prefix on
	     moving of second half of TFmode value.  */
	  if (GET_MODE (part[1][1]) == SImode)
	    {
	      if (GET_CODE (part[1][1]) == MEM)
		part[1][1] = adjust_address (part[1][1], DImode, 0);
	      else if (REG_P (part[1][1]))
		part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
	      else
		abort ();
	      if (GET_MODE (part[1][0]) == SImode)
		part[1][0] = part[1][1];
	    }
	}
      /* Pushes go top part first; the stack grows downward.  */
      emit_move_insn (part[0][1], part[1][1]);
      emit_move_insn (part[0][0], part[1][0]);
      return;
    }

  /* Choose correct order to not overwrite the source before it is copied.  */
  if ((REG_P (part[0][0])
       && REG_P (part[1][1])
       && (REGNO (part[0][0]) == REGNO (part[1][1])
	   || (nparts == 3
	       && REGNO (part[0][0]) == REGNO (part[1][2]))))
      || (collisions > 0
	  && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
    {
      /* Copy high-to-low.  */
      if (nparts == 3)
	{
	  operands[2] = part[0][2];
	  operands[3] = part[0][1];
	  operands[4] = part[0][0];
	  operands[5] = part[1][2];
	  operands[6] = part[1][1];
	  operands[7] = part[1][0];
	}
      else
	{
	  operands[2] = part[0][1];
	  operands[3] = part[0][0];
	  operands[5] = part[1][1];
	  operands[6] = part[1][0];
	}
    }
  else
    {
      /* Copy low-to-high.  */
      if (nparts == 3)
	{
	  operands[2] = part[0][0];
	  operands[3] = part[0][1];
	  operands[4] = part[0][2];
	  operands[5] = part[1][0];
	  operands[6] = part[1][1];
	  operands[7] = part[1][2];
	}
      else
	{
	  operands[2] = part[0][0];
	  operands[3] = part[0][1];
	  operands[5] = part[1][0];
	  operands[6] = part[1][1];
	}
    }
  emit_move_insn (operands[2], operands[5]);
  emit_move_insn (operands[3], operands[6]);
  if (nparts == 3)
    emit_move_insn (operands[4], operands[7]);

  return;
}
32b5b1aa 10974
e075ae69 10975void
b96a374d 10976ix86_split_ashldi (rtx *operands, rtx scratch)
32b5b1aa 10977{
e075ae69
RH
10978 rtx low[2], high[2];
10979 int count;
b985a30f 10980
e075ae69
RH
10981 if (GET_CODE (operands[2]) == CONST_INT)
10982 {
10983 split_di (operands, 2, low, high);
10984 count = INTVAL (operands[2]) & 63;
32b5b1aa 10985
e075ae69
RH
10986 if (count >= 32)
10987 {
10988 emit_move_insn (high[0], low[1]);
10989 emit_move_insn (low[0], const0_rtx);
b985a30f 10990
e075ae69
RH
10991 if (count > 32)
10992 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
10993 }
10994 else
10995 {
10996 if (!rtx_equal_p (operands[0], operands[1]))
10997 emit_move_insn (operands[0], operands[1]);
10998 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
10999 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
11000 }
11001 }
11002 else
11003 {
11004 if (!rtx_equal_p (operands[0], operands[1]))
11005 emit_move_insn (operands[0], operands[1]);
b985a30f 11006
e075ae69 11007 split_di (operands, 1, low, high);
b985a30f 11008
e075ae69
RH
11009 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
11010 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
32b5b1aa 11011
fe577e58 11012 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
e075ae69 11013 {
fe577e58 11014 if (! no_new_pseudos)
e075ae69
RH
11015 scratch = force_reg (SImode, const0_rtx);
11016 else
11017 emit_move_insn (scratch, const0_rtx);
11018
11019 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
11020 scratch));
11021 }
11022 else
11023 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
11024 }
e9a25f70 11025}
32b5b1aa 11026
e075ae69 11027void
b96a374d 11028ix86_split_ashrdi (rtx *operands, rtx scratch)
32b5b1aa 11029{
e075ae69
RH
11030 rtx low[2], high[2];
11031 int count;
32b5b1aa 11032
e075ae69
RH
11033 if (GET_CODE (operands[2]) == CONST_INT)
11034 {
11035 split_di (operands, 2, low, high);
11036 count = INTVAL (operands[2]) & 63;
32b5b1aa 11037
8937b6a2
RS
11038 if (count == 63)
11039 {
11040 emit_move_insn (high[0], high[1]);
11041 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
11042 emit_move_insn (low[0], high[0]);
11043
11044 }
11045 else if (count >= 32)
e075ae69
RH
11046 {
11047 emit_move_insn (low[0], high[1]);
32b5b1aa 11048
e075ae69
RH
11049 if (! reload_completed)
11050 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
11051 else
11052 {
11053 emit_move_insn (high[0], low[0]);
11054 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
11055 }
11056
11057 if (count > 32)
11058 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
11059 }
11060 else
11061 {
11062 if (!rtx_equal_p (operands[0], operands[1]))
11063 emit_move_insn (operands[0], operands[1]);
11064 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
11065 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
11066 }
11067 }
11068 else
32b5b1aa 11069 {
e075ae69
RH
11070 if (!rtx_equal_p (operands[0], operands[1]))
11071 emit_move_insn (operands[0], operands[1]);
11072
11073 split_di (operands, 1, low, high);
11074
11075 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
11076 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
11077
fe577e58 11078 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
e075ae69 11079 {
fe577e58 11080 if (! no_new_pseudos)
e075ae69
RH
11081 scratch = gen_reg_rtx (SImode);
11082 emit_move_insn (scratch, high[0]);
11083 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
11084 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
11085 scratch));
11086 }
11087 else
11088 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
32b5b1aa 11089 }
e075ae69 11090}
32b5b1aa 11091
e075ae69 11092void
b96a374d 11093ix86_split_lshrdi (rtx *operands, rtx scratch)
e075ae69
RH
11094{
11095 rtx low[2], high[2];
11096 int count;
32b5b1aa 11097
e075ae69 11098 if (GET_CODE (operands[2]) == CONST_INT)
32b5b1aa 11099 {
e075ae69
RH
11100 split_di (operands, 2, low, high);
11101 count = INTVAL (operands[2]) & 63;
11102
11103 if (count >= 32)
c7271385 11104 {
e075ae69
RH
11105 emit_move_insn (low[0], high[1]);
11106 emit_move_insn (high[0], const0_rtx);
32b5b1aa 11107
e075ae69
RH
11108 if (count > 32)
11109 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
11110 }
11111 else
11112 {
11113 if (!rtx_equal_p (operands[0], operands[1]))
11114 emit_move_insn (operands[0], operands[1]);
11115 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
11116 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
11117 }
32b5b1aa 11118 }
e075ae69
RH
11119 else
11120 {
11121 if (!rtx_equal_p (operands[0], operands[1]))
11122 emit_move_insn (operands[0], operands[1]);
32b5b1aa 11123
e075ae69
RH
11124 split_di (operands, 1, low, high);
11125
11126 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
11127 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
11128
11129 /* Heh. By reversing the arguments, we can reuse this pattern. */
fe577e58 11130 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
e075ae69 11131 {
fe577e58 11132 if (! no_new_pseudos)
e075ae69
RH
11133 scratch = force_reg (SImode, const0_rtx);
11134 else
11135 emit_move_insn (scratch, const0_rtx);
11136
11137 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
11138 scratch));
11139 }
11140 else
11141 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
11142 }
32b5b1aa 11143}
3f803cd9 11144
0407c02b 11145/* Helper function for the string operations below. Dest VARIABLE whether
0945b39d
JH
11146 it is aligned to VALUE bytes. If true, jump to the label. */
11147static rtx
b96a374d 11148ix86_expand_aligntest (rtx variable, int value)
0945b39d
JH
11149{
11150 rtx label = gen_label_rtx ();
11151 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
11152 if (GET_MODE (variable) == DImode)
11153 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
11154 else
11155 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
11156 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
d43e0b7d 11157 1, label);
0945b39d
JH
11158 return label;
11159}
11160
11161/* Adjust COUNTER by the VALUE. */
11162static void
b96a374d 11163ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
0945b39d
JH
11164{
11165 if (GET_MODE (countreg) == DImode)
11166 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
11167 else
11168 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
11169}
11170
11171/* Zero extend possibly SImode EXP to Pmode register. */
d24b3457 11172rtx
b96a374d 11173ix86_zero_extend_to_Pmode (rtx exp)
0945b39d
JH
11174{
11175 rtx r;
11176 if (GET_MODE (exp) == VOIDmode)
11177 return force_reg (Pmode, exp);
11178 if (GET_MODE (exp) == Pmode)
11179 return copy_to_mode_reg (Pmode, exp);
11180 r = gen_reg_rtx (Pmode);
11181 emit_insn (gen_zero_extendsidi2 (r, exp));
11182 return r;
11183}
11184
11185/* Expand string move (memcpy) operation. Use i386 string operations when
70128ad9 11186 profitable. expand_clrmem contains similar code. */
0945b39d 11187int
70128ad9 11188ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp)
0945b39d 11189{
4e44c1ef 11190 rtx srcreg, destreg, countreg, srcexp, destexp;
0945b39d
JH
11191 enum machine_mode counter_mode;
11192 HOST_WIDE_INT align = 0;
11193 unsigned HOST_WIDE_INT count = 0;
0945b39d 11194
0945b39d
JH
11195 if (GET_CODE (align_exp) == CONST_INT)
11196 align = INTVAL (align_exp);
11197
d0a5295a
RH
11198 /* Can't use any of this if the user has appropriated esi or edi. */
11199 if (global_regs[4] || global_regs[5])
11200 return 0;
11201
5519a4f9 11202 /* This simple hack avoids all inlining code and simplifies code below. */
0945b39d
JH
11203 if (!TARGET_ALIGN_STRINGOPS)
11204 align = 64;
11205
11206 if (GET_CODE (count_exp) == CONST_INT)
26771da7
JH
11207 {
11208 count = INTVAL (count_exp);
11209 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
11210 return 0;
11211 }
0945b39d
JH
11212
11213 /* Figure out proper mode for counter. For 32bits it is always SImode,
11214 for 64bits use SImode when possible, otherwise DImode.
11215 Set count to number of bytes copied when known at compile time. */
11216 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
11217 || x86_64_zero_extended_value (count_exp))
11218 counter_mode = SImode;
11219 else
11220 counter_mode = DImode;
11221
11222 if (counter_mode != SImode && counter_mode != DImode)
11223 abort ();
11224
11225 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
4e44c1ef
JJ
11226 if (destreg != XEXP (dst, 0))
11227 dst = replace_equiv_address_nv (dst, destreg);
0945b39d 11228 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
4e44c1ef
JJ
11229 if (srcreg != XEXP (src, 0))
11230 src = replace_equiv_address_nv (src, srcreg);
0945b39d
JH
11231
11232 /* When optimizing for size emit simple rep ; movsb instruction for
11233 counts not divisible by 4. */
11234
11235 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
11236 {
4e44c1ef 11237 emit_insn (gen_cld ());
0945b39d 11238 countreg = ix86_zero_extend_to_Pmode (count_exp);
4e44c1ef
JJ
11239 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
11240 srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
11241 emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
11242 destexp, srcexp));
0945b39d
JH
11243 }
11244
11245 /* For constant aligned (or small unaligned) copies use rep movsl
11246 followed by code copying the rest. For PentiumPro ensure 8 byte
11247 alignment to allow rep movsl acceleration. */
11248
11249 else if (count != 0
11250 && (align >= 8
11251 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
b531087a 11252 || optimize_size || count < (unsigned int) 64))
0945b39d 11253 {
4e44c1ef 11254 unsigned HOST_WIDE_INT offset = 0;
0945b39d 11255 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
4e44c1ef
JJ
11256 rtx srcmem, dstmem;
11257
11258 emit_insn (gen_cld ());
0945b39d
JH
11259 if (count & ~(size - 1))
11260 {
11261 countreg = copy_to_mode_reg (counter_mode,
11262 GEN_INT ((count >> (size == 4 ? 2 : 3))
11263 & (TARGET_64BIT ? -1 : 0x3fffffff)));
11264 countreg = ix86_zero_extend_to_Pmode (countreg);
4e44c1ef
JJ
11265
11266 destexp = gen_rtx_ASHIFT (Pmode, countreg,
11267 GEN_INT (size == 4 ? 2 : 3));
11268 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
11269 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11270
11271 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
11272 countreg, destexp, srcexp));
11273 offset = count & ~(size - 1);
0945b39d
JH
11274 }
11275 if (size == 8 && (count & 0x04))
4e44c1ef
JJ
11276 {
11277 srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
11278 offset);
11279 dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
11280 offset);
11281 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11282 offset += 4;
11283 }
0945b39d 11284 if (count & 0x02)
4e44c1ef
JJ
11285 {
11286 srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
11287 offset);
11288 dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
11289 offset);
11290 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11291 offset += 2;
11292 }
0945b39d 11293 if (count & 0x01)
4e44c1ef
JJ
11294 {
11295 srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
11296 offset);
11297 dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
11298 offset);
11299 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11300 }
0945b39d
JH
11301 }
11302 /* The generic code based on the glibc implementation:
11303 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
11304 allowing accelerated copying there)
11305 - copy the data using rep movsl
11306 - copy the rest. */
11307 else
11308 {
11309 rtx countreg2;
11310 rtx label = NULL;
4e44c1ef 11311 rtx srcmem, dstmem;
37ad04a5
JH
11312 int desired_alignment = (TARGET_PENTIUMPRO
11313 && (count == 0 || count >= (unsigned int) 260)
11314 ? 8 : UNITS_PER_WORD);
4e44c1ef
JJ
11315 /* Get rid of MEM_OFFSETs, they won't be accurate. */
11316 dst = change_address (dst, BLKmode, destreg);
11317 src = change_address (src, BLKmode, srcreg);
0945b39d
JH
11318
11319 /* In case we don't know anything about the alignment, default to
11320 library version, since it is usually equally fast and result in
b96a374d 11321 shorter code.
4977bab6
ZW
11322
11323 Also emit call when we know that the count is large and call overhead
11324 will not be important. */
11325 if (!TARGET_INLINE_ALL_STRINGOPS
11326 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
4e44c1ef 11327 return 0;
0945b39d
JH
11328
11329 if (TARGET_SINGLE_STRINGOP)
11330 emit_insn (gen_cld ());
11331
11332 countreg2 = gen_reg_rtx (Pmode);
11333 countreg = copy_to_mode_reg (counter_mode, count_exp);
11334
11335 /* We don't use loops to align destination and to copy parts smaller
11336 than 4 bytes, because gcc is able to optimize such code better (in
11337 the case the destination or the count really is aligned, gcc is often
11338 able to predict the branches) and also it is friendlier to the
a4f31c00 11339 hardware branch prediction.
0945b39d 11340
d1f87653 11341 Using loops is beneficial for generic case, because we can
0945b39d
JH
11342 handle small counts using the loops. Many CPUs (such as Athlon)
11343 have large REP prefix setup costs.
11344
4aae8a9a 11345 This is quite costly. Maybe we can revisit this decision later or
0945b39d
JH
11346 add some customizability to this code. */
11347
37ad04a5 11348 if (count == 0 && align < desired_alignment)
0945b39d
JH
11349 {
11350 label = gen_label_rtx ();
aaae0bb9 11351 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
d43e0b7d 11352 LEU, 0, counter_mode, 1, label);
0945b39d
JH
11353 }
11354 if (align <= 1)
11355 {
11356 rtx label = ix86_expand_aligntest (destreg, 1);
4e44c1ef
JJ
11357 srcmem = change_address (src, QImode, srcreg);
11358 dstmem = change_address (dst, QImode, destreg);
11359 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
0945b39d
JH
11360 ix86_adjust_counter (countreg, 1);
11361 emit_label (label);
11362 LABEL_NUSES (label) = 1;
11363 }
11364 if (align <= 2)
11365 {
11366 rtx label = ix86_expand_aligntest (destreg, 2);
4e44c1ef
JJ
11367 srcmem = change_address (src, HImode, srcreg);
11368 dstmem = change_address (dst, HImode, destreg);
11369 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
0945b39d
JH
11370 ix86_adjust_counter (countreg, 2);
11371 emit_label (label);
11372 LABEL_NUSES (label) = 1;
11373 }
37ad04a5 11374 if (align <= 4 && desired_alignment > 4)
0945b39d
JH
11375 {
11376 rtx label = ix86_expand_aligntest (destreg, 4);
4e44c1ef
JJ
11377 srcmem = change_address (src, SImode, srcreg);
11378 dstmem = change_address (dst, SImode, destreg);
11379 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
0945b39d
JH
11380 ix86_adjust_counter (countreg, 4);
11381 emit_label (label);
11382 LABEL_NUSES (label) = 1;
11383 }
11384
37ad04a5
JH
11385 if (label && desired_alignment > 4 && !TARGET_64BIT)
11386 {
11387 emit_label (label);
11388 LABEL_NUSES (label) = 1;
11389 label = NULL_RTX;
11390 }
0945b39d
JH
11391 if (!TARGET_SINGLE_STRINGOP)
11392 emit_insn (gen_cld ());
11393 if (TARGET_64BIT)
11394 {
11395 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11396 GEN_INT (3)));
4e44c1ef 11397 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
0945b39d
JH
11398 }
11399 else
11400 {
4e44c1ef
JJ
11401 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
11402 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
0945b39d 11403 }
4e44c1ef
JJ
11404 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
11405 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11406 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
11407 countreg2, destexp, srcexp));
0945b39d
JH
11408
11409 if (label)
11410 {
11411 emit_label (label);
11412 LABEL_NUSES (label) = 1;
11413 }
11414 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
4e44c1ef
JJ
11415 {
11416 srcmem = change_address (src, SImode, srcreg);
11417 dstmem = change_address (dst, SImode, destreg);
11418 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11419 }
0945b39d
JH
11420 if ((align <= 4 || count == 0) && TARGET_64BIT)
11421 {
11422 rtx label = ix86_expand_aligntest (countreg, 4);
4e44c1ef
JJ
11423 srcmem = change_address (src, SImode, srcreg);
11424 dstmem = change_address (dst, SImode, destreg);
11425 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
0945b39d
JH
11426 emit_label (label);
11427 LABEL_NUSES (label) = 1;
11428 }
11429 if (align > 2 && count != 0 && (count & 2))
4e44c1ef
JJ
11430 {
11431 srcmem = change_address (src, HImode, srcreg);
11432 dstmem = change_address (dst, HImode, destreg);
11433 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11434 }
0945b39d
JH
11435 if (align <= 2 || count == 0)
11436 {
11437 rtx label = ix86_expand_aligntest (countreg, 2);
4e44c1ef
JJ
11438 srcmem = change_address (src, HImode, srcreg);
11439 dstmem = change_address (dst, HImode, destreg);
11440 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
0945b39d
JH
11441 emit_label (label);
11442 LABEL_NUSES (label) = 1;
11443 }
11444 if (align > 1 && count != 0 && (count & 1))
4e44c1ef
JJ
11445 {
11446 srcmem = change_address (src, QImode, srcreg);
11447 dstmem = change_address (dst, QImode, destreg);
11448 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11449 }
0945b39d
JH
11450 if (align <= 1 || count == 0)
11451 {
11452 rtx label = ix86_expand_aligntest (countreg, 1);
4e44c1ef
JJ
11453 srcmem = change_address (src, QImode, srcreg);
11454 dstmem = change_address (dst, QImode, destreg);
11455 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
0945b39d
JH
11456 emit_label (label);
11457 LABEL_NUSES (label) = 1;
11458 }
11459 }
11460
0945b39d
JH
11461 return 1;
11462}
11463
11464/* Expand string clear operation (bzero). Use i386 string operations when
70128ad9 11465 profitable. expand_movmem contains similar code. */
0945b39d 11466int
70128ad9 11467ix86_expand_clrmem (rtx dst, rtx count_exp, rtx align_exp)
0945b39d 11468{
4e44c1ef 11469 rtx destreg, zeroreg, countreg, destexp;
0945b39d
JH
11470 enum machine_mode counter_mode;
11471 HOST_WIDE_INT align = 0;
11472 unsigned HOST_WIDE_INT count = 0;
11473
11474 if (GET_CODE (align_exp) == CONST_INT)
11475 align = INTVAL (align_exp);
11476
d0a5295a
RH
11477 /* Can't use any of this if the user has appropriated esi. */
11478 if (global_regs[4])
11479 return 0;
11480
5519a4f9 11481 /* This simple hack avoids all inlining code and simplifies code below. */
0945b39d
JH
11482 if (!TARGET_ALIGN_STRINGOPS)
11483 align = 32;
11484
11485 if (GET_CODE (count_exp) == CONST_INT)
26771da7
JH
11486 {
11487 count = INTVAL (count_exp);
11488 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
11489 return 0;
11490 }
0945b39d
JH
11491 /* Figure out proper mode for counter. For 32bits it is always SImode,
11492 for 64bits use SImode when possible, otherwise DImode.
11493 Set count to number of bytes copied when known at compile time. */
11494 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
11495 || x86_64_zero_extended_value (count_exp))
11496 counter_mode = SImode;
11497 else
11498 counter_mode = DImode;
11499
4e44c1ef
JJ
11500 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
11501 if (destreg != XEXP (dst, 0))
11502 dst = replace_equiv_address_nv (dst, destreg);
0945b39d
JH
11503
11504 emit_insn (gen_cld ());
11505
11506 /* When optimizing for size emit simple rep ; movsb instruction for
11507 counts not divisible by 4. */
11508
11509 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
11510 {
11511 countreg = ix86_zero_extend_to_Pmode (count_exp);
11512 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
4e44c1ef
JJ
11513 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
11514 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
0945b39d
JH
11515 }
11516 else if (count != 0
11517 && (align >= 8
11518 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
b531087a 11519 || optimize_size || count < (unsigned int) 64))
0945b39d
JH
11520 {
11521 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
4e44c1ef
JJ
11522 unsigned HOST_WIDE_INT offset = 0;
11523
0945b39d
JH
11524 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
11525 if (count & ~(size - 1))
11526 {
11527 countreg = copy_to_mode_reg (counter_mode,
11528 GEN_INT ((count >> (size == 4 ? 2 : 3))
11529 & (TARGET_64BIT ? -1 : 0x3fffffff)));
11530 countreg = ix86_zero_extend_to_Pmode (countreg);
4e44c1ef
JJ
11531 destexp = gen_rtx_ASHIFT (Pmode, countreg, GEN_INT (size == 4 ? 2 : 3));
11532 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11533 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
11534 offset = count & ~(size - 1);
0945b39d
JH
11535 }
11536 if (size == 8 && (count & 0x04))
4e44c1ef
JJ
11537 {
11538 rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
11539 offset);
11540 emit_insn (gen_strset (destreg, mem,
0945b39d 11541 gen_rtx_SUBREG (SImode, zeroreg, 0)));
4e44c1ef
JJ
11542 offset += 4;
11543 }
0945b39d 11544 if (count & 0x02)
4e44c1ef
JJ
11545 {
11546 rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
11547 offset);
11548 emit_insn (gen_strset (destreg, mem,
0945b39d 11549 gen_rtx_SUBREG (HImode, zeroreg, 0)));
4e44c1ef
JJ
11550 offset += 2;
11551 }
0945b39d 11552 if (count & 0x01)
4e44c1ef
JJ
11553 {
11554 rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
11555 offset);
11556 emit_insn (gen_strset (destreg, mem,
0945b39d 11557 gen_rtx_SUBREG (QImode, zeroreg, 0)));
4e44c1ef 11558 }
0945b39d
JH
11559 }
11560 else
11561 {
11562 rtx countreg2;
11563 rtx label = NULL;
37ad04a5
JH
11564 /* Compute desired alignment of the string operation. */
11565 int desired_alignment = (TARGET_PENTIUMPRO
11566 && (count == 0 || count >= (unsigned int) 260)
11567 ? 8 : UNITS_PER_WORD);
0945b39d
JH
11568
11569 /* In case we don't know anything about the alignment, default to
11570 library version, since it is usually equally fast and result in
4977bab6
ZW
11571 shorter code.
11572
11573 Also emit call when we know that the count is large and call overhead
11574 will not be important. */
11575 if (!TARGET_INLINE_ALL_STRINGOPS
11576 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
0945b39d
JH
11577 return 0;
11578
11579 if (TARGET_SINGLE_STRINGOP)
11580 emit_insn (gen_cld ());
11581
11582 countreg2 = gen_reg_rtx (Pmode);
11583 countreg = copy_to_mode_reg (counter_mode, count_exp);
11584 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
4e44c1ef
JJ
11585 /* Get rid of MEM_OFFSET, it won't be accurate. */
11586 dst = change_address (dst, BLKmode, destreg);
0945b39d 11587
37ad04a5 11588 if (count == 0 && align < desired_alignment)
0945b39d
JH
11589 {
11590 label = gen_label_rtx ();
37ad04a5 11591 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
d43e0b7d 11592 LEU, 0, counter_mode, 1, label);
0945b39d
JH
11593 }
11594 if (align <= 1)
11595 {
11596 rtx label = ix86_expand_aligntest (destreg, 1);
4e44c1ef
JJ
11597 emit_insn (gen_strset (destreg, dst,
11598 gen_rtx_SUBREG (QImode, zeroreg, 0)));
0945b39d
JH
11599 ix86_adjust_counter (countreg, 1);
11600 emit_label (label);
11601 LABEL_NUSES (label) = 1;
11602 }
11603 if (align <= 2)
11604 {
11605 rtx label = ix86_expand_aligntest (destreg, 2);
4e44c1ef
JJ
11606 emit_insn (gen_strset (destreg, dst,
11607 gen_rtx_SUBREG (HImode, zeroreg, 0)));
0945b39d
JH
11608 ix86_adjust_counter (countreg, 2);
11609 emit_label (label);
11610 LABEL_NUSES (label) = 1;
11611 }
37ad04a5 11612 if (align <= 4 && desired_alignment > 4)
0945b39d
JH
11613 {
11614 rtx label = ix86_expand_aligntest (destreg, 4);
4e44c1ef
JJ
11615 emit_insn (gen_strset (destreg, dst,
11616 (TARGET_64BIT
11617 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
11618 : zeroreg)));
0945b39d
JH
11619 ix86_adjust_counter (countreg, 4);
11620 emit_label (label);
11621 LABEL_NUSES (label) = 1;
11622 }
11623
37ad04a5
JH
11624 if (label && desired_alignment > 4 && !TARGET_64BIT)
11625 {
11626 emit_label (label);
11627 LABEL_NUSES (label) = 1;
11628 label = NULL_RTX;
11629 }
11630
0945b39d
JH
11631 if (!TARGET_SINGLE_STRINGOP)
11632 emit_insn (gen_cld ());
11633 if (TARGET_64BIT)
11634 {
11635 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11636 GEN_INT (3)));
4e44c1ef 11637 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
0945b39d
JH
11638 }
11639 else
11640 {
4e44c1ef
JJ
11641 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
11642 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
0945b39d 11643 }
4e44c1ef
JJ
11644 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11645 emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));
11646
0945b39d
JH
11647 if (label)
11648 {
11649 emit_label (label);
11650 LABEL_NUSES (label) = 1;
11651 }
37ad04a5 11652
0945b39d 11653 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
4e44c1ef
JJ
11654 emit_insn (gen_strset (destreg, dst,
11655 gen_rtx_SUBREG (SImode, zeroreg, 0)));
0945b39d
JH
11656 if (TARGET_64BIT && (align <= 4 || count == 0))
11657 {
79258dce 11658 rtx label = ix86_expand_aligntest (countreg, 4);
4e44c1ef
JJ
11659 emit_insn (gen_strset (destreg, dst,
11660 gen_rtx_SUBREG (SImode, zeroreg, 0)));
0945b39d
JH
11661 emit_label (label);
11662 LABEL_NUSES (label) = 1;
11663 }
11664 if (align > 2 && count != 0 && (count & 2))
4e44c1ef
JJ
11665 emit_insn (gen_strset (destreg, dst,
11666 gen_rtx_SUBREG (HImode, zeroreg, 0)));
0945b39d
JH
11667 if (align <= 2 || count == 0)
11668 {
74411039 11669 rtx label = ix86_expand_aligntest (countreg, 2);
4e44c1ef
JJ
11670 emit_insn (gen_strset (destreg, dst,
11671 gen_rtx_SUBREG (HImode, zeroreg, 0)));
0945b39d
JH
11672 emit_label (label);
11673 LABEL_NUSES (label) = 1;
11674 }
11675 if (align > 1 && count != 0 && (count & 1))
4e44c1ef
JJ
11676 emit_insn (gen_strset (destreg, dst,
11677 gen_rtx_SUBREG (QImode, zeroreg, 0)));
0945b39d
JH
11678 if (align <= 1 || count == 0)
11679 {
74411039 11680 rtx label = ix86_expand_aligntest (countreg, 1);
4e44c1ef
JJ
11681 emit_insn (gen_strset (destreg, dst,
11682 gen_rtx_SUBREG (QImode, zeroreg, 0)));
0945b39d
JH
11683 emit_label (label);
11684 LABEL_NUSES (label) = 1;
11685 }
11686 }
11687 return 1;
11688}
4e44c1ef 11689
0945b39d
JH
11690/* Expand strlen. */
11691int
b96a374d 11692ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
0945b39d
JH
11693{
11694 rtx addr, scratch1, scratch2, scratch3, scratch4;
11695
11696 /* The generic case of strlen expander is long. Avoid it's
11697 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
11698
11699 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11700 && !TARGET_INLINE_ALL_STRINGOPS
11701 && !optimize_size
11702 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
11703 return 0;
11704
11705 addr = force_reg (Pmode, XEXP (src, 0));
11706 scratch1 = gen_reg_rtx (Pmode);
11707
11708 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11709 && !optimize_size)
11710 {
11711 /* Well it seems that some optimizer does not combine a call like
11712 foo(strlen(bar), strlen(bar));
11713 when the move and the subtraction is done here. It does calculate
11714 the length just once when these instructions are done inside of
11715 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
11716 often used and I use one fewer register for the lifetime of
11717 output_strlen_unroll() this is better. */
11718
11719 emit_move_insn (out, addr);
11720
4e44c1ef 11721 ix86_expand_strlensi_unroll_1 (out, src, align);
0945b39d
JH
11722
11723 /* strlensi_unroll_1 returns the address of the zero at the end of
11724 the string, like memchr(), so compute the length by subtracting
11725 the start address. */
11726 if (TARGET_64BIT)
11727 emit_insn (gen_subdi3 (out, out, addr));
11728 else
11729 emit_insn (gen_subsi3 (out, out, addr));
11730 }
11731 else
11732 {
4e44c1ef 11733 rtx unspec;
0945b39d
JH
11734 scratch2 = gen_reg_rtx (Pmode);
11735 scratch3 = gen_reg_rtx (Pmode);
11736 scratch4 = force_reg (Pmode, constm1_rtx);
11737
11738 emit_move_insn (scratch3, addr);
11739 eoschar = force_reg (QImode, eoschar);
11740
11741 emit_insn (gen_cld ());
4e44c1ef
JJ
11742 src = replace_equiv_address_nv (src, scratch3);
11743
11744 /* If .md starts supporting :P, this can be done in .md. */
11745 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
11746 scratch4), UNSPEC_SCAS);
11747 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
0945b39d
JH
11748 if (TARGET_64BIT)
11749 {
0945b39d
JH
11750 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
11751 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
11752 }
11753 else
11754 {
0945b39d
JH
11755 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
11756 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
11757 }
11758 }
11759 return 1;
11760}
11761
e075ae69
RH
11762/* Expand the appropriate insns for doing strlen if not just doing
11763 repnz; scasb
11764
11765 out = result, initialized with the start address
11766 align_rtx = alignment of the address.
11767 scratch = scratch register, initialized with the startaddress when
77ebd435 11768 not aligned, otherwise undefined
3f803cd9 11769
39e3f58c 11770 This is just the body. It needs the initializations mentioned above and
3f803cd9
SC
11771 some address computing at the end. These things are done in i386.md. */
11772
0945b39d 11773static void
4e44c1ef 11774ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
3f803cd9 11775{
e075ae69
RH
11776 int align;
11777 rtx tmp;
11778 rtx align_2_label = NULL_RTX;
11779 rtx align_3_label = NULL_RTX;
11780 rtx align_4_label = gen_label_rtx ();
11781 rtx end_0_label = gen_label_rtx ();
e075ae69 11782 rtx mem;
e2e52e1b 11783 rtx tmpreg = gen_reg_rtx (SImode);
0945b39d 11784 rtx scratch = gen_reg_rtx (SImode);
e6e81735 11785 rtx cmp;
e075ae69
RH
11786
11787 align = 0;
11788 if (GET_CODE (align_rtx) == CONST_INT)
11789 align = INTVAL (align_rtx);
3f803cd9 11790
e9a25f70 11791 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
3f803cd9 11792
e9a25f70 11793 /* Is there a known alignment and is it less than 4? */
e075ae69 11794 if (align < 4)
3f803cd9 11795 {
0945b39d
JH
11796 rtx scratch1 = gen_reg_rtx (Pmode);
11797 emit_move_insn (scratch1, out);
e9a25f70 11798 /* Is there a known alignment and is it not 2? */
e075ae69 11799 if (align != 2)
3f803cd9 11800 {
e075ae69
RH
11801 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
11802 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
11803
11804 /* Leave just the 3 lower bits. */
0945b39d 11805 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
e075ae69
RH
11806 NULL_RTX, 0, OPTAB_WIDEN);
11807
9076b9c1 11808 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
d43e0b7d 11809 Pmode, 1, align_4_label);
60c81c89 11810 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
d43e0b7d 11811 Pmode, 1, align_2_label);
60c81c89 11812 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
d43e0b7d 11813 Pmode, 1, align_3_label);
3f803cd9
SC
11814 }
11815 else
11816 {
e9a25f70
JL
11817 /* Since the alignment is 2, we have to check 2 or 0 bytes;
11818 check if is aligned to 4 - byte. */
e9a25f70 11819
60c81c89 11820 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
e075ae69
RH
11821 NULL_RTX, 0, OPTAB_WIDEN);
11822
9076b9c1 11823 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
d43e0b7d 11824 Pmode, 1, align_4_label);
3f803cd9
SC
11825 }
11826
4e44c1ef 11827 mem = change_address (src, QImode, out);
e9a25f70 11828
e075ae69 11829 /* Now compare the bytes. */
e9a25f70 11830
0f290768 11831 /* Compare the first n unaligned byte on a byte per byte basis. */
9076b9c1 11832 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
d43e0b7d 11833 QImode, 1, end_0_label);
3f803cd9 11834
0f290768 11835 /* Increment the address. */
0945b39d
JH
11836 if (TARGET_64BIT)
11837 emit_insn (gen_adddi3 (out, out, const1_rtx));
11838 else
11839 emit_insn (gen_addsi3 (out, out, const1_rtx));
e9a25f70 11840
e075ae69
RH
11841 /* Not needed with an alignment of 2 */
11842 if (align != 2)
11843 {
11844 emit_label (align_2_label);
3f803cd9 11845
d43e0b7d
RK
11846 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11847 end_0_label);
e075ae69 11848
0945b39d
JH
11849 if (TARGET_64BIT)
11850 emit_insn (gen_adddi3 (out, out, const1_rtx));
11851 else
11852 emit_insn (gen_addsi3 (out, out, const1_rtx));
e075ae69
RH
11853
11854 emit_label (align_3_label);
11855 }
11856
d43e0b7d
RK
11857 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11858 end_0_label);
e075ae69 11859
0945b39d
JH
11860 if (TARGET_64BIT)
11861 emit_insn (gen_adddi3 (out, out, const1_rtx));
11862 else
11863 emit_insn (gen_addsi3 (out, out, const1_rtx));
3f803cd9
SC
11864 }
11865
e075ae69
RH
11866 /* Generate loop to check 4 bytes at a time. It is not a good idea to
11867 align this loop. It gives only huge programs, but does not help to
11868 speed up. */
11869 emit_label (align_4_label);
3f803cd9 11870
4e44c1ef 11871 mem = change_address (src, SImode, out);
e075ae69 11872 emit_move_insn (scratch, mem);
0945b39d
JH
11873 if (TARGET_64BIT)
11874 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
11875 else
11876 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
e075ae69 11877
e2e52e1b
JH
11878 /* This formula yields a nonzero result iff one of the bytes is zero.
11879 This saves three branches inside loop and many cycles. */
11880
11881 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
11882 emit_insn (gen_one_cmplsi2 (scratch, scratch));
11883 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
7471a1f0 11884 emit_insn (gen_andsi3 (tmpreg, tmpreg,
d8bf17f9 11885 gen_int_mode (0x80808080, SImode)));
d43e0b7d
RK
11886 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
11887 align_4_label);
e2e52e1b
JH
11888
11889 if (TARGET_CMOVE)
11890 {
11891 rtx reg = gen_reg_rtx (SImode);
0945b39d 11892 rtx reg2 = gen_reg_rtx (Pmode);
e2e52e1b
JH
11893 emit_move_insn (reg, tmpreg);
11894 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
11895
0f290768 11896 /* If zero is not in the first two bytes, move two bytes forward. */
16189740 11897 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
e2e52e1b
JH
11898 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11899 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11900 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
11901 gen_rtx_IF_THEN_ELSE (SImode, tmp,
77ebd435
AJ
11902 reg,
11903 tmpreg)));
e2e52e1b 11904 /* Emit lea manually to avoid clobbering of flags. */
0945b39d 11905 emit_insn (gen_rtx_SET (SImode, reg2,
60c81c89 11906 gen_rtx_PLUS (Pmode, out, const2_rtx)));
e2e52e1b
JH
11907
11908 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11909 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11910 emit_insn (gen_rtx_SET (VOIDmode, out,
0945b39d 11911 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
a4f31c00
AJ
11912 reg2,
11913 out)));
e2e52e1b
JH
11914
11915 }
11916 else
11917 {
11918 rtx end_2_label = gen_label_rtx ();
11919 /* Is zero in the first two bytes? */
11920
16189740 11921 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
e2e52e1b
JH
11922 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11923 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
11924 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11925 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
11926 pc_rtx);
11927 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11928 JUMP_LABEL (tmp) = end_2_label;
11929
0f290768 11930 /* Not in the first two. Move two bytes forward. */
e2e52e1b 11931 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
0945b39d 11932 if (TARGET_64BIT)
60c81c89 11933 emit_insn (gen_adddi3 (out, out, const2_rtx));
0945b39d 11934 else
60c81c89 11935 emit_insn (gen_addsi3 (out, out, const2_rtx));
e2e52e1b
JH
11936
11937 emit_label (end_2_label);
11938
11939 }
11940
0f290768 11941 /* Avoid branch in fixing the byte. */
e2e52e1b 11942 tmpreg = gen_lowpart (QImode, tmpreg);
7e08e190 11943 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
e6e81735 11944 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
0945b39d 11945 if (TARGET_64BIT)
e6e81735 11946 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
0945b39d 11947 else
e6e81735 11948 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
e075ae69
RH
11949
11950 emit_label (end_0_label);
11951}
0e07aff3
RH
11952
11953void
0f901c4c
SH
11954ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
11955 rtx callarg2 ATTRIBUTE_UNUSED,
b96a374d 11956 rtx pop, int sibcall)
0e07aff3
RH
11957{
11958 rtx use = NULL, call;
11959
11960 if (pop == const0_rtx)
11961 pop = NULL;
11962 if (TARGET_64BIT && pop)
11963 abort ();
11964
b069de3b
SS
11965#if TARGET_MACHO
11966 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
11967 fnaddr = machopic_indirect_call_target (fnaddr);
11968#else
0e07aff3
RH
11969 /* Static functions and indirect calls don't need the pic register. */
11970 if (! TARGET_64BIT && flag_pic
11971 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
12969f45 11972 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
66edd3b4 11973 use_reg (&use, pic_offset_table_rtx);
0e07aff3
RH
11974
11975 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
11976 {
11977 rtx al = gen_rtx_REG (QImode, 0);
11978 emit_move_insn (al, callarg2);
11979 use_reg (&use, al);
11980 }
b069de3b 11981#endif /* TARGET_MACHO */
0e07aff3
RH
11982
11983 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
11984 {
11985 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11986 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11987 }
4977bab6
ZW
11988 if (sibcall && TARGET_64BIT
11989 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
11990 {
11991 rtx addr;
11992 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
b19ee4bd 11993 fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
4977bab6
ZW
11994 emit_move_insn (fnaddr, addr);
11995 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11996 }
0e07aff3
RH
11997
11998 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
11999 if (retval)
12000 call = gen_rtx_SET (VOIDmode, retval, call);
12001 if (pop)
12002 {
12003 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
12004 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
12005 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
12006 }
12007
12008 call = emit_call_insn (call);
12009 if (use)
12010 CALL_INSN_FUNCTION_USAGE (call) = use;
12011}
fce5a9f2 12012
e075ae69 12013\f
e075ae69
RH
12014/* Clear stack slot assignments remembered from previous functions.
12015 This is called from INIT_EXPANDERS once before RTL is emitted for each
12016 function. */
12017
e2500fed 12018static struct machine_function *
b96a374d 12019ix86_init_machine_status (void)
37b15744 12020{
d7394366
JH
12021 struct machine_function *f;
12022
12023 f = ggc_alloc_cleared (sizeof (struct machine_function));
12024 f->use_fast_prologue_epilogue_nregs = -1;
8330e2c6
AJ
12025
12026 return f;
1526a060
BS
12027}
12028
e075ae69
RH
12029/* Return a MEM corresponding to a stack slot with mode MODE.
12030 Allocate a new slot if necessary.
12031
12032 The RTL for a function can have several slots available: N is
12033 which slot to use. */
12034
12035rtx
b96a374d 12036assign_386_stack_local (enum machine_mode mode, int n)
e075ae69 12037{
ddb0ae00
ZW
12038 struct stack_local_entry *s;
12039
e075ae69
RH
12040 if (n < 0 || n >= MAX_386_STACK_LOCALS)
12041 abort ();
12042
ddb0ae00
ZW
12043 for (s = ix86_stack_locals; s; s = s->next)
12044 if (s->mode == mode && s->n == n)
12045 return s->rtl;
12046
12047 s = (struct stack_local_entry *)
12048 ggc_alloc (sizeof (struct stack_local_entry));
12049 s->n = n;
12050 s->mode = mode;
12051 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
e075ae69 12052
ddb0ae00
ZW
12053 s->next = ix86_stack_locals;
12054 ix86_stack_locals = s;
12055 return s->rtl;
e075ae69 12056}
f996902d
RH
12057
12058/* Construct the SYMBOL_REF for the tls_get_addr function. */
12059
e2500fed 12060static GTY(()) rtx ix86_tls_symbol;
f996902d 12061rtx
b96a374d 12062ix86_tls_get_addr (void)
f996902d 12063{
f996902d 12064
e2500fed 12065 if (!ix86_tls_symbol)
f996902d 12066 {
75d38379
JJ
12067 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
12068 (TARGET_GNU_TLS && !TARGET_64BIT)
12069 ? "___tls_get_addr"
12070 : "__tls_get_addr");
f996902d
RH
12071 }
12072
e2500fed 12073 return ix86_tls_symbol;
f996902d 12074}
e075ae69
RH
12075\f
12076/* Calculate the length of the memory address in the instruction
12077 encoding. Does not include the one-byte modrm, opcode, or prefix. */
12078
12079static int
b96a374d 12080memory_address_length (rtx addr)
e075ae69
RH
12081{
12082 struct ix86_address parts;
12083 rtx base, index, disp;
12084 int len;
12085
12086 if (GET_CODE (addr) == PRE_DEC
0d7d98ee
JH
12087 || GET_CODE (addr) == POST_INC
12088 || GET_CODE (addr) == PRE_MODIFY
12089 || GET_CODE (addr) == POST_MODIFY)
e075ae69 12090 return 0;
3f803cd9 12091
e075ae69
RH
12092 if (! ix86_decompose_address (addr, &parts))
12093 abort ();
3f803cd9 12094
e075ae69
RH
12095 base = parts.base;
12096 index = parts.index;
12097 disp = parts.disp;
12098 len = 0;
3f803cd9 12099
7b65ed54
EB
12100 /* Rule of thumb:
12101 - esp as the base always wants an index,
12102 - ebp as the base always wants a displacement. */
12103
e075ae69
RH
12104 /* Register Indirect. */
12105 if (base && !index && !disp)
12106 {
7b65ed54
EB
12107 /* esp (for its index) and ebp (for its displacement) need
12108 the two-byte modrm form. */
e075ae69
RH
12109 if (addr == stack_pointer_rtx
12110 || addr == arg_pointer_rtx
564d80f4
JH
12111 || addr == frame_pointer_rtx
12112 || addr == hard_frame_pointer_rtx)
e075ae69 12113 len = 1;
3f803cd9 12114 }
e9a25f70 12115
e075ae69
RH
12116 /* Direct Addressing. */
12117 else if (disp && !base && !index)
12118 len = 4;
12119
3f803cd9
SC
12120 else
12121 {
e075ae69
RH
12122 /* Find the length of the displacement constant. */
12123 if (disp)
12124 {
12125 if (GET_CODE (disp) == CONST_INT
9b73c90a
EB
12126 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K')
12127 && base)
e075ae69
RH
12128 len = 1;
12129 else
12130 len = 4;
12131 }
7b65ed54
EB
12132 /* ebp always wants a displacement. */
12133 else if (base == hard_frame_pointer_rtx)
12134 len = 1;
3f803cd9 12135
43f3a59d 12136 /* An index requires the two-byte modrm form.... */
7b65ed54
EB
12137 if (index
12138 /* ...like esp, which always wants an index. */
12139 || base == stack_pointer_rtx
12140 || base == arg_pointer_rtx
12141 || base == frame_pointer_rtx)
e075ae69 12142 len += 1;
3f803cd9
SC
12143 }
12144
e075ae69
RH
12145 return len;
12146}
79325812 12147
5bf0ebab
RH
12148/* Compute default value for "length_immediate" attribute. When SHORTFORM
12149 is set, expect that insn have 8bit immediate alternative. */
e075ae69 12150int
b96a374d 12151ix86_attr_length_immediate_default (rtx insn, int shortform)
e075ae69 12152{
6ef67412
JH
12153 int len = 0;
12154 int i;
6c698a6d 12155 extract_insn_cached (insn);
6ef67412
JH
12156 for (i = recog_data.n_operands - 1; i >= 0; --i)
12157 if (CONSTANT_P (recog_data.operand[i]))
3071fab5 12158 {
6ef67412 12159 if (len)
3071fab5 12160 abort ();
6ef67412
JH
12161 if (shortform
12162 && GET_CODE (recog_data.operand[i]) == CONST_INT
12163 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
12164 len = 1;
12165 else
12166 {
12167 switch (get_attr_mode (insn))
12168 {
12169 case MODE_QI:
12170 len+=1;
12171 break;
12172 case MODE_HI:
12173 len+=2;
12174 break;
12175 case MODE_SI:
12176 len+=4;
12177 break;
14f73b5a
JH
12178 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
12179 case MODE_DI:
12180 len+=4;
12181 break;
6ef67412 12182 default:
c725bd79 12183 fatal_insn ("unknown insn mode", insn);
6ef67412
JH
12184 }
12185 }
3071fab5 12186 }
6ef67412
JH
12187 return len;
12188}
12189/* Compute default value for "length_address" attribute. */
12190int
b96a374d 12191ix86_attr_length_address_default (rtx insn)
6ef67412
JH
12192{
12193 int i;
9b73c90a
EB
12194
12195 if (get_attr_type (insn) == TYPE_LEA)
12196 {
12197 rtx set = PATTERN (insn);
12198 if (GET_CODE (set) == SET)
12199 ;
12200 else if (GET_CODE (set) == PARALLEL
12201 && GET_CODE (XVECEXP (set, 0, 0)) == SET)
12202 set = XVECEXP (set, 0, 0);
12203 else
12204 {
12205#ifdef ENABLE_CHECKING
12206 abort ();
12207#endif
12208 return 0;
12209 }
12210
12211 return memory_address_length (SET_SRC (set));
12212 }
12213
6c698a6d 12214 extract_insn_cached (insn);
1ccbefce
RH
12215 for (i = recog_data.n_operands - 1; i >= 0; --i)
12216 if (GET_CODE (recog_data.operand[i]) == MEM)
e075ae69 12217 {
6ef67412 12218 return memory_address_length (XEXP (recog_data.operand[i], 0));
e075ae69
RH
12219 break;
12220 }
6ef67412 12221 return 0;
3f803cd9 12222}
e075ae69
RH
12223\f
12224/* Return the maximum number of instructions a cpu can issue. */
b657fc39 12225
c237e94a 12226static int
b96a374d 12227ix86_issue_rate (void)
b657fc39 12228{
9e555526 12229 switch (ix86_tune)
b657fc39 12230 {
e075ae69
RH
12231 case PROCESSOR_PENTIUM:
12232 case PROCESSOR_K6:
12233 return 2;
79325812 12234
e075ae69 12235 case PROCESSOR_PENTIUMPRO:
b4e89e2d
JH
12236 case PROCESSOR_PENTIUM4:
12237 case PROCESSOR_ATHLON:
4977bab6 12238 case PROCESSOR_K8:
89c43c0a 12239 case PROCESSOR_NOCONA:
e075ae69 12240 return 3;
b657fc39 12241
b657fc39 12242 default:
e075ae69 12243 return 1;
b657fc39 12244 }
b657fc39
L
12245}
12246
e075ae69
RH
12247/* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
12248 by DEP_INSN and nothing set by DEP_INSN. */
b657fc39 12249
e075ae69 12250static int
b96a374d 12251ix86_flags_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
e075ae69
RH
12252{
12253 rtx set, set2;
b657fc39 12254
e075ae69
RH
12255 /* Simplify the test for uninteresting insns. */
12256 if (insn_type != TYPE_SETCC
12257 && insn_type != TYPE_ICMOV
12258 && insn_type != TYPE_FCMOV
12259 && insn_type != TYPE_IBR)
12260 return 0;
b657fc39 12261
e075ae69
RH
12262 if ((set = single_set (dep_insn)) != 0)
12263 {
12264 set = SET_DEST (set);
12265 set2 = NULL_RTX;
12266 }
12267 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
12268 && XVECLEN (PATTERN (dep_insn), 0) == 2
12269 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
12270 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
12271 {
12272 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
12273 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
12274 }
78a0d70c
ZW
12275 else
12276 return 0;
b657fc39 12277
78a0d70c
ZW
12278 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
12279 return 0;
b657fc39 12280
f5143c46 12281 /* This test is true if the dependent insn reads the flags but
78a0d70c
ZW
12282 not any other potentially set register. */
12283 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
12284 return 0;
12285
12286 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
12287 return 0;
12288
12289 return 1;
e075ae69 12290}
b657fc39 12291
e075ae69
RH
12292/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
12293 address with operands set by DEP_INSN. */
12294
12295static int
b96a374d 12296ix86_agi_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
e075ae69
RH
12297{
12298 rtx addr;
12299
6ad48e84
JH
12300 if (insn_type == TYPE_LEA
12301 && TARGET_PENTIUM)
5fbdde42
RH
12302 {
12303 addr = PATTERN (insn);
12304 if (GET_CODE (addr) == SET)
12305 ;
12306 else if (GET_CODE (addr) == PARALLEL
12307 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
12308 addr = XVECEXP (addr, 0, 0);
12309 else
12310 abort ();
12311 addr = SET_SRC (addr);
12312 }
e075ae69
RH
12313 else
12314 {
12315 int i;
6c698a6d 12316 extract_insn_cached (insn);
1ccbefce
RH
12317 for (i = recog_data.n_operands - 1; i >= 0; --i)
12318 if (GET_CODE (recog_data.operand[i]) == MEM)
e075ae69 12319 {
1ccbefce 12320 addr = XEXP (recog_data.operand[i], 0);
e075ae69
RH
12321 goto found;
12322 }
12323 return 0;
12324 found:;
b657fc39
L
12325 }
12326
e075ae69 12327 return modified_in_p (addr, dep_insn);
b657fc39 12328}
a269a03c 12329
c237e94a 12330static int
b96a374d 12331ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
a269a03c 12332{
e075ae69 12333 enum attr_type insn_type, dep_insn_type;
8695f61e 12334 enum attr_memory memory;
e075ae69 12335 rtx set, set2;
9b00189f 12336 int dep_insn_code_number;
a269a03c 12337
d1f87653 12338 /* Anti and output dependencies have zero cost on all CPUs. */
e075ae69 12339 if (REG_NOTE_KIND (link) != 0)
309ada50 12340 return 0;
a269a03c 12341
9b00189f
JH
12342 dep_insn_code_number = recog_memoized (dep_insn);
12343
e075ae69 12344 /* If we can't recognize the insns, we can't really do anything. */
9b00189f 12345 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
e075ae69 12346 return cost;
a269a03c 12347
1c71e60e
JH
12348 insn_type = get_attr_type (insn);
12349 dep_insn_type = get_attr_type (dep_insn);
9b00189f 12350
9e555526 12351 switch (ix86_tune)
a269a03c
JC
12352 {
12353 case PROCESSOR_PENTIUM:
e075ae69
RH
12354 /* Address Generation Interlock adds a cycle of latency. */
12355 if (ix86_agi_dependant (insn, dep_insn, insn_type))
12356 cost += 1;
12357
12358 /* ??? Compares pair with jump/setcc. */
12359 if (ix86_flags_dependant (insn, dep_insn, insn_type))
12360 cost = 0;
12361
d1f87653 12362 /* Floating point stores require value to be ready one cycle earlier. */
0f290768 12363 if (insn_type == TYPE_FMOV
e075ae69
RH
12364 && get_attr_memory (insn) == MEMORY_STORE
12365 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12366 cost += 1;
12367 break;
a269a03c 12368
e075ae69 12369 case PROCESSOR_PENTIUMPRO:
6ad48e84 12370 memory = get_attr_memory (insn);
e075ae69
RH
12371
12372 /* INT->FP conversion is expensive. */
12373 if (get_attr_fp_int_src (dep_insn))
12374 cost += 5;
12375
12376 /* There is one cycle extra latency between an FP op and a store. */
12377 if (insn_type == TYPE_FMOV
12378 && (set = single_set (dep_insn)) != NULL_RTX
12379 && (set2 = single_set (insn)) != NULL_RTX
12380 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
12381 && GET_CODE (SET_DEST (set2)) == MEM)
12382 cost += 1;
6ad48e84
JH
12383
12384 /* Show ability of reorder buffer to hide latency of load by executing
12385 in parallel with previous instruction in case
12386 previous instruction is not needed to compute the address. */
12387 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12388 && !ix86_agi_dependant (insn, dep_insn, insn_type))
b96a374d 12389 {
6ad48e84
JH
12390 /* Claim moves to take one cycle, as core can issue one load
12391 at time and the next load can start cycle later. */
12392 if (dep_insn_type == TYPE_IMOV
12393 || dep_insn_type == TYPE_FMOV)
12394 cost = 1;
12395 else if (cost > 1)
12396 cost--;
12397 }
e075ae69 12398 break;
a269a03c 12399
e075ae69 12400 case PROCESSOR_K6:
6ad48e84 12401 memory = get_attr_memory (insn);
8695f61e 12402
e075ae69
RH
12403 /* The esp dependency is resolved before the instruction is really
12404 finished. */
12405 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
12406 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
12407 return 1;
a269a03c 12408
e075ae69
RH
12409 /* INT->FP conversion is expensive. */
12410 if (get_attr_fp_int_src (dep_insn))
12411 cost += 5;
6ad48e84
JH
12412
12413 /* Show ability of reorder buffer to hide latency of load by executing
12414 in parallel with previous instruction in case
12415 previous instruction is not needed to compute the address. */
12416 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12417 && !ix86_agi_dependant (insn, dep_insn, insn_type))
b96a374d 12418 {
6ad48e84
JH
12419 /* Claim moves to take one cycle, as core can issue one load
12420 at time and the next load can start cycle later. */
12421 if (dep_insn_type == TYPE_IMOV
12422 || dep_insn_type == TYPE_FMOV)
12423 cost = 1;
12424 else if (cost > 2)
12425 cost -= 2;
12426 else
12427 cost = 1;
12428 }
a14003ee 12429 break;
e075ae69 12430
309ada50 12431 case PROCESSOR_ATHLON:
4977bab6 12432 case PROCESSOR_K8:
6ad48e84 12433 memory = get_attr_memory (insn);
6ad48e84 12434
6ad48e84
JH
12435 /* Show ability of reorder buffer to hide latency of load by executing
12436 in parallel with previous instruction in case
12437 previous instruction is not needed to compute the address. */
12438 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12439 && !ix86_agi_dependant (insn, dep_insn, insn_type))
b96a374d 12440 {
26f74aa3
JH
12441 enum attr_unit unit = get_attr_unit (insn);
12442 int loadcost = 3;
12443
12444 /* Because of the difference between the length of integer and
12445 floating unit pipeline preparation stages, the memory operands
b96a374d 12446 for floating point are cheaper.
26f74aa3 12447
c51e6d85 12448 ??? For Athlon it the difference is most probably 2. */
26f74aa3
JH
12449 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
12450 loadcost = 3;
12451 else
12452 loadcost = TARGET_ATHLON ? 2 : 0;
12453
12454 if (cost >= loadcost)
12455 cost -= loadcost;
6ad48e84
JH
12456 else
12457 cost = 0;
12458 }
309ada50 12459
a269a03c 12460 default:
a269a03c
JC
12461 break;
12462 }
12463
12464 return cost;
12465}
0a726ef1 12466
9b690711
RH
12467/* How many alternative schedules to try. This should be as wide as the
12468 scheduling freedom in the DFA, but no wider. Making this value too
12469 large results extra work for the scheduler. */
12470
12471static int
b96a374d 12472ia32_multipass_dfa_lookahead (void)
9b690711 12473{
9e555526 12474 if (ix86_tune == PROCESSOR_PENTIUM)
9b690711 12475 return 2;
56bab446 12476
8695f61e
SB
12477 if (ix86_tune == PROCESSOR_PENTIUMPRO
12478 || ix86_tune == PROCESSOR_K6)
56bab446
SB
12479 return 1;
12480
9b690711 12481 else
56bab446 12482 return 0;
9b690711
RH
12483}
12484
0e4970d7 12485\f
a7180f70
BS
12486/* Compute the alignment given to a constant that is being placed in memory.
12487 EXP is the constant and ALIGN is the alignment that the object would
12488 ordinarily have.
12489 The value of this function is used instead of that alignment to align
12490 the object. */
12491
12492int
b96a374d 12493ix86_constant_alignment (tree exp, int align)
a7180f70
BS
12494{
12495 if (TREE_CODE (exp) == REAL_CST)
12496 {
12497 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
12498 return 64;
12499 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
12500 return 128;
12501 }
4137ba7a
JJ
12502 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
12503 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
12504 return BITS_PER_WORD;
a7180f70
BS
12505
12506 return align;
12507}
12508
12509/* Compute the alignment for a static variable.
12510 TYPE is the data type, and ALIGN is the alignment that
12511 the object would ordinarily have. The value of this function is used
12512 instead of that alignment to align the object. */
12513
12514int
b96a374d 12515ix86_data_alignment (tree type, int align)
a7180f70
BS
12516{
12517 if (AGGREGATE_TYPE_P (type)
12518 && TYPE_SIZE (type)
12519 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12520 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
12521 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
12522 return 256;
12523
0d7d98ee
JH
12524 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12525 to 16byte boundary. */
12526 if (TARGET_64BIT)
12527 {
12528 if (AGGREGATE_TYPE_P (type)
12529 && TYPE_SIZE (type)
12530 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12531 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
12532 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12533 return 128;
12534 }
12535
a7180f70
BS
12536 if (TREE_CODE (type) == ARRAY_TYPE)
12537 {
12538 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12539 return 64;
12540 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12541 return 128;
12542 }
12543 else if (TREE_CODE (type) == COMPLEX_TYPE)
12544 {
0f290768 12545
a7180f70
BS
12546 if (TYPE_MODE (type) == DCmode && align < 64)
12547 return 64;
12548 if (TYPE_MODE (type) == XCmode && align < 128)
12549 return 128;
12550 }
12551 else if ((TREE_CODE (type) == RECORD_TYPE
12552 || TREE_CODE (type) == UNION_TYPE
12553 || TREE_CODE (type) == QUAL_UNION_TYPE)
12554 && TYPE_FIELDS (type))
12555 {
12556 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12557 return 64;
12558 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12559 return 128;
12560 }
12561 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12562 || TREE_CODE (type) == INTEGER_TYPE)
12563 {
12564 if (TYPE_MODE (type) == DFmode && align < 64)
12565 return 64;
12566 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12567 return 128;
12568 }
12569
12570 return align;
12571}
12572
12573/* Compute the alignment for a local variable.
12574 TYPE is the data type, and ALIGN is the alignment that
12575 the object would ordinarily have. The value of this macro is used
12576 instead of that alignment to align the object. */
12577
12578int
b96a374d 12579ix86_local_alignment (tree type, int align)
a7180f70 12580{
0d7d98ee
JH
12581 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12582 to 16byte boundary. */
12583 if (TARGET_64BIT)
12584 {
12585 if (AGGREGATE_TYPE_P (type)
12586 && TYPE_SIZE (type)
12587 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12588 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
12589 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12590 return 128;
12591 }
a7180f70
BS
12592 if (TREE_CODE (type) == ARRAY_TYPE)
12593 {
12594 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12595 return 64;
12596 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12597 return 128;
12598 }
12599 else if (TREE_CODE (type) == COMPLEX_TYPE)
12600 {
12601 if (TYPE_MODE (type) == DCmode && align < 64)
12602 return 64;
12603 if (TYPE_MODE (type) == XCmode && align < 128)
12604 return 128;
12605 }
12606 else if ((TREE_CODE (type) == RECORD_TYPE
12607 || TREE_CODE (type) == UNION_TYPE
12608 || TREE_CODE (type) == QUAL_UNION_TYPE)
12609 && TYPE_FIELDS (type))
12610 {
12611 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12612 return 64;
12613 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12614 return 128;
12615 }
12616 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12617 || TREE_CODE (type) == INTEGER_TYPE)
12618 {
0f290768 12619
a7180f70
BS
12620 if (TYPE_MODE (type) == DFmode && align < 64)
12621 return 64;
12622 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12623 return 128;
12624 }
12625 return align;
12626}
0ed08620
JH
12627\f
/* Emit RTL insns to initialize the variable parts of a trampoline.
   FNADDR is an RTX for the address of the function's pure code.
   CXT is an RTX for the static chain value for the function.  */
void
x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
{
  if (!TARGET_64BIT)
    {
      /* 32-bit trampoline:
	   b9 <imm32>   movl $CXT, %ecx
	   e9 <rel32>   jmp  FNADDR  (pc-relative)  */
      /* Compute offset from the end of the jmp to the target function.  */
      rtx disp = expand_binop (SImode, sub_optab, fnaddr,
			       plus_constant (tramp, 10),
			       NULL_RTX, 1, OPTAB_DIRECT);
      emit_move_insn (gen_rtx_MEM (QImode, tramp),
		      gen_int_mode (0xb9, QImode));
      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
		      gen_int_mode (0xe9, QImode));
      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
    }
  else
    {
      int offset = 0;
      /* Try to load address using shorter movl instead of movabs.
	 We may want to support movq for kernel mode, but kernel does not use
	 trampolines at the moment.  */
      if (x86_64_zero_extended_value (fnaddr))
	{
	  fnaddr = copy_to_mode_reg (DImode, fnaddr);
	  /* 41 bb <imm32>:  movl $FNADDR, %r11d (zero-extends).  */
	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
			  gen_int_mode (0xbb41, HImode));
	  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
			  gen_lowpart (SImode, fnaddr));
	  offset += 6;
	}
      else
	{
	  /* 49 bb <imm64>:  movabs $FNADDR, %r11.  */
	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
			  gen_int_mode (0xbb49, HImode));
	  emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
			  fnaddr);
	  offset += 10;
	}
      /* Load static chain using movabs to r10.
	 49 ba <imm64>:  movabs $CXT, %r10.  */
      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
		      gen_int_mode (0xba49, HImode));
      emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
		      cxt);
      offset += 10;
      /* Jump through r11 to the target function.
	 49 ff e3:  jmp *%r11.  */
      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
		      gen_int_mode (0xff49, HImode));
      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
		      gen_int_mode (0xe3, QImode));
      offset += 3;
      /* Sanity check: emitted bytes must fit in the trampoline.  */
      if (offset > TRAMPOLINE_SIZE)
	abort ();
    }

#ifdef ENABLE_EXECUTE_STACK
  /* Some targets require the trampoline's page be marked executable.  */
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
		     LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
#endif
}
eeb06b1b 12691\f
6e34d3a3
JM
/* Register a machine-specific builtin NAME of type TYPE expanded from
   insn CODE, but only when all target flags in MASK are enabled — and,
   for builtins whose MASK includes MASK_64BIT, only when compiling for
   64-bit.  */
#define def_builtin(MASK, NAME, TYPE, CODE) \
do { \
  if ((MASK) & target_flags \
      && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
    lang_hooks.builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
				 NULL, NULL_TREE); \
} while (0)
bd793c65 12699
bd793c65
BS
/* Table entry describing one ix86 builtin implemented directly by an
   insn pattern.  */
struct builtin_description
{
  /* Target flag mask (MASK_SSE etc.) that must be enabled.  */
  const unsigned int mask;
  /* Insn pattern implementing the builtin.  */
  const enum insn_code icode;
  /* User-visible name, or 0 for builtins expanded by hand.  */
  const char *const name;
  /* Internal builtin enumerator.  */
  const enum ix86_builtins code;
  /* For comparison builtins, the RTX comparison code; otherwise 0.  */
  const enum rtx_code comparison;
  /* Extra per-entry flag; in the comparison tables a nonzero value
     appears to mark swapped-operand (reversed) comparisons — see the
     cmpgt/cmpge entries using LT/LE with flag 1.  */
  const unsigned int flag;
};
12709
/* Builtins mapping to the SSE/SSE2 (U)COMISS/(U)COMISD scalar FP compare
   instructions; the `comparison' field is the RTX code the expander
   tests on the resulting flags.  */
static const struct builtin_description bdesc_comi[] =
{
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
};
12737
8b60264b 12738static const struct builtin_description bdesc_2arg[] =
bd793c65
BS
12739{
12740 /* SSE */
37f22004
L
12741 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
12742 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
12743 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
12744 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
12745 { MASK_SSE, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
12746 { MASK_SSE, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
12747 { MASK_SSE, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
12748 { MASK_SSE, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
12749
12750 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
12751 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
12752 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
12753 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
12754 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
12755 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
12756 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
12757 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
12758 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
12759 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
12760 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
12761 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
12762 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
12763 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
12764 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
12765 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
12766 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
12767 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
12768 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
12769 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
12770
12771 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
12772 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
12773 { MASK_SSE, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
12774 { MASK_SSE, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
12775
12776 { MASK_SSE, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
12777 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
12778 { MASK_SSE, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
12779 { MASK_SSE, CODE_FOR_sse_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
12780
12781 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
12782 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
12783 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
12784 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
12785 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
bd793c65
BS
12786
12787 /* MMX */
eeb06b1b
BS
12788 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
12789 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
12790 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
d50672ef 12791 { MASK_MMX, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
eeb06b1b
BS
12792 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
12793 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
12794 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
d50672ef 12795 { MASK_MMX, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
eeb06b1b
BS
12796
12797 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
12798 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
12799 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
12800 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
12801 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
12802 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
12803 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
12804 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
12805
12806 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
12807 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
37f22004 12808 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
eeb06b1b
BS
12809
12810 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
12811 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
12812 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
12813 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
12814
37f22004
L
12815 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
12816 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
eeb06b1b
BS
12817
12818 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
12819 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
12820 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
12821 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
12822 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
12823 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
12824
37f22004
L
12825 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
12826 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
12827 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
12828 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
eeb06b1b
BS
12829
12830 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
12831 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
12832 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
12833 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
12834 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
12835 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
bd793c65
BS
12836
12837 /* Special. */
eeb06b1b
BS
12838 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
12839 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
12840 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
12841
37f22004
L
12842 { MASK_SSE, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
12843 { MASK_SSE, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
12844 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
eeb06b1b
BS
12845
12846 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
12847 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
12848 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
12849 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
12850 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
12851 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
12852
12853 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
12854 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
12855 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
12856 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
12857 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
12858 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
12859
12860 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
12861 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
12862 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
12863 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
12864
37f22004 12865 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
fbe5eb6d
BS
12866 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
12867
12868 /* SSE2 */
12869 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
12870 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
12871 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
12872 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
12873 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
12874 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
12875 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
12876 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
12877
12878 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
12879 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
12880 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
12881 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
12882 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
12883 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
12884 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
12885 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
12886 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
12887 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
12888 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
12889 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
12890 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
12891 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
12892 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
fbe5eb6d
BS
12893 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
12894 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
12895 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
12896 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
fbe5eb6d
BS
12897 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
12898
12899 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
12900 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
12901 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
12902 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
12903
1877be45
JH
12904 { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
12905 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
12906 { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
12907 { MASK_SSE2, CODE_FOR_sse2_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
fbe5eb6d
BS
12908
12909 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
12910 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
12911 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
12912
12913 /* SSE2 MMX */
12914 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
12915 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
12916 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
d50672ef 12917 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
fbe5eb6d
BS
12918 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
12919 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
12920 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
d50672ef 12921 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
fbe5eb6d
BS
12922
12923 { MASK_MMX, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
12924 { MASK_MMX, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
12925 { MASK_MMX, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
12926 { MASK_MMX, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
12927 { MASK_MMX, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
12928 { MASK_MMX, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
12929 { MASK_MMX, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
12930 { MASK_MMX, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
12931
12932 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
12933 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
fbe5eb6d 12934
916b60b7
BS
12935 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
12936 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
12937 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
12938 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
fbe5eb6d
BS
12939
12940 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
12941 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
12942
12943 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
12944 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
12945 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
12946 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
12947 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
12948 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
12949
12950 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
12951 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
12952 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
12953 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
12954
12955 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
12956 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
12957 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
077084dd 12958 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
fbe5eb6d
BS
12959 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
12960 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
12961 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
f02e1358 12962 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
fbe5eb6d 12963
916b60b7
BS
12964 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
12965 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
12966 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
12967
12968 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
12969 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
12970
9e9fb0ce
JB
12971 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
12972 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },
12973
916b60b7
BS
12974 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
12975 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
12976 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
12977 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
12978 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
12979 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
12980
12981 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
12982 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
12983 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
12984 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
12985 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
12986 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
12987
12988 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
12989 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
12990 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
12991 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
12992
12993 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
12994
fbe5eb6d 12995 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
37f22004 12996 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
fbe5eb6d 12997 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
22c7c85e
L
12998 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
12999
9e200aaf
KC
13000 /* SSE3 MMX */
13001 { MASK_SSE3, CODE_FOR_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
13002 { MASK_SSE3, CODE_FOR_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
13003 { MASK_SSE3, CODE_FOR_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
13004 { MASK_SSE3, CODE_FOR_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
13005 { MASK_SSE3, CODE_FOR_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
13006 { MASK_SSE3, CODE_FOR_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
bd793c65
BS
13007};
13008
8b60264b 13009static const struct builtin_description bdesc_1arg[] =
bd793c65 13010{
37f22004
L
13011 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
13012 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
fbe5eb6d 13013
37f22004
L
13014 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
13015 { MASK_SSE, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
13016 { MASK_SSE, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
fbe5eb6d 13017
37f22004
L
13018 { MASK_SSE, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
13019 { MASK_SSE, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
13020 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
13021 { MASK_SSE, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
13022 { MASK_SSE, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
13023 { MASK_SSE | MASK_64BIT, CODE_FOR_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
fbe5eb6d
BS
13024
13025 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
13026 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
13027 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
f02e1358 13028 { MASK_SSE2, CODE_FOR_sse2_movdq2q, 0, IX86_BUILTIN_MOVDQ2Q, 0, 0 },
fbe5eb6d
BS
13029
13030 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
13031
13032 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
13033 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
bd793c65 13034
fbe5eb6d
BS
13035 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
13036 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
13037 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
13038 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
13039 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
bd793c65 13040
fbe5eb6d 13041 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
bd793c65 13042
fbe5eb6d
BS
13043 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
13044 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
37f22004
L
13045 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
13046 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
fbe5eb6d
BS
13047
13048 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
13049 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
f02e1358
JH
13050 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
13051
22c7c85e
L
13052 { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 },
13053
9e200aaf
KC
13054 /* SSE3 */
13055 { MASK_SSE3, CODE_FOR_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
13056 { MASK_SSE3, CODE_FOR_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
13057 { MASK_SSE3, CODE_FOR_movddup, 0, IX86_BUILTIN_MOVDDUP, 0, 0 }
bd793c65
BS
13058};
13059
f6155fda 13060void
b96a374d 13061ix86_init_builtins (void)
f6155fda
SS
13062{
13063 if (TARGET_MMX)
13064 ix86_init_mmx_sse_builtins ();
13065}
13066
/* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
   is zero.  Otherwise, if TARGET_SSE is not set, only expand the MMX
   builtins.  */
e37af218 13070static void
b96a374d 13071ix86_init_mmx_sse_builtins (void)
bd793c65 13072{
8b60264b 13073 const struct builtin_description * d;
77ebd435 13074 size_t i;
bd793c65 13075
4a5eab38
PB
13076 tree V16QI_type_node = build_vector_type_for_mode (intQI_type_node, V16QImode);
13077 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
13078 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
13079 tree V2DI_type_node = build_vector_type_for_mode (intDI_type_node, V2DImode);
13080 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
13081 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
13082 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
13083 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
13084 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
13085 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
13086
bd793c65 13087 tree pchar_type_node = build_pointer_type (char_type_node);
068f5dea
JH
13088 tree pcchar_type_node = build_pointer_type (
13089 build_type_variant (char_type_node, 1, 0));
bd793c65 13090 tree pfloat_type_node = build_pointer_type (float_type_node);
068f5dea
JH
13091 tree pcfloat_type_node = build_pointer_type (
13092 build_type_variant (float_type_node, 1, 0));
bd793c65 13093 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
916b60b7 13094 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
bd793c65
BS
13095 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
13096
13097 /* Comparisons. */
13098 tree int_ftype_v4sf_v4sf
b4de2f7d
AH
13099 = build_function_type_list (integer_type_node,
13100 V4SF_type_node, V4SF_type_node, NULL_TREE);
bd793c65 13101 tree v4si_ftype_v4sf_v4sf
b4de2f7d
AH
13102 = build_function_type_list (V4SI_type_node,
13103 V4SF_type_node, V4SF_type_node, NULL_TREE);
bd793c65 13104 /* MMX/SSE/integer conversions. */
bd793c65 13105 tree int_ftype_v4sf
b4de2f7d
AH
13106 = build_function_type_list (integer_type_node,
13107 V4SF_type_node, NULL_TREE);
453ee231
JH
13108 tree int64_ftype_v4sf
13109 = build_function_type_list (long_long_integer_type_node,
13110 V4SF_type_node, NULL_TREE);
bd793c65 13111 tree int_ftype_v8qi
b4de2f7d 13112 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
bd793c65 13113 tree v4sf_ftype_v4sf_int
b4de2f7d
AH
13114 = build_function_type_list (V4SF_type_node,
13115 V4SF_type_node, integer_type_node, NULL_TREE);
453ee231
JH
13116 tree v4sf_ftype_v4sf_int64
13117 = build_function_type_list (V4SF_type_node,
13118 V4SF_type_node, long_long_integer_type_node,
13119 NULL_TREE);
bd793c65 13120 tree v4sf_ftype_v4sf_v2si
b4de2f7d
AH
13121 = build_function_type_list (V4SF_type_node,
13122 V4SF_type_node, V2SI_type_node, NULL_TREE);
bd793c65 13123 tree int_ftype_v4hi_int
b4de2f7d
AH
13124 = build_function_type_list (integer_type_node,
13125 V4HI_type_node, integer_type_node, NULL_TREE);
bd793c65 13126 tree v4hi_ftype_v4hi_int_int
e7a60f56 13127 = build_function_type_list (V4HI_type_node, V4HI_type_node,
b4de2f7d
AH
13128 integer_type_node, integer_type_node,
13129 NULL_TREE);
bd793c65
BS
13130 /* Miscellaneous. */
13131 tree v8qi_ftype_v4hi_v4hi
b4de2f7d
AH
13132 = build_function_type_list (V8QI_type_node,
13133 V4HI_type_node, V4HI_type_node, NULL_TREE);
bd793c65 13134 tree v4hi_ftype_v2si_v2si
b4de2f7d
AH
13135 = build_function_type_list (V4HI_type_node,
13136 V2SI_type_node, V2SI_type_node, NULL_TREE);
bd793c65 13137 tree v4sf_ftype_v4sf_v4sf_int
b4de2f7d
AH
13138 = build_function_type_list (V4SF_type_node,
13139 V4SF_type_node, V4SF_type_node,
13140 integer_type_node, NULL_TREE);
bd793c65 13141 tree v2si_ftype_v4hi_v4hi
b4de2f7d
AH
13142 = build_function_type_list (V2SI_type_node,
13143 V4HI_type_node, V4HI_type_node, NULL_TREE);
bd793c65 13144 tree v4hi_ftype_v4hi_int
b4de2f7d 13145 = build_function_type_list (V4HI_type_node,
e7a60f56 13146 V4HI_type_node, integer_type_node, NULL_TREE);
bd793c65 13147 tree v4hi_ftype_v4hi_di
b4de2f7d
AH
13148 = build_function_type_list (V4HI_type_node,
13149 V4HI_type_node, long_long_unsigned_type_node,
13150 NULL_TREE);
bd793c65 13151 tree v2si_ftype_v2si_di
b4de2f7d
AH
13152 = build_function_type_list (V2SI_type_node,
13153 V2SI_type_node, long_long_unsigned_type_node,
13154 NULL_TREE);
bd793c65 13155 tree void_ftype_void
b4de2f7d 13156 = build_function_type (void_type_node, void_list_node);
bd793c65 13157 tree void_ftype_unsigned
b4de2f7d 13158 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
22c7c85e
L
13159 tree void_ftype_unsigned_unsigned
13160 = build_function_type_list (void_type_node, unsigned_type_node,
13161 unsigned_type_node, NULL_TREE);
13162 tree void_ftype_pcvoid_unsigned_unsigned
13163 = build_function_type_list (void_type_node, const_ptr_type_node,
13164 unsigned_type_node, unsigned_type_node,
13165 NULL_TREE);
bd793c65 13166 tree unsigned_ftype_void
b4de2f7d 13167 = build_function_type (unsigned_type_node, void_list_node);
bd793c65 13168 tree di_ftype_void
b4de2f7d 13169 = build_function_type (long_long_unsigned_type_node, void_list_node);
e37af218 13170 tree v4sf_ftype_void
b4de2f7d 13171 = build_function_type (V4SF_type_node, void_list_node);
bd793c65 13172 tree v2si_ftype_v4sf
b4de2f7d 13173 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
bd793c65 13174 /* Loads/stores. */
bd793c65 13175 tree void_ftype_v8qi_v8qi_pchar
b4de2f7d
AH
13176 = build_function_type_list (void_type_node,
13177 V8QI_type_node, V8QI_type_node,
13178 pchar_type_node, NULL_TREE);
068f5dea
JH
13179 tree v4sf_ftype_pcfloat
13180 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
bd793c65
BS
13181 /* @@@ the type is bogus */
13182 tree v4sf_ftype_v4sf_pv2si
b4de2f7d 13183 = build_function_type_list (V4SF_type_node,
f8ca7923 13184 V4SF_type_node, pv2si_type_node, NULL_TREE);
1255c85c 13185 tree void_ftype_pv2si_v4sf
b4de2f7d 13186 = build_function_type_list (void_type_node,
f8ca7923 13187 pv2si_type_node, V4SF_type_node, NULL_TREE);
bd793c65 13188 tree void_ftype_pfloat_v4sf
b4de2f7d
AH
13189 = build_function_type_list (void_type_node,
13190 pfloat_type_node, V4SF_type_node, NULL_TREE);
bd793c65 13191 tree void_ftype_pdi_di
b4de2f7d
AH
13192 = build_function_type_list (void_type_node,
13193 pdi_type_node, long_long_unsigned_type_node,
13194 NULL_TREE);
916b60b7 13195 tree void_ftype_pv2di_v2di
b4de2f7d
AH
13196 = build_function_type_list (void_type_node,
13197 pv2di_type_node, V2DI_type_node, NULL_TREE);
bd793c65
BS
13198 /* Normal vector unops. */
13199 tree v4sf_ftype_v4sf
b4de2f7d 13200 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
0f290768 13201
bd793c65
BS
13202 /* Normal vector binops. */
13203 tree v4sf_ftype_v4sf_v4sf
b4de2f7d
AH
13204 = build_function_type_list (V4SF_type_node,
13205 V4SF_type_node, V4SF_type_node, NULL_TREE);
bd793c65 13206 tree v8qi_ftype_v8qi_v8qi
b4de2f7d
AH
13207 = build_function_type_list (V8QI_type_node,
13208 V8QI_type_node, V8QI_type_node, NULL_TREE);
bd793c65 13209 tree v4hi_ftype_v4hi_v4hi
b4de2f7d
AH
13210 = build_function_type_list (V4HI_type_node,
13211 V4HI_type_node, V4HI_type_node, NULL_TREE);
bd793c65 13212 tree v2si_ftype_v2si_v2si
b4de2f7d
AH
13213 = build_function_type_list (V2SI_type_node,
13214 V2SI_type_node, V2SI_type_node, NULL_TREE);
bd793c65 13215 tree di_ftype_di_di
b4de2f7d
AH
13216 = build_function_type_list (long_long_unsigned_type_node,
13217 long_long_unsigned_type_node,
13218 long_long_unsigned_type_node, NULL_TREE);
bd793c65 13219
47f339cf 13220 tree v2si_ftype_v2sf
ae3aa00d 13221 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
47f339cf 13222 tree v2sf_ftype_v2si
b4de2f7d 13223 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
47f339cf 13224 tree v2si_ftype_v2si
b4de2f7d 13225 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
47f339cf 13226 tree v2sf_ftype_v2sf
b4de2f7d 13227 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
47f339cf 13228 tree v2sf_ftype_v2sf_v2sf
b4de2f7d
AH
13229 = build_function_type_list (V2SF_type_node,
13230 V2SF_type_node, V2SF_type_node, NULL_TREE);
47f339cf 13231 tree v2si_ftype_v2sf_v2sf
b4de2f7d
AH
13232 = build_function_type_list (V2SI_type_node,
13233 V2SF_type_node, V2SF_type_node, NULL_TREE);
fbe5eb6d 13234 tree pint_type_node = build_pointer_type (integer_type_node);
068f5dea
JH
13235 tree pcint_type_node = build_pointer_type (
13236 build_type_variant (integer_type_node, 1, 0));
fbe5eb6d 13237 tree pdouble_type_node = build_pointer_type (double_type_node);
068f5dea
JH
13238 tree pcdouble_type_node = build_pointer_type (
13239 build_type_variant (double_type_node, 1, 0));
fbe5eb6d 13240 tree int_ftype_v2df_v2df
b4de2f7d
AH
13241 = build_function_type_list (integer_type_node,
13242 V2DF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d
BS
13243
13244 tree ti_ftype_void
b4de2f7d 13245 = build_function_type (intTI_type_node, void_list_node);
f02e1358
JH
13246 tree v2di_ftype_void
13247 = build_function_type (V2DI_type_node, void_list_node);
fbe5eb6d 13248 tree ti_ftype_ti_ti
b4de2f7d
AH
13249 = build_function_type_list (intTI_type_node,
13250 intTI_type_node, intTI_type_node, NULL_TREE);
068f5dea
JH
13251 tree void_ftype_pcvoid
13252 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
fbe5eb6d 13253 tree v2di_ftype_di
b4de2f7d
AH
13254 = build_function_type_list (V2DI_type_node,
13255 long_long_unsigned_type_node, NULL_TREE);
f02e1358
JH
13256 tree di_ftype_v2di
13257 = build_function_type_list (long_long_unsigned_type_node,
13258 V2DI_type_node, NULL_TREE);
fbe5eb6d 13259 tree v4sf_ftype_v4si
b4de2f7d 13260 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
fbe5eb6d 13261 tree v4si_ftype_v4sf
b4de2f7d 13262 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
fbe5eb6d 13263 tree v2df_ftype_v4si
b4de2f7d 13264 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
fbe5eb6d 13265 tree v4si_ftype_v2df
b4de2f7d 13266 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 13267 tree v2si_ftype_v2df
b4de2f7d 13268 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 13269 tree v4sf_ftype_v2df
b4de2f7d 13270 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 13271 tree v2df_ftype_v2si
b4de2f7d 13272 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
fbe5eb6d 13273 tree v2df_ftype_v4sf
b4de2f7d 13274 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
fbe5eb6d 13275 tree int_ftype_v2df
b4de2f7d 13276 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
453ee231
JH
13277 tree int64_ftype_v2df
13278 = build_function_type_list (long_long_integer_type_node,
b96a374d 13279 V2DF_type_node, NULL_TREE);
fbe5eb6d 13280 tree v2df_ftype_v2df_int
b4de2f7d
AH
13281 = build_function_type_list (V2DF_type_node,
13282 V2DF_type_node, integer_type_node, NULL_TREE);
453ee231
JH
13283 tree v2df_ftype_v2df_int64
13284 = build_function_type_list (V2DF_type_node,
13285 V2DF_type_node, long_long_integer_type_node,
13286 NULL_TREE);
fbe5eb6d 13287 tree v4sf_ftype_v4sf_v2df
b4de2f7d
AH
13288 = build_function_type_list (V4SF_type_node,
13289 V4SF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 13290 tree v2df_ftype_v2df_v4sf
b4de2f7d
AH
13291 = build_function_type_list (V2DF_type_node,
13292 V2DF_type_node, V4SF_type_node, NULL_TREE);
fbe5eb6d 13293 tree v2df_ftype_v2df_v2df_int
b4de2f7d
AH
13294 = build_function_type_list (V2DF_type_node,
13295 V2DF_type_node, V2DF_type_node,
13296 integer_type_node,
13297 NULL_TREE);
fbe5eb6d 13298 tree v2df_ftype_v2df_pv2si
b4de2f7d
AH
13299 = build_function_type_list (V2DF_type_node,
13300 V2DF_type_node, pv2si_type_node, NULL_TREE);
fbe5eb6d 13301 tree void_ftype_pv2si_v2df
b4de2f7d
AH
13302 = build_function_type_list (void_type_node,
13303 pv2si_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 13304 tree void_ftype_pdouble_v2df
b4de2f7d
AH
13305 = build_function_type_list (void_type_node,
13306 pdouble_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 13307 tree void_ftype_pint_int
b4de2f7d
AH
13308 = build_function_type_list (void_type_node,
13309 pint_type_node, integer_type_node, NULL_TREE);
fbe5eb6d 13310 tree void_ftype_v16qi_v16qi_pchar
b4de2f7d
AH
13311 = build_function_type_list (void_type_node,
13312 V16QI_type_node, V16QI_type_node,
13313 pchar_type_node, NULL_TREE);
068f5dea
JH
13314 tree v2df_ftype_pcdouble
13315 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
fbe5eb6d 13316 tree v2df_ftype_v2df_v2df
b4de2f7d
AH
13317 = build_function_type_list (V2DF_type_node,
13318 V2DF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 13319 tree v16qi_ftype_v16qi_v16qi
b4de2f7d
AH
13320 = build_function_type_list (V16QI_type_node,
13321 V16QI_type_node, V16QI_type_node, NULL_TREE);
fbe5eb6d 13322 tree v8hi_ftype_v8hi_v8hi
b4de2f7d
AH
13323 = build_function_type_list (V8HI_type_node,
13324 V8HI_type_node, V8HI_type_node, NULL_TREE);
fbe5eb6d 13325 tree v4si_ftype_v4si_v4si
b4de2f7d
AH
13326 = build_function_type_list (V4SI_type_node,
13327 V4SI_type_node, V4SI_type_node, NULL_TREE);
fbe5eb6d 13328 tree v2di_ftype_v2di_v2di
b4de2f7d
AH
13329 = build_function_type_list (V2DI_type_node,
13330 V2DI_type_node, V2DI_type_node, NULL_TREE);
fbe5eb6d 13331 tree v2di_ftype_v2df_v2df
b4de2f7d
AH
13332 = build_function_type_list (V2DI_type_node,
13333 V2DF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 13334 tree v2df_ftype_v2df
b4de2f7d 13335 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 13336 tree v2df_ftype_double
b4de2f7d 13337 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
fbe5eb6d 13338 tree v2df_ftype_double_double
b4de2f7d
AH
13339 = build_function_type_list (V2DF_type_node,
13340 double_type_node, double_type_node, NULL_TREE);
fbe5eb6d 13341 tree int_ftype_v8hi_int
b4de2f7d
AH
13342 = build_function_type_list (integer_type_node,
13343 V8HI_type_node, integer_type_node, NULL_TREE);
fbe5eb6d 13344 tree v8hi_ftype_v8hi_int_int
b4de2f7d
AH
13345 = build_function_type_list (V8HI_type_node,
13346 V8HI_type_node, integer_type_node,
13347 integer_type_node, NULL_TREE);
916b60b7 13348 tree v2di_ftype_v2di_int
b4de2f7d
AH
13349 = build_function_type_list (V2DI_type_node,
13350 V2DI_type_node, integer_type_node, NULL_TREE);
fbe5eb6d 13351 tree v4si_ftype_v4si_int
b4de2f7d
AH
13352 = build_function_type_list (V4SI_type_node,
13353 V4SI_type_node, integer_type_node, NULL_TREE);
fbe5eb6d 13354 tree v8hi_ftype_v8hi_int
b4de2f7d
AH
13355 = build_function_type_list (V8HI_type_node,
13356 V8HI_type_node, integer_type_node, NULL_TREE);
916b60b7 13357 tree v8hi_ftype_v8hi_v2di
b4de2f7d
AH
13358 = build_function_type_list (V8HI_type_node,
13359 V8HI_type_node, V2DI_type_node, NULL_TREE);
916b60b7 13360 tree v4si_ftype_v4si_v2di
b4de2f7d
AH
13361 = build_function_type_list (V4SI_type_node,
13362 V4SI_type_node, V2DI_type_node, NULL_TREE);
916b60b7 13363 tree v4si_ftype_v8hi_v8hi
b4de2f7d
AH
13364 = build_function_type_list (V4SI_type_node,
13365 V8HI_type_node, V8HI_type_node, NULL_TREE);
916b60b7 13366 tree di_ftype_v8qi_v8qi
b4de2f7d
AH
13367 = build_function_type_list (long_long_unsigned_type_node,
13368 V8QI_type_node, V8QI_type_node, NULL_TREE);
9e9fb0ce
JB
13369 tree di_ftype_v2si_v2si
13370 = build_function_type_list (long_long_unsigned_type_node,
13371 V2SI_type_node, V2SI_type_node, NULL_TREE);
916b60b7 13372 tree v2di_ftype_v16qi_v16qi
b4de2f7d
AH
13373 = build_function_type_list (V2DI_type_node,
13374 V16QI_type_node, V16QI_type_node, NULL_TREE);
9e9fb0ce
JB
13375 tree v2di_ftype_v4si_v4si
13376 = build_function_type_list (V2DI_type_node,
13377 V4SI_type_node, V4SI_type_node, NULL_TREE);
916b60b7 13378 tree int_ftype_v16qi
b4de2f7d 13379 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
068f5dea
JH
13380 tree v16qi_ftype_pcchar
13381 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
f02e1358
JH
13382 tree void_ftype_pchar_v16qi
13383 = build_function_type_list (void_type_node,
13384 pchar_type_node, V16QI_type_node, NULL_TREE);
068f5dea
JH
13385 tree v4si_ftype_pcint
13386 = build_function_type_list (V4SI_type_node, pcint_type_node, NULL_TREE);
13387 tree void_ftype_pcint_v4si
f02e1358 13388 = build_function_type_list (void_type_node,
068f5dea 13389 pcint_type_node, V4SI_type_node, NULL_TREE);
f02e1358
JH
13390 tree v2di_ftype_v2di
13391 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
47f339cf 13392
f8a1ebc6
JH
13393 tree float80_type;
13394 tree float128_type;
13395
13396 /* The __float80 type. */
13397 if (TYPE_MODE (long_double_type_node) == XFmode)
13398 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
13399 "__float80");
13400 else
13401 {
13402 /* The __float80 type. */
13403 float80_type = make_node (REAL_TYPE);
968a7562 13404 TYPE_PRECISION (float80_type) = 80;
f8a1ebc6
JH
13405 layout_type (float80_type);
13406 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
13407 }
13408
13409 float128_type = make_node (REAL_TYPE);
13410 TYPE_PRECISION (float128_type) = 128;
13411 layout_type (float128_type);
13412 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
13413
bd793c65
BS
13414 /* Add all builtins that are more or less simple operations on two
13415 operands. */
ca7558fc 13416 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
bd793c65
BS
13417 {
13418 /* Use one of the operands; the target can have a different mode for
13419 mask-generating compares. */
13420 enum machine_mode mode;
13421 tree type;
13422
13423 if (d->name == 0)
13424 continue;
13425 mode = insn_data[d->icode].operand[1].mode;
13426
bd793c65
BS
13427 switch (mode)
13428 {
fbe5eb6d
BS
13429 case V16QImode:
13430 type = v16qi_ftype_v16qi_v16qi;
13431 break;
13432 case V8HImode:
13433 type = v8hi_ftype_v8hi_v8hi;
13434 break;
13435 case V4SImode:
13436 type = v4si_ftype_v4si_v4si;
13437 break;
13438 case V2DImode:
13439 type = v2di_ftype_v2di_v2di;
13440 break;
13441 case V2DFmode:
13442 type = v2df_ftype_v2df_v2df;
13443 break;
13444 case TImode:
13445 type = ti_ftype_ti_ti;
13446 break;
bd793c65
BS
13447 case V4SFmode:
13448 type = v4sf_ftype_v4sf_v4sf;
13449 break;
13450 case V8QImode:
13451 type = v8qi_ftype_v8qi_v8qi;
13452 break;
13453 case V4HImode:
13454 type = v4hi_ftype_v4hi_v4hi;
13455 break;
13456 case V2SImode:
13457 type = v2si_ftype_v2si_v2si;
13458 break;
bd793c65
BS
13459 case DImode:
13460 type = di_ftype_di_di;
13461 break;
13462
13463 default:
13464 abort ();
13465 }
0f290768 13466
bd793c65
BS
13467 /* Override for comparisons. */
13468 if (d->icode == CODE_FOR_maskcmpv4sf3
13469 || d->icode == CODE_FOR_maskncmpv4sf3
13470 || d->icode == CODE_FOR_vmmaskcmpv4sf3
13471 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
13472 type = v4si_ftype_v4sf_v4sf;
13473
fbe5eb6d
BS
13474 if (d->icode == CODE_FOR_maskcmpv2df3
13475 || d->icode == CODE_FOR_maskncmpv2df3
13476 || d->icode == CODE_FOR_vmmaskcmpv2df3
13477 || d->icode == CODE_FOR_vmmaskncmpv2df3)
13478 type = v2di_ftype_v2df_v2df;
13479
eeb06b1b 13480 def_builtin (d->mask, d->name, type, d->code);
bd793c65
BS
13481 }
13482
13483 /* Add the remaining MMX insns with somewhat more complicated types. */
eeb06b1b
BS
13484 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
13485 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
eeb06b1b
BS
13486 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
13487 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
13488 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
13489
13490 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
13491 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
13492 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
13493
13494 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
13495 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
13496
13497 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
13498 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
0f290768 13499
bd793c65 13500 /* comi/ucomi insns. */
ca7558fc 13501 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
fbe5eb6d
BS
13502 if (d->mask == MASK_SSE2)
13503 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
13504 else
13505 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
bd793c65 13506
1255c85c
BS
13507 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
13508 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
13509 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
bd793c65 13510
37f22004
L
13511 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
13512 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
13513 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
13514 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
13515 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
13516 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
13517 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
13518 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
13519 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
13520 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
13521 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
13522
13523 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
13524 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
13525
13526 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
13527
13528 def_builtin (MASK_SSE, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADAPS);
13529 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
13530 def_builtin (MASK_SSE, "__builtin_ia32_loadss", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADSS);
13531 def_builtin (MASK_SSE, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
13532 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
13533 def_builtin (MASK_SSE, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
13534
13535 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
13536 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
13537 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
13538 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
13539
13540 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
13541 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
13542 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
13543 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
13544
13545 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
13546
13547 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
13548
13549 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
13550 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
13551 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
13552 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
13553 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
13554 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
13555
13556 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
bd793c65 13557
47f339cf
BS
13558 /* Original 3DNow! */
13559 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
13560 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
13561 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
13562 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
13563 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
13564 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
13565 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
13566 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
13567 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
13568 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
13569 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
13570 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
13571 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
13572 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
13573 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
13574 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
13575 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
13576 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
13577 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
13578 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
47f339cf
BS
13579
13580 /* 3DNow! extension as used in the Athlon CPU. */
13581 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
13582 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
13583 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
13584 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
13585 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
13586 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
13587
37f22004 13588 def_builtin (MASK_SSE, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
fbe5eb6d
BS
13589
13590 /* SSE2 */
13591 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
13592 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
13593
13594 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
13595 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
f02e1358 13596 def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q);
fbe5eb6d 13597
068f5dea
JH
13598 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADAPD);
13599 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
13600 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADSD);
fbe5eb6d
BS
13601 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
13602 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
13603 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
13604
13605 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
13606 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
13607 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
13608 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
13609
13610 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
916b60b7 13611 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
fbe5eb6d
BS
13612 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
13613 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
916b60b7 13614 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
fbe5eb6d
BS
13615
13616 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
13617 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
13618 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
916b60b7 13619 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
fbe5eb6d
BS
13620
13621 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
13622 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
13623
13624 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
13625
13626 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
916b60b7 13627 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
fbe5eb6d
BS
13628
13629 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
13630 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
13631 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
13632 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
13633 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
13634
13635 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
13636
13637 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
13638 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
37f22004
L
13639 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
13640 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
fbe5eb6d
BS
13641
13642 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
13643 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
13644 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
13645
13646 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
37f22004 13647 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
fbe5eb6d
BS
13648 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
13649 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
13650
13651 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
13652 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
13653 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
068f5dea
JH
13654 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pcdouble, IX86_BUILTIN_LOADPD1);
13655 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADRPD);
fbe5eb6d
BS
13656 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
13657 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
13658
068f5dea 13659 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
fbe5eb6d
BS
13660 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
13661 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
916b60b7 13662
068f5dea
JH
13663 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQA);
13664 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
13665 def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pcint, IX86_BUILTIN_LOADD);
f02e1358
JH
13666 def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA);
13667 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
068f5dea 13668 def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pcint_v4si, IX86_BUILTIN_STORED);
f02e1358
JH
13669 def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ);
13670
37f22004 13671 def_builtin (MASK_SSE, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI);
f02e1358 13672
9e9fb0ce
JB
13673 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
13674 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
13675
916b60b7
BS
13676 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
13677 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
13678 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
13679
13680 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
13681 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
13682 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
13683
13684 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
13685 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
13686
ab3146fd 13687 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
916b60b7
BS
13688 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
13689 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
13690 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
13691
ab3146fd 13692 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
916b60b7
BS
13693 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
13694 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
13695 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
13696
13697 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
13698 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
13699
13700 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
22c7c85e
L
13701
13702 /* Prescott New Instructions. */
9e200aaf 13703 def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
22c7c85e
L
13704 void_ftype_pcvoid_unsigned_unsigned,
13705 IX86_BUILTIN_MONITOR);
9e200aaf 13706 def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
22c7c85e
L
13707 void_ftype_unsigned_unsigned,
13708 IX86_BUILTIN_MWAIT);
9e200aaf 13709 def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
22c7c85e
L
13710 v4sf_ftype_v4sf,
13711 IX86_BUILTIN_MOVSHDUP);
9e200aaf 13712 def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
22c7c85e
L
13713 v4sf_ftype_v4sf,
13714 IX86_BUILTIN_MOVSLDUP);
9e200aaf 13715 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
22c7c85e 13716 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
9e200aaf 13717 def_builtin (MASK_SSE3, "__builtin_ia32_loadddup",
22c7c85e 13718 v2df_ftype_pcdouble, IX86_BUILTIN_LOADDDUP);
9e200aaf 13719 def_builtin (MASK_SSE3, "__builtin_ia32_movddup",
22c7c85e 13720 v2df_ftype_v2df, IX86_BUILTIN_MOVDDUP);
bd793c65
BS
13721}
13722
13723/* Errors in the source file can cause expand_expr to return const0_rtx
13724 where we expect a vector. To avoid crashing, use one of the vector
13725 clear instructions. */
13726static rtx
b96a374d 13727safe_vector_operand (rtx x, enum machine_mode mode)
bd793c65
BS
13728{
13729 if (x != const0_rtx)
13730 return x;
13731 x = gen_reg_rtx (mode);
13732
47f339cf 13733 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
bd793c65
BS
13734 emit_insn (gen_mmx_clrdi (mode == DImode ? x
13735 : gen_rtx_SUBREG (DImode, x, 0)));
13736 else
e37af218 13737 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
4977bab6
ZW
13738 : gen_rtx_SUBREG (V4SFmode, x, 0),
13739 CONST0_RTX (V4SFmode)));
bd793c65
BS
13740 return x;
13741}
13742
13743/* Subroutine of ix86_expand_builtin to take care of binop insns. */
13744
13745static rtx
b96a374d 13746ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
bd793c65
BS
13747{
13748 rtx pat;
13749 tree arg0 = TREE_VALUE (arglist);
13750 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13751 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13752 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13753 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13754 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13755 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
13756
13757 if (VECTOR_MODE_P (mode0))
13758 op0 = safe_vector_operand (op0, mode0);
13759 if (VECTOR_MODE_P (mode1))
13760 op1 = safe_vector_operand (op1, mode1);
13761
13762 if (! target
13763 || GET_MODE (target) != tmode
13764 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13765 target = gen_reg_rtx (tmode);
13766
d9deed68
JH
13767 if (GET_MODE (op1) == SImode && mode1 == TImode)
13768 {
13769 rtx x = gen_reg_rtx (V4SImode);
13770 emit_insn (gen_sse2_loadd (x, op1));
13771 op1 = gen_lowpart (TImode, x);
13772 }
13773
bd793c65
BS
13774 /* In case the insn wants input operands in modes different from
13775 the result, abort. */
ebe75517
JH
13776 if ((GET_MODE (op0) != mode0 && GET_MODE (op0) != VOIDmode)
13777 || (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode))
bd793c65
BS
13778 abort ();
13779
13780 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13781 op0 = copy_to_mode_reg (mode0, op0);
13782 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13783 op1 = copy_to_mode_reg (mode1, op1);
13784
59bef189
RH
13785 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
13786 yet one of the two must not be a memory. This is normally enforced
13787 by expanders, but we didn't bother to create one here. */
13788 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
13789 op0 = copy_to_mode_reg (mode0, op0);
13790
bd793c65
BS
13791 pat = GEN_FCN (icode) (target, op0, op1);
13792 if (! pat)
13793 return 0;
13794 emit_insn (pat);
13795 return target;
13796}
13797
13798/* Subroutine of ix86_expand_builtin to take care of stores. */
13799
13800static rtx
b96a374d 13801ix86_expand_store_builtin (enum insn_code icode, tree arglist)
bd793c65
BS
13802{
13803 rtx pat;
13804 tree arg0 = TREE_VALUE (arglist);
13805 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13806 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13807 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13808 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
13809 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
13810
13811 if (VECTOR_MODE_P (mode1))
13812 op1 = safe_vector_operand (op1, mode1);
13813
13814 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
7f0e57bd 13815 op1 = copy_to_mode_reg (mode1, op1);
59bef189 13816
bd793c65
BS
13817 pat = GEN_FCN (icode) (op0, op1);
13818 if (pat)
13819 emit_insn (pat);
13820 return 0;
13821}
13822
13823/* Subroutine of ix86_expand_builtin to take care of unop insns. */
13824
13825static rtx
b96a374d
AJ
13826ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
13827 rtx target, int do_load)
bd793c65
BS
13828{
13829 rtx pat;
13830 tree arg0 = TREE_VALUE (arglist);
13831 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13832 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13833 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13834
13835 if (! target
13836 || GET_MODE (target) != tmode
13837 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13838 target = gen_reg_rtx (tmode);
13839 if (do_load)
13840 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13841 else
13842 {
13843 if (VECTOR_MODE_P (mode0))
13844 op0 = safe_vector_operand (op0, mode0);
13845
13846 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13847 op0 = copy_to_mode_reg (mode0, op0);
13848 }
13849
13850 pat = GEN_FCN (icode) (target, op0);
13851 if (! pat)
13852 return 0;
13853 emit_insn (pat);
13854 return target;
13855}
13856
13857/* Subroutine of ix86_expand_builtin to take care of three special unop insns:
13858 sqrtss, rsqrtss, rcpss. */
13859
13860static rtx
b96a374d 13861ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
bd793c65
BS
13862{
13863 rtx pat;
13864 tree arg0 = TREE_VALUE (arglist);
59bef189 13865 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
bd793c65
BS
13866 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13867 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13868
13869 if (! target
13870 || GET_MODE (target) != tmode
13871 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13872 target = gen_reg_rtx (tmode);
13873
13874 if (VECTOR_MODE_P (mode0))
13875 op0 = safe_vector_operand (op0, mode0);
13876
13877 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13878 op0 = copy_to_mode_reg (mode0, op0);
fce5a9f2 13879
59bef189
RH
13880 op1 = op0;
13881 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
13882 op1 = copy_to_mode_reg (mode0, op1);
fce5a9f2 13883
59bef189 13884 pat = GEN_FCN (icode) (target, op0, op1);
bd793c65
BS
13885 if (! pat)
13886 return 0;
13887 emit_insn (pat);
13888 return target;
13889}
13890
13891/* Subroutine of ix86_expand_builtin to take care of comparison insns. */
13892
13893static rtx
b96a374d
AJ
13894ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
13895 rtx target)
bd793c65
BS
13896{
13897 rtx pat;
13898 tree arg0 = TREE_VALUE (arglist);
13899 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13900 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13901 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13902 rtx op2;
13903 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
13904 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
13905 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
13906 enum rtx_code comparison = d->comparison;
13907
13908 if (VECTOR_MODE_P (mode0))
13909 op0 = safe_vector_operand (op0, mode0);
13910 if (VECTOR_MODE_P (mode1))
13911 op1 = safe_vector_operand (op1, mode1);
13912
13913 /* Swap operands if we have a comparison that isn't available in
13914 hardware. */
13915 if (d->flag)
13916 {
21e1b5f1
BS
13917 rtx tmp = gen_reg_rtx (mode1);
13918 emit_move_insn (tmp, op1);
bd793c65 13919 op1 = op0;
21e1b5f1 13920 op0 = tmp;
bd793c65 13921 }
21e1b5f1
BS
13922
13923 if (! target
13924 || GET_MODE (target) != tmode
13925 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
bd793c65
BS
13926 target = gen_reg_rtx (tmode);
13927
13928 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
13929 op0 = copy_to_mode_reg (mode0, op0);
13930 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
13931 op1 = copy_to_mode_reg (mode1, op1);
13932
13933 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13934 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
13935 if (! pat)
13936 return 0;
13937 emit_insn (pat);
13938 return target;
13939}
13940
13941/* Subroutine of ix86_expand_builtin to take care of comi insns. */
13942
13943static rtx
b96a374d
AJ
13944ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
13945 rtx target)
bd793c65
BS
13946{
13947 rtx pat;
13948 tree arg0 = TREE_VALUE (arglist);
13949 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13950 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13951 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13952 rtx op2;
13953 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
13954 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
13955 enum rtx_code comparison = d->comparison;
13956
13957 if (VECTOR_MODE_P (mode0))
13958 op0 = safe_vector_operand (op0, mode0);
13959 if (VECTOR_MODE_P (mode1))
13960 op1 = safe_vector_operand (op1, mode1);
13961
13962 /* Swap operands if we have a comparison that isn't available in
13963 hardware. */
13964 if (d->flag)
13965 {
13966 rtx tmp = op1;
13967 op1 = op0;
13968 op0 = tmp;
bd793c65
BS
13969 }
13970
13971 target = gen_reg_rtx (SImode);
13972 emit_move_insn (target, const0_rtx);
13973 target = gen_rtx_SUBREG (QImode, target, 0);
13974
13975 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
13976 op0 = copy_to_mode_reg (mode0, op0);
13977 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
13978 op1 = copy_to_mode_reg (mode1, op1);
13979
13980 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
1194ca05 13981 pat = GEN_FCN (d->icode) (op0, op1);
bd793c65
BS
13982 if (! pat)
13983 return 0;
13984 emit_insn (pat);
29628f27
BS
13985 emit_insn (gen_rtx_SET (VOIDmode,
13986 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
13987 gen_rtx_fmt_ee (comparison, QImode,
1194ca05 13988 SET_DEST (pat),
29628f27 13989 const0_rtx)));
bd793c65 13990
6f1a6c5b 13991 return SUBREG_REG (target);
bd793c65
BS
13992}
13993
13994/* Expand an expression EXP that calls a built-in function,
13995 with result going to TARGET if that's convenient
13996 (and in mode MODE if that's convenient).
13997 SUBTARGET may be used as the target for computing one of EXP's operands.
13998 IGNORE is nonzero if the value is to be ignored. */
13999
14000rtx
b96a374d
AJ
14001ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
14002 enum machine_mode mode ATTRIBUTE_UNUSED,
14003 int ignore ATTRIBUTE_UNUSED)
bd793c65 14004{
8b60264b 14005 const struct builtin_description *d;
77ebd435 14006 size_t i;
bd793c65
BS
14007 enum insn_code icode;
14008 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
14009 tree arglist = TREE_OPERAND (exp, 1);
e37af218 14010 tree arg0, arg1, arg2;
bd793c65
BS
14011 rtx op0, op1, op2, pat;
14012 enum machine_mode tmode, mode0, mode1, mode2;
8752c357 14013 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
bd793c65
BS
14014
14015 switch (fcode)
14016 {
14017 case IX86_BUILTIN_EMMS:
14018 emit_insn (gen_emms ());
14019 return 0;
14020
14021 case IX86_BUILTIN_SFENCE:
14022 emit_insn (gen_sfence ());
14023 return 0;
14024
bd793c65 14025 case IX86_BUILTIN_PEXTRW:
fbe5eb6d
BS
14026 case IX86_BUILTIN_PEXTRW128:
14027 icode = (fcode == IX86_BUILTIN_PEXTRW
14028 ? CODE_FOR_mmx_pextrw
14029 : CODE_FOR_sse2_pextrw);
bd793c65
BS
14030 arg0 = TREE_VALUE (arglist);
14031 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14032 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14033 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14034 tmode = insn_data[icode].operand[0].mode;
14035 mode0 = insn_data[icode].operand[1].mode;
14036 mode1 = insn_data[icode].operand[2].mode;
14037
14038 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14039 op0 = copy_to_mode_reg (mode0, op0);
14040 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14041 {
ebe75517
JH
14042 error ("selector must be an integer constant in the range 0..%i",
14043 fcode == IX86_BUILTIN_PEXTRW ? 3:7);
6f1a6c5b 14044 return gen_reg_rtx (tmode);
bd793c65
BS
14045 }
14046 if (target == 0
14047 || GET_MODE (target) != tmode
14048 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14049 target = gen_reg_rtx (tmode);
14050 pat = GEN_FCN (icode) (target, op0, op1);
14051 if (! pat)
14052 return 0;
14053 emit_insn (pat);
14054 return target;
14055
14056 case IX86_BUILTIN_PINSRW:
fbe5eb6d
BS
14057 case IX86_BUILTIN_PINSRW128:
14058 icode = (fcode == IX86_BUILTIN_PINSRW
14059 ? CODE_FOR_mmx_pinsrw
14060 : CODE_FOR_sse2_pinsrw);
bd793c65
BS
14061 arg0 = TREE_VALUE (arglist);
14062 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14063 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14064 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14065 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14066 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14067 tmode = insn_data[icode].operand[0].mode;
14068 mode0 = insn_data[icode].operand[1].mode;
14069 mode1 = insn_data[icode].operand[2].mode;
14070 mode2 = insn_data[icode].operand[3].mode;
14071
14072 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14073 op0 = copy_to_mode_reg (mode0, op0);
14074 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14075 op1 = copy_to_mode_reg (mode1, op1);
14076 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14077 {
ebe75517
JH
14078 error ("selector must be an integer constant in the range 0..%i",
14079 fcode == IX86_BUILTIN_PINSRW ? 15:255);
bd793c65
BS
14080 return const0_rtx;
14081 }
14082 if (target == 0
14083 || GET_MODE (target) != tmode
14084 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14085 target = gen_reg_rtx (tmode);
14086 pat = GEN_FCN (icode) (target, op0, op1, op2);
14087 if (! pat)
14088 return 0;
14089 emit_insn (pat);
14090 return target;
14091
14092 case IX86_BUILTIN_MASKMOVQ:
077084dd 14093 case IX86_BUILTIN_MASKMOVDQU:
fbe5eb6d
BS
14094 icode = (fcode == IX86_BUILTIN_MASKMOVQ
14095 ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
f8ca7923
JH
14096 : (TARGET_64BIT ? CODE_FOR_sse2_maskmovdqu_rex64
14097 : CODE_FOR_sse2_maskmovdqu));
bd793c65
BS
14098 /* Note the arg order is different from the operand order. */
14099 arg1 = TREE_VALUE (arglist);
14100 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
14101 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14102 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14103 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14104 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14105 mode0 = insn_data[icode].operand[0].mode;
14106 mode1 = insn_data[icode].operand[1].mode;
14107 mode2 = insn_data[icode].operand[2].mode;
14108
5c464583 14109 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
bd793c65
BS
14110 op0 = copy_to_mode_reg (mode0, op0);
14111 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
14112 op1 = copy_to_mode_reg (mode1, op1);
14113 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
14114 op2 = copy_to_mode_reg (mode2, op2);
14115 pat = GEN_FCN (icode) (op0, op1, op2);
14116 if (! pat)
14117 return 0;
14118 emit_insn (pat);
14119 return 0;
14120
14121 case IX86_BUILTIN_SQRTSS:
14122 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
14123 case IX86_BUILTIN_RSQRTSS:
14124 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
14125 case IX86_BUILTIN_RCPSS:
14126 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
14127
14128 case IX86_BUILTIN_LOADAPS:
14129 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
14130
14131 case IX86_BUILTIN_LOADUPS:
14132 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
14133
14134 case IX86_BUILTIN_STOREAPS:
e37af218 14135 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
f02e1358 14136
bd793c65 14137 case IX86_BUILTIN_STOREUPS:
e37af218 14138 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
bd793c65
BS
14139
14140 case IX86_BUILTIN_LOADSS:
14141 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
14142
14143 case IX86_BUILTIN_STORESS:
e37af218 14144 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
bd793c65 14145
0f290768 14146 case IX86_BUILTIN_LOADHPS:
bd793c65 14147 case IX86_BUILTIN_LOADLPS:
fbe5eb6d
BS
14148 case IX86_BUILTIN_LOADHPD:
14149 case IX86_BUILTIN_LOADLPD:
14150 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
14151 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
14152 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
997404de 14153 : CODE_FOR_sse2_movsd);
bd793c65
BS
14154 arg0 = TREE_VALUE (arglist);
14155 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14156 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14157 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14158 tmode = insn_data[icode].operand[0].mode;
14159 mode0 = insn_data[icode].operand[1].mode;
14160 mode1 = insn_data[icode].operand[2].mode;
14161
14162 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14163 op0 = copy_to_mode_reg (mode0, op0);
14164 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
14165 if (target == 0
14166 || GET_MODE (target) != tmode
14167 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14168 target = gen_reg_rtx (tmode);
14169 pat = GEN_FCN (icode) (target, op0, op1);
14170 if (! pat)
14171 return 0;
14172 emit_insn (pat);
14173 return target;
0f290768 14174
bd793c65
BS
14175 case IX86_BUILTIN_STOREHPS:
14176 case IX86_BUILTIN_STORELPS:
fbe5eb6d
BS
14177 case IX86_BUILTIN_STOREHPD:
14178 case IX86_BUILTIN_STORELPD:
14179 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
14180 : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
14181 : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
997404de 14182 : CODE_FOR_sse2_movsd);
bd793c65
BS
14183 arg0 = TREE_VALUE (arglist);
14184 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14185 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14186 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14187 mode0 = insn_data[icode].operand[1].mode;
14188 mode1 = insn_data[icode].operand[2].mode;
14189
14190 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
14191 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14192 op1 = copy_to_mode_reg (mode1, op1);
14193
14194 pat = GEN_FCN (icode) (op0, op0, op1);
14195 if (! pat)
14196 return 0;
14197 emit_insn (pat);
14198 return 0;
14199
14200 case IX86_BUILTIN_MOVNTPS:
e37af218 14201 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
bd793c65 14202 case IX86_BUILTIN_MOVNTQ:
e37af218 14203 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
bd793c65
BS
14204
14205 case IX86_BUILTIN_LDMXCSR:
14206 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
14207 target = assign_386_stack_local (SImode, 0);
14208 emit_move_insn (target, op0);
14209 emit_insn (gen_ldmxcsr (target));
14210 return 0;
14211
14212 case IX86_BUILTIN_STMXCSR:
14213 target = assign_386_stack_local (SImode, 0);
14214 emit_insn (gen_stmxcsr (target));
14215 return copy_to_mode_reg (SImode, target);
14216
bd793c65 14217 case IX86_BUILTIN_SHUFPS:
fbe5eb6d
BS
14218 case IX86_BUILTIN_SHUFPD:
14219 icode = (fcode == IX86_BUILTIN_SHUFPS
14220 ? CODE_FOR_sse_shufps
14221 : CODE_FOR_sse2_shufpd);
bd793c65
BS
14222 arg0 = TREE_VALUE (arglist);
14223 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14224 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14225 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14226 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14227 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14228 tmode = insn_data[icode].operand[0].mode;
14229 mode0 = insn_data[icode].operand[1].mode;
14230 mode1 = insn_data[icode].operand[2].mode;
14231 mode2 = insn_data[icode].operand[3].mode;
14232
14233 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14234 op0 = copy_to_mode_reg (mode0, op0);
14235 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14236 op1 = copy_to_mode_reg (mode1, op1);
14237 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14238 {
14239 /* @@@ better error message */
14240 error ("mask must be an immediate");
6f1a6c5b 14241 return gen_reg_rtx (tmode);
bd793c65
BS
14242 }
14243 if (target == 0
14244 || GET_MODE (target) != tmode
14245 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14246 target = gen_reg_rtx (tmode);
14247 pat = GEN_FCN (icode) (target, op0, op1, op2);
14248 if (! pat)
14249 return 0;
14250 emit_insn (pat);
14251 return target;
14252
14253 case IX86_BUILTIN_PSHUFW:
fbe5eb6d
BS
14254 case IX86_BUILTIN_PSHUFD:
14255 case IX86_BUILTIN_PSHUFHW:
14256 case IX86_BUILTIN_PSHUFLW:
14257 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
14258 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
14259 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
14260 : CODE_FOR_mmx_pshufw);
bd793c65
BS
14261 arg0 = TREE_VALUE (arglist);
14262 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14263 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14264 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14265 tmode = insn_data[icode].operand[0].mode;
29628f27
BS
14266 mode1 = insn_data[icode].operand[1].mode;
14267 mode2 = insn_data[icode].operand[2].mode;
bd793c65 14268
29628f27
BS
14269 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14270 op0 = copy_to_mode_reg (mode1, op0);
14271 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
bd793c65
BS
14272 {
14273 /* @@@ better error message */
14274 error ("mask must be an immediate");
14275 return const0_rtx;
14276 }
14277 if (target == 0
14278 || GET_MODE (target) != tmode
14279 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14280 target = gen_reg_rtx (tmode);
29628f27 14281 pat = GEN_FCN (icode) (target, op0, op1);
bd793c65
BS
14282 if (! pat)
14283 return 0;
14284 emit_insn (pat);
14285 return target;
14286
ab3146fd
ZD
14287 case IX86_BUILTIN_PSLLDQI128:
14288 case IX86_BUILTIN_PSRLDQI128:
14289 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
14290 : CODE_FOR_sse2_lshrti3);
14291 arg0 = TREE_VALUE (arglist);
14292 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14293 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14294 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14295 tmode = insn_data[icode].operand[0].mode;
14296 mode1 = insn_data[icode].operand[1].mode;
14297 mode2 = insn_data[icode].operand[2].mode;
14298
14299 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14300 {
14301 op0 = copy_to_reg (op0);
14302 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
14303 }
14304 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14305 {
14306 error ("shift must be an immediate");
14307 return const0_rtx;
14308 }
14309 target = gen_reg_rtx (V2DImode);
14310 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
14311 if (! pat)
14312 return 0;
14313 emit_insn (pat);
14314 return target;
14315
47f339cf
BS
14316 case IX86_BUILTIN_FEMMS:
14317 emit_insn (gen_femms ());
14318 return NULL_RTX;
14319
14320 case IX86_BUILTIN_PAVGUSB:
14321 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
14322
14323 case IX86_BUILTIN_PF2ID:
14324 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
14325
14326 case IX86_BUILTIN_PFACC:
14327 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
14328
14329 case IX86_BUILTIN_PFADD:
14330 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
14331
14332 case IX86_BUILTIN_PFCMPEQ:
14333 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
14334
14335 case IX86_BUILTIN_PFCMPGE:
14336 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
14337
14338 case IX86_BUILTIN_PFCMPGT:
14339 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
14340
14341 case IX86_BUILTIN_PFMAX:
14342 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
14343
14344 case IX86_BUILTIN_PFMIN:
14345 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
14346
14347 case IX86_BUILTIN_PFMUL:
14348 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
14349
14350 case IX86_BUILTIN_PFRCP:
14351 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
14352
14353 case IX86_BUILTIN_PFRCPIT1:
14354 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
14355
14356 case IX86_BUILTIN_PFRCPIT2:
14357 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
14358
14359 case IX86_BUILTIN_PFRSQIT1:
14360 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
14361
14362 case IX86_BUILTIN_PFRSQRT:
14363 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
14364
14365 case IX86_BUILTIN_PFSUB:
14366 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
14367
14368 case IX86_BUILTIN_PFSUBR:
14369 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
14370
14371 case IX86_BUILTIN_PI2FD:
14372 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
14373
14374 case IX86_BUILTIN_PMULHRW:
14375 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
14376
47f339cf
BS
14377 case IX86_BUILTIN_PF2IW:
14378 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
14379
14380 case IX86_BUILTIN_PFNACC:
14381 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
14382
14383 case IX86_BUILTIN_PFPNACC:
14384 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
14385
14386 case IX86_BUILTIN_PI2FW:
14387 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
14388
14389 case IX86_BUILTIN_PSWAPDSI:
14390 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
14391
14392 case IX86_BUILTIN_PSWAPDSF:
14393 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
14394
e37af218
RH
14395 case IX86_BUILTIN_SSE_ZERO:
14396 target = gen_reg_rtx (V4SFmode);
4977bab6 14397 emit_insn (gen_sse_clrv4sf (target, CONST0_RTX (V4SFmode)));
bd793c65
BS
14398 return target;
14399
bd793c65
BS
14400 case IX86_BUILTIN_MMX_ZERO:
14401 target = gen_reg_rtx (DImode);
14402 emit_insn (gen_mmx_clrdi (target));
14403 return target;
14404
f02e1358
JH
14405 case IX86_BUILTIN_CLRTI:
14406 target = gen_reg_rtx (V2DImode);
14407 emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode, target, V2DImode, 0)));
14408 return target;
14409
14410
fbe5eb6d
BS
14411 case IX86_BUILTIN_SQRTSD:
14412 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
14413 case IX86_BUILTIN_LOADAPD:
14414 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
14415 case IX86_BUILTIN_LOADUPD:
14416 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
14417
14418 case IX86_BUILTIN_STOREAPD:
14419 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14420 case IX86_BUILTIN_STOREUPD:
14421 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
14422
14423 case IX86_BUILTIN_LOADSD:
14424 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
14425
14426 case IX86_BUILTIN_STORESD:
14427 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
14428
14429 case IX86_BUILTIN_SETPD1:
14430 target = assign_386_stack_local (DFmode, 0);
14431 arg0 = TREE_VALUE (arglist);
14432 emit_move_insn (adjust_address (target, DFmode, 0),
14433 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14434 op0 = gen_reg_rtx (V2DFmode);
14435 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
60c81c89 14436 emit_insn (gen_sse2_shufpd (op0, op0, op0, const0_rtx));
fbe5eb6d
BS
14437 return op0;
14438
14439 case IX86_BUILTIN_SETPD:
14440 target = assign_386_stack_local (V2DFmode, 0);
14441 arg0 = TREE_VALUE (arglist);
14442 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14443 emit_move_insn (adjust_address (target, DFmode, 0),
14444 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14445 emit_move_insn (adjust_address (target, DFmode, 8),
14446 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
14447 op0 = gen_reg_rtx (V2DFmode);
14448 emit_insn (gen_sse2_movapd (op0, target));
14449 return op0;
14450
14451 case IX86_BUILTIN_LOADRPD:
14452 target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
14453 gen_reg_rtx (V2DFmode), 1);
60c81c89 14454 emit_insn (gen_sse2_shufpd (target, target, target, const1_rtx));
fbe5eb6d
BS
14455 return target;
14456
14457 case IX86_BUILTIN_LOADPD1:
14458 target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
14459 gen_reg_rtx (V2DFmode), 1);
14460 emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
14461 return target;
14462
14463 case IX86_BUILTIN_STOREPD1:
14464 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14465 case IX86_BUILTIN_STORERPD:
14466 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14467
48126a97
JH
14468 case IX86_BUILTIN_CLRPD:
14469 target = gen_reg_rtx (V2DFmode);
14470 emit_insn (gen_sse_clrv2df (target));
14471 return target;
14472
fbe5eb6d
BS
14473 case IX86_BUILTIN_MFENCE:
14474 emit_insn (gen_sse2_mfence ());
14475 return 0;
14476 case IX86_BUILTIN_LFENCE:
14477 emit_insn (gen_sse2_lfence ());
14478 return 0;
14479
14480 case IX86_BUILTIN_CLFLUSH:
14481 arg0 = TREE_VALUE (arglist);
14482 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14483 icode = CODE_FOR_sse2_clflush;
1194ca05
JH
14484 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
14485 op0 = copy_to_mode_reg (Pmode, op0);
fbe5eb6d
BS
14486
14487 emit_insn (gen_sse2_clflush (op0));
14488 return 0;
14489
14490 case IX86_BUILTIN_MOVNTPD:
14491 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
14492 case IX86_BUILTIN_MOVNTDQ:
916b60b7 14493 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
fbe5eb6d
BS
14494 case IX86_BUILTIN_MOVNTI:
14495 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
14496
f02e1358
JH
14497 case IX86_BUILTIN_LOADDQA:
14498 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa, arglist, target, 1);
14499 case IX86_BUILTIN_LOADDQU:
14500 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
14501 case IX86_BUILTIN_LOADD:
14502 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd, arglist, target, 1);
14503
14504 case IX86_BUILTIN_STOREDQA:
14505 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa, arglist);
14506 case IX86_BUILTIN_STOREDQU:
14507 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
14508 case IX86_BUILTIN_STORED:
14509 return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist);
14510
22c7c85e
L
14511 case IX86_BUILTIN_MONITOR:
14512 arg0 = TREE_VALUE (arglist);
14513 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14514 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14515 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14516 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14517 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14518 if (!REG_P (op0))
14519 op0 = copy_to_mode_reg (SImode, op0);
14520 if (!REG_P (op1))
14521 op1 = copy_to_mode_reg (SImode, op1);
14522 if (!REG_P (op2))
14523 op2 = copy_to_mode_reg (SImode, op2);
14524 emit_insn (gen_monitor (op0, op1, op2));
14525 return 0;
14526
14527 case IX86_BUILTIN_MWAIT:
14528 arg0 = TREE_VALUE (arglist);
14529 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14530 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14531 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14532 if (!REG_P (op0))
14533 op0 = copy_to_mode_reg (SImode, op0);
14534 if (!REG_P (op1))
14535 op1 = copy_to_mode_reg (SImode, op1);
14536 emit_insn (gen_mwait (op0, op1));
14537 return 0;
14538
14539 case IX86_BUILTIN_LOADDDUP:
14540 return ix86_expand_unop_builtin (CODE_FOR_loadddup, arglist, target, 1);
14541
14542 case IX86_BUILTIN_LDDQU:
14543 return ix86_expand_unop_builtin (CODE_FOR_lddqu, arglist, target,
14544 1);
14545
bd793c65
BS
14546 default:
14547 break;
14548 }
14549
ca7558fc 14550 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
bd793c65
BS
14551 if (d->code == fcode)
14552 {
14553 /* Compares are treated specially. */
14554 if (d->icode == CODE_FOR_maskcmpv4sf3
14555 || d->icode == CODE_FOR_vmmaskcmpv4sf3
14556 || d->icode == CODE_FOR_maskncmpv4sf3
fbe5eb6d
BS
14557 || d->icode == CODE_FOR_vmmaskncmpv4sf3
14558 || d->icode == CODE_FOR_maskcmpv2df3
14559 || d->icode == CODE_FOR_vmmaskcmpv2df3
14560 || d->icode == CODE_FOR_maskncmpv2df3
14561 || d->icode == CODE_FOR_vmmaskncmpv2df3)
bd793c65
BS
14562 return ix86_expand_sse_compare (d, arglist, target);
14563
14564 return ix86_expand_binop_builtin (d->icode, arglist, target);
14565 }
14566
ca7558fc 14567 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
bd793c65
BS
14568 if (d->code == fcode)
14569 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
0f290768 14570
ca7558fc 14571 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
bd793c65
BS
14572 if (d->code == fcode)
14573 return ix86_expand_sse_comi (d, arglist, target);
0f290768 14574
bd793c65
BS
14575 /* @@@ Should really do something sensible here. */
14576 return 0;
bd793c65 14577}
4211a8fb
JH
14578
14579/* Store OPERAND to the memory after reload is completed. This means
f710504c 14580 that we can't easily use assign_stack_local. */
4211a8fb 14581rtx
b96a374d 14582ix86_force_to_memory (enum machine_mode mode, rtx operand)
4211a8fb 14583{
898d374d 14584 rtx result;
4211a8fb
JH
14585 if (!reload_completed)
14586 abort ();
a5b378d6 14587 if (TARGET_RED_ZONE)
898d374d
JH
14588 {
14589 result = gen_rtx_MEM (mode,
14590 gen_rtx_PLUS (Pmode,
14591 stack_pointer_rtx,
14592 GEN_INT (-RED_ZONE_SIZE)));
14593 emit_move_insn (result, operand);
14594 }
a5b378d6 14595 else if (!TARGET_RED_ZONE && TARGET_64BIT)
4211a8fb 14596 {
898d374d 14597 switch (mode)
4211a8fb 14598 {
898d374d
JH
14599 case HImode:
14600 case SImode:
14601 operand = gen_lowpart (DImode, operand);
5efb1046 14602 /* FALLTHRU */
898d374d 14603 case DImode:
4211a8fb 14604 emit_insn (
898d374d
JH
14605 gen_rtx_SET (VOIDmode,
14606 gen_rtx_MEM (DImode,
14607 gen_rtx_PRE_DEC (DImode,
14608 stack_pointer_rtx)),
14609 operand));
14610 break;
14611 default:
14612 abort ();
14613 }
14614 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14615 }
14616 else
14617 {
14618 switch (mode)
14619 {
14620 case DImode:
14621 {
14622 rtx operands[2];
14623 split_di (&operand, 1, operands, operands + 1);
14624 emit_insn (
14625 gen_rtx_SET (VOIDmode,
14626 gen_rtx_MEM (SImode,
14627 gen_rtx_PRE_DEC (Pmode,
14628 stack_pointer_rtx)),
14629 operands[1]));
14630 emit_insn (
14631 gen_rtx_SET (VOIDmode,
14632 gen_rtx_MEM (SImode,
14633 gen_rtx_PRE_DEC (Pmode,
14634 stack_pointer_rtx)),
14635 operands[0]));
14636 }
14637 break;
14638 case HImode:
14639 /* It is better to store HImodes as SImodes. */
14640 if (!TARGET_PARTIAL_REG_STALL)
14641 operand = gen_lowpart (SImode, operand);
5efb1046 14642 /* FALLTHRU */
898d374d 14643 case SImode:
4211a8fb 14644 emit_insn (
898d374d
JH
14645 gen_rtx_SET (VOIDmode,
14646 gen_rtx_MEM (GET_MODE (operand),
14647 gen_rtx_PRE_DEC (SImode,
14648 stack_pointer_rtx)),
14649 operand));
14650 break;
14651 default:
14652 abort ();
4211a8fb 14653 }
898d374d 14654 result = gen_rtx_MEM (mode, stack_pointer_rtx);
4211a8fb 14655 }
898d374d 14656 return result;
4211a8fb
JH
14657}
14658
14659/* Free operand from the memory. */
14660void
b96a374d 14661ix86_free_from_memory (enum machine_mode mode)
4211a8fb 14662{
a5b378d6 14663 if (!TARGET_RED_ZONE)
898d374d
JH
14664 {
14665 int size;
14666
14667 if (mode == DImode || TARGET_64BIT)
14668 size = 8;
14669 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
14670 size = 2;
14671 else
14672 size = 4;
14673 /* Use LEA to deallocate stack space. In peephole2 it will be converted
14674 to pop or add instruction if registers are available. */
14675 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
14676 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14677 GEN_INT (size))));
14678 }
4211a8fb 14679}
a946dd00 14680
f84aa48a
JH
14681/* Put float CONST_DOUBLE in the constant pool instead of fp regs.
14682 QImode must go into class Q_REGS.
14683 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
892a2d68 14684 movdf to do mem-to-mem moves through integer regs. */
f84aa48a 14685enum reg_class
b96a374d 14686ix86_preferred_reload_class (rtx x, enum reg_class class)
f84aa48a 14687{
1877be45
JH
14688 if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x)))
14689 return NO_REGS;
f84aa48a
JH
14690 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
14691 {
14692 /* SSE can't load any constant directly yet. */
14693 if (SSE_CLASS_P (class))
14694 return NO_REGS;
14695 /* Floats can load 0 and 1. */
14696 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
14697 {
14698 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
14699 if (MAYBE_SSE_CLASS_P (class))
14700 return (reg_class_subset_p (class, GENERAL_REGS)
14701 ? GENERAL_REGS : FLOAT_REGS);
14702 else
14703 return class;
14704 }
14705 /* General regs can load everything. */
14706 if (reg_class_subset_p (class, GENERAL_REGS))
14707 return GENERAL_REGS;
14708 /* In case we haven't resolved FLOAT or SSE yet, give up. */
14709 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
14710 return NO_REGS;
14711 }
14712 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
14713 return NO_REGS;
14714 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
14715 return Q_REGS;
14716 return class;
14717}
14718
14719/* If we are copying between general and FP registers, we need a memory
14720 location. The same is true for SSE and MMX registers.
14721
14722 The macro can't work reliably when one of the CLASSES is class containing
14723 registers from multiple units (SSE, MMX, integer). We avoid this by never
14724 combining those units in single alternative in the machine description.
14725 Ensure that this constraint holds to avoid unexpected surprises.
14726
14727 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
14728 enforce these sanity checks. */
14729int
b96a374d
AJ
14730ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
14731 enum machine_mode mode, int strict)
f84aa48a
JH
14732{
14733 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
14734 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
14735 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
14736 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
14737 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
14738 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
14739 {
14740 if (strict)
14741 abort ();
14742 else
14743 return 1;
14744 }
14745 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
8f62128d
JH
14746 || ((SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
14747 || MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
14748 && ((mode != SImode && (mode != DImode || !TARGET_64BIT))
14749 || (!TARGET_INTER_UNIT_MOVES && !optimize_size))));
f84aa48a
JH
14750}
14751/* Return the cost of moving data from a register in class CLASS1 to
a4f31c00 14752 one in class CLASS2.
f84aa48a
JH
14753
14754 It is not required that the cost always equal 2 when FROM is the same as TO;
14755 on some machines it is expensive to move between registers if they are not
14756 general registers. */
14757int
b96a374d
AJ
14758ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
14759 enum reg_class class2)
f84aa48a
JH
14760{
14761 /* In case we require secondary memory, compute cost of the store followed
b96a374d 14762 by load. In order to avoid bad register allocation choices, we need
d631b80a
RH
14763 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
14764
f84aa48a
JH
14765 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
14766 {
d631b80a
RH
14767 int cost = 1;
14768
14769 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
14770 MEMORY_MOVE_COST (mode, class1, 1));
14771 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
14772 MEMORY_MOVE_COST (mode, class2, 1));
b96a374d 14773
d631b80a
RH
14774 /* In case of copying from general_purpose_register we may emit multiple
14775 stores followed by single load causing memory size mismatch stall.
d1f87653 14776 Count this as arbitrarily high cost of 20. */
62415523 14777 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
d631b80a
RH
14778 cost += 20;
14779
14780 /* In the case of FP/MMX moves, the registers actually overlap, and we
14781 have to switch modes in order to treat them differently. */
14782 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
14783 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
14784 cost += 20;
14785
14786 return cost;
f84aa48a 14787 }
d631b80a 14788
92d0fb09 14789 /* Moves between SSE/MMX and integer unit are expensive. */
62415523
JH
14790 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
14791 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
fa79946e
JH
14792 return ix86_cost->mmxsse_to_integer;
14793 if (MAYBE_FLOAT_CLASS_P (class1))
14794 return ix86_cost->fp_move;
14795 if (MAYBE_SSE_CLASS_P (class1))
14796 return ix86_cost->sse_move;
14797 if (MAYBE_MMX_CLASS_P (class1))
14798 return ix86_cost->mmx_move;
f84aa48a
JH
14799 return 2;
14800}
14801
a946dd00
JH
14802/* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
14803int
b96a374d 14804ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
a946dd00
JH
14805{
14806 /* Flags and only flags can only hold CCmode values. */
14807 if (CC_REGNO_P (regno))
14808 return GET_MODE_CLASS (mode) == MODE_CC;
14809 if (GET_MODE_CLASS (mode) == MODE_CC
14810 || GET_MODE_CLASS (mode) == MODE_RANDOM
14811 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
14812 return 0;
14813 if (FP_REGNO_P (regno))
14814 return VALID_FP_MODE_P (mode);
14815 if (SSE_REGNO_P (regno))
a67a3220 14816 return (TARGET_SSE ? VALID_SSE_REG_MODE (mode) : 0);
a946dd00 14817 if (MMX_REGNO_P (regno))
a67a3220
JH
14818 return (TARGET_MMX
14819 ? VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode) : 0);
a946dd00
JH
14820 /* We handle both integer and floats in the general purpose registers.
14821 In future we should be able to handle vector modes as well. */
14822 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
14823 return 0;
14824 /* Take care for QImode values - they can be in non-QI regs, but then
14825 they do cause partial register stalls. */
d2836273 14826 if (regno < 4 || mode != QImode || TARGET_64BIT)
a946dd00
JH
14827 return 1;
14828 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
14829}
fa79946e
JH
14830
14831/* Return the cost of moving data of mode M between a
14832 register and memory. A value of 2 is the default; this cost is
14833 relative to those in `REGISTER_MOVE_COST'.
14834
14835 If moving between registers and memory is more expensive than
14836 between two registers, you should define this macro to express the
a4f31c00
AJ
14837 relative cost.
14838
fa79946e
JH
14839 Model also increased moving costs of QImode registers in non
14840 Q_REGS classes.
14841 */
14842int
b96a374d 14843ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
fa79946e
JH
14844{
14845 if (FLOAT_CLASS_P (class))
14846 {
14847 int index;
14848 switch (mode)
14849 {
14850 case SFmode:
14851 index = 0;
14852 break;
14853 case DFmode:
14854 index = 1;
14855 break;
14856 case XFmode:
fa79946e
JH
14857 index = 2;
14858 break;
14859 default:
14860 return 100;
14861 }
14862 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
14863 }
14864 if (SSE_CLASS_P (class))
14865 {
14866 int index;
14867 switch (GET_MODE_SIZE (mode))
14868 {
14869 case 4:
14870 index = 0;
14871 break;
14872 case 8:
14873 index = 1;
14874 break;
14875 case 16:
14876 index = 2;
14877 break;
14878 default:
14879 return 100;
14880 }
14881 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
14882 }
14883 if (MMX_CLASS_P (class))
14884 {
14885 int index;
14886 switch (GET_MODE_SIZE (mode))
14887 {
14888 case 4:
14889 index = 0;
14890 break;
14891 case 8:
14892 index = 1;
14893 break;
14894 default:
14895 return 100;
14896 }
14897 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
14898 }
14899 switch (GET_MODE_SIZE (mode))
14900 {
14901 case 1:
14902 if (in)
14903 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
14904 : ix86_cost->movzbl_load);
14905 else
14906 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
14907 : ix86_cost->int_store[0] + 4);
14908 break;
14909 case 2:
14910 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
14911 default:
14912 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
14913 if (mode == TFmode)
14914 mode = XFmode;
3bb7e126 14915 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
bce75972
VM
14916 * (((int) GET_MODE_SIZE (mode)
14917 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
fa79946e
JH
14918 }
14919}
0ecf09f9 14920
3c50106f
RH
14921/* Compute a (partial) cost for rtx X. Return true if the complete
14922 cost has been computed, and false if subexpressions should be
14923 scanned. In either case, *TOTAL contains the cost result. */
14924
14925static bool
b96a374d 14926ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
3c50106f
RH
14927{
14928 enum machine_mode mode = GET_MODE (x);
14929
14930 switch (code)
14931 {
14932 case CONST_INT:
14933 case CONST:
14934 case LABEL_REF:
14935 case SYMBOL_REF:
14936 if (TARGET_64BIT && !x86_64_sign_extended_value (x))
14937 *total = 3;
14938 else if (TARGET_64BIT && !x86_64_zero_extended_value (x))
14939 *total = 2;
3504dad3
JH
14940 else if (flag_pic && SYMBOLIC_CONST (x)
14941 && (!TARGET_64BIT
14942 || (!GET_CODE (x) != LABEL_REF
14943 && (GET_CODE (x) != SYMBOL_REF
12969f45 14944 || !SYMBOL_REF_LOCAL_P (x)))))
3c50106f
RH
14945 *total = 1;
14946 else
14947 *total = 0;
14948 return true;
14949
14950 case CONST_DOUBLE:
14951 if (mode == VOIDmode)
14952 *total = 0;
14953 else
14954 switch (standard_80387_constant_p (x))
14955 {
14956 case 1: /* 0.0 */
14957 *total = 1;
14958 break;
881b2a96 14959 default: /* Other constants */
3c50106f
RH
14960 *total = 2;
14961 break;
881b2a96
RS
14962 case 0:
14963 case -1:
3c50106f
RH
14964 /* Start with (MEM (SYMBOL_REF)), since that's where
14965 it'll probably end up. Add a penalty for size. */
14966 *total = (COSTS_N_INSNS (1)
3504dad3 14967 + (flag_pic != 0 && !TARGET_64BIT)
3c50106f
RH
14968 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
14969 break;
14970 }
14971 return true;
14972
14973 case ZERO_EXTEND:
14974 /* The zero extensions is often completely free on x86_64, so make
14975 it as cheap as possible. */
14976 if (TARGET_64BIT && mode == DImode
14977 && GET_MODE (XEXP (x, 0)) == SImode)
14978 *total = 1;
14979 else if (TARGET_ZERO_EXTEND_WITH_AND)
14980 *total = COSTS_N_INSNS (ix86_cost->add);
14981 else
14982 *total = COSTS_N_INSNS (ix86_cost->movzx);
14983 return false;
14984
14985 case SIGN_EXTEND:
14986 *total = COSTS_N_INSNS (ix86_cost->movsx);
14987 return false;
14988
14989 case ASHIFT:
14990 if (GET_CODE (XEXP (x, 1)) == CONST_INT
14991 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
14992 {
14993 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
14994 if (value == 1)
14995 {
14996 *total = COSTS_N_INSNS (ix86_cost->add);
14997 return false;
14998 }
14999 if ((value == 2 || value == 3)
3c50106f
RH
15000 && ix86_cost->lea <= ix86_cost->shift_const)
15001 {
15002 *total = COSTS_N_INSNS (ix86_cost->lea);
15003 return false;
15004 }
15005 }
5efb1046 15006 /* FALLTHRU */
3c50106f
RH
15007
15008 case ROTATE:
15009 case ASHIFTRT:
15010 case LSHIFTRT:
15011 case ROTATERT:
15012 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
15013 {
15014 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15015 {
15016 if (INTVAL (XEXP (x, 1)) > 32)
15017 *total = COSTS_N_INSNS(ix86_cost->shift_const + 2);
15018 else
15019 *total = COSTS_N_INSNS(ix86_cost->shift_const * 2);
15020 }
15021 else
15022 {
15023 if (GET_CODE (XEXP (x, 1)) == AND)
15024 *total = COSTS_N_INSNS(ix86_cost->shift_var * 2);
15025 else
15026 *total = COSTS_N_INSNS(ix86_cost->shift_var * 6 + 2);
15027 }
15028 }
15029 else
15030 {
15031 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15032 *total = COSTS_N_INSNS (ix86_cost->shift_const);
15033 else
15034 *total = COSTS_N_INSNS (ix86_cost->shift_var);
15035 }
15036 return false;
15037
15038 case MULT:
15039 if (FLOAT_MODE_P (mode))
3c50106f 15040 {
4a5eab38
PB
15041 *total = COSTS_N_INSNS (ix86_cost->fmul);
15042 return false;
3c50106f
RH
15043 }
15044 else
15045 {
4a5eab38
PB
15046 rtx op0 = XEXP (x, 0);
15047 rtx op1 = XEXP (x, 1);
15048 int nbits;
15049 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15050 {
15051 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
15052 for (nbits = 0; value != 0; value &= value - 1)
15053 nbits++;
15054 }
15055 else
15056 /* This is arbitrary. */
15057 nbits = 7;
15058
15059 /* Compute costs correctly for widening multiplication. */
15060 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op1) == ZERO_EXTEND)
15061 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
15062 == GET_MODE_SIZE (mode))
15063 {
15064 int is_mulwiden = 0;
15065 enum machine_mode inner_mode = GET_MODE (op0);
15066
15067 if (GET_CODE (op0) == GET_CODE (op1))
15068 is_mulwiden = 1, op1 = XEXP (op1, 0);
15069 else if (GET_CODE (op1) == CONST_INT)
15070 {
15071 if (GET_CODE (op0) == SIGN_EXTEND)
15072 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
15073 == INTVAL (op1);
15074 else
15075 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
15076 }
15077
15078 if (is_mulwiden)
15079 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
15080 }
15081
15082 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
15083 + nbits * ix86_cost->mult_bit)
15084 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code);
15085
15086 return true;
3c50106f 15087 }
3c50106f
RH
15088
15089 case DIV:
15090 case UDIV:
15091 case MOD:
15092 case UMOD:
15093 if (FLOAT_MODE_P (mode))
15094 *total = COSTS_N_INSNS (ix86_cost->fdiv);
15095 else
15096 *total = COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)]);
15097 return false;
15098
15099 case PLUS:
15100 if (FLOAT_MODE_P (mode))
15101 *total = COSTS_N_INSNS (ix86_cost->fadd);
e0c00392 15102 else if (GET_MODE_CLASS (mode) == MODE_INT
3c50106f
RH
15103 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
15104 {
15105 if (GET_CODE (XEXP (x, 0)) == PLUS
15106 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
15107 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
15108 && CONSTANT_P (XEXP (x, 1)))
15109 {
15110 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
15111 if (val == 2 || val == 4 || val == 8)
15112 {
15113 *total = COSTS_N_INSNS (ix86_cost->lea);
15114 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
15115 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
15116 outer_code);
15117 *total += rtx_cost (XEXP (x, 1), outer_code);
15118 return true;
15119 }
15120 }
15121 else if (GET_CODE (XEXP (x, 0)) == MULT
15122 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
15123 {
15124 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
15125 if (val == 2 || val == 4 || val == 8)
15126 {
15127 *total = COSTS_N_INSNS (ix86_cost->lea);
15128 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
15129 *total += rtx_cost (XEXP (x, 1), outer_code);
15130 return true;
15131 }
15132 }
15133 else if (GET_CODE (XEXP (x, 0)) == PLUS)
15134 {
15135 *total = COSTS_N_INSNS (ix86_cost->lea);
15136 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
15137 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
15138 *total += rtx_cost (XEXP (x, 1), outer_code);
15139 return true;
15140 }
15141 }
5efb1046 15142 /* FALLTHRU */
3c50106f
RH
15143
15144 case MINUS:
15145 if (FLOAT_MODE_P (mode))
15146 {
15147 *total = COSTS_N_INSNS (ix86_cost->fadd);
15148 return false;
15149 }
5efb1046 15150 /* FALLTHRU */
3c50106f
RH
15151
15152 case AND:
15153 case IOR:
15154 case XOR:
15155 if (!TARGET_64BIT && mode == DImode)
15156 {
15157 *total = (COSTS_N_INSNS (ix86_cost->add) * 2
15158 + (rtx_cost (XEXP (x, 0), outer_code)
15159 << (GET_MODE (XEXP (x, 0)) != DImode))
15160 + (rtx_cost (XEXP (x, 1), outer_code)
b96a374d 15161 << (GET_MODE (XEXP (x, 1)) != DImode)));
3c50106f
RH
15162 return true;
15163 }
5efb1046 15164 /* FALLTHRU */
3c50106f
RH
15165
15166 case NEG:
15167 if (FLOAT_MODE_P (mode))
15168 {
15169 *total = COSTS_N_INSNS (ix86_cost->fchs);
15170 return false;
15171 }
5efb1046 15172 /* FALLTHRU */
3c50106f
RH
15173
15174 case NOT:
15175 if (!TARGET_64BIT && mode == DImode)
15176 *total = COSTS_N_INSNS (ix86_cost->add * 2);
15177 else
15178 *total = COSTS_N_INSNS (ix86_cost->add);
15179 return false;
15180
15181 case FLOAT_EXTEND:
15182 if (!TARGET_SSE_MATH || !VALID_SSE_REG_MODE (mode))
15183 *total = 0;
15184 return false;
15185
15186 case ABS:
15187 if (FLOAT_MODE_P (mode))
15188 *total = COSTS_N_INSNS (ix86_cost->fabs);
15189 return false;
15190
15191 case SQRT:
15192 if (FLOAT_MODE_P (mode))
15193 *total = COSTS_N_INSNS (ix86_cost->fsqrt);
15194 return false;
15195
74dc3e94
RH
15196 case UNSPEC:
15197 if (XINT (x, 1) == UNSPEC_TP)
15198 *total = 0;
15199 return false;
15200
3c50106f
RH
15201 default:
15202 return false;
15203 }
15204}
15205
21c318ba 15206#if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
2cc07db4 15207static void
b96a374d 15208ix86_svr3_asm_out_constructor (rtx symbol, int priority ATTRIBUTE_UNUSED)
2cc07db4
RH
15209{
15210 init_section ();
15211 fputs ("\tpushl $", asm_out_file);
15212 assemble_name (asm_out_file, XSTR (symbol, 0));
15213 fputc ('\n', asm_out_file);
15214}
15215#endif
162f023b 15216
b069de3b
SS
15217#if TARGET_MACHO
15218
15219static int current_machopic_label_num;
15220
15221/* Given a symbol name and its associated stub, write out the
15222 definition of the stub. */
15223
15224void
b96a374d 15225machopic_output_stub (FILE *file, const char *symb, const char *stub)
b069de3b
SS
15226{
15227 unsigned int length;
15228 char *binder_name, *symbol_name, lazy_ptr_name[32];
15229 int label = ++current_machopic_label_num;
15230
15231 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
15232 symb = (*targetm.strip_name_encoding) (symb);
15233
15234 length = strlen (stub);
15235 binder_name = alloca (length + 32);
15236 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
15237
15238 length = strlen (symb);
15239 symbol_name = alloca (length + 32);
15240 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
15241
15242 sprintf (lazy_ptr_name, "L%d$lz", label);
15243
15244 if (MACHOPIC_PURE)
15245 machopic_picsymbol_stub_section ();
15246 else
15247 machopic_symbol_stub_section ();
15248
15249 fprintf (file, "%s:\n", stub);
15250 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
15251
15252 if (MACHOPIC_PURE)
15253 {
15254 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
15255 fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
15256 fprintf (file, "\tjmp %%edx\n");
15257 }
15258 else
15259 fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
b96a374d 15260
b069de3b 15261 fprintf (file, "%s:\n", binder_name);
b96a374d 15262
b069de3b
SS
15263 if (MACHOPIC_PURE)
15264 {
15265 fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
15266 fprintf (file, "\tpushl %%eax\n");
15267 }
15268 else
15269 fprintf (file, "\t pushl $%s\n", lazy_ptr_name);
15270
15271 fprintf (file, "\tjmp dyld_stub_binding_helper\n");
15272
15273 machopic_lazy_symbol_ptr_section ();
15274 fprintf (file, "%s:\n", lazy_ptr_name);
15275 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
15276 fprintf (file, "\t.long %s\n", binder_name);
15277}
15278#endif /* TARGET_MACHO */
15279
162f023b
JH
15280/* Order the registers for register allocator. */
15281
15282void
b96a374d 15283x86_order_regs_for_local_alloc (void)
162f023b
JH
15284{
15285 int pos = 0;
15286 int i;
15287
15288 /* First allocate the local general purpose registers. */
15289 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15290 if (GENERAL_REGNO_P (i) && call_used_regs[i])
15291 reg_alloc_order [pos++] = i;
15292
15293 /* Global general purpose registers. */
15294 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15295 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
15296 reg_alloc_order [pos++] = i;
15297
15298 /* x87 registers come first in case we are doing FP math
15299 using them. */
15300 if (!TARGET_SSE_MATH)
15301 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15302 reg_alloc_order [pos++] = i;
fce5a9f2 15303
162f023b
JH
15304 /* SSE registers. */
15305 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
15306 reg_alloc_order [pos++] = i;
15307 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
15308 reg_alloc_order [pos++] = i;
15309
d1f87653 15310 /* x87 registers. */
162f023b
JH
15311 if (TARGET_SSE_MATH)
15312 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15313 reg_alloc_order [pos++] = i;
15314
15315 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
15316 reg_alloc_order [pos++] = i;
15317
15318 /* Initialize the rest of array as we do not allocate some registers
15319 at all. */
15320 while (pos < FIRST_PSEUDO_REGISTER)
15321 reg_alloc_order [pos++] = 0;
15322}
194734e9 15323
4977bab6
ZW
15324#ifndef TARGET_USE_MS_BITFIELD_LAYOUT
15325#define TARGET_USE_MS_BITFIELD_LAYOUT 0
15326#endif
15327
fe77449a
DR
15328/* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
15329 struct attribute_spec.handler. */
15330static tree
b96a374d
AJ
15331ix86_handle_struct_attribute (tree *node, tree name,
15332 tree args ATTRIBUTE_UNUSED,
15333 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
fe77449a
DR
15334{
15335 tree *type = NULL;
15336 if (DECL_P (*node))
15337 {
15338 if (TREE_CODE (*node) == TYPE_DECL)
15339 type = &TREE_TYPE (*node);
15340 }
15341 else
15342 type = node;
15343
15344 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
15345 || TREE_CODE (*type) == UNION_TYPE)))
15346 {
15347 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
15348 *no_add_attrs = true;
15349 }
15350
15351 else if ((is_attribute_p ("ms_struct", name)
15352 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
15353 || ((is_attribute_p ("gcc_struct", name)
15354 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
15355 {
15356 warning ("`%s' incompatible attribute ignored",
15357 IDENTIFIER_POINTER (name));
15358 *no_add_attrs = true;
15359 }
15360
15361 return NULL_TREE;
15362}
15363
4977bab6 15364static bool
b96a374d 15365ix86_ms_bitfield_layout_p (tree record_type)
4977bab6 15366{
fe77449a 15367 return (TARGET_USE_MS_BITFIELD_LAYOUT &&
021bad8e 15368 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
fe77449a 15369 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
4977bab6
ZW
15370}
15371
483ab821
MM
15372/* Returns an expression indicating where the this parameter is
15373 located on entry to the FUNCTION. */
15374
15375static rtx
b96a374d 15376x86_this_parameter (tree function)
483ab821
MM
15377{
15378 tree type = TREE_TYPE (function);
15379
3961e8fe
RH
15380 if (TARGET_64BIT)
15381 {
61f71b34 15382 int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
3961e8fe
RH
15383 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
15384 }
15385
e767b5be 15386 if (ix86_function_regparm (type, function) > 0)
483ab821
MM
15387 {
15388 tree parm;
15389
15390 parm = TYPE_ARG_TYPES (type);
15391 /* Figure out whether or not the function has a variable number of
15392 arguments. */
3961e8fe 15393 for (; parm; parm = TREE_CHAIN (parm))
483ab821
MM
15394 if (TREE_VALUE (parm) == void_type_node)
15395 break;
e767b5be 15396 /* If not, the this parameter is in the first argument. */
483ab821 15397 if (parm)
e767b5be
JH
15398 {
15399 int regno = 0;
15400 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
15401 regno = 2;
02e02343 15402 return gen_rtx_REG (SImode, regno);
e767b5be 15403 }
483ab821
MM
15404 }
15405
61f71b34 15406 if (aggregate_value_p (TREE_TYPE (type), type))
483ab821
MM
15407 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
15408 else
15409 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
15410}
15411
3961e8fe
RH
15412/* Determine whether x86_output_mi_thunk can succeed. */
15413
15414static bool
b96a374d
AJ
15415x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
15416 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
15417 HOST_WIDE_INT vcall_offset, tree function)
3961e8fe
RH
15418{
15419 /* 64-bit can handle anything. */
15420 if (TARGET_64BIT)
15421 return true;
15422
15423 /* For 32-bit, everything's fine if we have one free register. */
e767b5be 15424 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
3961e8fe
RH
15425 return true;
15426
15427 /* Need a free register for vcall_offset. */
15428 if (vcall_offset)
15429 return false;
15430
15431 /* Need a free register for GOT references. */
15432 if (flag_pic && !(*targetm.binds_local_p) (function))
15433 return false;
15434
15435 /* Otherwise ok. */
15436 return true;
15437}
15438
15439/* Output the assembler code for a thunk function. THUNK_DECL is the
15440 declaration for the thunk function itself, FUNCTION is the decl for
15441 the target function. DELTA is an immediate constant offset to be
272d0bee 15442 added to THIS. If VCALL_OFFSET is nonzero, the word at
3961e8fe 15443 *(*this + vcall_offset) should be added to THIS. */
483ab821 15444
c590b625 15445static void
b96a374d
AJ
15446x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
15447 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
15448 HOST_WIDE_INT vcall_offset, tree function)
194734e9 15449{
194734e9 15450 rtx xops[3];
3961e8fe
RH
15451 rtx this = x86_this_parameter (function);
15452 rtx this_reg, tmp;
194734e9 15453
3961e8fe
RH
15454 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
15455 pull it in now and let DELTA benefit. */
15456 if (REG_P (this))
15457 this_reg = this;
15458 else if (vcall_offset)
15459 {
15460 /* Put the this parameter into %eax. */
15461 xops[0] = this;
15462 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
15463 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15464 }
15465 else
15466 this_reg = NULL_RTX;
15467
15468 /* Adjust the this parameter by a fixed constant. */
15469 if (delta)
194734e9 15470 {
483ab821 15471 xops[0] = GEN_INT (delta);
3961e8fe
RH
15472 xops[1] = this_reg ? this_reg : this;
15473 if (TARGET_64BIT)
194734e9 15474 {
3961e8fe
RH
15475 if (!x86_64_general_operand (xops[0], DImode))
15476 {
15477 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15478 xops[1] = tmp;
15479 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
15480 xops[0] = tmp;
15481 xops[1] = this;
15482 }
15483 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
194734e9
JH
15484 }
15485 else
3961e8fe 15486 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
194734e9 15487 }
3961e8fe
RH
15488
15489 /* Adjust the this parameter by a value stored in the vtable. */
15490 if (vcall_offset)
194734e9 15491 {
3961e8fe
RH
15492 if (TARGET_64BIT)
15493 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15494 else
e767b5be
JH
15495 {
15496 int tmp_regno = 2 /* ECX */;
15497 if (lookup_attribute ("fastcall",
15498 TYPE_ATTRIBUTES (TREE_TYPE (function))))
15499 tmp_regno = 0 /* EAX */;
15500 tmp = gen_rtx_REG (SImode, tmp_regno);
15501 }
483ab821 15502
3961e8fe
RH
15503 xops[0] = gen_rtx_MEM (Pmode, this_reg);
15504 xops[1] = tmp;
15505 if (TARGET_64BIT)
15506 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15507 else
15508 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
483ab821 15509
3961e8fe
RH
15510 /* Adjust the this parameter. */
15511 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
15512 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
15513 {
15514 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
15515 xops[0] = GEN_INT (vcall_offset);
15516 xops[1] = tmp2;
15517 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15518 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
483ab821 15519 }
3961e8fe
RH
15520 xops[1] = this_reg;
15521 if (TARGET_64BIT)
15522 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15523 else
15524 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15525 }
194734e9 15526
3961e8fe
RH
15527 /* If necessary, drop THIS back to its stack slot. */
15528 if (this_reg && this_reg != this)
15529 {
15530 xops[0] = this_reg;
15531 xops[1] = this;
15532 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15533 }
194734e9 15534
89ce1c8f 15535 xops[0] = XEXP (DECL_RTL (function), 0);
3961e8fe
RH
15536 if (TARGET_64BIT)
15537 {
15538 if (!flag_pic || (*targetm.binds_local_p) (function))
15539 output_asm_insn ("jmp\t%P0", xops);
15540 else
fcbe3b89 15541 {
89ce1c8f 15542 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
fcbe3b89
RH
15543 tmp = gen_rtx_CONST (Pmode, tmp);
15544 tmp = gen_rtx_MEM (QImode, tmp);
15545 xops[0] = tmp;
15546 output_asm_insn ("jmp\t%A0", xops);
15547 }
3961e8fe
RH
15548 }
15549 else
15550 {
15551 if (!flag_pic || (*targetm.binds_local_p) (function))
15552 output_asm_insn ("jmp\t%P0", xops);
194734e9 15553 else
21ff35fb 15554#if TARGET_MACHO
095fa594
SH
15555 if (TARGET_MACHO)
15556 {
11abc112
MM
15557 rtx sym_ref = XEXP (DECL_RTL (function), 0);
15558 tmp = (gen_rtx_SYMBOL_REF
15559 (Pmode,
15560 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
095fa594
SH
15561 tmp = gen_rtx_MEM (QImode, tmp);
15562 xops[0] = tmp;
15563 output_asm_insn ("jmp\t%0", xops);
15564 }
15565 else
15566#endif /* TARGET_MACHO */
194734e9 15567 {
3961e8fe
RH
15568 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
15569 output_set_got (tmp);
15570
15571 xops[1] = tmp;
15572 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
15573 output_asm_insn ("jmp\t{*}%1", xops);
194734e9
JH
15574 }
15575 }
15576}
e2500fed 15577
1bc7c5b6 15578static void
b96a374d 15579x86_file_start (void)
1bc7c5b6
ZW
15580{
15581 default_file_start ();
15582 if (X86_FILE_START_VERSION_DIRECTIVE)
15583 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
15584 if (X86_FILE_START_FLTUSED)
15585 fputs ("\t.global\t__fltused\n", asm_out_file);
15586 if (ix86_asm_dialect == ASM_INTEL)
15587 fputs ("\t.intel_syntax\n", asm_out_file);
15588}
15589
e932b21b 15590int
b96a374d 15591x86_field_alignment (tree field, int computed)
e932b21b
JH
15592{
15593 enum machine_mode mode;
ad9335eb
JJ
15594 tree type = TREE_TYPE (field);
15595
15596 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
e932b21b 15597 return computed;
ad9335eb
JJ
15598 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
15599 ? get_inner_array_type (type) : type);
39e3a681
JJ
15600 if (mode == DFmode || mode == DCmode
15601 || GET_MODE_CLASS (mode) == MODE_INT
15602 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
e932b21b
JH
15603 return MIN (32, computed);
15604 return computed;
15605}
15606
a5fa1ecd
JH
15607/* Output assembler code to FILE to increment profiler label # LABELNO
15608 for profiling a function entry. */
15609void
b96a374d 15610x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
a5fa1ecd
JH
15611{
15612 if (TARGET_64BIT)
15613 if (flag_pic)
15614 {
15615#ifndef NO_PROFILE_COUNTERS
15616 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
15617#endif
15618 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
15619 }
15620 else
15621 {
15622#ifndef NO_PROFILE_COUNTERS
15623 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
15624#endif
15625 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15626 }
15627 else if (flag_pic)
15628 {
15629#ifndef NO_PROFILE_COUNTERS
15630 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
15631 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
15632#endif
15633 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
15634 }
15635 else
15636 {
15637#ifndef NO_PROFILE_COUNTERS
ff6e2d3e 15638 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
a5fa1ecd
JH
15639 PROFILE_COUNT_REGISTER);
15640#endif
15641 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15642 }
15643}
15644
d2c49530
JH
15645/* We don't have exact information about the insn sizes, but we may assume
15646 quite safely that we are informed about all 1 byte insns and memory
c51e6d85 15647 address sizes. This is enough to eliminate unnecessary padding in
d2c49530
JH
15648 99% of cases. */
15649
15650static int
b96a374d 15651min_insn_size (rtx insn)
d2c49530
JH
15652{
15653 int l = 0;
15654
15655 if (!INSN_P (insn) || !active_insn_p (insn))
15656 return 0;
15657
15658 /* Discard alignments we've emit and jump instructions. */
15659 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
15660 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
15661 return 0;
15662 if (GET_CODE (insn) == JUMP_INSN
15663 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
15664 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
15665 return 0;
15666
15667 /* Important case - calls are always 5 bytes.
15668 It is common to have many calls in the row. */
15669 if (GET_CODE (insn) == CALL_INSN
15670 && symbolic_reference_mentioned_p (PATTERN (insn))
15671 && !SIBLING_CALL_P (insn))
15672 return 5;
15673 if (get_attr_length (insn) <= 1)
15674 return 1;
15675
15676 /* For normal instructions we may rely on the sizes of addresses
15677 and the presence of symbol to require 4 bytes of encoding.
15678 This is not the case for jumps where references are PC relative. */
15679 if (GET_CODE (insn) != JUMP_INSN)
15680 {
15681 l = get_attr_length_address (insn);
15682 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
15683 l = 4;
15684 }
15685 if (l)
15686 return 1+l;
15687 else
15688 return 2;
15689}
15690
c51e6d85 15691/* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
d2c49530
JH
15692 window. */
15693
15694static void
be04394b 15695ix86_avoid_jump_misspredicts (void)
d2c49530
JH
15696{
15697 rtx insn, start = get_insns ();
15698 int nbytes = 0, njumps = 0;
15699 int isjump = 0;
15700
15701 /* Look for all minimal intervals of instructions containing 4 jumps.
15702 The intervals are bounded by START and INSN. NBYTES is the total
15703 size of instructions in the interval including INSN and not including
15704 START. When the NBYTES is smaller than 16 bytes, it is possible
15705 that the end of START and INSN ends up in the same 16byte page.
15706
15707 The smallest offset in the page INSN can start is the case where START
15708 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
15709 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
15710 */
15711 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
15712 {
15713
15714 nbytes += min_insn_size (insn);
c263766c
RH
15715 if (dump_file)
15716 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
d2c49530
JH
15717 INSN_UID (insn), min_insn_size (insn));
15718 if ((GET_CODE (insn) == JUMP_INSN
15719 && GET_CODE (PATTERN (insn)) != ADDR_VEC
15720 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
15721 || GET_CODE (insn) == CALL_INSN)
15722 njumps++;
15723 else
15724 continue;
15725
15726 while (njumps > 3)
15727 {
15728 start = NEXT_INSN (start);
15729 if ((GET_CODE (start) == JUMP_INSN
15730 && GET_CODE (PATTERN (start)) != ADDR_VEC
15731 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
15732 || GET_CODE (start) == CALL_INSN)
15733 njumps--, isjump = 1;
15734 else
15735 isjump = 0;
15736 nbytes -= min_insn_size (start);
15737 }
15738 if (njumps < 0)
15739 abort ();
c263766c
RH
15740 if (dump_file)
15741 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
d2c49530
JH
15742 INSN_UID (start), INSN_UID (insn), nbytes);
15743
15744 if (njumps == 3 && isjump && nbytes < 16)
15745 {
15746 int padsize = 15 - nbytes + min_insn_size (insn);
15747
c263766c
RH
15748 if (dump_file)
15749 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
15750 INSN_UID (insn), padsize);
d2c49530
JH
15751 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
15752 }
15753 }
15754}
15755
be04394b 15756/* AMD Athlon works faster
d1f87653 15757 when RET is not destination of conditional jump or directly preceded
2a500b9e
JH
15758 by other jump instruction. We avoid the penalty by inserting NOP just
15759 before the RET instructions in such cases. */
18dbd950 15760static void
be04394b 15761ix86_pad_returns (void)
2a500b9e
JH
15762{
15763 edge e;
15764
2a500b9e
JH
15765 for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
15766 {
15767 basic_block bb = e->src;
a813c111 15768 rtx ret = BB_END (bb);
2a500b9e 15769 rtx prev;
253c7a00 15770 bool replace = false;
2a500b9e 15771
253c7a00
JH
15772 if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
15773 || !maybe_hot_bb_p (bb))
2a500b9e 15774 continue;
4977bab6
ZW
15775 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
15776 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
15777 break;
2a500b9e
JH
15778 if (prev && GET_CODE (prev) == CODE_LABEL)
15779 {
15780 edge e;
15781 for (e = bb->pred; e; e = e->pred_next)
4977bab6 15782 if (EDGE_FREQUENCY (e) && e->src->index >= 0
2a500b9e 15783 && !(e->flags & EDGE_FALLTHRU))
253c7a00 15784 replace = true;
2a500b9e 15785 }
253c7a00 15786 if (!replace)
2a500b9e 15787 {
4977bab6 15788 prev = prev_active_insn (ret);
25f57a0e
JH
15789 if (prev
15790 && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
15791 || GET_CODE (prev) == CALL_INSN))
253c7a00 15792 replace = true;
c51e6d85 15793 /* Empty functions get branch mispredict even when the jump destination
4977bab6
ZW
15794 is not visible to us. */
15795 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
253c7a00
JH
15796 replace = true;
15797 }
15798 if (replace)
15799 {
15800 emit_insn_before (gen_return_internal_long (), ret);
15801 delete_insn (ret);
2a500b9e 15802 }
2a500b9e 15803 }
be04394b
JH
15804}
15805
15806/* Implement machine specific optimizations. We implement padding of returns
15807 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
15808static void
15809ix86_reorg (void)
15810{
15811 if (TARGET_ATHLON_K8 && optimize && !optimize_size)
15812 ix86_pad_returns ();
15813 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
15814 ix86_avoid_jump_misspredicts ();
2a500b9e
JH
15815}
15816
4977bab6
ZW
15817/* Return nonzero when QImode register that must be represented via REX prefix
15818 is used. */
15819bool
b96a374d 15820x86_extended_QIreg_mentioned_p (rtx insn)
4977bab6
ZW
15821{
15822 int i;
15823 extract_insn_cached (insn);
15824 for (i = 0; i < recog_data.n_operands; i++)
15825 if (REG_P (recog_data.operand[i])
15826 && REGNO (recog_data.operand[i]) >= 4)
15827 return true;
15828 return false;
15829}
15830
15831/* Return nonzero when P points to register encoded via REX prefix.
15832 Called via for_each_rtx. */
15833static int
b96a374d 15834extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
4977bab6
ZW
15835{
15836 unsigned int regno;
15837 if (!REG_P (*p))
15838 return 0;
15839 regno = REGNO (*p);
15840 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
15841}
15842
15843/* Return true when INSN mentions register that must be encoded using REX
15844 prefix. */
15845bool
b96a374d 15846x86_extended_reg_mentioned_p (rtx insn)
4977bab6
ZW
15847{
15848 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
15849}
15850
1d6ba901 15851/* Generate an unsigned DImode/SImode to FP conversion. This is the same code
8d705469
JH
15852 optabs would emit if we didn't have TFmode patterns. */
15853
15854void
b96a374d 15855x86_emit_floatuns (rtx operands[2])
8d705469
JH
15856{
15857 rtx neglab, donelab, i0, i1, f0, in, out;
1d6ba901
ZD
15858 enum machine_mode mode, inmode;
15859
15860 inmode = GET_MODE (operands[1]);
15861 if (inmode != SImode
15862 && inmode != DImode)
15863 abort ();
8d705469
JH
15864
15865 out = operands[0];
1d6ba901 15866 in = force_reg (inmode, operands[1]);
8d705469
JH
15867 mode = GET_MODE (out);
15868 neglab = gen_label_rtx ();
15869 donelab = gen_label_rtx ();
15870 i1 = gen_reg_rtx (Pmode);
15871 f0 = gen_reg_rtx (mode);
15872
15873 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
15874
15875 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
15876 emit_jump_insn (gen_jump (donelab));
15877 emit_barrier ();
15878
15879 emit_label (neglab);
15880
15881 i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15882 i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15883 i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
15884 expand_float (f0, i0, 0);
15885 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
15886
15887 emit_label (donelab);
15888}
15889
/* Initialize vector TARGET via VALS.  VALS is a PARALLEL whose element
   vector holds one rtx per vector element; elements may be constants or
   arbitrary values.  Strategy depends on how many trailing elements are
   constant.  */
void
ix86_expand_vector_init (rtx target, rtx vals)
{
  enum machine_mode mode = GET_MODE (target);
  int elt_size = GET_MODE_SIZE (GET_MODE_INNER (mode));
  int n_elts = (GET_MODE_SIZE (mode) / elt_size);
  int i;

  /* Scan from the last element down; after the loop, I is the highest
     index holding a non-constant value, or -1 if every element is a
     CONST_INT/CONST_DOUBLE.  */
  for (i = n_elts - 1; i >= 0; i--)
    if (GET_CODE (XVECEXP (vals, 0, i)) != CONST_INT
	&& GET_CODE (XVECEXP (vals, 0, i)) != CONST_DOUBLE)
      break;

  /* Few special cases first...
     ... constants are best loaded from constant pool.  */
  if (i < 0)
    {
      emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
      return;
    }

  /* ... values where only first field is non-constant are best loaded
     from the pool and overwritten via move later.  */
  if (!i)
    {
      /* Widen element 0 to a full vector so it can be the source of a
	 scalar-move instruction.  */
      rtx op = simplify_gen_subreg (mode, XVECEXP (vals, 0, 0),
				    GET_MODE_INNER (mode), 0);

      op = force_reg (mode, op);
      /* Temporarily replace element 0 with zero so the remaining constant
	 elements can be loaded from the pool in one move.  */
      XVECEXP (vals, 0, 0) = CONST0_RTX (GET_MODE_INNER (mode));
      emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
      /* Then patch the low element in with movsd/movss, which copy only
	 the bottom scalar and preserve the upper elements.  */
      switch (GET_MODE (target))
	{
	case V2DFmode:
	  emit_insn (gen_sse2_movsd (target, target, op));
	  break;
	case V4SFmode:
	  emit_insn (gen_sse_movss (target, target, op));
	  break;
	default:
	  /* Other modes fall through to a plain constant-pool load of the
	     zeroed vector; element 0 is left as zero here.  */
	  break;
	}
      return;
    }

  /* And the busy sequence doing rotations.  */
  switch (GET_MODE (target))
    {
    case V2DFmode:
      {
	/* Build each DFmode element up into its own V2DF register, then
	   interleave the two low halves.  */
	rtx vecop0 =
	  simplify_gen_subreg (V2DFmode, XVECEXP (vals, 0, 0), DFmode, 0);
	rtx vecop1 =
	  simplify_gen_subreg (V2DFmode, XVECEXP (vals, 0, 1), DFmode, 0);

	vecop0 = force_reg (V2DFmode, vecop0);
	vecop1 = force_reg (V2DFmode, vecop1);
	emit_insn (gen_sse2_unpcklpd (target, vecop0, vecop1));
      }
      break;
    case V4SFmode:
      {
	/* Two unpack levels: first pair up (1,3) and (0,2), then
	   interleave those to land elements in order 0,1,2,3.  */
	rtx vecop0 =
	  simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 0), SFmode, 0);
	rtx vecop1 =
	  simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 1), SFmode, 0);
	rtx vecop2 =
	  simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 2), SFmode, 0);
	rtx vecop3 =
	  simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 3), SFmode, 0);
	rtx tmp1 = gen_reg_rtx (V4SFmode);
	rtx tmp2 = gen_reg_rtx (V4SFmode);

	vecop0 = force_reg (V4SFmode, vecop0);
	vecop1 = force_reg (V4SFmode, vecop1);
	vecop2 = force_reg (V4SFmode, vecop2);
	vecop3 = force_reg (V4SFmode, vecop3);
	emit_insn (gen_sse_unpcklps (tmp1, vecop1, vecop3));
	emit_insn (gen_sse_unpcklps (tmp2, vecop0, vecop2));
	emit_insn (gen_sse_unpcklps (target, tmp2, tmp1));
      }
      break;
    default:
      abort ();
    }
}
15977
67dfe110
KH
15978/* Worker function for TARGET_MD_ASM_CLOBBERS.
15979
15980 We do this in the new i386 backend to maintain source compatibility
15981 with the old cc0-based compiler. */
15982
15983static tree
15984ix86_md_asm_clobbers (tree clobbers)
15985{
15986 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
15987 clobbers);
15988 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
15989 clobbers);
15990 clobbers = tree_cons (NULL_TREE, build_string (7, "dirflag"),
15991 clobbers);
15992 return clobbers;
15993}
15994
3c5cb3e4
KH
15995/* Worker function for REVERSE_CONDITION. */
15996
15997enum rtx_code
15998ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
15999{
16000 return (mode != CCFPmode && mode != CCFPUmode
16001 ? reverse_condition (code)
16002 : reverse_condition_maybe_unordered (code));
16003}
16004
5ea9cb6e
RS
16005/* Output code to perform an x87 FP register move, from OPERANDS[1]
16006 to OPERANDS[0]. */
16007
16008const char *
16009output_387_reg_move (rtx insn, rtx *operands)
16010{
16011 if (REG_P (operands[1])
16012 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
16013 {
16014 if (REGNO (operands[0]) == FIRST_STACK_REG
16015 && TARGET_USE_FFREEP)
16016 return "ffreep\t%y0";
16017 return "fstp\t%y0";
16018 }
16019 if (STACK_TOP_P (operands[0]))
16020 return "fld%z1\t%y1";
16021 return "fst\t%y0";
16022}
16023
5ae27cfa
UB
16024/* Output code to perform a conditional jump to LABEL, if C2 flag in
16025 FP status register is set. */
16026
16027void
16028ix86_emit_fp_unordered_jump (rtx label)
16029{
16030 rtx reg = gen_reg_rtx (HImode);
16031 rtx temp;
16032
16033 emit_insn (gen_x86_fnstsw_1 (reg));
2484cc35
UB
16034
16035 if (TARGET_USE_SAHF)
16036 {
16037 emit_insn (gen_x86_sahf_1 (reg));
16038
16039 temp = gen_rtx_REG (CCmode, FLAGS_REG);
16040 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
16041 }
16042 else
16043 {
16044 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
16045
16046 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
16047 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
16048 }
5ae27cfa 16049
5ae27cfa
UB
16050 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
16051 gen_rtx_LABEL_REF (VOIDmode, label),
16052 pc_rtx);
16053 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
16054 emit_jump_insn (temp);
16055}
16056
c2fcfa4f
UB
16057/* Output code to perform a log1p XFmode calculation. */
16058
16059void ix86_emit_i387_log1p (rtx op0, rtx op1)
16060{
16061 rtx label1 = gen_label_rtx ();
16062 rtx label2 = gen_label_rtx ();
16063
16064 rtx tmp = gen_reg_rtx (XFmode);
16065 rtx tmp2 = gen_reg_rtx (XFmode);
16066
16067 emit_insn (gen_absxf2 (tmp, op1));
16068 emit_insn (gen_cmpxf (tmp,
16069 CONST_DOUBLE_FROM_REAL_VALUE (
16070 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
16071 XFmode)));
16072 emit_jump_insn (gen_bge (label1));
16073
16074 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
16075 emit_insn (gen_fyl2xp1_xf3 (op0, tmp2, op1));
16076 emit_jump (label2);
16077
16078 emit_label (label1);
16079 emit_move_insn (tmp, CONST1_RTX (XFmode));
16080 emit_insn (gen_addxf3 (tmp, op1, tmp));
16081 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
16082 emit_insn (gen_fyl2x_xf3 (op0, tmp2, tmp));
16083
16084 emit_label (label2);
16085}
16086
e2500fed 16087#include "gt-i386.h"
This page took 4.856958 seconds and 5 git commands to generate.