/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004, 2005 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "flags.h"
#include "except.h"
#include "function.h"
#include "recog.h"
#include "expr.h"
#include "optabs.h"
#include "toplev.h"
#include "basic-block.h"
#include "ggc.h"
#include "target.h"
#include "target-def.h"
#include "langhooks.h"
#include "cgraph.h"
#include "tree-gimple.h"
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif

/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
/* Processor costs (relative to an add) */
static const
struct processor_costs size_cost = {  /* costs for tuning for size */
  2,                        /* cost of an add instruction */
  3,                        /* cost of a lea instruction */
  2,                        /* variable shift costs */
  3,                        /* constant shift costs */
  {3, 3, 3, 3, 5},          /* cost of starting a multiply */
  0,                        /* cost of multiply per each bit set */
  {3, 3, 3, 3, 5},          /* cost of a divide/mod */
  3,                        /* cost of movsx */
  3,                        /* cost of movzx */
  0,                        /* "large" insn */
  2,                        /* MOVE_RATIO */
  2,                        /* cost for loading QImode using movzbl */
  {2, 2, 2},                /* cost of loading integer registers
                               in QImode, HImode and SImode.
                               Relative to reg-reg move (2).  */
  {2, 2, 2},                /* cost of storing integer registers */
  2,                        /* cost of reg,reg fld/fst */
  {2, 2, 2},                /* cost of loading fp registers
                               in SFmode, DFmode and XFmode */
  {2, 2, 2},                /* cost of storing fp registers
                               in SFmode, DFmode and XFmode */
  3,                        /* cost of moving MMX register */
  {3, 3},                   /* cost of loading MMX registers
                               in SImode and DImode */
  {3, 3},                   /* cost of storing MMX registers
                               in SImode and DImode */
  3,                        /* cost of moving SSE register */
  {3, 3, 3},                /* cost of loading SSE registers
                               in SImode, DImode and TImode */
  {3, 3, 3},                /* cost of storing SSE registers
                               in SImode, DImode and TImode */
  3,                        /* MMX or SSE register to integer */
  0,                        /* size of prefetch block */
  0,                        /* number of parallel prefetches */
  1,                        /* Branch cost */
  2,                        /* cost of FADD and FSUB insns.  */
  2,                        /* cost of FMUL instruction.  */
  2,                        /* cost of FDIV instruction.  */
  2,                        /* cost of FABS instruction.  */
  2,                        /* cost of FCHS instruction.  */
  2,                        /* cost of FSQRT instruction.  */
};

/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = {  /* 386 specific costs */
  1,                        /* cost of an add instruction */
  1,                        /* cost of a lea instruction */
  3,                        /* variable shift costs */
  2,                        /* constant shift costs */
  {6, 6, 6, 6, 6},          /* cost of starting a multiply */
  1,                        /* cost of multiply per each bit set */
  {23, 23, 23, 23, 23},     /* cost of a divide/mod */
  3,                        /* cost of movsx */
  2,                        /* cost of movzx */
  15,                       /* "large" insn */
  3,                        /* MOVE_RATIO */
  4,                        /* cost for loading QImode using movzbl */
  {2, 4, 2},                /* cost of loading integer registers
                               in QImode, HImode and SImode.
                               Relative to reg-reg move (2).  */
  {2, 4, 2},                /* cost of storing integer registers */
  2,                        /* cost of reg,reg fld/fst */
  {8, 8, 8},                /* cost of loading fp registers
                               in SFmode, DFmode and XFmode */
  {8, 8, 8},                /* cost of storing fp registers
                               in SFmode, DFmode and XFmode */
  2,                        /* cost of moving MMX register */
  {4, 8},                   /* cost of loading MMX registers
                               in SImode and DImode */
  {4, 8},                   /* cost of storing MMX registers
                               in SImode and DImode */
  2,                        /* cost of moving SSE register */
  {4, 8, 16},               /* cost of loading SSE registers
                               in SImode, DImode and TImode */
  {4, 8, 16},               /* cost of storing SSE registers
                               in SImode, DImode and TImode */
  3,                        /* MMX or SSE register to integer */
  0,                        /* size of prefetch block */
  0,                        /* number of parallel prefetches */
  1,                        /* Branch cost */
  23,                       /* cost of FADD and FSUB insns.  */
  27,                       /* cost of FMUL instruction.  */
  88,                       /* cost of FDIV instruction.  */
  22,                       /* cost of FABS instruction.  */
  24,                       /* cost of FCHS instruction.  */
  122,                      /* cost of FSQRT instruction.  */
};

static const
struct processor_costs i486_cost = {  /* 486 specific costs */
  1,                        /* cost of an add instruction */
  1,                        /* cost of a lea instruction */
  3,                        /* variable shift costs */
  2,                        /* constant shift costs */
  {12, 12, 12, 12, 12},     /* cost of starting a multiply */
  1,                        /* cost of multiply per each bit set */
  {40, 40, 40, 40, 40},     /* cost of a divide/mod */
  3,                        /* cost of movsx */
  2,                        /* cost of movzx */
  15,                       /* "large" insn */
  3,                        /* MOVE_RATIO */
  4,                        /* cost for loading QImode using movzbl */
  {2, 4, 2},                /* cost of loading integer registers
                               in QImode, HImode and SImode.
                               Relative to reg-reg move (2).  */
  {2, 4, 2},                /* cost of storing integer registers */
  2,                        /* cost of reg,reg fld/fst */
  {8, 8, 8},                /* cost of loading fp registers
                               in SFmode, DFmode and XFmode */
  {8, 8, 8},                /* cost of storing fp registers
                               in SFmode, DFmode and XFmode */
  2,                        /* cost of moving MMX register */
  {4, 8},                   /* cost of loading MMX registers
                               in SImode and DImode */
  {4, 8},                   /* cost of storing MMX registers
                               in SImode and DImode */
  2,                        /* cost of moving SSE register */
  {4, 8, 16},               /* cost of loading SSE registers
                               in SImode, DImode and TImode */
  {4, 8, 16},               /* cost of storing SSE registers
                               in SImode, DImode and TImode */
  3,                        /* MMX or SSE register to integer */
  0,                        /* size of prefetch block */
  0,                        /* number of parallel prefetches */
  1,                        /* Branch cost */
  8,                        /* cost of FADD and FSUB insns.  */
  16,                       /* cost of FMUL instruction.  */
  73,                       /* cost of FDIV instruction.  */
  3,                        /* cost of FABS instruction.  */
  3,                        /* cost of FCHS instruction.  */
  83,                       /* cost of FSQRT instruction.  */
};

static const
struct processor_costs pentium_cost = {
  1,                        /* cost of an add instruction */
  1,                        /* cost of a lea instruction */
  4,                        /* variable shift costs */
  1,                        /* constant shift costs */
  {11, 11, 11, 11, 11},     /* cost of starting a multiply */
  0,                        /* cost of multiply per each bit set */
  {25, 25, 25, 25, 25},     /* cost of a divide/mod */
  3,                        /* cost of movsx */
  2,                        /* cost of movzx */
  8,                        /* "large" insn */
  6,                        /* MOVE_RATIO */
  6,                        /* cost for loading QImode using movzbl */
  {2, 4, 2},                /* cost of loading integer registers
                               in QImode, HImode and SImode.
                               Relative to reg-reg move (2).  */
  {2, 4, 2},                /* cost of storing integer registers */
  2,                        /* cost of reg,reg fld/fst */
  {2, 2, 6},                /* cost of loading fp registers
                               in SFmode, DFmode and XFmode */
  {4, 4, 6},                /* cost of storing fp registers
                               in SFmode, DFmode and XFmode */
  8,                        /* cost of moving MMX register */
  {8, 8},                   /* cost of loading MMX registers
                               in SImode and DImode */
  {8, 8},                   /* cost of storing MMX registers
                               in SImode and DImode */
  2,                        /* cost of moving SSE register */
  {4, 8, 16},               /* cost of loading SSE registers
                               in SImode, DImode and TImode */
  {4, 8, 16},               /* cost of storing SSE registers
                               in SImode, DImode and TImode */
  3,                        /* MMX or SSE register to integer */
  0,                        /* size of prefetch block */
  0,                        /* number of parallel prefetches */
  2,                        /* Branch cost */
  3,                        /* cost of FADD and FSUB insns.  */
  3,                        /* cost of FMUL instruction.  */
  39,                       /* cost of FDIV instruction.  */
  1,                        /* cost of FABS instruction.  */
  1,                        /* cost of FCHS instruction.  */
  70,                       /* cost of FSQRT instruction.  */
};

static const
struct processor_costs pentiumpro_cost = {
  1,                        /* cost of an add instruction */
  1,                        /* cost of a lea instruction */
  1,                        /* variable shift costs */
  1,                        /* constant shift costs */
  {4, 4, 4, 4, 4},          /* cost of starting a multiply */
  0,                        /* cost of multiply per each bit set */
  {17, 17, 17, 17, 17},     /* cost of a divide/mod */
  1,                        /* cost of movsx */
  1,                        /* cost of movzx */
  8,                        /* "large" insn */
  6,                        /* MOVE_RATIO */
  2,                        /* cost for loading QImode using movzbl */
  {4, 4, 4},                /* cost of loading integer registers
                               in QImode, HImode and SImode.
                               Relative to reg-reg move (2).  */
  {2, 2, 2},                /* cost of storing integer registers */
  2,                        /* cost of reg,reg fld/fst */
  {2, 2, 6},                /* cost of loading fp registers
                               in SFmode, DFmode and XFmode */
  {4, 4, 6},                /* cost of storing fp registers
                               in SFmode, DFmode and XFmode */
  2,                        /* cost of moving MMX register */
  {2, 2},                   /* cost of loading MMX registers
                               in SImode and DImode */
  {2, 2},                   /* cost of storing MMX registers
                               in SImode and DImode */
  2,                        /* cost of moving SSE register */
  {2, 2, 8},                /* cost of loading SSE registers
                               in SImode, DImode and TImode */
  {2, 2, 8},                /* cost of storing SSE registers
                               in SImode, DImode and TImode */
  3,                        /* MMX or SSE register to integer */
  32,                       /* size of prefetch block */
  6,                        /* number of parallel prefetches */
  2,                        /* Branch cost */
  3,                        /* cost of FADD and FSUB insns.  */
  5,                        /* cost of FMUL instruction.  */
  56,                       /* cost of FDIV instruction.  */
  2,                        /* cost of FABS instruction.  */
  2,                        /* cost of FCHS instruction.  */
  56,                       /* cost of FSQRT instruction.  */
};

static const
struct processor_costs k6_cost = {
  1,                        /* cost of an add instruction */
  2,                        /* cost of a lea instruction */
  1,                        /* variable shift costs */
  1,                        /* constant shift costs */
  {3, 3, 3, 3, 3},          /* cost of starting a multiply */
  0,                        /* cost of multiply per each bit set */
  {18, 18, 18, 18, 18},     /* cost of a divide/mod */
  2,                        /* cost of movsx */
  2,                        /* cost of movzx */
  8,                        /* "large" insn */
  4,                        /* MOVE_RATIO */
  3,                        /* cost for loading QImode using movzbl */
  {4, 5, 4},                /* cost of loading integer registers
                               in QImode, HImode and SImode.
                               Relative to reg-reg move (2).  */
  {2, 3, 2},                /* cost of storing integer registers */
  4,                        /* cost of reg,reg fld/fst */
  {6, 6, 6},                /* cost of loading fp registers
                               in SFmode, DFmode and XFmode */
  {4, 4, 4},                /* cost of storing fp registers
                               in SFmode, DFmode and XFmode */
  2,                        /* cost of moving MMX register */
  {2, 2},                   /* cost of loading MMX registers
                               in SImode and DImode */
  {2, 2},                   /* cost of storing MMX registers
                               in SImode and DImode */
  2,                        /* cost of moving SSE register */
  {2, 2, 8},                /* cost of loading SSE registers
                               in SImode, DImode and TImode */
  {2, 2, 8},                /* cost of storing SSE registers
                               in SImode, DImode and TImode */
  6,                        /* MMX or SSE register to integer */
  32,                       /* size of prefetch block */
  1,                        /* number of parallel prefetches */
  1,                        /* Branch cost */
  2,                        /* cost of FADD and FSUB insns.  */
  2,                        /* cost of FMUL instruction.  */
  56,                       /* cost of FDIV instruction.  */
  2,                        /* cost of FABS instruction.  */
  2,                        /* cost of FCHS instruction.  */
  56,                       /* cost of FSQRT instruction.  */
};

static const
struct processor_costs athlon_cost = {
  1,                        /* cost of an add instruction */
  2,                        /* cost of a lea instruction */
  1,                        /* variable shift costs */
  1,                        /* constant shift costs */
  {5, 5, 5, 5, 5},          /* cost of starting a multiply */
  0,                        /* cost of multiply per each bit set */
  {18, 26, 42, 74, 74},     /* cost of a divide/mod */
  1,                        /* cost of movsx */
  1,                        /* cost of movzx */
  8,                        /* "large" insn */
  9,                        /* MOVE_RATIO */
  4,                        /* cost for loading QImode using movzbl */
  {3, 4, 3},                /* cost of loading integer registers
                               in QImode, HImode and SImode.
                               Relative to reg-reg move (2).  */
  {3, 4, 3},                /* cost of storing integer registers */
  4,                        /* cost of reg,reg fld/fst */
  {4, 4, 12},               /* cost of loading fp registers
                               in SFmode, DFmode and XFmode */
  {6, 6, 8},                /* cost of storing fp registers
                               in SFmode, DFmode and XFmode */
  2,                        /* cost of moving MMX register */
  {4, 4},                   /* cost of loading MMX registers
                               in SImode and DImode */
  {4, 4},                   /* cost of storing MMX registers
                               in SImode and DImode */
  2,                        /* cost of moving SSE register */
  {4, 4, 6},                /* cost of loading SSE registers
                               in SImode, DImode and TImode */
  {4, 4, 5},                /* cost of storing SSE registers
                               in SImode, DImode and TImode */
  5,                        /* MMX or SSE register to integer */
  64,                       /* size of prefetch block */
  6,                        /* number of parallel prefetches */
  5,                        /* Branch cost */
  4,                        /* cost of FADD and FSUB insns.  */
  4,                        /* cost of FMUL instruction.  */
  24,                       /* cost of FDIV instruction.  */
  2,                        /* cost of FABS instruction.  */
  2,                        /* cost of FCHS instruction.  */
  35,                       /* cost of FSQRT instruction.  */
};

static const
struct processor_costs k8_cost = {
  1,                        /* cost of an add instruction */
  2,                        /* cost of a lea instruction */
  1,                        /* variable shift costs */
  1,                        /* constant shift costs */
  {3, 4, 3, 4, 5},          /* cost of starting a multiply */
  0,                        /* cost of multiply per each bit set */
  {18, 26, 42, 74, 74},     /* cost of a divide/mod */
  1,                        /* cost of movsx */
  1,                        /* cost of movzx */
  8,                        /* "large" insn */
  9,                        /* MOVE_RATIO */
  4,                        /* cost for loading QImode using movzbl */
  {3, 4, 3},                /* cost of loading integer registers
                               in QImode, HImode and SImode.
                               Relative to reg-reg move (2).  */
  {3, 4, 3},                /* cost of storing integer registers */
  4,                        /* cost of reg,reg fld/fst */
  {4, 4, 12},               /* cost of loading fp registers
                               in SFmode, DFmode and XFmode */
  {6, 6, 8},                /* cost of storing fp registers
                               in SFmode, DFmode and XFmode */
  2,                        /* cost of moving MMX register */
  {3, 3},                   /* cost of loading MMX registers
                               in SImode and DImode */
  {4, 4},                   /* cost of storing MMX registers
                               in SImode and DImode */
  2,                        /* cost of moving SSE register */
  {4, 3, 6},                /* cost of loading SSE registers
                               in SImode, DImode and TImode */
  {4, 4, 5},                /* cost of storing SSE registers
                               in SImode, DImode and TImode */
  5,                        /* MMX or SSE register to integer */
  64,                       /* size of prefetch block */
  6,                        /* number of parallel prefetches */
  5,                        /* Branch cost */
  4,                        /* cost of FADD and FSUB insns.  */
  4,                        /* cost of FMUL instruction.  */
  19,                       /* cost of FDIV instruction.  */
  2,                        /* cost of FABS instruction.  */
  2,                        /* cost of FCHS instruction.  */
  35,                       /* cost of FSQRT instruction.  */
};

static const
struct processor_costs pentium4_cost = {
  1,                        /* cost of an add instruction */
  3,                        /* cost of a lea instruction */
  4,                        /* variable shift costs */
  4,                        /* constant shift costs */
  {15, 15, 15, 15, 15},     /* cost of starting a multiply */
  0,                        /* cost of multiply per each bit set */
  {56, 56, 56, 56, 56},     /* cost of a divide/mod */
  1,                        /* cost of movsx */
  1,                        /* cost of movzx */
  16,                       /* "large" insn */
  6,                        /* MOVE_RATIO */
  2,                        /* cost for loading QImode using movzbl */
  {4, 5, 4},                /* cost of loading integer registers
                               in QImode, HImode and SImode.
                               Relative to reg-reg move (2).  */
  {2, 3, 2},                /* cost of storing integer registers */
  2,                        /* cost of reg,reg fld/fst */
  {2, 2, 6},                /* cost of loading fp registers
                               in SFmode, DFmode and XFmode */
  {4, 4, 6},                /* cost of storing fp registers
                               in SFmode, DFmode and XFmode */
  2,                        /* cost of moving MMX register */
  {2, 2},                   /* cost of loading MMX registers
                               in SImode and DImode */
  {2, 2},                   /* cost of storing MMX registers
                               in SImode and DImode */
  12,                       /* cost of moving SSE register */
  {12, 12, 12},             /* cost of loading SSE registers
                               in SImode, DImode and TImode */
  {2, 2, 8},                /* cost of storing SSE registers
                               in SImode, DImode and TImode */
  10,                       /* MMX or SSE register to integer */
  64,                       /* size of prefetch block */
  6,                        /* number of parallel prefetches */
  2,                        /* Branch cost */
  5,                        /* cost of FADD and FSUB insns.  */
  7,                        /* cost of FMUL instruction.  */
  43,                       /* cost of FDIV instruction.  */
  2,                        /* cost of FABS instruction.  */
  2,                        /* cost of FCHS instruction.  */
  43,                       /* cost of FSQRT instruction.  */
};

static const
struct processor_costs nocona_cost = {
  1,                        /* cost of an add instruction */
  1,                        /* cost of a lea instruction */
  1,                        /* variable shift costs */
  1,                        /* constant shift costs */
  {10, 10, 10, 10, 10},     /* cost of starting a multiply */
  0,                        /* cost of multiply per each bit set */
  {66, 66, 66, 66, 66},     /* cost of a divide/mod */
  1,                        /* cost of movsx */
  1,                        /* cost of movzx */
  16,                       /* "large" insn */
  9,                        /* MOVE_RATIO */
  4,                        /* cost for loading QImode using movzbl */
  {4, 4, 4},                /* cost of loading integer registers
                               in QImode, HImode and SImode.
                               Relative to reg-reg move (2).  */
  {4, 4, 4},                /* cost of storing integer registers */
  3,                        /* cost of reg,reg fld/fst */
  {12, 12, 12},             /* cost of loading fp registers
                               in SFmode, DFmode and XFmode */
  {4, 4, 4},                /* cost of storing fp registers
                               in SFmode, DFmode and XFmode */
  6,                        /* cost of moving MMX register */
  {12, 12},                 /* cost of loading MMX registers
                               in SImode and DImode */
  {12, 12},                 /* cost of storing MMX registers
                               in SImode and DImode */
  6,                        /* cost of moving SSE register */
  {12, 12, 12},             /* cost of loading SSE registers
                               in SImode, DImode and TImode */
  {12, 12, 12},             /* cost of storing SSE registers
                               in SImode, DImode and TImode */
  8,                        /* MMX or SSE register to integer */
  128,                      /* size of prefetch block */
  8,                        /* number of parallel prefetches */
  1,                        /* Branch cost */
  6,                        /* cost of FADD and FSUB insns.  */
  8,                        /* cost of FMUL instruction.  */
  40,                       /* cost of FDIV instruction.  */
  3,                        /* cost of FABS instruction.  */
  3,                        /* cost of FCHS instruction.  */
  44,                       /* cost of FSQRT instruction.  */
};

const struct processor_costs *ix86_cost = &pentium_cost;

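/* Illustrative sketch (not part of the original file): the cost tables
   above are consumed through ix86_cost, with MODE_INDEX selecting the
   per-mode entry of the multiply/divide arrays, roughly as the rtx
   cost hooks do.  The field name mult_init is assumed from the
   struct processor_costs declaration in i386.h; the example function
   itself is hypothetical.  */
#if 0
static int
example_imul_cost (enum machine_mode mode)
{
  /* mult_init is the "cost of starting a multiply" array; index 4
     covers anything wider than DImode.  */
  return COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]);
}
#endif
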
/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_K6 (1<<PROCESSOR_K6)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
#define m_NOCONA (1<<PROCESSOR_NOCONA)

const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
const int x86_zero_extend_with_and = m_486 | m_PENT;
const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA /* m_386 | m_K6 */;
const int x86_double_with_add = ~m_386;
const int x86_use_bit_test = m_386;
const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
const int x86_3dnow_a = m_ATHLON_K8;
const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
/* Branch hints were put in P4 based on simulation results.  But after
   P4 was made, no performance benefit was observed with branch hints;
   they also increase code size.  As a result, icc never generates
   branch hints.  */
const int x86_branch_hints = 0;
const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4 | m_NOCONA;
const int x86_partial_reg_stall = m_PPRO;
const int x86_use_loop = m_K6;
const int x86_use_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
const int x86_use_mov0 = m_K6;
const int x86_use_cltd = ~(m_PENT | m_K6);
const int x86_read_modify_write = ~m_PENT;
const int x86_read_modify = ~(m_PENT | m_PPRO);
const int x86_split_long_moves = m_PPRO;
const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
const int x86_qimode_math = ~(0);
const int x86_promote_qi_regs = 0;
const int x86_himode_math = ~(m_PPRO);
const int x86_promote_hi_regs = m_PPRO;
const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA;
const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA;
const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4 | m_NOCONA;
const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4 | m_NOCONA;
const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO);
const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA;
const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA;
const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO;
const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO;
const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO;
const int x86_decompose_lea = m_PENT4 | m_NOCONA;
const int x86_shift1 = ~m_486;
const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO;
/* Set for machines where the type and dependencies are resolved on SSE
   register parts instead of whole registers, so we may maintain just
   lower part of scalar values in proper format leaving the upper part
   undefined.  */
const int x86_sse_split_regs = m_ATHLON_K8;
const int x86_sse_typeless_stores = m_ATHLON_K8;
const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
const int x86_use_ffreep = m_ATHLON_K8;
const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
const int x86_inter_unit_moves = ~(m_ATHLON_K8);
const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_NOCONA | m_PPRO;
/* Some CPU cores are not able to predict more than 4 branch instructions in
   the 16 byte window.  */
const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
const int x86_schedule = m_PPRO | m_ATHLON_K8 | m_K6 | m_PENT;
const int x86_use_bt = m_ATHLON_K8;

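/* Illustrative sketch (not part of the original file): i386.h turns
   each bitmask above into a TARGET_* predicate by testing the bit of
   the active tuning target.  TUNEMASK is an assumed spelling of
   (1 << ix86_tune); see i386.h for the real macros.  */
#if 0
#define TUNEMASK (1 << ix86_tune)
#define TARGET_USE_LEAVE (x86_use_leave & TUNEMASK)

  /* e.g. the epilogue emitter asks TARGET_USE_LEAVE to decide between
     `leave' and an explicit mov/pop pair.  */
#endif
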
/* In case the average insn count for a single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 20

/* Names for 8 (low), 8 (high), and 16-bit registers, respectively.  */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
static const char *const hi_reg_name[] = HI_REGISTER_NAMES;

/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */

enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  /* ax, dx, cx, bx */
  AREG, DREG, CREG, BREG,
  /* si, di, bp, sp */
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  /* FP registers */
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* arg pointer */
  NON_Q_REGS,
  /* flags, fpsr, dirflag, frame */
  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  MMX_REGS, MMX_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
};
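
/* Illustrative sketch (not part of the original file): i386.h is
   expected to wire the table up along these lines, so the reg-class
   query stays a single array lookup.  */
#if 0
#define REGNO_REG_CLASS(REGNO) (regclass_map[(REGNO)])

  /* e.g. regclass_map[0] is AREG, the class containing only %eax.  */
#endif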

/* The "default" register map used in 32bit mode.  */

int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 4, 5,               /* general regs */
  12, 13, 14, 15, 16, 17, 18, 19,       /* fp regs */
  -1, -1, -1, -1, -1,                   /* arg, flags, fpsr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,       /* SSE */
  29, 30, 31, 32, 33, 34, 35, 36,       /* MMX */
  -1, -1, -1, -1, -1, -1, -1, -1,       /* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,       /* extended SSE registers */
};

static int const x86_64_int_parameter_registers[6] =
{
  5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
  FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
};

static int const x86_64_int_return_registers[4] =
{
  0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
};
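
/* Worked reading of the tables above: the first integer argument
   travels in gcc regno 5 (%rdi) and the second in regno 4 (%rsi),
   while an integer return value comes back in regno 0 (%rax), with
   regno 1 (%rdx) holding the second half of a two-register return.  */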

/* The "default" register map used in 64bit mode.  */
int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 1, 2, 3, 4, 5, 6, 7,               /* general regs */
  33, 34, 35, 36, 37, 38, 39, 40,       /* fp regs */
  -1, -1, -1, -1, -1,                   /* arg, flags, fpsr, dir, frame */
  17, 18, 19, 20, 21, 22, 23, 24,       /* SSE */
  41, 42, 43, 44, 45, 46, 47, 48,       /* MMX */
  8, 9, 10, 11, 12, 13, 14, 15,         /* extended integer registers */
  25, 26, 27, 28, 29, 30, 31, 32,       /* extended SSE registers */
};

/* Define the register numbers to be used in Dwarf debugging information.
   The SVR4 reference port C compiler uses the following register numbers
   in its Dwarf output code:
        0 for %eax (gcc regno = 0)
        1 for %ecx (gcc regno = 2)
        2 for %edx (gcc regno = 1)
        3 for %ebx (gcc regno = 3)
        4 for %esp (gcc regno = 7)
        5 for %ebp (gcc regno = 6)
        6 for %esi (gcc regno = 4)
        7 for %edi (gcc regno = 5)
   The following three DWARF register numbers are never generated by
   the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
   believes these numbers have these meanings.
        8 for %eip    (no gcc equivalent)
        9 for %eflags (gcc regno = 17)
        10 for %trapno (no gcc equivalent)
   It is not at all clear how we should number the FP stack registers
   for the x86 architecture.  If the version of SDB on x86/svr4 were
   a bit less brain dead with respect to floating-point then we would
   have a precedent to follow with respect to DWARF register numbers
   for x86 FP registers, but the SDB on x86/svr4 is so completely
   broken with respect to FP registers that it is hardly worth thinking
   of it as something to strive for compatibility with.
   The version of x86/svr4 SDB I have at the moment does (partially)
   seem to believe that DWARF register number 11 is associated with
   the x86 register %st(0), but that's about all.  Higher DWARF
   register numbers don't seem to be associated with anything in
   particular, and even for DWARF regno 11, SDB only seems to under-
   stand that it should say that a variable lives in %st(0) (when
   asked via an `=' command) if we said it was in DWARF regno 11,
   but SDB still prints garbage when asked for the value of the
   variable in question (via a `/' command).
   (Also note that the labels SDB prints for various FP stack regs
   when doing an `x' command are all wrong.)
   Note that these problems generally don't affect the native SVR4
   C compiler because it doesn't allow the use of -O with -g and
   because when it is *not* optimizing, it allocates a memory
   location for each floating-point variable, and the memory
   location is what gets described in the DWARF AT_location
   attribute for the variable in question.
   Regardless of the severe mental illness of the x86/svr4 SDB, we
   do something sensible here and we use the following DWARF
   register numbers.  Note that these are all stack-top-relative
   numbers.
        11 for %st(0) (gcc regno = 8)
        12 for %st(1) (gcc regno = 9)
        13 for %st(2) (gcc regno = 10)
        14 for %st(3) (gcc regno = 11)
        15 for %st(4) (gcc regno = 12)
        16 for %st(5) (gcc regno = 13)
        17 for %st(6) (gcc regno = 14)
        18 for %st(7) (gcc regno = 15)
*/
int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 5, 4,               /* general regs */
  11, 12, 13, 14, 15, 16, 17, 18,       /* fp regs */
  -1, 9, -1, -1, -1,                    /* arg, flags, fpsr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,       /* SSE registers */
  29, 30, 31, 32, 33, 34, 35, 36,       /* MMX registers */
  -1, -1, -1, -1, -1, -1, -1, -1,       /* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,       /* extended SSE registers */
};

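/* Worked reading of the map above: gcc regno 4 is %esi, and
   svr4_dbx_register_map[4] == 6, matching the "6 for %esi" line in
   the SVR4 numbering described in the comment.  */
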
/* Test and compare insns in i386.md store the information needed to
   generate branch and scc insns here.  */

rtx ix86_compare_op0 = NULL_RTX;
rtx ix86_compare_op1 = NULL_RTX;

#define MAX_386_STACK_LOCALS 3
/* Size of the register save area.  */
#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)

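/* Worked example (values assumed from i386.h in 64-bit mode, where
   REGPARM_MAX is 6, UNITS_PER_WORD is 8 and SSE_REGPARM_MAX is 8):
   X86_64_VARARGS_SIZE = 6 * 8 + 8 * 16 = 176 bytes, the usual x86-64
   varargs register save area.  */
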
/* Define the structure for the machine field in struct function.  */

struct stack_local_entry GTY(())
{
  unsigned short mode;
  unsigned short n;
  rtx rtl;
  struct stack_local_entry *next;
};

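/* Illustrative sketch (not part of the original file): entries form a
   per-function linked list keyed by (mode, n), so a slot lookup is a
   simple scan.  The function name here is hypothetical; the real
   allocator also appends a new entry on a miss.  */
#if 0
static rtx
example_find_stack_local (struct stack_local_entry *list,
                          enum machine_mode mode, int n)
{
  struct stack_local_entry *s;

  for (s = list; s; s = s->next)
    if (s->mode == mode && s->n == n)
      return s->rtl;    /* reuse an existing slot */
  return NULL_RTX;      /* caller must create one */
}
#endif
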
/* Structure describing stack frame layout.
   Stack grows downward:

   [arguments]
                                        <- ARG_POINTER
   saved pc

   saved frame pointer if frame_pointer_needed
                                        <- HARD_FRAME_POINTER
   [saved regs]

   [padding1]          \
                        )
   [va_arg registers]  (
                        > to_allocate   <- FRAME_POINTER
   [frame]             (
                        )
   [padding2]          /
  */
struct ix86_frame
{
  int nregs;
  int padding1;
  int va_arg_size;
  HOST_WIDE_INT frame;
  int padding2;
  int outgoing_arguments_size;
  int red_zone_size;

  HOST_WIDE_INT to_allocate;
  /* The offsets relative to ARG_POINTER.  */
  HOST_WIDE_INT frame_pointer_offset;
  HOST_WIDE_INT hard_frame_pointer_offset;
  HOST_WIDE_INT stack_pointer_offset;

  /* When save_regs_using_mov is set, emit prologue using
     move instead of push instructions.  */
  bool save_regs_using_mov;
};

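/* Worked reading of the layout above: to_allocate covers padding1, the
   va_arg register save area, the frame proper and padding2, i.e. the
   bytes subtracted from the stack pointer after the pushes; the three
   *_offset fields are all measured from ARG_POINTER.  */
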
/* Used to enable/disable debugging features.  */
const char *ix86_debug_arg_string, *ix86_debug_addr_string;
/* Code model option as passed by user.  */
const char *ix86_cmodel_string;
/* Parsed value.  */
enum cmodel ix86_cmodel;
/* Asm dialect.  */
const char *ix86_asm_string;
enum asm_dialect ix86_asm_dialect = ASM_ATT;
/* TLS dialect.  */
const char *ix86_tls_dialect_string;
enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;

/* Which unit we are generating floating point math for.  */
enum fpmath_unit ix86_fpmath;

/* Which cpu are we scheduling for.  */
enum processor_type ix86_tune;
/* Which instruction set architecture to use.  */
enum processor_type ix86_arch;

/* Strings to hold which cpu and instruction set architecture to use.  */
const char *ix86_tune_string;           /* for -mtune=<xxx> */
const char *ix86_arch_string;           /* for -march=<xxx> */
const char *ix86_fpmath_string;         /* for -mfpmath=<xxx> */

/* # of registers to use to pass arguments.  */
const char *ix86_regparm_string;

/* true if sse prefetch instruction is not NOOP.  */
int x86_prefetch_sse;

/* ix86_regparm_string as a number */
int ix86_regparm;

/* Alignment to use for loops and jumps:  */

/* Power of two alignment for loops.  */
const char *ix86_align_loops_string;

/* Power of two alignment for non-loop jumps.  */
const char *ix86_align_jumps_string;

/* Power of two alignment for stack boundary in bytes.  */
const char *ix86_preferred_stack_boundary_string;

/* Preferred alignment for stack boundary in bits.  */
unsigned int ix86_preferred_stack_boundary;

/* Values 1-5: see jump.c */
int ix86_branch_cost;
const char *ix86_branch_cost_string;

/* Power of two alignment for functions.  */
const char *ix86_align_funcs_string;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
char internal_label_prefix[16];
int internal_label_prefix_len;
\f
static void output_pic_addr_const (FILE *, rtx, int);
static void put_condition_code (enum rtx_code, enum machine_mode,
                                int, int, FILE *);
static const char *get_some_local_dynamic_name (void);
static int get_some_local_dynamic_name_1 (rtx *, void *);
static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
                                                   rtx *);
static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
                                                   enum machine_mode);
static rtx get_thread_pointer (int);
static rtx legitimize_tls_address (rtx, enum tls_model, int);
static void get_pc_thunk_name (char [32], unsigned int);
static rtx gen_push (rtx);
static int ix86_flags_dependant (rtx, rtx, enum attr_type);
static int ix86_agi_dependant (rtx, rtx, enum attr_type);
static struct machine_function * ix86_init_machine_status (void);
static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
static int ix86_nsaved_regs (void);
static void ix86_emit_save_regs (void);
static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
static HOST_WIDE_INT ix86_GOT_alias_set (void);
static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
static rtx ix86_expand_aligntest (rtx, int);
static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
static int ix86_issue_rate (void);
static int ix86_adjust_cost (rtx, rtx, rtx, int);
static int ia32_multipass_dfa_lookahead (void);
static void ix86_init_mmx_sse_builtins (void);
static rtx x86_this_parameter (tree);
static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
                                 HOST_WIDE_INT, tree);
static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
static void x86_file_start (void);
static void ix86_reorg (void);
static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
static tree ix86_build_builtin_va_list (void);
static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
                                         tree, int *, int);
static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
static bool ix86_vector_mode_supported_p (enum machine_mode);

static int ix86_address_cost (rtx);
static bool ix86_cannot_force_const_mem (rtx);
static rtx ix86_delegitimize_address (rtx);

struct builtin_description;
static rtx ix86_expand_sse_comi (const struct builtin_description *,
                                 tree, rtx);
static rtx ix86_expand_sse_compare (const struct builtin_description *,
                                    tree, rtx);
static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
static rtx ix86_expand_store_builtin (enum insn_code, tree);
static rtx safe_vector_operand (rtx, enum machine_mode);
static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
static int ix86_fp_comparison_cost (enum rtx_code code);
static unsigned int ix86_select_alt_pic_regnum (void);
static int ix86_save_reg (unsigned int, int);
static void ix86_compute_frame_layout (struct ix86_frame *);
static int ix86_comp_type_attributes (tree, tree);
static int ix86_function_regparm (tree, tree);
const struct attribute_spec ix86_attribute_table[];
static bool ix86_function_ok_for_sibcall (tree, tree);
static tree ix86_handle_cdecl_attribute (tree *, tree, tree, int, bool *);
static tree ix86_handle_regparm_attribute (tree *, tree, tree, int, bool *);
static int ix86_value_regno (enum machine_mode);
static bool contains_128bit_aligned_vector_p (tree);
static rtx ix86_struct_value_rtx (tree, int);
static bool ix86_ms_bitfield_layout_p (tree);
static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
static int extended_reg_mentioned_1 (rtx *, void *);
static bool ix86_rtx_costs (rtx, int, int, int *);
static int min_insn_size (rtx);
static tree ix86_md_asm_clobbers (tree clobbers);
static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type);
static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
                                    tree, bool);
static void ix86_init_builtins (void);
static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int);

/* This function is only used on Solaris.  */
static void i386_solaris_elf_named_section (const char *, unsigned int, tree)
     ATTRIBUTE_UNUSED;

/* Register class used for passing given 64bit part of the argument.
   These represent classes as documented by the PS ABI, with the exception
   of SSESF, SSEDF classes, that are basically SSE class, just gcc will
   use SF or DFmode move instead of DImode to avoid reformatting penalties.

   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
   whenever possible (upper half does contain padding).  */
enum x86_64_reg_class
  {
    X86_64_NO_CLASS,
    X86_64_INTEGER_CLASS,
    X86_64_INTEGERSI_CLASS,
    X86_64_SSE_CLASS,
    X86_64_SSESF_CLASS,
    X86_64_SSEDF_CLASS,
    X86_64_SSEUP_CLASS,
    X86_64_X87_CLASS,
    X86_64_X87UP_CLASS,
    X86_64_COMPLEX_X87_CLASS,
    X86_64_MEMORY_CLASS
  };
static const char * const x86_64_reg_class_name[] = {
  "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
  "sseup", "x87", "x87up", "cplx87", "no"
};

#define MAX_CLASSES 4

/* Table of constants used by fldpi, fldln2, etc....  */
static REAL_VALUE_TYPE ext_80387_constants_table [5];
static bool ext_80387_constants_init = 0;
static void init_ext_80387_constants (void);
\f
/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#  undef TARGET_MERGE_DECL_ATTRIBUTES
#  define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue

#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
#ifdef ASM_QUAD
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
#endif

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  ia32_multipass_dfa_lookahead

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif
#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START x86_file_start

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ix86_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST ix86_address_cost

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
#undef TARGET_CC_MODES_COMPATIBLE
#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list

#undef TARGET_MD_ASM_CLOBBERS
#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers

#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif

struct gcc_target targetm = TARGET_INITIALIZER;

\f
/* The svr4 ABI for the i386 says that records and unions are returned
   in memory.  */
#ifndef DEFAULT_PCC_STRUCT_RETURN
#define DEFAULT_PCC_STRUCT_RETURN 1
#endif

/* Sometimes certain combinations of command options do not make
   sense on a particular target machine.  You can define a macro
   `OVERRIDE_OPTIONS' to take account of this.  This macro, if
   defined, is executed once just after all the command options have
   been parsed.

   Don't use this macro to turn on various extra optimizations for
   `-O'.  That is what `OPTIMIZATION_OPTIONS' is for.  */

void
override_options (void)
{
  int i;
  int ix86_tune_defaulted = 0;

  /* Comes from final.c -- no real reason to change it.  */
#define MAX_CODE_ALIGN 16

  static struct ptt
    {
      const struct processor_costs *cost;   /* Processor costs */
      const int target_enable;              /* Target flags to enable.  */
      const int target_disable;             /* Target flags to disable.  */
      const int align_loop;                 /* Default alignments.  */
      const int align_loop_max_skip;
      const int align_jump;
      const int align_jump_max_skip;
      const int align_func;
    }
  const processor_target_table[PROCESSOR_max] =
    {
      {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
      {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
      {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
      {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
      {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
      {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
      {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
      {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
      {&nocona_cost, 0, 0, 0, 0, 0, 0, 0}
    };

  static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
  static struct pta
    {
      const char *const name;           /* processor name or nickname.  */
      const enum processor_type processor;
      const enum pta_flags
        {
          PTA_SSE = 1,
          PTA_SSE2 = 2,
          PTA_SSE3 = 4,
          PTA_MMX = 8,
          PTA_PREFETCH_SSE = 16,
          PTA_3DNOW = 32,
          PTA_3DNOW_A = 64,
          PTA_64BIT = 128
        } flags;
    }
  const processor_alias_table[] =
    {
      {"i386", PROCESSOR_I386, 0},
      {"i486", PROCESSOR_I486, 0},
      {"i586", PROCESSOR_PENTIUM, 0},
      {"pentium", PROCESSOR_PENTIUM, 0},
      {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
      {"winchip-c6", PROCESSOR_I486, PTA_MMX},
      {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
      {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
      {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
      {"i686", PROCESSOR_PENTIUMPRO, 0},
      {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
      {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
      {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
      {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
      {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
      {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
                                       | PTA_MMX | PTA_PREFETCH_SSE},
      {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
                                        | PTA_MMX | PTA_PREFETCH_SSE},
      {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
                                     | PTA_MMX | PTA_PREFETCH_SSE},
      {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
                                   | PTA_MMX | PTA_PREFETCH_SSE},
      {"k6", PROCESSOR_K6, PTA_MMX},
      {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
      {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
      {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
                                   | PTA_3DNOW_A},
      {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
                                         | PTA_3DNOW | PTA_3DNOW_A},
      {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
                                     | PTA_3DNOW_A | PTA_SSE},
      {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
                                      | PTA_3DNOW_A | PTA_SSE},
      {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
                                      | PTA_3DNOW_A | PTA_SSE},
      {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
                               | PTA_SSE | PTA_SSE2},
      {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
                           | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
      {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
                                | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
      {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
                                 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
      {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
                                  | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
    };
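
  /* Example drawn from the table above: -march=athlon-xp selects
     PROCESSOR_ATHLON and, unless the user set them explicitly, the
     -march loop below turns on MASK_MMX, MASK_3DNOW, MASK_3DNOW_A and
     MASK_SSE from its PTA_* flags.  */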

  int const pta_size = ARRAY_SIZE (processor_alias_table);

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

  /* Set the default values for switches whose default depends on TARGET_64BIT
     in case they weren't overridden by command line options.  */
  if (TARGET_64BIT)
    {
      if (flag_omit_frame_pointer == 2)
        flag_omit_frame_pointer = 1;
      if (flag_asynchronous_unwind_tables == 2)
        flag_asynchronous_unwind_tables = 1;
      if (flag_pcc_struct_return == 2)
        flag_pcc_struct_return = 0;
    }
  else
    {
      if (flag_omit_frame_pointer == 2)
        flag_omit_frame_pointer = 0;
      if (flag_asynchronous_unwind_tables == 2)
        flag_asynchronous_unwind_tables = 0;
      if (flag_pcc_struct_return == 2)
        flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
    }

  if (!ix86_tune_string && ix86_arch_string)
    ix86_tune_string = ix86_arch_string;
  if (!ix86_tune_string)
    {
      ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
      ix86_tune_defaulted = 1;
    }
  if (!ix86_arch_string)
    ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";

  if (ix86_cmodel_string != 0)
    {
      if (!strcmp (ix86_cmodel_string, "small"))
        ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
      else if (flag_pic)
        sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
      else if (!strcmp (ix86_cmodel_string, "32"))
        ix86_cmodel = CM_32;
      else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
        ix86_cmodel = CM_KERNEL;
      else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
        ix86_cmodel = CM_MEDIUM;
      else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
        ix86_cmodel = CM_LARGE;
      else
        error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
    }
  else
    {
      ix86_cmodel = CM_32;
      if (TARGET_64BIT)
        ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
    }
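  /* Example of the defaulting above: plain `gcc -m64' takes the else
     arm and gets CM_SMALL (or CM_SMALL_PIC under -fpic), while -m32
     always ends up with CM_32.  */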
  if (ix86_asm_string != 0)
    {
      if (!strcmp (ix86_asm_string, "intel"))
        ix86_asm_dialect = ASM_INTEL;
      else if (!strcmp (ix86_asm_string, "att"))
        ix86_asm_dialect = ASM_ATT;
      else
        error ("bad value (%s) for -masm= switch", ix86_asm_string);
    }
  if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
    error ("code model %qs not supported in the %s bit mode",
           ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
  if (ix86_cmodel == CM_LARGE)
    sorry ("code model %<large%> not supported yet");
  if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
    sorry ("%i-bit mode not compiled in",
           (target_flags & MASK_64BIT) ? 64 : 32);

  for (i = 0; i < pta_size; i++)
    if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
      {
        ix86_arch = processor_alias_table[i].processor;
        /* Default cpu tuning to the architecture.  */
        ix86_tune = ix86_arch;
        if (processor_alias_table[i].flags & PTA_MMX
            && !(target_flags_explicit & MASK_MMX))
          target_flags |= MASK_MMX;
        if (processor_alias_table[i].flags & PTA_3DNOW
            && !(target_flags_explicit & MASK_3DNOW))
          target_flags |= MASK_3DNOW;
        if (processor_alias_table[i].flags & PTA_3DNOW_A
            && !(target_flags_explicit & MASK_3DNOW_A))
          target_flags |= MASK_3DNOW_A;
        if (processor_alias_table[i].flags & PTA_SSE
            && !(target_flags_explicit & MASK_SSE))
          target_flags |= MASK_SSE;
        if (processor_alias_table[i].flags & PTA_SSE2
            && !(target_flags_explicit & MASK_SSE2))
          target_flags |= MASK_SSE2;
        if (processor_alias_table[i].flags & PTA_SSE3
            && !(target_flags_explicit & MASK_SSE3))
          target_flags |= MASK_SSE3;
        if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
          x86_prefetch_sse = true;
        if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
          error ("CPU you selected does not support x86-64 "
                 "instruction set");
        break;
      }

  if (i == pta_size)
    error ("bad value (%s) for -march= switch", ix86_arch_string);

  for (i = 0; i < pta_size; i++)
    if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
      {
        ix86_tune = processor_alias_table[i].processor;
        if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
          {
            if (ix86_tune_defaulted)
              {
                ix86_tune_string = "x86-64";
                for (i = 0; i < pta_size; i++)
                  if (! strcmp (ix86_tune_string,
                                processor_alias_table[i].name))
                    break;
                ix86_tune = processor_alias_table[i].processor;
              }
            else
              error ("CPU you selected does not support x86-64 "
                     "instruction set");
          }
        /* Intel CPUs have always interpreted SSE prefetch instructions as
           NOPs; so, we can enable SSE prefetch instructions even when
           -mtune (rather than -march) points us to a processor that has them.
           However, the VIA C3 gives a SIGILL, so we only do that for i686 and
           higher processors.  */
        if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
          x86_prefetch_sse = true;
        break;
      }
  if (i == pta_size)
    error ("bad value (%s) for -mtune= switch", ix86_tune_string);

2ab0437e
JH
1340 if (optimize_size)
1341 ix86_cost = &size_cost;
1342 else
9e555526
RH
1343 ix86_cost = processor_target_table[ix86_tune].cost;
1344 target_flags |= processor_target_table[ix86_tune].target_enable;
1345 target_flags &= ~processor_target_table[ix86_tune].target_disable;
e075ae69 1346
36edd3cc
BS
1347 /* Arrange to set up i386_stack_locals for all functions. */
1348 init_machine_status = ix86_init_machine_status;
fce5a9f2 1349
0f290768 1350 /* Validate -mregparm= value. */
e075ae69 1351 if (ix86_regparm_string)
b08de47e 1352 {
400500c4
RK
1353 i = atoi (ix86_regparm_string);
1354 if (i < 0 || i > REGPARM_MAX)
1355 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1356 else
1357 ix86_regparm = i;
b08de47e 1358 }
0d7d98ee
JH
1359 else
1360 if (TARGET_64BIT)
1361 ix86_regparm = REGPARM_MAX;
b08de47e 1362
3e18fdf6 1363 /* If the user has provided any of the -malign-* options,
a4f31c00 1364 warn and use that value only if -falign-* is not set.
3e18fdf6 1365 Remove this code in GCC 3.2 or later. */
e075ae69 1366 if (ix86_align_loops_string)
b08de47e 1367 {
3e18fdf6
GK
1368 warning ("-malign-loops is obsolete, use -falign-loops");
1369 if (align_loops == 0)
1370 {
1371 i = atoi (ix86_align_loops_string);
1372 if (i < 0 || i > MAX_CODE_ALIGN)
1373 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1374 else
1375 align_loops = 1 << i;
1376 }
b08de47e 1377 }
3af4bd89 1378
e075ae69 1379 if (ix86_align_jumps_string)
b08de47e 1380 {
3e18fdf6
GK
1381 warning ("-malign-jumps is obsolete, use -falign-jumps");
1382 if (align_jumps == 0)
1383 {
1384 i = atoi (ix86_align_jumps_string);
1385 if (i < 0 || i > MAX_CODE_ALIGN)
1386 error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1387 else
1388 align_jumps = 1 << i;
1389 }
b08de47e 1390 }
b08de47e 1391
e075ae69 1392 if (ix86_align_funcs_string)
b08de47e 1393 {
3e18fdf6
GK
1394 warning ("-malign-functions is obsolete, use -falign-functions");
1395 if (align_functions == 0)
1396 {
1397 i = atoi (ix86_align_funcs_string);
1398 if (i < 0 || i > MAX_CODE_ALIGN)
1399 error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1400 else
1401 align_functions = 1 << i;
1402 }
b08de47e 1403 }
3af4bd89 1404
3e18fdf6 1405 /* Default align_* from the processor table. */
3e18fdf6 1406 if (align_loops == 0)
2cca7283 1407 {
9e555526
RH
1408 align_loops = processor_target_table[ix86_tune].align_loop;
1409 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
2cca7283 1410 }
3e18fdf6 1411 if (align_jumps == 0)
2cca7283 1412 {
9e555526
RH
1413 align_jumps = processor_target_table[ix86_tune].align_jump;
1414 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
2cca7283 1415 }
3e18fdf6 1416 if (align_functions == 0)
2cca7283 1417 {
9e555526 1418 align_functions = processor_target_table[ix86_tune].align_func;
2cca7283 1419 }
3e18fdf6 1420
e4c0478d 1421 /* Validate -mpreferred-stack-boundary= value, or provide default.
fbb83b43
AO
1422 The default of 128 bits is for Pentium III's SSE __m128, but we
1423 don't want additional code to keep the stack aligned when
1424 optimizing for code size. */
1425 ix86_preferred_stack_boundary = (optimize_size
ef49d42e 1426 ? TARGET_64BIT ? 128 : 32
fbb83b43 1427 : 128);
e075ae69 1428 if (ix86_preferred_stack_boundary_string)
3af4bd89 1429 {
400500c4 1430 i = atoi (ix86_preferred_stack_boundary_string);
ef49d42e 1431 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
c6257c5d 1432 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
ef49d42e 1433 TARGET_64BIT ? 4 : 2);
400500c4
RK
1434 else
1435 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
3af4bd89 1436 }
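/* Worked example (illustrative): the option argument is a power-of-two
   exponent, so -mpreferred-stack-boundary=4 yields
   (1 << 4) * BITS_PER_UNIT = 128 bits, i.e. a 16-byte aligned stack.
   The 32-bit minimum of 2 matches the 4-byte i386 ABI alignment, while
   64-bit code must keep at least 16-byte alignment. */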
77a989d1 1437
0f290768 1438 /* Validate -mbranch-cost= value, or provide default. */
9e555526 1439 ix86_branch_cost = processor_target_table[ix86_tune].cost->branch_cost;
e075ae69 1440 if (ix86_branch_cost_string)
804a8ee0 1441 {
400500c4
RK
1442 i = atoi (ix86_branch_cost_string);
1443 if (i < 0 || i > 5)
1444 error ("-mbranch-cost=%d is not between 0 and 5", i);
1445 else
1446 ix86_branch_cost = i;
804a8ee0 1447 }
804a8ee0 1448
f996902d
RH
1449 if (ix86_tls_dialect_string)
1450 {
1451 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1452 ix86_tls_dialect = TLS_DIALECT_GNU;
1453 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1454 ix86_tls_dialect = TLS_DIALECT_SUN;
1455 else
1456 error ("bad value (%s) for -mtls-dialect= switch",
1457 ix86_tls_dialect_string);
1458 }
1459
e9a25f70 1460 /* Keep nonleaf frame pointers. */
14c473b9
RS
1461 if (flag_omit_frame_pointer)
1462 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
1463 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
77a989d1 1464 flag_omit_frame_pointer = 1;
e075ae69
RH
1465
1466 /* If we're doing fast math, we don't care about comparison order
1467 wrt NaNs. This lets us use a shorter comparison sequence. */
de6c5979 1468 if (flag_unsafe_math_optimizations)
e075ae69
RH
1469 target_flags &= ~MASK_IEEE_FP;
1470
30c99a84
RH
1471 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1472 since the insns won't need emulation. */
9690a821 1473 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
30c99a84
RH
1474 target_flags &= ~MASK_NO_FANCY_MATH_387;
1475
ba2baa55
RS
1476 /* Likewise, if the target doesn't have a 387, or we've specified
1477 software floating point, don't use 387 inline intrinsics. */
1478 if (!TARGET_80387)
1479 target_flags |= MASK_NO_FANCY_MATH_387;
1480
9e200aaf
KC
1481 /* Turn on SSE2 builtins for -msse3. */
1482 if (TARGET_SSE3)
22c7c85e
L
1483 target_flags |= MASK_SSE2;
1484
1485 /* Turn on SSE builtins for -msse2. */
1486 if (TARGET_SSE2)
1487 target_flags |= MASK_SSE;
1488
a5370cf0
RH
1489 /* Turn on MMX builtins for -msse. */
1490 if (TARGET_SSE)
1491 {
1492 target_flags |= MASK_MMX & ~target_flags_explicit;
1493 x86_prefetch_sse = true;
1494 }
1495
1496 /* Turn on MMX builtins for 3Dnow. */
1497 if (TARGET_3DNOW)
1498 target_flags |= MASK_MMX;
1499
14f73b5a
JH
1500 if (TARGET_64BIT)
1501 {
1502 if (TARGET_ALIGN_DOUBLE)
c725bd79 1503 error ("-malign-double makes no sense in the 64bit mode");
14f73b5a 1504 if (TARGET_RTD)
c725bd79 1505 error ("-mrtd calling convention not supported in the 64bit mode");
a5370cf0
RH
1506
1507 /* Enable by default the SSE and MMX builtins. Do allow the user to
1508 explicitly disable any of these. In particular, disabling SSE and
1509 MMX for kernel code is extremely useful. */
1510 target_flags
1511 |= ((MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE)
1512 & ~target_flags_explicit);
1513
1514 if (TARGET_SSE)
1515 ix86_fpmath = FPMATH_SSE;
14f73b5a 1516 }
965f5423 1517 else
a5b378d6
JH
1518 {
1519 ix86_fpmath = FPMATH_387;
1520 /* i386 ABI does not specify red zone. It still makes sense to use it
1521 when the programmer takes care to keep the stack from being destroyed. */
1522 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
1523 target_flags |= MASK_NO_RED_ZONE;
1524 }
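/* Illustrative usage (not from the original source): kernel builds often
   pass -mno-sse -mno-mmx. Those options set the corresponding bits in
   target_flags_explicit, so the 64-bit defaults above cannot switch
   SSE/MMX back on, which is what the comment about kernel code relies
   on. */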
965f5423
JH
1525
1526 if (ix86_fpmath_string != 0)
1527 {
1528 if (! strcmp (ix86_fpmath_string, "387"))
1529 ix86_fpmath = FPMATH_387;
1530 else if (! strcmp (ix86_fpmath_string, "sse"))
1531 {
1532 if (!TARGET_SSE)
1533 {
1534 warning ("SSE instruction set disabled, using 387 arithmetic");
1535 ix86_fpmath = FPMATH_387;
1536 }
1537 else
1538 ix86_fpmath = FPMATH_SSE;
1539 }
1540 else if (! strcmp (ix86_fpmath_string, "387,sse")
1541 || ! strcmp (ix86_fpmath_string, "sse,387"))
1542 {
1543 if (!TARGET_SSE)
1544 {
1545 warning ("SSE instruction set disabled, using 387 arithmetic");
1546 ix86_fpmath = FPMATH_387;
1547 }
1548 else if (!TARGET_80387)
1549 {
1550 warning ("387 instruction set disabled, using SSE arithmetic");
1551 ix86_fpmath = FPMATH_SSE;
1552 }
1553 else
1554 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1555 }
fce5a9f2 1556 else
965f5423
JH
1557 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1558 }
14f73b5a 1559
9e555526 1560 if ((x86_accumulate_outgoing_args & TUNEMASK)
9ef1b13a 1561 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
c6036a37
JH
1562 && !optimize_size)
1563 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
623fe810
RH
1564
1565 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1566 {
1567 char *p;
1568 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1569 p = strchr (internal_label_prefix, 'X');
1570 internal_label_prefix_len = p - internal_label_prefix;
1571 *p = '\0';
1572 }
a5370cf0
RH
1573
1574 /* When scheduling description is not available, disable scheduler pass
1575 so it won't slow down the compilation and make x87 code slower. */
ad7b96a9
JH
1576 if (!TARGET_SCHEDULE)
1577 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
f5316dfe
MM
1578}
1579\f
32b5b1aa 1580void
b96a374d 1581optimization_options (int level, int size ATTRIBUTE_UNUSED)
32b5b1aa 1582{
e9a25f70
JL
1583 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1584 make the problem of having too few registers even worse. */
32b5b1aa
SC
1585#ifdef INSN_SCHEDULING
1586 if (level > 1)
1587 flag_schedule_insns = 0;
1588#endif
55ba61f3
JH
1589
1590 /* The default values of these switches depend on TARGET_64BIT,
1591 which is not known at this moment. Mark these values with 2 and
1592 let the user override them. If there is no command line option
1593 specifying them, we will set the defaults in override_options. */
1594 if (optimize >= 1)
1595 flag_omit_frame_pointer = 2;
1596 flag_pcc_struct_return = 2;
1597 flag_asynchronous_unwind_tables = 2;
4f514514
JM
1598#ifdef SUBTARGET_OPTIMIZATION_OPTIONS
1599 SUBTARGET_OPTIMIZATION_OPTIONS;
1600#endif
32b5b1aa 1601}
b08de47e 1602\f
91d231cb
JM
1603/* Table of valid machine attributes. */
1604const struct attribute_spec ix86_attribute_table[] =
b08de47e 1605{
91d231cb 1606 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
b08de47e
MM
1607 /* Stdcall attribute says callee is responsible for popping arguments
1608 if they are not variable. */
91d231cb 1609 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
e91f04de
CH
1610 /* Fastcall attribute says callee is responsible for popping arguments
1611 if they are not variable. */
1612 { "fastcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
91d231cb
JM
1613 /* Cdecl attribute says the callee is a normal C declaration */
1614 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
b08de47e 1615 /* Regparm attribute specifies how many integer arguments are to be
0f290768 1616 passed in registers. */
91d231cb 1617 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
b2ca3702
MM
1618#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1619 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
1620 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
3da1eb0b 1621 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
91d231cb 1622#endif
fe77449a
DR
1623 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1624 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
07a43492
DJ
1625#ifdef SUBTARGET_ATTRIBUTE_TABLE
1626 SUBTARGET_ATTRIBUTE_TABLE,
1627#endif
91d231cb
JM
1628 { NULL, 0, 0, false, false, false, NULL }
1629};
1630
5fbf0217
EB
1631/* Decide whether we can make a sibling call to a function. DECL is the
1632 declaration of the function being targeted by the call and EXP is the
1633 CALL_EXPR representing the call. */
4977bab6
ZW
1634
1635static bool
b96a374d 1636ix86_function_ok_for_sibcall (tree decl, tree exp)
4977bab6
ZW
1637{
1638 /* If we are generating position-independent code, we cannot sibcall
1639 optimize any indirect call, or a direct call to a global function,
1640 as the PLT requires %ebx be live. */
1641 if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
1642 return false;
1643
1644 /* If we are returning floats on the 80387 register stack, we cannot
1645 make a sibcall from a function that doesn't return a float to a
5fbf0217
EB
1646 function that does or, conversely, from a function that does return
1647 a float to a function that doesn't; the necessary stack adjustment
1648 would not be executed. */
4977bab6 1649 if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp)))
5fbf0217 1650 != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)))))
4977bab6
ZW
1651 return false;
1652
1653 /* If this call is indirect, we'll need to be able to use a call-clobbered
b96a374d 1654 register for the address of the target function. Make sure that all
4977bab6
ZW
1655 such registers are not used for passing parameters. */
1656 if (!decl && !TARGET_64BIT)
1657 {
e767b5be 1658 tree type;
4977bab6
ZW
1659
1660 /* We're looking at the CALL_EXPR, we need the type of the function. */
1661 type = TREE_OPERAND (exp, 0); /* pointer expression */
1662 type = TREE_TYPE (type); /* pointer type */
1663 type = TREE_TYPE (type); /* function type */
1664
e767b5be 1665 if (ix86_function_regparm (type, NULL) >= 3)
4977bab6
ZW
1666 {
1667 /* ??? Need to count the actual number of registers to be used,
1668 not the possible number of registers. Fix later. */
1669 return false;
1670 }
1671 }
1672
1673 /* Otherwise okay. That also includes certain types of indirect calls. */
1674 return true;
1675}
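/* Illustrative example (not from the original source): with -m32 -fpic,

     extern int f (int);
     int g (int x) { return f (x); }

   the call to F cannot become a sibcall, because F is global and the
   call would go through the PLT, which requires %ebx to hold the GOT
   pointer. */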
1676
e91f04de 1677/* Handle a "cdecl", "stdcall", or "fastcall" attribute;
91d231cb
JM
1678 arguments as in struct attribute_spec.handler. */
1679static tree
b96a374d
AJ
1680ix86_handle_cdecl_attribute (tree *node, tree name,
1681 tree args ATTRIBUTE_UNUSED,
1682 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
91d231cb
JM
1683{
1684 if (TREE_CODE (*node) != FUNCTION_TYPE
1685 && TREE_CODE (*node) != METHOD_TYPE
1686 && TREE_CODE (*node) != FIELD_DECL
1687 && TREE_CODE (*node) != TYPE_DECL)
b08de47e 1688 {
9e637a26 1689 warning ("%qs attribute only applies to functions",
91d231cb
JM
1690 IDENTIFIER_POINTER (name));
1691 *no_add_attrs = true;
1692 }
e91f04de
CH
1693 else
1694 {
1695 if (is_attribute_p ("fastcall", name))
1696 {
1697 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
1698 {
1699 error ("fastcall and stdcall attributes are not compatible");
1700 }
1701 else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
1702 {
1703 error ("fastcall and regparm attributes are not compatible");
1704 }
1705 }
1706 else if (is_attribute_p ("stdcall", name))
1707 {
1708 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1709 {
1710 error ("fastcall and stdcall attributes are not compatible");
1711 }
1712 }
1713 }
b08de47e 1714
91d231cb
JM
1715 if (TARGET_64BIT)
1716 {
9e637a26 1717 warning ("%qs attribute ignored", IDENTIFIER_POINTER (name));
91d231cb
JM
1718 *no_add_attrs = true;
1719 }
b08de47e 1720
91d231cb
JM
1721 return NULL_TREE;
1722}
b08de47e 1723
91d231cb
JM
1724/* Handle a "regparm" attribute;
1725 arguments as in struct attribute_spec.handler. */
1726static tree
b96a374d
AJ
1727ix86_handle_regparm_attribute (tree *node, tree name, tree args,
1728 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
91d231cb
JM
1729{
1730 if (TREE_CODE (*node) != FUNCTION_TYPE
1731 && TREE_CODE (*node) != METHOD_TYPE
1732 && TREE_CODE (*node) != FIELD_DECL
1733 && TREE_CODE (*node) != TYPE_DECL)
1734 {
9e637a26 1735 warning ("%qs attribute only applies to functions",
91d231cb
JM
1736 IDENTIFIER_POINTER (name));
1737 *no_add_attrs = true;
1738 }
1739 else
1740 {
1741 tree cst;
b08de47e 1742
91d231cb
JM
1743 cst = TREE_VALUE (args);
1744 if (TREE_CODE (cst) != INTEGER_CST)
1745 {
9e637a26 1746 warning ("%qs attribute requires an integer constant argument",
91d231cb
JM
1747 IDENTIFIER_POINTER (name));
1748 *no_add_attrs = true;
1749 }
1750 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1751 {
9e637a26 1752 warning ("argument to %qs attribute larger than %d",
91d231cb
JM
1753 IDENTIFIER_POINTER (name), REGPARM_MAX);
1754 *no_add_attrs = true;
1755 }
e91f04de
CH
1756
1757 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
e767b5be
JH
1758 {
1759 error ("fastcall and regparm attributes are not compatible");
1760 }
b08de47e
MM
1761 }
1762
91d231cb 1763 return NULL_TREE;
b08de47e
MM
1764}
1765
1766/* Return 0 if the attributes for two types are incompatible, 1 if they
1767 are compatible, and 2 if they are nearly compatible (which causes a
1768 warning to be generated). */
1769
8d8e52be 1770static int
b96a374d 1771ix86_comp_type_attributes (tree type1, tree type2)
b08de47e 1772{
0f290768 1773 /* Check for mismatch of non-default calling convention. */
27c38fbe 1774 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
afcfe58c
MM
1775
1776 if (TREE_CODE (type1) != FUNCTION_TYPE)
1777 return 1;
1778
b96a374d 1779 /* Check for mismatched fastcall types */
e91f04de
CH
1780 if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
1781 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
b96a374d 1782 return 0;
e91f04de 1783
afcfe58c 1784 /* Check for mismatched return types (cdecl vs stdcall). */
6093f019
RH
1785 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1786 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
265d94ac
MM
1787 return 0;
1788 if (ix86_function_regparm (type1, NULL)
1789 != ix86_function_regparm (type2, NULL))
afcfe58c 1790 return 0;
b08de47e
MM
1791 return 1;
1792}
b08de47e 1793\f
e767b5be
JH
1794/* Return the regparm value for a function with the indicated TYPE and DECL.
1795 DECL may be NULL when calling a function indirectly
839a4992 1796 or considering a libcall. */
483ab821
MM
1797
1798static int
e767b5be 1799ix86_function_regparm (tree type, tree decl)
483ab821
MM
1800{
1801 tree attr;
e767b5be
JH
1802 int regparm = ix86_regparm;
1803 bool user_convention = false;
483ab821 1804
e767b5be
JH
1805 if (!TARGET_64BIT)
1806 {
1807 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1808 if (attr)
1809 {
1810 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1811 user_convention = true;
1812 }
1813
1814 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
1815 {
1816 regparm = 2;
1817 user_convention = true;
1818 }
1819
1820 /* Use register calling convention for local functions when possible. */
1821 if (!TARGET_64BIT && !user_convention && decl
cb0bc263 1822 && flag_unit_at_a_time && !profile_flag)
e767b5be
JH
1823 {
1824 struct cgraph_local_info *i = cgraph_local_info (decl);
1825 if (i && i->local)
1826 {
1827 /* We can't use regparm(3) for nested functions as these use
1828 the static chain pointer in the third argument. */
1829 if (DECL_CONTEXT (decl) && !DECL_NO_STATIC_CHAIN (decl))
1830 regparm = 2;
1831 else
1832 regparm = 3;
1833 }
1834 }
1835 }
1836 return regparm;
483ab821
MM
1837}
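/* Illustrative example (not from the original source): given

     int __attribute__((regparm(2))) f (int a, int b);

   this returns 2, so A and B travel in registers (the i386 regparm
   order is %eax, %edx, %ecx). A fastcall prototype also yields 2, but
   function_arg below starts allocating at %ecx instead of %eax. */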
1838
f676971a 1839/* Return true if EAX is live at the start of the function. Used by
fe9f516f
RH
1840 ix86_expand_prologue to determine if we need special help before
1841 calling allocate_stack_worker. */
1842
1843static bool
1844ix86_eax_live_at_start_p (void)
1845{
1846 /* Cheat. Don't bother working forward from ix86_function_regparm
1847 to the function type to whether an actual argument is located in
1848 eax. Instead just look at cfg info, which is still close enough
1849 to correct at this point. This gives false positives for broken
1850 functions that might use uninitialized data that happens to be
1851 allocated in eax, but who cares? */
1852 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->global_live_at_end, 0);
1853}
1854
b08de47e
MM
1855/* Value is the number of bytes of arguments automatically
1856 popped when returning from a subroutine call.
1857 FUNDECL is the declaration node of the function (as a tree),
1858 FUNTYPE is the data type of the function (as a tree),
1859 or for a library call it is an identifier node for the subroutine name.
1860 SIZE is the number of bytes of arguments passed on the stack.
1861
1862 On the 80386, the RTD insn may be used to pop them if the number
1863 of args is fixed, but if the number is variable then the caller
1864 must pop them all. RTD can't be used for library calls now
1865 because the library is compiled with the Unix compiler.
1866 Use of RTD is a selectable option, since it is incompatible with
1867 standard Unix calling sequences. If the option is not selected,
1868 the caller must always pop the args.
1869
1870 The attribute stdcall is equivalent to RTD on a per module basis. */
1871
1872int
b96a374d 1873ix86_return_pops_args (tree fundecl, tree funtype, int size)
79325812 1874{
3345ee7d 1875 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
b08de47e 1876
43f3a59d 1877 /* Cdecl functions override -mrtd, and never pop the stack. */
e9a25f70 1878 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
79325812 1879
43f3a59d
KH
1880 /* Stdcall and fastcall functions will pop the stack if not
1881 variable args. */
e91f04de
CH
1882 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
1883 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
698cdd84 1884 rtd = 1;
79325812 1885
698cdd84
SC
1886 if (rtd
1887 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
e9a25f70
JL
1888 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1889 == void_type_node)))
698cdd84
SC
1890 return size;
1891 }
79325812 1892
232b8f52 1893 /* Lose any fake structure return argument if it is passed on the stack. */
61f71b34 1894 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
61fec9ff
JB
1895 && !TARGET_64BIT
1896 && !KEEP_AGGREGATE_RETURN_POINTER)
232b8f52 1897 {
e767b5be 1898 int nregs = ix86_function_regparm (funtype, fundecl);
232b8f52
JJ
1899
1900 if (!nregs)
1901 return GET_MODE_SIZE (Pmode);
1902 }
1903
1904 return 0;
b08de47e 1905}
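/* Illustrative example (not from the original source): for

     void __attribute__((stdcall)) f (int, int);

   SIZE is 8 and the callee pops its own arguments ("ret $8"), so this
   returns 8; a plain cdecl function returns 0 and leaves the pop to
   the caller. */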
b08de47e
MM
1906\f
1907/* Argument support functions. */
1908
53c17031
JH
1909/* Return true when register may be used to pass function parameters. */
1910bool
b96a374d 1911ix86_function_arg_regno_p (int regno)
53c17031
JH
1912{
1913 int i;
1914 if (!TARGET_64BIT)
0333394e
JJ
1915 return (regno < REGPARM_MAX
1916 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
53c17031
JH
1917 if (SSE_REGNO_P (regno) && TARGET_SSE)
1918 return true;
1919 /* RAX is used as hidden argument to va_arg functions. */
1920 if (!regno)
1921 return true;
1922 for (i = 0; i < REGPARM_MAX; i++)
1923 if (regno == x86_64_int_parameter_registers[i])
1924 return true;
1925 return false;
1926}
1927
fe984136
RH
1928/* Return true if we do not know how to pass TYPE solely in registers. */
1929
1930static bool
1931ix86_must_pass_in_stack (enum machine_mode mode, tree type)
1932{
1933 if (must_pass_in_stack_var_size_or_pad (mode, type))
1934 return true;
dcbca208
RH
1935
1936 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
1937 The layout_type routine is crafty and tries to trick us into passing
1938 currently unsupported vector types on the stack by using TImode. */
1939 return (!TARGET_64BIT && mode == TImode
1940 && type && TREE_CODE (type) != VECTOR_TYPE);
fe984136
RH
1941}
1942
b08de47e
MM
1943/* Initialize a variable CUM of type CUMULATIVE_ARGS
1944 for a call to a function whose data type is FNTYPE.
1945 For a library call, FNTYPE is 0. */
1946
1947void
b96a374d
AJ
1948init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
1949 tree fntype, /* tree ptr for function decl */
1950 rtx libname, /* SYMBOL_REF of library name or 0 */
1951 tree fndecl)
b08de47e
MM
1952{
1953 static CUMULATIVE_ARGS zero_cum;
1954 tree param, next_param;
1955
1956 if (TARGET_DEBUG_ARG)
1957 {
1958 fprintf (stderr, "\ninit_cumulative_args (");
1959 if (fntype)
e9a25f70
JL
1960 fprintf (stderr, "fntype code = %s, ret code = %s",
1961 tree_code_name[(int) TREE_CODE (fntype)],
1962 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
b08de47e
MM
1963 else
1964 fprintf (stderr, "no fntype");
1965
1966 if (libname)
1967 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1968 }
1969
1970 *cum = zero_cum;
1971
1972 /* Set up the number of registers to use for passing arguments. */
e767b5be
JH
1973 if (fntype)
1974 cum->nregs = ix86_function_regparm (fntype, fndecl);
1975 else
1976 cum->nregs = ix86_regparm;
78fbfc4b
JB
1977 if (TARGET_SSE)
1978 cum->sse_nregs = SSE_REGPARM_MAX;
1979 if (TARGET_MMX)
1980 cum->mmx_nregs = MMX_REGPARM_MAX;
e1be55d0
JH
1981 cum->warn_sse = true;
1982 cum->warn_mmx = true;
53c17031 1983 cum->maybe_vaarg = false;
b08de47e 1984
e91f04de
CH
1985 /* Use ecx and edx registers if the function has the fastcall attribute. */
1986 if (fntype && !TARGET_64BIT)
1987 {
1988 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
1989 {
1990 cum->nregs = 2;
1991 cum->fastcall = 1;
1992 }
1993 }
1994
b08de47e
MM
1995 /* Determine if this function has variable arguments. This is
1996 indicated by the last argument being 'void_type_node' if there
1997 are no variable arguments. If there are variable arguments, then
78fbfc4b 1998 we won't pass anything in registers in 32-bit mode. */
b08de47e 1999
78fbfc4b 2000 if (cum->nregs || cum->mmx_nregs || cum->sse_nregs)
b08de47e
MM
2001 {
2002 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
e9a25f70 2003 param != 0; param = next_param)
b08de47e
MM
2004 {
2005 next_param = TREE_CHAIN (param);
e9a25f70 2006 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
53c17031
JH
2007 {
2008 if (!TARGET_64BIT)
e91f04de
CH
2009 {
2010 cum->nregs = 0;
e1be55d0
JH
2011 cum->sse_nregs = 0;
2012 cum->mmx_nregs = 0;
2013 cum->warn_sse = 0;
2014 cum->warn_mmx = 0;
e91f04de
CH
2015 cum->fastcall = 0;
2016 }
53c17031
JH
2017 cum->maybe_vaarg = true;
2018 }
b08de47e
MM
2019 }
2020 }
53c17031
JH
2021 if ((!fntype && !libname)
2022 || (fntype && !TYPE_ARG_TYPES (fntype)))
2023 cum->maybe_vaarg = 1;
b08de47e
MM
2024
2025 if (TARGET_DEBUG_ARG)
2026 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
2027
2028 return;
2029}
2030
6c4ccfd8
RH
2031/* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
2032 But in the case of vector types, it is some vector mode.
2033
2034 When we have only some of our vector isa extensions enabled, then there
2035 are some modes for which vector_mode_supported_p is false. For these
2036 modes, the generic vector support in gcc will choose some non-vector mode
2037 in order to implement the type. By computing the natural mode, we'll
2038 select the proper ABI location for the operand and not depend on whatever
2039 the middle-end decides to do with these vector types. */
2040
2041static enum machine_mode
2042type_natural_mode (tree type)
2043{
2044 enum machine_mode mode = TYPE_MODE (type);
2045
2046 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
2047 {
2048 HOST_WIDE_INT size = int_size_in_bytes (type);
2049 if ((size == 8 || size == 16)
2050 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
2051 && TYPE_VECTOR_SUBPARTS (type) > 1)
2052 {
2053 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
2054
2055 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
2056 mode = MIN_MODE_VECTOR_FLOAT;
2057 else
2058 mode = MIN_MODE_VECTOR_INT;
2059
2060 /* Get the mode which has this inner mode and number of units. */
2061 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
2062 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
2063 && GET_MODE_INNER (mode) == innermode)
2064 return mode;
2065
2066 abort ();
2067 }
2068 }
2069
2070 return mode;
2071}
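/* Illustrative example (not from the original source): for

     typedef float v2sf __attribute__((vector_size (8)));

   compiled with MMX/SSE disabled, TYPE_MODE may be a non-vector mode,
   but this function still returns V2SFmode (SFmode inner mode, two
   units), so the ABI location does not depend on what the middle-end
   chose for the type. */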
2072
2073/* We want to pass a value in REGNO whose "natural" mode is MODE. However,
2074 this may not agree with the mode that the type system has chosen for the
2075 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
2076 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
2077
2078static rtx
2079gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
2080 unsigned int regno)
2081{
2082 rtx tmp;
2083
2084 if (orig_mode != BLKmode)
2085 tmp = gen_rtx_REG (orig_mode, regno);
2086 else
2087 {
2088 tmp = gen_rtx_REG (mode, regno);
2089 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
2090 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
2091 }
2092
2093 return tmp;
2094}
2095
d1f87653 2096/* x86-64 register passing implementation. See x86-64 ABI for details. Goal
f710504c 2097 of this code is to classify each eightbyte of an incoming argument by the register
53c17031
JH
2098 class and assign registers accordingly. */
2099
2100/* Return the union class of CLASS1 and CLASS2.
2101 See the x86-64 PS ABI for details. */
2102
2103static enum x86_64_reg_class
b96a374d 2104merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
53c17031
JH
2105{
2106 /* Rule #1: If both classes are equal, this is the resulting class. */
2107 if (class1 == class2)
2108 return class1;
2109
2110 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
2111 the other class. */
2112 if (class1 == X86_64_NO_CLASS)
2113 return class2;
2114 if (class2 == X86_64_NO_CLASS)
2115 return class1;
2116
2117 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
2118 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
2119 return X86_64_MEMORY_CLASS;
2120
2121 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
2122 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
2123 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
2124 return X86_64_INTEGERSI_CLASS;
2125 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
2126 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
2127 return X86_64_INTEGER_CLASS;
2128
499accd7
JB
2129 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
2130 MEMORY is used. */
2131 if (class1 == X86_64_X87_CLASS
2132 || class1 == X86_64_X87UP_CLASS
2133 || class1 == X86_64_COMPLEX_X87_CLASS
2134 || class2 == X86_64_X87_CLASS
2135 || class2 == X86_64_X87UP_CLASS
2136 || class2 == X86_64_COMPLEX_X87_CLASS)
53c17031
JH
2137 return X86_64_MEMORY_CLASS;
2138
2139 /* Rule #6: Otherwise class SSE is used. */
2140 return X86_64_SSE_CLASS;
2141}
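/* Worked example (illustrative): in struct s { int i; float f; } both
   fields share one eightbyte. The int classifies as
   X86_64_INTEGERSI_CLASS and the float, at bit offset 32, as
   X86_64_SSE_CLASS; rule #4 merges them to X86_64_INTEGER_CLASS, so
   the whole struct is passed in a single integer register. */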
2142
2143/* Classify the argument of type TYPE and mode MODE.
2144 CLASSES will be filled by the register class used to pass each word
2145 of the operand. The number of words is returned. In case the parameter
2146 should be passed in memory, 0 is returned. As a special case for zero
2147 sized containers, classes[0] will be NO_CLASS and 1 is returned.
2148
2149 BIT_OFFSET is used internally for handling records and specifies the
2150 offset in bits modulo 256 to avoid overflow cases.
2151
2152 See the x86-64 PS ABI for details.
2153*/
2154
2155static int
b96a374d
AJ
2156classify_argument (enum machine_mode mode, tree type,
2157 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
53c17031 2158{
296e4ae8 2159 HOST_WIDE_INT bytes =
53c17031 2160 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
23327dae 2161 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
53c17031 2162
c60ee6f5
JH
2163 /* Variable sized entities are always passed/returned in memory. */
2164 if (bytes < 0)
2165 return 0;
2166
dafc5b82 2167 if (mode != VOIDmode
fe984136 2168 && targetm.calls.must_pass_in_stack (mode, type))
dafc5b82
JH
2169 return 0;
2170
53c17031
JH
2171 if (type && AGGREGATE_TYPE_P (type))
2172 {
2173 int i;
2174 tree field;
2175 enum x86_64_reg_class subclasses[MAX_CLASSES];
2176
2177 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
2178 if (bytes > 16)
2179 return 0;
2180
2181 for (i = 0; i < words; i++)
2182 classes[i] = X86_64_NO_CLASS;
2183
2184 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
2185 signal the memory class, so handle it as a special case. */
2186 if (!words)
2187 {
2188 classes[0] = X86_64_NO_CLASS;
2189 return 1;
2190 }
2191
2192 /* Classify each field of record and merge classes. */
2193 if (TREE_CODE (type) == RECORD_TYPE)
2194 {
91ea38f9 2195 /* For classes, first merge in the fields of the base classes. */
fa743e8c 2196 if (TYPE_BINFO (type))
91ea38f9 2197 {
fa743e8c 2198 tree binfo, base_binfo;
e8112eac 2199 int basenum;
91ea38f9 2200
e8112eac
ZK
2201 for (binfo = TYPE_BINFO (type), basenum = 0;
2202 BINFO_BASE_ITERATE (binfo, basenum, base_binfo); basenum++)
91ea38f9 2203 {
91ea38f9 2204 int num;
fa743e8c
NS
2205 int offset = tree_low_cst (BINFO_OFFSET (base_binfo), 0) * 8;
2206 tree type = BINFO_TYPE (base_binfo);
91ea38f9
JH
2207
2208 num = classify_argument (TYPE_MODE (type),
2209 type, subclasses,
2210 (offset + bit_offset) % 256);
2211 if (!num)
2212 return 0;
2213 for (i = 0; i < num; i++)
2214 {
db01f480 2215 int pos = (offset + (bit_offset % 64)) / 8 / 8;
91ea38f9
JH
2216 classes[i + pos] =
2217 merge_classes (subclasses[i], classes[i + pos]);
2218 }
2219 }
2220 }
43f3a59d 2221 /* And now merge the fields of the structure. */
53c17031
JH
2222 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2223 {
2224 if (TREE_CODE (field) == FIELD_DECL)
2225 {
2226 int num;
2227
2228 /* Bitfields are always classified as integer. Handle them
2229 early, since later code would consider them to be
2230 misaligned integers. */
2231 if (DECL_BIT_FIELD (field))
2232 {
2233 for (i = int_bit_position (field) / 8 / 8;
2234 i < (int_bit_position (field)
2235 + tree_low_cst (DECL_SIZE (field), 0)
b96a374d 2236 + 63) / 8 / 8; i++)
53c17031
JH
2237 classes[i] =
2238 merge_classes (X86_64_INTEGER_CLASS,
2239 classes[i]);
2240 }
2241 else
2242 {
2243 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2244 TREE_TYPE (field), subclasses,
2245 (int_bit_position (field)
2246 + bit_offset) % 256);
2247 if (!num)
2248 return 0;
2249 for (i = 0; i < num; i++)
2250 {
2251 int pos =
db01f480 2252 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
53c17031
JH
2253 classes[i + pos] =
2254 merge_classes (subclasses[i], classes[i + pos]);
2255 }
2256 }
2257 }
2258 }
2259 }
2260 /* Arrays are handled as small records. */
2261 else if (TREE_CODE (type) == ARRAY_TYPE)
2262 {
2263 int num;
2264 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2265 TREE_TYPE (type), subclasses, bit_offset);
2266 if (!num)
2267 return 0;
2268
2269 /* The partial classes are now full classes. */
2270 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2271 subclasses[0] = X86_64_SSE_CLASS;
2272 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
2273 subclasses[0] = X86_64_INTEGER_CLASS;
2274
2275 for (i = 0; i < words; i++)
2276 classes[i] = subclasses[i % num];
2277 }
2278 /* Unions are similar to RECORD_TYPE but offset is always 0. */
e4dbaed5
AS
2279 else if (TREE_CODE (type) == UNION_TYPE
2280 || TREE_CODE (type) == QUAL_UNION_TYPE)
53c17031 2281 {
91ea38f9 2282 /* For classes, first merge in the fields of the base classes. */
fa743e8c 2283 if (TYPE_BINFO (type))
91ea38f9 2284 {
fa743e8c 2285 tree binfo, base_binfo;
e8112eac 2286 int basenum;
91ea38f9 2287
e8112eac
ZK
2288 for (binfo = TYPE_BINFO (type), basenum = 0;
2289 BINFO_BASE_ITERATE (binfo, basenum, base_binfo); basenum++)
91ea38f9 2290 {
91ea38f9 2291 int num;
fa743e8c
NS
2292 int offset = tree_low_cst (BINFO_OFFSET (base_binfo), 0) * 8;
2293 tree type = BINFO_TYPE (base_binfo);
91ea38f9
JH
2294
2295 num = classify_argument (TYPE_MODE (type),
2296 type, subclasses,
db01f480 2297 (offset + (bit_offset % 64)) % 256);
91ea38f9
JH
2298 if (!num)
2299 return 0;
2300 for (i = 0; i < num; i++)
2301 {
c16576e6 2302 int pos = (offset + (bit_offset % 64)) / 8 / 8;
91ea38f9
JH
2303 classes[i + pos] =
2304 merge_classes (subclasses[i], classes[i + pos]);
2305 }
2306 }
2307 }
53c17031
JH
2308 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2309 {
2310 if (TREE_CODE (field) == FIELD_DECL)
2311 {
2312 int num;
2313 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2314 TREE_TYPE (field), subclasses,
2315 bit_offset);
2316 if (!num)
2317 return 0;
2318 for (i = 0; i < num; i++)
2319 classes[i] = merge_classes (subclasses[i], classes[i]);
2320 }
2321 }
2322 }
2323 else
2324 abort ();
2325
2326 /* Final merger cleanup. */
2327 for (i = 0; i < words; i++)
2328 {
2329 /* If one class is MEMORY, everything should be passed in
2330 memory. */
2331 if (classes[i] == X86_64_MEMORY_CLASS)
2332 return 0;
2333
d6a7951f 2334 /* The X86_64_SSEUP_CLASS should always be preceded by
53c17031
JH
2335 X86_64_SSE_CLASS. */
2336 if (classes[i] == X86_64_SSEUP_CLASS
2337 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
2338 classes[i] = X86_64_SSE_CLASS;
2339
d6a7951f 2340 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
53c17031
JH
2341 if (classes[i] == X86_64_X87UP_CLASS
2342 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
2343 classes[i] = X86_64_SSE_CLASS;
2344 }
2345 return words;
2346 }
2347
2348 /* Compute alignment needed. We align all types to natural boundaries with
2349 the exception of XFmode, which is aligned to 64 bits. */
2350 if (mode != VOIDmode && mode != BLKmode)
2351 {
2352 int mode_alignment = GET_MODE_BITSIZE (mode);
2353
2354 if (mode == XFmode)
2355 mode_alignment = 128;
2356 else if (mode == XCmode)
2357 mode_alignment = 256;
2c6b27c3
JH
2358 if (COMPLEX_MODE_P (mode))
2359 mode_alignment /= 2;
f5143c46 2360 /* Misaligned fields are always returned in memory. */
53c17031
JH
2361 if (bit_offset % mode_alignment)
2362 return 0;
2363 }
2364
9e9fb0ce
JB
2365 /* For V1xx modes, just use the base mode. */
2366 if (VECTOR_MODE_P (mode)
2367 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
2368 mode = GET_MODE_INNER (mode);
2369
53c17031
JH
2370 /* Classification of atomic types. */
2371 switch (mode)
2372 {
2373 case DImode:
2374 case SImode:
2375 case HImode:
2376 case QImode:
2377 case CSImode:
2378 case CHImode:
2379 case CQImode:
2380 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2381 classes[0] = X86_64_INTEGERSI_CLASS;
2382 else
2383 classes[0] = X86_64_INTEGER_CLASS;
2384 return 1;
2385 case CDImode:
2386 case TImode:
2387 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2388 return 2;
2389 case CTImode:
9e9fb0ce 2390 return 0;
53c17031
JH
2391 case SFmode:
2392 if (!(bit_offset % 64))
2393 classes[0] = X86_64_SSESF_CLASS;
2394 else
2395 classes[0] = X86_64_SSE_CLASS;
2396 return 1;
2397 case DFmode:
2398 classes[0] = X86_64_SSEDF_CLASS;
2399 return 1;
f8a1ebc6 2400 case XFmode:
53c17031
JH
2401 classes[0] = X86_64_X87_CLASS;
2402 classes[1] = X86_64_X87UP_CLASS;
2403 return 2;
f8a1ebc6 2404 case TFmode:
9e9fb0ce
JB
2405 classes[0] = X86_64_SSE_CLASS;
2406 classes[1] = X86_64_SSEUP_CLASS;
53c17031
JH
2407 return 2;
2408 case SCmode:
2409 classes[0] = X86_64_SSE_CLASS;
2410 return 1;
9e9fb0ce
JB
2411 case DCmode:
2412 classes[0] = X86_64_SSEDF_CLASS;
2413 classes[1] = X86_64_SSEDF_CLASS;
2414 return 2;
2415 case XCmode:
499accd7
JB
2416 classes[0] = X86_64_COMPLEX_X87_CLASS;
2417 return 1;
9e9fb0ce 2418 case TCmode:
499accd7 2419 /* This mode is larger than 16 bytes. */
9e9fb0ce 2420 return 0;
e95d6b23
JH
2421 case V4SFmode:
2422 case V4SImode:
495333a6
JH
2423 case V16QImode:
2424 case V8HImode:
2425 case V2DFmode:
2426 case V2DImode:
e95d6b23
JH
2427 classes[0] = X86_64_SSE_CLASS;
2428 classes[1] = X86_64_SSEUP_CLASS;
2429 return 2;
2430 case V2SFmode:
2431 case V2SImode:
2432 case V4HImode:
2433 case V8QImode:
9e9fb0ce
JB
2434 classes[0] = X86_64_SSE_CLASS;
2435 return 1;
53c17031 2436 case BLKmode:
e95d6b23 2437 case VOIDmode:
53c17031
JH
2438 return 0;
2439 default:
9e9fb0ce
JB
2440 if (VECTOR_MODE_P (mode))
2441 {
2442 if (bytes > 16)
2443 return 0;
2444 if (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT)
2445 {
2446 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2447 classes[0] = X86_64_INTEGERSI_CLASS;
2448 else
2449 classes[0] = X86_64_INTEGER_CLASS;
2450 classes[1] = X86_64_INTEGER_CLASS;
2451 return 1 + (bytes > 8);
2452 }
2453 }
53c17031
JH
2454 abort ();
2455 }
2456}
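/* Worked example (illustrative): struct s { double d; long l; } spans
   two eightbytes on x86-64 and classifies as
     classes[0] = X86_64_SSEDF_CLASS, classes[1] = X86_64_INTEGER_CLASS,
   so 2 is returned and the struct needs one SSE and one integer
   register. */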
2457
2458/* Examine the argument and return the number of registers required in each
f5143c46 2459 class. Return 0 iff the parameter should be passed in memory. */
53c17031 2460static int
b96a374d
AJ
2461examine_argument (enum machine_mode mode, tree type, int in_return,
2462 int *int_nregs, int *sse_nregs)
53c17031
JH
2463{
2464 enum x86_64_reg_class class[MAX_CLASSES];
2465 int n = classify_argument (mode, type, class, 0);
2466
2467 *int_nregs = 0;
2468 *sse_nregs = 0;
2469 if (!n)
2470 return 0;
2471 for (n--; n >= 0; n--)
2472 switch (class[n])
2473 {
2474 case X86_64_INTEGER_CLASS:
2475 case X86_64_INTEGERSI_CLASS:
2476 (*int_nregs)++;
2477 break;
2478 case X86_64_SSE_CLASS:
2479 case X86_64_SSESF_CLASS:
2480 case X86_64_SSEDF_CLASS:
2481 (*sse_nregs)++;
2482 break;
2483 case X86_64_NO_CLASS:
2484 case X86_64_SSEUP_CLASS:
2485 break;
2486 case X86_64_X87_CLASS:
2487 case X86_64_X87UP_CLASS:
2488 if (!in_return)
2489 return 0;
2490 break;
499accd7
JB
2491 case X86_64_COMPLEX_X87_CLASS:
2492 return in_return ? 2 : 0;
53c17031
JH
2493 case X86_64_MEMORY_CLASS:
2494 abort ();
2495 }
2496 return 1;
2497}
6c4ccfd8 2498
53c17031
JH
2499/* Construct container for the argument used by GCC interface. See
2500 FUNCTION_ARG for the detailed description. */
6c4ccfd8 2501
53c17031 2502static rtx
6c4ccfd8
RH
2503construct_container (enum machine_mode mode, enum machine_mode orig_mode,
2504 tree type, int in_return, int nintregs, int nsseregs,
2505 const int *intreg, int sse_regno)
53c17031
JH
2506{
2507 enum machine_mode tmpmode;
2508 int bytes =
2509 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2510 enum x86_64_reg_class class[MAX_CLASSES];
2511 int n;
2512 int i;
2513 int nexps = 0;
2514 int needed_sseregs, needed_intregs;
2515 rtx exp[MAX_CLASSES];
2516 rtx ret;
2517
2518 n = classify_argument (mode, type, class, 0);
2519 if (TARGET_DEBUG_ARG)
2520 {
2521 if (!n)
2522 fprintf (stderr, "Memory class\n");
2523 else
2524 {
2525 fprintf (stderr, "Classes:");
2526 for (i = 0; i < n; i++)
2527 {
2528 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2529 }
2530 fprintf (stderr, "\n");
2531 }
2532 }
2533 if (!n)
2534 return NULL;
6c4ccfd8
RH
2535 if (!examine_argument (mode, type, in_return, &needed_intregs,
2536 &needed_sseregs))
53c17031
JH
2537 return NULL;
2538 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2539 return NULL;
2540
a5370cf0
RH
2541 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
2542 some less clueful developer tries to use floating-point anyway. */
2543 if (needed_sseregs && !TARGET_SSE)
2544 {
2545 static bool issued_error;
2546 if (!issued_error)
2547 {
2548 issued_error = true;
2549 if (in_return)
2550 error ("SSE register return with SSE disabled");
2551 else
2552 error ("SSE register argument with SSE disabled");
2553 }
2554 return NULL;
2555 }
2556
53c17031
JH
2557 /* First construct simple cases. Avoid SCmode, since we want to use
2558 single register to pass this type. */
2559 if (n == 1 && mode != SCmode)
2560 switch (class[0])
2561 {
2562 case X86_64_INTEGER_CLASS:
2563 case X86_64_INTEGERSI_CLASS:
2564 return gen_rtx_REG (mode, intreg[0]);
2565 case X86_64_SSE_CLASS:
2566 case X86_64_SSESF_CLASS:
2567 case X86_64_SSEDF_CLASS:
6c4ccfd8 2568 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
53c17031 2569 case X86_64_X87_CLASS:
499accd7 2570 case X86_64_COMPLEX_X87_CLASS:
53c17031
JH
2571 return gen_rtx_REG (mode, FIRST_STACK_REG);
2572 case X86_64_NO_CLASS:
2573 /* Zero sized array, struct or class. */
2574 return NULL;
2575 default:
2576 abort ();
2577 }
2c6b27c3
JH
2578 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
2579 && mode != BLKmode)
e95d6b23 2580 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
53c17031
JH
2581 if (n == 2
2582 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
f8a1ebc6 2583 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
53c17031
JH
2584 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2585 && class[1] == X86_64_INTEGER_CLASS
f8a1ebc6 2586 && (mode == CDImode || mode == TImode || mode == TFmode)
53c17031
JH
2587 && intreg[0] + 1 == intreg[1])
2588 return gen_rtx_REG (mode, intreg[0]);
53c17031
JH
2589
2590 /* Otherwise figure out the entries of the PARALLEL. */
2591 for (i = 0; i < n; i++)
2592 {
2593 switch (class[i])
2594 {
2595 case X86_64_NO_CLASS:
2596 break;
2597 case X86_64_INTEGER_CLASS:
2598 case X86_64_INTEGERSI_CLASS:
d1f87653 2599 /* Merge TImodes on aligned occasions here too. */
53c17031
JH
2600 if (i * 8 + 8 > bytes)
2601 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2602 else if (class[i] == X86_64_INTEGERSI_CLASS)
2603 tmpmode = SImode;
2604 else
2605 tmpmode = DImode;
2606 /* We've requested 24 bytes we don't have a mode for. Use DImode. */
2607 if (tmpmode == BLKmode)
2608 tmpmode = DImode;
2609 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2610 gen_rtx_REG (tmpmode, *intreg),
2611 GEN_INT (i*8));
2612 intreg++;
2613 break;
2614 case X86_64_SSESF_CLASS:
2615 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2616 gen_rtx_REG (SFmode,
2617 SSE_REGNO (sse_regno)),
2618 GEN_INT (i*8));
2619 sse_regno++;
2620 break;
2621 case X86_64_SSEDF_CLASS:
2622 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2623 gen_rtx_REG (DFmode,
2624 SSE_REGNO (sse_regno)),
2625 GEN_INT (i*8));
2626 sse_regno++;
2627 break;
2628 case X86_64_SSE_CLASS:
12f5c45e
JH
2629 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2630 tmpmode = TImode;
53c17031
JH
2631 else
2632 tmpmode = DImode;
2633 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2634 gen_rtx_REG (tmpmode,
2635 SSE_REGNO (sse_regno)),
2636 GEN_INT (i*8));
12f5c45e
JH
2637 if (tmpmode == TImode)
2638 i++;
53c17031
JH
2639 sse_regno++;
2640 break;
2641 default:
2642 abort ();
2643 }
2644 }
2645 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2646 for (i = 0; i < nexps; i++)
2647 XVECEXP (ret, 0, i) = exp [i];
2648 return ret;
2649}
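/* Illustrative result (not from the original source): for the
   SSEDF + INTEGER example above, the PARALLEL built here looks roughly
   like

     (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                (expr_list (reg:DI di) (const_int 8))])

   with the low eightbyte in an SSE register, the high eightbyte in an
   integer register, and the byte offsets as the second operands. */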
2650
b08de47e
MM
2651/* Update the data in CUM to advance over an argument
2652 of mode MODE and data type TYPE.
2653 (TYPE is null for libcalls where that information may not be available.) */
2654
2655void
6c4ccfd8
RH
2656function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
2657 tree type, int named)
b08de47e 2658{
5ac9118e
KG
2659 int bytes =
2660 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
b08de47e
MM
2661 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2662
b3a1ca49
RH
2663 if (type)
2664 mode = type_natural_mode (type);
2665
b08de47e 2666 if (TARGET_DEBUG_ARG)
6c4ccfd8
RH
2667 fprintf (stderr, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
2668 "mode=%s, named=%d)\n\n",
2669 words, cum->words, cum->nregs, cum->sse_nregs,
2670 GET_MODE_NAME (mode), named);
b3a1ca49 2671
53c17031 2672 if (TARGET_64BIT)
b08de47e 2673 {
53c17031
JH
2674 int int_nregs, sse_nregs;
2675 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2676 cum->words += words;
2677 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
82a127a9 2678 {
53c17031
JH
2679 cum->nregs -= int_nregs;
2680 cum->sse_nregs -= sse_nregs;
2681 cum->regno += int_nregs;
2682 cum->sse_regno += sse_nregs;
82a127a9 2683 }
53c17031
JH
2684 else
2685 cum->words += words;
b08de47e 2686 }
a4f31c00 2687 else
82a127a9 2688 {
b3a1ca49 2689 switch (mode)
82a127a9 2690 {
b3a1ca49
RH
2691 default:
2692 break;
2693
2694 case BLKmode:
2695 if (bytes < 0)
2696 break;
2697 /* FALLTHRU */
2698
2699 case DImode:
2700 case SImode:
2701 case HImode:
2702 case QImode:
53c17031
JH
2703 cum->words += words;
2704 cum->nregs -= words;
2705 cum->regno += words;
2706
2707 if (cum->nregs <= 0)
2708 {
2709 cum->nregs = 0;
2710 cum->regno = 0;
2711 }
b3a1ca49
RH
2712 break;
2713
2714 case TImode:
2715 case V16QImode:
2716 case V8HImode:
2717 case V4SImode:
2718 case V2DImode:
2719 case V4SFmode:
2720 case V2DFmode:
2721 if (!type || !AGGREGATE_TYPE_P (type))
2722 {
2723 cum->sse_words += words;
2724 cum->sse_nregs -= 1;
2725 cum->sse_regno += 1;
2726 if (cum->sse_nregs <= 0)
2727 {
2728 cum->sse_nregs = 0;
2729 cum->sse_regno = 0;
2730 }
2731 }
2732 break;
2733
2734 case V8QImode:
2735 case V4HImode:
2736 case V2SImode:
2737 case V2SFmode:
2738 if (!type || !AGGREGATE_TYPE_P (type))
2739 {
2740 cum->mmx_words += words;
2741 cum->mmx_nregs -= 1;
2742 cum->mmx_regno += 1;
2743 if (cum->mmx_nregs <= 0)
2744 {
2745 cum->mmx_nregs = 0;
2746 cum->mmx_regno = 0;
2747 }
2748 }
2749 break;
82a127a9
CM
2750 }
2751 }
b08de47e
MM
2752}
2753
2754/* Define where to put the arguments to a function.
2755 Value is zero to push the argument on the stack,
2756 or a hard register in which to store the argument.
2757
2758 MODE is the argument's machine mode.
2759 TYPE is the data type of the argument (as a tree).
2760 This is null for libcalls where that information may
2761 not be available.
2762 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2763 the preceding args and about the function being called.
2764 NAMED is nonzero if this argument is a named parameter
2765 (otherwise it is an extra parameter matching an ellipsis). */
2766
07933f72 2767rtx
dcbca208
RH
2768function_arg (CUMULATIVE_ARGS *cum, enum machine_mode orig_mode,
2769 tree type, int named)
b08de47e 2770{
dcbca208
RH
2771 enum machine_mode mode = orig_mode;
2772 rtx ret = NULL_RTX;
5ac9118e
KG
2773 int bytes =
2774 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
b08de47e 2775 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
bcf17554 2776 static bool warnedsse, warnedmmx;
b08de47e 2777
90d5887b
PB
2778 /* To simplify the code below, represent vector types with a vector mode
2779 even if MMX/SSE are not active. */
6c4ccfd8
RH
2780 if (type && TREE_CODE (type) == VECTOR_TYPE)
2781 mode = type_natural_mode (type);
90d5887b 2782
5bdc5878 2783 /* Handle a hidden AL argument containing the number of registers for varargs
53c17031
JH
2784 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
2785 any AL settings. */
32ee7d1d 2786 if (mode == VOIDmode)
b08de47e 2787 {
53c17031
JH
2788 if (TARGET_64BIT)
2789 return GEN_INT (cum->maybe_vaarg
2790 ? (cum->sse_nregs < 0
2791 ? SSE_REGPARM_MAX
2792 : cum->sse_regno)
2793 : -1);
2794 else
2795 return constm1_rtx;
b08de47e 2796 }
53c17031 2797 if (TARGET_64BIT)
6c4ccfd8
RH
2798 ret = construct_container (mode, orig_mode, type, 0, cum->nregs,
2799 cum->sse_nregs,
53c17031
JH
2800 &x86_64_int_parameter_registers [cum->regno],
2801 cum->sse_regno);
2802 else
2803 switch (mode)
2804 {
2805 /* For now, pass fp/complex values on the stack. */
2806 default:
2807 break;
2808
2809 case BLKmode:
8d454008
RH
2810 if (bytes < 0)
2811 break;
5efb1046 2812 /* FALLTHRU */
53c17031
JH
2813 case DImode:
2814 case SImode:
2815 case HImode:
2816 case QImode:
2817 if (words <= cum->nregs)
b96a374d
AJ
2818 {
2819 int regno = cum->regno;
2820
2821 /* Fastcall allocates the first two DWORD (SImode) or
2822 smaller arguments to ECX and EDX. */
2823 if (cum->fastcall)
2824 {
2825 if (mode == BLKmode || mode == DImode)
2826 break;
2827
2828 /* ECX not EAX is the first allocated register. */
2829 if (regno == 0)
e767b5be 2830 regno = 2;
b96a374d
AJ
2831 }
2832 ret = gen_rtx_REG (mode, regno);
2833 }
53c17031
JH
2834 break;
2835 case TImode:
bcf17554
JH
2836 case V16QImode:
2837 case V8HImode:
2838 case V4SImode:
2839 case V2DImode:
2840 case V4SFmode:
2841 case V2DFmode:
2842 if (!type || !AGGREGATE_TYPE_P (type))
2843 {
78fbfc4b 2844 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
bcf17554
JH
2845 {
2846 warnedsse = true;
2847 warning ("SSE vector argument without SSE enabled "
2848 "changes the ABI");
2849 }
2850 if (cum->sse_nregs)
6c4ccfd8 2851 ret = gen_reg_or_parallel (mode, orig_mode,
dcbca208 2852 cum->sse_regno + FIRST_SSE_REG);
bcf17554
JH
2853 }
2854 break;
2855 case V8QImode:
2856 case V4HImode:
2857 case V2SImode:
2858 case V2SFmode:
2859 if (!type || !AGGREGATE_TYPE_P (type))
2860 {
e1be55d0 2861 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
bcf17554
JH
2862 {
2863 warnedmmx = true;
2864 warning ("MMX vector argument without MMX enabled "
2865 "changes the ABI");
2866 }
2867 if (cum->mmx_nregs)
6c4ccfd8 2868 ret = gen_reg_or_parallel (mode, orig_mode,
dcbca208 2869 cum->mmx_regno + FIRST_MMX_REG);
bcf17554 2870 }
53c17031
JH
2871 break;
2872 }
b08de47e
MM
2873
2874 if (TARGET_DEBUG_ARG)
2875 {
2876 fprintf (stderr,
91ea38f9 2877 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
b08de47e
MM
2878 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2879
2880 if (ret)
91ea38f9 2881 print_simple_rtl (stderr, ret);
b08de47e
MM
2882 else
2883 fprintf (stderr, ", stack");
2884
2885 fprintf (stderr, " )\n");
2886 }
2887
2888 return ret;
2889}
53c17031 2890
09b2e78d
ZD
2891/* A C expression that indicates when an argument must be passed by
2892 reference. If nonzero for an argument, a copy of that argument is
2893 made in memory and a pointer to the argument is passed instead of
2894 the argument itself. The pointer is passed in whatever way is
2895 appropriate for passing a pointer to that type. */
2896
8cd5a4e0
RH
2897static bool
2898ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
2899 enum machine_mode mode ATTRIBUTE_UNUSED,
2900 tree type, bool named ATTRIBUTE_UNUSED)
09b2e78d
ZD
2901{
2902 if (!TARGET_64BIT)
2903 return 0;
2904
2905 if (type && int_size_in_bytes (type) == -1)
2906 {
2907 if (TARGET_DEBUG_ARG)
2908 fprintf (stderr, "function_arg_pass_by_reference\n");
2909 return 1;
2910 }
2911
2912 return 0;
2913}
2914
8b978a57 2915/* Return true when TYPE should be 128bit aligned for 32bit argument passing
90d5887b 2916 ABI. Only called if TARGET_SSE. */
8b978a57 2917static bool
b96a374d 2918contains_128bit_aligned_vector_p (tree type)
8b978a57
JH
2919{
2920 enum machine_mode mode = TYPE_MODE (type);
2921 if (SSE_REG_MODE_P (mode)
2922 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
2923 return true;
2924 if (TYPE_ALIGN (type) < 128)
2925 return false;
2926
2927 if (AGGREGATE_TYPE_P (type))
2928 {
2a43945f 2929 /* Walk the aggregates recursively. */
8b978a57
JH
2930 if (TREE_CODE (type) == RECORD_TYPE
2931 || TREE_CODE (type) == UNION_TYPE
2932 || TREE_CODE (type) == QUAL_UNION_TYPE)
2933 {
2934 tree field;
2935
fa743e8c 2936 if (TYPE_BINFO (type))
8b978a57 2937 {
fa743e8c 2938 tree binfo, base_binfo;
8b978a57
JH
2939 int i;
2940
fa743e8c
NS
2941 for (binfo = TYPE_BINFO (type), i = 0;
2942 BINFO_BASE_ITERATE (binfo, i, base_binfo); i++)
2943 if (contains_128bit_aligned_vector_p (BINFO_TYPE (base_binfo)))
2944 return true;
8b978a57 2945 }
43f3a59d 2946 /* And now merge the fields of the structure. */
8b978a57
JH
2947 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2948 {
2949 if (TREE_CODE (field) == FIELD_DECL
2950 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
2951 return true;
2952 }
2953 }
2954 /* Just for use if some languages pass arrays by value. */
2955 else if (TREE_CODE (type) == ARRAY_TYPE)
2956 {
2957 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
2958 return true;
2959 }
2960 else
2961 abort ();
2962 }
2963 return false;
2964}
2965
bb498ea3
AH
2966/* Gives the alignment boundary, in bits, of an argument with the
2967 specified mode and type. */
53c17031
JH
2968
2969int
b96a374d 2970ix86_function_arg_boundary (enum machine_mode mode, tree type)
53c17031
JH
2971{
2972 int align;
53c17031
JH
2973 if (type)
2974 align = TYPE_ALIGN (type);
2975 else
2976 align = GET_MODE_ALIGNMENT (mode);
2977 if (align < PARM_BOUNDARY)
2978 align = PARM_BOUNDARY;
8b978a57
JH
2979 if (!TARGET_64BIT)
2980 {
2981 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
2982 make an exception for SSE modes since these require 128bit
b96a374d 2983 alignment.
8b978a57
JH
2984
2985 The handling here differs from field_alignment. ICC aligns MMX
2986 arguments to 4 byte boundaries, while structure fields are aligned
2987 to 8 byte boundaries. */
78fbfc4b
JB
2988 if (!TARGET_SSE)
2989 align = PARM_BOUNDARY;
2990 else if (!type)
8b978a57
JH
2991 {
2992 if (!SSE_REG_MODE_P (mode))
2993 align = PARM_BOUNDARY;
2994 }
2995 else
2996 {
2997 if (!contains_128bit_aligned_vector_p (type))
2998 align = PARM_BOUNDARY;
2999 }
8b978a57 3000 }
53c17031
JH
3001 if (align > 128)
3002 align = 128;
3003 return align;
3004}
3005
3006/* Return true if N is a possible register number of function value. */
3007bool
b96a374d 3008ix86_function_value_regno_p (int regno)
53c17031
JH
3009{
3010 if (!TARGET_64BIT)
3011 {
3012 return ((regno) == 0
3013 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
3014 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
3015 }
3016 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
3017 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
3018 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
3019}
3020
3021/* Define how to find the value returned by a function.
3022 VALTYPE is the data type of the value (as a tree).
3023 If the precise function being called is known, FUNC is its FUNCTION_DECL;
3024 otherwise, FUNC is 0. */
3025rtx
b96a374d 3026ix86_function_value (tree valtype)
53c17031 3027{
b3a1ca49
RH
3028 enum machine_mode natmode = type_natural_mode (valtype);
3029
53c17031
JH
3030 if (TARGET_64BIT)
3031 {
b3a1ca49 3032 rtx ret = construct_container (natmode, TYPE_MODE (valtype), valtype,
6c4ccfd8 3033 1, REGPARM_MAX, SSE_REGPARM_MAX,
53c17031 3034 x86_64_int_return_registers, 0);
6c4ccfd8
RH
3035 /* For zero sized structures, construct_container return NULL, but we
3036 need to keep rest of compiler happy by returning meaningful value. */
53c17031
JH
3037 if (!ret)
3038 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
3039 return ret;
3040 }
3041 else
b3a1ca49 3042 return gen_rtx_REG (TYPE_MODE (valtype), ix86_value_regno (natmode));
53c17031
JH
3043}
3044
f5143c46 3045/* Return false iff type is returned in memory. */
53c17031 3046int
b96a374d 3047ix86_return_in_memory (tree type)
53c17031 3048{
a30b6839 3049 int needed_intregs, needed_sseregs, size;
b3a1ca49 3050 enum machine_mode mode = type_natural_mode (type);
a30b6839 3051
53c17031 3052 if (TARGET_64BIT)
a30b6839
RH
3053 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
3054
3055 if (mode == BLKmode)
3056 return 1;
3057
3058 size = int_size_in_bytes (type);
3059
3060 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
3061 return 0;
3062
3063 if (VECTOR_MODE_P (mode) || mode == TImode)
53c17031 3064 {
a30b6839
RH
3065 /* User-created vectors small enough to fit in EAX. */
3066 if (size < 8)
5e062767 3067 return 0;
a30b6839
RH
3068
3069 /* MMX/3dNow values are returned on the stack, since we've
3070 got to EMMS/FEMMS before returning. */
3071 if (size == 8)
53c17031 3072 return 1;
a30b6839 3073
0397ac35 3074 /* SSE values are returned in XMM0, except when it doesn't exist. */
a30b6839 3075 if (size == 16)
0397ac35 3076 return (TARGET_SSE ? 0 : 1);
53c17031 3077 }
a30b6839 3078
cf2348cb 3079 if (mode == XFmode)
a30b6839 3080 return 0;
f8a1ebc6 3081
a30b6839
RH
3082 if (size > 12)
3083 return 1;
3084 return 0;
53c17031
JH
3085}
3086
0397ac35
RH
3087/* When returning SSE vector types, we have a choice of either
3088 (1) being abi incompatible with a -march switch, or
3089 (2) generating an error.
3090 Given no good solution, I think the safest thing is one warning.
3091 The user won't be able to use -Werror, but....
3092
3093 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
3094 called in response to actually generating a caller or callee that
3095 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
3096 via aggregate_value_p for general type probing from tree-ssa. */
3097
3098static rtx
3099ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
3100{
3101 static bool warned;
3102
3103 if (!TARGET_SSE && type && !warned)
3104 {
3105 /* Look at the return type of the function, not the function type. */
3106 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
3107
3108 if (mode == TImode
3109 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3110 {
3111 warned = true;
3112 warning ("SSE vector return without SSE enabled changes the ABI");
3113 }
3114 }
3115
3116 return NULL;
3117}
3118
53c17031
JH
3119/* Define how to find the value returned by a library function
3120 assuming the value has mode MODE. */
3121rtx
b96a374d 3122ix86_libcall_value (enum machine_mode mode)
53c17031
JH
3123{
3124 if (TARGET_64BIT)
3125 {
3126 switch (mode)
3127 {
f8a1ebc6
JH
3128 case SFmode:
3129 case SCmode:
3130 case DFmode:
3131 case DCmode:
9e9fb0ce 3132 case TFmode:
f8a1ebc6
JH
3133 return gen_rtx_REG (mode, FIRST_SSE_REG);
3134 case XFmode:
9e9fb0ce 3135 case XCmode:
499accd7 3136 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
f8a1ebc6
JH
3137 case TCmode:
3138 return NULL;
3139 default:
3140 return gen_rtx_REG (mode, 0);
53c17031
JH
3141 }
3142 }
3143 else
f8a1ebc6 3144 return gen_rtx_REG (mode, ix86_value_regno (mode));
b069de3b
SS
3145}
3146
3147/* Given a mode, return the register to use for a return value. */
3148
3149static int
b96a374d 3150ix86_value_regno (enum machine_mode mode)
b069de3b 3151{
a30b6839 3152 /* Floating point return values in %st(0). */
b069de3b
SS
3153 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
3154 return FIRST_FLOAT_REG;
a30b6839
RH
3155 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
3156 we prevent this case when sse is not available. */
3157 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
b069de3b 3158 return FIRST_SSE_REG;
a30b6839 3159 /* Everything else in %eax. */
b069de3b 3160 return 0;
53c17031 3161}
ad919812
JH
3162\f
3163/* Create the va_list data type. */
53c17031 3164
c35d187f
RH
3165static tree
3166ix86_build_builtin_va_list (void)
ad919812
JH
3167{
3168 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
53c17031 3169
ad919812
JH
3170 /* For i386 we use plain pointer to argument area. */
3171 if (!TARGET_64BIT)
3172 return build_pointer_type (char_type_node);
3173
f1e639b1 3174 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
ad919812
JH
3175 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
3176
fce5a9f2 3177 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
ad919812 3178 unsigned_type_node);
fce5a9f2 3179 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
ad919812
JH
3180 unsigned_type_node);
3181 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
3182 ptr_type_node);
3183 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
3184 ptr_type_node);
3185
3186 DECL_FIELD_CONTEXT (f_gpr) = record;
3187 DECL_FIELD_CONTEXT (f_fpr) = record;
3188 DECL_FIELD_CONTEXT (f_ovf) = record;
3189 DECL_FIELD_CONTEXT (f_sav) = record;
3190
3191 TREE_CHAIN (record) = type_decl;
3192 TYPE_NAME (record) = type_decl;
3193 TYPE_FIELDS (record) = f_gpr;
3194 TREE_CHAIN (f_gpr) = f_fpr;
3195 TREE_CHAIN (f_fpr) = f_ovf;
3196 TREE_CHAIN (f_ovf) = f_sav;
3197
3198 layout_type (record);
3199
3200 /* The correct type is an array type of one element. */
3201 return build_array_type (record, build_index_type (size_zero_node));
3202}
3203
a0524eb3 3204/* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
ad919812 3205
a0524eb3 3206static void
b96a374d
AJ
3207ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3208 tree type, int *pretend_size ATTRIBUTE_UNUSED,
3209 int no_rtl)
ad919812
JH
3210{
3211 CUMULATIVE_ARGS next_cum;
3212 rtx save_area = NULL_RTX, mem;
3213 rtx label;
3214 rtx label_ref;
3215 rtx tmp_reg;
3216 rtx nsse_reg;
3217 int set;
3218 tree fntype;
3219 int stdarg_p;
3220 int i;
3221
3222 if (!TARGET_64BIT)
3223 return;
3224
3225 /* Indicate to allocate space on the stack for varargs save area. */
3226 ix86_save_varrargs_registers = 1;
3227
5474eed5
JH
3228 cfun->stack_alignment_needed = 128;
3229
ad919812
JH
3230 fntype = TREE_TYPE (current_function_decl);
3231 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
3232 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
3233 != void_type_node));
3234
3235 /* For varargs, we do not want to skip the dummy va_dcl argument.
3236 For stdargs, we do want to skip the last named argument. */
3237 next_cum = *cum;
3238 if (stdarg_p)
3239 function_arg_advance (&next_cum, mode, type, 1);
3240
3241 if (!no_rtl)
3242 save_area = frame_pointer_rtx;
3243
3244 set = get_varargs_alias_set ();
3245
5496b36f 3246 for (i = next_cum.regno; i < ix86_regparm; i++)
ad919812
JH
3247 {
3248 mem = gen_rtx_MEM (Pmode,
3249 plus_constant (save_area, i * UNITS_PER_WORD));
0692acba 3250 set_mem_alias_set (mem, set);
ad919812
JH
3251 emit_move_insn (mem, gen_rtx_REG (Pmode,
3252 x86_64_int_parameter_registers[i]));
3253 }
3254
5496b36f 3255 if (next_cum.sse_nregs)
ad919812
JH
3256 {
3257 /* Now emit code to save SSE registers. The AX parameter contains number
d1f87653 3258 of SSE parameter registers used to call this function. We use
ad919812
JH
3259 sse_prologue_save insn template that produces computed jump across
3260 SSE saves. We need some preparation work to get this working. */
3261
3262 label = gen_label_rtx ();
3263 label_ref = gen_rtx_LABEL_REF (Pmode, label);
3264
3265 /* Compute address to jump to :
3266 label - 5*eax + nnamed_sse_arguments*5 */
3267 tmp_reg = gen_reg_rtx (Pmode);
3268 nsse_reg = gen_reg_rtx (Pmode);
3269 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
3270 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
44cf5b6a 3271 gen_rtx_MULT (Pmode, nsse_reg,
ad919812
JH
3272 GEN_INT (4))));
3273 if (next_cum.sse_regno)
3274 emit_move_insn
3275 (nsse_reg,
3276 gen_rtx_CONST (DImode,
3277 gen_rtx_PLUS (DImode,
3278 label_ref,
3279 GEN_INT (next_cum.sse_regno * 4))));
3280 else
3281 emit_move_insn (nsse_reg, label_ref);
3282 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
3283
3284 /* Compute address of memory block we save into. We always use pointer
3285 pointing 127 bytes after first byte to store - this is needed to keep
3286 instruction size limited by 4 bytes. */
3287 tmp_reg = gen_reg_rtx (Pmode);
8ac61af7
RK
3288 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3289 plus_constant (save_area,
3290 8 * REGPARM_MAX + 127)));
ad919812 3291 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
14f73b5a 3292 set_mem_alias_set (mem, set);
8ac61af7 3293 set_mem_align (mem, BITS_PER_WORD);
ad919812
JH
3294
3295 /* And finally do the dirty job! */
8ac61af7
RK
3296 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
3297 GEN_INT (next_cum.sse_regno), label));
ad919812
JH
3298 }
3299
3300}
3301
3302/* Implement va_start. */
3303
3304void
b96a374d 3305ix86_va_start (tree valist, rtx nextarg)
ad919812
JH
3306{
3307 HOST_WIDE_INT words, n_gpr, n_fpr;
3308 tree f_gpr, f_fpr, f_ovf, f_sav;
3309 tree gpr, fpr, ovf, sav, t;
3310
3311 /* Only 64bit target needs something special. */
3312 if (!TARGET_64BIT)
3313 {
e5faf155 3314 std_expand_builtin_va_start (valist, nextarg);
ad919812
JH
3315 return;
3316 }
3317
3318 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3319 f_fpr = TREE_CHAIN (f_gpr);
3320 f_ovf = TREE_CHAIN (f_fpr);
3321 f_sav = TREE_CHAIN (f_ovf);
3322
3323 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
44de5aeb
RK
3324 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
3325 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
3326 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
3327 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
ad919812
JH
3328
3329 /* Count number of gp and fp argument registers used. */
3330 words = current_function_args_info.words;
3331 n_gpr = current_function_args_info.regno;
3332 n_fpr = current_function_args_info.sse_regno;
3333
3334 if (TARGET_DEBUG_ARG)
3335 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
b531087a 3336 (int) words, (int) n_gpr, (int) n_fpr);
ad919812 3337
5496b36f
JJ
3338 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
3339 build_int_cst (NULL_TREE, n_gpr * 8));
3340 TREE_SIDE_EFFECTS (t) = 1;
3341 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
ad919812 3342
5496b36f
JJ
3343 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
3344 build_int_cst (NULL_TREE, n_fpr * 16 + 8*REGPARM_MAX));
3345 TREE_SIDE_EFFECTS (t) = 1;
3346 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
ad919812
JH
3347
3348 /* Find the overflow area. */
3349 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
3350 if (words != 0)
3351 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
7d60be94 3352 build_int_cst (NULL_TREE, words * UNITS_PER_WORD));
ad919812
JH
3353 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3354 TREE_SIDE_EFFECTS (t) = 1;
3355 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3356
5496b36f
JJ
3357 /* Find the register save area.
3358 Prologue of the function save it right above stack frame. */
3359 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
3360 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
3361 TREE_SIDE_EFFECTS (t) = 1;
3362 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
ad919812
JH
3363}
3364
3365/* Implement va_arg. */
cd3ce9b4 3366
23a60a04
JM
3367tree
3368ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
cd3ce9b4 3369{
cd3ce9b4
JM
3370 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
3371 tree f_gpr, f_fpr, f_ovf, f_sav;
3372 tree gpr, fpr, ovf, sav, t;
3373 int size, rsize;
3374 tree lab_false, lab_over = NULL_TREE;
3375 tree addr, t2;
3376 rtx container;
3377 int indirect_p = 0;
3378 tree ptrtype;
52cf10a3 3379 enum machine_mode nat_mode;
cd3ce9b4
JM
3380
3381 /* Only 64bit target needs something special. */
3382 if (!TARGET_64BIT)
23a60a04 3383 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
cd3ce9b4
JM
3384
3385 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3386 f_fpr = TREE_CHAIN (f_gpr);
3387 f_ovf = TREE_CHAIN (f_fpr);
3388 f_sav = TREE_CHAIN (f_ovf);
3389
c2433d7d 3390 valist = build_va_arg_indirect_ref (valist);
44de5aeb
RK
3391 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
3392 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
3393 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
3394 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
cd3ce9b4 3395
08b0dc1b
RH
3396 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
3397 if (indirect_p)
3398 type = build_pointer_type (type);
cd3ce9b4 3399 size = int_size_in_bytes (type);
cd3ce9b4
JM
3400 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3401
52cf10a3
RH
3402 nat_mode = type_natural_mode (type);
3403 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
3404 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
6c4ccfd8
RH
3405
3406 /* Pull the value out of the saved registers. */
cd3ce9b4
JM
3407
3408 addr = create_tmp_var (ptr_type_node, "addr");
3409 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
3410
3411 if (container)
3412 {
3413 int needed_intregs, needed_sseregs;
e52a6df5 3414 bool need_temp;
cd3ce9b4
JM
3415 tree int_addr, sse_addr;
3416
3417 lab_false = create_artificial_label ();
3418 lab_over = create_artificial_label ();
3419
52cf10a3 3420 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
cd3ce9b4 3421
e52a6df5
JB
3422 need_temp = (!REG_P (container)
3423 && ((needed_intregs && TYPE_ALIGN (type) > 64)
3424 || TYPE_ALIGN (type) > 128));
cd3ce9b4
JM
3425
3426 /* In case we are passing structure, verify that it is consecutive block
3427 on the register save area. If not we need to do moves. */
3428 if (!need_temp && !REG_P (container))
3429 {
3430 /* Verify that all registers are strictly consecutive */
3431 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
3432 {
3433 int i;
3434
3435 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3436 {
3437 rtx slot = XVECEXP (container, 0, i);
3438 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
3439 || INTVAL (XEXP (slot, 1)) != i * 16)
3440 need_temp = 1;
3441 }
3442 }
3443 else
3444 {
3445 int i;
3446
3447 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3448 {
3449 rtx slot = XVECEXP (container, 0, i);
3450 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
3451 || INTVAL (XEXP (slot, 1)) != i * 8)
3452 need_temp = 1;
3453 }
3454 }
3455 }
3456 if (!need_temp)
3457 {
3458 int_addr = addr;
3459 sse_addr = addr;
3460 }
3461 else
3462 {
3463 int_addr = create_tmp_var (ptr_type_node, "int_addr");
3464 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
3465 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
3466 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
3467 }
56d76b69 3468
cd3ce9b4
JM
3469 /* First ensure that we fit completely in registers. */
3470 if (needed_intregs)
3471 {
4a90aeeb 3472 t = build_int_cst (TREE_TYPE (gpr),
7d60be94 3473 (REGPARM_MAX - needed_intregs + 1) * 8);
cd3ce9b4
JM
3474 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
3475 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
3476 t = build (COND_EXPR, void_type_node, t, t2, NULL_TREE);
3477 gimplify_and_add (t, pre_p);
3478 }
3479 if (needed_sseregs)
3480 {
4a90aeeb
NS
3481 t = build_int_cst (TREE_TYPE (fpr),
3482 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
7d60be94 3483 + REGPARM_MAX * 8);
cd3ce9b4
JM
3484 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
3485 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
3486 t = build (COND_EXPR, void_type_node, t, t2, NULL_TREE);
3487 gimplify_and_add (t, pre_p);
3488 }
3489
3490 /* Compute index to start of area used for integer regs. */
3491 if (needed_intregs)
3492 {
3493 /* int_addr = gpr + sav; */
56d76b69
RH
3494 t = fold_convert (ptr_type_node, gpr);
3495 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
cd3ce9b4
JM
3496 t = build2 (MODIFY_EXPR, void_type_node, int_addr, t);
3497 gimplify_and_add (t, pre_p);
3498 }
3499 if (needed_sseregs)
3500 {
3501 /* sse_addr = fpr + sav; */
56d76b69
RH
3502 t = fold_convert (ptr_type_node, fpr);
3503 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
cd3ce9b4
JM
3504 t = build2 (MODIFY_EXPR, void_type_node, sse_addr, t);
3505 gimplify_and_add (t, pre_p);
3506 }
3507 if (need_temp)
3508 {
3509 int i;
3510 tree temp = create_tmp_var (type, "va_arg_tmp");
3511
3512 /* addr = &temp; */
3513 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
3514 t = build2 (MODIFY_EXPR, void_type_node, addr, t);
3515 gimplify_and_add (t, pre_p);
f676971a 3516
cd3ce9b4
JM
3517 for (i = 0; i < XVECLEN (container, 0); i++)
3518 {
3519 rtx slot = XVECEXP (container, 0, i);
3520 rtx reg = XEXP (slot, 0);
3521 enum machine_mode mode = GET_MODE (reg);
3522 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
3523 tree addr_type = build_pointer_type (piece_type);
3524 tree src_addr, src;
3525 int src_offset;
3526 tree dest_addr, dest;
3527
3528 if (SSE_REGNO_P (REGNO (reg)))
3529 {
3530 src_addr = sse_addr;
3531 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
3532 }
3533 else
3534 {
3535 src_addr = int_addr;
3536 src_offset = REGNO (reg) * 8;
3537 }
8fe75e43
RH
3538 src_addr = fold_convert (addr_type, src_addr);
3539 src_addr = fold (build2 (PLUS_EXPR, addr_type, src_addr,
3540 size_int (src_offset)));
c2433d7d 3541 src = build_va_arg_indirect_ref (src_addr);
e6e81735 3542
8fe75e43
RH
3543 dest_addr = fold_convert (addr_type, addr);
3544 dest_addr = fold (build2 (PLUS_EXPR, addr_type, dest_addr,
3545 size_int (INTVAL (XEXP (slot, 1)))));
c2433d7d 3546 dest = build_va_arg_indirect_ref (dest_addr);
3a3677ff 3547
8fe75e43
RH
3548 t = build2 (MODIFY_EXPR, void_type_node, dest, src);
3549 gimplify_and_add (t, pre_p);
3550 }
3551 }
e6e81735 3552
8fe75e43
RH
3553 if (needed_intregs)
3554 {
3555 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
56d76b69 3556 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
8fe75e43
RH
3557 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
3558 gimplify_and_add (t, pre_p);
3559 }
3560 if (needed_sseregs)
3561 {
4a90aeeb 3562 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
56d76b69 3563 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
8fe75e43
RH
3564 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
3565 gimplify_and_add (t, pre_p);
3566 }
e6e81735 3567
8fe75e43
RH
3568 t = build1 (GOTO_EXPR, void_type_node, lab_over);
3569 gimplify_and_add (t, pre_p);
3570
3571 t = build1 (LABEL_EXPR, void_type_node, lab_false);
3572 append_to_statement_list (t, pre_p);
3a3677ff 3573 }
b840bfb0 3574
8fe75e43 3575 /* ... otherwise out of the overflow area. */
e9e80858 3576
8fe75e43
RH
3577 /* Care for on-stack alignment if needed. */
3578 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
3579 t = ovf;
3580 else
e9e80858 3581 {
8fe75e43 3582 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
4a90aeeb 3583 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf,
56d76b69 3584 build_int_cst (TREE_TYPE (ovf), align - 1));
4a90aeeb 3585 t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
56d76b69 3586 build_int_cst (TREE_TYPE (t), -align));
e9e80858 3587 }
8fe75e43 3588 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
e075ae69 3589
8fe75e43
RH
3590 t2 = build2 (MODIFY_EXPR, void_type_node, addr, t);
3591 gimplify_and_add (t2, pre_p);
e075ae69 3592
8fe75e43 3593 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
56d76b69 3594 build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD));
8fe75e43
RH
3595 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3596 gimplify_and_add (t, pre_p);
e075ae69 3597
8fe75e43 3598 if (container)
2a2ab3f9 3599 {
8fe75e43
RH
3600 t = build1 (LABEL_EXPR, void_type_node, lab_over);
3601 append_to_statement_list (t, pre_p);
2a2ab3f9 3602 }
e075ae69 3603
8fe75e43
RH
3604 ptrtype = build_pointer_type (type);
3605 addr = fold_convert (ptrtype, addr);
0a726ef1 3606
8fe75e43 3607 if (indirect_p)
c2433d7d
FCE
3608 addr = build_va_arg_indirect_ref (addr);
3609 return build_va_arg_indirect_ref (addr);
0a726ef1 3610}
8fe75e43
RH
3611\f
3612/* Return nonzero if OPNUM's MEM should be matched
3613 in movabs* patterns. */
fee2770d
RS
3614
3615int
8fe75e43 3616ix86_check_movabs (rtx insn, int opnum)
4f2c8ebb 3617{
8fe75e43 3618 rtx set, mem;
e075ae69 3619
8fe75e43
RH
3620 set = PATTERN (insn);
3621 if (GET_CODE (set) == PARALLEL)
3622 set = XVECEXP (set, 0, 0);
3623 if (GET_CODE (set) != SET)
e075ae69 3624 abort ();
8fe75e43
RH
3625 mem = XEXP (set, opnum);
3626 while (GET_CODE (mem) == SUBREG)
3627 mem = SUBREG_REG (mem);
3628 if (GET_CODE (mem) != MEM)
2247f6ed 3629 abort ();
8fe75e43 3630 return (volatile_ok || !MEM_VOLATILE_P (mem));
2247f6ed 3631}
e075ae69 3632\f
881b2a96
RS
3633/* Initialize the table of extra 80387 mathematical constants. */
3634
3635static void
b96a374d 3636init_ext_80387_constants (void)
881b2a96
RS
3637{
3638 static const char * cst[5] =
3639 {
3640 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
3641 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
3642 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
3643 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
3644 "3.1415926535897932385128089594061862044", /* 4: fldpi */
3645 };
3646 int i;
3647
3648 for (i = 0; i < 5; i++)
3649 {
3650 real_from_string (&ext_80387_constants_table[i], cst[i]);
3651 /* Ensure each constant is rounded to XFmode precision. */
1f48e56d 3652 real_convert (&ext_80387_constants_table[i],
f8a1ebc6 3653 XFmode, &ext_80387_constants_table[i]);
881b2a96
RS
3654 }
3655
3656 ext_80387_constants_init = 1;
3657}
3658
e075ae69 3659/* Return true if the constant is something that can be loaded with
881b2a96 3660 a special instruction. */
57dbca5e
BS
3661
3662int
b96a374d 3663standard_80387_constant_p (rtx x)
57dbca5e 3664{
2b04e52b 3665 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
e075ae69 3666 return -1;
881b2a96 3667
2b04e52b
JH
3668 if (x == CONST0_RTX (GET_MODE (x)))
3669 return 1;
3670 if (x == CONST1_RTX (GET_MODE (x)))
3671 return 2;
881b2a96 3672
22cc69c4
RS
3673 /* For XFmode constants, try to find a special 80387 instruction when
3674 optimizing for size or on those CPUs that benefit from them. */
f8a1ebc6 3675 if (GET_MODE (x) == XFmode
22cc69c4 3676 && (optimize_size || x86_ext_80387_constants & TUNEMASK))
881b2a96
RS
3677 {
3678 REAL_VALUE_TYPE r;
3679 int i;
3680
3681 if (! ext_80387_constants_init)
3682 init_ext_80387_constants ();
3683
3684 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3685 for (i = 0; i < 5; i++)
3686 if (real_identical (&r, &ext_80387_constants_table[i]))
3687 return i + 3;
3688 }
3689
e075ae69 3690 return 0;
57dbca5e
BS
3691}
3692
881b2a96
RS
3693/* Return the opcode of the special instruction to be used to load
3694 the constant X. */
3695
3696const char *
b96a374d 3697standard_80387_constant_opcode (rtx x)
881b2a96
RS
3698{
3699 switch (standard_80387_constant_p (x))
3700 {
b96a374d 3701 case 1:
881b2a96
RS
3702 return "fldz";
3703 case 2:
3704 return "fld1";
b96a374d 3705 case 3:
881b2a96
RS
3706 return "fldlg2";
3707 case 4:
3708 return "fldln2";
b96a374d 3709 case 5:
881b2a96
RS
3710 return "fldl2e";
3711 case 6:
3712 return "fldl2t";
b96a374d 3713 case 7:
881b2a96
RS
3714 return "fldpi";
3715 }
3716 abort ();
3717}
3718
3719/* Return the CONST_DOUBLE representing the 80387 constant that is
3720 loaded by the specified special instruction. The argument IDX
3721 matches the return value from standard_80387_constant_p. */
3722
3723rtx
b96a374d 3724standard_80387_constant_rtx (int idx)
881b2a96
RS
3725{
3726 int i;
3727
3728 if (! ext_80387_constants_init)
3729 init_ext_80387_constants ();
3730
3731 switch (idx)
3732 {
3733 case 3:
3734 case 4:
3735 case 5:
3736 case 6:
3737 case 7:
3738 i = idx - 3;
3739 break;
3740
3741 default:
3742 abort ();
3743 }
3744
1f48e56d 3745 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
f8a1ebc6 3746 XFmode);
881b2a96
RS
3747}
3748
2b04e52b
JH
3749/* Return 1 if X is FP constant we can load to SSE register w/o using memory.
3750 */
3751int
b96a374d 3752standard_sse_constant_p (rtx x)
2b04e52b 3753{
0e67d460
JH
3754 if (x == const0_rtx)
3755 return 1;
2b04e52b
JH
3756 return (x == CONST0_RTX (GET_MODE (x)));
3757}
3758
2a2ab3f9
JVA
3759/* Returns 1 if OP contains a symbol reference */
3760
3761int
b96a374d 3762symbolic_reference_mentioned_p (rtx op)
2a2ab3f9 3763{
8d531ab9
KH
3764 const char *fmt;
3765 int i;
2a2ab3f9
JVA
3766
3767 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
3768 return 1;
3769
3770 fmt = GET_RTX_FORMAT (GET_CODE (op));
3771 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3772 {
3773 if (fmt[i] == 'E')
3774 {
8d531ab9 3775 int j;
2a2ab3f9
JVA
3776
3777 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3778 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3779 return 1;
3780 }
e9a25f70 3781
2a2ab3f9
JVA
3782 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3783 return 1;
3784 }
3785
3786 return 0;
3787}
e075ae69
RH
3788
3789/* Return 1 if it is appropriate to emit `ret' instructions in the
3790 body of a function. Do this only if the epilogue is simple, needing a
3791 couple of insns. Prior to reloading, we can't tell how many registers
3792 must be saved, so return 0 then. Return 0 if there is no frame
6e14af16 3793 marker to de-allocate. */
32b5b1aa
SC
3794
3795int
b96a374d 3796ix86_can_use_return_insn_p (void)
32b5b1aa 3797{
4dd2ac2c 3798 struct ix86_frame frame;
9a7372d6 3799
9a7372d6
RH
3800 if (! reload_completed || frame_pointer_needed)
3801 return 0;
32b5b1aa 3802
9a7372d6
RH
3803 /* Don't allow more than 32 pop, since that's all we can do
3804 with one instruction. */
3805 if (current_function_pops_args
3806 && current_function_args_size >= 32768)
e075ae69 3807 return 0;
32b5b1aa 3808
4dd2ac2c
JH
3809 ix86_compute_frame_layout (&frame);
3810 return frame.to_allocate == 0 && frame.nregs == 0;
e075ae69 3811}
6189a572 3812\f
6fca22eb
RH
3813/* Value should be nonzero if functions must have frame pointers.
3814 Zero means the frame pointer need not be set up (and parms may
3815 be accessed via the stack pointer) in functions that seem suitable. */
3816
3817int
b96a374d 3818ix86_frame_pointer_required (void)
6fca22eb
RH
3819{
3820 /* If we accessed previous frames, then the generated code expects
3821 to be able to access the saved ebp value in our frame. */
3822 if (cfun->machine->accesses_prev_frame)
3823 return 1;
a4f31c00 3824
6fca22eb
RH
3825 /* Several x86 os'es need a frame pointer for other reasons,
3826 usually pertaining to setjmp. */
3827 if (SUBTARGET_FRAME_POINTER_REQUIRED)
3828 return 1;
3829
3830 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
3831 the frame pointer by default. Turn it back on now if we've not
3832 got a leaf function. */
a7943381 3833 if (TARGET_OMIT_LEAF_FRAME_POINTER
55ba61f3
JH
3834 && (!current_function_is_leaf))
3835 return 1;
3836
3837 if (current_function_profile)
6fca22eb
RH
3838 return 1;
3839
3840 return 0;
3841}
3842
3843/* Record that the current function accesses previous call frames. */
3844
3845void
b96a374d 3846ix86_setup_frame_addresses (void)
6fca22eb
RH
3847{
3848 cfun->machine->accesses_prev_frame = 1;
3849}
e075ae69 3850\f
145aacc2
RH
3851#if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
3852# define USE_HIDDEN_LINKONCE 1
3853#else
3854# define USE_HIDDEN_LINKONCE 0
3855#endif
3856
bd09bdeb 3857static int pic_labels_used;
e9a25f70 3858
145aacc2
RH
3859/* Fills in the label name that should be used for a pc thunk for
3860 the given register. */
3861
3862static void
b96a374d 3863get_pc_thunk_name (char name[32], unsigned int regno)
145aacc2
RH
3864{
3865 if (USE_HIDDEN_LINKONCE)
3866 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
3867 else
3868 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
3869}
3870
3871
e075ae69
RH
3872/* This function generates code for -fpic that loads %ebx with
3873 the return address of the caller and then returns. */
3874
3875void
b96a374d 3876ix86_file_end (void)
e075ae69
RH
3877{
3878 rtx xops[2];
bd09bdeb 3879 int regno;
32b5b1aa 3880
bd09bdeb 3881 for (regno = 0; regno < 8; ++regno)
7c262518 3882 {
145aacc2
RH
3883 char name[32];
3884
bd09bdeb
RH
3885 if (! ((pic_labels_used >> regno) & 1))
3886 continue;
3887
145aacc2 3888 get_pc_thunk_name (name, regno);
bd09bdeb 3889
145aacc2
RH
3890 if (USE_HIDDEN_LINKONCE)
3891 {
3892 tree decl;
3893
3894 decl = build_decl (FUNCTION_DECL, get_identifier (name),
3895 error_mark_node);
3896 TREE_PUBLIC (decl) = 1;
3897 TREE_STATIC (decl) = 1;
3898 DECL_ONE_ONLY (decl) = 1;
3899
3900 (*targetm.asm_out.unique_section) (decl, 0);
3901 named_section (decl, NULL, 0);
3902
a5fe455b
ZW
3903 (*targetm.asm_out.globalize_label) (asm_out_file, name);
3904 fputs ("\t.hidden\t", asm_out_file);
3905 assemble_name (asm_out_file, name);
3906 fputc ('\n', asm_out_file);
3907 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
145aacc2
RH
3908 }
3909 else
3910 {
3911 text_section ();
a5fe455b 3912 ASM_OUTPUT_LABEL (asm_out_file, name);
145aacc2 3913 }
bd09bdeb
RH
3914
3915 xops[0] = gen_rtx_REG (SImode, regno);
3916 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
3917 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
3918 output_asm_insn ("ret", xops);
7c262518 3919 }
3edc56a9 3920
a5fe455b
ZW
3921 if (NEED_INDICATE_EXEC_STACK)
3922 file_end_indicate_exec_stack ();
32b5b1aa 3923}
32b5b1aa 3924
c8c03509 3925/* Emit code for the SET_GOT patterns. */
32b5b1aa 3926
c8c03509 3927const char *
b96a374d 3928output_set_got (rtx dest)
c8c03509
RH
3929{
3930 rtx xops[3];
0d7d98ee 3931
c8c03509 3932 xops[0] = dest;
5fc0e5df 3933 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
32b5b1aa 3934
c8c03509 3935 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
32b5b1aa 3936 {
c8c03509
RH
3937 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
3938
3939 if (!flag_pic)
3940 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
3941 else
3942 output_asm_insn ("call\t%a2", xops);
3943
b069de3b
SS
3944#if TARGET_MACHO
3945 /* Output the "canonical" label name ("Lxx$pb") here too. This
3946 is what will be referred to by the Mach-O PIC subsystem. */
3947 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
3948#endif
4977bab6 3949 (*targetm.asm_out.internal_label) (asm_out_file, "L",
c8c03509
RH
3950 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
3951
3952 if (flag_pic)
3953 output_asm_insn ("pop{l}\t%0", xops);
32b5b1aa 3954 }
e075ae69 3955 else
e5cb57e8 3956 {
145aacc2
RH
3957 char name[32];
3958 get_pc_thunk_name (name, REGNO (dest));
bd09bdeb 3959 pic_labels_used |= 1 << REGNO (dest);
f996902d 3960
145aacc2 3961 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
c8c03509
RH
3962 xops[2] = gen_rtx_MEM (QImode, xops[2]);
3963 output_asm_insn ("call\t%X2", xops);
e5cb57e8 3964 }
e5cb57e8 3965
c8c03509
RH
3966 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
3967 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
b069de3b 3968 else if (!TARGET_MACHO)
8e9fadc3 3969 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
79325812 3970
c8c03509 3971 return "";
e9a25f70 3972}
8dfe5673 3973
0d7d98ee 3974/* Generate an "push" pattern for input ARG. */
e9a25f70 3975
e075ae69 3976static rtx
b96a374d 3977gen_push (rtx arg)
e9a25f70 3978{
c5c76735 3979 return gen_rtx_SET (VOIDmode,
0d7d98ee
JH
3980 gen_rtx_MEM (Pmode,
3981 gen_rtx_PRE_DEC (Pmode,
c5c76735
JL
3982 stack_pointer_rtx)),
3983 arg);
e9a25f70
JL
3984}
3985
bd09bdeb
RH
3986/* Return >= 0 if there is an unused call-clobbered register available
3987 for the entire function. */
3988
3989static unsigned int
b96a374d 3990ix86_select_alt_pic_regnum (void)
bd09bdeb
RH
3991{
3992 if (current_function_is_leaf && !current_function_profile)
3993 {
3994 int i;
3995 for (i = 2; i >= 0; --i)
3996 if (!regs_ever_live[i])
3997 return i;
3998 }
3999
4000 return INVALID_REGNUM;
4001}
fce5a9f2 4002
4dd2ac2c
JH
4003/* Return 1 if we need to save REGNO. */
4004static int
b96a374d 4005ix86_save_reg (unsigned int regno, int maybe_eh_return)
1020a5ab 4006{
bd09bdeb
RH
4007 if (pic_offset_table_rtx
4008 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4009 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
66edd3b4 4010 || current_function_profile
8c38a24f
MM
4011 || current_function_calls_eh_return
4012 || current_function_uses_const_pool))
bd09bdeb
RH
4013 {
4014 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4015 return 0;
4016 return 1;
4017 }
1020a5ab
RH
4018
4019 if (current_function_calls_eh_return && maybe_eh_return)
4020 {
4021 unsigned i;
4022 for (i = 0; ; i++)
4023 {
b531087a 4024 unsigned test = EH_RETURN_DATA_REGNO (i);
1020a5ab
RH
4025 if (test == INVALID_REGNUM)
4026 break;
9b690711 4027 if (test == regno)
1020a5ab
RH
4028 return 1;
4029 }
4030 }
4dd2ac2c 4031
1020a5ab
RH
4032 return (regs_ever_live[regno]
4033 && !call_used_regs[regno]
4034 && !fixed_regs[regno]
4035 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4dd2ac2c
JH
4036}
4037
0903fcab
JH
4038/* Return number of registers to be saved on the stack. */
4039
4040static int
b96a374d 4041ix86_nsaved_regs (void)
0903fcab
JH
4042{
4043 int nregs = 0;
0903fcab
JH
4044 int regno;
4045
4dd2ac2c 4046 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
1020a5ab 4047 if (ix86_save_reg (regno, true))
4dd2ac2c 4048 nregs++;
0903fcab
JH
4049 return nregs;
4050}
4051
4052/* Return the offset between two registers, one to be eliminated, and the other
4053 its replacement, at the start of a routine. */
4054
4055HOST_WIDE_INT
b96a374d 4056ix86_initial_elimination_offset (int from, int to)
0903fcab 4057{
4dd2ac2c
JH
4058 struct ix86_frame frame;
4059 ix86_compute_frame_layout (&frame);
564d80f4
JH
4060
4061 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4dd2ac2c 4062 return frame.hard_frame_pointer_offset;
564d80f4
JH
4063 else if (from == FRAME_POINTER_REGNUM
4064 && to == HARD_FRAME_POINTER_REGNUM)
4dd2ac2c 4065 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
0903fcab
JH
4066 else
4067 {
564d80f4
JH
4068 if (to != STACK_POINTER_REGNUM)
4069 abort ();
4070 else if (from == ARG_POINTER_REGNUM)
4dd2ac2c 4071 return frame.stack_pointer_offset;
564d80f4
JH
4072 else if (from != FRAME_POINTER_REGNUM)
4073 abort ();
0903fcab 4074 else
4dd2ac2c 4075 return frame.stack_pointer_offset - frame.frame_pointer_offset;
0903fcab
JH
4076 }
4077}
4078
4dd2ac2c 4079/* Fill structure ix86_frame about frame of currently computed function. */
0f290768 4080
4dd2ac2c 4081static void
b96a374d 4082ix86_compute_frame_layout (struct ix86_frame *frame)
65954bd8 4083{
65954bd8 4084 HOST_WIDE_INT total_size;
95899b34 4085 unsigned int stack_alignment_needed;
b19ee4bd 4086 HOST_WIDE_INT offset;
95899b34 4087 unsigned int preferred_alignment;
4dd2ac2c 4088 HOST_WIDE_INT size = get_frame_size ();
65954bd8 4089
4dd2ac2c 4090 frame->nregs = ix86_nsaved_regs ();
564d80f4 4091 total_size = size;
65954bd8 4092
95899b34
RH
4093 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
4094 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4095
d7394366
JH
4096 /* During reload iteration the amount of registers saved can change.
4097 Recompute the value as needed. Do not recompute when amount of registers
4098 didn't change as reload does mutiple calls to the function and does not
4099 expect the decision to change within single iteration. */
4100 if (!optimize_size
4101 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
d9b40e8d
JH
4102 {
4103 int count = frame->nregs;
4104
d7394366 4105 cfun->machine->use_fast_prologue_epilogue_nregs = count;
d9b40e8d
JH
4106 /* The fast prologue uses move instead of push to save registers. This
4107 is significantly longer, but also executes faster as modern hardware
4108 can execute the moves in parallel, but can't do that for push/pop.
b96a374d 4109
d9b40e8d
JH
4110 Be careful about choosing what prologue to emit: When function takes
4111 many instructions to execute we may use slow version as well as in
4112 case function is known to be outside hot spot (this is known with
4113 feedback only). Weight the size of function by number of registers
4114 to save as it is cheap to use one or two push instructions but very
4115 slow to use many of them. */
4116 if (count)
4117 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
4118 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
4119 || (flag_branch_probabilities
4120 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
4121 cfun->machine->use_fast_prologue_epilogue = false;
4122 else
4123 cfun->machine->use_fast_prologue_epilogue
4124 = !expensive_function_p (count);
4125 }
4126 if (TARGET_PROLOGUE_USING_MOVE
4127 && cfun->machine->use_fast_prologue_epilogue)
4128 frame->save_regs_using_mov = true;
4129 else
4130 frame->save_regs_using_mov = false;
4131
4132
9ba81eaa 4133 /* Skip return address and saved base pointer. */
4dd2ac2c
JH
4134 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4135
4136 frame->hard_frame_pointer_offset = offset;
564d80f4 4137
fcbfaa65
RK
4138 /* Do some sanity checking of stack_alignment_needed and
4139 preferred_alignment, since i386 port is the only using those features
f710504c 4140 that may break easily. */
564d80f4 4141
44affdae
JH
4142 if (size && !stack_alignment_needed)
4143 abort ();
44affdae
JH
4144 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
4145 abort ();
4146 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4147 abort ();
4148 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4149 abort ();
564d80f4 4150
4dd2ac2c
JH
4151 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
4152 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
564d80f4 4153
4dd2ac2c
JH
4154 /* Register save area */
4155 offset += frame->nregs * UNITS_PER_WORD;
65954bd8 4156
8362f420
JH
4157 /* Va-arg area */
4158 if (ix86_save_varrargs_registers)
4159 {
4160 offset += X86_64_VARARGS_SIZE;
4161 frame->va_arg_size = X86_64_VARARGS_SIZE;
4162 }
4163 else
4164 frame->va_arg_size = 0;
4165
4dd2ac2c
JH
4166 /* Align start of frame for local function. */
4167 frame->padding1 = ((offset + stack_alignment_needed - 1)
4168 & -stack_alignment_needed) - offset;
f73ad30e 4169
4dd2ac2c 4170 offset += frame->padding1;
65954bd8 4171
4dd2ac2c
JH
4172 /* Frame pointer points here. */
4173 frame->frame_pointer_offset = offset;
54ff41b7 4174
4dd2ac2c 4175 offset += size;
65954bd8 4176
0b7ae565 4177 /* Add outgoing arguments area. Can be skipped if we eliminated
965514bd
JH
4178 all the function calls as dead code.
4179 Skipping is however impossible when function calls alloca. Alloca
4180 expander assumes that last current_function_outgoing_args_size
4181 of stack frame are unused. */
4182 if (ACCUMULATE_OUTGOING_ARGS
4183 && (!current_function_is_leaf || current_function_calls_alloca))
4dd2ac2c
JH
4184 {
4185 offset += current_function_outgoing_args_size;
4186 frame->outgoing_arguments_size = current_function_outgoing_args_size;
4187 }
4188 else
4189 frame->outgoing_arguments_size = 0;
564d80f4 4190
002ff5bc
RH
4191 /* Align stack boundary. Only needed if we're calling another function
4192 or using alloca. */
4193 if (!current_function_is_leaf || current_function_calls_alloca)
0b7ae565
RH
4194 frame->padding2 = ((offset + preferred_alignment - 1)
4195 & -preferred_alignment) - offset;
4196 else
4197 frame->padding2 = 0;
4dd2ac2c
JH
4198
4199 offset += frame->padding2;
4200
4201 /* We've reached end of stack frame. */
4202 frame->stack_pointer_offset = offset;
4203
4204 /* Size prologue needs to allocate. */
4205 frame->to_allocate =
4206 (size + frame->padding1 + frame->padding2
8362f420 4207 + frame->outgoing_arguments_size + frame->va_arg_size);
4dd2ac2c 4208
b19ee4bd
JJ
4209 if ((!frame->to_allocate && frame->nregs <= 1)
4210 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
d9b40e8d
JH
4211 frame->save_regs_using_mov = false;
4212
a5b378d6 4213 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
8362f420
JH
4214 && current_function_is_leaf)
4215 {
4216 frame->red_zone_size = frame->to_allocate;
d9b40e8d
JH
4217 if (frame->save_regs_using_mov)
4218 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
8362f420
JH
4219 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
4220 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
4221 }
4222 else
4223 frame->red_zone_size = 0;
4224 frame->to_allocate -= frame->red_zone_size;
4225 frame->stack_pointer_offset -= frame->red_zone_size;
4dd2ac2c
JH
4226#if 0
4227 fprintf (stderr, "nregs: %i\n", frame->nregs);
4228 fprintf (stderr, "size: %i\n", size);
4229 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
4230 fprintf (stderr, "padding1: %i\n", frame->padding1);
8362f420 4231 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
4dd2ac2c
JH
4232 fprintf (stderr, "padding2: %i\n", frame->padding2);
4233 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
8362f420 4234 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
4dd2ac2c
JH
4235 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
4236 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
4237 frame->hard_frame_pointer_offset);
4238 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
4239#endif
65954bd8
JL
4240}
4241
0903fcab
JH
4242/* Emit code to save registers in the prologue. */
4243
4244static void
b96a374d 4245ix86_emit_save_regs (void)
0903fcab 4246{
8d531ab9 4247 int regno;
0903fcab 4248 rtx insn;
0903fcab 4249
4dd2ac2c 4250 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
1020a5ab 4251 if (ix86_save_reg (regno, true))
0903fcab 4252 {
0d7d98ee 4253 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
0903fcab
JH
4254 RTX_FRAME_RELATED_P (insn) = 1;
4255 }
4256}
4257
c6036a37
JH
4258/* Emit code to save registers using MOV insns. First register
4259 is restored from POINTER + OFFSET. */
4260static void
b96a374d 4261ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
c6036a37
JH
4262{
4263 int regno;
4264 rtx insn;
4265
4266 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4267 if (ix86_save_reg (regno, true))
4268 {
b72f00af
RK
4269 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
4270 Pmode, offset),
c6036a37
JH
4271 gen_rtx_REG (Pmode, regno));
4272 RTX_FRAME_RELATED_P (insn) = 1;
4273 offset += UNITS_PER_WORD;
4274 }
4275}
4276
839a4992 4277/* Expand prologue or epilogue stack adjustment.
b19ee4bd
JJ
4278 The pattern exist to put a dependency on all ebp-based memory accesses.
4279 STYLE should be negative if instructions should be marked as frame related,
4280 zero if %r11 register is live and cannot be freely used and positive
4281 otherwise. */
4282
4283static void
4284pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
4285{
4286 rtx insn;
4287
4288 if (! TARGET_64BIT)
4289 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
4290 else if (x86_64_immediate_operand (offset, DImode))
4291 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
4292 else
4293 {
4294 rtx r11;
4295 /* r11 is used by indirect sibcall return as well, set before the
4296 epilogue and used after the epilogue. ATM indirect sibcall
4297 shouldn't be used together with huge frame sizes in one
4298 function because of the frame_size check in sibcall.c. */
4299 if (style == 0)
4300 abort ();
4301 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
4302 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
4303 if (style < 0)
4304 RTX_FRAME_RELATED_P (insn) = 1;
4305 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
4306 offset));
4307 }
4308 if (style < 0)
4309 RTX_FRAME_RELATED_P (insn) = 1;
4310}
4311
0f290768 4312/* Expand the prologue into a bunch of separate insns. */
e075ae69
RH
4313
4314void
b96a374d 4315ix86_expand_prologue (void)
2a2ab3f9 4316{
564d80f4 4317 rtx insn;
bd09bdeb 4318 bool pic_reg_used;
4dd2ac2c 4319 struct ix86_frame frame;
c6036a37 4320 HOST_WIDE_INT allocate;
4dd2ac2c 4321
4977bab6 4322 ix86_compute_frame_layout (&frame);
79325812 4323
e075ae69
RH
4324 /* Note: AT&T enter does NOT have reversed args. Enter is probably
4325 slower on all targets. Also sdb doesn't like it. */
e9a25f70 4326
2a2ab3f9
JVA
4327 if (frame_pointer_needed)
4328 {
564d80f4 4329 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
e075ae69 4330 RTX_FRAME_RELATED_P (insn) = 1;
e9a25f70 4331
564d80f4 4332 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
e075ae69 4333 RTX_FRAME_RELATED_P (insn) = 1;
2a2ab3f9
JVA
4334 }
4335
c6036a37 4336 allocate = frame.to_allocate;
c6036a37 4337
d9b40e8d 4338 if (!frame.save_regs_using_mov)
c6036a37
JH
4339 ix86_emit_save_regs ();
4340 else
4341 allocate += frame.nregs * UNITS_PER_WORD;
564d80f4 4342
d9b40e8d
JH
4343 /* When using red zone we may start register saving before allocating
4344 the stack frame saving one cycle of the prologue. */
4345 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
4346 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
4347 : stack_pointer_rtx,
4348 -frame.nregs * UNITS_PER_WORD);
4349
c6036a37 4350 if (allocate == 0)
8dfe5673 4351 ;
e323735c 4352 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
b19ee4bd
JJ
4353 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
4354 GEN_INT (-allocate), -1);
79325812 4355 else
8dfe5673 4356 {
fe9f516f
RH
4357 /* Only valid for Win32. */
4358 rtx eax = gen_rtx_REG (SImode, 0);
4359 bool eax_live = ix86_eax_live_at_start_p ();
5fc94ac4 4360 rtx t;
e9a25f70 4361
8362f420 4362 if (TARGET_64BIT)
b1177d69 4363 abort ();
e075ae69 4364
fe9f516f
RH
4365 if (eax_live)
4366 {
4367 emit_insn (gen_push (eax));
4368 allocate -= 4;
4369 }
4370
5fc94ac4 4371 emit_move_insn (eax, GEN_INT (allocate));
98417968 4372
b1177d69
KC
4373 insn = emit_insn (gen_allocate_stack_worker (eax));
4374 RTX_FRAME_RELATED_P (insn) = 1;
5fc94ac4
RH
4375 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
4376 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
4377 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
4378 t, REG_NOTES (insn));
fe9f516f
RH
4379
4380 if (eax_live)
4381 {
ea5f7a19
RS
4382 if (frame_pointer_needed)
4383 t = plus_constant (hard_frame_pointer_rtx,
4384 allocate
4385 - frame.to_allocate
4386 - frame.nregs * UNITS_PER_WORD);
4387 else
4388 t = plus_constant (stack_pointer_rtx, allocate);
fe9f516f
RH
4389 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
4390 }
e075ae69 4391 }
fe9f516f 4392
d9b40e8d 4393 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
c6036a37
JH
4394 {
4395 if (!frame_pointer_needed || !frame.to_allocate)
4396 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
4397 else
4398 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
4399 -frame.nregs * UNITS_PER_WORD);
4400 }
e9a25f70 4401
bd09bdeb
RH
4402 pic_reg_used = false;
4403 if (pic_offset_table_rtx
4404 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4405 || current_function_profile))
4406 {
4407 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
4408
4409 if (alt_pic_reg_used != INVALID_REGNUM)
4410 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
4411
4412 pic_reg_used = true;
4413 }
4414
e9a25f70 4415 if (pic_reg_used)
c8c03509
RH
4416 {
4417 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
4418
66edd3b4
RH
4419 /* Even with accurate pre-reload life analysis, we can wind up
4420 deleting all references to the pic register after reload.
4421 Consider if cross-jumping unifies two sides of a branch
d1f87653 4422 controlled by a comparison vs the only read from a global.
66edd3b4
RH
4423 In which case, allow the set_got to be deleted, though we're
4424 too late to do anything about the ebx save in the prologue. */
c8c03509
RH
4425 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
4426 }
77a989d1 4427
66edd3b4
RH
4428 /* Prevent function calls from be scheduled before the call to mcount.
4429 In the pic_reg_used case, make sure that the got load isn't deleted. */
4430 if (current_function_profile)
4431 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
77a989d1
SC
4432}
4433
da2d1d3a
JH
4434/* Emit code to restore saved registers using MOV insns. First register
4435 is restored from POINTER + OFFSET. */
4436static void
72613dfa
JH
4437ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
4438 int maybe_eh_return)
da2d1d3a
JH
4439{
4440 int regno;
72613dfa 4441 rtx base_address = gen_rtx_MEM (Pmode, pointer);
da2d1d3a 4442
4dd2ac2c 4443 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
1020a5ab 4444 if (ix86_save_reg (regno, maybe_eh_return))
da2d1d3a 4445 {
72613dfa
JH
4446 /* Ensure that adjust_address won't be forced to produce pointer
4447 out of range allowed by x86-64 instruction set. */
4448 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
4449 {
4450 rtx r11;
4451
4452 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
4453 emit_move_insn (r11, GEN_INT (offset));
4454 emit_insn (gen_adddi3 (r11, r11, pointer));
4455 base_address = gen_rtx_MEM (Pmode, r11);
4456 offset = 0;
4457 }
4dd2ac2c 4458 emit_move_insn (gen_rtx_REG (Pmode, regno),
72613dfa 4459 adjust_address (base_address, Pmode, offset));
4dd2ac2c 4460 offset += UNITS_PER_WORD;
da2d1d3a
JH
4461 }
4462}
4463
0f290768 4464/* Restore function stack, frame, and registers. */
e9a25f70 4465
2a2ab3f9 4466void
b96a374d 4467ix86_expand_epilogue (int style)
2a2ab3f9 4468{
1c71e60e 4469 int regno;
fdb8a883 4470 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
4dd2ac2c 4471 struct ix86_frame frame;
65954bd8 4472 HOST_WIDE_INT offset;
4dd2ac2c
JH
4473
4474 ix86_compute_frame_layout (&frame);
2a2ab3f9 4475
a4f31c00 4476 /* Calculate start of saved registers relative to ebp. Special care
84e306b4
RH
4477 must be taken for the normal return case of a function using
4478 eh_return: the eax and edx registers are marked as saved, but not
4479 restored along this path. */
4480 offset = frame.nregs;
4481 if (current_function_calls_eh_return && style != 2)
4482 offset -= 2;
4483 offset *= -UNITS_PER_WORD;
2a2ab3f9 4484
fdb8a883
JW
4485 /* If we're only restoring one register and sp is not valid then
4486 using a move instruction to restore the register since it's
0f290768 4487 less work than reloading sp and popping the register.
da2d1d3a
JH
4488
4489 The default code result in stack adjustment using add/lea instruction,
4490 while this code results in LEAVE instruction (or discrete equivalent),
4491 so it is profitable in some other cases as well. Especially when there
4492 are no registers to restore. We also use this code when TARGET_USE_LEAVE
d1f87653 4493 and there is exactly one register to pop. This heuristic may need some
da2d1d3a 4494 tuning in future. */
4dd2ac2c 4495 if ((!sp_valid && frame.nregs <= 1)
2ab0437e 4496 || (TARGET_EPILOGUE_USING_MOVE
d9b40e8d 4497 && cfun->machine->use_fast_prologue_epilogue
c6036a37 4498 && (frame.nregs > 1 || frame.to_allocate))
4dd2ac2c 4499 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
2ab0437e 4500 || (frame_pointer_needed && TARGET_USE_LEAVE
d9b40e8d
JH
4501 && cfun->machine->use_fast_prologue_epilogue
4502 && frame.nregs == 1)
2ab0437e 4503 || current_function_calls_eh_return)
2a2ab3f9 4504 {
da2d1d3a
JH
4505 /* Restore registers. We can use ebp or esp to address the memory
4506 locations. If both are available, default to ebp, since offsets
4507 are known to be small. Only exception is esp pointing directly to the
4508 end of block of saved registers, where we may simplify addressing
4509 mode. */
4510
4dd2ac2c 4511 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
1020a5ab
RH
4512 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
4513 frame.to_allocate, style == 2);
da2d1d3a 4514 else
1020a5ab
RH
4515 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
4516 offset, style == 2);
4517
4518 /* eh_return epilogues need %ecx added to the stack pointer. */
4519 if (style == 2)
4520 {
4521 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
2a2ab3f9 4522
1020a5ab
RH
4523 if (frame_pointer_needed)
4524 {
4525 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
4526 tmp = plus_constant (tmp, UNITS_PER_WORD);
4527 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
4528
4529 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
4530 emit_move_insn (hard_frame_pointer_rtx, tmp);
4531
b19ee4bd
JJ
4532 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
4533 const0_rtx, style);
1020a5ab
RH
4534 }
4535 else
4536 {
4537 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
4538 tmp = plus_constant (tmp, (frame.to_allocate
4539 + frame.nregs * UNITS_PER_WORD));
4540 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
4541 }
4542 }
4543 else if (!frame_pointer_needed)
b19ee4bd
JJ
4544 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
4545 GEN_INT (frame.to_allocate
4546 + frame.nregs * UNITS_PER_WORD),
4547 style);
0f290768 4548 /* If not an i386, mov & pop is faster than "leave". */
d9b40e8d
JH
4549 else if (TARGET_USE_LEAVE || optimize_size
4550 || !cfun->machine->use_fast_prologue_epilogue)
8362f420 4551 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
c8c5cb99 4552 else
2a2ab3f9 4553 {
b19ee4bd
JJ
4554 pro_epilogue_adjust_stack (stack_pointer_rtx,
4555 hard_frame_pointer_rtx,
4556 const0_rtx, style);
8362f420
JH
4557 if (TARGET_64BIT)
4558 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4559 else
4560 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
e9a25f70
JL
4561 }
4562 }
1c71e60e 4563 else
68f654ec 4564 {
1c71e60e
JH
4565 /* First step is to deallocate the stack frame so that we can
4566 pop the registers. */
4567 if (!sp_valid)
4568 {
4569 if (!frame_pointer_needed)
4570 abort ();
b19ee4bd
JJ
4571 pro_epilogue_adjust_stack (stack_pointer_rtx,
4572 hard_frame_pointer_rtx,
4573 GEN_INT (offset), style);
1c71e60e 4574 }
4dd2ac2c 4575 else if (frame.to_allocate)
b19ee4bd
JJ
4576 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
4577 GEN_INT (frame.to_allocate), style);
1c71e60e 4578
4dd2ac2c 4579 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
1020a5ab 4580 if (ix86_save_reg (regno, false))
8362f420
JH
4581 {
4582 if (TARGET_64BIT)
4583 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
4584 else
4585 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
4586 }
4dd2ac2c 4587 if (frame_pointer_needed)
8362f420 4588 {
f5143c46 4589 /* Leave results in shorter dependency chains on CPUs that are
2ab0437e
JH
4590 able to grok it fast. */
4591 if (TARGET_USE_LEAVE)
4592 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4593 else if (TARGET_64BIT)
8362f420
JH
4594 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4595 else
4596 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4597 }
68f654ec 4598 }
68f654ec 4599
cbbf65e0 4600 /* Sibcall epilogues don't want a return instruction. */
1020a5ab 4601 if (style == 0)
cbbf65e0
RH
4602 return;
4603
2a2ab3f9
JVA
4604 if (current_function_pops_args && current_function_args_size)
4605 {
e075ae69 4606 rtx popc = GEN_INT (current_function_pops_args);
2a2ab3f9 4607
b8c752c8
UD
4608 /* i386 can only pop 64K bytes. If asked to pop more, pop
4609 return address, do explicit add, and jump indirectly to the
0f290768 4610 caller. */
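 /* Illustrative sketch (not in the original source): for a pop count of,
 say, 70000 bytes, the 32-bit sequence emitted below is roughly

 popl %ecx # pop the return address
 addl $70000, %esp # release the argument area
 jmp *%ecx # jump back to the caller

 because "ret $imm16" can only encode a 16-bit pop count. */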
2a2ab3f9 4611
b8c752c8 4612 if (current_function_pops_args >= 65536)
2a2ab3f9 4613 {
e075ae69 4614 rtx ecx = gen_rtx_REG (SImode, 2);
e9a25f70 4615
b19ee4bd 4616 /* There is no "pascal" calling convention in the 64bit ABI. */
8362f420 4617 if (TARGET_64BIT)
b531087a 4618 abort ();
8362f420 4619
e075ae69
RH
4620 emit_insn (gen_popsi1 (ecx));
4621 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
11837777 4622 emit_jump_insn (gen_return_indirect_internal (ecx));
e9a25f70 4623 }
79325812 4624 else
e075ae69
RH
4625 emit_jump_insn (gen_return_pop_internal (popc));
4626 }
4627 else
4628 emit_jump_insn (gen_return_internal ());
4629}
bd09bdeb
RH
4630
4631/* Reset any state the function's body may have modified; currently this just restores the PIC register's hard register number. */
4632
4633static void
b96a374d
AJ
4634ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
4635 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
bd09bdeb
RH
4636{
4637 if (pic_offset_table_rtx)
4638 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
4639}
e075ae69
RH
4640\f
4641/* Extract the parts of an RTL expression that is a valid memory address
b446e5a2
JH
4642 for an instruction. Return 0 if the structure of the address is
4643 grossly off. Return -1 if the address contains ASHIFT, so it is not
74dc3e94 4644 strictly valid, but is still used for computing the length of the lea instruction. */
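 /* An illustrative example (not in the original source): the address
 12(%ebx,%eax,4), i.e.
 (plus (plus (mult (reg %eax) (const_int 4)) (reg %ebx)) (const_int 12)),
 decomposes into base = %ebx, index = %eax, scale = 4, disp = 12 with
 seg = SEG_DEFAULT, and the return value is 1. */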
e075ae69 4645
8fe75e43 4646int
8d531ab9 4647ix86_decompose_address (rtx addr, struct ix86_address *out)
e075ae69
RH
4648{
4649 rtx base = NULL_RTX;
4650 rtx index = NULL_RTX;
4651 rtx disp = NULL_RTX;
4652 HOST_WIDE_INT scale = 1;
4653 rtx scale_rtx = NULL_RTX;
b446e5a2 4654 int retval = 1;
74dc3e94 4655 enum ix86_address_seg seg = SEG_DEFAULT;
e075ae69 4656
90e4e4c5 4657 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
e075ae69
RH
4658 base = addr;
4659 else if (GET_CODE (addr) == PLUS)
4660 {
74dc3e94
RH
4661 rtx addends[4], op;
4662 int n = 0, i;
e075ae69 4663
74dc3e94
RH
4664 op = addr;
4665 do
e075ae69 4666 {
74dc3e94
RH
4667 if (n >= 4)
4668 return 0;
4669 addends[n++] = XEXP (op, 1);
4670 op = XEXP (op, 0);
2a2ab3f9 4671 }
74dc3e94
RH
4672 while (GET_CODE (op) == PLUS);
4673 if (n >= 4)
4674 return 0;
4675 addends[n] = op;
4676
4677 for (i = n; i >= 0; --i)
e075ae69 4678 {
74dc3e94
RH
4679 op = addends[i];
4680 switch (GET_CODE (op))
4681 {
4682 case MULT:
4683 if (index)
4684 return 0;
4685 index = XEXP (op, 0);
4686 scale_rtx = XEXP (op, 1);
4687 break;
4688
4689 case UNSPEC:
4690 if (XINT (op, 1) == UNSPEC_TP
4691 && TARGET_TLS_DIRECT_SEG_REFS
4692 && seg == SEG_DEFAULT)
4693 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
4694 else
4695 return 0;
4696 break;
4697
4698 case REG:
4699 case SUBREG:
4700 if (!base)
4701 base = op;
4702 else if (!index)
4703 index = op;
4704 else
4705 return 0;
4706 break;
4707
4708 case CONST:
4709 case CONST_INT:
4710 case SYMBOL_REF:
4711 case LABEL_REF:
4712 if (disp)
4713 return 0;
4714 disp = op;
4715 break;
4716
4717 default:
4718 return 0;
4719 }
e075ae69 4720 }
e075ae69
RH
4721 }
4722 else if (GET_CODE (addr) == MULT)
4723 {
4724 index = XEXP (addr, 0); /* index*scale */
4725 scale_rtx = XEXP (addr, 1);
4726 }
4727 else if (GET_CODE (addr) == ASHIFT)
4728 {
4729 rtx tmp;
4730
4731 /* We're called for lea too, which implements ashift on occasion. */
4732 index = XEXP (addr, 0);
4733 tmp = XEXP (addr, 1);
4734 if (GET_CODE (tmp) != CONST_INT)
b446e5a2 4735 return 0;
e075ae69
RH
4736 scale = INTVAL (tmp);
4737 if ((unsigned HOST_WIDE_INT) scale > 3)
b446e5a2 4738 return 0;
e075ae69 4739 scale = 1 << scale;
b446e5a2 4740 retval = -1;
2a2ab3f9 4741 }
2a2ab3f9 4742 else
e075ae69
RH
4743 disp = addr; /* displacement */
4744
4745 /* Extract the integral value of scale. */
4746 if (scale_rtx)
e9a25f70 4747 {
e075ae69 4748 if (GET_CODE (scale_rtx) != CONST_INT)
b446e5a2 4749 return 0;
e075ae69 4750 scale = INTVAL (scale_rtx);
e9a25f70 4751 }
3b3c6a3f 4752
74dc3e94 4753 /* Allow arg pointer and stack pointer as index if there is no scaling. */
e075ae69 4754 if (base && index && scale == 1
74dc3e94
RH
4755 && (index == arg_pointer_rtx
4756 || index == frame_pointer_rtx
4757 || (REG_P (index) && REGNO (index) == STACK_POINTER_REGNUM)))
e075ae69
RH
4758 {
4759 rtx tmp = base;
4760 base = index;
4761 index = tmp;
4762 }
4763
4764 /* Special case: %ebp cannot be encoded as a base without a displacement. */
564d80f4
JH
4765 if ((base == hard_frame_pointer_rtx
4766 || base == frame_pointer_rtx
4767 || base == arg_pointer_rtx) && !disp)
e075ae69
RH
4768 disp = const0_rtx;
4769
4770 /* Special case: on K6, [%esi] causes the instruction to be vector decoded.
4771 Avoid this by transforming to [%esi+0]. */
9e555526 4772 if (ix86_tune == PROCESSOR_K6 && !optimize_size
e075ae69 4773 && base && !index && !disp
329e1d01 4774 && REG_P (base)
e075ae69
RH
4775 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
4776 disp = const0_rtx;
4777
4778 /* Special case: encode reg+reg instead of reg*2. */
4779 if (!base && index && scale && scale == 2)
4780 base = index, scale = 1;
0f290768 4781
e075ae69
RH
4782 /* Special case: scaling cannot be encoded without base or displacement. */
4783 if (!base && !disp && index && scale != 1)
4784 disp = const0_rtx;
4785
4786 out->base = base;
4787 out->index = index;
4788 out->disp = disp;
4789 out->scale = scale;
74dc3e94 4790 out->seg = seg;
3b3c6a3f 4791
b446e5a2 4792 return retval;
e075ae69 4793}
01329426
JH
4794\f
4795/* Return the cost of the memory address x.
4796 For i386, it is better to use a complex address than let gcc copy
4797 the address into a reg and make a new pseudo. But not if the address
4798 requires two regs - that would mean more pseudos with longer
4799 lifetimes. */
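 /* For illustration (not in the original source): 4(%ebx) with a hard
 register base costs 0 -- the nonzero displacement lowers the initial
 cost of 1 -- while (plus (reg pseudo1) (reg pseudo2)) costs 3, since
 it has no displacement and uses two registers that are both still
 pseudos. */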
dcefdf67 4800static int
b96a374d 4801ix86_address_cost (rtx x)
01329426
JH
4802{
4803 struct ix86_address parts;
4804 int cost = 1;
3b3c6a3f 4805
01329426
JH
4806 if (!ix86_decompose_address (x, &parts))
4807 abort ();
4808
4809 /* More complex memory references are better. */
4810 if (parts.disp && parts.disp != const0_rtx)
4811 cost--;
74dc3e94
RH
4812 if (parts.seg != SEG_DEFAULT)
4813 cost--;
01329426
JH
4814
4815 /* Attempt to minimize number of registers in the address. */
4816 if ((parts.base
4817 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
4818 || (parts.index
4819 && (!REG_P (parts.index)
4820 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
4821 cost++;
4822
4823 if (parts.base
4824 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
4825 && parts.index
4826 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
4827 && parts.base != parts.index)
4828 cost++;
4829
4830 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
4831 since its predecode logic can't detect the length of instructions
4832 and they degenerate to vector decoding. Increase the cost of such
4833 addresses here. The penalty is at least 2 cycles. It may be worthwhile
0f290768 4834 to split such addresses or even refuse such addresses at all.
01329426
JH
4835
4836 The following addressing modes are affected:
4837 [base+scale*index]
4838 [scale*index+disp]
4839 [base+index]
0f290768 4840
01329426
JH
4841 The first and last case may be avoidable by explicitly coding the zero
4842 into the memory address, but I don't have an AMD-K6 machine handy to
4843 check this theory. */
4844
4845 if (TARGET_K6
4846 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
4847 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
4848 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
4849 cost += 10;
0f290768 4850
01329426
JH
4851 return cost;
4852}
4853\f
b949ea8b
JW
4854/* If X is a machine specific address (i.e. a symbol or label being
4855 referenced as a displacement from the GOT implemented using an
4856 UNSPEC), then return the base term. Otherwise return X. */
4857
4858rtx
b96a374d 4859ix86_find_base_term (rtx x)
b949ea8b
JW
4860{
4861 rtx term;
4862
6eb791fc
JH
4863 if (TARGET_64BIT)
4864 {
4865 if (GET_CODE (x) != CONST)
4866 return x;
4867 term = XEXP (x, 0);
4868 if (GET_CODE (term) == PLUS
4869 && (GET_CODE (XEXP (term, 1)) == CONST_INT
4870 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
4871 term = XEXP (term, 0);
4872 if (GET_CODE (term) != UNSPEC
8ee41eaf 4873 || XINT (term, 1) != UNSPEC_GOTPCREL)
6eb791fc
JH
4874 return x;
4875
4876 term = XVECEXP (term, 0, 0);
4877
4878 if (GET_CODE (term) != SYMBOL_REF
4879 && GET_CODE (term) != LABEL_REF)
4880 return x;
4881
4882 return term;
4883 }
4884
69bd9368 4885 term = ix86_delegitimize_address (x);
b949ea8b
JW
4886
4887 if (GET_CODE (term) != SYMBOL_REF
4888 && GET_CODE (term) != LABEL_REF)
4889 return x;
4890
4891 return term;
4892}
828a4fe4
MS
4893
4894/* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
4895 this is used to form addresses to local data when -fPIC is in
4896 use. */
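 /* For example (illustrative, not in the original source), this matches
 (minus (symbol_ref "_foo") (symbol_ref "<pic base>"))
 and likewise with a label_ref as the minuend. */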
4897
4898static bool
4899darwin_local_data_pic (rtx disp)
4900{
4901 if (GET_CODE (disp) == MINUS)
4902 {
4903 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
4904 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
4905 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
4906 {
4907 const char *sym_name = XSTR (XEXP (disp, 1), 0);
4908 if (! strcmp (sym_name, "<pic base>"))
4909 return true;
4910 }
4911 }
4912
4913 return false;
4914}
b949ea8b 4915\f
f996902d
RH
4916/* Determine if a given RTX is a valid constant. We already know this
4917 satisfies CONSTANT_P. */
4918
4919bool
b96a374d 4920legitimate_constant_p (rtx x)
f996902d 4921{
f996902d
RH
4922 switch (GET_CODE (x))
4923 {
f996902d 4924 case CONST:
1e19ac74 4925 x = XEXP (x, 0);
f996902d 4926
1e19ac74 4927 if (GET_CODE (x) == PLUS)
828a4fe4 4928 {
1e19ac74 4929 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
828a4fe4 4930 return false;
1e19ac74 4931 x = XEXP (x, 0);
828a4fe4
MS
4932 }
4933
1e19ac74 4934 if (TARGET_MACHO && darwin_local_data_pic (x))
828a4fe4
MS
4935 return true;
4936
f996902d 4937 /* Only some unspecs are valid as "constants". */
1e19ac74
RH
4938 if (GET_CODE (x) == UNSPEC)
4939 switch (XINT (x, 1))
f996902d
RH
4940 {
4941 case UNSPEC_TPOFF:
cb0e3e3f 4942 case UNSPEC_NTPOFF:
1e19ac74 4943 return local_exec_symbolic_operand (XVECEXP (x, 0, 0), Pmode);
cb0e3e3f 4944 case UNSPEC_DTPOFF:
1e19ac74 4945 return local_dynamic_symbolic_operand (XVECEXP (x, 0, 0), Pmode);
f996902d
RH
4946 default:
4947 return false;
4948 }
1e19ac74
RH
4949
4950 /* We must have drilled down to a symbol. */
4951 if (!symbolic_operand (x, Pmode))
4952 return false;
4953 /* FALLTHRU */
4954
4955 case SYMBOL_REF:
4956 /* TLS symbols are never valid. */
4957 if (tls_symbolic_operand (x, Pmode))
4958 return false;
f996902d
RH
4959 break;
4960
4961 default:
4962 break;
4963 }
4964
4965 /* Otherwise we handle everything else in the move patterns. */
4966 return true;
4967}
4968
3a04ff64
RH
4969/* Determine if it's legal to put X into the constant pool. This
4970 is not possible for the address of thread-local symbols, which
4971 is checked above. */
4972
4973static bool
b96a374d 4974ix86_cannot_force_const_mem (rtx x)
3a04ff64
RH
4975{
4976 return !legitimate_constant_p (x);
4977}
4978
f996902d
RH
4979/* Determine if a given RTX is a valid constant address. */
4980
4981bool
b96a374d 4982constant_address_p (rtx x)
f996902d 4983{
a94f136b 4984 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
f996902d
RH
4985}
4986
4987/* Nonzero if the constant value X is a legitimate general operand
fce5a9f2 4988 when generating PIC code. It is given that flag_pic is on and
f996902d
RH
4989 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
4990
4991bool
b96a374d 4992legitimate_pic_operand_p (rtx x)
f996902d
RH
4993{
4994 rtx inner;
4995
4996 switch (GET_CODE (x))
4997 {
4998 case CONST:
4999 inner = XEXP (x, 0);
5000
5001 /* Only some unspecs are valid as "constants". */
5002 if (GET_CODE (inner) == UNSPEC)
5003 switch (XINT (inner, 1))
5004 {
5005 case UNSPEC_TPOFF:
5006 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
f996902d
RH
5007 default:
5008 return false;
5009 }
5efb1046 5010 /* FALLTHRU */
f996902d
RH
5011
5012 case SYMBOL_REF:
5013 case LABEL_REF:
5014 return legitimate_pic_address_disp_p (x);
5015
5016 default:
5017 return true;
5018 }
5019}
5020
e075ae69
RH
5021/* Determine if a given CONST RTX is a valid memory displacement
5022 in PIC mode. */
0f290768 5023
59be65f6 5024int
8d531ab9 5025legitimate_pic_address_disp_p (rtx disp)
91bb873f 5026{
f996902d
RH
5027 bool saw_plus;
5028
6eb791fc
JH
5029 /* In 64bit mode we can allow direct addresses of symbols and labels
5030 when they are not dynamic symbols. */
c05dbe81
JH
5031 if (TARGET_64BIT)
5032 {
5033 /* TLS references should always be enclosed in UNSPEC. */
5034 if (tls_symbolic_operand (disp, GET_MODE (disp)))
5035 return 0;
5036 if (GET_CODE (disp) == SYMBOL_REF
5037 && ix86_cmodel == CM_SMALL_PIC
2ae5ae57 5038 && SYMBOL_REF_LOCAL_P (disp))
c05dbe81
JH
5039 return 1;
5040 if (GET_CODE (disp) == LABEL_REF)
5041 return 1;
5042 if (GET_CODE (disp) == CONST
a132b6a8
JJ
5043 && GET_CODE (XEXP (disp, 0)) == PLUS)
5044 {
5045 rtx op0 = XEXP (XEXP (disp, 0), 0);
5046 rtx op1 = XEXP (XEXP (disp, 0), 1);
5047
5048 /* TLS references should always be enclosed in UNSPEC. */
5049 if (tls_symbolic_operand (op0, GET_MODE (op0)))
5050 return 0;
5051 if (((GET_CODE (op0) == SYMBOL_REF
5052 && ix86_cmodel == CM_SMALL_PIC
5053 && SYMBOL_REF_LOCAL_P (op0))
5054 || GET_CODE (op0) == LABEL_REF)
5055 && GET_CODE (op1) == CONST_INT
5056 && INTVAL (op1) < 16*1024*1024
5057 && INTVAL (op1) >= -16*1024*1024)
5058 return 1;
5059 }
c05dbe81 5060 }
91bb873f
RH
5061 if (GET_CODE (disp) != CONST)
5062 return 0;
5063 disp = XEXP (disp, 0);
5064
6eb791fc
JH
5065 if (TARGET_64BIT)
5066 {
5067 /* It is unsafe to allow PLUS expressions; that would lift the limit
5068 on the allowed distance from the GOT. We should not need these anyway. */
5069 if (GET_CODE (disp) != UNSPEC
8ee41eaf 5070 || XINT (disp, 1) != UNSPEC_GOTPCREL)
6eb791fc
JH
5071 return 0;
5072
5073 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
5074 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
5075 return 0;
5076 return 1;
5077 }
5078
f996902d 5079 saw_plus = false;
91bb873f
RH
5080 if (GET_CODE (disp) == PLUS)
5081 {
5082 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
5083 return 0;
5084 disp = XEXP (disp, 0);
f996902d 5085 saw_plus = true;
91bb873f
RH
5086 }
5087
828a4fe4
MS
5088 if (TARGET_MACHO && darwin_local_data_pic (disp))
5089 return 1;
b069de3b 5090
8ee41eaf 5091 if (GET_CODE (disp) != UNSPEC)
91bb873f
RH
5092 return 0;
5093
623fe810
RH
5094 switch (XINT (disp, 1))
5095 {
8ee41eaf 5096 case UNSPEC_GOT:
f996902d
RH
5097 if (saw_plus)
5098 return false;
623fe810 5099 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
8ee41eaf 5100 case UNSPEC_GOTOFF:
799b33a0
JH
5101 if (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
5102 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
5103 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5104 return false;
f996902d 5105 case UNSPEC_GOTTPOFF:
dea73790
JJ
5106 case UNSPEC_GOTNTPOFF:
5107 case UNSPEC_INDNTPOFF:
f996902d
RH
5108 if (saw_plus)
5109 return false;
5110 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5111 case UNSPEC_NTPOFF:
f996902d
RH
5112 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5113 case UNSPEC_DTPOFF:
f996902d 5114 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
623fe810 5115 }
fce5a9f2 5116
623fe810 5117 return 0;
91bb873f
RH
5118}
5119
e075ae69
RH
5120/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5121 memory address for an instruction. The MODE argument is the machine mode
5122 for the MEM expression that wants to use this address.
5123
5124 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
5125 convert common non-canonical forms to canonical form so that they will
5126 be recognized. */
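 /* A few illustrative cases (not in the original source): the canonical
 form of 16(%eax,%ebx,8) is accepted; a scale of 3 is rejected below
 with "scale is not a valid multiplier"; a scale factor with no index
 register is rejected with "scale without index". */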
5127
3b3c6a3f 5128int
8d531ab9 5129legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
3b3c6a3f 5130{
e075ae69
RH
5131 struct ix86_address parts;
5132 rtx base, index, disp;
5133 HOST_WIDE_INT scale;
5134 const char *reason = NULL;
5135 rtx reason_rtx = NULL_RTX;
3b3c6a3f
MM
5136
5137 if (TARGET_DEBUG_ADDR)
5138 {
5139 fprintf (stderr,
e9a25f70 5140 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
3b3c6a3f 5141 GET_MODE_NAME (mode), strict);
3b3c6a3f
MM
5142 debug_rtx (addr);
5143 }
5144
b446e5a2 5145 if (ix86_decompose_address (addr, &parts) <= 0)
3b3c6a3f 5146 {
e075ae69 5147 reason = "decomposition failed";
50e60bc3 5148 goto report_error;
3b3c6a3f
MM
5149 }
5150
e075ae69
RH
5151 base = parts.base;
5152 index = parts.index;
5153 disp = parts.disp;
5154 scale = parts.scale;
91f0226f 5155
e075ae69 5156 /* Validate base register.
e9a25f70
JL
5157
5158 Don't allow SUBREGs here; they can lead to spill failures when the base
3d771dfd
MM
5159 is one word out of a two word structure, which is represented internally
5160 as a DImode int. */
e9a25f70 5161
3b3c6a3f
MM
5162 if (base)
5163 {
e075ae69
RH
5164 reason_rtx = base;
5165
90e4e4c5 5166 if (GET_CODE (base) != REG)
3b3c6a3f 5167 {
e075ae69 5168 reason = "base is not a register";
50e60bc3 5169 goto report_error;
3b3c6a3f
MM
5170 }
5171
c954bd01
RH
5172 if (GET_MODE (base) != Pmode)
5173 {
e075ae69 5174 reason = "base is not in Pmode";
50e60bc3 5175 goto report_error;
c954bd01
RH
5176 }
5177
90e4e4c5
RH
5178 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
5179 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
3b3c6a3f 5180 {
e075ae69 5181 reason = "base is not valid";
50e60bc3 5182 goto report_error;
3b3c6a3f
MM
5183 }
5184 }
5185
e075ae69 5186 /* Validate index register.
e9a25f70
JL
5187
5188 Don't allow SUBREGs here; they can lead to spill failures when the index
3d771dfd
MM
5189 is one word out of a two word structure, which is represented internally
5190 as a DImode int. */
e075ae69
RH
5191
5192 if (index)
3b3c6a3f 5193 {
e075ae69
RH
5194 reason_rtx = index;
5195
90e4e4c5 5196 if (GET_CODE (index) != REG)
3b3c6a3f 5197 {
e075ae69 5198 reason = "index is not a register";
50e60bc3 5199 goto report_error;
3b3c6a3f
MM
5200 }
5201
e075ae69 5202 if (GET_MODE (index) != Pmode)
c954bd01 5203 {
e075ae69 5204 reason = "index is not in Pmode";
50e60bc3 5205 goto report_error;
c954bd01
RH
5206 }
5207
90e4e4c5
RH
5208 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
5209 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
3b3c6a3f 5210 {
e075ae69 5211 reason = "index is not valid";
50e60bc3 5212 goto report_error;
3b3c6a3f
MM
5213 }
5214 }
3b3c6a3f 5215
e075ae69
RH
5216 /* Validate scale factor. */
5217 if (scale != 1)
3b3c6a3f 5218 {
e075ae69
RH
5219 reason_rtx = GEN_INT (scale);
5220 if (!index)
3b3c6a3f 5221 {
e075ae69 5222 reason = "scale without index";
50e60bc3 5223 goto report_error;
3b3c6a3f
MM
5224 }
5225
e075ae69 5226 if (scale != 2 && scale != 4 && scale != 8)
3b3c6a3f 5227 {
e075ae69 5228 reason = "scale is not a valid multiplier";
50e60bc3 5229 goto report_error;
3b3c6a3f
MM
5230 }
5231 }
5232
91bb873f 5233 /* Validate displacement. */
3b3c6a3f
MM
5234 if (disp)
5235 {
e075ae69
RH
5236 reason_rtx = disp;
5237
f996902d
RH
5238 if (GET_CODE (disp) == CONST
5239 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
5240 switch (XINT (XEXP (disp, 0), 1))
5241 {
5242 case UNSPEC_GOT:
5243 case UNSPEC_GOTOFF:
5244 case UNSPEC_GOTPCREL:
5245 if (!flag_pic)
5246 abort ();
5247 goto is_legitimate_pic;
5248
5249 case UNSPEC_GOTTPOFF:
dea73790
JJ
5250 case UNSPEC_GOTNTPOFF:
5251 case UNSPEC_INDNTPOFF:
f996902d
RH
5252 case UNSPEC_NTPOFF:
5253 case UNSPEC_DTPOFF:
5254 break;
5255
5256 default:
5257 reason = "invalid address unspec";
5258 goto report_error;
5259 }
5260
b069de3b
SS
5261 else if (flag_pic && (SYMBOLIC_CONST (disp)
5262#if TARGET_MACHO
5263 && !machopic_operand_p (disp)
5264#endif
5265 ))
3b3c6a3f 5266 {
f996902d 5267 is_legitimate_pic:
0d7d98ee
JH
5268 if (TARGET_64BIT && (index || base))
5269 {
75d38379
JJ
5270 /* foo@dtpoff(%rX) is ok. */
5271 if (GET_CODE (disp) != CONST
5272 || GET_CODE (XEXP (disp, 0)) != PLUS
5273 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
5274 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
5275 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
5276 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
5277 {
5278 reason = "non-constant pic memory reference";
5279 goto report_error;
5280 }
0d7d98ee 5281 }
75d38379 5282 else if (! legitimate_pic_address_disp_p (disp))
91bb873f 5283 {
e075ae69 5284 reason = "displacement is an invalid pic construct";
50e60bc3 5285 goto report_error;
91bb873f
RH
5286 }
5287
4e9efe54 5288 /* This code used to verify that a symbolic pic displacement
0f290768
KH
5289 includes the pic_offset_table_rtx register.
5290
4e9efe54
JH
5291 While this is a good idea, unfortunately these constructs may
5292 be created by "adds using lea" optimization for incorrect
5293 code like:
5294
5295 int a;
5296 int foo(int i)
5297 {
5298 return *(&a+i);
5299 }
5300
50e60bc3 5301 This code is nonsensical, but results in addressing
4e9efe54 5302 the GOT table with a pic_offset_table_rtx base. We can't
f710504c 5303 just refuse it easily, since it gets matched by the
4e9efe54
JH
5304 "addsi3" pattern, that later gets split to lea in the
5305 case output register differs from input. While this
5306 can be handled by separate addsi pattern for this case
5307 that never results in lea, this seems to be easier and
5308 correct fix for crash to disable this test. */
3b3c6a3f 5309 }
a94f136b
JH
5310 else if (GET_CODE (disp) != LABEL_REF
5311 && GET_CODE (disp) != CONST_INT
5312 && (GET_CODE (disp) != CONST
5313 || !legitimate_constant_p (disp))
5314 && (GET_CODE (disp) != SYMBOL_REF
5315 || !legitimate_constant_p (disp)))
f996902d
RH
5316 {
5317 reason = "displacement is not constant";
5318 goto report_error;
5319 }
8fe75e43
RH
5320 else if (TARGET_64BIT
5321 && !x86_64_immediate_operand (disp, VOIDmode))
c05dbe81
JH
5322 {
5323 reason = "displacement is out of range";
5324 goto report_error;
5325 }
3b3c6a3f
MM
5326 }
5327
e075ae69 5328 /* Everything looks valid. */
3b3c6a3f 5329 if (TARGET_DEBUG_ADDR)
e075ae69 5330 fprintf (stderr, "Success.\n");
3b3c6a3f 5331 return TRUE;
e075ae69 5332
5bf0ebab 5333 report_error:
e075ae69
RH
5334 if (TARGET_DEBUG_ADDR)
5335 {
5336 fprintf (stderr, "Error: %s\n", reason);
5337 debug_rtx (reason_rtx);
5338 }
5339 return FALSE;
3b3c6a3f 5340}
3b3c6a3f 5341\f
55efb413
JW
5342/* Return a unique alias set for the GOT. */
5343
0f290768 5344static HOST_WIDE_INT
b96a374d 5345ix86_GOT_alias_set (void)
55efb413 5346{
5bf0ebab
RH
5347 static HOST_WIDE_INT set = -1;
5348 if (set == -1)
5349 set = new_alias_set ();
5350 return set;
0f290768 5351}
55efb413 5352
3b3c6a3f
MM
5353/* Return a legitimate reference for ORIG (an address) using the
5354 register REG. If REG is 0, a new pseudo is generated.
5355
91bb873f 5356 There are two types of references that must be handled:
3b3c6a3f
MM
5357
5358 1. Global data references must load the address from the GOT, via
5359 the PIC reg. An insn is emitted to do this load, and the reg is
5360 returned.
5361
91bb873f
RH
5362 2. Static data references, constant pool addresses, and code labels
5363 compute the address as an offset from the GOT, whose base is in
2ae5ae57 5364 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
91bb873f
RH
5365 differentiate them from global data objects. The returned
5366 address is the PIC reg + an unspec constant.
3b3c6a3f
MM
5367
5368 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
91bb873f 5369 reg also appears in the address. */
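 /* Illustrative sketch (not in the original source) of the two cases on
 ia32, assuming %ebx holds the PIC register:

 movl foo@GOT(%ebx), %eax # case 1: address loaded from the GOT
 leal bar@GOTOFF(%ebx), %eax # case 2: PIC reg + unspec constant */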
3b3c6a3f 5370
b39edae3 5371static rtx
b96a374d 5372legitimize_pic_address (rtx orig, rtx reg)
3b3c6a3f
MM
5373{
5374 rtx addr = orig;
5375 rtx new = orig;
91bb873f 5376 rtx base;
3b3c6a3f 5377
b069de3b
SS
5378#if TARGET_MACHO
5379 if (reg == 0)
5380 reg = gen_reg_rtx (Pmode);
5381 /* Use the generic Mach-O PIC machinery. */
5382 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
5383#endif
5384
c05dbe81
JH
5385 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
5386 new = addr;
5387 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
3b3c6a3f 5388 {
c05dbe81
JH
5389 /* This symbol may be referenced via a displacement from the PIC
5390 base address (@GOTOFF). */
3b3c6a3f 5391
c05dbe81
JH
5392 if (reload_in_progress)
5393 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
799b33a0
JH
5394 if (GET_CODE (addr) == CONST)
5395 addr = XEXP (addr, 0);
5396 if (GET_CODE (addr) == PLUS)
5397 {
5398 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
5399 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
5400 }
5401 else
5402 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
c05dbe81
JH
5403 new = gen_rtx_CONST (Pmode, new);
5404 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
3b3c6a3f 5405
c05dbe81
JH
5406 if (reg != 0)
5407 {
5408 emit_move_insn (reg, new);
5409 new = reg;
5410 }
3b3c6a3f 5411 }
91bb873f 5412 else if (GET_CODE (addr) == SYMBOL_REF)
3b3c6a3f 5413 {
14f73b5a
JH
5414 if (TARGET_64BIT)
5415 {
8ee41eaf 5416 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
14f73b5a 5417 new = gen_rtx_CONST (Pmode, new);
542a8afa 5418 new = gen_const_mem (Pmode, new);
14f73b5a
JH
5419 set_mem_alias_set (new, ix86_GOT_alias_set ());
5420
5421 if (reg == 0)
5422 reg = gen_reg_rtx (Pmode);
5423 /* Use gen_movsi directly; otherwise the address is loaded
5424 into a register for CSE. We don't want to CSE these addresses;
5425 instead we CSE addresses from the GOT table, so skip this. */
5426 emit_insn (gen_movsi (reg, new));
5427 new = reg;
5428 }
5429 else
5430 {
5431 /* This symbol must be referenced via a load from the
5432 Global Offset Table (@GOT). */
3b3c6a3f 5433
66edd3b4
RH
5434 if (reload_in_progress)
5435 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
8ee41eaf 5436 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
14f73b5a
JH
5437 new = gen_rtx_CONST (Pmode, new);
5438 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
542a8afa 5439 new = gen_const_mem (Pmode, new);
14f73b5a 5440 set_mem_alias_set (new, ix86_GOT_alias_set ());
3b3c6a3f 5441
14f73b5a
JH
5442 if (reg == 0)
5443 reg = gen_reg_rtx (Pmode);
5444 emit_move_insn (reg, new);
5445 new = reg;
5446 }
0f290768 5447 }
91bb873f
RH
5448 else
5449 {
5450 if (GET_CODE (addr) == CONST)
3b3c6a3f 5451 {
91bb873f 5452 addr = XEXP (addr, 0);
e3c8ea67
RH
5453
5454 /* We must match stuff we generate before. Assume the only
5455 unspecs that can get here are ours. Not that we could do
43f3a59d 5456 anything with them anyway.... */
e3c8ea67
RH
5457 if (GET_CODE (addr) == UNSPEC
5458 || (GET_CODE (addr) == PLUS
5459 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
5460 return orig;
5461 if (GET_CODE (addr) != PLUS)
564d80f4 5462 abort ();
3b3c6a3f 5463 }
91bb873f
RH
5464 if (GET_CODE (addr) == PLUS)
5465 {
5466 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
e9a25f70 5467
91bb873f
RH
5468 /* Check first to see if this is a constant offset from a @GOTOFF
5469 symbol reference. */
623fe810 5470 if (local_symbolic_operand (op0, Pmode)
91bb873f
RH
5471 && GET_CODE (op1) == CONST_INT)
5472 {
6eb791fc
JH
5473 if (!TARGET_64BIT)
5474 {
66edd3b4
RH
5475 if (reload_in_progress)
5476 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
8ee41eaf
RH
5477 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
5478 UNSPEC_GOTOFF);
6eb791fc
JH
5479 new = gen_rtx_PLUS (Pmode, new, op1);
5480 new = gen_rtx_CONST (Pmode, new);
5481 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
91bb873f 5482
6eb791fc
JH
5483 if (reg != 0)
5484 {
5485 emit_move_insn (reg, new);
5486 new = reg;
5487 }
5488 }
5489 else
91bb873f 5490 {
75d38379
JJ
5491 if (INTVAL (op1) < -16*1024*1024
5492 || INTVAL (op1) >= 16*1024*1024)
b8771ace 5493 new = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
91bb873f
RH
5494 }
5495 }
5496 else
5497 {
5498 base = legitimize_pic_address (XEXP (addr, 0), reg);
5499 new = legitimize_pic_address (XEXP (addr, 1),
5500 base == reg ? NULL_RTX : reg);
5501
5502 if (GET_CODE (new) == CONST_INT)
5503 new = plus_constant (base, INTVAL (new));
5504 else
5505 {
5506 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
5507 {
5508 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
5509 new = XEXP (new, 1);
5510 }
5511 new = gen_rtx_PLUS (Pmode, base, new);
5512 }
5513 }
5514 }
3b3c6a3f
MM
5515 }
5516 return new;
5517}
5518\f
74dc3e94 5519/* Load the thread pointer. If TO_REG is true, force it into a register. */
f996902d
RH
5520
5521static rtx
b96a374d 5522get_thread_pointer (int to_reg)
f996902d 5523{
74dc3e94 5524 rtx tp, reg, insn;
f996902d
RH
5525
5526 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
74dc3e94
RH
5527 if (!to_reg)
5528 return tp;
f996902d 5529
74dc3e94
RH
5530 reg = gen_reg_rtx (Pmode);
5531 insn = gen_rtx_SET (VOIDmode, reg, tp);
5532 insn = emit_insn (insn);
5533
5534 return reg;
5535}
5536
5537/* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
5538 false if we expect this to be used for a memory address and true if
5539 we expect to load the address into a register. */
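 /* Illustrative sketch (not in the original source): for the local-exec
 model with GNU TLS the result is thread-pointer-relative, roughly
 %gs:0 + foo@NTPOFF, whereas without TARGET_GNU_TLS the code below
 instead subtracts foo@TPOFF from the thread pointer via subsi3. */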
5540
5541static rtx
b96a374d 5542legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
74dc3e94
RH
5543{
5544 rtx dest, base, off, pic;
5545 int type;
5546
5547 switch (model)
5548 {
5549 case TLS_MODEL_GLOBAL_DYNAMIC:
5550 dest = gen_reg_rtx (Pmode);
5551 if (TARGET_64BIT)
5552 {
5553 rtx rax = gen_rtx_REG (Pmode, 0), insns;
5554
5555 start_sequence ();
5556 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
5557 insns = get_insns ();
5558 end_sequence ();
5559
5560 emit_libcall_block (insns, dest, rax, x);
5561 }
5562 else
5563 emit_insn (gen_tls_global_dynamic_32 (dest, x));
5564 break;
5565
5566 case TLS_MODEL_LOCAL_DYNAMIC:
5567 base = gen_reg_rtx (Pmode);
5568 if (TARGET_64BIT)
5569 {
5570 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
5571
5572 start_sequence ();
5573 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
5574 insns = get_insns ();
5575 end_sequence ();
5576
5577 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
5578 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
5579 emit_libcall_block (insns, base, rax, note);
5580 }
5581 else
5582 emit_insn (gen_tls_local_dynamic_base_32 (base));
5583
5584 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
5585 off = gen_rtx_CONST (Pmode, off);
5586
5587 return gen_rtx_PLUS (Pmode, base, off);
5588
5589 case TLS_MODEL_INITIAL_EXEC:
5590 if (TARGET_64BIT)
5591 {
5592 pic = NULL;
5593 type = UNSPEC_GOTNTPOFF;
5594 }
5595 else if (flag_pic)
5596 {
5597 if (reload_in_progress)
5598 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5599 pic = pic_offset_table_rtx;
5600 type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
5601 }
5602 else if (!TARGET_GNU_TLS)
5603 {
5604 pic = gen_reg_rtx (Pmode);
5605 emit_insn (gen_set_got (pic));
5606 type = UNSPEC_GOTTPOFF;
5607 }
5608 else
5609 {
5610 pic = NULL;
5611 type = UNSPEC_INDNTPOFF;
5612 }
5613
5614 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
5615 off = gen_rtx_CONST (Pmode, off);
5616 if (pic)
5617 off = gen_rtx_PLUS (Pmode, pic, off);
542a8afa 5618 off = gen_const_mem (Pmode, off);
74dc3e94
RH
5619 set_mem_alias_set (off, ix86_GOT_alias_set ());
5620
5621 if (TARGET_64BIT || TARGET_GNU_TLS)
5622 {
5623 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
5624 off = force_reg (Pmode, off);
5625 return gen_rtx_PLUS (Pmode, base, off);
5626 }
5627 else
5628 {
5629 base = get_thread_pointer (true);
5630 dest = gen_reg_rtx (Pmode);
5631 emit_insn (gen_subsi3 (dest, base, off));
5632 }
5633 break;
5634
5635 case TLS_MODEL_LOCAL_EXEC:
5636 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
5637 (TARGET_64BIT || TARGET_GNU_TLS)
5638 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
5639 off = gen_rtx_CONST (Pmode, off);
5640
5641 if (TARGET_64BIT || TARGET_GNU_TLS)
5642 {
5643 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
5644 return gen_rtx_PLUS (Pmode, base, off);
5645 }
5646 else
5647 {
5648 base = get_thread_pointer (true);
5649 dest = gen_reg_rtx (Pmode);
5650 emit_insn (gen_subsi3 (dest, base, off));
5651 }
5652 break;
5653
5654 default:
5655 abort ();
5656 }
5657
5658 return dest;
f996902d 5659}
fce5a9f2 5660
3b3c6a3f
MM
5661/* Try machine-dependent ways of modifying an illegitimate address
5662 to be legitimate. If we find one, return the new, valid address.
5663 This macro is used in only one place: `memory_address' in explow.c.
5664
5665 OLDX is the address as it was before break_out_memory_refs was called.
5666 In some cases it is useful to look at this to decide what needs to be done.
5667
5668 MODE and WIN are passed so that this macro can use
5669 GO_IF_LEGITIMATE_ADDRESS.
5670
5671 It is always safe for this macro to do nothing. It exists to recognize
5672 opportunities to optimize the output.
5673
5674 For the 80386, we handle X+REG by loading X into a register R and
5675 using R+REG. R will go in a general reg and indexing will be used.
5676 However, if REG is a broken-out memory address or multiplication,
5677 nothing needs to be done because REG can certainly go in a general reg.
5678
5679 When -fpic is used, special handling is needed for symbolic references.
5680 See comments by legitimize_pic_address in i386.c for details. */
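 /* For example (illustrative, not in the original source): the
 non-canonical address
 (plus (ashift (reg) (const_int 2)) (reg))
 is rewritten below into the canonical
 (plus (mult (reg) (const_int 4)) (reg))
 so that GO_IF_LEGITIMATE_ADDRESS will recognize it. */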
5681
5682rtx
8d531ab9 5683legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
3b3c6a3f
MM
5684{
5685 int changed = 0;
5686 unsigned log;
5687
5688 if (TARGET_DEBUG_ADDR)
5689 {
e9a25f70
JL
5690 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
5691 GET_MODE_NAME (mode));
3b3c6a3f
MM
5692 debug_rtx (x);
5693 }
5694
8fe75e43 5695 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
f996902d 5696 if (log)
74dc3e94 5697 return legitimize_tls_address (x, log, false);
b39edae3
RH
5698 if (GET_CODE (x) == CONST
5699 && GET_CODE (XEXP (x, 0)) == PLUS
8fe75e43
RH
5700 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
5701 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
b39edae3
RH
5702 {
5703 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
5704 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
5705 }
f996902d 5706
3b3c6a3f
MM
5707 if (flag_pic && SYMBOLIC_CONST (x))
5708 return legitimize_pic_address (x, 0);
5709
5710 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
5711 if (GET_CODE (x) == ASHIFT
5712 && GET_CODE (XEXP (x, 1)) == CONST_INT
b531087a 5713 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
3b3c6a3f
MM
5714 {
5715 changed = 1;
a269a03c
JC
5716 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
5717 GEN_INT (1 << log));
3b3c6a3f
MM
5718 }
5719
5720 if (GET_CODE (x) == PLUS)
5721 {
0f290768 5722 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
e9a25f70 5723
3b3c6a3f
MM
5724 if (GET_CODE (XEXP (x, 0)) == ASHIFT
5725 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
b531087a 5726 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
3b3c6a3f
MM
5727 {
5728 changed = 1;
c5c76735
JL
5729 XEXP (x, 0) = gen_rtx_MULT (Pmode,
5730 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
5731 GEN_INT (1 << log));
3b3c6a3f
MM
5732 }
5733
5734 if (GET_CODE (XEXP (x, 1)) == ASHIFT
5735 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
b531087a 5736 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
3b3c6a3f
MM
5737 {
5738 changed = 1;
c5c76735
JL
5739 XEXP (x, 1) = gen_rtx_MULT (Pmode,
5740 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
5741 GEN_INT (1 << log));
3b3c6a3f
MM
5742 }
5743
0f290768 5744 /* Put multiply first if it isn't already. */
3b3c6a3f
MM
5745 if (GET_CODE (XEXP (x, 1)) == MULT)
5746 {
5747 rtx tmp = XEXP (x, 0);
5748 XEXP (x, 0) = XEXP (x, 1);
5749 XEXP (x, 1) = tmp;
5750 changed = 1;
5751 }
5752
5753 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
5754 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
5755 created by virtual register instantiation, register elimination, and
5756 similar optimizations. */
5757 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
5758 {
5759 changed = 1;
c5c76735
JL
5760 x = gen_rtx_PLUS (Pmode,
5761 gen_rtx_PLUS (Pmode, XEXP (x, 0),
5762 XEXP (XEXP (x, 1), 0)),
5763 XEXP (XEXP (x, 1), 1));
3b3c6a3f
MM
5764 }
5765
e9a25f70
JL
5766 /* Canonicalize
5767 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
3b3c6a3f
MM
5768 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
5769 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
5770 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5771 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
5772 && CONSTANT_P (XEXP (x, 1)))
5773 {
00c79232
ML
5774 rtx constant;
5775 rtx other = NULL_RTX;
3b3c6a3f
MM
5776
5777 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5778 {
5779 constant = XEXP (x, 1);
5780 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
5781 }
5782 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
5783 {
5784 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
5785 other = XEXP (x, 1);
5786 }
5787 else
5788 constant = 0;
5789
5790 if (constant)
5791 {
5792 changed = 1;
c5c76735
JL
5793 x = gen_rtx_PLUS (Pmode,
5794 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
5795 XEXP (XEXP (XEXP (x, 0), 1), 0)),
5796 plus_constant (other, INTVAL (constant)));
3b3c6a3f
MM
5797 }
5798 }
5799
5800 if (changed && legitimate_address_p (mode, x, FALSE))
5801 return x;
5802
5803 if (GET_CODE (XEXP (x, 0)) == MULT)
5804 {
5805 changed = 1;
5806 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
5807 }
5808
5809 if (GET_CODE (XEXP (x, 1)) == MULT)
5810 {
5811 changed = 1;
5812 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
5813 }
5814
5815 if (changed
5816 && GET_CODE (XEXP (x, 1)) == REG
5817 && GET_CODE (XEXP (x, 0)) == REG)
5818 return x;
5819
5820 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
5821 {
5822 changed = 1;
5823 x = legitimize_pic_address (x, 0);
5824 }
5825
5826 if (changed && legitimate_address_p (mode, x, FALSE))
5827 return x;
5828
5829 if (GET_CODE (XEXP (x, 0)) == REG)
5830 {
8d531ab9
KH
5831 rtx temp = gen_reg_rtx (Pmode);
5832 rtx val = force_operand (XEXP (x, 1), temp);
3b3c6a3f
MM
5833 if (val != temp)
5834 emit_move_insn (temp, val);
5835
5836 XEXP (x, 1) = temp;
5837 return x;
5838 }
5839
5840 else if (GET_CODE (XEXP (x, 1)) == REG)
5841 {
8d531ab9
KH
5842 rtx temp = gen_reg_rtx (Pmode);
5843 rtx val = force_operand (XEXP (x, 0), temp);
3b3c6a3f
MM
5844 if (val != temp)
5845 emit_move_insn (temp, val);
5846
5847 XEXP (x, 0) = temp;
5848 return x;
5849 }
5850 }
5851
5852 return x;
5853}
2a2ab3f9
JVA
5854\f
5855/* Print an integer constant expression in assembler syntax. Addition
5856 and subtraction are the only arithmetic that may appear in these
5857 expressions. FILE is the stdio stream to write to, X is the rtx, and
5858 CODE is the operand print code from the output string. */
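 /* For illustration (not in the original source): given
 (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF)) this prints
 "foo@GOTOFF", and a non-local symbol printed with code 'P' gets a
 "@PLT" suffix appended (outside Mach-O). */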
5859
5860static void
b96a374d 5861output_pic_addr_const (FILE *file, rtx x, int code)
2a2ab3f9
JVA
5862{
5863 char buf[256];
5864
5865 switch (GET_CODE (x))
5866 {
5867 case PC:
5868 if (flag_pic)
5869 putc ('.', file);
5870 else
5871 abort ();
5872 break;
5873
5874 case SYMBOL_REF:
79bba51c
AP
5875 /* Mark the decl as referenced so that cgraph will output the function. */
5876 if (SYMBOL_REF_DECL (x))
5877 mark_decl_referenced (SYMBOL_REF_DECL (x));
5878
91bb873f 5879 assemble_name (file, XSTR (x, 0));
12969f45 5880 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
91bb873f 5881 fputs ("@PLT", file);
2a2ab3f9
JVA
5882 break;
5883
91bb873f
RH
5884 case LABEL_REF:
5885 x = XEXP (x, 0);
5efb1046 5886 /* FALLTHRU */
2a2ab3f9
JVA
5887 case CODE_LABEL:
5888 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
5889 assemble_name (asm_out_file, buf);
5890 break;
5891
5892 case CONST_INT:
f64cecad 5893 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
2a2ab3f9
JVA
5894 break;
5895
5896 case CONST:
5897 /* This used to output parentheses around the expression,
5898 but that does not work on the 386 (either ATT or BSD assembler). */
5899 output_pic_addr_const (file, XEXP (x, 0), code);
5900 break;
5901
5902 case CONST_DOUBLE:
5903 if (GET_MODE (x) == VOIDmode)
5904 {
5905 /* We can use %d if the number is <32 bits and positive. */
5906 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
f64cecad
JC
5907 fprintf (file, "0x%lx%08lx",
5908 (unsigned long) CONST_DOUBLE_HIGH (x),
5909 (unsigned long) CONST_DOUBLE_LOW (x));
2a2ab3f9 5910 else
f64cecad 5911 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
2a2ab3f9
JVA
5912 }
5913 else
5914 /* We can't handle floating point constants;
5915 PRINT_OPERAND must handle them. */
5916 output_operand_lossage ("floating constant misused");
5917 break;
5918
5919 case PLUS:
e9a25f70 5920 /* Some assemblers need integer constants to appear first. */
2a2ab3f9
JVA
5921 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
5922 {
2a2ab3f9 5923 output_pic_addr_const (file, XEXP (x, 0), code);
e075ae69 5924 putc ('+', file);
e9a25f70 5925 output_pic_addr_const (file, XEXP (x, 1), code);
2a2ab3f9 5926 }
91bb873f 5927 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
2a2ab3f9 5928 {
2a2ab3f9 5929 output_pic_addr_const (file, XEXP (x, 1), code);
e075ae69 5930 putc ('+', file);
e9a25f70 5931 output_pic_addr_const (file, XEXP (x, 0), code);
2a2ab3f9 5932 }
91bb873f
RH
5933 else
5934 abort ();
2a2ab3f9
JVA
5935 break;
5936
5937 case MINUS:
b069de3b
SS
5938 if (!TARGET_MACHO)
5939 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
2a2ab3f9 5940 output_pic_addr_const (file, XEXP (x, 0), code);
e075ae69 5941 putc ('-', file);
2a2ab3f9 5942 output_pic_addr_const (file, XEXP (x, 1), code);
b069de3b
SS
5943 if (!TARGET_MACHO)
5944 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
2a2ab3f9
JVA
5945 break;
5946
91bb873f
RH
5947 case UNSPEC:
5948 if (XVECLEN (x, 0) != 1)
5bf0ebab 5949 abort ();
91bb873f
RH
5950 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
5951 switch (XINT (x, 1))
77ebd435 5952 {
8ee41eaf 5953 case UNSPEC_GOT:
77ebd435
AJ
5954 fputs ("@GOT", file);
5955 break;
8ee41eaf 5956 case UNSPEC_GOTOFF:
77ebd435
AJ
5957 fputs ("@GOTOFF", file);
5958 break;
8ee41eaf 5959 case UNSPEC_GOTPCREL:
edfe8595 5960 fputs ("@GOTPCREL(%rip)", file);
6eb791fc 5961 break;
f996902d 5962 case UNSPEC_GOTTPOFF:
dea73790 5963 /* FIXME: This might be @TPOFF in Sun ld too. */
f996902d
RH
5964 fputs ("@GOTTPOFF", file);
5965 break;
5966 case UNSPEC_TPOFF:
5967 fputs ("@TPOFF", file);
5968 break;
5969 case UNSPEC_NTPOFF:
75d38379
JJ
5970 if (TARGET_64BIT)
5971 fputs ("@TPOFF", file);
5972 else
5973 fputs ("@NTPOFF", file);
f996902d
RH
5974 break;
5975 case UNSPEC_DTPOFF:
5976 fputs ("@DTPOFF", file);
5977 break;
dea73790 5978 case UNSPEC_GOTNTPOFF:
75d38379
JJ
5979 if (TARGET_64BIT)
5980 fputs ("@GOTTPOFF(%rip)", file);
5981 else
5982 fputs ("@GOTNTPOFF", file);
dea73790
JJ
5983 break;
5984 case UNSPEC_INDNTPOFF:
5985 fputs ("@INDNTPOFF", file);
5986 break;
77ebd435
AJ
5987 default:
5988 output_operand_lossage ("invalid UNSPEC as operand");
5989 break;
5990 }
91bb873f
RH
5991 break;
5992
2a2ab3f9
JVA
5993 default:
5994 output_operand_lossage ("invalid expression as operand");
5995 }
5996}
1865dbb5 5997
b9203463
RH
5998/* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
5999 We need to emit DTP-relative relocations. */
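 /* For example (illustrative, assuming ASM_LONG is ".long"): a 4-byte
 entry is emitted as ".long foo@DTPOFF" and an 8-byte entry as
 ".long foo@DTPOFF, 0". */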
6000
6001void
b96a374d 6002i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
b9203463 6003{
75d38379
JJ
6004 fputs (ASM_LONG, file);
6005 output_addr_const (file, x);
6006 fputs ("@DTPOFF", file);
b9203463
RH
6007 switch (size)
6008 {
6009 case 4:
b9203463
RH
6010 break;
6011 case 8:
75d38379 6012 fputs (", 0", file);
b9203463 6013 break;
b9203463
RH
6014 default:
6015 abort ();
6016 }
b9203463
RH
6017}
6018
1865dbb5
JM
6019/* In the name of slightly smaller debug output, and to cater to
6020 general assembler lossage, recognize PIC+GOTOFF and turn it back
6021 into a direct symbol reference. */
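 /* For example (illustrative, assuming %ebx is the PIC register): the
 reference
 (plus (reg %ebx) (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF)))
 is turned back into plain (symbol_ref "foo") for debug output. */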
6022
69bd9368 6023static rtx
b96a374d 6024ix86_delegitimize_address (rtx orig_x)
1865dbb5 6025{
ec65b2e3 6026 rtx x = orig_x, y;
1865dbb5 6027
4c8c0dec
JJ
6028 if (GET_CODE (x) == MEM)
6029 x = XEXP (x, 0);
6030
6eb791fc
JH
6031 if (TARGET_64BIT)
6032 {
6033 if (GET_CODE (x) != CONST
6034 || GET_CODE (XEXP (x, 0)) != UNSPEC
8ee41eaf 6035 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
4c8c0dec 6036 || GET_CODE (orig_x) != MEM)
6eb791fc
JH
6037 return orig_x;
6038 return XVECEXP (XEXP (x, 0), 0, 0);
6039 }
6040
1865dbb5 6041 if (GET_CODE (x) != PLUS
1865dbb5
JM
6042 || GET_CODE (XEXP (x, 1)) != CONST)
6043 return orig_x;
6044
ec65b2e3
JJ
6045 if (GET_CODE (XEXP (x, 0)) == REG
6046 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6047 /* %ebx + GOT/GOTOFF */
6048 y = NULL;
6049 else if (GET_CODE (XEXP (x, 0)) == PLUS)
6050 {
6051 /* %ebx + %reg * scale + GOT/GOTOFF */
6052 y = XEXP (x, 0);
6053 if (GET_CODE (XEXP (y, 0)) == REG
6054 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
6055 y = XEXP (y, 1);
6056 else if (GET_CODE (XEXP (y, 1)) == REG
6057 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
6058 y = XEXP (y, 0);
6059 else
6060 return orig_x;
6061 if (GET_CODE (y) != REG
6062 && GET_CODE (y) != MULT
6063 && GET_CODE (y) != ASHIFT)
6064 return orig_x;
6065 }
6066 else
6067 return orig_x;
6068
1865dbb5
JM
6069 x = XEXP (XEXP (x, 1), 0);
6070 if (GET_CODE (x) == UNSPEC
8ee41eaf
RH
6071 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6072 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
ec65b2e3
JJ
6073 {
6074 if (y)
6075 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
6076 return XVECEXP (x, 0, 0);
6077 }
1865dbb5
JM
6078
6079 if (GET_CODE (x) == PLUS
6080 && GET_CODE (XEXP (x, 0)) == UNSPEC
6081 && GET_CODE (XEXP (x, 1)) == CONST_INT
8ee41eaf
RH
6082 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6083 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
6084 && GET_CODE (orig_x) != MEM)))
ec65b2e3
JJ
6085 {
6086 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
6087 if (y)
6088 return gen_rtx_PLUS (Pmode, y, x);
6089 return x;
6090 }
1865dbb5
JM
6091
6092 return orig_x;
6093}
2a2ab3f9 6094\f
a269a03c 6095static void
b96a374d
AJ
6096put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
6097 int fp, FILE *file)
a269a03c 6098{
a269a03c
JC
6099 const char *suffix;
6100
9a915772
JH
6101 if (mode == CCFPmode || mode == CCFPUmode)
6102 {
6103 enum rtx_code second_code, bypass_code;
6104 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
f822d252 6105 if (bypass_code != UNKNOWN || second_code != UNKNOWN)
b531087a 6106 abort ();
9a915772
JH
6107 code = ix86_fp_compare_code_to_integer (code);
6108 mode = CCmode;
6109 }
a269a03c
JC
6110 if (reverse)
6111 code = reverse_condition (code);
e075ae69 6112
a269a03c
JC
6113 switch (code)
6114 {
6115 case EQ:
6116 suffix = "e";
6117 break;
a269a03c
JC
6118 case NE:
6119 suffix = "ne";
6120 break;
a269a03c 6121 case GT:
7e08e190 6122 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
e075ae69
RH
6123 abort ();
6124 suffix = "g";
a269a03c 6125 break;
a269a03c 6126 case GTU:
e075ae69
RH
6127 /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
6128 Those same assemblers have the same but opposite losage on cmov. */
7e08e190 6129 if (mode != CCmode)
0f290768 6130 abort ();
e075ae69 6131 suffix = fp ? "nbe" : "a";
a269a03c 6132 break;
a269a03c 6133 case LT:
9076b9c1 6134 if (mode == CCNOmode || mode == CCGOCmode)
a269a03c 6135 suffix = "s";
7e08e190 6136 else if (mode == CCmode || mode == CCGCmode)
e075ae69 6137 suffix = "l";
9076b9c1 6138 else
0f290768 6139 abort ();
a269a03c 6140 break;
a269a03c 6141 case LTU:
9076b9c1 6142 if (mode != CCmode)
0f290768 6143 abort ();
a269a03c
JC
6144 suffix = "b";
6145 break;
a269a03c 6146 case GE:
9076b9c1 6147 if (mode == CCNOmode || mode == CCGOCmode)
a269a03c 6148 suffix = "ns";
7e08e190 6149 else if (mode == CCmode || mode == CCGCmode)
e075ae69 6150 suffix = "ge";
9076b9c1 6151 else
0f290768 6152 abort ();
a269a03c 6153 break;
a269a03c 6154 case GEU:
e075ae69 6155 /* ??? As above. */
7e08e190 6156 if (mode != CCmode)
0f290768 6157 abort ();
7e08e190 6158 suffix = fp ? "nb" : "ae";
a269a03c 6159 break;
a269a03c 6160 case LE:
7e08e190 6161 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
e075ae69
RH
6162 abort ();
6163 suffix = "le";
a269a03c 6164 break;
a269a03c 6165 case LEU:
9076b9c1
JH
6166 if (mode != CCmode)
6167 abort ();
7e08e190 6168 suffix = "be";
a269a03c 6169 break;
3a3677ff 6170 case UNORDERED:
9e7adcb3 6171 suffix = fp ? "u" : "p";
3a3677ff
RH
6172 break;
6173 case ORDERED:
9e7adcb3 6174 suffix = fp ? "nu" : "np";
3a3677ff 6175 break;
a269a03c
JC
6176 default:
6177 abort ();
6178 }
6179 fputs (suffix, file);
6180}
6181
a55f4481
RK
6182/* Print the name of register X to FILE based on its machine mode and number.
6183 If CODE is 'w', pretend the mode is HImode.
6184 If CODE is 'b', pretend the mode is QImode.
6185 If CODE is 'k', pretend the mode is SImode.
6186 If CODE is 'q', pretend the mode is DImode.
6187 If CODE is 'h', pretend the reg is the `high' byte register.
6188 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
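 /* For illustration (not in the original source): for hard register 0 in
 AT&T syntax, code 'b' prints "%al", 'w' prints "%ax", 'k' prints
 "%eax", 'h' prints "%ah", and in 64-bit mode 'q' prints "%rax". */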
6189
e075ae69 6190void
b96a374d 6191print_reg (rtx x, int code, FILE *file)
e5cb57e8 6192{
a55f4481
RK
6193 if (REGNO (x) == ARG_POINTER_REGNUM
6194 || REGNO (x) == FRAME_POINTER_REGNUM
6195 || REGNO (x) == FLAGS_REG
6196 || REGNO (x) == FPSR_REG)
480feac0
ZW
6197 abort ();
6198
5bf0ebab 6199 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
e075ae69
RH
6200 putc ('%', file);
6201
ef6257cd 6202 if (code == 'w' || MMX_REG_P (x))
e075ae69
RH
6203 code = 2;
6204 else if (code == 'b')
6205 code = 1;
6206 else if (code == 'k')
6207 code = 4;
3f3f2124
JH
6208 else if (code == 'q')
6209 code = 8;
e075ae69
RH
6210 else if (code == 'y')
6211 code = 3;
6212 else if (code == 'h')
6213 code = 0;
6214 else
6215 code = GET_MODE_SIZE (GET_MODE (x));
e9a25f70 6216
3f3f2124
JH
6217 /* Irritatingly, the AMD extended registers use a different naming convention
6218 from the normal registers. */
6219 if (REX_INT_REG_P (x))
6220 {
885a70fd
JH
6221 if (!TARGET_64BIT)
6222 abort ();
3f3f2124
JH
6223 switch (code)
6224 {
ef6257cd 6225 case 0:
c725bd79 6226 error ("extended registers have no high halves");
3f3f2124
JH
6227 break;
6228 case 1:
6229 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
6230 break;
6231 case 2:
6232 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
6233 break;
6234 case 4:
6235 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
6236 break;
6237 case 8:
6238 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
6239 break;
6240 default:
c725bd79 6241 error ("unsupported operand size for extended register");
3f3f2124
JH
6242 break;
6243 }
6244 return;
6245 }
e075ae69
RH
6246 switch (code)
6247 {
6248 case 3:
6249 if (STACK_TOP_P (x))
6250 {
6251 fputs ("st(0)", file);
6252 break;
6253 }
5efb1046 6254 /* FALLTHRU */
e075ae69 6255 case 8:
3f3f2124 6256 case 4:
e075ae69 6257 case 12:
446988df 6258 if (! ANY_FP_REG_P (x))
885a70fd 6259 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
5efb1046 6260 /* FALLTHRU */
a7180f70 6261 case 16:
e075ae69 6262 case 2:
d4c32b6f 6263 normal:
e075ae69
RH
6264 fputs (hi_reg_name[REGNO (x)], file);
6265 break;
6266 case 1:
d4c32b6f
RH
6267 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
6268 goto normal;
e075ae69
RH
6269 fputs (qi_reg_name[REGNO (x)], file);
6270 break;
6271 case 0:
d4c32b6f
RH
6272 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
6273 goto normal;
e075ae69
RH
6274 fputs (qi_high_reg_name[REGNO (x)], file);
6275 break;
6276 default:
6277 abort ();
fe25fea3 6278 }
e5cb57e8
SC
6279}
6280
f996902d
RH
6281/* Locate some local-dynamic symbol still in use by this function
6282 so that we can print its name in some tls_local_dynamic_base
6283 pattern. */
6284
6285static const char *
b96a374d 6286get_some_local_dynamic_name (void)
f996902d
RH
6287{
6288 rtx insn;
6289
6290 if (cfun->machine->some_ld_name)
6291 return cfun->machine->some_ld_name;
6292
6293 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
6294 if (INSN_P (insn)
6295 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
6296 return cfun->machine->some_ld_name;
6297
6298 abort ();
6299}
6300
6301static int
b96a374d 6302get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
f996902d
RH
6303{
6304 rtx x = *px;
6305
6306 if (GET_CODE (x) == SYMBOL_REF
6307 && local_dynamic_symbolic_operand (x, Pmode))
6308 {
6309 cfun->machine->some_ld_name = XSTR (x, 0);
6310 return 1;
6311 }
6312
6313 return 0;
6314}
6315
2a2ab3f9 6316/* Meaning of CODE:
fe25fea3 6317 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
e5cb57e8 6318 C -- print opcode suffix for set/cmov insn.
fe25fea3 6319 c -- like C, but print reversed condition
ef6257cd 6320 F,f -- likewise, but for floating-point.
f6f5dff2
RO
6321 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
6322 otherwise nothing
2a2ab3f9
JVA
6323 R -- print the prefix for register names.
6324 z -- print the opcode suffix for the size of the current operand.
6325 * -- print a star (in certain assembler syntax)
fb204271 6326 A -- print an absolute memory reference.
2a2ab3f9 6327 w -- print the operand as if it's a "word" (HImode) even if it isn't.
2d49677f
SC
6328 s -- print a shift double count, followed by the assembler's argument
6329 delimiter.
fe25fea3
SC
6330 b -- print the QImode name of the register for the indicated operand.
6331 %b0 would print %al if operands[0] is reg 0.
6332 w -- likewise, print the HImode name of the register.
6333 k -- likewise, print the SImode name of the register.
3f3f2124 6334 q -- likewise, print the DImode name of the register.
ef6257cd
JH
6335 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
6336 y -- print "st(0)" instead of "st" as a register.
a46d1d38 6337 D -- print condition for SSE cmp instruction.
ef6257cd
JH
6338 P -- if PIC, print an @PLT suffix.
6339 X -- don't print any sort of PIC '@' suffix for a symbol.
f996902d 6340 & -- print some in-use local-dynamic symbol name.
ef719a44 6341 H -- print a memory address offset by 8; used for sse high-parts
a46d1d38 6342 */
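 /* A few illustrative uses (not in the original source): "%b0" prints
 "%al" when operands[0] is register 0, "%z0" prints the "l" suffix for
 an SImode integer operand, and "%*" prints a star in AT&T syntax. */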
2a2ab3f9
JVA
6343
6344void
b96a374d 6345print_operand (FILE *file, rtx x, int code)
2a2ab3f9
JVA
6346{
6347 if (code)
6348 {
6349 switch (code)
6350 {
6351 case '*':
80f33d06 6352 if (ASSEMBLER_DIALECT == ASM_ATT)
2a2ab3f9
JVA
6353 putc ('*', file);
6354 return;
6355
f996902d
RH
6356 case '&':
6357 assemble_name (file, get_some_local_dynamic_name ());
6358 return;
6359
fb204271 6360 case 'A':
80f33d06 6361 if (ASSEMBLER_DIALECT == ASM_ATT)
fb204271 6362 putc ('*', file);
80f33d06 6363 else if (ASSEMBLER_DIALECT == ASM_INTEL)
fb204271
DN
6364 {
6365 /* Intel syntax. For absolute addresses, registers should not
6366 be surrounded by braces. */
6367 if (GET_CODE (x) != REG)
6368 {
6369 putc ('[', file);
6370 PRINT_OPERAND (file, x, 0);
6371 putc (']', file);
6372 return;
6373 }
6374 }
80f33d06
GS
6375 else
6376 abort ();
fb204271
DN
6377
6378 PRINT_OPERAND (file, x, 0);
6379 return;
6380
6381
2a2ab3f9 6382 case 'L':
80f33d06 6383 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 6384 putc ('l', file);
2a2ab3f9
JVA
6385 return;
6386
6387 case 'W':
80f33d06 6388 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 6389 putc ('w', file);
2a2ab3f9
JVA
6390 return;
6391
6392 case 'B':
80f33d06 6393 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 6394 putc ('b', file);
2a2ab3f9
JVA
6395 return;
6396
6397 case 'Q':
80f33d06 6398 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 6399 putc ('l', file);
2a2ab3f9
JVA
6400 return;
6401
6402 case 'S':
80f33d06 6403 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 6404 putc ('s', file);
2a2ab3f9
JVA
6405 return;
6406
5f1ec3e6 6407 case 'T':
80f33d06 6408 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 6409 putc ('t', file);
5f1ec3e6
JVA
6410 return;
6411
2a2ab3f9
JVA
6412 case 'z':
6413 /* 387 opcodes don't get size suffixes if the operands are
0f290768 6414 registers. */
2a2ab3f9
JVA
6415 if (STACK_REG_P (x))
6416 return;
6417
831c4e87
KC
6418 /* Likewise if using Intel opcodes. */
6419 if (ASSEMBLER_DIALECT == ASM_INTEL)
6420 return;
6421
6422 /* This is the size of op from size of operand. */
2a2ab3f9
JVA
6423 switch (GET_MODE_SIZE (GET_MODE (x)))
6424 {
2a2ab3f9 6425 case 2:
155d8a47
JW
6426#ifdef HAVE_GAS_FILDS_FISTS
6427 putc ('s', file);
6428#endif
2a2ab3f9
JVA
6429 return;
6430
6431 case 4:
6432 if (GET_MODE (x) == SFmode)
6433 {
e075ae69 6434 putc ('s', file);
2a2ab3f9
JVA
6435 return;
6436 }
6437 else
e075ae69 6438 putc ('l', file);
2a2ab3f9
JVA
6439 return;
6440
5f1ec3e6 6441 case 12:
2b589241 6442 case 16:
e075ae69
RH
6443 putc ('t', file);
6444 return;
5f1ec3e6 6445
2a2ab3f9
JVA
6446 case 8:
6447 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
56c0e8fa
JVA
6448 {
6449#ifdef GAS_MNEMONICS
e075ae69 6450 putc ('q', file);
56c0e8fa 6451#else
e075ae69
RH
6452 putc ('l', file);
6453 putc ('l', file);
56c0e8fa
JVA
6454#endif
6455 }
e075ae69
RH
6456 else
6457 putc ('l', file);
2a2ab3f9 6458 return;
155d8a47
JW
6459
6460 default:
6461 abort ();
2a2ab3f9 6462 }
4af3895e
JVA
6463
6464 case 'b':
6465 case 'w':
6466 case 'k':
3f3f2124 6467 case 'q':
4af3895e
JVA
6468 case 'h':
6469 case 'y':
5cb6195d 6470 case 'X':
e075ae69 6471 case 'P':
4af3895e
JVA
6472 break;
6473
2d49677f
SC
6474 case 's':
6475 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
6476 {
6477 PRINT_OPERAND (file, x, 0);
e075ae69 6478 putc (',', file);
2d49677f 6479 }
a269a03c
JC
6480 return;
6481
a46d1d38
JH
6482 case 'D':
 6483	 /* A little bit of brain damage here. The SSE compare instructions
 6484	 use completely different names for the comparisons than the fp
 6485	 conditional moves do. */
6486 switch (GET_CODE (x))
6487 {
6488 case EQ:
6489 case UNEQ:
6490 fputs ("eq", file);
6491 break;
6492 case LT:
6493 case UNLT:
6494 fputs ("lt", file);
6495 break;
6496 case LE:
6497 case UNLE:
6498 fputs ("le", file);
6499 break;
6500 case UNORDERED:
6501 fputs ("unord", file);
6502 break;
6503 case NE:
6504 case LTGT:
6505 fputs ("neq", file);
6506 break;
6507 case UNGE:
6508 case GE:
6509 fputs ("nlt", file);
6510 break;
6511 case UNGT:
6512 case GT:
6513 fputs ("nle", file);
6514 break;
6515 case ORDERED:
6516 fputs ("ord", file);
6517 break;
6518 default:
6519 abort ();
6520 break;
6521 }
6522 return;
048b1c95 6523 case 'O':
f6f5dff2 6524#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
048b1c95
JJ
6525 if (ASSEMBLER_DIALECT == ASM_ATT)
6526 {
6527 switch (GET_MODE (x))
6528 {
6529 case HImode: putc ('w', file); break;
6530 case SImode:
6531 case SFmode: putc ('l', file); break;
6532 case DImode:
6533 case DFmode: putc ('q', file); break;
6534 default: abort ();
6535 }
6536 putc ('.', file);
6537 }
6538#endif
6539 return;
1853aadd 6540 case 'C':
e075ae69 6541 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
1853aadd 6542 return;
fe25fea3 6543 case 'F':
f6f5dff2 6544#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
048b1c95
JJ
6545 if (ASSEMBLER_DIALECT == ASM_ATT)
6546 putc ('.', file);
6547#endif
e075ae69 6548 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
fe25fea3
SC
6549 return;
6550
e9a25f70 6551 /* Like above, but reverse condition */
e075ae69 6552 case 'c':
fce5a9f2 6553 /* Check to see if argument to %c is really a constant
c1d5afc4 6554 and not a condition code which needs to be reversed. */
ec8e098d 6555 if (!COMPARISON_P (x))
c1d5afc4
CR
6556 {
6557 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
6558 return;
6559 }
e075ae69
RH
6560 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
6561 return;
fe25fea3 6562 case 'f':
f6f5dff2 6563#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
048b1c95
JJ
6564 if (ASSEMBLER_DIALECT == ASM_ATT)
6565 putc ('.', file);
6566#endif
e075ae69 6567 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
1853aadd 6568 return;
ef719a44
RH
6569
6570 case 'H':
6571 /* It doesn't actually matter what mode we use here, as we're
6572 only going to use this for printing. */
6573 x = adjust_address_nv (x, DImode, 8);
6574 break;
6575
ef6257cd
JH
6576 case '+':
6577 {
6578 rtx x;
e5cb57e8 6579
ef6257cd
JH
6580 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
6581 return;
a4f31c00 6582
ef6257cd
JH
6583 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
6584 if (x)
6585 {
6586 int pred_val = INTVAL (XEXP (x, 0));
6587
6588 if (pred_val < REG_BR_PROB_BASE * 45 / 100
6589 || pred_val > REG_BR_PROB_BASE * 55 / 100)
6590 {
6591 int taken = pred_val > REG_BR_PROB_BASE / 2;
6592 int cputaken = final_forward_branch_p (current_output_insn) == 0;
6593
 6594	 /* Emit hints only when the default branch prediction
d1f87653 6595	 heuristics would fail. */
ef6257cd
JH
6596 if (taken != cputaken)
6597 {
6598 /* We use 3e (DS) prefix for taken branches and
6599 2e (CS) prefix for not taken branches. */
6600 if (taken)
6601 fputs ("ds ; ", file);
6602 else
6603 fputs ("cs ; ", file);
6604 }
6605 }
6606 }
6607 return;
6608 }
4af3895e 6609 default:
9e637a26 6610 output_operand_lossage ("invalid operand code '%c'", code);
2a2ab3f9
JVA
6611 }
6612 }
e9a25f70 6613
2a2ab3f9 6614 if (GET_CODE (x) == REG)
a55f4481 6615 print_reg (x, code, file);
e9a25f70 6616
2a2ab3f9
JVA
6617 else if (GET_CODE (x) == MEM)
6618 {
e075ae69 6619 /* No `byte ptr' prefix for call instructions. */
80f33d06 6620 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
2a2ab3f9 6621 {
69ddee61 6622 const char * size;
e075ae69
RH
6623 switch (GET_MODE_SIZE (GET_MODE (x)))
6624 {
6625 case 1: size = "BYTE"; break;
6626 case 2: size = "WORD"; break;
6627 case 4: size = "DWORD"; break;
6628 case 8: size = "QWORD"; break;
6629 case 12: size = "XWORD"; break;
a7180f70 6630 case 16: size = "XMMWORD"; break;
e075ae69 6631 default:
564d80f4 6632 abort ();
e075ae69 6633 }
fb204271
DN
6634
6635 /* Check for explicit size override (codes 'b', 'w' and 'k') */
6636 if (code == 'b')
6637 size = "BYTE";
6638 else if (code == 'w')
6639 size = "WORD";
6640 else if (code == 'k')
6641 size = "DWORD";
6642
e075ae69
RH
6643 fputs (size, file);
6644 fputs (" PTR ", file);
2a2ab3f9 6645 }
e075ae69
RH
6646
6647 x = XEXP (x, 0);
0d7d98ee 6648 /* Avoid (%rip) for call operands. */
d10f5ecf 6649 if (CONSTANT_ADDRESS_P (x) && code == 'P'
0d7d98ee
JH
6650 && GET_CODE (x) != CONST_INT)
6651 output_addr_const (file, x);
c8b94768
RH
6652 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
6653 output_operand_lossage ("invalid constraints for operand");
2a2ab3f9 6654 else
e075ae69 6655 output_address (x);
2a2ab3f9 6656 }
e9a25f70 6657
2a2ab3f9
JVA
6658 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
6659 {
e9a25f70
JL
6660 REAL_VALUE_TYPE r;
6661 long l;
6662
5f1ec3e6
JVA
6663 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6664 REAL_VALUE_TO_TARGET_SINGLE (r, l);
e075ae69 6665
80f33d06 6666 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 6667 putc ('$', file);
781f4ec1 6668 fprintf (file, "0x%08lx", l);
5f1ec3e6 6669 }
e9a25f70 6670
74dc3e94
RH
6671 /* These float cases don't actually occur as immediate operands. */
6672 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
5f1ec3e6 6673 {
e9a25f70
JL
6674 char dstr[30];
6675
da6eec72 6676 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
5f1ec3e6 6677 fprintf (file, "%s", dstr);
2a2ab3f9 6678 }
e9a25f70 6679
2b589241 6680 else if (GET_CODE (x) == CONST_DOUBLE
f8a1ebc6 6681 && GET_MODE (x) == XFmode)
2a2ab3f9 6682 {
e9a25f70
JL
6683 char dstr[30];
6684
da6eec72 6685 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
5f1ec3e6 6686 fprintf (file, "%s", dstr);
2a2ab3f9 6687 }
f996902d 6688
79325812 6689 else
2a2ab3f9 6690 {
4af3895e 6691 if (code != 'P')
2a2ab3f9 6692 {
695dac07 6693 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
e075ae69 6694 {
80f33d06 6695 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69
RH
6696 putc ('$', file);
6697 }
2a2ab3f9
JVA
6698 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
6699 || GET_CODE (x) == LABEL_REF)
e075ae69 6700 {
80f33d06 6701 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69
RH
6702 putc ('$', file);
6703 else
6704 fputs ("OFFSET FLAT:", file);
6705 }
2a2ab3f9 6706 }
e075ae69
RH
6707 if (GET_CODE (x) == CONST_INT)
6708 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6709 else if (flag_pic)
2a2ab3f9
JVA
6710 output_pic_addr_const (file, x, code);
6711 else
6712 output_addr_const (file, x);
6713 }
6714}
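/* Editor's worked example for the '+' case above (values assumed):
   with REG_BR_PROB_BASE == 10000 and a REG_BR_PROB note of 9000,
   pred_val is above the 55% threshold, so taken = 1; if the insn is
   a forward branch (which the CPU statically predicts not taken),
   cputaken = 0 and a "ds ; " prefix is emitted to hint "taken".  */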
6715\f
6716/* Print a memory operand whose address is ADDR. */
6717
6718void
8d531ab9 6719print_operand_address (FILE *file, rtx addr)
2a2ab3f9 6720{
e075ae69
RH
6721 struct ix86_address parts;
6722 rtx base, index, disp;
6723 int scale;
e9a25f70 6724
e075ae69
RH
6725 if (! ix86_decompose_address (addr, &parts))
6726 abort ();
e9a25f70 6727
e075ae69
RH
6728 base = parts.base;
6729 index = parts.index;
6730 disp = parts.disp;
6731 scale = parts.scale;
e9a25f70 6732
74dc3e94
RH
6733 switch (parts.seg)
6734 {
6735 case SEG_DEFAULT:
6736 break;
6737 case SEG_FS:
6738 case SEG_GS:
6739 if (USER_LABEL_PREFIX[0] == 0)
6740 putc ('%', file);
6741 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
6742 break;
6743 default:
6744 abort ();
6745 }
6746
e075ae69
RH
6747 if (!base && !index)
6748 {
6749 /* Displacement only requires special attention. */
e9a25f70 6750
e075ae69 6751 if (GET_CODE (disp) == CONST_INT)
2a2ab3f9 6752 {
74dc3e94 6753 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
fb204271
DN
6754 {
6755 if (USER_LABEL_PREFIX[0] == 0)
6756 putc ('%', file);
6757 fputs ("ds:", file);
6758 }
74dc3e94 6759 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
2a2ab3f9 6760 }
e075ae69 6761 else if (flag_pic)
74dc3e94 6762 output_pic_addr_const (file, disp, 0);
e075ae69 6763 else
74dc3e94 6764 output_addr_const (file, disp);
0d7d98ee
JH
6765
6766 /* Use one byte shorter RIP relative addressing for 64bit mode. */
edfe8595 6767 if (TARGET_64BIT
74dc3e94
RH
6768 && ((GET_CODE (disp) == SYMBOL_REF
6769 && ! tls_symbolic_operand (disp, GET_MODE (disp)))
6770 || GET_CODE (disp) == LABEL_REF
6771 || (GET_CODE (disp) == CONST
6772 && GET_CODE (XEXP (disp, 0)) == PLUS
6773 && (GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
6774 || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
6775 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)))
0d7d98ee 6776 fputs ("(%rip)", file);
e075ae69
RH
6777 }
6778 else
6779 {
80f33d06 6780 if (ASSEMBLER_DIALECT == ASM_ATT)
2a2ab3f9 6781 {
e075ae69 6782 if (disp)
2a2ab3f9 6783 {
c399861d 6784 if (flag_pic)
e075ae69
RH
6785 output_pic_addr_const (file, disp, 0);
6786 else if (GET_CODE (disp) == LABEL_REF)
6787 output_asm_label (disp);
2a2ab3f9 6788 else
e075ae69 6789 output_addr_const (file, disp);
2a2ab3f9
JVA
6790 }
6791
e075ae69
RH
6792 putc ('(', file);
6793 if (base)
a55f4481 6794 print_reg (base, 0, file);
e075ae69 6795 if (index)
2a2ab3f9 6796 {
e075ae69 6797 putc (',', file);
a55f4481 6798 print_reg (index, 0, file);
e075ae69
RH
6799 if (scale != 1)
6800 fprintf (file, ",%d", scale);
2a2ab3f9 6801 }
e075ae69 6802 putc (')', file);
2a2ab3f9 6803 }
2a2ab3f9
JVA
6804 else
6805 {
e075ae69 6806 rtx offset = NULL_RTX;
e9a25f70 6807
e075ae69
RH
6808 if (disp)
6809 {
6810 /* Pull out the offset of a symbol; print any symbol itself. */
6811 if (GET_CODE (disp) == CONST
6812 && GET_CODE (XEXP (disp, 0)) == PLUS
6813 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
6814 {
6815 offset = XEXP (XEXP (disp, 0), 1);
6816 disp = gen_rtx_CONST (VOIDmode,
6817 XEXP (XEXP (disp, 0), 0));
6818 }
ce193852 6819
e075ae69
RH
6820 if (flag_pic)
6821 output_pic_addr_const (file, disp, 0);
6822 else if (GET_CODE (disp) == LABEL_REF)
6823 output_asm_label (disp);
6824 else if (GET_CODE (disp) == CONST_INT)
6825 offset = disp;
6826 else
6827 output_addr_const (file, disp);
6828 }
e9a25f70 6829
e075ae69
RH
6830 putc ('[', file);
6831 if (base)
a8620236 6832 {
a55f4481 6833 print_reg (base, 0, file);
e075ae69
RH
6834 if (offset)
6835 {
6836 if (INTVAL (offset) >= 0)
6837 putc ('+', file);
6838 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6839 }
a8620236 6840 }
e075ae69
RH
6841 else if (offset)
6842 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
2247a58c 6843 else
e075ae69 6844 putc ('0', file);
e9a25f70 6845
e075ae69
RH
6846 if (index)
6847 {
6848 putc ('+', file);
a55f4481 6849 print_reg (index, 0, file);
e075ae69
RH
6850 if (scale != 1)
6851 fprintf (file, "*%d", scale);
6852 }
6853 putc (']', file);
6854 }
2a2ab3f9
JVA
6855 }
6856}
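/* Editor's illustration (assumed operands): a base of %eax, an index
   of %ebx, scale 4 and displacement 8 print as "8(%eax,%ebx,4)" in
   AT&T syntax and as "[eax+8+ebx*4]" in Intel syntax, following the
   base / offset / index order of the code above.  */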
f996902d
RH
6857
6858bool
b96a374d 6859output_addr_const_extra (FILE *file, rtx x)
f996902d
RH
6860{
6861 rtx op;
6862
6863 if (GET_CODE (x) != UNSPEC)
6864 return false;
6865
6866 op = XVECEXP (x, 0, 0);
6867 switch (XINT (x, 1))
6868 {
6869 case UNSPEC_GOTTPOFF:
6870 output_addr_const (file, op);
dea73790 6871 /* FIXME: This might be @TPOFF in Sun ld. */
f996902d
RH
6872 fputs ("@GOTTPOFF", file);
6873 break;
6874 case UNSPEC_TPOFF:
6875 output_addr_const (file, op);
6876 fputs ("@TPOFF", file);
6877 break;
6878 case UNSPEC_NTPOFF:
6879 output_addr_const (file, op);
75d38379
JJ
6880 if (TARGET_64BIT)
6881 fputs ("@TPOFF", file);
6882 else
6883 fputs ("@NTPOFF", file);
f996902d
RH
6884 break;
6885 case UNSPEC_DTPOFF:
6886 output_addr_const (file, op);
6887 fputs ("@DTPOFF", file);
6888 break;
dea73790
JJ
6889 case UNSPEC_GOTNTPOFF:
6890 output_addr_const (file, op);
75d38379
JJ
6891 if (TARGET_64BIT)
6892 fputs ("@GOTTPOFF(%rip)", file);
6893 else
6894 fputs ("@GOTNTPOFF", file);
dea73790
JJ
6895 break;
6896 case UNSPEC_INDNTPOFF:
6897 output_addr_const (file, op);
6898 fputs ("@INDNTPOFF", file);
6899 break;
f996902d
RH
6900
6901 default:
6902 return false;
6903 }
6904
6905 return true;
6906}
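/* Editor's illustration: (unspec [(symbol_ref "x")] UNSPEC_NTPOFF)
   prints as "x@TPOFF" on 64-bit targets and "x@NTPOFF" on 32-bit
   targets, matching the TLS relocation operators the assembler
   expects.  */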
2a2ab3f9
JVA
6907\f
6908/* Split one or more DImode RTL references into pairs of SImode
6909 references. The RTL can be REG, offsettable MEM, integer constant, or
6910 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
6911 split and "num" is its length. lo_half and hi_half are output arrays
0f290768 6912 that parallel "operands". */
2a2ab3f9
JVA
6913
6914void
b96a374d 6915split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
2a2ab3f9
JVA
6916{
6917 while (num--)
6918 {
57dbca5e 6919 rtx op = operands[num];
b932f770
JH
6920
 6921	 /* simplify_subreg refuses to split volatile memory addresses,
 6922	 but we still have to handle them. */
6923 if (GET_CODE (op) == MEM)
2a2ab3f9 6924 {
f4ef873c 6925 lo_half[num] = adjust_address (op, SImode, 0);
b72f00af 6926 hi_half[num] = adjust_address (op, SImode, 4);
2a2ab3f9
JVA
6927 }
6928 else
b932f770 6929 {
38ca929b
JH
6930 lo_half[num] = simplify_gen_subreg (SImode, op,
6931 GET_MODE (op) == VOIDmode
6932 ? DImode : GET_MODE (op), 0);
6933 hi_half[num] = simplify_gen_subreg (SImode, op,
6934 GET_MODE (op) == VOIDmode
6935 ? DImode : GET_MODE (op), 4);
b932f770 6936 }
2a2ab3f9
JVA
6937 }
6938}
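/* Editor's usage sketch: for a single pseudo (reg:DI 60),
   split_di (&op, 1, &lo, &hi) typically yields
   lo = (subreg:SI (reg:DI 60) 0) and hi = (subreg:SI (reg:DI 60) 4),
   i.e. the low and high 32-bit words at byte offsets 0 and 4.  */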
44cf5b6a
JH
 6939/* Split one or more TImode RTL references into pairs of DImode
 6940	 references. The RTL can be REG, offsettable MEM, integer constant, or
 6941	 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
6942 split and "num" is its length. lo_half and hi_half are output arrays
6943 that parallel "operands". */
6944
6945void
b96a374d 6946split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
44cf5b6a
JH
6947{
6948 while (num--)
6949 {
6950 rtx op = operands[num];
b932f770
JH
6951
 6952	 /* simplify_subreg refuses to split volatile memory addresses, but we
 6953	 still have to handle them. */
6954 if (GET_CODE (op) == MEM)
44cf5b6a
JH
6955 {
6956 lo_half[num] = adjust_address (op, DImode, 0);
6957 hi_half[num] = adjust_address (op, DImode, 8);
6958 }
6959 else
b932f770
JH
6960 {
6961 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
6962 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
6963 }
44cf5b6a
JH
6964 }
6965}
2a2ab3f9 6966\f
2a2ab3f9
JVA
6967/* Output code to perform a 387 binary operation in INSN, one of PLUS,
6968 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
6969 is the expression of the binary operation. The output may either be
6970 emitted here, or returned to the caller, like all output_* functions.
6971
6972 There is no guarantee that the operands are the same mode, as they
0f290768 6973 might be within FLOAT or FLOAT_EXTEND expressions. */
2a2ab3f9 6974
e3c2afab
AM
6975#ifndef SYSV386_COMPAT
6976/* Set to 1 for compatibility with brain-damaged assemblers. No-one
6977 wants to fix the assemblers because that causes incompatibility
6978 with gcc. No-one wants to fix gcc because that causes
6979 incompatibility with assemblers... You can use the option of
6980 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
6981#define SYSV386_COMPAT 1
6982#endif
6983
69ddee61 6984const char *
b96a374d 6985output_387_binary_op (rtx insn, rtx *operands)
2a2ab3f9 6986{
e3c2afab 6987 static char buf[30];
69ddee61 6988 const char *p;
1deaa899 6989 const char *ssep;
89b17498 6990 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
2a2ab3f9 6991
e3c2afab
AM
6992#ifdef ENABLE_CHECKING
 6993	 /* Even if we do not want to check the inputs, this documents the input
 6994	 constraints, which helps in understanding the following code. */
6995 if (STACK_REG_P (operands[0])
6996 && ((REG_P (operands[1])
6997 && REGNO (operands[0]) == REGNO (operands[1])
6998 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
6999 || (REG_P (operands[2])
7000 && REGNO (operands[0]) == REGNO (operands[2])
7001 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
7002 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
7003 ; /* ok */
1deaa899 7004 else if (!is_sse)
e3c2afab
AM
7005 abort ();
7006#endif
7007
2a2ab3f9
JVA
7008 switch (GET_CODE (operands[3]))
7009 {
7010 case PLUS:
e075ae69
RH
7011 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7012 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7013 p = "fiadd";
7014 else
7015 p = "fadd";
1deaa899 7016 ssep = "add";
2a2ab3f9
JVA
7017 break;
7018
7019 case MINUS:
e075ae69
RH
7020 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7021 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7022 p = "fisub";
7023 else
7024 p = "fsub";
1deaa899 7025 ssep = "sub";
2a2ab3f9
JVA
7026 break;
7027
7028 case MULT:
e075ae69
RH
7029 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7030 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7031 p = "fimul";
7032 else
7033 p = "fmul";
1deaa899 7034 ssep = "mul";
2a2ab3f9
JVA
7035 break;
7036
7037 case DIV:
e075ae69
RH
7038 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7039 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7040 p = "fidiv";
7041 else
7042 p = "fdiv";
1deaa899 7043 ssep = "div";
2a2ab3f9
JVA
7044 break;
7045
7046 default:
7047 abort ();
7048 }
7049
1deaa899
JH
7050 if (is_sse)
7051 {
7052 strcpy (buf, ssep);
7053 if (GET_MODE (operands[0]) == SFmode)
7054 strcat (buf, "ss\t{%2, %0|%0, %2}");
7055 else
7056 strcat (buf, "sd\t{%2, %0|%0, %2}");
7057 return buf;
7058 }
e075ae69 7059 strcpy (buf, p);
2a2ab3f9
JVA
7060
7061 switch (GET_CODE (operands[3]))
7062 {
7063 case MULT:
7064 case PLUS:
7065 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
7066 {
e3c2afab 7067 rtx temp = operands[2];
2a2ab3f9
JVA
7068 operands[2] = operands[1];
7069 operands[1] = temp;
7070 }
7071
e3c2afab
AM
 7072	 /* At this point we know operands[0] == operands[1]. */
7073
2a2ab3f9 7074 if (GET_CODE (operands[2]) == MEM)
e075ae69
RH
7075 {
7076 p = "%z2\t%2";
7077 break;
7078 }
2a2ab3f9
JVA
7079
7080 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6b28fd63
JL
7081 {
7082 if (STACK_TOP_P (operands[0]))
e3c2afab
AM
7083 /* How is it that we are storing to a dead operand[2]?
7084 Well, presumably operands[1] is dead too. We can't
7085 store the result to st(0) as st(0) gets popped on this
7086 instruction. Instead store to operands[2] (which I
7087 think has to be st(1)). st(1) will be popped later.
7088 gcc <= 2.8.1 didn't have this check and generated
7089 assembly code that the Unixware assembler rejected. */
7090 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6b28fd63 7091 else
e3c2afab 7092 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
e075ae69 7093 break;
6b28fd63 7094 }
2a2ab3f9
JVA
7095
7096 if (STACK_TOP_P (operands[0]))
e3c2afab 7097 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
2a2ab3f9 7098 else
e3c2afab 7099 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
e075ae69 7100 break;
2a2ab3f9
JVA
7101
7102 case MINUS:
7103 case DIV:
7104 if (GET_CODE (operands[1]) == MEM)
e075ae69
RH
7105 {
7106 p = "r%z1\t%1";
7107 break;
7108 }
2a2ab3f9
JVA
7109
7110 if (GET_CODE (operands[2]) == MEM)
e075ae69
RH
7111 {
7112 p = "%z2\t%2";
7113 break;
7114 }
2a2ab3f9 7115
2a2ab3f9 7116 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6b28fd63 7117 {
e3c2afab
AM
7118#if SYSV386_COMPAT
7119 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7120 derived assemblers, confusingly reverse the direction of
7121 the operation for fsub{r} and fdiv{r} when the
7122 destination register is not st(0). The Intel assembler
7123 doesn't have this brain damage. Read !SYSV386_COMPAT to
7124 figure out what the hardware really does. */
7125 if (STACK_TOP_P (operands[0]))
7126 p = "{p\t%0, %2|rp\t%2, %0}";
7127 else
7128 p = "{rp\t%2, %0|p\t%0, %2}";
7129#else
6b28fd63 7130 if (STACK_TOP_P (operands[0]))
e3c2afab
AM
7131 /* As above for fmul/fadd, we can't store to st(0). */
7132 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6b28fd63 7133 else
e3c2afab
AM
7134 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7135#endif
e075ae69 7136 break;
6b28fd63 7137 }
2a2ab3f9
JVA
7138
7139 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
6b28fd63 7140 {
e3c2afab 7141#if SYSV386_COMPAT
6b28fd63 7142 if (STACK_TOP_P (operands[0]))
e3c2afab 7143 p = "{rp\t%0, %1|p\t%1, %0}";
6b28fd63 7144 else
e3c2afab
AM
7145 p = "{p\t%1, %0|rp\t%0, %1}";
7146#else
7147 if (STACK_TOP_P (operands[0]))
7148 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7149 else
7150 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7151#endif
e075ae69 7152 break;
6b28fd63 7153 }
2a2ab3f9
JVA
7154
7155 if (STACK_TOP_P (operands[0]))
7156 {
7157 if (STACK_TOP_P (operands[1]))
e3c2afab 7158 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
2a2ab3f9 7159 else
e3c2afab 7160 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
e075ae69 7161 break;
2a2ab3f9
JVA
7162 }
7163 else if (STACK_TOP_P (operands[1]))
e3c2afab
AM
7164 {
7165#if SYSV386_COMPAT
7166 p = "{\t%1, %0|r\t%0, %1}";
7167#else
7168 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
7169#endif
7170 }
2a2ab3f9 7171 else
e3c2afab
AM
7172 {
7173#if SYSV386_COMPAT
7174 p = "{r\t%2, %0|\t%0, %2}";
7175#else
7176 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7177#endif
7178 }
e075ae69 7179 break;
2a2ab3f9
JVA
7180
7181 default:
7182 abort ();
7183 }
e075ae69
RH
7184
7185 strcat (buf, p);
7186 return buf;
2a2ab3f9 7187}
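/* Editor's worked example: for (set (reg:SF st0) (plus:SF (reg:SF st0)
   (mem:SF ...))), p is first "fadd"; the PLUS branch then sees the MEM
   and selects "%z2\t%2", so the routine returns "fadd%z2\t%2", which
   the 'z' operand code above turns into "fadds <mem>" in AT&T output.  */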
e075ae69 7188
edeacc14
UB
7189/* Output code to initialize control word copies used by trunc?f?i and
 7190	 rounding patterns. CURRENT_MODE is set to the current control word,
 7191	 while NEW_MODE is set to the new control word. */
7192
7a2e09f4 7193void
edeacc14 7194emit_i387_cw_initialization (rtx current_mode, rtx new_mode, int mode)
7a2e09f4
JH
7195{
7196 rtx reg = gen_reg_rtx (HImode);
7197
edeacc14
UB
7198 emit_insn (gen_x86_fnstcw_1 (current_mode));
7199 emit_move_insn (reg, current_mode);
7200
7a2e09f4
JH
7201 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
7202 && !TARGET_64BIT)
edeacc14
UB
7203 {
7204 switch (mode)
7205 {
7206 case I387_CW_FLOOR:
7207 /* round down toward -oo */
7208 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
7209 break;
7210
7211 case I387_CW_CEIL:
7212 /* round up toward +oo */
7213 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
7214 break;
7215
7216 case I387_CW_TRUNC:
7217 /* round toward zero (truncate) */
7218 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
7219 break;
7220
7221 case I387_CW_MASK_PM:
7222 /* mask precision exception for nearbyint() */
7223 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
7224 break;
7225
7226 default:
 7227	 abort ();
7228 }
7229 }
7a2e09f4 7230 else
edeacc14
UB
7231 {
7232 switch (mode)
7233 {
7234 case I387_CW_FLOOR:
7235 /* round down toward -oo */
7236 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
7237 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
7238 break;
7239
7240 case I387_CW_CEIL:
7241 /* round up toward +oo */
7242 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
7243 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
7244 break;
7245
7246 case I387_CW_TRUNC:
7247 /* round toward zero (truncate) */
7248 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
7249 break;
7250
7251 case I387_CW_MASK_PM:
7252 /* mask precision exception for nearbyint() */
7253 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
7254 break;
7255
7256 default:
 7257	 abort ();
7258 }
7259 }
7260
7261 emit_move_insn (new_mode, reg);
7a2e09f4
JH
7262}
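/* Editor's note on the magic constants above: bits 10-11 of the x87
   control word form the rounding-control field (00 = to nearest,
   01 = down for floor, 10 = up for ceil, 11 = truncate), so the
   general path masks with ~0x0c00 and ORs in 0x0400, 0x0800 or
   0x0c00, while the fast path inserts the nibble straight into the
   high byte; bit 5 (0x0020) masks the precision exception for
   nearbyint.  */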
7263
2a2ab3f9 7264/* Output code for INSN to convert a float to a signed int. OPERANDS
46d21d2c 7265 are the insn operands. The output may be [HSD]Imode and the input
e075ae69 7266 operand may be [SDX]Fmode. */
2a2ab3f9 7267
69ddee61 7268const char *
b96a374d 7269output_fix_trunc (rtx insn, rtx *operands)
2a2ab3f9
JVA
7270{
7271 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
e075ae69 7272 int dimode_p = GET_MODE (operands[0]) == DImode;
2a2ab3f9 7273
e075ae69
RH
7274 /* Jump through a hoop or two for DImode, since the hardware has no
7275 non-popping instruction. We used to do this a different way, but
7276 that was somewhat fragile and broke with post-reload splitters. */
a05924f9
JH
7277 if (dimode_p && !stack_top_dies)
7278 output_asm_insn ("fld\t%y1", operands);
e075ae69 7279
7a2e09f4 7280 if (!STACK_TOP_P (operands[1]))
10195bd8
JW
7281 abort ();
7282
e075ae69 7283 if (GET_CODE (operands[0]) != MEM)
7a2e09f4 7284 abort ();
e9a25f70 7285
7a2e09f4 7286 output_asm_insn ("fldcw\t%3", operands);
e075ae69 7287 if (stack_top_dies || dimode_p)
7a2e09f4 7288 output_asm_insn ("fistp%z0\t%0", operands);
10195bd8 7289 else
7a2e09f4 7290 output_asm_insn ("fist%z0\t%0", operands);
e075ae69 7291 output_asm_insn ("fldcw\t%2", operands);
10195bd8 7292
e075ae69 7293 return "";
2a2ab3f9 7294}
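/* Editor's sketch of the emitted sequence (operand numbering assumed
   as set up by the trunc?f?i expanders): for a DImode store whose
   input does not die, the output is roughly
	fld	%st(0)
	fldcw	%3		; load the truncating control word
	fistpll	(mem)		; or fistpq with GAS_MNEMONICS
	fldcw	%2		; restore the saved control word  */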
cda749b1 7295
e075ae69 7296/* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
7c82106f 7297 should be used. UNORDERED_P is true when fucom should be used. */
e075ae69 7298
69ddee61 7299const char *
b96a374d 7300output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
cda749b1 7301{
e075ae69 7302 int stack_top_dies;
869d095e 7303 rtx cmp_op0, cmp_op1;
7c82106f 7304 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
e075ae69 7305
7c82106f 7306 if (eflags_p)
e075ae69 7307 {
7c82106f
UB
7308 cmp_op0 = operands[0];
7309 cmp_op1 = operands[1];
e075ae69 7310 }
869d095e
UB
7311 else
7312 {
7c82106f
UB
7313 cmp_op0 = operands[1];
7314 cmp_op1 = operands[2];
869d095e
UB
7315 }
7316
0644b628
JH
7317 if (is_sse)
7318 {
7319 if (GET_MODE (operands[0]) == SFmode)
7320 if (unordered_p)
7321 return "ucomiss\t{%1, %0|%0, %1}";
7322 else
a5cf80f0 7323 return "comiss\t{%1, %0|%0, %1}";
0644b628
JH
7324 else
7325 if (unordered_p)
7326 return "ucomisd\t{%1, %0|%0, %1}";
7327 else
a5cf80f0 7328 return "comisd\t{%1, %0|%0, %1}";
0644b628 7329 }
cda749b1 7330
e075ae69 7331 if (! STACK_TOP_P (cmp_op0))
cda749b1
JW
7332 abort ();
7333
e075ae69 7334 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
cda749b1 7335
869d095e
UB
7336 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
7337 {
7338 if (stack_top_dies)
7339 {
7340 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
7341 return TARGET_USE_FFREEP ? "ffreep\t%y1" : "fstp\t%y1";
7342 }
7343 else
7344 return "ftst\n\tfnstsw\t%0";
7345 }
7346
e075ae69
RH
7347 if (STACK_REG_P (cmp_op1)
7348 && stack_top_dies
7349 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
7350 && REGNO (cmp_op1) != FIRST_STACK_REG)
cda749b1 7351 {
e075ae69
RH
 7352	 /* If the top of the 387 stack dies, and the other operand is
 7353	 also a stack register that dies, then this must be a
 7354	 `fcompp' float compare. */
7355
7c82106f 7356 if (eflags_p)
e075ae69
RH
7357 {
7358 /* There is no double popping fcomi variant. Fortunately,
7359 eflags is immune from the fstp's cc clobbering. */
7360 if (unordered_p)
7361 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
7362 else
7363 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
fb364dc4 7364 return TARGET_USE_FFREEP ? "ffreep\t%y0" : "fstp\t%y0";
e075ae69
RH
7365 }
7366 else
cda749b1 7367 {
7c82106f
UB
7368 if (unordered_p)
7369 return "fucompp\n\tfnstsw\t%0";
cda749b1 7370 else
7c82106f 7371 return "fcompp\n\tfnstsw\t%0";
cda749b1 7372 }
cda749b1
JW
7373 }
7374 else
7375 {
e075ae69 7376 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
cda749b1 7377
7c82106f 7378 static const char * const alt[16] =
e075ae69 7379 {
7c82106f
UB
7380 "fcom%z2\t%y2\n\tfnstsw\t%0",
7381 "fcomp%z2\t%y2\n\tfnstsw\t%0",
7382 "fucom%z2\t%y2\n\tfnstsw\t%0",
7383 "fucomp%z2\t%y2\n\tfnstsw\t%0",
0f290768 7384
7c82106f
UB
7385 "ficom%z2\t%y2\n\tfnstsw\t%0",
7386 "ficomp%z2\t%y2\n\tfnstsw\t%0",
e075ae69
RH
7387 NULL,
7388 NULL,
7389
7390 "fcomi\t{%y1, %0|%0, %y1}",
7391 "fcomip\t{%y1, %0|%0, %y1}",
7392 "fucomi\t{%y1, %0|%0, %y1}",
7393 "fucomip\t{%y1, %0|%0, %y1}",
7394
7395 NULL,
7396 NULL,
7397 NULL,
e075ae69
RH
7398 NULL
7399 };
7400
7401 int mask;
69ddee61 7402 const char *ret;
e075ae69
RH
7403
7404 mask = eflags_p << 3;
7c82106f 7405 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
e075ae69
RH
7406 mask |= unordered_p << 1;
7407 mask |= stack_top_dies;
7408
7c82106f 7409 if (mask >= 16)
e075ae69
RH
7410 abort ();
7411 ret = alt[mask];
7412 if (ret == NULL)
7413 abort ();
cda749b1 7414
e075ae69 7415 return ret;
cda749b1
JW
7416 }
7417}
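/* Editor's worked example of the mask encoding above: an fcomi-style
   compare (eflags_p = 1) of FP operands (intmode = 0), unordered
   (unordered_p = 1), with a dying top-of-stack (stack_top_dies = 1)
   gives mask = 8 + 0 + 2 + 1 = 11, selecting
   "fucomip\t{%y1, %0|%0, %y1}" from the alt[] table.  */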
2a2ab3f9 7418
f88c65f7 7419void
b96a374d 7420ix86_output_addr_vec_elt (FILE *file, int value)
f88c65f7
RH
7421{
7422 const char *directive = ASM_LONG;
7423
7424 if (TARGET_64BIT)
7425 {
7426#ifdef ASM_QUAD
7427 directive = ASM_QUAD;
7428#else
7429 abort ();
7430#endif
7431 }
7432
7433 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
7434}
7435
7436void
b96a374d 7437ix86_output_addr_diff_elt (FILE *file, int value, int rel)
f88c65f7
RH
7438{
7439 if (TARGET_64BIT)
74411039 7440 fprintf (file, "%s%s%d-%s%d\n",
f88c65f7
RH
7441 ASM_LONG, LPREFIX, value, LPREFIX, rel);
7442 else if (HAVE_AS_GOTOFF_IN_DATA)
7443 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
b069de3b
SS
7444#if TARGET_MACHO
7445 else if (TARGET_MACHO)
86ecdfb6
AP
7446 {
7447 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
7448 machopic_output_function_base_name (file);
7449 fprintf(file, "\n");
7450 }
b069de3b 7451#endif
f88c65f7 7452 else
5fc0e5df
KW
7453 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
7454 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
f88c65f7 7455}
32b5b1aa 7456\f
a8bac9ab
RH
7457/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
7458 for the target. */
7459
7460void
b96a374d 7461ix86_expand_clear (rtx dest)
a8bac9ab
RH
7462{
7463 rtx tmp;
7464
7465 /* We play register width games, which are only valid after reload. */
7466 if (!reload_completed)
7467 abort ();
7468
7469 /* Avoid HImode and its attendant prefix byte. */
7470 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
7471 dest = gen_rtx_REG (SImode, REGNO (dest));
7472
7473 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
7474
7475 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
7476 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
7477 {
7478 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
7479 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
7480 }
7481
7482 emit_insn (tmp);
7483}
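/* Editor's usage sketch: after reload, ix86_expand_clear on the
   SImode %eax emits (parallel [(set (reg:SI 0) (const_int 0))
   (clobber (reg:CC 17))]), i.e. "xorl %eax, %eax"; with
   TARGET_USE_MOV0 and not optimizing for size, the plain
   "movl $0, %eax" form is used instead (mov does not clobber
   the flags).  */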
7484
f996902d
RH
7485/* X is an unchanging MEM. If it is a constant pool reference, return
7486 the constant pool rtx, else NULL. */
7487
8fe75e43 7488rtx
b96a374d 7489maybe_get_pool_constant (rtx x)
f996902d 7490{
69bd9368 7491 x = ix86_delegitimize_address (XEXP (x, 0));
f996902d
RH
7492
7493 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7494 return get_pool_constant (x);
7495
7496 return NULL_RTX;
7497}
7498
79325812 7499void
b96a374d 7500ix86_expand_move (enum machine_mode mode, rtx operands[])
32b5b1aa 7501{
e075ae69 7502 int strict = (reload_in_progress || reload_completed);
74dc3e94
RH
7503 rtx op0, op1;
7504 enum tls_model model;
f996902d
RH
7505
7506 op0 = operands[0];
7507 op1 = operands[1];
7508
d2ad2c8a 7509 if (GET_CODE (op1) == SYMBOL_REF)
f996902d 7510 {
d2ad2c8a
JH
7511 model = SYMBOL_REF_TLS_MODEL (op1);
7512 if (model)
7513 {
7514 op1 = legitimize_tls_address (op1, model, true);
7515 op1 = force_operand (op1, op0);
7516 if (op1 == op0)
7517 return;
7518 }
7519 }
7520 else if (GET_CODE (op1) == CONST
7521 && GET_CODE (XEXP (op1, 0)) == PLUS
7522 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
7523 {
7524 model = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1, 0), 0));
7525 if (model)
7526 {
7527 rtx addend = XEXP (XEXP (op1, 0), 1);
7528 op1 = legitimize_tls_address (XEXP (XEXP (op1, 0), 0), model, true);
7529 op1 = force_operand (op1, NULL);
7530 op1 = expand_simple_binop (Pmode, PLUS, op1, addend,
7531 op0, 1, OPTAB_DIRECT);
7532 if (op1 == op0)
7533 return;
7534 }
f996902d 7535 }
74dc3e94
RH
7536
7537 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
f996902d 7538 {
b069de3b
SS
7539#if TARGET_MACHO
7540 if (MACHOPIC_PURE)
7541 {
7542 rtx temp = ((reload_in_progress
7543 || ((op0 && GET_CODE (op0) == REG)
7544 && mode == Pmode))
7545 ? op0 : gen_reg_rtx (Pmode));
7546 op1 = machopic_indirect_data_reference (op1, temp);
7547 op1 = machopic_legitimize_pic_address (op1, mode,
7548 temp == op1 ? 0 : temp);
7549 }
74dc3e94
RH
7550 else if (MACHOPIC_INDIRECT)
7551 op1 = machopic_indirect_data_reference (op1, 0);
7552 if (op0 == op1)
7553 return;
7554#else
f996902d
RH
7555 if (GET_CODE (op0) == MEM)
7556 op1 = force_reg (Pmode, op1);
e075ae69 7557 else
b39edae3 7558 op1 = legitimize_address (op1, op1, Pmode);
74dc3e94 7559#endif /* TARGET_MACHO */
e075ae69
RH
7560 }
7561 else
7562 {
f996902d 7563 if (GET_CODE (op0) == MEM
44cf5b6a 7564 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
f996902d
RH
7565 || !push_operand (op0, mode))
7566 && GET_CODE (op1) == MEM)
7567 op1 = force_reg (mode, op1);
e9a25f70 7568
f996902d
RH
7569 if (push_operand (op0, mode)
7570 && ! general_no_elim_operand (op1, mode))
7571 op1 = copy_to_mode_reg (mode, op1);
2c5a510c 7572
44cf5b6a
JH
7573 /* Force large constants in 64bit compilation into register
7574 to get them CSEed. */
7575 if (TARGET_64BIT && mode == DImode
f996902d 7576 && immediate_operand (op1, mode)
8fe75e43 7577 && !x86_64_zext_immediate_operand (op1, VOIDmode)
f996902d 7578 && !register_operand (op0, mode)
44cf5b6a 7579 && optimize && !reload_completed && !reload_in_progress)
f996902d 7580 op1 = copy_to_mode_reg (mode, op1);
44cf5b6a 7581
e075ae69 7582 if (FLOAT_MODE_P (mode))
32b5b1aa 7583 {
d7a29404
JH
7584 /* If we are loading a floating point constant to a register,
7585 force the value to memory now, since we'll get better code
 7586	 out of the back end. */
e075ae69
RH
7587
7588 if (strict)
7589 ;
ddc67067
MM
7590 else if (GET_CODE (op1) == CONST_DOUBLE)
7591 {
7592 op1 = validize_mem (force_const_mem (mode, op1));
7593 if (!register_operand (op0, mode))
7594 {
7595 rtx temp = gen_reg_rtx (mode);
7596 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
7597 emit_move_insn (op0, temp);
7598 return;
7599 }
7600 }
32b5b1aa 7601 }
32b5b1aa 7602 }
e9a25f70 7603
74dc3e94 7604 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
e075ae69 7605}
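/* Editor's illustration of the 64-bit constant path above: when
   optimizing, a move of a constant like 0x123456789abcdef0 to a
   non-register destination first copies the constant into a
   register, both because the store has no full 64-bit immediate
   form and so that CSE can reuse the register across uses.  */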
e9a25f70 7606
e37af218 7607void
b96a374d 7608ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
e37af218 7609{
c38573a8
RH
7610 rtx op0 = operands[0], op1 = operands[1];
7611
e37af218
RH
7612 /* Force constants other than zero into memory. We do not know how
7613 the instructions used to build constants modify the upper 64 bits
 7614	 of the register; once we have that information we may be able
7615 to handle some of them more efficiently. */
7616 if ((reload_in_progress | reload_completed) == 0
c38573a8
RH
7617 && register_operand (op0, mode)
7618 && CONSTANT_P (op1) && op1 != CONST0_RTX (mode))
7619 op1 = validize_mem (force_const_mem (mode, op1));
e37af218
RH
7620
7621 /* Make operand1 a register if it isn't already. */
f8ca7923 7622 if (!no_new_pseudos
c38573a8
RH
7623 && !register_operand (op0, mode)
7624 && !register_operand (op1, mode))
e37af218 7625 {
c38573a8 7626 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
e37af218
RH
7627 return;
7628 }
7629
c38573a8 7630 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
fce5a9f2 7631}
e37af218 7632
c38573a8
RH
7633/* Implement the movmisalign patterns for SSE. Non-SSE modes go
7634 straight to ix86_expand_vector_move. */
7635
7636void
7637ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
7638{
7639 rtx op0, op1, m;
7640
7641 op0 = operands[0];
7642 op1 = operands[1];
7643
7644 if (MEM_P (op1))
7645 {
7646 /* If we're optimizing for size, movups is the smallest. */
7647 if (optimize_size)
7648 {
7649 op0 = gen_lowpart (V4SFmode, op0);
7650 op1 = gen_lowpart (V4SFmode, op1);
7651 emit_insn (gen_sse_movups (op0, op1));
7652 return;
7653 }
7654
7655 /* ??? If we have typed data, then it would appear that using
7656 movdqu is the only way to get unaligned data loaded with
7657 integer type. */
7658 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
7659 {
7660 op0 = gen_lowpart (V16QImode, op0);
7661 op1 = gen_lowpart (V16QImode, op1);
7662 emit_insn (gen_sse2_movdqu (op0, op1));
7663 return;
7664 }
7665
7666 if (TARGET_SSE2 && mode == V2DFmode)
7667 {
eb701deb
RH
7668 rtx zero;
7669
c38573a8
RH
7670 /* When SSE registers are split into halves, we can avoid
7671 writing to the top half twice. */
7672 if (TARGET_SSE_SPLIT_REGS)
7673 {
7674 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
eb701deb 7675 zero = op0;
c38573a8
RH
7676 }
7677 else
7678 {
7679 /* ??? Not sure about the best option for the Intel chips.
7680 The following would seem to satisfy; the register is
7681 entirely cleared, breaking the dependency chain. We
7682 then store to the upper half, with a dependency depth
7683 of one. A rumor has it that Intel recommends two movsd
7684 followed by an unpacklpd, but this is unconfirmed. And
7685 given that the dependency depth of the unpacklpd would
7686 still be one, I'm not sure why this would be better. */
eb701deb 7687 zero = CONST0_RTX (V2DFmode);
c38573a8 7688 }
eb701deb
RH
7689
7690 m = adjust_address (op1, DFmode, 0);
7691 emit_insn (gen_sse2_loadlpd (op0, zero, m));
7692 m = adjust_address (op1, DFmode, 8);
7693 emit_insn (gen_sse2_loadhpd (op0, op0, m));
c38573a8
RH
7694 }
7695 else
7696 {
7697 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
7698 emit_move_insn (op0, CONST0_RTX (mode));
7699 else
7700 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
7701
2cdb3148
RH
7702 m = adjust_address (op1, V2SFmode, 0);
7703 emit_insn (gen_sse_loadlps (op0, op0, m));
7704 m = adjust_address (op1, V2SFmode, 8);
7705 emit_insn (gen_sse_loadhps (op0, op0, m));
c38573a8
RH
7706 }
7707 }
7708 else if (MEM_P (op0))
7709 {
7710 /* If we're optimizing for size, movups is the smallest. */
7711 if (optimize_size)
7712 {
7713 op0 = gen_lowpart (V4SFmode, op0);
7714 op1 = gen_lowpart (V4SFmode, op1);
7715 emit_insn (gen_sse_movups (op0, op1));
7716 return;
7717 }
7718
 7719	 /* ??? Similar to above, only less clear because of
 7720	 "typeless stores". */
7721 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
7722 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
7723 {
7724 op0 = gen_lowpart (V16QImode, op0);
7725 op1 = gen_lowpart (V16QImode, op1);
7726 emit_insn (gen_sse2_movdqu (op0, op1));
7727 return;
7728 }
7729
7730 if (TARGET_SSE2 && mode == V2DFmode)
7731 {
7732 m = adjust_address (op0, DFmode, 0);
7733 emit_insn (gen_sse2_storelpd (m, op1));
7734 m = adjust_address (op0, DFmode, 8);
7735 emit_insn (gen_sse2_storehpd (m, op1));
c38573a8
RH
7736 }
7737 else
7738 {
eb701deb
RH
7739 if (mode != V4SFmode)
7740 op1 = gen_lowpart (V4SFmode, op1);
2cdb3148
RH
7741 m = adjust_address (op0, V2SFmode, 0);
7742 emit_insn (gen_sse_storelps (m, op1));
7743 m = adjust_address (op0, V2SFmode, 8);
7744 emit_insn (gen_sse_storehps (m, op1));
c38573a8
RH
7745 }
7746 }
7747 else
7748 gcc_unreachable ();
7749}
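/* Editor's summary of the dispatch above: with -Os any misaligned
   vector move collapses to a single movups; SSE2 integer vectors use
   movdqu; V2DF loads are split into loadlpd/loadhpd halves (seeding
   the register first to break the dependency chain); everything else
   goes through movlps/movhps-style half moves via V2SFmode.  */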
7750
7751
ef719a44
RH
7752/* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
7753 destination to use for the operation. If different from the true
7754 destination in operands[0], a copy operation will be required. */
e9a25f70 7755
ef719a44
RH
7756rtx
7757ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
7758 rtx operands[])
e075ae69
RH
7759{
7760 int matching_memory;
ef719a44 7761 rtx src1, src2, dst;
e075ae69
RH
7762
7763 dst = operands[0];
7764 src1 = operands[1];
7765 src2 = operands[2];
7766
7767 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
ec8e098d 7768 if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
e075ae69
RH
7769 && (rtx_equal_p (dst, src2)
7770 || immediate_operand (src1, mode)))
7771 {
7772 rtx temp = src1;
7773 src1 = src2;
7774 src2 = temp;
32b5b1aa 7775 }
e9a25f70 7776
e075ae69
RH
7777 /* If the destination is memory, and we do not have matching source
7778 operands, do things in registers. */
7779 matching_memory = 0;
7780 if (GET_CODE (dst) == MEM)
32b5b1aa 7781 {
e075ae69
RH
7782 if (rtx_equal_p (dst, src1))
7783 matching_memory = 1;
ec8e098d 7784 else if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
e075ae69
RH
7785 && rtx_equal_p (dst, src2))
7786 matching_memory = 2;
7787 else
7788 dst = gen_reg_rtx (mode);
7789 }
0f290768 7790
e075ae69
RH
 7791	 /* The two source operands cannot both be in memory. */
7792 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
7793 {
7794 if (matching_memory != 2)
7795 src2 = force_reg (mode, src2);
7796 else
7797 src1 = force_reg (mode, src1);
32b5b1aa 7798 }
e9a25f70 7799
06a964de
JH
 7800	 /* If the operation is not commutative, source 1 cannot be a constant
 7801	 or non-matching memory. */
0f290768 7802 if ((CONSTANT_P (src1)
06a964de 7803 || (!matching_memory && GET_CODE (src1) == MEM))
ec8e098d 7804 && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
e075ae69 7805 src1 = force_reg (mode, src1);
0f290768 7806
e075ae69 7807	 /* If optimizing, copy to regs to improve CSE. */
fe577e58 7808 if (optimize && ! no_new_pseudos)
32b5b1aa 7809 {
e075ae69
RH
7810 if (GET_CODE (dst) == MEM)
7811 dst = gen_reg_rtx (mode);
7812 if (GET_CODE (src1) == MEM)
7813 src1 = force_reg (mode, src1);
7814 if (GET_CODE (src2) == MEM)
7815 src2 = force_reg (mode, src2);
32b5b1aa 7816 }
e9a25f70 7817
ef719a44
RH
 7818	 operands[1] = src1;
 7819	 operands[2] = src2;
7820 return dst;
7821}
7822
7823/* Similarly, but assume that the destination has already been
7824 set up properly. */
7825
7826void
7827ix86_fixup_binary_operands_no_copy (enum rtx_code code,
7828 enum machine_mode mode, rtx operands[])
7829{
7830 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
7831 gcc_assert (dst == operands[0]);
7832}
7833
7834/* Attempt to expand a binary operator. Make the expansion closer to the
 7835	 actual machine, than just general_operand, which will allow 3 separate
7836 memory references (one output, two input) in a single insn. */
7837
7838void
7839ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
7840 rtx operands[])
7841{
7842 rtx src1, src2, dst, op, clob;
7843
7844 dst = ix86_fixup_binary_operands (code, mode, operands);
7845 src1 = operands[1];
7846 src2 = operands[2];
7847
7848 /* Emit the instruction. */
e075ae69
RH
7849
7850 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
7851 if (reload_in_progress)
7852 {
7853 /* Reload doesn't know about the flags register, and doesn't know that
7854 it doesn't want to clobber it. We can only do this with PLUS. */
7855 if (code != PLUS)
7856 abort ();
7857 emit_insn (op);
7858 }
7859 else
32b5b1aa 7860 {
e075ae69
RH
7861 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
7862 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
32b5b1aa 7863 }
e9a25f70 7864
e075ae69
RH
7865 /* Fix up the destination if needed. */
7866 if (dst != operands[0])
7867 emit_move_insn (operands[0], dst);
7868}
7869
7870/* Return TRUE or FALSE depending on whether the binary operator meets the
7871 appropriate constraints. */
7872
7873int
b96a374d
AJ
7874ix86_binary_operator_ok (enum rtx_code code,
7875 enum machine_mode mode ATTRIBUTE_UNUSED,
7876 rtx operands[3])
e075ae69
RH
7877{
 7878	 /* The two source operands cannot both be in memory. */
7879 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
7880 return 0;
 7881	 /* If the operation is not commutative, source 1 cannot be a constant. */
ec8e098d 7882 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
e075ae69
RH
7883 return 0;
7884 /* If the destination is memory, we must have a matching source operand. */
7885 if (GET_CODE (operands[0]) == MEM
7886 && ! (rtx_equal_p (operands[0], operands[1])
ec8e098d 7887 || (GET_RTX_CLASS (code) == RTX_COMM_ARITH
e075ae69
RH
7888 && rtx_equal_p (operands[0], operands[2]))))
7889 return 0;
06a964de 7890	 /* If the operation is not commutative and source 1 is memory, we must
d6a7951f 7891 have a matching destination. */
06a964de 7892 if (GET_CODE (operands[1]) == MEM
ec8e098d 7893 && GET_RTX_CLASS (code) != RTX_COMM_ARITH
06a964de
JH
7894 && ! rtx_equal_p (operands[0], operands[1]))
7895 return 0;
e075ae69
RH
7896 return 1;
7897}
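/* Editor's example: (plus:SI (mem:SI A) (mem:SI B)) fails the first
   test above (two memory inputs), which is exactly the case that
   ix86_fixup_binary_operands repairs by forcing one source into a
   register.  */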
7898
7899/* Attempt to expand a unary operator. Make the expansion closer to the
 7900	 actual machine, than just general_operand, which will allow 2 separate
9d81fc27 7901 memory references (one output, one input) in a single insn. */
e075ae69 7902
9d81fc27 7903void
b96a374d
AJ
7904ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
7905 rtx operands[])
e075ae69 7906{
06a964de
JH
7907 int matching_memory;
7908 rtx src, dst, op, clob;
7909
7910 dst = operands[0];
7911 src = operands[1];
e075ae69 7912
06a964de
JH
7913 /* If the destination is memory, and we do not have matching source
7914 operands, do things in registers. */
7915 matching_memory = 0;
7cacf53e 7916 if (MEM_P (dst))
32b5b1aa 7917 {
06a964de
JH
7918 if (rtx_equal_p (dst, src))
7919 matching_memory = 1;
e075ae69 7920 else
06a964de 7921 dst = gen_reg_rtx (mode);
32b5b1aa 7922 }
e9a25f70 7923
06a964de 7924 /* When source operand is memory, destination must match. */
7cacf53e 7925 if (MEM_P (src) && !matching_memory)
06a964de 7926 src = force_reg (mode, src);
0f290768 7927
7cacf53e 7928 /* If optimizing, copy to regs to improve CSE. */
fe577e58 7929 if (optimize && ! no_new_pseudos)
06a964de
JH
7930 {
7931 if (GET_CODE (dst) == MEM)
7932 dst = gen_reg_rtx (mode);
7933 if (GET_CODE (src) == MEM)
7934 src = force_reg (mode, src);
7935 }
7936
7937 /* Emit the instruction. */
7938
7939 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
7940 if (reload_in_progress || code == NOT)
7941 {
7942 /* Reload doesn't know about the flags register, and doesn't know that
7943 it doesn't want to clobber it. */
7944 if (code != NOT)
7945 abort ();
7946 emit_insn (op);
7947 }
7948 else
7949 {
7950 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
7951 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
7952 }
7953
7954 /* Fix up the destination if needed. */
7955 if (dst != operands[0])
7956 emit_move_insn (operands[0], dst);
e075ae69
RH
7957}
7958
7959/* Return TRUE or FALSE depending on whether the unary operator meets the
7960 appropriate constraints. */
7961
7962int
b96a374d
AJ
7963ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
7964 enum machine_mode mode ATTRIBUTE_UNUSED,
7965 rtx operands[2] ATTRIBUTE_UNUSED)
e075ae69 7966{
06a964de
JH
7967 /* If one of operands is memory, source and destination must match. */
7968 if ((GET_CODE (operands[0]) == MEM
7969 || GET_CODE (operands[1]) == MEM)
7970 && ! rtx_equal_p (operands[0], operands[1]))
7971 return FALSE;
e075ae69
RH
7972 return TRUE;
7973}
7cacf53e
RH
7974
7975/* Generate code for floating point ABS or NEG. */
7976
7977void
7978ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
7979 rtx operands[])
7980{
7981 rtx mask, set, use, clob, dst, src;
7982 bool matching_memory;
7983 bool use_sse = false;
ef719a44
RH
7984 bool vector_mode = VECTOR_MODE_P (mode);
7985 enum machine_mode elt_mode = mode;
7986 enum machine_mode vec_mode = VOIDmode;
7cacf53e 7987
ef719a44
RH
7988 if (vector_mode)
7989 {
7990 elt_mode = GET_MODE_INNER (mode);
7991 vec_mode = mode;
7992 use_sse = true;
7993 }
7cacf53e
RH
7994 if (TARGET_SSE_MATH)
7995 {
7996 if (mode == SFmode)
ef719a44
RH
7997 {
7998 use_sse = true;
7999 vec_mode = V4SFmode;
8000 }
7cacf53e 8001 else if (mode == DFmode && TARGET_SSE2)
ef719a44
RH
8002 {
8003 use_sse = true;
8004 vec_mode = V2DFmode;
8005 }
7cacf53e
RH
8006 }
8007
8008 /* NEG and ABS performed with SSE use bitwise mask operations.
8009 Create the appropriate mask now. */
8010 if (use_sse)
8011 {
8012 HOST_WIDE_INT hi, lo;
8013 int shift = 63;
ef719a44 8014 rtvec v;
7cacf53e
RH
8015
8016 /* Find the sign bit, sign extended to 2*HWI. */
ef719a44 8017 if (elt_mode == SFmode)
7cacf53e
RH
8018 lo = 0x80000000, hi = lo < 0;
8019 else if (HOST_BITS_PER_WIDE_INT >= 64)
8020 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
8021 else
8022 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
8023
8024 /* If we're looking for the absolute value, then we want
 8025	 the complement. */
8026 if (code == ABS)
8027 lo = ~lo, hi = ~hi;
8028
8029 /* Force this value into the low part of a fp vector constant. */
bb129a4f
RH
8030 mask = immed_double_const (lo, hi, elt_mode == SFmode ? SImode : DImode);
8031 mask = gen_lowpart (elt_mode, mask);
ef719a44
RH
8032
8033 switch (mode)
8034 {
8035 case SFmode:
8036 v = gen_rtvec (4, mask, CONST0_RTX (SFmode),
8037 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
8038 break;
8039
8040 case DFmode:
8041 v = gen_rtvec (2, mask, CONST0_RTX (DFmode));
8042 break;
8043
8044 case V4SFmode:
8045 v = gen_rtvec (4, mask, mask, mask, mask);
8046 break;
8047
 8048	 case V2DFmode:
8049 v = gen_rtvec (2, mask, mask);
8050 break;
8051
8052 default:
8053 gcc_unreachable ();
8054 }
8055
8056 mask = gen_rtx_CONST_VECTOR (vec_mode, v);
8057 mask = force_reg (vec_mode, mask);
7cacf53e
RH
8058 }
8059 else
8060 {
8061 /* When not using SSE, we don't use the mask, but prefer to keep the
8062 same general form of the insn pattern to reduce duplication when
8063 it comes time to split. */
8064 mask = const0_rtx;
8065 }
8066
8067 dst = operands[0];
8068 src = operands[1];
8069
8070 /* If the destination is memory, and we don't have matching source
8071 operands, do things in registers. */
8072 matching_memory = false;
8073 if (MEM_P (dst))
8074 {
8075 if (rtx_equal_p (dst, src) && (!optimize || no_new_pseudos))
8076 matching_memory = true;
8077 else
8078 dst = gen_reg_rtx (mode);
8079 }
8080 if (MEM_P (src) && !matching_memory)
8081 src = force_reg (mode, src);
8082
ef719a44
RH
8083 if (vector_mode)
8084 {
8085 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
8086 set = gen_rtx_SET (VOIDmode, dst, set);
8087 emit_insn (set);
8088 }
8089 else
8090 {
8091 set = gen_rtx_fmt_e (code, mode, src);
8092 set = gen_rtx_SET (VOIDmode, dst, set);
8093 use = gen_rtx_USE (VOIDmode, mask);
8094 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8095 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (3, set, use, clob)));
8096 }
7cacf53e
RH
8097
8098 if (dst != operands[0])
8099 emit_move_insn (operands[0], dst);
8100}
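/* Editor's worked example: for a NEG:SF with TARGET_SSE_MATH the mask
   built above is the V4SF constant {0x80000000, 0, 0, 0} (the sign
   bit only); for ABS it is the complement {0x7fffffff, ...}. Vector
   modes emit the XOR/AND directly, while the scalar form carries the
   mask in a USE so the post-reload splitter can turn it into an
   xorps/andps.  */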
e075ae69 8101
16189740
RH
8102/* Return TRUE or FALSE depending on whether the first SET in INSN
 8103	 has source and destination with matching CC modes, and whether the
8104 CC mode is at least as constrained as REQ_MODE. */
8105
8106int
b96a374d 8107ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
16189740
RH
8108{
8109 rtx set;
8110 enum machine_mode set_mode;
8111
8112 set = PATTERN (insn);
8113 if (GET_CODE (set) == PARALLEL)
8114 set = XVECEXP (set, 0, 0);
8115 if (GET_CODE (set) != SET)
8116 abort ();
9076b9c1
JH
8117 if (GET_CODE (SET_SRC (set)) != COMPARE)
8118 abort ();
16189740
RH
8119
8120 set_mode = GET_MODE (SET_DEST (set));
8121 switch (set_mode)
8122 {
9076b9c1
JH
8123 case CCNOmode:
8124 if (req_mode != CCNOmode
8125 && (req_mode != CCmode
8126 || XEXP (SET_SRC (set), 1) != const0_rtx))
8127 return 0;
8128 break;
16189740 8129 case CCmode:
9076b9c1 8130 if (req_mode == CCGCmode)
16189740 8131 return 0;
5efb1046 8132 /* FALLTHRU */
9076b9c1
JH
8133 case CCGCmode:
8134 if (req_mode == CCGOCmode || req_mode == CCNOmode)
8135 return 0;
5efb1046 8136 /* FALLTHRU */
9076b9c1 8137 case CCGOCmode:
16189740
RH
8138 if (req_mode == CCZmode)
8139 return 0;
5efb1046 8140 /* FALLTHRU */
16189740
RH
8141 case CCZmode:
8142 break;
8143
8144 default:
8145 abort ();
8146 }
8147
8148 return (GET_MODE (SET_SRC (set)) == set_mode);
8149}
8150
e075ae69
RH
8151/* Generate insn patterns to do an integer compare of OPERANDS. */
8152
8153static rtx
b96a374d 8154ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
e075ae69
RH
8155{
8156 enum machine_mode cmpmode;
8157 rtx tmp, flags;
8158
8159 cmpmode = SELECT_CC_MODE (code, op0, op1);
8160 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
8161
8162 /* This is very simple, but making the interface the same as in the
8163 FP case makes the rest of the code easier. */
8164 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
8165 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
8166
8167 /* Return the test that should be put into the flags user, i.e.
8168 the bcc, scc, or cmov instruction. */
8169 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
8170}
8171
3a3677ff
RH
8172/* Figure out whether to use ordered or unordered fp comparisons.
8173 Return the appropriate mode to use. */
e075ae69 8174
b1cdafbb 8175enum machine_mode
b96a374d 8176ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
e075ae69 8177{
9e7adcb3
JH
8178 /* ??? In order to make all comparisons reversible, we do all comparisons
8179 non-trapping when compiling for IEEE. Once gcc is able to distinguish
 8180	 between the trapping and nontrapping forms of comparisons, we can make inequality
8181 comparisons trapping again, since it results in better code when using
8182 FCOM based compares. */
8183 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
3a3677ff
RH
8184}
8185
9076b9c1 8186enum machine_mode
b96a374d 8187ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
9076b9c1
JH
8188{
8189 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8190 return ix86_fp_compare_mode (code);
8191 switch (code)
8192 {
8193 /* Only zero flag is needed. */
8194 case EQ: /* ZF=0 */
8195 case NE: /* ZF!=0 */
8196 return CCZmode;
8197 /* Codes needing carry flag. */
265dab10
JH
8198 case GEU: /* CF=0 */
8199 case GTU: /* CF=0 & ZF=0 */
7e08e190
JH
8200 case LTU: /* CF=1 */
8201 case LEU: /* CF=1 | ZF=1 */
265dab10 8202 return CCmode;
9076b9c1
JH
8203 /* Codes possibly doable only with sign flag when
8204 comparing against zero. */
8205 case GE: /* SF=OF or SF=0 */
7e08e190 8206 case LT: /* SF<>OF or SF=1 */
9076b9c1
JH
8207 if (op1 == const0_rtx)
8208 return CCGOCmode;
8209 else
8210 /* For other cases Carry flag is not required. */
8211 return CCGCmode;
 8212	 /* Codes doable only with sign flag when comparing
 8213	 against zero, but we miss the jump instruction for it,
4aae8a9a 8214	 so we need to use relational tests against overflow,
9076b9c1
JH
 8215	 which thus needs to be zero. */
8216 case GT: /* ZF=0 & SF=OF */
8217 case LE: /* ZF=1 | SF<>OF */
8218 if (op1 == const0_rtx)
8219 return CCNOmode;
8220 else
8221 return CCGCmode;
7fcd7218
JH
 8222	 /* The strcmp pattern does (use flags), and combine may ask us
 8223	 for the proper mode. */
8224 case USE:
8225 return CCmode;
9076b9c1 8226 default:
0f290768 8227 abort ();
9076b9c1
JH
8228 }
8229}
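/* Editor's example: ix86_cc_mode (GT, x, const0_rtx) returns CCNOmode,
   since "x > 0" needs only ZF and SF=OF with OF known to be zero,
   while ix86_cc_mode (GTU, x, y) needs the carry flag and returns
   CCmode.  */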
8230
e129d93a
ILT
8231/* Return the fixed registers used for condition codes. */
8232
8233static bool
8234ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
8235{
8236 *p1 = FLAGS_REG;
8237 *p2 = FPSR_REG;
8238 return true;
8239}
8240
8241/* If two condition code modes are compatible, return a condition code
8242 mode which is compatible with both. Otherwise, return
8243 VOIDmode. */
8244
8245static enum machine_mode
8246ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
8247{
8248 if (m1 == m2)
8249 return m1;
8250
8251 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
8252 return VOIDmode;
8253
8254 if ((m1 == CCGCmode && m2 == CCGOCmode)
8255 || (m1 == CCGOCmode && m2 == CCGCmode))
8256 return CCGCmode;
8257
8258 switch (m1)
8259 {
8260 default:
8261 abort ();
8262
8263 case CCmode:
8264 case CCGCmode:
8265 case CCGOCmode:
8266 case CCNOmode:
8267 case CCZmode:
8268 switch (m2)
8269 {
8270 default:
8271 return VOIDmode;
8272
8273 case CCmode:
8274 case CCGCmode:
8275 case CCGOCmode:
8276 case CCNOmode:
8277 case CCZmode:
8278 return CCmode;
8279 }
8280
8281 case CCFPmode:
8282 case CCFPUmode:
8283 /* These are only compatible with themselves, which we already
8284 checked above. */
8285 return VOIDmode;
8286 }
8287}
8288
3a3677ff
RH
8289/* Return true if we should use an FCOMI instruction for this fp comparison. */
8290
a940d8bd 8291int
b96a374d 8292ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
3a3677ff 8293{
9e7adcb3
JH
8294 enum rtx_code swapped_code = swap_condition (code);
8295 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
8296 || (ix86_fp_comparison_cost (swapped_code)
8297 == ix86_fp_comparison_fcomi_cost (swapped_code)));
3a3677ff
RH
8298}
8299
0f290768 8300/* Swap, force into registers, or otherwise massage the two operands
3a3677ff 8301 to a fp comparison. The operands are updated in place; the new
d1f87653 8302 comparison code is returned. */
3a3677ff
RH
8303
8304static enum rtx_code
b96a374d 8305ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
3a3677ff
RH
8306{
8307 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
8308 rtx op0 = *pop0, op1 = *pop1;
8309 enum machine_mode op_mode = GET_MODE (op0);
89b17498 8310 int is_sse = SSE_REG_P (op0) || SSE_REG_P (op1);
3a3677ff 8311
e075ae69 8312 /* All of the unordered compare instructions only work on registers.
45c8c47f
UB
8313 The same is true of the fcomi compare instructions. The same is
8314 true of the XFmode compare instructions if not comparing with
8315 zero (ftst insn is used in this case). */
3a3677ff 8316
0644b628
JH
8317 if (!is_sse
8318 && (fpcmp_mode == CCFPUmode
45c8c47f
UB
8319 || (op_mode == XFmode
8320 && ! (standard_80387_constant_p (op0) == 1
8321 || standard_80387_constant_p (op1) == 1))
0644b628 8322 || ix86_use_fcomi_compare (code)))
e075ae69 8323 {
3a3677ff
RH
8324 op0 = force_reg (op_mode, op0);
8325 op1 = force_reg (op_mode, op1);
e075ae69
RH
8326 }
8327 else
8328 {
8329 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
8330 things around if they appear profitable, otherwise force op0
8331 into a register. */
8332
8333 if (standard_80387_constant_p (op0) == 0
8334 || (GET_CODE (op0) == MEM
8335 && ! (standard_80387_constant_p (op1) == 0
8336 || GET_CODE (op1) == MEM)))
32b5b1aa 8337 {
e075ae69
RH
8338 rtx tmp;
8339 tmp = op0, op0 = op1, op1 = tmp;
8340 code = swap_condition (code);
8341 }
8342
8343 if (GET_CODE (op0) != REG)
3a3677ff 8344 op0 = force_reg (op_mode, op0);
e075ae69
RH
8345
8346 if (CONSTANT_P (op1))
8347 {
45c8c47f
UB
8348 int tmp = standard_80387_constant_p (op1);
8349 if (tmp == 0)
3a3677ff 8350 op1 = validize_mem (force_const_mem (op_mode, op1));
45c8c47f
UB
8351 else if (tmp == 1)
8352 {
8353 if (TARGET_CMOVE)
8354 op1 = force_reg (op_mode, op1);
8355 }
8356 else
8357 op1 = force_reg (op_mode, op1);
32b5b1aa
SC
8358 }
8359 }
e9a25f70 8360
9e7adcb3
JH
8361 /* Try to rearrange the comparison to make it cheaper. */
8362 if (ix86_fp_comparison_cost (code)
8363 > ix86_fp_comparison_cost (swap_condition (code))
558740bf 8364 && (GET_CODE (op1) == REG || !no_new_pseudos))
9e7adcb3
JH
8365 {
8366 rtx tmp;
8367 tmp = op0, op0 = op1, op1 = tmp;
8368 code = swap_condition (code);
8369 if (GET_CODE (op0) != REG)
8370 op0 = force_reg (op_mode, op0);
8371 }
8372
3a3677ff
RH
8373 *pop0 = op0;
8374 *pop1 = op1;
8375 return code;
8376}
8377
c0c102a9
JH
8378/* Convert comparison codes we use to represent FP comparison to integer
8379 code that will result in proper branch. Return UNKNOWN if no such code
8380 is available. */
8fe75e43
RH
8381
8382enum rtx_code
b96a374d 8383ix86_fp_compare_code_to_integer (enum rtx_code code)
c0c102a9
JH
8384{
8385 switch (code)
8386 {
8387 case GT:
8388 return GTU;
8389 case GE:
8390 return GEU;
8391 case ORDERED:
8392 case UNORDERED:
8393 return code;
8395 case UNEQ:
8396 return EQ;
8398 case UNLT:
8399 return LTU;
8401 case UNLE:
8402 return LEU;
8404 case LTGT:
8405 return NE;
8407 default:
8408 return UNKNOWN;
8409 }
8410}
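/* For instance, UNLT maps to LTU because fcomi-style compares set CF
   both for "less than" and for unordered operands (see the flags table
   below), which is exactly the condition jb/cmovb test.  */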
8411
8412/* Split comparison code CODE into comparisons we can do using branch
8413 instructions. BYPASS_CODE is the comparison code for the branch that will
8414 branch around FIRST_CODE and SECOND_CODE. If one of the branches
f822d252 8415 is not required, its value is set to UNKNOWN.
c0c102a9 8416 We never require more than two branches. */
8fe75e43
RH
8417
8418void
b96a374d
AJ
8419ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
8420 enum rtx_code *first_code,
8421 enum rtx_code *second_code)
c0c102a9
JH
8422{
8423 *first_code = code;
f822d252
ZW
8424 *bypass_code = UNKNOWN;
8425 *second_code = UNKNOWN;
c0c102a9
JH
8426
8427 /* The fcomi comparison sets flags as follows:
8428
8429 cmp ZF PF CF
8430 > 0 0 0
8431 < 0 0 1
8432 = 1 0 0
8433 un 1 1 1 */
8434
8435 switch (code)
8436 {
8437 case GT: /* GTU - CF=0 & ZF=0 */
8438 case GE: /* GEU - CF=0 */
8439 case ORDERED: /* PF=0 */
8440 case UNORDERED: /* PF=1 */
8441 case UNEQ: /* EQ - ZF=1 */
8442 case UNLT: /* LTU - CF=1 */
8443 case UNLE: /* LEU - CF=1 | ZF=1 */
8444 case LTGT: /* EQ - ZF=0 */
8445 break;
8446 case LT: /* LTU - CF=1 - fails on unordered */
8447 *first_code = UNLT;
8448 *bypass_code = UNORDERED;
8449 break;
8450 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
8451 *first_code = UNLE;
8452 *bypass_code = UNORDERED;
8453 break;
8454 case EQ: /* EQ - ZF=1 - fails on unordered */
8455 *first_code = UNEQ;
8456 *bypass_code = UNORDERED;
8457 break;
8458 case NE: /* NE - ZF=0 - fails on unordered */
8459 *first_code = LTGT;
8460 *second_code = UNORDERED;
8461 break;
8462 case UNGE: /* GEU - CF=0 - fails on unordered */
8463 *first_code = GE;
8464 *second_code = UNORDERED;
8465 break;
8466 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
8467 *first_code = GT;
8468 *second_code = UNORDERED;
8469 break;
8470 default:
8471 abort ();
8472 }
8473 if (!TARGET_IEEE_FP)
8474 {
f822d252
ZW
8475 *second_code = UNKNOWN;
8476 *bypass_code = UNKNOWN;
c0c102a9
JH
8477 }
8478}
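/* Worked example: under TARGET_IEEE_FP a LT comparison comes back as
   first_code = UNLT with bypass_code = UNORDERED, which expands to
   roughly

       jp .Lbypass    ; unordered operands skip the branch
       jb .Ltarget    ; CF set: less than
     .Lbypass:

   so the branch is not taken for NaN operands.  */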
8479
9e7adcb3 8480/* Return the cost of a comparison done with fcom + arithmetic operations on AX.
5bdc5878 8481 All the following functions use the number of instructions as the cost metric.
9e7adcb3
JH
8482 In the future this should be tweaked to compute bytes for optimize_size and
8483 take into account the performance of various instructions on various CPUs. */
8484static int
b96a374d 8485ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
9e7adcb3
JH
8486{
8487 if (!TARGET_IEEE_FP)
8488 return 4;
8489 /* The cost of code output by ix86_expand_fp_compare. */
8490 switch (code)
8491 {
8492 case UNLE:
8493 case UNLT:
8494 case LTGT:
8495 case GT:
8496 case GE:
8497 case UNORDERED:
8498 case ORDERED:
8499 case UNEQ:
8500 return 4;
8502 case LT:
8503 case NE:
8504 case EQ:
8505 case UNGE:
8506 return 5;
8508 case LE:
8509 case UNGT:
8510 return 6;
8512 default:
8513 abort ();
8514 }
8515}
8516
8517/* Return cost of comparison done using fcomi operation.
8518 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8519static int
b96a374d 8520ix86_fp_comparison_fcomi_cost (enum rtx_code code)
9e7adcb3
JH
8521{
8522 enum rtx_code bypass_code, first_code, second_code;
d1f87653 8523 /* Return an arbitrarily high cost when the instruction is not supported -
9e7adcb3
JH
8524 this prevents gcc from using it. */
8525 if (!TARGET_CMOVE)
8526 return 1024;
8527 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
f822d252 8528 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
9e7adcb3
JH
8529}
8530
8531/* Return cost of comparison done using sahf operation.
8532 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8533static int
b96a374d 8534ix86_fp_comparison_sahf_cost (enum rtx_code code)
9e7adcb3
JH
8535{
8536 enum rtx_code bypass_code, first_code, second_code;
d1f87653 8537 /* Return an arbitrarily high cost when the instruction is not preferred -
9e7adcb3
JH
8538 this prevents gcc from using it. */
8539 if (!TARGET_USE_SAHF && !optimize_size)
8540 return 1024;
8541 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
f822d252 8542 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
9e7adcb3
JH
8543}
8544
8545/* Compute cost of the comparison done using any method.
8546 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8547static int
b96a374d 8548ix86_fp_comparison_cost (enum rtx_code code)
9e7adcb3
JH
8549{
8550 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
8551 int min;
8552
8553 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
8554 sahf_cost = ix86_fp_comparison_sahf_cost (code);
8555
8556 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
8557 if (min > sahf_cost)
8558 min = sahf_cost;
8559 if (min > fcomi_cost)
8560 min = fcomi_cost;
8561 return min;
8562}
c0c102a9 8563
3a3677ff
RH
8564/* Generate insn patterns to do a floating point compare of OPERANDS. */
8565
9e7adcb3 8566static rtx
b96a374d
AJ
8567ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
8568 rtx *second_test, rtx *bypass_test)
3a3677ff
RH
8569{
8570 enum machine_mode fpcmp_mode, intcmp_mode;
c0c102a9 8571 rtx tmp, tmp2;
9e7adcb3 8572 int cost = ix86_fp_comparison_cost (code);
c0c102a9 8573 enum rtx_code bypass_code, first_code, second_code;
3a3677ff
RH
8574
8575 fpcmp_mode = ix86_fp_compare_mode (code);
8576 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
8577
9e7adcb3
JH
8578 if (second_test)
8579 *second_test = NULL_RTX;
8580 if (bypass_test)
8581 *bypass_test = NULL_RTX;
8582
c0c102a9
JH
8583 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8584
9e7adcb3 8585 /* Do fcomi/sahf based test when profitable. */
f822d252
ZW
8586 if ((bypass_code == UNKNOWN || bypass_test)
8587 && (second_code == UNKNOWN || second_test)
9e7adcb3 8588 && ix86_fp_comparison_arithmetics_cost (code) > cost)
32b5b1aa 8589 {
c0c102a9
JH
8590 if (TARGET_CMOVE)
8591 {
8592 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8593 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
8594 tmp);
8595 emit_insn (tmp);
8596 }
8597 else
8598 {
8599 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8ee41eaf 8600 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
bf71a4f8
JH
8601 if (!scratch)
8602 scratch = gen_reg_rtx (HImode);
c0c102a9
JH
8603 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8604 emit_insn (gen_x86_sahf_1 (scratch));
8605 }
e075ae69
RH
8606
8607 /* The FP codes work out to act like unsigned. */
9a915772 8608 intcmp_mode = fpcmp_mode;
9e7adcb3 8609 code = first_code;
f822d252 8610 if (bypass_code != UNKNOWN)
9e7adcb3
JH
8611 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
8612 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8613 const0_rtx);
f822d252 8614 if (second_code != UNKNOWN)
9e7adcb3
JH
8615 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
8616 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8617 const0_rtx);
e075ae69
RH
8618 }
8619 else
8620 {
8621 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
e075ae69 8622 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8ee41eaf 8623 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
bf71a4f8
JH
8624 if (!scratch)
8625 scratch = gen_reg_rtx (HImode);
3a3677ff 8626 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
e075ae69 8627
9a915772
JH
8628 /* In the unordered case, we have to check C2 for NaN's, which
8629 doesn't happen to work out to anything nice combination-wise.
8630 So do some bit twiddling on the value we've got in AH to come
8631 up with an appropriate set of condition codes. */
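/* For reference: the status word stored by fnstsw has C0 in bit 8,
   C2 in bit 10 and C3 in bit 14, i.e. bits 0x01, 0x04 and 0x40 of AH;
   the masks 0x45, 0x44, 0x40, 0x04 and 0x01 used below test
   combinations of C3, C2 and C0.  */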
e075ae69 8632
9a915772
JH
8633 intcmp_mode = CCNOmode;
8634 switch (code)
32b5b1aa 8635 {
9a915772
JH
8636 case GT:
8637 case UNGT:
8638 if (code == GT || !TARGET_IEEE_FP)
32b5b1aa 8639 {
3a3677ff 8640 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
e075ae69 8641 code = EQ;
9a915772
JH
8642 }
8643 else
8644 {
8645 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8646 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8647 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
8648 intcmp_mode = CCmode;
8649 code = GEU;
8650 }
8651 break;
8652 case LT:
8653 case UNLT:
8654 if (code == LT && TARGET_IEEE_FP)
8655 {
3a3677ff
RH
8656 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8657 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
e075ae69
RH
8658 intcmp_mode = CCmode;
8659 code = EQ;
9a915772
JH
8660 }
8661 else
8662 {
8663 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
8664 code = NE;
8665 }
8666 break;
8667 case GE:
8668 case UNGE:
8669 if (code == GE || !TARGET_IEEE_FP)
8670 {
3a3677ff 8671 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
e075ae69 8672 code = EQ;
9a915772
JH
8673 }
8674 else
8675 {
8676 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8677 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8678 GEN_INT (0x01)));
8679 code = NE;
8680 }
8681 break;
8682 case LE:
8683 case UNLE:
8684 if (code == LE && TARGET_IEEE_FP)
8685 {
3a3677ff
RH
8686 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8687 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8688 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
e075ae69
RH
8689 intcmp_mode = CCmode;
8690 code = LTU;
9a915772
JH
8691 }
8692 else
8693 {
8694 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8695 code = NE;
8696 }
8697 break;
8698 case EQ:
8699 case UNEQ:
8700 if (code == EQ && TARGET_IEEE_FP)
8701 {
3a3677ff
RH
8702 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8703 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
e075ae69
RH
8704 intcmp_mode = CCmode;
8705 code = EQ;
9a915772
JH
8706 }
8707 else
8708 {
3a3677ff
RH
8709 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8710 code = NE;
8711 break;
9a915772
JH
8712 }
8713 break;
8714 case NE:
8715 case LTGT:
8716 if (code == NE && TARGET_IEEE_FP)
8717 {
3a3677ff 8718 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9a915772
JH
8719 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8720 GEN_INT (0x40)));
3a3677ff 8721 code = NE;
9a915772
JH
8722 }
8723 else
8724 {
3a3677ff
RH
8725 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8726 code = EQ;
32b5b1aa 8727 }
9a915772
JH
8728 break;
8729
8730 case UNORDERED:
8731 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8732 code = NE;
8733 break;
8734 case ORDERED:
8735 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8736 code = EQ;
8737 break;
8738
8739 default:
8740 abort ();
32b5b1aa 8741 }
32b5b1aa 8742 }
e075ae69
RH
8743
8744 /* Return the test that should be put into the flags user, i.e.
8745 the bcc, scc, or cmov instruction. */
8746 return gen_rtx_fmt_ee (code, VOIDmode,
8747 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8748 const0_rtx);
8749}
8750
9e3e266c 8751rtx
b96a374d 8752ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
e075ae69
RH
8753{
8754 rtx op0, op1, ret;
8755 op0 = ix86_compare_op0;
8756 op1 = ix86_compare_op1;
8757
a1b8572c
JH
8758 if (second_test)
8759 *second_test = NULL_RTX;
8760 if (bypass_test)
8761 *bypass_test = NULL_RTX;
8762
e075ae69 8763 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
bf71a4f8 8764 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
77ebd435 8765 second_test, bypass_test);
32b5b1aa 8766 else
e075ae69
RH
8767 ret = ix86_expand_int_compare (code, op0, op1);
8768
8769 return ret;
8770}
8771
03598dea
JH
8772/* Return true if the CODE will result in nontrivial jump sequence. */
8773bool
b96a374d 8774ix86_fp_jump_nontrivial_p (enum rtx_code code)
03598dea
JH
8775{
8776 enum rtx_code bypass_code, first_code, second_code;
8777 if (!TARGET_CMOVE)
8778 return true;
8779 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
f822d252 8780 return bypass_code != UNKNOWN || second_code != UNKNOWN;
03598dea
JH
8781}
8782
e075ae69 8783void
b96a374d 8784ix86_expand_branch (enum rtx_code code, rtx label)
e075ae69 8785{
3a3677ff 8786 rtx tmp;
e075ae69 8787
3a3677ff 8788 switch (GET_MODE (ix86_compare_op0))
32b5b1aa 8789 {
3a3677ff
RH
8790 case QImode:
8791 case HImode:
8792 case SImode:
0d7d98ee 8793 simple:
a1b8572c 8794 tmp = ix86_expand_compare (code, NULL, NULL);
e075ae69
RH
8795 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8796 gen_rtx_LABEL_REF (VOIDmode, label),
8797 pc_rtx);
8798 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
32b5b1aa 8799 return;
e075ae69 8800
3a3677ff
RH
8801 case SFmode:
8802 case DFmode:
0f290768 8803 case XFmode:
3a3677ff
RH
8804 {
8805 rtvec vec;
8806 int use_fcomi;
03598dea 8807 enum rtx_code bypass_code, first_code, second_code;
3a3677ff
RH
8808
8809 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
8810 &ix86_compare_op1);
fce5a9f2 8811
03598dea
JH
8812 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8813
8814 /* Check whether we will use the natural sequence with one jump. If
8815 so, we can expand the jump early. Otherwise delay expansion by
8816 creating a compound insn so as not to confuse the optimizers. */
f822d252 8817 if (bypass_code == UNKNOWN && second_code == UNKNOWN
03598dea
JH
8818 && TARGET_CMOVE)
8819 {
8820 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
8821 gen_rtx_LABEL_REF (VOIDmode, label),
7c82106f 8822 pc_rtx, NULL_RTX, NULL_RTX);
03598dea
JH
8823 }
8824 else
8825 {
8826 tmp = gen_rtx_fmt_ee (code, VOIDmode,
8827 ix86_compare_op0, ix86_compare_op1);
8828 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8829 gen_rtx_LABEL_REF (VOIDmode, label),
8830 pc_rtx);
8831 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
8832
8833 use_fcomi = ix86_use_fcomi_compare (code);
8834 vec = rtvec_alloc (3 + !use_fcomi);
8835 RTVEC_ELT (vec, 0) = tmp;
8836 RTVEC_ELT (vec, 1)
8837 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
8838 RTVEC_ELT (vec, 2)
8839 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
8840 if (! use_fcomi)
8841 RTVEC_ELT (vec, 3)
8842 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
8843
8844 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
8845 }
3a3677ff
RH
8846 return;
8847 }
32b5b1aa 8848
3a3677ff 8849 case DImode:
0d7d98ee
JH
8850 if (TARGET_64BIT)
8851 goto simple;
3a3677ff
RH
8852 /* Expand DImode branch into multiple compare+branch. */
8853 {
8854 rtx lo[2], hi[2], label2;
8855 enum rtx_code code1, code2, code3;
32b5b1aa 8856
3a3677ff
RH
8857 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
8858 {
8859 tmp = ix86_compare_op0;
8860 ix86_compare_op0 = ix86_compare_op1;
8861 ix86_compare_op1 = tmp;
8862 code = swap_condition (code);
8863 }
8864 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
8865 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
32b5b1aa 8866
3a3677ff
RH
8867 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
8868 avoid two branches. This costs one extra insn, so disable when
8869 optimizing for size. */
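/* The OR of the two XORs is zero exactly when both halves are equal,
   so a single EQ/NE test of the combined value replaces the two
   branches.  */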
32b5b1aa 8870
3a3677ff
RH
8871 if ((code == EQ || code == NE)
8872 && (!optimize_size
8873 || hi[1] == const0_rtx || lo[1] == const0_rtx))
8874 {
8875 rtx xor0, xor1;
32b5b1aa 8876
3a3677ff
RH
8877 xor1 = hi[0];
8878 if (hi[1] != const0_rtx)
8879 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
8880 NULL_RTX, 0, OPTAB_WIDEN);
32b5b1aa 8881
3a3677ff
RH
8882 xor0 = lo[0];
8883 if (lo[1] != const0_rtx)
8884 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
8885 NULL_RTX, 0, OPTAB_WIDEN);
e075ae69 8886
3a3677ff
RH
8887 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
8888 NULL_RTX, 0, OPTAB_WIDEN);
32b5b1aa 8889
3a3677ff
RH
8890 ix86_compare_op0 = tmp;
8891 ix86_compare_op1 = const0_rtx;
8892 ix86_expand_branch (code, label);
8893 return;
8894 }
e075ae69 8895
1f9124e4
JJ
8896 /* Otherwise, if we are doing less-than or greater-or-equal-than,
8897 op1 is a constant and the low word is zero, then we can just
8898 examine the high word. */
32b5b1aa 8899
1f9124e4
JJ
8900 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
8901 switch (code)
8902 {
8903 case LT: case LTU: case GE: case GEU:
8904 ix86_compare_op0 = hi[0];
8905 ix86_compare_op1 = hi[1];
8906 ix86_expand_branch (code, label);
8907 return;
8908 default:
8909 break;
8910 }
e075ae69 8911
3a3677ff 8912 /* Otherwise, we need two or three jumps. */
e075ae69 8913
3a3677ff 8914 label2 = gen_label_rtx ();
e075ae69 8915
3a3677ff
RH
8916 code1 = code;
8917 code2 = swap_condition (code);
8918 code3 = unsigned_condition (code);
e075ae69 8919
3a3677ff
RH
8920 switch (code)
8921 {
8922 case LT: case GT: case LTU: case GTU:
8923 break;
e075ae69 8924
3a3677ff
RH
8925 case LE: code1 = LT; code2 = GT; break;
8926 case GE: code1 = GT; code2 = LT; break;
8927 case LEU: code1 = LTU; code2 = GTU; break;
8928 case GEU: code1 = GTU; code2 = LTU; break;
e075ae69 8929
f822d252
ZW
8930 case EQ: code1 = UNKNOWN; code2 = NE; break;
8931 case NE: code2 = UNKNOWN; break;
e075ae69 8932
3a3677ff
RH
8933 default:
8934 abort ();
8935 }
e075ae69 8936
3a3677ff
RH
8937 /*
8938 * a < b =>
8939 * if (hi(a) < hi(b)) goto true;
8940 * if (hi(a) > hi(b)) goto false;
8941 * if (lo(a) < lo(b)) goto true;
8942 * false:
8943 */
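/*
 * Worked example: for signed a <= b the switch above picks
 * code1 = LT, code2 = GT, code3 = LEU, giving:
 *
 * if (hi(a) < hi(b)) goto label;
 * if (hi(a) > hi(b)) goto label2;
 * if (lo(a) <= lo(b)) goto label; (unsigned compare)
 * label2:
 */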
8944
8945 ix86_compare_op0 = hi[0];
8946 ix86_compare_op1 = hi[1];
8947
f822d252 8948 if (code1 != UNKNOWN)
3a3677ff 8949 ix86_expand_branch (code1, label);
f822d252 8950 if (code2 != UNKNOWN)
3a3677ff
RH
8951 ix86_expand_branch (code2, label2);
8952
8953 ix86_compare_op0 = lo[0];
8954 ix86_compare_op1 = lo[1];
8955 ix86_expand_branch (code3, label);
8956
f822d252 8957 if (code2 != UNKNOWN)
3a3677ff
RH
8958 emit_label (label2);
8959 return;
8960 }
e075ae69 8961
3a3677ff
RH
8962 default:
8963 abort ();
8964 }
32b5b1aa 8965}
e075ae69 8966
9e7adcb3
JH
8967/* Split branch based on floating point condition. */
8968void
b96a374d 8969ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
7c82106f 8970 rtx target1, rtx target2, rtx tmp, rtx pushed)
9e7adcb3
JH
8971{
8972 rtx second, bypass;
8973 rtx label = NULL_RTX;
03598dea 8974 rtx condition;
6b24c259
JH
8975 int bypass_probability = -1, second_probability = -1, probability = -1;
8976 rtx i;
9e7adcb3
JH
8977
8978 if (target2 != pc_rtx)
8979 {
8980 rtx tmp = target2;
8981 code = reverse_condition_maybe_unordered (code);
8982 target2 = target1;
8983 target1 = tmp;
8984 }
8985
8986 condition = ix86_expand_fp_compare (code, op1, op2,
8987 tmp, &second, &bypass);
6b24c259 8988
7c82106f
UB
8989 /* Remove pushed operand from stack. */
8990 if (pushed)
8991 ix86_free_from_memory (GET_MODE (pushed));
8992
6b24c259
JH
8993 if (split_branch_probability >= 0)
8994 {
8995 /* Distribute the probabilities across the jumps.
8996 Assume the BYPASS and SECOND to be always test
8997 for UNORDERED. */
8998 probability = split_branch_probability;
8999
d6a7951f 9000 /* A value of 1 is low enough that the probability need not
6b24c259
JH
9001 be updated. Later we may run some experiments and see
9002 whether unordered values are more frequent in practice. */
9003 if (bypass)
9004 bypass_probability = 1;
9005 if (second)
9006 second_probability = 1;
9007 }
9e7adcb3
JH
9008 if (bypass != NULL_RTX)
9009 {
9010 label = gen_label_rtx ();
6b24c259
JH
9011 i = emit_jump_insn (gen_rtx_SET
9012 (VOIDmode, pc_rtx,
9013 gen_rtx_IF_THEN_ELSE (VOIDmode,
9014 bypass,
9015 gen_rtx_LABEL_REF (VOIDmode,
9016 label),
9017 pc_rtx)));
9018 if (bypass_probability >= 0)
9019 REG_NOTES (i)
9020 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9021 GEN_INT (bypass_probability),
9022 REG_NOTES (i));
9023 }
9024 i = emit_jump_insn (gen_rtx_SET
9e7adcb3
JH
9025 (VOIDmode, pc_rtx,
9026 gen_rtx_IF_THEN_ELSE (VOIDmode,
6b24c259
JH
9027 condition, target1, target2)));
9028 if (probability >= 0)
9029 REG_NOTES (i)
9030 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9031 GEN_INT (probability),
9032 REG_NOTES (i));
9033 if (second != NULL_RTX)
9e7adcb3 9034 {
6b24c259
JH
9035 i = emit_jump_insn (gen_rtx_SET
9036 (VOIDmode, pc_rtx,
9037 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
9038 target2)));
9039 if (second_probability >= 0)
9040 REG_NOTES (i)
9041 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9042 GEN_INT (second_probability),
9043 REG_NOTES (i));
9e7adcb3 9044 }
9e7adcb3
JH
9045 if (label != NULL_RTX)
9046 emit_label (label);
9047}
9048
32b5b1aa 9049int
b96a374d 9050ix86_expand_setcc (enum rtx_code code, rtx dest)
32b5b1aa 9051{
3a627503 9052 rtx ret, tmp, tmpreg, equiv;
a1b8572c 9053 rtx second_test, bypass_test;
e075ae69 9054
885a70fd
JH
9055 if (GET_MODE (ix86_compare_op0) == DImode
9056 && !TARGET_64BIT)
e075ae69
RH
9057 return 0; /* FAIL */
9058
b932f770
JH
9059 if (GET_MODE (dest) != QImode)
9060 abort ();
e075ae69 9061
a1b8572c 9062 ret = ix86_expand_compare (code, &second_test, &bypass_test);
e075ae69
RH
9063 PUT_MODE (ret, QImode);
9064
9065 tmp = dest;
a1b8572c 9066 tmpreg = dest;
32b5b1aa 9067
e075ae69 9068 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
a1b8572c
JH
9069 if (bypass_test || second_test)
9070 {
9071 rtx test = second_test;
9072 int bypass = 0;
9073 rtx tmp2 = gen_reg_rtx (QImode);
9074 if (bypass_test)
9075 {
9076 if (second_test)
b531087a 9077 abort ();
a1b8572c
JH
9078 test = bypass_test;
9079 bypass = 1;
9080 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
9081 }
9082 PUT_MODE (test, QImode);
9083 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
9084
9085 if (bypass)
9086 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
9087 else
9088 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
9089 }
e075ae69 9090
3a627503
RS
9091 /* Attach a REG_EQUAL note describing the comparison result. */
9092 equiv = simplify_gen_relational (code, QImode,
9093 GET_MODE (ix86_compare_op0),
9094 ix86_compare_op0, ix86_compare_op1);
9095 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
9096
e075ae69 9097 return 1; /* DONE */
32b5b1aa 9098}
e075ae69 9099
c35d187f
RH
9100/* Expand comparison setting or clearing carry flag. Return true when
9101 successful and set pop for the operation. */
9102static bool
b96a374d 9103ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
4977bab6
ZW
9104{
9105 enum machine_mode mode =
9106 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
9107
9108 /* Do not handle DImode compares that go through the special path. Also we
43f3a59d 9109 can't deal with FP compares yet, though this would be possible to add. */
e6e81735
JH
9110 if (mode == DImode && !TARGET_64BIT)
9111 return false;
9112 if (FLOAT_MODE_P (mode))
9113 {
9114 rtx second_test = NULL, bypass_test = NULL;
9115 rtx compare_op, compare_seq;
9116
9117 /* Shortcut: the following common codes never translate into carry flag compares. */
9118 if (code == EQ || code == NE || code == UNEQ || code == LTGT
9119 || code == ORDERED || code == UNORDERED)
9120 return false;
9121
9122 /* These comparisons require the zero flag; swap operands so they don't. */
9123 if ((code == GT || code == UNLE || code == LE || code == UNGT)
9124 && !TARGET_IEEE_FP)
9125 {
9126 rtx tmp = op0;
9127 op0 = op1;
9128 op1 = tmp;
9129 code = swap_condition (code);
9130 }
9131
c51e6d85
KH
9132 /* Try to expand the comparison and verify that we end up with a carry flag
9133 based comparison. This fails to be true only when we decide to expand the
9134 comparison using arithmetic, which is not a common scenario. */
e6e81735
JH
9135 start_sequence ();
9136 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9137 &second_test, &bypass_test);
9138 compare_seq = get_insns ();
9139 end_sequence ();
9140
9141 if (second_test || bypass_test)
9142 return false;
9143 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9144 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9145 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
9146 else
9147 code = GET_CODE (compare_op);
9148 if (code != LTU && code != GEU)
9149 return false;
9150 emit_insn (compare_seq);
9151 *pop = compare_op;
9152 return true;
9153 }
9154 if (!INTEGRAL_MODE_P (mode))
4977bab6
ZW
9155 return false;
9156 switch (code)
9157 {
9158 case LTU:
9159 case GEU:
9160 break;
9161
9162 /* Convert a==0 into (unsigned)a<1. */
9163 case EQ:
9164 case NE:
9165 if (op1 != const0_rtx)
9166 return false;
9167 op1 = const1_rtx;
9168 code = (code == EQ ? LTU : GEU);
9169 break;
9170
9171 /* Convert a>b into b<a or a>=b-1. */
9172 case GTU:
9173 case LEU:
9174 if (GET_CODE (op1) == CONST_INT)
9175 {
9176 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
9177 /* Bail out on overflow. We can still swap the operands, but that
43f3a59d 9178 would force loading the constant into a register. */
4977bab6
ZW
9179 if (op1 == const0_rtx
9180 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
9181 return false;
9182 code = (code == GTU ? GEU : LTU);
9183 }
9184 else
9185 {
9186 rtx tmp = op1;
9187 op1 = op0;
9188 op0 = tmp;
9189 code = (code == GTU ? LTU : GEU);
9190 }
9191 break;
9192
ccea753c 9193 /* Convert a>=0 into (unsigned)a<0x80000000. */
4977bab6
ZW
9194 case LT:
9195 case GE:
9196 if (mode == DImode || op1 != const0_rtx)
9197 return false;
ccea753c 9198 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
4977bab6
ZW
9199 code = (code == LT ? GEU : LTU);
9200 break;
9201 case LE:
9202 case GT:
9203 if (mode == DImode || op1 != constm1_rtx)
9204 return false;
ccea753c 9205 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
4977bab6
ZW
9206 code = (code == LE ? GEU : LTU);
9207 break;
9208
9209 default:
9210 return false;
9211 }
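/* Worked example: for x <= 41U the GTU/LEU case above rewrites the
   test as x < 42U (LTU), so a single compare against 42 leaves CF set
   exactly when the condition holds, ready for an sbb/adc consumer.  */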
ebe75517
JH
9212 /* Swapping operands may cause constant to appear as first operand. */
9213 if (!nonimmediate_operand (op0, VOIDmode))
9214 {
9215 if (no_new_pseudos)
9216 return false;
9217 op0 = force_reg (mode, op0);
9218 }
4977bab6
ZW
9219 ix86_compare_op0 = op0;
9220 ix86_compare_op1 = op1;
9221 *pop = ix86_expand_compare (code, NULL, NULL);
9222 if (GET_CODE (*pop) != LTU && GET_CODE (*pop) != GEU)
9223 abort ();
9224 return true;
9225}
9226
32b5b1aa 9227int
b96a374d 9228ix86_expand_int_movcc (rtx operands[])
32b5b1aa 9229{
e075ae69
RH
9230 enum rtx_code code = GET_CODE (operands[1]), compare_code;
9231 rtx compare_seq, compare_op;
a1b8572c 9232 rtx second_test, bypass_test;
635559ab 9233 enum machine_mode mode = GET_MODE (operands[0]);
4977bab6 9234 bool sign_bit_compare_p = false;
3a3677ff 9235
e075ae69 9236 start_sequence ();
a1b8572c 9237 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
2f937369 9238 compare_seq = get_insns ();
e075ae69
RH
9239 end_sequence ();
9240
9241 compare_code = GET_CODE (compare_op);
9242
4977bab6
ZW
9243 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
9244 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
9245 sign_bit_compare_p = true;
9246
e075ae69
RH
9247 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
9248 HImode insns, we'd be swallowed in word prefix ops. */
9249
4977bab6 9250 if ((mode != HImode || TARGET_FAST_PREFIX)
635559ab 9251 && (mode != DImode || TARGET_64BIT)
0f290768 9252 && GET_CODE (operands[2]) == CONST_INT
e075ae69
RH
9253 && GET_CODE (operands[3]) == CONST_INT)
9254 {
9255 rtx out = operands[0];
9256 HOST_WIDE_INT ct = INTVAL (operands[2]);
9257 HOST_WIDE_INT cf = INTVAL (operands[3]);
9258 HOST_WIDE_INT diff;
9259
4977bab6
ZW
9260 diff = ct - cf;
9261 /* Sign bit compares are better done using shifts than by using
b96a374d 9262 sbb. */
4977bab6
ZW
9263 if (sign_bit_compare_p
9264 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
9265 ix86_compare_op1, &compare_op))
e075ae69 9266 {
e075ae69
RH
9267 /* Detect overlap between destination and compare sources. */
9268 rtx tmp = out;
9269
4977bab6 9270 if (!sign_bit_compare_p)
36583fea 9271 {
e6e81735
JH
9272 bool fpcmp = false;
9273
4977bab6
ZW
9274 compare_code = GET_CODE (compare_op);
9275
e6e81735
JH
9276 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9277 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9278 {
9279 fpcmp = true;
9280 compare_code = ix86_fp_compare_code_to_integer (compare_code);
9281 }
9282
4977bab6
ZW
9283 /* To simplify rest of code, restrict to the GEU case. */
9284 if (compare_code == LTU)
9285 {
9286 HOST_WIDE_INT tmp = ct;
9287 ct = cf;
9288 cf = tmp;
9289 compare_code = reverse_condition (compare_code);
9290 code = reverse_condition (code);
9291 }
e6e81735
JH
9292 else
9293 {
9294 if (fpcmp)
9295 PUT_CODE (compare_op,
9296 reverse_condition_maybe_unordered
9297 (GET_CODE (compare_op)));
9298 else
9299 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
9300 }
4977bab6 9301 diff = ct - cf;
36583fea 9302
4977bab6
ZW
9303 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
9304 || reg_overlap_mentioned_p (out, ix86_compare_op1))
9305 tmp = gen_reg_rtx (mode);
e075ae69 9306
4977bab6 9307 if (mode == DImode)
e6e81735 9308 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
4977bab6 9309 else
e6e81735 9310 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
4977bab6 9311 }
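/* The movcc_0_m1 patterns used above expand to an sbb of a register
   with itself, leaving tmp == -1 when the carry flag is set and 0
   otherwise; the constant arithmetic below turns that mask into
   ct/cf.  */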
14f73b5a 9312 else
4977bab6
ZW
9313 {
9314 if (code == GT || code == GE)
9315 code = reverse_condition (code);
9316 else
9317 {
9318 HOST_WIDE_INT tmp = ct;
9319 ct = cf;
9320 cf = tmp;
5fb48685 9321 diff = ct - cf;
4977bab6
ZW
9322 }
9323 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
9324 ix86_compare_op1, VOIDmode, 0, -1);
9325 }
e075ae69 9326
36583fea
JH
9327 if (diff == 1)
9328 {
9329 /*
9330 * cmpl op0,op1
9331 * sbbl dest,dest
9332 * [addl dest, ct]
9333 *
9334 * Size 5 - 8.
9335 */
9336 if (ct)
b96a374d 9337 tmp = expand_simple_binop (mode, PLUS,
635559ab 9338 tmp, GEN_INT (ct),
4977bab6 9339 copy_rtx (tmp), 1, OPTAB_DIRECT);
36583fea
JH
9340 }
9341 else if (cf == -1)
9342 {
9343 /*
9344 * cmpl op0,op1
9345 * sbbl dest,dest
9346 * orl $ct, dest
9347 *
9348 * Size 8.
9349 */
635559ab
JH
9350 tmp = expand_simple_binop (mode, IOR,
9351 tmp, GEN_INT (ct),
4977bab6 9352 copy_rtx (tmp), 1, OPTAB_DIRECT);
36583fea
JH
9353 }
9354 else if (diff == -1 && ct)
9355 {
9356 /*
9357 * cmpl op0,op1
9358 * sbbl dest,dest
06ec023f 9359 * notl dest
36583fea
JH
9360 * [addl dest, cf]
9361 *
9362 * Size 8 - 11.
9363 */
4977bab6 9364 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
635559ab 9365 if (cf)
b96a374d 9366 tmp = expand_simple_binop (mode, PLUS,
4977bab6
ZW
9367 copy_rtx (tmp), GEN_INT (cf),
9368 copy_rtx (tmp), 1, OPTAB_DIRECT);
36583fea
JH
9369 }
9370 else
9371 {
9372 /*
9373 * cmpl op0,op1
9374 * sbbl dest,dest
06ec023f 9375 * [notl dest]
36583fea
JH
9376 * andl cf - ct, dest
9377 * [addl dest, ct]
9378 *
9379 * Size 8 - 11.
9380 */
06ec023f
RB
9381
9382 if (cf == 0)
9383 {
9384 cf = ct;
9385 ct = 0;
4977bab6 9386 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
06ec023f
RB
9387 }
9388
635559ab 9389 tmp = expand_simple_binop (mode, AND,
4977bab6 9390 copy_rtx (tmp),
d8bf17f9 9391 gen_int_mode (cf - ct, mode),
4977bab6 9392 copy_rtx (tmp), 1, OPTAB_DIRECT);
635559ab 9393 if (ct)
b96a374d 9394 tmp = expand_simple_binop (mode, PLUS,
4977bab6
ZW
9395 copy_rtx (tmp), GEN_INT (ct),
9396 copy_rtx (tmp), 1, OPTAB_DIRECT);
36583fea 9397 }
e075ae69 9398
4977bab6
ZW
9399 if (!rtx_equal_p (tmp, out))
9400 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
e075ae69
RH
9401
9402 return 1; /* DONE */
9403 }
9404
e075ae69
RH
9405 if (diff < 0)
9406 {
9407 HOST_WIDE_INT tmp;
9408 tmp = ct, ct = cf, cf = tmp;
9409 diff = -diff;
734dba19
JH
9410 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9411 {
9412 /* We may be reversing unordered compare to normal compare, that
9413 is not valid in general (we may convert non-trapping condition
9414 to trapping one), however on i386 we currently emit all
9415 comparisons unordered. */
9416 compare_code = reverse_condition_maybe_unordered (compare_code);
9417 code = reverse_condition_maybe_unordered (code);
9418 }
9419 else
9420 {
9421 compare_code = reverse_condition (compare_code);
9422 code = reverse_condition (code);
9423 }
e075ae69 9424 }
0f2a3457 9425
f822d252 9426 compare_code = UNKNOWN;
0f2a3457
JJ
9427 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
9428 && GET_CODE (ix86_compare_op1) == CONST_INT)
9429 {
9430 if (ix86_compare_op1 == const0_rtx
9431 && (code == LT || code == GE))
9432 compare_code = code;
9433 else if (ix86_compare_op1 == constm1_rtx)
9434 {
9435 if (code == LE)
9436 compare_code = LT;
9437 else if (code == GT)
9438 compare_code = GE;
9439 }
9440 }
9441
9442 /* Optimize dest = (op0 < 0) ? -1 : cf. */
f822d252 9443 if (compare_code != UNKNOWN
0f2a3457
JJ
9444 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
9445 && (cf == -1 || ct == -1))
9446 {
9447 /* If lea code below could be used, only optimize
9448 if it results in a 2 insn sequence. */
9449
9450 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
9451 || diff == 3 || diff == 5 || diff == 9)
9452 || (compare_code == LT && ct == -1)
9453 || (compare_code == GE && cf == -1))
9454 {
9455 /*
9456 * notl op1 (if necessary)
9457 * sarl $31, op1
9458 * orl cf, op1
9459 */
9460 if (ct != -1)
9461 {
9462 cf = ct;
b96a374d 9463 ct = -1;
0f2a3457
JJ
9464 code = reverse_condition (code);
9465 }
9466
9467 out = emit_store_flag (out, code, ix86_compare_op0,
9468 ix86_compare_op1, VOIDmode, 0, -1);
9469
9470 out = expand_simple_binop (mode, IOR,
9471 out, GEN_INT (cf),
9472 out, 1, OPTAB_DIRECT);
9473 if (out != operands[0])
9474 emit_move_insn (operands[0], out);
9475
9476 return 1; /* DONE */
9477 }
9478 }
9479
4977bab6 9480
635559ab
JH
9481 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
9482 || diff == 3 || diff == 5 || diff == 9)
4977bab6 9483 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
8fe75e43
RH
9484 && (mode != DImode
9485 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
e075ae69
RH
9486 {
9487 /*
9488 * xorl dest,dest
9489 * cmpl op1,op2
9490 * setcc dest
9491 * lea cf(dest*(ct-cf)),dest
9492 *
9493 * Size 14.
9494 *
9495 * This also catches the degenerate setcc-only case.
9496 */
9497
9498 rtx tmp;
9499 int nops;
9500
9501 out = emit_store_flag (out, code, ix86_compare_op0,
9502 ix86_compare_op1, VOIDmode, 0, 1);
9503
9504 nops = 0;
97f51ac4
RB
9505 /* On x86_64 the lea instruction operates on Pmode, so we need
9506 to do the arithmetic in the proper mode to match. */
e075ae69 9507 if (diff == 1)
068f5dea 9508 tmp = copy_rtx (out);
e075ae69
RH
9509 else
9510 {
885a70fd 9511 rtx out1;
068f5dea 9512 out1 = copy_rtx (out);
635559ab 9513 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
e075ae69
RH
9514 nops++;
9515 if (diff & 1)
9516 {
635559ab 9517 tmp = gen_rtx_PLUS (mode, tmp, out1);
e075ae69
RH
9518 nops++;
9519 }
9520 }
9521 if (cf != 0)
9522 {
635559ab 9523 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
e075ae69
RH
9524 nops++;
9525 }
4977bab6 9526 if (!rtx_equal_p (tmp, out))
e075ae69 9527 {
14f73b5a 9528 if (nops == 1)
a5cf80f0 9529 out = force_operand (tmp, copy_rtx (out));
e075ae69 9530 else
4977bab6 9531 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
e075ae69 9532 }
4977bab6 9533 if (!rtx_equal_p (out, operands[0]))
1985ef90 9534 emit_move_insn (operands[0], copy_rtx (out));
e075ae69
RH
9535
9536 return 1; /* DONE */
9537 }
9538
9539 /*
9540 * General case: Jumpful:
9541 * xorl dest,dest cmpl op1, op2
9542 * cmpl op1, op2 movl ct, dest
9543 * setcc dest jcc 1f
9544 * decl dest movl cf, dest
9545 * andl (cf-ct),dest 1:
9546 * addl ct,dest
0f290768 9547 *
e075ae69
RH
9548 * Size 20. Size 14.
9549 *
9550 * This is reasonably steep, but branch mispredict costs are
9551 * high on modern cpus, so consider failing only if optimizing
9552 * for space.
e075ae69
RH
9553 */
9554
4977bab6
ZW
9555 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
9556 && BRANCH_COST >= 2)
e075ae69 9557 {
97f51ac4 9558 if (cf == 0)
e075ae69 9559 {
97f51ac4
RB
9560 cf = ct;
9561 ct = 0;
734dba19 9562 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
0f2a3457
JJ
9563 /* We may be reversing unordered compare to normal compare,
9564 that is not valid in general (we may convert non-trapping
9565 condition to trapping one), however on i386 we currently
9566 emit all comparisons unordered. */
9567 code = reverse_condition_maybe_unordered (code);
9568 else
9569 {
9570 code = reverse_condition (code);
f822d252 9571 if (compare_code != UNKNOWN)
0f2a3457
JJ
9572 compare_code = reverse_condition (compare_code);
9573 }
9574 }
9575
f822d252 9576 if (compare_code != UNKNOWN)
0f2a3457
JJ
9577 {
9578 /* notl op1 (if needed)
9579 sarl $31, op1
9580 andl (cf-ct), op1
b96a374d 9581 addl ct, op1
0f2a3457
JJ
9582
9583 For x < 0 (resp. x <= -1) there will be no notl,
9584 so if possible swap the constants to get rid of the
9585 complement.
9586 True/false will be -1/0 while code below (store flag
9587 followed by decrement) is 0/-1, so the constants need
9588 to be exchanged once more. */
9589
9590 if (compare_code == GE || !cf)
734dba19 9591 {
b96a374d 9592 code = reverse_condition (code);
0f2a3457 9593 compare_code = LT;
734dba19
JH
9594 }
9595 else
9596 {
0f2a3457 9597 HOST_WIDE_INT tmp = cf;
b96a374d 9598 cf = ct;
0f2a3457 9599 ct = tmp;
734dba19 9600 }
0f2a3457
JJ
9601
9602 out = emit_store_flag (out, code, ix86_compare_op0,
9603 ix86_compare_op1, VOIDmode, 0, -1);
e075ae69 9604 }
0f2a3457
JJ
9605 else
9606 {
9607 out = emit_store_flag (out, code, ix86_compare_op0,
9608 ix86_compare_op1, VOIDmode, 0, 1);
e075ae69 9609
4977bab6
ZW
9610 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
9611 copy_rtx (out), 1, OPTAB_DIRECT);
0f2a3457 9612 }
e075ae69 9613
4977bab6 9614 out = expand_simple_binop (mode, AND, copy_rtx (out),
d8bf17f9 9615 gen_int_mode (cf - ct, mode),
4977bab6 9616 copy_rtx (out), 1, OPTAB_DIRECT);
97f51ac4 9617 if (ct)
4977bab6
ZW
9618 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
9619 copy_rtx (out), 1, OPTAB_DIRECT);
9620 if (!rtx_equal_p (out, operands[0]))
9621 emit_move_insn (operands[0], copy_rtx (out));
e075ae69
RH
9622
9623 return 1; /* DONE */
9624 }
9625 }
9626
4977bab6 9627 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
e075ae69
RH
9628 {
9629 /* Try a few things more with specific constants and a variable. */
9630
78a0d70c 9631 optab op;
e075ae69
RH
9632 rtx var, orig_out, out, tmp;
9633
4977bab6 9634 if (BRANCH_COST <= 2)
e075ae69
RH
9635 return 0; /* FAIL */
9636
0f290768 9637 /* If one of the two operands is an interesting constant, load a
e075ae69 9638 constant with the above and mask it in with a logical operation. */
0f290768 9639
e075ae69
RH
9640 if (GET_CODE (operands[2]) == CONST_INT)
9641 {
9642 var = operands[3];
4977bab6 9643 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
e075ae69 9644 operands[3] = constm1_rtx, op = and_optab;
4977bab6 9645 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
e075ae69 9646 operands[3] = const0_rtx, op = ior_optab;
78a0d70c
ZW
9647 else
9648 return 0; /* FAIL */
e075ae69
RH
9649 }
9650 else if (GET_CODE (operands[3]) == CONST_INT)
9651 {
9652 var = operands[2];
4977bab6 9653 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
e075ae69 9654 operands[2] = constm1_rtx, op = and_optab;
4977bab6 9655 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
e075ae69 9656 operands[2] = const0_rtx, op = ior_optab;
78a0d70c
ZW
9657 else
9658 return 0; /* FAIL */
e075ae69 9659 }
78a0d70c 9660 else
e075ae69
RH
9661 return 0; /* FAIL */
9662
9663 orig_out = operands[0];
635559ab 9664 tmp = gen_reg_rtx (mode);
e075ae69
RH
9665 operands[0] = tmp;
9666
9667 /* Recurse to get the constant loaded. */
9668 if (ix86_expand_int_movcc (operands) == 0)
9669 return 0; /* FAIL */
9670
9671 /* Mask in the interesting variable. */
635559ab 9672 out = expand_binop (mode, op, var, tmp, orig_out, 0,
e075ae69 9673 OPTAB_WIDEN);
4977bab6
ZW
9674 if (!rtx_equal_p (out, orig_out))
9675 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
e075ae69
RH
9676
9677 return 1; /* DONE */
9678 }
9679
9680 /*
9681 * For comparison with above,
9682 *
9683 * movl cf,dest
9684 * movl ct,tmp
9685 * cmpl op1,op2
9686 * cmovcc tmp,dest
9687 *
9688 * Size 15.
9689 */
9690
635559ab
JH
9691 if (! nonimmediate_operand (operands[2], mode))
9692 operands[2] = force_reg (mode, operands[2]);
9693 if (! nonimmediate_operand (operands[3], mode))
9694 operands[3] = force_reg (mode, operands[3]);
e075ae69 9695
a1b8572c
JH
9696 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9697 {
635559ab 9698 rtx tmp = gen_reg_rtx (mode);
a1b8572c
JH
9699 emit_move_insn (tmp, operands[3]);
9700 operands[3] = tmp;
9701 }
9702 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9703 {
635559ab 9704 rtx tmp = gen_reg_rtx (mode);
a1b8572c
JH
9705 emit_move_insn (tmp, operands[2]);
9706 operands[2] = tmp;
9707 }
4977bab6 9708
c9682caf 9709 if (! register_operand (operands[2], VOIDmode)
b96a374d 9710 && (mode == QImode
4977bab6 9711 || ! register_operand (operands[3], VOIDmode)))
635559ab 9712 operands[2] = force_reg (mode, operands[2]);
a1b8572c 9713
4977bab6
ZW
9714 if (mode == QImode
9715 && ! register_operand (operands[3], VOIDmode))
9716 operands[3] = force_reg (mode, operands[3]);
9717
e075ae69
RH
9718 emit_insn (compare_seq);
9719 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
635559ab 9720 gen_rtx_IF_THEN_ELSE (mode,
e075ae69
RH
9721 compare_op, operands[2],
9722 operands[3])));
a1b8572c 9723 if (bypass_test)
4977bab6 9724 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
635559ab 9725 gen_rtx_IF_THEN_ELSE (mode,
a1b8572c 9726 bypass_test,
4977bab6
ZW
9727 copy_rtx (operands[3]),
9728 copy_rtx (operands[0]))));
a1b8572c 9729 if (second_test)
4977bab6 9730 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
635559ab 9731 gen_rtx_IF_THEN_ELSE (mode,
a1b8572c 9732 second_test,
4977bab6
ZW
9733 copy_rtx (operands[2]),
9734 copy_rtx (operands[0]))));
e075ae69
RH
9735
9736 return 1; /* DONE */
e9a25f70 9737}
e075ae69 9738
32b5b1aa 9739int
b96a374d 9740ix86_expand_fp_movcc (rtx operands[])
32b5b1aa 9741{
eaa49b49
RH
9742 enum machine_mode mode = GET_MODE (operands[0]);
9743 enum rtx_code code = GET_CODE (operands[1]);
9744 rtx tmp, compare_op, second_test, bypass_test;
9745
9746 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
9747 {
9748 rtx cmp_op0, cmp_op1, if_true, if_false;
9749 rtx clob;
9750 enum machine_mode vmode, cmode;
9751 bool is_minmax = false;
9752
9753 cmp_op0 = ix86_compare_op0;
9754 cmp_op1 = ix86_compare_op1;
9755 if_true = operands[2];
9756 if_false = operands[3];
9757
9758 /* Since we've no cmove for sse registers, don't force bad register
9759 allocation just to gain access to it. Deny movcc when the
9760 comparison mode doesn't match the move mode. */
9761 cmode = GET_MODE (cmp_op0);
9762 if (cmode == VOIDmode)
9763 cmode = GET_MODE (cmp_op1);
9764 if (cmode != mode)
9765 return 0;
9766
9767 /* Massage condition to satisfy sse_comparison_operator. In case we
9768 are in non-ieee mode, try to canonicalize the destination operand
9769 to be first in the comparison - this helps reload to avoid extra
9770 moves. */
9771 if (!sse_comparison_operator (operands[1], VOIDmode)
9772 || ((COMMUTATIVE_P (operands[1]) || !TARGET_IEEE_FP)
9773 && rtx_equal_p (operands[0], cmp_op1)))
0073023d 9774 {
eaa49b49
RH
9775 tmp = cmp_op0;
9776 cmp_op0 = cmp_op1;
9777 cmp_op1 = tmp;
9778 code = swap_condition (code);
0073023d 9779 }
eaa49b49
RH
9780
9781 /* Detect conditional moves that exactly match min/max operational
9782 semantics. Note that this is IEEE safe, as long as we don't
9783 interchange the operands, which is why we keep this in the form
9784 of an IF_THEN_ELSE instead of reducing to SMIN/SMAX. */
9785 if ((code == LT || code == UNGE) && REG_P (cmp_op0) && REG_P (cmp_op1))
0073023d 9786 {
eaa49b49
RH
9787 if (((cmp_op0 == if_true && cmp_op1 == if_false)
9788 || (cmp_op0 == if_false && cmp_op1 == if_true)))
0073023d 9789 {
eaa49b49
RH
9790 is_minmax = true;
9791 if (code == UNGE)
4977bab6 9792 {
eaa49b49
RH
9793 code = LT;
9794 tmp = if_true;
9795 if_true = if_false;
9796 if_false = tmp;
4977bab6 9797 }
0073023d
JH
9798 }
9799 }
eaa49b49
RH
9800
9801 if (mode == SFmode)
9802 vmode = V4SFmode;
9803 else if (mode == DFmode)
9804 vmode = V2DFmode;
0073023d 9805 else
eaa49b49
RH
9806 gcc_unreachable ();
9807
9808 cmp_op0 = force_reg (mode, cmp_op0);
9809 if (!nonimmediate_operand (cmp_op1, mode))
9810 cmp_op1 = force_reg (mode, cmp_op1);
9811
9812 tmp = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
9813 gcc_assert (sse_comparison_operator (tmp, VOIDmode));
9814
9815 tmp = gen_rtx_IF_THEN_ELSE (mode, tmp, if_true, if_false);
9816 tmp = gen_rtx_SET (VOIDmode, operands[0], tmp);
9817
9818 if (!is_minmax)
9819 {
9820 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (vmode));
9821 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
9822 }
9823
9824 emit_insn (tmp);
0073023d
JH
9825 return 1;
9826 }
9827
e075ae69 9828 /* The floating point conditional move instructions don't directly
0f290768 9829 support conditions resulting from a signed integer comparison. */
32b5b1aa 9830
a1b8572c 9831 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9e7adcb3
JH
9832
a1b8572c 9836 if (!fcmov_comparison_operator (compare_op, VOIDmode))
e075ae69 9837 {
a1b8572c 9838 if (second_test != NULL || bypass_test != NULL)
b531087a 9839 abort ();
e075ae69 9840 tmp = gen_reg_rtx (QImode);
3a3677ff 9841 ix86_expand_setcc (code, tmp);
e075ae69
RH
9842 code = NE;
9843 ix86_compare_op0 = tmp;
9844 ix86_compare_op1 = const0_rtx;
a1b8572c
JH
9845 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9846 }
9847 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9848 {
eaa49b49 9849 tmp = gen_reg_rtx (mode);
a1b8572c
JH
9850 emit_move_insn (tmp, operands[3]);
9851 operands[3] = tmp;
9852 }
9853 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9854 {
eaa49b49 9855 tmp = gen_reg_rtx (mode);
a1b8572c
JH
9856 emit_move_insn (tmp, operands[2]);
9857 operands[2] = tmp;
e075ae69 9858 }
e9a25f70 9859
e075ae69 9860 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
eaa49b49
RH
9861 gen_rtx_IF_THEN_ELSE (mode, compare_op,
9862 operands[2], operands[3])));
a1b8572c
JH
9863 if (bypass_test)
9864 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
eaa49b49
RH
9865 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
9866 operands[3], operands[0])));
a1b8572c
JH
9867 if (second_test)
9868 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
eaa49b49
RH
9869 gen_rtx_IF_THEN_ELSE (mode, second_test,
9870 operands[2], operands[0])));
32b5b1aa 9871
e075ae69 9872 return 1;
32b5b1aa
SC
9873}
9874
eaa49b49
RH
9875void
9876ix86_split_sse_movcc (rtx operands[])
9877{
9878 rtx dest, scratch, cmp, op_true, op_false, x;
9879 enum machine_mode mode, vmode;
9880
9881 /* Note that the operator CMP has been set up with matching constraints
9882 such that dest is valid for the comparison. Unless one of the true
9883 or false operands is zero, the true operand has already been placed
9884 in SCRATCH. */
9885 dest = operands[0];
9886 scratch = operands[1];
9887 op_true = operands[2];
9888 op_false = operands[3];
9889 cmp = operands[4];
9890
9891 mode = GET_MODE (dest);
9892 vmode = GET_MODE (scratch);
9893
9894 emit_insn (gen_rtx_SET (VOIDmode, dest, cmp));
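/* The compare just emitted stores an all-ones or all-zeros mask in
   DEST, so the conditional move reduces to the bitwise select
   (dest & op_true) | (~dest & op_false), which the branches below
   specialize when either arm is zero.  */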
9895
9896 dest = simplify_gen_subreg (vmode, dest, mode, 0);
9897
9898 if (op_false == CONST0_RTX (mode))
9899 {
9900 op_true = simplify_gen_subreg (vmode, op_true, mode, 0);
9901 x = gen_rtx_AND (vmode, dest, op_true);
9902 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9903 }
9904 else
9905 {
9906 op_false = simplify_gen_subreg (vmode, op_false, mode, 0);
9907
9908 if (op_true == CONST0_RTX (mode))
9909 {
9910 x = gen_rtx_NOT (vmode, dest);
9911 x = gen_rtx_AND (vmode, x, op_false);
9912 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9913 }
9914 else
9915 {
9916 x = gen_rtx_AND (vmode, scratch, dest);
9917 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
9918
9919 x = gen_rtx_NOT (vmode, dest);
9920 x = gen_rtx_AND (vmode, x, op_false);
9921 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9922
9923 x = gen_rtx_IOR (vmode, dest, scratch);
9924 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9925 }
9926 }
9927}
9928
7b52eede
JH
9929/* Expand conditional increment or decrement using adc/sbb instructions.
9930 The default case using setcc followed by the conditional move can be
9931 done by generic code. */
9932int
b96a374d 9933ix86_expand_int_addcc (rtx operands[])
7b52eede
JH
9934{
9935 enum rtx_code code = GET_CODE (operands[1]);
9936 rtx compare_op;
9937 rtx val = const0_rtx;
e6e81735 9938 bool fpcmp = false;
e6e81735 9939 enum machine_mode mode = GET_MODE (operands[0]);
7b52eede
JH
9940
9941 if (operands[3] != const1_rtx
9942 && operands[3] != constm1_rtx)
9943 return 0;
9944 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
9945 ix86_compare_op1, &compare_op))
9946 return 0;
e6e81735
JH
9947 code = GET_CODE (compare_op);
9948
9949 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9950 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9951 {
9952 fpcmp = true;
9953 code = ix86_fp_compare_code_to_integer (code);
9954 }
9955
9956 if (code != LTU)
9957 {
9958 val = constm1_rtx;
9959 if (fpcmp)
9960 PUT_CODE (compare_op,
9961 reverse_condition_maybe_unordered
9962 (GET_CODE (compare_op)));
9963 else
9964 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
9965 }
9966 PUT_MODE (compare_op, mode);
9967
9968 /* Construct either adc or sbb insn. */
9969 if ((code == LTU) == (operands[3] == constm1_rtx))
7b52eede
JH
9970 {
9971 switch (GET_MODE (operands[0]))
9972 {
9973 case QImode:
e6e81735 9974 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
7b52eede
JH
9975 break;
9976 case HImode:
e6e81735 9977 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
7b52eede
JH
9978 break;
9979 case SImode:
e6e81735 9980 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
7b52eede
JH
9981 break;
9982 case DImode:
e6e81735 9983 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
7b52eede
JH
9984 break;
9985 default:
9986 abort ();
9987 }
9988 }
9989 else
9990 {
9991 switch (GET_MODE (operands[0]))
9992 {
9993 case QImode:
e6e81735 9994 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
7b52eede
JH
9995 break;
9996 case HImode:
e6e81735 9997 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
7b52eede
JH
9998 break;
9999 case SImode:
e6e81735 10000 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
7b52eede
JH
10001 break;
10002 case DImode:
e6e81735 10003 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
7b52eede
JH
10004 break;
10005 default:
10006 abort ();
10007 }
10008 }
10009 return 1; /* DONE */
10010}
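/* For illustration: with operands[3] == const1_rtx and an unsigned
   a < b comparison, the expansion above is roughly

       cmp b, a      ; sets CF when a < b
       adc $0, dest  ; dest = operands[2] + CF

   i.e. a conditional increment without a branch.  */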
10011
10012
2450a057
JH
10013/* Split operands 0 and 1 into SImode parts. Similar to split_di, but
10014 works for floating point parameters and nonoffsettable memories.
10015 For pushes, it returns just stack offsets; the values will be saved
10016 in the right order. At most three parts are generated. */
10017
2b589241 10018static int
b96a374d 10019ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
32b5b1aa 10020{
26e5b205
JH
10021 int size;
10022
10023 if (!TARGET_64BIT)
f8a1ebc6 10024 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
26e5b205
JH
10025 else
10026 size = (GET_MODE_SIZE (mode) + 4) / 8;
2450a057 10027
a7180f70
BS
10028 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
10029 abort ();
2450a057
JH
10030 if (size < 2 || size > 3)
10031 abort ();
10032
f996902d
RH
10033 /* Optimize constant pool references to immediates. This is used by fp
10034 moves, which force all constants to memory to allow combining. */
389fdba0 10035 if (GET_CODE (operand) == MEM && MEM_READONLY_P (operand))
f996902d
RH
10036 {
10037 rtx tmp = maybe_get_pool_constant (operand);
10038 if (tmp)
10039 operand = tmp;
10040 }
d7a29404 10041
2450a057 10042 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
e075ae69 10043 {
2450a057
JH
10044 /* The only non-offsetable memories we handle are pushes. */
10045 if (! push_operand (operand, VOIDmode))
10046 abort ();
10047
26e5b205
JH
10048 operand = copy_rtx (operand);
10049 PUT_MODE (operand, Pmode);
2450a057
JH
10050 parts[0] = parts[1] = parts[2] = operand;
10051 }
26e5b205 10052 else if (!TARGET_64BIT)
2450a057
JH
10053 {
10054 if (mode == DImode)
10055 split_di (&operand, 1, &parts[0], &parts[1]);
10056 else
e075ae69 10057 {
2450a057
JH
10058 if (REG_P (operand))
10059 {
10060 if (!reload_completed)
10061 abort ();
10062 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
10063 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
10064 if (size == 3)
10065 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
10066 }
10067 else if (offsettable_memref_p (operand))
10068 {
f4ef873c 10069 operand = adjust_address (operand, SImode, 0);
2450a057 10070 parts[0] = operand;
b72f00af 10071 parts[1] = adjust_address (operand, SImode, 4);
2450a057 10072 if (size == 3)
b72f00af 10073 parts[2] = adjust_address (operand, SImode, 8);
2450a057
JH
10074 }
10075 else if (GET_CODE (operand) == CONST_DOUBLE)
10076 {
10077 REAL_VALUE_TYPE r;
2b589241 10078 long l[4];
2450a057
JH
10079
10080 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10081 switch (mode)
10082 {
10083 case XFmode:
10084 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
d8bf17f9 10085 parts[2] = gen_int_mode (l[2], SImode);
2450a057
JH
10086 break;
10087 case DFmode:
10088 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
10089 break;
10090 default:
10091 abort ();
10092 }
d8bf17f9
LB
10093 parts[1] = gen_int_mode (l[1], SImode);
10094 parts[0] = gen_int_mode (l[0], SImode);
2450a057
JH
10095 }
10096 else
10097 abort ();
e075ae69 10098 }
2450a057 10099 }
26e5b205
JH
10100 else
10101 {
44cf5b6a
JH
10102 if (mode == TImode)
10103 split_ti (&operand, 1, &parts[0], &parts[1]);
26e5b205
JH
10104 if (mode == XFmode || mode == TFmode)
10105 {
f8a1ebc6 10106 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
26e5b205
JH
10107 if (REG_P (operand))
10108 {
10109 if (!reload_completed)
10110 abort ();
10111 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
f8a1ebc6 10112 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
26e5b205
JH
10113 }
10114 else if (offsettable_memref_p (operand))
10115 {
b72f00af 10116 operand = adjust_address (operand, DImode, 0);
26e5b205 10117 parts[0] = operand;
f8a1ebc6 10118 parts[1] = adjust_address (operand, upper_mode, 8);
26e5b205
JH
10119 }
10120 else if (GET_CODE (operand) == CONST_DOUBLE)
10121 {
10122 REAL_VALUE_TYPE r;
38606553 10123 long l[4];
26e5b205
JH
10124
10125 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
9953b5e1 10126 real_to_target (l, &r, mode);
38606553 10127
26e5b205
JH
10128 /* Do not use shift by 32 to avoid warning on 32bit systems. */
10129 if (HOST_BITS_PER_WIDE_INT >= 64)
523fbd9d 10130 parts[0]
d8bf17f9 10131 = gen_int_mode
44cf5b6a 10132 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
b531087a 10133 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
d8bf17f9 10134 DImode);
26e5b205
JH
10135 else
10136 parts[0] = immed_double_const (l[0], l[1], DImode);
38606553 10137
f8a1ebc6
JH
10138 if (upper_mode == SImode)
10139 parts[1] = gen_int_mode (l[2], SImode);
10140 else if (HOST_BITS_PER_WIDE_INT >= 64)
10141 parts[1]
10142 = gen_int_mode
10143 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
10144 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
10145 DImode);
10146 else
10147 parts[1] = immed_double_const (l[2], l[3], DImode);
26e5b205
JH
10148 }
10149 else
10150 abort ();
10151 }
10152 }
2450a057 10153
2b589241 10154 return size;
2450a057
JH
10155}
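
/* Illustrative example (assuming the usual little-endian IEEE layout):
   on a 32-bit target, splitting the DFmode constant 1.0, whose bit
   image is 0x3FF0000000000000, returns size == 2 with
     parts[0] = (const_int 0x00000000)   low 32 bits
     parts[1] = (const_int 0x3FF00000)   high 32 bits
   while a DImode hard register after reload splits into two
   consecutive SImode registers.  */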

/* Emit insns to perform a move or push of DI, DF, and XF values.
   All the required insns are emitted directly; operands 2-4 contain
   the input values in the correct order and operands 5-7 contain the
   output values.  */

void
ix86_split_long_move (rtx operands[])
{
  rtx part[2][3];
  int nparts;
  int push = 0;
  int collisions = 0;
  enum machine_mode mode = GET_MODE (operands[0]);

  /* The DFmode expanders may ask us to move double.
     For 64bit target this is single move.  By hiding the fact
     here we simplify i386.md splitters.  */
  if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
    {
      /* Optimize constant pool reference to immediates.  This is used by
         fp moves, that force all constants to memory to allow combining.  */

      if (GET_CODE (operands[1]) == MEM
          && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
          && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
        operands[1] = get_pool_constant (XEXP (operands[1], 0));
      if (push_operand (operands[0], VOIDmode))
        {
          operands[0] = copy_rtx (operands[0]);
          PUT_MODE (operands[0], Pmode);
        }
      else
        operands[0] = gen_lowpart (DImode, operands[0]);
      operands[1] = gen_lowpart (DImode, operands[1]);
      emit_move_insn (operands[0], operands[1]);
      return;
    }

  /* The only non-offsettable memory we handle is push.  */
  if (push_operand (operands[0], VOIDmode))
    push = 1;
  else if (GET_CODE (operands[0]) == MEM
           && ! offsettable_memref_p (operands[0]))
    abort ();

  nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
  ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));

  /* When emitting push, take care for source operands on the stack.  */
  if (push && GET_CODE (operands[1]) == MEM
      && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
    {
      if (nparts == 3)
        part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
                                     XEXP (part[1][2], 0));
      part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
                                   XEXP (part[1][1], 0));
    }

  /* We need to do the copy in the right order in case an address register
     of the source overlaps the destination.  */
  if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
    {
      if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
        collisions++;
      if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
        collisions++;
      if (nparts == 3
          && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
        collisions++;

      /* Collision in the middle part can be handled by reordering.  */
      if (collisions == 1 && nparts == 3
          && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
        {
          rtx tmp;
          tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
          tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
        }

      /* If there are more collisions, we can't handle it by reordering.
         Do an lea to the last part and use only one colliding move.  */
      else if (collisions > 1)
        {
          rtx base;

          collisions = 1;

          base = part[0][nparts - 1];

          /* Handle the case when the last part isn't valid for lea.
             Happens in 64-bit mode storing the 12-byte XFmode.  */
          if (GET_MODE (base) != Pmode)
            base = gen_rtx_REG (Pmode, REGNO (base));

          emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
          part[1][0] = replace_equiv_address (part[1][0], base);
          part[1][1] = replace_equiv_address (part[1][1],
                                              plus_constant (base, UNITS_PER_WORD));
          if (nparts == 3)
            part[1][2] = replace_equiv_address (part[1][2],
                                                plus_constant (base, 8));
        }
    }

  if (push)
    {
      if (!TARGET_64BIT)
        {
          if (nparts == 3)
            {
              if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
                emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
              emit_move_insn (part[0][2], part[1][2]);
            }
        }
      else
        {
          /* In 64bit mode we don't have 32bit push available.  In case this is
             register, it is OK - we will just use larger counterpart.  We also
             retype memory - these come from attempt to avoid REX prefix on
             moving of second half of TFmode value.  */
          if (GET_MODE (part[1][1]) == SImode)
            {
              if (GET_CODE (part[1][1]) == MEM)
                part[1][1] = adjust_address (part[1][1], DImode, 0);
              else if (REG_P (part[1][1]))
                part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
              else
                abort ();
              if (GET_MODE (part[1][0]) == SImode)
                part[1][0] = part[1][1];
            }
        }
      emit_move_insn (part[0][1], part[1][1]);
      emit_move_insn (part[0][0], part[1][0]);
      return;
    }

  /* Choose correct order to not overwrite the source before it is copied.  */
  if ((REG_P (part[0][0])
       && REG_P (part[1][1])
       && (REGNO (part[0][0]) == REGNO (part[1][1])
           || (nparts == 3
               && REGNO (part[0][0]) == REGNO (part[1][2]))))
      || (collisions > 0
          && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
    {
      if (nparts == 3)
        {
          operands[2] = part[0][2];
          operands[3] = part[0][1];
          operands[4] = part[0][0];
          operands[5] = part[1][2];
          operands[6] = part[1][1];
          operands[7] = part[1][0];
        }
      else
        {
          operands[2] = part[0][1];
          operands[3] = part[0][0];
          operands[5] = part[1][1];
          operands[6] = part[1][0];
        }
    }
  else
    {
      if (nparts == 3)
        {
          operands[2] = part[0][0];
          operands[3] = part[0][1];
          operands[4] = part[0][2];
          operands[5] = part[1][0];
          operands[6] = part[1][1];
          operands[7] = part[1][2];
        }
      else
        {
          operands[2] = part[0][0];
          operands[3] = part[0][1];
          operands[5] = part[1][0];
          operands[6] = part[1][1];
        }
    }

  /* If optimizing for size, attempt to locally unCSE nonzero constants.  */
  if (optimize_size)
    {
      if (GET_CODE (operands[5]) == CONST_INT
          && operands[5] != const0_rtx
          && REG_P (operands[2]))
        {
          if (GET_CODE (operands[6]) == CONST_INT
              && INTVAL (operands[6]) == INTVAL (operands[5]))
            operands[6] = operands[2];

          if (nparts == 3
              && GET_CODE (operands[7]) == CONST_INT
              && INTVAL (operands[7]) == INTVAL (operands[5]))
            operands[7] = operands[2];
        }

      if (nparts == 3
          && GET_CODE (operands[6]) == CONST_INT
          && operands[6] != const0_rtx
          && REG_P (operands[3])
          && GET_CODE (operands[7]) == CONST_INT
          && INTVAL (operands[7]) == INTVAL (operands[6]))
        operands[7] = operands[3];
    }

  emit_move_insn (operands[2], operands[5]);
  emit_move_insn (operands[3], operands[6]);
  if (nparts == 3)
    emit_move_insn (operands[4], operands[7]);

  return;
}
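
/* For example, pushing an XFmode value in 32-bit mode (nparts == 3)
   comes out as three SImode pushes emitted most-significant part
   first; assuming the value lives at offsettable memory X, roughly:
        pushl X+8
        pushl X+4
        pushl X
   (preceded by a 4-byte stack adjustment when TARGET_128BIT_LONG_DOUBLE
   pads XFmode to 16 bytes), so the parts land in memory in order.  */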

/* Helper function of ix86_split_ashldi used to generate an SImode
   left shift by a constant, either using a single shift or
   a sequence of add instructions.  */

static void
ix86_expand_ashlsi3_const (rtx operand, int count)
{
  if (count == 1)
    emit_insn (gen_addsi3 (operand, operand, operand));
  else if (!optimize_size
           && count * ix86_cost->add <= ix86_cost->shift_const)
    {
      int i;
      for (i = 0; i < count; i++)
        emit_insn (gen_addsi3 (operand, operand, operand));
    }
  else
    emit_insn (gen_ashlsi3 (operand, operand, GEN_INT (count)));
}

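/* For example, with a tuning where an add costs 1 and a constant shift
   costs 3 (and we are not optimizing for size), `operand <<= 2' is
   emitted as two addl instructions (2 * 1 <= 3), while `operand <<= 4'
   keeps the single
        sall $4, %reg
   because four adds would exceed the cost of the shift.  */
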
void
ix86_split_ashldi (rtx *operands, rtx scratch)
{
  rtx low[2], high[2];
  int count;

  if (GET_CODE (operands[2]) == CONST_INT)
    {
      split_di (operands, 2, low, high);
      count = INTVAL (operands[2]) & 63;

      if (count >= 32)
        {
          emit_move_insn (high[0], low[1]);
          emit_move_insn (low[0], const0_rtx);

          if (count > 32)
            ix86_expand_ashlsi3_const (high[0], count - 32);
        }
      else
        {
          if (!rtx_equal_p (operands[0], operands[1]))
            emit_move_insn (operands[0], operands[1]);
          emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
          ix86_expand_ashlsi3_const (low[0], count);
        }
      return;
    }

  split_di (operands, 1, low, high);

  if (operands[1] == const1_rtx)
    {
      /* Assuming we've chosen a QImode capable register, then 1LL << N
         can be done with two 32-bit shifts, no branches, no cmoves.  */
      if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
        {
          rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);

          ix86_expand_clear (low[0]);
          ix86_expand_clear (high[0]);
          emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (32)));

          d = gen_lowpart (QImode, low[0]);
          d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
          s = gen_rtx_EQ (QImode, flags, const0_rtx);
          emit_insn (gen_rtx_SET (VOIDmode, d, s));

          d = gen_lowpart (QImode, high[0]);
          d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
          s = gen_rtx_NE (QImode, flags, const0_rtx);
          emit_insn (gen_rtx_SET (VOIDmode, d, s));
        }

      /* Otherwise, we can get the same results by manually performing
         a bit extract operation on bit 5, and then performing the two
         shifts.  The two methods of getting 0/1 into low/high are exactly
         the same size.  Avoiding the shift in the bit extract case helps
         pentium4 a bit; no one else seems to care much either way.  */
      else
        {
          rtx x;

          if (TARGET_PARTIAL_REG_STALL && !optimize_size)
            x = gen_rtx_ZERO_EXTEND (SImode, operands[2]);
          else
            x = gen_lowpart (SImode, operands[2]);
          emit_insn (gen_rtx_SET (VOIDmode, high[0], x));

          emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (5)));
          emit_insn (gen_andsi3 (high[0], high[0], GEN_INT (1)));
          emit_move_insn (low[0], high[0]);
          emit_insn (gen_xorsi3 (low[0], low[0], GEN_INT (1)));
        }

      emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
      emit_insn (gen_ashlsi3 (high[0], high[0], operands[2]));
      return;
    }

  if (operands[1] == constm1_rtx)
    {
      /* For -1LL << N, we can avoid the shld instruction, because we
         know that we're shifting 0...31 ones into a -1.  */
      emit_move_insn (low[0], constm1_rtx);
      if (optimize_size)
        emit_move_insn (high[0], low[0]);
      else
        emit_move_insn (high[0], constm1_rtx);
    }
  else
    {
      if (!rtx_equal_p (operands[0], operands[1]))
        emit_move_insn (operands[0], operands[1]);

      split_di (operands, 1, low, high);
      emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
    }

  emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));

  if (TARGET_CMOVE && scratch)
    {
      ix86_expand_clear (scratch);
      emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
    }
  else
    emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
}
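
/* For example, the constant case above turns `x << 40' (count >= 32)
   into straight-line code; assuming x lives in %edx:%eax, roughly:
        movl %eax, %edx    high = low
        movl $0, %eax      low = 0 (or an equivalent clear)
        sall $8, %edx      the remaining count - 32 bits
   with no shld and no runtime count adjustment.  */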

void
ix86_split_ashrdi (rtx *operands, rtx scratch)
{
  rtx low[2], high[2];
  int count;

  if (GET_CODE (operands[2]) == CONST_INT)
    {
      split_di (operands, 2, low, high);
      count = INTVAL (operands[2]) & 63;

      if (count == 63)
        {
          emit_move_insn (high[0], high[1]);
          emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
          emit_move_insn (low[0], high[0]);
        }
      else if (count >= 32)
        {
          emit_move_insn (low[0], high[1]);
          emit_move_insn (high[0], low[0]);
          emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
          if (count > 32)
            emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
        }
      else
        {
          if (!rtx_equal_p (operands[0], operands[1]))
            emit_move_insn (operands[0], operands[1]);
          emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
          emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
        }
    }
  else
    {
      if (!rtx_equal_p (operands[0], operands[1]))
        emit_move_insn (operands[0], operands[1]);

      split_di (operands, 1, low, high);

      emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
      emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));

      if (TARGET_CMOVE && scratch)
        {
          emit_move_insn (scratch, high[0]);
          emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
          emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
                                          scratch));
        }
      else
        emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
    }
}
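
/* For example, the count == 63 case just broadcasts the sign bit
   through both halves; for x in %edx:%eax it reduces to roughly
        sarl $31, %edx
        movl %edx, %eax
   instead of an shrd/sar pair.  */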

void
ix86_split_lshrdi (rtx *operands, rtx scratch)
{
  rtx low[2], high[2];
  int count;

  if (GET_CODE (operands[2]) == CONST_INT)
    {
      split_di (operands, 2, low, high);
      count = INTVAL (operands[2]) & 63;

      if (count >= 32)
        {
          emit_move_insn (low[0], high[1]);
          ix86_expand_clear (high[0]);

          if (count > 32)
            emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
        }
      else
        {
          if (!rtx_equal_p (operands[0], operands[1]))
            emit_move_insn (operands[0], operands[1]);
          emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
          emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
        }
    }
  else
    {
      if (!rtx_equal_p (operands[0], operands[1]))
        emit_move_insn (operands[0], operands[1]);

      split_di (operands, 1, low, high);

      emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
      emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));

      /* Heh.  By reversing the arguments, we can reuse this pattern.  */
      if (TARGET_CMOVE && scratch)
        {
          ix86_expand_clear (scratch);
          emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
                                          scratch));
        }
      else
        emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
    }
}

/* Helper function for the string operations below.  Tests whether
   VARIABLE has the low bits selected by VALUE clear, i.e. whether it
   is already suitably aligned; if so, emits a jump to the returned
   label so the fixup code in between can be skipped.  */
static rtx
ix86_expand_aligntest (rtx variable, int value)
{
  rtx label = gen_label_rtx ();
  rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
  if (GET_MODE (variable) == DImode)
    emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
  else
    emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
  emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
                           1, label);
  return label;
}
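
/* Typical use, as in the expanders below:

     rtx label = ix86_expand_aligntest (destreg, 1);
     ... emit the one-byte fixup here ...
     emit_label (label);
     LABEL_NUSES (label) = 1;

   so the fixup runs only when (destreg & 1) != 0.  */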

/* Decrement COUNTREG by VALUE.  */
static void
ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
{
  if (GET_MODE (countreg) == DImode)
    emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
  else
    emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
}

/* Zero extend the possibly SImode EXP to a Pmode register.  */
rtx
ix86_zero_extend_to_Pmode (rtx exp)
{
  rtx r;
  if (GET_MODE (exp) == VOIDmode)
    return force_reg (Pmode, exp);
  if (GET_MODE (exp) == Pmode)
    return copy_to_mode_reg (Pmode, exp);
  r = gen_reg_rtx (Pmode);
  emit_insn (gen_zero_extendsidi2 (r, exp));
  return r;
}

/* Expand string move (memcpy) operation.  Use i386 string operations when
   profitable.  expand_clrmem contains similar code.  */
int
ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp)
{
  rtx srcreg, destreg, countreg, srcexp, destexp;
  enum machine_mode counter_mode;
  HOST_WIDE_INT align = 0;
  unsigned HOST_WIDE_INT count = 0;

  if (GET_CODE (align_exp) == CONST_INT)
    align = INTVAL (align_exp);

  /* Can't use any of this if the user has appropriated esi or edi.  */
  if (global_regs[4] || global_regs[5])
    return 0;

  /* This simple hack avoids all inlining code and simplifies code below.  */
  if (!TARGET_ALIGN_STRINGOPS)
    align = 64;

  if (GET_CODE (count_exp) == CONST_INT)
    {
      count = INTVAL (count_exp);
      if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
        return 0;
    }

  /* Figure out proper mode for counter.  For 32bits it is always SImode,
     for 64bits use SImode when possible, otherwise DImode.
     Set count to number of bytes copied when known at compile time.  */
  if (!TARGET_64BIT
      || GET_MODE (count_exp) == SImode
      || x86_64_zext_immediate_operand (count_exp, VOIDmode))
    counter_mode = SImode;
  else
    counter_mode = DImode;

  if (counter_mode != SImode && counter_mode != DImode)
    abort ();

  destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
  if (destreg != XEXP (dst, 0))
    dst = replace_equiv_address_nv (dst, destreg);
  srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
  if (srcreg != XEXP (src, 0))
    src = replace_equiv_address_nv (src, srcreg);

  /* When optimizing for size emit a simple rep ; movsb instruction for
     counts not divisible by 4.  */

  if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
    {
      emit_insn (gen_cld ());
      countreg = ix86_zero_extend_to_Pmode (count_exp);
      destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
      srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
      emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
                              destexp, srcexp));
    }

  /* For constant aligned (or small unaligned) copies use rep movsl
     followed by code copying the rest.  For PentiumPro ensure 8 byte
     alignment to allow rep movsl acceleration.  */

  else if (count != 0
           && (align >= 8
               || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
               || optimize_size || count < (unsigned int) 64))
    {
      unsigned HOST_WIDE_INT offset = 0;
      int size = TARGET_64BIT && !optimize_size ? 8 : 4;
      rtx srcmem, dstmem;

      emit_insn (gen_cld ());
      if (count & ~(size - 1))
        {
          countreg = copy_to_mode_reg (counter_mode,
                                       GEN_INT ((count >> (size == 4 ? 2 : 3))
                                                & (TARGET_64BIT ? -1 : 0x3fffffff)));
          countreg = ix86_zero_extend_to_Pmode (countreg);

          destexp = gen_rtx_ASHIFT (Pmode, countreg,
                                    GEN_INT (size == 4 ? 2 : 3));
          srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
          destexp = gen_rtx_PLUS (Pmode, destexp, destreg);

          emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
                                  countreg, destexp, srcexp));
          offset = count & ~(size - 1);
        }
      if (size == 8 && (count & 0x04))
        {
          srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
                                                 offset);
          dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
                                                 offset);
          emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
          offset += 4;
        }
      if (count & 0x02)
        {
          srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
                                                 offset);
          dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
                                                 offset);
          emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
          offset += 2;
        }
      if (count & 0x01)
        {
          srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
                                                 offset);
          dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
                                                 offset);
          emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
        }
    }
  /* The generic code based on the glibc implementation:
     - align destination to 4 bytes (8 byte alignment is used for PentiumPro
       allowing accelerated copying there)
     - copy the data using rep movsl
     - copy the rest.  */
  else
    {
      rtx countreg2;
      rtx label = NULL;
      rtx srcmem, dstmem;
      int desired_alignment = (TARGET_PENTIUMPRO
                               && (count == 0 || count >= (unsigned int) 260)
                               ? 8 : UNITS_PER_WORD);
      /* Get rid of MEM_OFFSETs, they won't be accurate.  */
      dst = change_address (dst, BLKmode, destreg);
      src = change_address (src, BLKmode, srcreg);

      /* In case we don't know anything about the alignment, default to
         the library version, since it is usually equally fast and results
         in shorter code.

         Also emit the call when we know that the count is large and call
         overhead will not be important.  */
      if (!TARGET_INLINE_ALL_STRINGOPS
          && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
        return 0;

      if (TARGET_SINGLE_STRINGOP)
        emit_insn (gen_cld ());

      countreg2 = gen_reg_rtx (Pmode);
      countreg = copy_to_mode_reg (counter_mode, count_exp);

      /* We don't use loops to align destination and to copy parts smaller
         than 4 bytes, because gcc is able to optimize such code better (in
         the case the destination or the count really is aligned, gcc is often
         able to predict the branches) and also it is friendlier to the
         hardware branch prediction.

         Using loops is beneficial for the generic case, because we can
         handle small counts using the loops.  Many CPUs (such as Athlon)
         have large REP prefix setup costs.

         This is quite costly.  Maybe we can revisit this decision later or
         add some customizability to this code.  */

      if (count == 0 && align < desired_alignment)
        {
          label = gen_label_rtx ();
          emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
                                   LEU, 0, counter_mode, 1, label);
        }
      if (align <= 1)
        {
          rtx label = ix86_expand_aligntest (destreg, 1);
          srcmem = change_address (src, QImode, srcreg);
          dstmem = change_address (dst, QImode, destreg);
          emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
          ix86_adjust_counter (countreg, 1);
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (align <= 2)
        {
          rtx label = ix86_expand_aligntest (destreg, 2);
          srcmem = change_address (src, HImode, srcreg);
          dstmem = change_address (dst, HImode, destreg);
          emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
          ix86_adjust_counter (countreg, 2);
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (align <= 4 && desired_alignment > 4)
        {
          rtx label = ix86_expand_aligntest (destreg, 4);
          srcmem = change_address (src, SImode, srcreg);
          dstmem = change_address (dst, SImode, destreg);
          emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
          ix86_adjust_counter (countreg, 4);
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }

      if (label && desired_alignment > 4 && !TARGET_64BIT)
        {
          emit_label (label);
          LABEL_NUSES (label) = 1;
          label = NULL_RTX;
        }
      if (!TARGET_SINGLE_STRINGOP)
        emit_insn (gen_cld ());
      if (TARGET_64BIT)
        {
          emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
                                  GEN_INT (3)));
          destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
        }
      else
        {
          emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
          destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
        }
      srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
      destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
      emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
                              countreg2, destexp, srcexp));

      if (label)
        {
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
        {
          srcmem = change_address (src, SImode, srcreg);
          dstmem = change_address (dst, SImode, destreg);
          emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
        }
      if ((align <= 4 || count == 0) && TARGET_64BIT)
        {
          rtx label = ix86_expand_aligntest (countreg, 4);
          srcmem = change_address (src, SImode, srcreg);
          dstmem = change_address (dst, SImode, destreg);
          emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (align > 2 && count != 0 && (count & 2))
        {
          srcmem = change_address (src, HImode, srcreg);
          dstmem = change_address (dst, HImode, destreg);
          emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
        }
      if (align <= 2 || count == 0)
        {
          rtx label = ix86_expand_aligntest (countreg, 2);
          srcmem = change_address (src, HImode, srcreg);
          dstmem = change_address (dst, HImode, destreg);
          emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (align > 1 && count != 0 && (count & 1))
        {
          srcmem = change_address (src, QImode, srcreg);
          dstmem = change_address (dst, QImode, destreg);
          emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
        }
      if (align <= 1 || count == 0)
        {
          rtx label = ix86_expand_aligntest (countreg, 1);
          srcmem = change_address (src, QImode, srcreg);
          dstmem = change_address (dst, QImode, destreg);
          emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
    }

  return 1;
}
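
/* For example, a copy with compile-time count 7 and 4-byte alignment
   takes the "rep movsl plus tail" branch above and, assuming %esi and
   %edi already hold the operands, expands to roughly
        movl $1, %ecx
        rep movsl
        movsw
        movsb
   (the exact tail depends on how the strmov patterns expand under
   TARGET_SINGLE_STRINGOP).  */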

/* Expand string clear operation (bzero).  Use i386 string operations when
   profitable.  expand_movmem contains similar code.  */
int
ix86_expand_clrmem (rtx dst, rtx count_exp, rtx align_exp)
{
  rtx destreg, zeroreg, countreg, destexp;
  enum machine_mode counter_mode;
  HOST_WIDE_INT align = 0;
  unsigned HOST_WIDE_INT count = 0;

  if (GET_CODE (align_exp) == CONST_INT)
    align = INTVAL (align_exp);

  /* Can't use any of this if the user has appropriated esi.  */
  if (global_regs[4])
    return 0;

  /* This simple hack avoids all inlining code and simplifies code below.  */
  if (!TARGET_ALIGN_STRINGOPS)
    align = 32;

  if (GET_CODE (count_exp) == CONST_INT)
    {
      count = INTVAL (count_exp);
      if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
        return 0;
    }
  /* Figure out proper mode for counter.  For 32bits it is always SImode,
     for 64bits use SImode when possible, otherwise DImode.
     Set count to number of bytes copied when known at compile time.  */
  if (!TARGET_64BIT
      || GET_MODE (count_exp) == SImode
      || x86_64_zext_immediate_operand (count_exp, VOIDmode))
    counter_mode = SImode;
  else
    counter_mode = DImode;

  destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
  if (destreg != XEXP (dst, 0))
    dst = replace_equiv_address_nv (dst, destreg);

  /* When optimizing for size emit a simple rep ; stosb instruction for
     counts not divisible by 4.  The movl $N, %ecx; rep; stosb
     sequence is 7 bytes long, so if optimizing for size and count is
     small enough that some stosl, stosw and stosb instructions without
     rep are shorter, fall back into the next if.  */

  if ((!optimize || optimize_size)
      && (count == 0
          || ((count & 0x03)
              && (!optimize_size || (count & 0x03) + (count >> 2) > 7))))
    {
      emit_insn (gen_cld ());

      countreg = ix86_zero_extend_to_Pmode (count_exp);
      zeroreg = copy_to_mode_reg (QImode, const0_rtx);
      destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
      emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
    }
  else if (count != 0
           && (align >= 8
               || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
               || optimize_size || count < (unsigned int) 64))
    {
      int size = TARGET_64BIT && !optimize_size ? 8 : 4;
      unsigned HOST_WIDE_INT offset = 0;

      emit_insn (gen_cld ());

      zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
      if (count & ~(size - 1))
        {
          unsigned HOST_WIDE_INT repcount;
          unsigned int max_nonrep;

          repcount = count >> (size == 4 ? 2 : 3);
          if (!TARGET_64BIT)
            repcount &= 0x3fffffff;

          /* movl $N, %ecx; rep; stosl is 7 bytes, while N x stosl is N bytes.
             movl $N, %ecx; rep; stosq is 8 bytes, while N x stosq is 2xN
             bytes.  In both cases the latter seems to be faster for small
             values of N.  */
          max_nonrep = size == 4 ? 7 : 4;
          if (!optimize_size)
            switch (ix86_tune)
              {
              case PROCESSOR_PENTIUM4:
              case PROCESSOR_NOCONA:
                max_nonrep = 3;
                break;
              default:
                break;
              }

          if (repcount <= max_nonrep)
            while (repcount-- > 0)
              {
                rtx mem = adjust_automodify_address_nv (dst,
                                                        GET_MODE (zeroreg),
                                                        destreg, offset);
                emit_insn (gen_strset (destreg, mem, zeroreg));
                offset += size;
              }
          else
            {
              countreg = copy_to_mode_reg (counter_mode, GEN_INT (repcount));
              countreg = ix86_zero_extend_to_Pmode (countreg);
              destexp = gen_rtx_ASHIFT (Pmode, countreg,
                                        GEN_INT (size == 4 ? 2 : 3));
              destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
              emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg,
                                       destexp));
              offset = count & ~(size - 1);
            }
        }
      if (size == 8 && (count & 0x04))
        {
          rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
                                                  offset);
          emit_insn (gen_strset (destreg, mem,
                                 gen_rtx_SUBREG (SImode, zeroreg, 0)));
          offset += 4;
        }
      if (count & 0x02)
        {
          rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
                                                  offset);
          emit_insn (gen_strset (destreg, mem,
                                 gen_rtx_SUBREG (HImode, zeroreg, 0)));
          offset += 2;
        }
      if (count & 0x01)
        {
          rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
                                                  offset);
          emit_insn (gen_strset (destreg, mem,
                                 gen_rtx_SUBREG (QImode, zeroreg, 0)));
        }
    }
  else
    {
      rtx countreg2;
      rtx label = NULL;
      /* Compute desired alignment of the string operation.  */
      int desired_alignment = (TARGET_PENTIUMPRO
                               && (count == 0 || count >= (unsigned int) 260)
                               ? 8 : UNITS_PER_WORD);

      /* In case we don't know anything about the alignment, default to
         the library version, since it is usually equally fast and results
         in shorter code.

         Also emit the call when we know that the count is large and call
         overhead will not be important.  */
      if (!TARGET_INLINE_ALL_STRINGOPS
          && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
        return 0;

      if (TARGET_SINGLE_STRINGOP)
        emit_insn (gen_cld ());

      countreg2 = gen_reg_rtx (Pmode);
      countreg = copy_to_mode_reg (counter_mode, count_exp);
      zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
      /* Get rid of MEM_OFFSET, it won't be accurate.  */
      dst = change_address (dst, BLKmode, destreg);

      if (count == 0 && align < desired_alignment)
        {
          label = gen_label_rtx ();
          emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
                                   LEU, 0, counter_mode, 1, label);
        }
      if (align <= 1)
        {
          rtx label = ix86_expand_aligntest (destreg, 1);
          emit_insn (gen_strset (destreg, dst,
                                 gen_rtx_SUBREG (QImode, zeroreg, 0)));
          ix86_adjust_counter (countreg, 1);
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (align <= 2)
        {
          rtx label = ix86_expand_aligntest (destreg, 2);
          emit_insn (gen_strset (destreg, dst,
                                 gen_rtx_SUBREG (HImode, zeroreg, 0)));
          ix86_adjust_counter (countreg, 2);
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (align <= 4 && desired_alignment > 4)
        {
          rtx label = ix86_expand_aligntest (destreg, 4);
          emit_insn (gen_strset (destreg, dst,
                                 (TARGET_64BIT
                                  ? gen_rtx_SUBREG (SImode, zeroreg, 0)
                                  : zeroreg)));
          ix86_adjust_counter (countreg, 4);
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }

      if (label && desired_alignment > 4 && !TARGET_64BIT)
        {
          emit_label (label);
          LABEL_NUSES (label) = 1;
          label = NULL_RTX;
        }

      if (!TARGET_SINGLE_STRINGOP)
        emit_insn (gen_cld ());
      if (TARGET_64BIT)
        {
          emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
                                  GEN_INT (3)));
          destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
        }
      else
        {
          emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
          destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
        }
      destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
      emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));

      if (label)
        {
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }

      if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
        emit_insn (gen_strset (destreg, dst,
                               gen_rtx_SUBREG (SImode, zeroreg, 0)));
      if (TARGET_64BIT && (align <= 4 || count == 0))
        {
          rtx label = ix86_expand_aligntest (countreg, 4);
          emit_insn (gen_strset (destreg, dst,
                                 gen_rtx_SUBREG (SImode, zeroreg, 0)));
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (align > 2 && count != 0 && (count & 2))
        emit_insn (gen_strset (destreg, dst,
                               gen_rtx_SUBREG (HImode, zeroreg, 0)));
      if (align <= 2 || count == 0)
        {
          rtx label = ix86_expand_aligntest (countreg, 2);
          emit_insn (gen_strset (destreg, dst,
                                 gen_rtx_SUBREG (HImode, zeroreg, 0)));
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (align > 1 && count != 0 && (count & 1))
        emit_insn (gen_strset (destreg, dst,
                               gen_rtx_SUBREG (QImode, zeroreg, 0)));
      if (align <= 1 || count == 0)
        {
          rtx label = ix86_expand_aligntest (countreg, 1);
          emit_insn (gen_strset (destreg, dst,
                                 gen_rtx_SUBREG (QImode, zeroreg, 0)));
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
    }
  return 1;
}
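
/* For example, clearing 16 aligned bytes on ia32 gives repcount == 4,
   which on most tunings is at most max_nonrep (7 for stosl), so the
   loop above emits four inline stores instead of
        movl $4, %ecx
        rep stosl
   avoiding the %ecx setup and the rep start-up overhead.  */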

/* Expand strlen.  */
int
ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
{
  rtx addr, scratch1, scratch2, scratch3, scratch4;

  /* The generic case of the strlen expander is long.  Avoid its
     expansion unless TARGET_INLINE_ALL_STRINGOPS.  */

  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
      && !TARGET_INLINE_ALL_STRINGOPS
      && !optimize_size
      && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
    return 0;

  addr = force_reg (Pmode, XEXP (src, 0));
  scratch1 = gen_reg_rtx (Pmode);

  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
      && !optimize_size)
    {
      /* Well it seems that some optimizer does not combine a call like
         foo(strlen(bar), strlen(bar));
         when the move and the subtraction are done here.  It does calculate
         the length just once when these instructions are done inside of
         output_strlen_unroll().  But I think since &bar[strlen(bar)] is
         often used and I use one fewer register for the lifetime of
         output_strlen_unroll() this is better.  */

      emit_move_insn (out, addr);

      ix86_expand_strlensi_unroll_1 (out, src, align);

      /* strlensi_unroll_1 returns the address of the zero at the end of
         the string, like memchr(), so compute the length by subtracting
         the start address.  */
      if (TARGET_64BIT)
        emit_insn (gen_subdi3 (out, out, addr));
      else
        emit_insn (gen_subsi3 (out, out, addr));
    }
  else
    {
      rtx unspec;
      scratch2 = gen_reg_rtx (Pmode);
      scratch3 = gen_reg_rtx (Pmode);
      scratch4 = force_reg (Pmode, constm1_rtx);

      emit_move_insn (scratch3, addr);
      eoschar = force_reg (QImode, eoschar);

      emit_insn (gen_cld ());
      src = replace_equiv_address_nv (src, scratch3);

      /* If .md starts supporting :P, this can be done in .md.  */
      unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
                                                 scratch4), UNSPEC_SCAS);
      emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
      if (TARGET_64BIT)
        {
          emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
          emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
        }
      else
        {
          emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
          emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
        }
    }
  return 1;
}

/* Expand the appropriate insns for doing strlen if not just doing
   repnz; scasb

   out = result, initialized with the start address
   align_rtx = alignment of the address.
   scratch = scratch register, initialized with the start address when
        not aligned, otherwise undefined

   This is just the body.  It needs the initializations mentioned above
   and some address computing at the end.  These things are done in
   i386.md.  */

static void
ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
{
  int align;
  rtx tmp;
  rtx align_2_label = NULL_RTX;
  rtx align_3_label = NULL_RTX;
  rtx align_4_label = gen_label_rtx ();
  rtx end_0_label = gen_label_rtx ();
  rtx mem;
  rtx tmpreg = gen_reg_rtx (SImode);
  rtx scratch = gen_reg_rtx (SImode);
  rtx cmp;

  align = 0;
  if (GET_CODE (align_rtx) == CONST_INT)
    align = INTVAL (align_rtx);

  /* Loop to check 1..3 bytes for null to get an aligned pointer.  */

  /* Is there a known alignment and is it less than 4?  */
  if (align < 4)
    {
      rtx scratch1 = gen_reg_rtx (Pmode);
      emit_move_insn (scratch1, out);
      /* Is there a known alignment and is it not 2? */
      if (align != 2)
        {
          align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
          align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */

          /* Leave just the 3 lower bits.  */
          align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
                                    NULL_RTX, 0, OPTAB_WIDEN);

          emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
                                   Pmode, 1, align_4_label);
          emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
                                   Pmode, 1, align_2_label);
          emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
                                   Pmode, 1, align_3_label);
        }
      else
        {
          /* Since the alignment is 2, we have to check 2 or 0 bytes;
             check whether it is aligned to 4 bytes.  */

          align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
                                    NULL_RTX, 0, OPTAB_WIDEN);

          emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
                                   Pmode, 1, align_4_label);
        }

      mem = change_address (src, QImode, out);

      /* Now compare the bytes.  */

      /* Compare the first 1..3 unaligned bytes on a byte-by-byte basis.  */
      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
                               QImode, 1, end_0_label);

      /* Increment the address.  */
      if (TARGET_64BIT)
        emit_insn (gen_adddi3 (out, out, const1_rtx));
      else
        emit_insn (gen_addsi3 (out, out, const1_rtx));

      /* Not needed with an alignment of 2 */
      if (align != 2)
        {
          emit_label (align_2_label);

          emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
                                   end_0_label);

          if (TARGET_64BIT)
            emit_insn (gen_adddi3 (out, out, const1_rtx));
          else
            emit_insn (gen_addsi3 (out, out, const1_rtx));

          emit_label (align_3_label);
        }

      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
                               end_0_label);

      if (TARGET_64BIT)
        emit_insn (gen_adddi3 (out, out, const1_rtx));
      else
        emit_insn (gen_addsi3 (out, out, const1_rtx));
    }

  /* Generate the loop to check 4 bytes at a time.  It is not a good idea
     to align this loop; it only enlarges the code without speeding it up.  */
  emit_label (align_4_label);

  mem = change_address (src, SImode, out);
  emit_move_insn (scratch, mem);
  if (TARGET_64BIT)
    emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
  else
    emit_insn (gen_addsi3 (out, out, GEN_INT (4)));

  /* This formula yields a nonzero result iff one of the bytes is zero.
     This saves three branches inside the loop and many cycles.  */

  emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
  emit_insn (gen_one_cmplsi2 (scratch, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg,
                         gen_int_mode (0x80808080, SImode)));
  emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
                           align_4_label);
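
  /* Worked example of the zero-byte test: for scratch == 0x12003456
     (the second-highest byte is zero),
        scratch - 0x01010101  == 0x10ff3355
        ~scratch              == 0xedffcba9
        and of the two        == 0x00ff0301
        masked by 0x80808080  == 0x00800000 != 0
     so execution falls out of the loop; a word with no zero byte
     leaves all the 0x80 mask bits clear and the loop is taken again.  */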

  if (TARGET_CMOVE)
    {
      rtx reg = gen_reg_rtx (SImode);
      rtx reg2 = gen_reg_rtx (Pmode);
      emit_move_insn (reg, tmpreg);
      emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));

      /* If zero is not in the first two bytes, move two bytes forward.  */
      emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
                              gen_rtx_IF_THEN_ELSE (SImode, tmp,
                                                    reg,
                                                    tmpreg)));
      /* Emit lea manually to avoid clobbering of flags.  */
      emit_insn (gen_rtx_SET (SImode, reg2,
                              gen_rtx_PLUS (Pmode, out, const2_rtx)));

      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, out,
                              gen_rtx_IF_THEN_ELSE (Pmode, tmp,
                                                    reg2,
                                                    out)));
    }
  else
    {
      rtx end_2_label = gen_label_rtx ();
      /* Is zero in the first two bytes? */

      emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
                                  gen_rtx_LABEL_REF (VOIDmode, end_2_label),
                                  pc_rtx);
      tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
      JUMP_LABEL (tmp) = end_2_label;

      /* Not in the first two.  Move two bytes forward.  */
      emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
      if (TARGET_64BIT)
        emit_insn (gen_adddi3 (out, out, const2_rtx));
      else
        emit_insn (gen_addsi3 (out, out, const2_rtx));

      emit_label (end_2_label);
    }

  /* Avoid branch in fixing the byte.  */
  tmpreg = gen_lowpart (QImode, tmpreg);
  emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
  cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
  if (TARGET_64BIT)
    emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
  else
    emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));

  emit_label (end_0_label);
}

void
ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
                  rtx callarg2 ATTRIBUTE_UNUSED,
                  rtx pop, int sibcall)
{
  rtx use = NULL, call;

  if (pop == const0_rtx)
    pop = NULL;
  if (TARGET_64BIT && pop)
    abort ();

#if TARGET_MACHO
  if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
    fnaddr = machopic_indirect_call_target (fnaddr);
#else
  /* Static functions and indirect calls don't need the pic register.  */
  if (! TARGET_64BIT && flag_pic
      && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
      && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
    use_reg (&use, pic_offset_table_rtx);

  if (TARGET_64BIT && INTVAL (callarg2) >= 0)
    {
      rtx al = gen_rtx_REG (QImode, 0);
      emit_move_insn (al, callarg2);
      use_reg (&use, al);
    }
#endif /* TARGET_MACHO */

  if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
    {
      fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
      fnaddr = gen_rtx_MEM (QImode, fnaddr);
    }
  if (sibcall && TARGET_64BIT
      && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
    {
      rtx addr;
      addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
      fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
      emit_move_insn (fnaddr, addr);
      fnaddr = gen_rtx_MEM (QImode, fnaddr);
    }

  call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
  if (retval)
    call = gen_rtx_SET (VOIDmode, retval, call);
  if (pop)
    {
      pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
      pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
      call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
    }

  call = emit_call_insn (call);
  if (use)
    CALL_INSN_FUNCTION_USAGE (call) = use;
}
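
/* For example, a 64-bit indirect sibling call is forced through r11
   above because r11 is call-clobbered but not used for argument
   passing, so the tail call comes out as roughly
        movq <addr>, %r11
        jmp *%r11
   leaving the six integer argument registers untouched.  */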

/* Clear stack slot assignments remembered from previous functions.
   This is called from INIT_EXPANDERS once before RTL is emitted for each
   function.  */

static struct machine_function *
ix86_init_machine_status (void)
{
  struct machine_function *f;

  f = ggc_alloc_cleared (sizeof (struct machine_function));
  f->use_fast_prologue_epilogue_nregs = -1;

  return f;
}

/* Return a MEM corresponding to a stack slot with mode MODE.
   Allocate a new slot if necessary.

   The RTL for a function can have several slots available: N is
   which slot to use.  */

rtx
assign_386_stack_local (enum machine_mode mode, int n)
{
  struct stack_local_entry *s;

  if (n < 0 || n >= MAX_386_STACK_LOCALS)
    abort ();

  for (s = ix86_stack_locals; s; s = s->next)
    if (s->mode == mode && s->n == n)
      return s->rtl;

  s = (struct stack_local_entry *)
    ggc_alloc (sizeof (struct stack_local_entry));
  s->n = n;
  s->mode = mode;
  s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);

  s->next = ix86_stack_locals;
  ix86_stack_locals = s;
  return s->rtl;
}

/* Construct the SYMBOL_REF for the tls_get_addr function.  */

static GTY(()) rtx ix86_tls_symbol;
rtx
ix86_tls_get_addr (void)
{
  if (!ix86_tls_symbol)
    {
      ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
                                            (TARGET_GNU_TLS && !TARGET_64BIT)
                                            ? "___tls_get_addr"
                                            : "__tls_get_addr");
    }

  return ix86_tls_symbol;
}
\f
/* Calculate the length of the memory address in the instruction
   encoding.  Does not include the one-byte modrm, opcode, or prefix.  */

int
memory_address_length (rtx addr)
{
  struct ix86_address parts;
  rtx base, index, disp;
  int len;

  if (GET_CODE (addr) == PRE_DEC
      || GET_CODE (addr) == POST_INC
      || GET_CODE (addr) == PRE_MODIFY
      || GET_CODE (addr) == POST_MODIFY)
    return 0;

  if (! ix86_decompose_address (addr, &parts))
    abort ();

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  len = 0;

  /* Rule of thumb:
       - esp as the base always wants an index,
       - ebp as the base always wants a displacement.  */

  /* Register Indirect.  */
  if (base && !index && !disp)
    {
      /* esp (for its index) and ebp (for its displacement) need
	 the two-byte modrm form.  */
      if (addr == stack_pointer_rtx
	  || addr == arg_pointer_rtx
	  || addr == frame_pointer_rtx
	  || addr == hard_frame_pointer_rtx)
	len = 1;
    }

  /* Direct Addressing.  */
  else if (disp && !base && !index)
    len = 4;

  else
    {
      /* Find the length of the displacement constant.  */
      if (disp)
	{
	  if (GET_CODE (disp) == CONST_INT
	      && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K')
	      && base)
	    len = 1;
	  else
	    len = 4;
	}
      /* ebp always wants a displacement.  */
      else if (base == hard_frame_pointer_rtx)
	len = 1;

      /* An index requires the two-byte modrm form....  */
      if (index
	  /* ...like esp, which always wants an index.  */
	  || base == stack_pointer_rtx
	  || base == arg_pointer_rtx
	  || base == frame_pointer_rtx)
	len += 1;
    }

  return len;
}
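
/* Worked examples for the computation above (illustrative, not part of
   the original file).  Lengths exclude the modrm, opcode, and prefix
   bytes:

     (%eax)          -> 0   plain register indirect
     (%esp)          -> 1   esp needs a SIB byte
     (%ebp)          -> 1   ebp needs a zero disp8
     foo             -> 4   direct addressing, disp32
     8(%eax)         -> 1   disp8
     0x12345(%eax)   -> 4   disp32
     8(%eax,%ebx,2)  -> 2   disp8 plus a SIB byte for the index  */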

/* Compute the default value for the "length_immediate" attribute.  When
   SHORTFORM is set, expect that the insn has an 8-bit immediate
   alternative.  */
int
ix86_attr_length_immediate_default (rtx insn, int shortform)
{
  int len = 0;
  int i;
  extract_insn_cached (insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (CONSTANT_P (recog_data.operand[i]))
      {
	if (len)
	  abort ();
	if (shortform
	    && GET_CODE (recog_data.operand[i]) == CONST_INT
	    && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
	  len = 1;
	else
	  {
	    switch (get_attr_mode (insn))
	      {
	      case MODE_QI:
		len += 1;
		break;
	      case MODE_HI:
		len += 2;
		break;
	      case MODE_SI:
		len += 4;
		break;
	      /* Immediates for DImode instructions are encoded as
		 32-bit sign-extended values.  */
	      case MODE_DI:
		len += 4;
		break;
	      default:
		fatal_insn ("unknown insn mode", insn);
	      }
	  }
      }
  return len;
}
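
/* Illustrative examples for the computation above (not part of the
   original file), assuming the insn's mode attribute matches the operand
   width and that an 8-bit short form exists where noted:

     addl $4, %eax     -> 1   $4 satisfies CONST_OK_FOR_LETTER_P with 'K'
     addl $1000, %eax  -> 4   full SImode immediate
     addw $1000, %ax   -> 2   HImode immediate
     addq $1000, %rax  -> 4   DImode immediates are 32-bit sign-extended  */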

/* Compute the default value for the "length_address" attribute.  */
int
ix86_attr_length_address_default (rtx insn)
{
  int i;

  if (get_attr_type (insn) == TYPE_LEA)
    {
      rtx set = PATTERN (insn);

      if (GET_CODE (set) == SET)
	;
      else if (GET_CODE (set) == PARALLEL
	       && GET_CODE (XVECEXP (set, 0, 0)) == SET)
	set = XVECEXP (set, 0, 0);
      else
	{
#ifdef ENABLE_CHECKING
	  abort ();
#endif
	  return 0;
	}

      return memory_address_length (SET_SRC (set));
    }

  extract_insn_cached (insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (GET_CODE (recog_data.operand[i]) == MEM)
      return memory_address_length (XEXP (recog_data.operand[i], 0));

  return 0;
}
\f
/* Return the maximum number of instructions a cpu can issue.  */

static int
ix86_issue_rate (void)
{
  switch (ix86_tune)
    {
    case PROCESSOR_PENTIUM:
    case PROCESSOR_K6:
      return 2;

    case PROCESSOR_PENTIUMPRO:
    case PROCESSOR_PENTIUM4:
    case PROCESSOR_ATHLON:
    case PROCESSOR_K8:
    case PROCESSOR_NOCONA:
      return 3;

    default:
      return 1;
    }
}

/* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags
   set by DEP_INSN and nothing else set by DEP_INSN.  */

static int
ix86_flags_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
{
  rtx set, set2;

  /* Simplify the test for uninteresting insns.  */
  if (insn_type != TYPE_SETCC
      && insn_type != TYPE_ICMOV
      && insn_type != TYPE_FCMOV
      && insn_type != TYPE_IBR)
    return 0;

  if ((set = single_set (dep_insn)) != 0)
    {
      set = SET_DEST (set);
      set2 = NULL_RTX;
    }
  else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
	   && XVECLEN (PATTERN (dep_insn), 0) == 2
	   && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
	   && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
    {
      set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
      set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
    }
  else
    return 0;

  if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
    return 0;

  /* This test is true if the dependent insn reads the flags but
     not any other potentially set register.  */
  if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
    return 0;

  if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
    return 0;

  return 1;
}
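
/* Illustrative sketch of the PARALLEL case above (not part of the
   original file).  An arithmetic insn that writes both the flags and a
   result register has a pattern of roughly this shape:

     (parallel [(set (reg:CC 17 flags) (compare:CC ...))
                (set (reg:SI 0 ax) (plus:SI ...))])

   A following jcc/setcc that mentions only the flags register is
   flags-dependent; one that also reads %eax is not treated as such,
   since it must wait for the arithmetic result as well.  */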

/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
   address with operands set by DEP_INSN.  */

static int
ix86_agi_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
{
  rtx addr;

  if (insn_type == TYPE_LEA
      && TARGET_PENTIUM)
    {
      addr = PATTERN (insn);
      if (GET_CODE (addr) == SET)
	;
      else if (GET_CODE (addr) == PARALLEL
	       && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
	addr = XVECEXP (addr, 0, 0);
      else
	abort ();
      addr = SET_SRC (addr);
    }
  else
    {
      int i;
      extract_insn_cached (insn);
      for (i = recog_data.n_operands - 1; i >= 0; --i)
	if (GET_CODE (recog_data.operand[i]) == MEM)
	  {
	    addr = XEXP (recog_data.operand[i], 0);
	    goto found;
	  }
      return 0;
    found:;
    }

  return modified_in_p (addr, dep_insn);
}

static int
ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
{
  enum attr_type insn_type, dep_insn_type;
  enum attr_memory memory;
  rtx set, set2;
  int dep_insn_code_number;

  /* Anti and output dependencies have zero cost on all CPUs.  */
  if (REG_NOTE_KIND (link) != 0)
    return 0;

  dep_insn_code_number = recog_memoized (dep_insn);

  /* If we can't recognize the insns, we can't really do anything.  */
  if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
    return cost;

  insn_type = get_attr_type (insn);
  dep_insn_type = get_attr_type (dep_insn);

  switch (ix86_tune)
    {
    case PROCESSOR_PENTIUM:
      /* Address Generation Interlock adds a cycle of latency.  */
      if (ix86_agi_dependant (insn, dep_insn, insn_type))
	cost += 1;

      /* ??? Compares pair with jump/setcc.  */
      if (ix86_flags_dependant (insn, dep_insn, insn_type))
	cost = 0;

      /* Floating point stores require the value to be ready one cycle
	 earlier.  */
      if (insn_type == TYPE_FMOV
	  && get_attr_memory (insn) == MEMORY_STORE
	  && !ix86_agi_dependant (insn, dep_insn, insn_type))
	cost += 1;
      break;

    case PROCESSOR_PENTIUMPRO:
      memory = get_attr_memory (insn);

      /* INT->FP conversion is expensive.  */
      if (get_attr_fp_int_src (dep_insn))
	cost += 5;

      /* There is one cycle extra latency between an FP op and a store.  */
      if (insn_type == TYPE_FMOV
	  && (set = single_set (dep_insn)) != NULL_RTX
	  && (set2 = single_set (insn)) != NULL_RTX
	  && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
	  && GET_CODE (SET_DEST (set2)) == MEM)
	cost += 1;

      /* Show the ability of the reorder buffer to hide the latency of a
	 load by executing it in parallel with the previous instruction,
	 in case the previous instruction is not needed to compute the
	 address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
	  && !ix86_agi_dependant (insn, dep_insn, insn_type))
	{
	  /* Claim moves to take one cycle, as the core can issue one
	     load at a time and the next load can start a cycle later.  */
	  if (dep_insn_type == TYPE_IMOV
	      || dep_insn_type == TYPE_FMOV)
	    cost = 1;
	  else if (cost > 1)
	    cost--;
	}
      break;

    case PROCESSOR_K6:
      memory = get_attr_memory (insn);

      /* The esp dependency is resolved before the instruction is really
	 finished.  */
      if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
	  && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
	return 1;

      /* INT->FP conversion is expensive.  */
      if (get_attr_fp_int_src (dep_insn))
	cost += 5;

      /* Show the ability of the reorder buffer to hide the latency of a
	 load by executing it in parallel with the previous instruction,
	 in case the previous instruction is not needed to compute the
	 address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
	  && !ix86_agi_dependant (insn, dep_insn, insn_type))
	{
	  /* Claim moves to take one cycle, as the core can issue one
	     load at a time and the next load can start a cycle later.  */
	  if (dep_insn_type == TYPE_IMOV
	      || dep_insn_type == TYPE_FMOV)
	    cost = 1;
	  else if (cost > 2)
	    cost -= 2;
	  else
	    cost = 1;
	}
      break;

    case PROCESSOR_ATHLON:
    case PROCESSOR_K8:
      memory = get_attr_memory (insn);

      /* Show the ability of the reorder buffer to hide the latency of a
	 load by executing it in parallel with the previous instruction,
	 in case the previous instruction is not needed to compute the
	 address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
	  && !ix86_agi_dependant (insn, dep_insn, insn_type))
	{
	  enum attr_unit unit = get_attr_unit (insn);
	  int loadcost = 3;

	  /* Because of the difference between the length of the integer
	     and floating unit pipeline preparation stages, the memory
	     operands for floating point are cheaper.

	     ??? For Athlon the difference is most probably 2.  */
	  if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
	    loadcost = 3;
	  else
	    loadcost = TARGET_ATHLON ? 2 : 0;

	  if (cost >= loadcost)
	    cost -= loadcost;
	  else
	    cost = 0;
	}

    default:
      break;
    }

  return cost;
}
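
/* Illustrative examples for the Pentium adjustments above (not part of
   the original file):

     movl %ebx, %eax       <- dep_insn writes %eax
     movl (%eax), %ecx     <- insn uses %eax in its address

   is an AGI dependency and gets one extra cycle of latency, while

     cmpl %edx, %eax
     jne  .Llabel

   is a flags dependency and is given cost 0, letting the compare and
   the branch pair up in the same cycle.  */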

/* How many alternative schedules to try.  This should be as wide as the
   scheduling freedom in the DFA, but no wider.  Making this value too
   large results in extra work for the scheduler.  */

static int
ia32_multipass_dfa_lookahead (void)
{
  if (ix86_tune == PROCESSOR_PENTIUM)
    return 2;

  if (ix86_tune == PROCESSOR_PENTIUMPRO
      || ix86_tune == PROCESSOR_K6)
    return 1;

  return 0;
}

\f
/* Compute the alignment given to a constant that is being placed in
   memory.  EXP is the constant and ALIGN is the alignment that the
   object would ordinarily have.  The value of this function is used
   instead of that alignment to align the object.  */

int
ix86_constant_alignment (tree exp, int align)
{
  if (TREE_CODE (exp) == REAL_CST)
    {
      if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
	return 64;
      else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
	return 128;
    }
  else if (!optimize_size && TREE_CODE (exp) == STRING_CST
	   && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
    return BITS_PER_WORD;

  return align;
}

/* Compute the alignment for a static variable.
   TYPE is the data type, and ALIGN is the alignment that
   the object would ordinarily have.  The value of this function is used
   instead of that alignment to align the object.  */

int
ix86_data_alignment (tree type, int align)
{
  if (AGGREGATE_TYPE_P (type)
      && TYPE_SIZE (type)
      && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
      && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
	  || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
    return 256;

  /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to a 16-byte boundary.  */
  if (TARGET_64BIT)
    {
      if (AGGREGATE_TYPE_P (type)
	  && TYPE_SIZE (type)
	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	  && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
	      || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
	return 128;
    }

  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      if (TYPE_MODE (type) == DCmode && align < 64)
	return 64;
      if (TYPE_MODE (type) == XCmode && align < 128)
	return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
	    || TREE_CODE (type) == UNION_TYPE
	    || TREE_CODE (type) == QUAL_UNION_TYPE)
	   && TYPE_FIELDS (type))
    {
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
	   || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
	return 128;
    }

  return align;
}
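
/* Illustrative examples for the rules above (not part of the original
   file).  Note that TYPE_SIZE is measured in bits:

     static char buf[32];            -> 256-bit alignment (size >= 256 bits)
     static char buf[16];            -> 128-bit alignment on x86-64
                                        (size >= 128 bits)
     static double d;                -> at least 64-bit alignment (DFmode)
     static _Complex long double z;  -> 128-bit alignment (XCmode)  */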

/* Compute the alignment for a local variable.
   TYPE is the data type, and ALIGN is the alignment that
   the object would ordinarily have.  The value of this macro is used
   instead of that alignment to align the object.  */

int
ix86_local_alignment (tree type, int align)
{
  /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to a 16-byte boundary.  */
  if (TARGET_64BIT)
    {
      if (AGGREGATE_TYPE_P (type)
	  && TYPE_SIZE (type)
	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	  && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
	      || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
	return 128;
    }
  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      if (TYPE_MODE (type) == DCmode && align < 64)
	return 64;
      if (TYPE_MODE (type) == XCmode && align < 128)
	return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
	    || TREE_CODE (type) == UNION_TYPE
	    || TREE_CODE (type) == QUAL_UNION_TYPE)
	   && TYPE_FIELDS (type))
    {
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
	   || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
	return 128;
    }
  return align;
}
\f
/* Emit RTL insns to initialize the variable parts of a trampoline.
   FNADDR is an RTX for the address of the function's pure code.
   CXT is an RTX for the static chain value for the function.  */
void
x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
{
  if (!TARGET_64BIT)
    {
      /* Compute the offset from the end of the jmp to the target
	 function.  */
      rtx disp = expand_binop (SImode, sub_optab, fnaddr,
			       plus_constant (tramp, 10),
			       NULL_RTX, 1, OPTAB_DIRECT);
      emit_move_insn (gen_rtx_MEM (QImode, tramp),
		      gen_int_mode (0xb9, QImode));
      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
		      gen_int_mode (0xe9, QImode));
      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
    }
  else
    {
      int offset = 0;
      /* Try to load the address using the shorter movl instead of movabs.
	 We may want to support movq for kernel mode, but the kernel does
	 not use trampolines at the moment.  */
      if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
	{
	  fnaddr = copy_to_mode_reg (DImode, fnaddr);
	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
			  gen_int_mode (0xbb41, HImode));
	  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
			  gen_lowpart (SImode, fnaddr));
	  offset += 6;
	}
      else
	{
	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
			  gen_int_mode (0xbb49, HImode));
	  emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
			  fnaddr);
	  offset += 10;
	}
      /* Load the static chain using movabs to r10.  */
      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
		      gen_int_mode (0xba49, HImode));
      emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
		      cxt);
      offset += 10;
      /* Jump to r11.  */
      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
		      gen_int_mode (0xff49, HImode));
      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset + 2)),
		      gen_int_mode (0xe3, QImode));
      offset += 3;
      if (offset > TRAMPOLINE_SIZE)
	abort ();
    }

#ifdef ENABLE_EXECUTE_STACK
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
		     LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
#endif
}
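
/* Byte-level view of the trampolines built above (illustrative, not part
   of the original file).  The 32-bit trampoline is 10 bytes:

     offset 0:  b9 <cxt:4>        movl   $cxt, %ecx
     offset 5:  e9 <disp:4>       jmp    fnaddr   (rel32 from end of jmp)

   The 64-bit trampoline, when the target address does not fit the
   zero-extended 32-bit form:

     offset 0:  49 bb <fnaddr:8>  movabs $fnaddr, %r11
     offset 10: 49 ba <cxt:8>     movabs $cxt, %r10
     offset 20: 49 ff e3          jmp    *%r11

   The HImode constants are stored little-endian, which is why the value
   0xbb49 lays down the opcode bytes 49 bb (and 0xbb41 the bytes 41 bb,
   the shorter movl $imm32, %r11d form).  */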
\f
/* Codes for all the SSE/MMX builtins.  */
enum ix86_builtins
{
  IX86_BUILTIN_ADDPS,
  IX86_BUILTIN_ADDSS,
  IX86_BUILTIN_DIVPS,
  IX86_BUILTIN_DIVSS,
  IX86_BUILTIN_MULPS,
  IX86_BUILTIN_MULSS,
  IX86_BUILTIN_SUBPS,
  IX86_BUILTIN_SUBSS,

  IX86_BUILTIN_CMPEQPS,
  IX86_BUILTIN_CMPLTPS,
  IX86_BUILTIN_CMPLEPS,
  IX86_BUILTIN_CMPGTPS,
  IX86_BUILTIN_CMPGEPS,
  IX86_BUILTIN_CMPNEQPS,
  IX86_BUILTIN_CMPNLTPS,
  IX86_BUILTIN_CMPNLEPS,
  IX86_BUILTIN_CMPNGTPS,
  IX86_BUILTIN_CMPNGEPS,
  IX86_BUILTIN_CMPORDPS,
  IX86_BUILTIN_CMPUNORDPS,
  IX86_BUILTIN_CMPNEPS,
  IX86_BUILTIN_CMPEQSS,
  IX86_BUILTIN_CMPLTSS,
  IX86_BUILTIN_CMPLESS,
  IX86_BUILTIN_CMPNEQSS,
  IX86_BUILTIN_CMPNLTSS,
  IX86_BUILTIN_CMPNLESS,
  IX86_BUILTIN_CMPNGTSS,
  IX86_BUILTIN_CMPNGESS,
  IX86_BUILTIN_CMPORDSS,
  IX86_BUILTIN_CMPUNORDSS,
  IX86_BUILTIN_CMPNESS,

  IX86_BUILTIN_COMIEQSS,
  IX86_BUILTIN_COMILTSS,
  IX86_BUILTIN_COMILESS,
  IX86_BUILTIN_COMIGTSS,
  IX86_BUILTIN_COMIGESS,
  IX86_BUILTIN_COMINEQSS,
  IX86_BUILTIN_UCOMIEQSS,
  IX86_BUILTIN_UCOMILTSS,
  IX86_BUILTIN_UCOMILESS,
  IX86_BUILTIN_UCOMIGTSS,
  IX86_BUILTIN_UCOMIGESS,
  IX86_BUILTIN_UCOMINEQSS,

  IX86_BUILTIN_CVTPI2PS,
  IX86_BUILTIN_CVTPS2PI,
  IX86_BUILTIN_CVTSI2SS,
  IX86_BUILTIN_CVTSI642SS,
  IX86_BUILTIN_CVTSS2SI,
  IX86_BUILTIN_CVTSS2SI64,
  IX86_BUILTIN_CVTTPS2PI,
  IX86_BUILTIN_CVTTSS2SI,
  IX86_BUILTIN_CVTTSS2SI64,

  IX86_BUILTIN_MAXPS,
  IX86_BUILTIN_MAXSS,
  IX86_BUILTIN_MINPS,
  IX86_BUILTIN_MINSS,

  IX86_BUILTIN_LOADUPS,
  IX86_BUILTIN_STOREUPS,
  IX86_BUILTIN_MOVSS,

  IX86_BUILTIN_MOVHLPS,
  IX86_BUILTIN_MOVLHPS,
  IX86_BUILTIN_LOADHPS,
  IX86_BUILTIN_LOADLPS,
  IX86_BUILTIN_STOREHPS,
  IX86_BUILTIN_STORELPS,

  IX86_BUILTIN_MASKMOVQ,
  IX86_BUILTIN_MOVMSKPS,
  IX86_BUILTIN_PMOVMSKB,

  IX86_BUILTIN_MOVNTPS,
  IX86_BUILTIN_MOVNTQ,

  IX86_BUILTIN_LOADDQU,
  IX86_BUILTIN_STOREDQU,

  IX86_BUILTIN_PACKSSWB,
  IX86_BUILTIN_PACKSSDW,
  IX86_BUILTIN_PACKUSWB,

  IX86_BUILTIN_PADDB,
  IX86_BUILTIN_PADDW,
  IX86_BUILTIN_PADDD,
  IX86_BUILTIN_PADDQ,
  IX86_BUILTIN_PADDSB,
  IX86_BUILTIN_PADDSW,
  IX86_BUILTIN_PADDUSB,
  IX86_BUILTIN_PADDUSW,
  IX86_BUILTIN_PSUBB,
  IX86_BUILTIN_PSUBW,
  IX86_BUILTIN_PSUBD,
  IX86_BUILTIN_PSUBQ,
  IX86_BUILTIN_PSUBSB,
  IX86_BUILTIN_PSUBSW,
  IX86_BUILTIN_PSUBUSB,
  IX86_BUILTIN_PSUBUSW,

  IX86_BUILTIN_PAND,
  IX86_BUILTIN_PANDN,
  IX86_BUILTIN_POR,
  IX86_BUILTIN_PXOR,

  IX86_BUILTIN_PAVGB,
  IX86_BUILTIN_PAVGW,

  IX86_BUILTIN_PCMPEQB,
  IX86_BUILTIN_PCMPEQW,
  IX86_BUILTIN_PCMPEQD,
  IX86_BUILTIN_PCMPGTB,
  IX86_BUILTIN_PCMPGTW,
  IX86_BUILTIN_PCMPGTD,

  IX86_BUILTIN_PMADDWD,

  IX86_BUILTIN_PMAXSW,
  IX86_BUILTIN_PMAXUB,
  IX86_BUILTIN_PMINSW,
  IX86_BUILTIN_PMINUB,

  IX86_BUILTIN_PMULHUW,
  IX86_BUILTIN_PMULHW,
  IX86_BUILTIN_PMULLW,

  IX86_BUILTIN_PSADBW,
  IX86_BUILTIN_PSHUFW,

  IX86_BUILTIN_PSLLW,
  IX86_BUILTIN_PSLLD,
  IX86_BUILTIN_PSLLQ,
  IX86_BUILTIN_PSRAW,
  IX86_BUILTIN_PSRAD,
  IX86_BUILTIN_PSRLW,
  IX86_BUILTIN_PSRLD,
  IX86_BUILTIN_PSRLQ,
  IX86_BUILTIN_PSLLWI,
  IX86_BUILTIN_PSLLDI,
  IX86_BUILTIN_PSLLQI,
  IX86_BUILTIN_PSRAWI,
  IX86_BUILTIN_PSRADI,
  IX86_BUILTIN_PSRLWI,
  IX86_BUILTIN_PSRLDI,
  IX86_BUILTIN_PSRLQI,

  IX86_BUILTIN_PUNPCKHBW,
  IX86_BUILTIN_PUNPCKHWD,
  IX86_BUILTIN_PUNPCKHDQ,
  IX86_BUILTIN_PUNPCKLBW,
  IX86_BUILTIN_PUNPCKLWD,
  IX86_BUILTIN_PUNPCKLDQ,

  IX86_BUILTIN_SHUFPS,

  IX86_BUILTIN_RCPPS,
  IX86_BUILTIN_RCPSS,
  IX86_BUILTIN_RSQRTPS,
  IX86_BUILTIN_RSQRTSS,
  IX86_BUILTIN_SQRTPS,
  IX86_BUILTIN_SQRTSS,

  IX86_BUILTIN_UNPCKHPS,
  IX86_BUILTIN_UNPCKLPS,

  IX86_BUILTIN_ANDPS,
  IX86_BUILTIN_ANDNPS,
  IX86_BUILTIN_ORPS,
  IX86_BUILTIN_XORPS,

  IX86_BUILTIN_EMMS,
  IX86_BUILTIN_LDMXCSR,
  IX86_BUILTIN_STMXCSR,
  IX86_BUILTIN_SFENCE,

  /* 3DNow! Original */
  IX86_BUILTIN_FEMMS,
  IX86_BUILTIN_PAVGUSB,
  IX86_BUILTIN_PF2ID,
  IX86_BUILTIN_PFACC,
  IX86_BUILTIN_PFADD,
  IX86_BUILTIN_PFCMPEQ,
  IX86_BUILTIN_PFCMPGE,
  IX86_BUILTIN_PFCMPGT,
  IX86_BUILTIN_PFMAX,
  IX86_BUILTIN_PFMIN,
  IX86_BUILTIN_PFMUL,
  IX86_BUILTIN_PFRCP,
  IX86_BUILTIN_PFRCPIT1,
  IX86_BUILTIN_PFRCPIT2,
  IX86_BUILTIN_PFRSQIT1,
  IX86_BUILTIN_PFRSQRT,
  IX86_BUILTIN_PFSUB,
  IX86_BUILTIN_PFSUBR,
  IX86_BUILTIN_PI2FD,
  IX86_BUILTIN_PMULHRW,

  /* 3DNow! Athlon Extensions */
  IX86_BUILTIN_PF2IW,
  IX86_BUILTIN_PFNACC,
  IX86_BUILTIN_PFPNACC,
  IX86_BUILTIN_PI2FW,
  IX86_BUILTIN_PSWAPDSI,
  IX86_BUILTIN_PSWAPDSF,

  /* SSE2 */
  IX86_BUILTIN_ADDPD,
  IX86_BUILTIN_ADDSD,
  IX86_BUILTIN_DIVPD,
  IX86_BUILTIN_DIVSD,
  IX86_BUILTIN_MULPD,
  IX86_BUILTIN_MULSD,
  IX86_BUILTIN_SUBPD,
  IX86_BUILTIN_SUBSD,

  IX86_BUILTIN_CMPEQPD,
  IX86_BUILTIN_CMPLTPD,
  IX86_BUILTIN_CMPLEPD,
  IX86_BUILTIN_CMPGTPD,
  IX86_BUILTIN_CMPGEPD,
  IX86_BUILTIN_CMPNEQPD,
  IX86_BUILTIN_CMPNLTPD,
  IX86_BUILTIN_CMPNLEPD,
  IX86_BUILTIN_CMPNGTPD,
  IX86_BUILTIN_CMPNGEPD,
  IX86_BUILTIN_CMPORDPD,
  IX86_BUILTIN_CMPUNORDPD,
  IX86_BUILTIN_CMPNEPD,
  IX86_BUILTIN_CMPEQSD,
  IX86_BUILTIN_CMPLTSD,
  IX86_BUILTIN_CMPLESD,
  IX86_BUILTIN_CMPNEQSD,
  IX86_BUILTIN_CMPNLTSD,
  IX86_BUILTIN_CMPNLESD,
  IX86_BUILTIN_CMPORDSD,
  IX86_BUILTIN_CMPUNORDSD,
  IX86_BUILTIN_CMPNESD,

  IX86_BUILTIN_COMIEQSD,
  IX86_BUILTIN_COMILTSD,
  IX86_BUILTIN_COMILESD,
  IX86_BUILTIN_COMIGTSD,
  IX86_BUILTIN_COMIGESD,
  IX86_BUILTIN_COMINEQSD,
  IX86_BUILTIN_UCOMIEQSD,
  IX86_BUILTIN_UCOMILTSD,
  IX86_BUILTIN_UCOMILESD,
  IX86_BUILTIN_UCOMIGTSD,
  IX86_BUILTIN_UCOMIGESD,
  IX86_BUILTIN_UCOMINEQSD,

  IX86_BUILTIN_MAXPD,
  IX86_BUILTIN_MAXSD,
  IX86_BUILTIN_MINPD,
  IX86_BUILTIN_MINSD,

  IX86_BUILTIN_ANDPD,
  IX86_BUILTIN_ANDNPD,
  IX86_BUILTIN_ORPD,
  IX86_BUILTIN_XORPD,

  IX86_BUILTIN_SQRTPD,
  IX86_BUILTIN_SQRTSD,

  IX86_BUILTIN_UNPCKHPD,
  IX86_BUILTIN_UNPCKLPD,

  IX86_BUILTIN_SHUFPD,

  IX86_BUILTIN_LOADUPD,
  IX86_BUILTIN_STOREUPD,
  IX86_BUILTIN_MOVSD,

  IX86_BUILTIN_LOADHPD,
  IX86_BUILTIN_LOADLPD,

  IX86_BUILTIN_CVTDQ2PD,
  IX86_BUILTIN_CVTDQ2PS,

  IX86_BUILTIN_CVTPD2DQ,
  IX86_BUILTIN_CVTPD2PI,
  IX86_BUILTIN_CVTPD2PS,
  IX86_BUILTIN_CVTTPD2DQ,
  IX86_BUILTIN_CVTTPD2PI,

  IX86_BUILTIN_CVTPI2PD,
  IX86_BUILTIN_CVTSI2SD,
  IX86_BUILTIN_CVTSI642SD,

  IX86_BUILTIN_CVTSD2SI,
  IX86_BUILTIN_CVTSD2SI64,
  IX86_BUILTIN_CVTSD2SS,
  IX86_BUILTIN_CVTSS2SD,
  IX86_BUILTIN_CVTTSD2SI,
  IX86_BUILTIN_CVTTSD2SI64,

  IX86_BUILTIN_CVTPS2DQ,
  IX86_BUILTIN_CVTPS2PD,
  IX86_BUILTIN_CVTTPS2DQ,

  IX86_BUILTIN_MOVNTI,
  IX86_BUILTIN_MOVNTPD,
  IX86_BUILTIN_MOVNTDQ,

  /* SSE2 MMX */
  IX86_BUILTIN_MASKMOVDQU,
  IX86_BUILTIN_MOVMSKPD,
  IX86_BUILTIN_PMOVMSKB128,

  IX86_BUILTIN_PACKSSWB128,
  IX86_BUILTIN_PACKSSDW128,
  IX86_BUILTIN_PACKUSWB128,

  IX86_BUILTIN_PADDB128,
  IX86_BUILTIN_PADDW128,
  IX86_BUILTIN_PADDD128,
  IX86_BUILTIN_PADDQ128,
  IX86_BUILTIN_PADDSB128,
  IX86_BUILTIN_PADDSW128,
  IX86_BUILTIN_PADDUSB128,
  IX86_BUILTIN_PADDUSW128,
  IX86_BUILTIN_PSUBB128,
  IX86_BUILTIN_PSUBW128,
  IX86_BUILTIN_PSUBD128,
  IX86_BUILTIN_PSUBQ128,
  IX86_BUILTIN_PSUBSB128,
  IX86_BUILTIN_PSUBSW128,
  IX86_BUILTIN_PSUBUSB128,
  IX86_BUILTIN_PSUBUSW128,

  IX86_BUILTIN_PAND128,
  IX86_BUILTIN_PANDN128,
  IX86_BUILTIN_POR128,
  IX86_BUILTIN_PXOR128,

  IX86_BUILTIN_PAVGB128,
  IX86_BUILTIN_PAVGW128,

  IX86_BUILTIN_PCMPEQB128,
  IX86_BUILTIN_PCMPEQW128,
  IX86_BUILTIN_PCMPEQD128,
  IX86_BUILTIN_PCMPGTB128,
  IX86_BUILTIN_PCMPGTW128,
  IX86_BUILTIN_PCMPGTD128,

  IX86_BUILTIN_PMADDWD128,

  IX86_BUILTIN_PMAXSW128,
  IX86_BUILTIN_PMAXUB128,
  IX86_BUILTIN_PMINSW128,
  IX86_BUILTIN_PMINUB128,

  IX86_BUILTIN_PMULUDQ,
  IX86_BUILTIN_PMULUDQ128,
  IX86_BUILTIN_PMULHUW128,
  IX86_BUILTIN_PMULHW128,
  IX86_BUILTIN_PMULLW128,

  IX86_BUILTIN_PSADBW128,
  IX86_BUILTIN_PSHUFHW,
  IX86_BUILTIN_PSHUFLW,
  IX86_BUILTIN_PSHUFD,

  IX86_BUILTIN_PSLLW128,
  IX86_BUILTIN_PSLLD128,
  IX86_BUILTIN_PSLLQ128,
  IX86_BUILTIN_PSRAW128,
  IX86_BUILTIN_PSRAD128,
  IX86_BUILTIN_PSRLW128,
  IX86_BUILTIN_PSRLD128,
  IX86_BUILTIN_PSRLQ128,
  IX86_BUILTIN_PSLLDQI128,
  IX86_BUILTIN_PSLLWI128,
  IX86_BUILTIN_PSLLDI128,
  IX86_BUILTIN_PSLLQI128,
  IX86_BUILTIN_PSRAWI128,
  IX86_BUILTIN_PSRADI128,
  IX86_BUILTIN_PSRLDQI128,
  IX86_BUILTIN_PSRLWI128,
  IX86_BUILTIN_PSRLDI128,
  IX86_BUILTIN_PSRLQI128,

  IX86_BUILTIN_PUNPCKHBW128,
  IX86_BUILTIN_PUNPCKHWD128,
  IX86_BUILTIN_PUNPCKHDQ128,
  IX86_BUILTIN_PUNPCKHQDQ128,
  IX86_BUILTIN_PUNPCKLBW128,
  IX86_BUILTIN_PUNPCKLWD128,
  IX86_BUILTIN_PUNPCKLDQ128,
  IX86_BUILTIN_PUNPCKLQDQ128,

  IX86_BUILTIN_CLFLUSH,
  IX86_BUILTIN_MFENCE,
  IX86_BUILTIN_LFENCE,

  /* Prescott New Instructions.  */
  IX86_BUILTIN_ADDSUBPS,
  IX86_BUILTIN_HADDPS,
  IX86_BUILTIN_HSUBPS,
  IX86_BUILTIN_MOVSHDUP,
  IX86_BUILTIN_MOVSLDUP,
  IX86_BUILTIN_ADDSUBPD,
  IX86_BUILTIN_HADDPD,
  IX86_BUILTIN_HSUBPD,
  IX86_BUILTIN_LDDQU,

  IX86_BUILTIN_MONITOR,
  IX86_BUILTIN_MWAIT,

  IX86_BUILTIN_VEC_INIT_V2SI,
  IX86_BUILTIN_VEC_INIT_V4HI,
  IX86_BUILTIN_VEC_INIT_V8QI,
  IX86_BUILTIN_VEC_EXT_V2DF,
  IX86_BUILTIN_VEC_EXT_V2DI,
  IX86_BUILTIN_VEC_EXT_V4SF,
  IX86_BUILTIN_VEC_EXT_V4SI,
  IX86_BUILTIN_VEC_EXT_V8HI,
  IX86_BUILTIN_VEC_EXT_V4HI,
  IX86_BUILTIN_VEC_SET_V8HI,
  IX86_BUILTIN_VEC_SET_V4HI,

  IX86_BUILTIN_MAX
};

#define def_builtin(MASK, NAME, TYPE, CODE)				\
do {									\
  if ((MASK) & target_flags						\
      && (!((MASK) & MASK_64BIT) || TARGET_64BIT))			\
    lang_hooks.builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD,	\
				 NULL, NULL_TREE);			\
} while (0)
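
/* Illustrative sketch, not part of the original file: the macro above is
   invoked once per builtin further down, along the lines of (the type
   node name is assumed for illustration):

     def_builtin (MASK_SSE, "__builtin_ia32_addps",
                  v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ADDPS);

   The MASK_64BIT test means that a builtin whose mask includes
   MASK_64BIT is registered only when TARGET_64BIT is set, rather than
   requiring that bit to be present in target_flags.  */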

/* Bits for builtin_description.flag.  */

/* Set when we don't support the comparison natively, and should
   swap_comparison in order to support it.  */
#define BUILTIN_DESC_SWAP_OPERANDS 1

struct builtin_description
{
  const unsigned int mask;
  const enum insn_code icode;
  const char *const name;
  const enum ix86_builtins code;
  const enum rtx_code comparison;
  const unsigned int flag;
};

static const struct builtin_description bdesc_comi[] =
{
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
};

static const struct builtin_description bdesc_2arg[] =
{
  /* SSE */
  { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
  { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
  { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
  { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },

  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT,
    BUILTIN_DESC_SWAP_OPERANDS },
  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE,
    BUILTIN_DESC_SWAP_OPERANDS },
  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE,
    BUILTIN_DESC_SWAP_OPERANDS },
  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT,
    BUILTIN_DESC_SWAP_OPERANDS },
  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE,
    BUILTIN_DESC_SWAP_OPERANDS },
  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT,
    BUILTIN_DESC_SWAP_OPERANDS },
  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },

  { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
  { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },

  { MASK_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
  { MASK_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
  { MASK_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },

  { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },

  /* MMX */
  { MASK_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },

  { MASK_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },

  { MASK_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },

  { MASK_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },

  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },

  { MASK_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },

  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },

  { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },

  /* Special.  */
  { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },

  { MASK_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
  { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },

  { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },

  { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },

  { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },

  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },

  /* SSE2 */
  { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT,
    BUILTIN_DESC_SWAP_OPERANDS },
  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE,
    BUILTIN_DESC_SWAP_OPERANDS },
  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE,
    BUILTIN_DESC_SWAP_OPERANDS },
  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT,
    BUILTIN_DESC_SWAP_OPERANDS },
  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },

  { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },

  { MASK_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },

  /* SSE2 MMX */
  { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
  { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },

  { MASK_MMX, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
  { MASK_MMX, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
  { MASK_MMX, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
  { MASK_MMX, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
  { MASK_MMX, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
  { MASK_MMX, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
  { MASK_MMX, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
  { MASK_MMX, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },

  { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },

  { MASK_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
  { MASK_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
  { MASK_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },

  { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },

  { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },

  { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },

  { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
  { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },

  /* SSE3 MMX */
  { MASK_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
  { MASK_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
  { MASK_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
  { MASK_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
  { MASK_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
  { MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
};

8b60264b 12962static const struct builtin_description bdesc_1arg[] =
bd793c65 12963{
37f22004
L
12964 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
12965 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
fbe5eb6d 12966
37f22004 12967 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
ef719a44
RH
12968 { MASK_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
12969 { MASK_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
fbe5eb6d 12970
ef719a44
RH
12971 { MASK_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
12972 { MASK_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
12973 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
12974 { MASK_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
12975 { MASK_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
12976 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
fbe5eb6d
BS
12977
12978 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
12979 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
fbe5eb6d
BS
12980
12981 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
12982
ef719a44
RH
12983 { MASK_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
12984 { MASK_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
bd793c65 12985
ef719a44
RH
12986 { MASK_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
12987 { MASK_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
12988 { MASK_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
12989 { MASK_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
12990 { MASK_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
bd793c65 12991
ef719a44 12992 { MASK_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
fbe5eb6d 12993
ef719a44
RH
12994 { MASK_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
12995 { MASK_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
12996 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
12997 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
f02e1358 12998
ef719a44
RH
12999 { MASK_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
13000 { MASK_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
13001 { MASK_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
22c7c85e 13002
9e200aaf 13003 /* SSE3 */
ef719a44
RH
13004 { MASK_SSE3, CODE_FOR_sse3_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
13005 { MASK_SSE3, CODE_FOR_sse3_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
bd793c65
BS
13006};
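/* Illustrative sketch, not part of the original file: every named entry
   in these tables becomes a GCC builtin that user code can call directly
   once the matching ISA flag (here -msse) is enabled.  A one-argument
   entry such as the movmskps one expands through the unop path below:

     typedef float v4sf __attribute__ ((vector_size (16)));

     int
     sign_mask (v4sf x)
     {
       return __builtin_ia32_movmskps (x);
     }
*/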
13007
eb701deb 13008static void
b96a374d 13009ix86_init_builtins (void)
f6155fda
SS
13010{
13011 if (TARGET_MMX)
13012 ix86_init_mmx_sse_builtins ();
13013}
13014
13015/* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
bd793c65
BS
13016   is zero.  Otherwise, if TARGET_SSE is not set, only define the MMX
13017 builtins. */
e37af218 13018static void
b96a374d 13019ix86_init_mmx_sse_builtins (void)
bd793c65 13020{
8b60264b 13021 const struct builtin_description * d;
77ebd435 13022 size_t i;
bd793c65 13023
4a5eab38
PB
13024 tree V16QI_type_node = build_vector_type_for_mode (intQI_type_node, V16QImode);
13025 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
13026 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
a16da3ae
RH
13027 tree V2DI_type_node
13028 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
4a5eab38
PB
13029 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
13030 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
13031 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
13032 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
13033 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
13034 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
13035
bd793c65 13036 tree pchar_type_node = build_pointer_type (char_type_node);
068f5dea
JH
13037 tree pcchar_type_node = build_pointer_type (
13038 build_type_variant (char_type_node, 1, 0));
bd793c65 13039 tree pfloat_type_node = build_pointer_type (float_type_node);
068f5dea
JH
13040 tree pcfloat_type_node = build_pointer_type (
13041 build_type_variant (float_type_node, 1, 0));
bd793c65 13042 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
916b60b7 13043 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
bd793c65
BS
13044 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
13045
13046 /* Comparisons. */
13047 tree int_ftype_v4sf_v4sf
b4de2f7d
AH
13048 = build_function_type_list (integer_type_node,
13049 V4SF_type_node, V4SF_type_node, NULL_TREE);
bd793c65 13050 tree v4si_ftype_v4sf_v4sf
b4de2f7d
AH
13051 = build_function_type_list (V4SI_type_node,
13052 V4SF_type_node, V4SF_type_node, NULL_TREE);
bd793c65 13053 /* MMX/SSE/integer conversions. */
bd793c65 13054 tree int_ftype_v4sf
b4de2f7d
AH
13055 = build_function_type_list (integer_type_node,
13056 V4SF_type_node, NULL_TREE);
453ee231
JH
13057 tree int64_ftype_v4sf
13058 = build_function_type_list (long_long_integer_type_node,
13059 V4SF_type_node, NULL_TREE);
bd793c65 13060 tree int_ftype_v8qi
b4de2f7d 13061 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
bd793c65 13062 tree v4sf_ftype_v4sf_int
b4de2f7d
AH
13063 = build_function_type_list (V4SF_type_node,
13064 V4SF_type_node, integer_type_node, NULL_TREE);
453ee231
JH
13065 tree v4sf_ftype_v4sf_int64
13066 = build_function_type_list (V4SF_type_node,
13067 V4SF_type_node, long_long_integer_type_node,
13068 NULL_TREE);
bd793c65 13069 tree v4sf_ftype_v4sf_v2si
b4de2f7d
AH
13070 = build_function_type_list (V4SF_type_node,
13071 V4SF_type_node, V2SI_type_node, NULL_TREE);
eb701deb 13072
bd793c65
BS
13073 /* Miscellaneous. */
13074 tree v8qi_ftype_v4hi_v4hi
b4de2f7d
AH
13075 = build_function_type_list (V8QI_type_node,
13076 V4HI_type_node, V4HI_type_node, NULL_TREE);
bd793c65 13077 tree v4hi_ftype_v2si_v2si
b4de2f7d
AH
13078 = build_function_type_list (V4HI_type_node,
13079 V2SI_type_node, V2SI_type_node, NULL_TREE);
bd793c65 13080 tree v4sf_ftype_v4sf_v4sf_int
b4de2f7d
AH
13081 = build_function_type_list (V4SF_type_node,
13082 V4SF_type_node, V4SF_type_node,
13083 integer_type_node, NULL_TREE);
bd793c65 13084 tree v2si_ftype_v4hi_v4hi
b4de2f7d
AH
13085 = build_function_type_list (V2SI_type_node,
13086 V4HI_type_node, V4HI_type_node, NULL_TREE);
bd793c65 13087 tree v4hi_ftype_v4hi_int
b4de2f7d 13088 = build_function_type_list (V4HI_type_node,
e7a60f56 13089 V4HI_type_node, integer_type_node, NULL_TREE);
bd793c65 13090 tree v4hi_ftype_v4hi_di
b4de2f7d
AH
13091 = build_function_type_list (V4HI_type_node,
13092 V4HI_type_node, long_long_unsigned_type_node,
13093 NULL_TREE);
bd793c65 13094 tree v2si_ftype_v2si_di
b4de2f7d
AH
13095 = build_function_type_list (V2SI_type_node,
13096 V2SI_type_node, long_long_unsigned_type_node,
13097 NULL_TREE);
bd793c65 13098 tree void_ftype_void
b4de2f7d 13099 = build_function_type (void_type_node, void_list_node);
bd793c65 13100 tree void_ftype_unsigned
b4de2f7d 13101 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
22c7c85e
L
13102 tree void_ftype_unsigned_unsigned
13103 = build_function_type_list (void_type_node, unsigned_type_node,
13104 unsigned_type_node, NULL_TREE);
13105 tree void_ftype_pcvoid_unsigned_unsigned
13106 = build_function_type_list (void_type_node, const_ptr_type_node,
13107 unsigned_type_node, unsigned_type_node,
13108 NULL_TREE);
bd793c65 13109 tree unsigned_ftype_void
b4de2f7d 13110 = build_function_type (unsigned_type_node, void_list_node);
bd793c65 13111 tree v2si_ftype_v4sf
b4de2f7d 13112 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
bd793c65 13113 /* Loads/stores. */
bd793c65 13114 tree void_ftype_v8qi_v8qi_pchar
b4de2f7d
AH
13115 = build_function_type_list (void_type_node,
13116 V8QI_type_node, V8QI_type_node,
13117 pchar_type_node, NULL_TREE);
068f5dea
JH
13118 tree v4sf_ftype_pcfloat
13119 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
bd793c65
BS
13120 /* @@@ the type is bogus */
13121 tree v4sf_ftype_v4sf_pv2si
b4de2f7d 13122 = build_function_type_list (V4SF_type_node,
f8ca7923 13123 V4SF_type_node, pv2si_type_node, NULL_TREE);
1255c85c 13124 tree void_ftype_pv2si_v4sf
b4de2f7d 13125 = build_function_type_list (void_type_node,
f8ca7923 13126 pv2si_type_node, V4SF_type_node, NULL_TREE);
bd793c65 13127 tree void_ftype_pfloat_v4sf
b4de2f7d
AH
13128 = build_function_type_list (void_type_node,
13129 pfloat_type_node, V4SF_type_node, NULL_TREE);
bd793c65 13130 tree void_ftype_pdi_di
b4de2f7d
AH
13131 = build_function_type_list (void_type_node,
13132 pdi_type_node, long_long_unsigned_type_node,
13133 NULL_TREE);
916b60b7 13134 tree void_ftype_pv2di_v2di
b4de2f7d
AH
13135 = build_function_type_list (void_type_node,
13136 pv2di_type_node, V2DI_type_node, NULL_TREE);
bd793c65
BS
13137 /* Normal vector unops. */
13138 tree v4sf_ftype_v4sf
b4de2f7d 13139 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
0f290768 13140
bd793c65
BS
13141 /* Normal vector binops. */
13142 tree v4sf_ftype_v4sf_v4sf
b4de2f7d
AH
13143 = build_function_type_list (V4SF_type_node,
13144 V4SF_type_node, V4SF_type_node, NULL_TREE);
bd793c65 13145 tree v8qi_ftype_v8qi_v8qi
b4de2f7d
AH
13146 = build_function_type_list (V8QI_type_node,
13147 V8QI_type_node, V8QI_type_node, NULL_TREE);
bd793c65 13148 tree v4hi_ftype_v4hi_v4hi
b4de2f7d
AH
13149 = build_function_type_list (V4HI_type_node,
13150 V4HI_type_node, V4HI_type_node, NULL_TREE);
bd793c65 13151 tree v2si_ftype_v2si_v2si
b4de2f7d
AH
13152 = build_function_type_list (V2SI_type_node,
13153 V2SI_type_node, V2SI_type_node, NULL_TREE);
bd793c65 13154 tree di_ftype_di_di
b4de2f7d
AH
13155 = build_function_type_list (long_long_unsigned_type_node,
13156 long_long_unsigned_type_node,
13157 long_long_unsigned_type_node, NULL_TREE);
bd793c65 13158
47f339cf 13159 tree v2si_ftype_v2sf
ae3aa00d 13160 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
47f339cf 13161 tree v2sf_ftype_v2si
b4de2f7d 13162 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
47f339cf 13163 tree v2si_ftype_v2si
b4de2f7d 13164 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
47f339cf 13165 tree v2sf_ftype_v2sf
b4de2f7d 13166 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
47f339cf 13167 tree v2sf_ftype_v2sf_v2sf
b4de2f7d
AH
13168 = build_function_type_list (V2SF_type_node,
13169 V2SF_type_node, V2SF_type_node, NULL_TREE);
47f339cf 13170 tree v2si_ftype_v2sf_v2sf
b4de2f7d
AH
13171 = build_function_type_list (V2SI_type_node,
13172 V2SF_type_node, V2SF_type_node, NULL_TREE);
fbe5eb6d
BS
13173 tree pint_type_node = build_pointer_type (integer_type_node);
13174 tree pdouble_type_node = build_pointer_type (double_type_node);
068f5dea
JH
13175 tree pcdouble_type_node = build_pointer_type (
13176 build_type_variant (double_type_node, 1, 0));
fbe5eb6d 13177 tree int_ftype_v2df_v2df
b4de2f7d
AH
13178 = build_function_type_list (integer_type_node,
13179 V2DF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 13180
fbe5eb6d 13181 tree ti_ftype_ti_ti
b4de2f7d
AH
13182 = build_function_type_list (intTI_type_node,
13183 intTI_type_node, intTI_type_node, NULL_TREE);
068f5dea
JH
13184 tree void_ftype_pcvoid
13185 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
fbe5eb6d 13186 tree v4sf_ftype_v4si
b4de2f7d 13187 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
fbe5eb6d 13188 tree v4si_ftype_v4sf
b4de2f7d 13189 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
fbe5eb6d 13190 tree v2df_ftype_v4si
b4de2f7d 13191 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
fbe5eb6d 13192 tree v4si_ftype_v2df
b4de2f7d 13193 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 13194 tree v2si_ftype_v2df
b4de2f7d 13195 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 13196 tree v4sf_ftype_v2df
b4de2f7d 13197 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 13198 tree v2df_ftype_v2si
b4de2f7d 13199 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
fbe5eb6d 13200 tree v2df_ftype_v4sf
b4de2f7d 13201 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
fbe5eb6d 13202 tree int_ftype_v2df
b4de2f7d 13203 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
453ee231
JH
13204 tree int64_ftype_v2df
13205 = build_function_type_list (long_long_integer_type_node,
b96a374d 13206 V2DF_type_node, NULL_TREE);
fbe5eb6d 13207 tree v2df_ftype_v2df_int
b4de2f7d
AH
13208 = build_function_type_list (V2DF_type_node,
13209 V2DF_type_node, integer_type_node, NULL_TREE);
453ee231
JH
13210 tree v2df_ftype_v2df_int64
13211 = build_function_type_list (V2DF_type_node,
13212 V2DF_type_node, long_long_integer_type_node,
13213 NULL_TREE);
fbe5eb6d 13214 tree v4sf_ftype_v4sf_v2df
b4de2f7d
AH
13215 = build_function_type_list (V4SF_type_node,
13216 V4SF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 13217 tree v2df_ftype_v2df_v4sf
b4de2f7d
AH
13218 = build_function_type_list (V2DF_type_node,
13219 V2DF_type_node, V4SF_type_node, NULL_TREE);
fbe5eb6d 13220 tree v2df_ftype_v2df_v2df_int
b4de2f7d
AH
13221 = build_function_type_list (V2DF_type_node,
13222 V2DF_type_node, V2DF_type_node,
13223 integer_type_node,
13224 NULL_TREE);
1c47af84 13225 tree v2df_ftype_v2df_pcdouble
b4de2f7d 13226 = build_function_type_list (V2DF_type_node,
1c47af84 13227 V2DF_type_node, pcdouble_type_node, NULL_TREE);
fbe5eb6d 13228 tree void_ftype_pdouble_v2df
b4de2f7d
AH
13229 = build_function_type_list (void_type_node,
13230 pdouble_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 13231 tree void_ftype_pint_int
b4de2f7d
AH
13232 = build_function_type_list (void_type_node,
13233 pint_type_node, integer_type_node, NULL_TREE);
fbe5eb6d 13234 tree void_ftype_v16qi_v16qi_pchar
b4de2f7d
AH
13235 = build_function_type_list (void_type_node,
13236 V16QI_type_node, V16QI_type_node,
13237 pchar_type_node, NULL_TREE);
068f5dea
JH
13238 tree v2df_ftype_pcdouble
13239 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
fbe5eb6d 13240 tree v2df_ftype_v2df_v2df
b4de2f7d
AH
13241 = build_function_type_list (V2DF_type_node,
13242 V2DF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 13243 tree v16qi_ftype_v16qi_v16qi
b4de2f7d
AH
13244 = build_function_type_list (V16QI_type_node,
13245 V16QI_type_node, V16QI_type_node, NULL_TREE);
fbe5eb6d 13246 tree v8hi_ftype_v8hi_v8hi
b4de2f7d
AH
13247 = build_function_type_list (V8HI_type_node,
13248 V8HI_type_node, V8HI_type_node, NULL_TREE);
fbe5eb6d 13249 tree v4si_ftype_v4si_v4si
b4de2f7d
AH
13250 = build_function_type_list (V4SI_type_node,
13251 V4SI_type_node, V4SI_type_node, NULL_TREE);
fbe5eb6d 13252 tree v2di_ftype_v2di_v2di
b4de2f7d
AH
13253 = build_function_type_list (V2DI_type_node,
13254 V2DI_type_node, V2DI_type_node, NULL_TREE);
fbe5eb6d 13255 tree v2di_ftype_v2df_v2df
b4de2f7d
AH
13256 = build_function_type_list (V2DI_type_node,
13257 V2DF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 13258 tree v2df_ftype_v2df
b4de2f7d 13259 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
916b60b7 13260 tree v2di_ftype_v2di_int
b4de2f7d
AH
13261 = build_function_type_list (V2DI_type_node,
13262 V2DI_type_node, integer_type_node, NULL_TREE);
fbe5eb6d 13263 tree v4si_ftype_v4si_int
b4de2f7d
AH
13264 = build_function_type_list (V4SI_type_node,
13265 V4SI_type_node, integer_type_node, NULL_TREE);
fbe5eb6d 13266 tree v8hi_ftype_v8hi_int
b4de2f7d
AH
13267 = build_function_type_list (V8HI_type_node,
13268 V8HI_type_node, integer_type_node, NULL_TREE);
916b60b7 13269 tree v8hi_ftype_v8hi_v2di
b4de2f7d
AH
13270 = build_function_type_list (V8HI_type_node,
13271 V8HI_type_node, V2DI_type_node, NULL_TREE);
916b60b7 13272 tree v4si_ftype_v4si_v2di
b4de2f7d
AH
13273 = build_function_type_list (V4SI_type_node,
13274 V4SI_type_node, V2DI_type_node, NULL_TREE);
916b60b7 13275 tree v4si_ftype_v8hi_v8hi
b4de2f7d
AH
13276 = build_function_type_list (V4SI_type_node,
13277 V8HI_type_node, V8HI_type_node, NULL_TREE);
916b60b7 13278 tree di_ftype_v8qi_v8qi
b4de2f7d
AH
13279 = build_function_type_list (long_long_unsigned_type_node,
13280 V8QI_type_node, V8QI_type_node, NULL_TREE);
9e9fb0ce
JB
13281 tree di_ftype_v2si_v2si
13282 = build_function_type_list (long_long_unsigned_type_node,
13283 V2SI_type_node, V2SI_type_node, NULL_TREE);
916b60b7 13284 tree v2di_ftype_v16qi_v16qi
b4de2f7d
AH
13285 = build_function_type_list (V2DI_type_node,
13286 V16QI_type_node, V16QI_type_node, NULL_TREE);
9e9fb0ce
JB
13287 tree v2di_ftype_v4si_v4si
13288 = build_function_type_list (V2DI_type_node,
13289 V4SI_type_node, V4SI_type_node, NULL_TREE);
916b60b7 13290 tree int_ftype_v16qi
b4de2f7d 13291 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
068f5dea
JH
13292 tree v16qi_ftype_pcchar
13293 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
f02e1358
JH
13294 tree void_ftype_pchar_v16qi
13295 = build_function_type_list (void_type_node,
13296 pchar_type_node, V16QI_type_node, NULL_TREE);
47f339cf 13297
f8a1ebc6
JH
13298 tree float80_type;
13299 tree float128_type;
eb701deb 13300 tree ftype;
f8a1ebc6
JH
13301
13302 /* The __float80 type. */
13303 if (TYPE_MODE (long_double_type_node) == XFmode)
13304 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
13305 "__float80");
13306 else
13307 {
13308       /* Build the 80-bit type from scratch.  */
13309 float80_type = make_node (REAL_TYPE);
968a7562 13310 TYPE_PRECISION (float80_type) = 80;
f8a1ebc6
JH
13311 layout_type (float80_type);
13312 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
13313 }
13314
13315 float128_type = make_node (REAL_TYPE);
13316 TYPE_PRECISION (float128_type) = 128;
13317 layout_type (float128_type);
13318 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
13319
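/* Illustrative sketch, assuming an i386 target on which the
   registrations above have run: both names are then usable as ordinary
   scalar types from C, e.g.

     __float80 extended = 1.0L;
     __float128 quad = 2.0;

   with __float80 carrying 80 bits of precision and __float128 laid out
   from the 128-bit REAL_TYPE built just above. */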
bd793c65
BS
13320 /* Add all builtins that are more or less simple operations on two
13321 operands. */
ca7558fc 13322 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
bd793c65
BS
13323 {
13324 /* Use one of the operands; the target can have a different mode for
13325 mask-generating compares. */
13326 enum machine_mode mode;
13327 tree type;
13328
13329 if (d->name == 0)
13330 continue;
13331 mode = insn_data[d->icode].operand[1].mode;
13332
bd793c65
BS
13333 switch (mode)
13334 {
fbe5eb6d
BS
13335 case V16QImode:
13336 type = v16qi_ftype_v16qi_v16qi;
13337 break;
13338 case V8HImode:
13339 type = v8hi_ftype_v8hi_v8hi;
13340 break;
13341 case V4SImode:
13342 type = v4si_ftype_v4si_v4si;
13343 break;
13344 case V2DImode:
13345 type = v2di_ftype_v2di_v2di;
13346 break;
13347 case V2DFmode:
13348 type = v2df_ftype_v2df_v2df;
13349 break;
13350 case TImode:
13351 type = ti_ftype_ti_ti;
13352 break;
bd793c65
BS
13353 case V4SFmode:
13354 type = v4sf_ftype_v4sf_v4sf;
13355 break;
13356 case V8QImode:
13357 type = v8qi_ftype_v8qi_v8qi;
13358 break;
13359 case V4HImode:
13360 type = v4hi_ftype_v4hi_v4hi;
13361 break;
13362 case V2SImode:
13363 type = v2si_ftype_v2si_v2si;
13364 break;
bd793c65
BS
13365 case DImode:
13366 type = di_ftype_di_di;
13367 break;
13368
13369 default:
13370 abort ();
13371 }
0f290768 13372
bd793c65 13373 /* Override for comparisons. */
ef719a44
RH
13374 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
13375 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
bd793c65
BS
13376 type = v4si_ftype_v4sf_v4sf;
13377
ef719a44
RH
13378 if (d->icode == CODE_FOR_sse2_maskcmpv2df3
13379 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
fbe5eb6d
BS
13380 type = v2di_ftype_v2df_v2df;
13381
eeb06b1b 13382 def_builtin (d->mask, d->name, type, d->code);
bd793c65
BS
13383 }
13384
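/* Illustrative sketch: the switch above derives each builtin's C
   prototype from the insn's operand mode.  Assuming the usual
   bdesc_2arg contents, a V4SFmode entry yields, e.g.,

     v4sf __builtin_ia32_addps (v4sf, v4sf);

   while the comparison overrides just before def_builtin make the
   mask-generating compares return an integer vector instead:

     v4si __builtin_ia32_cmpeqps (v4sf, v4sf);
*/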
13385 /* Add the remaining MMX insns with somewhat more complicated types. */
eeb06b1b 13386 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
eeb06b1b
BS
13387 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
13388 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
13389 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
13390
13391 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
13392 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
13393 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
13394
13395 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
13396 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
13397
13398 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
13399 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
0f290768 13400
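/* Illustrative sketch: the classic MMX shifts registered above take the
   shift count as a DImode value, mirroring the hardware operand, so a
   constant count is passed as a 64-bit integer:

     typedef short v4hi __attribute__ ((vector_size (8)));

     v4hi
     shl3 (v4hi x)
     {
       return __builtin_ia32_psllw (x, 3LL);
     }
*/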
bd793c65 13401 /* comi/ucomi insns. */
ca7558fc 13402 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
fbe5eb6d
BS
13403 if (d->mask == MASK_SSE2)
13404 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
13405 else
13406 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
bd793c65 13407
1255c85c
BS
13408 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
13409 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
13410 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
bd793c65 13411
37f22004
L
13412 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
13413 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
13414 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
13415 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
13416 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
13417 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
13418 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
13419 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
13420 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
13421 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
13422 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
13423
37f22004
L
13424 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
13425
37f22004 13426 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
37f22004 13427 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
37f22004
L
13428
13429 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
13430 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
13431 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
13432 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
13433
13434 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
13435 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
13436 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
13437 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
13438
13439 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
13440
13441 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
13442
13443 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
13444 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
13445 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
13446 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
13447 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
13448 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
13449
13450 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
bd793c65 13451
47f339cf
BS
13452 /* Original 3DNow! */
13453 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
13454 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
13455 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
13456 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
13457 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
13458 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
13459 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
13460 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
13461 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
13462 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
13463 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
13464 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
13465 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
13466 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
13467 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
13468 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
13469 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
13470 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
13471 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
13472 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
47f339cf
BS
13473
13474 /* 3DNow! extension as used in the Athlon CPU. */
13475 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
13476 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
13477 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
13478 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
13479 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
13480 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
13481
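/* Illustrative sketch, assuming -m3dnow: 3DNow! arithmetic works on
   V2SFmode values, and a femms (__builtin_ia32_femms, registered above)
   must be issued before later x87 code touches the shared register
   stack:

     typedef float v2sf __attribute__ ((vector_size (8)));

     v2sf
     add2 (v2sf a, v2sf b)
     {
       return __builtin_ia32_pfadd (a, b);
     }
*/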
fbe5eb6d 13482 /* SSE2 */
fbe5eb6d 13483 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
fbe5eb6d 13484
068f5dea 13485 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
fbe5eb6d 13486 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
fbe5eb6d 13487
1c47af84
RH
13488 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
13489 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
fbe5eb6d
BS
13490
13491 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
916b60b7 13492 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
fbe5eb6d
BS
13493 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
13494 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
916b60b7 13495 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
fbe5eb6d
BS
13496
13497 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
13498 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
13499 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
916b60b7 13500 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
fbe5eb6d
BS
13501
13502 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
13503 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
13504
13505 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
13506
13507 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
916b60b7 13508 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
fbe5eb6d
BS
13509
13510 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
13511 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
13512 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
13513 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
13514 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
13515
13516 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
13517
13518 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
13519 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
37f22004
L
13520 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
13521 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
fbe5eb6d
BS
13522
13523 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
13524 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
13525 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
13526
13527 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
37f22004 13528 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
fbe5eb6d
BS
13529 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
13530 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
13531
068f5dea 13532 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
fbe5eb6d
BS
13533 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
13534 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
916b60b7 13535
068f5dea 13536 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
f02e1358 13537 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
f02e1358 13538
9e9fb0ce
JB
13539 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
13540 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
13541
916b60b7
BS
13542 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
13543 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
13544 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
13545
13546 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
13547 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
13548 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
13549
13550 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
13551 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
13552
ab3146fd 13553 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
916b60b7
BS
13554 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
13555 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
13556 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
13557
ab3146fd 13558 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
916b60b7
BS
13559 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
13560 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
13561 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
13562
13563 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
13564 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
13565
13566 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
22c7c85e
L
13567
13568 /* Prescott New Instructions. */
9e200aaf 13569 def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
22c7c85e
L
13570 void_ftype_pcvoid_unsigned_unsigned,
13571 IX86_BUILTIN_MONITOR);
9e200aaf 13572 def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
22c7c85e
L
13573 void_ftype_unsigned_unsigned,
13574 IX86_BUILTIN_MWAIT);
9e200aaf 13575 def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
22c7c85e
L
13576 v4sf_ftype_v4sf,
13577 IX86_BUILTIN_MOVSHDUP);
9e200aaf 13578 def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
22c7c85e
L
13579 v4sf_ftype_v4sf,
13580 IX86_BUILTIN_MOVSLDUP);
9e200aaf 13581 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
22c7c85e 13582 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
eb701deb
RH
13583
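/* Illustrative sketch, assuming -msse3: the monitor/mwait pair
   registered above is used together, with the extension and hint
   arguments currently zero:

     void
     wait_for_store (const void *addr)
     {
       __builtin_ia32_monitor (addr, 0U, 0U);
       __builtin_ia32_mwait (0U, 0U);
     }
*/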
13584 /* Access to the vec_init patterns. */
13585 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
13586 integer_type_node, NULL_TREE);
13587 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v2si",
13588 ftype, IX86_BUILTIN_VEC_INIT_V2SI);
13589
13590 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
13591 short_integer_type_node,
13592 short_integer_type_node,
13593 short_integer_type_node, NULL_TREE);
13594 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v4hi",
13595 ftype, IX86_BUILTIN_VEC_INIT_V4HI);
13596
13597 ftype = build_function_type_list (V8QI_type_node, char_type_node,
13598 char_type_node, char_type_node,
13599 char_type_node, char_type_node,
13600 char_type_node, char_type_node,
13601 char_type_node, NULL_TREE);
13602 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v8qi",
13603 ftype, IX86_BUILTIN_VEC_INIT_V8QI);
13604
13605 /* Access to the vec_extract patterns. */
13606 ftype = build_function_type_list (double_type_node, V2DF_type_node,
13607 integer_type_node, NULL_TREE);
13608 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2df",
13609 ftype, IX86_BUILTIN_VEC_EXT_V2DF);
13610
13611 ftype = build_function_type_list (long_long_integer_type_node,
13612 V2DI_type_node, integer_type_node,
13613 NULL_TREE);
13614 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2di",
13615 ftype, IX86_BUILTIN_VEC_EXT_V2DI);
13616
13617 ftype = build_function_type_list (float_type_node, V4SF_type_node,
13618 integer_type_node, NULL_TREE);
13619 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4sf",
13620 ftype, IX86_BUILTIN_VEC_EXT_V4SF);
13621
ed9b5396
RH
13622 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
13623 integer_type_node, NULL_TREE);
13624 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4si",
13625 ftype, IX86_BUILTIN_VEC_EXT_V4SI);
13626
eb701deb
RH
13627 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
13628 integer_type_node, NULL_TREE);
13629 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v8hi",
13630 ftype, IX86_BUILTIN_VEC_EXT_V8HI);
13631
13632 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
13633 integer_type_node, NULL_TREE);
13634 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_ext_v4hi",
13635 ftype, IX86_BUILTIN_VEC_EXT_V4HI);
13636
13637 /* Access to the vec_set patterns. */
13638 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
13639 intHI_type_node,
13640 integer_type_node, NULL_TREE);
13641 def_builtin (MASK_SSE, "__builtin_ia32_vec_set_v8hi",
13642 ftype, IX86_BUILTIN_VEC_SET_V8HI);
13643
13644 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
13645 intHI_type_node,
13646 integer_type_node, NULL_TREE);
13647 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_set_v4hi",
13648 ftype, IX86_BUILTIN_VEC_SET_V4HI);
bd793c65
BS
13649}
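/* Illustrative sketch: the vec_init/vec_ext/vec_set builtins registered
   above are the hooks that mmintrin.h-style wrappers expand to; direct
   use looks like this (element selectors must be constants, see
   get_element_number below):

     typedef int v2si __attribute__ ((vector_size (8)));
     typedef float v4sf __attribute__ ((vector_size (16)));

     v2si
     make_pair (void)
     {
       return __builtin_ia32_vec_init_v2si (1, 2);
     }

     float
     first_elt (v4sf x)
     {
       return __builtin_ia32_vec_ext_v4sf (x, 0);
     }
*/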
13650
13651/* Errors in the source file can cause expand_expr to return const0_rtx
13652 where we expect a vector. To avoid crashing, use one of the vector
13653 clear instructions. */
13654static rtx
b96a374d 13655safe_vector_operand (rtx x, enum machine_mode mode)
bd793c65 13656{
ef719a44
RH
13657 if (x == const0_rtx)
13658 x = CONST0_RTX (mode);
bd793c65
BS
13659 return x;
13660}
13661
13662/* Subroutine of ix86_expand_builtin to take care of binop insns. */
13663
13664static rtx
b96a374d 13665ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
bd793c65 13666{
ef719a44 13667 rtx pat, xops[3];
bd793c65
BS
13668 tree arg0 = TREE_VALUE (arglist);
13669 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13670 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13671 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13672 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13673 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13674 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
13675
13676 if (VECTOR_MODE_P (mode0))
13677 op0 = safe_vector_operand (op0, mode0);
13678 if (VECTOR_MODE_P (mode1))
13679 op1 = safe_vector_operand (op1, mode1);
13680
e358acde 13681 if (optimize || !target
bd793c65
BS
13682 || GET_MODE (target) != tmode
13683 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13684 target = gen_reg_rtx (tmode);
13685
d9deed68
JH
13686 if (GET_MODE (op1) == SImode && mode1 == TImode)
13687 {
13688 rtx x = gen_reg_rtx (V4SImode);
13689 emit_insn (gen_sse2_loadd (x, op1));
13690 op1 = gen_lowpart (TImode, x);
13691 }
13692
bd793c65
BS
13693 /* In case the insn wants input operands in modes different from
13694 the result, abort. */
ebe75517
JH
13695 if ((GET_MODE (op0) != mode0 && GET_MODE (op0) != VOIDmode)
13696 || (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode))
bd793c65
BS
13697 abort ();
13698
ef719a44 13699 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
bd793c65 13700 op0 = copy_to_mode_reg (mode0, op0);
ef719a44 13701 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
bd793c65
BS
13702 op1 = copy_to_mode_reg (mode1, op1);
13703
eb701deb
RH
13704 /* ??? Using ix86_fixup_binary_operands is problematic when
13705 we've got mismatched modes. Fake it. */
13706
ef719a44
RH
13707 xops[0] = target;
13708 xops[1] = op0;
13709 xops[2] = op1;
59bef189 13710
eb701deb
RH
13711 if (tmode == mode0 && tmode == mode1)
13712 {
13713 target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
13714 op0 = xops[1];
13715 op1 = xops[2];
13716 }
13717 else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
13718 {
13719 op0 = force_reg (mode0, op0);
13720 op1 = force_reg (mode1, op1);
13721 target = gen_reg_rtx (tmode);
13722 }
13723
13724 pat = GEN_FCN (icode) (target, op0, op1);
bd793c65
BS
13725 if (! pat)
13726 return 0;
13727 emit_insn (pat);
13728 return target;
13729}
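/* Illustrative sketch: a two-operand builtin such as
   __builtin_ia32_packsswb (defined above) is expanded by this routine;
   operands that fail the insn predicates are forced into registers
   first:

     typedef short v4hi __attribute__ ((vector_size (8)));
     typedef char v8qi __attribute__ ((vector_size (8)));

     v8qi
     pack (v4hi a, v4hi b)
     {
       return __builtin_ia32_packsswb (a, b);
     }
*/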
13730
13731/* Subroutine of ix86_expand_builtin to take care of stores. */
13732
13733static rtx
b96a374d 13734ix86_expand_store_builtin (enum insn_code icode, tree arglist)
bd793c65
BS
13735{
13736 rtx pat;
13737 tree arg0 = TREE_VALUE (arglist);
13738 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13739 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13740 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13741 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
13742 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
13743
13744 if (VECTOR_MODE_P (mode1))
13745 op1 = safe_vector_operand (op1, mode1);
13746
13747 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
7f0e57bd 13748 op1 = copy_to_mode_reg (mode1, op1);
59bef189 13749
bd793c65
BS
13750 pat = GEN_FCN (icode) (op0, op1);
13751 if (pat)
13752 emit_insn (pat);
13753 return 0;
13754}
13755
13756/* Subroutine of ix86_expand_builtin to take care of unop insns. */
13757
13758static rtx
b96a374d
AJ
13759ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
13760 rtx target, int do_load)
bd793c65
BS
13761{
13762 rtx pat;
13763 tree arg0 = TREE_VALUE (arglist);
13764 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13765 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13766 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13767
e358acde 13768 if (optimize || !target
bd793c65
BS
13769 || GET_MODE (target) != tmode
13770 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13771 target = gen_reg_rtx (tmode);
13772 if (do_load)
13773 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13774 else
13775 {
13776 if (VECTOR_MODE_P (mode0))
13777 op0 = safe_vector_operand (op0, mode0);
13778
e358acde
RH
13779 if ((optimize && !register_operand (op0, mode0))
13780 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
bd793c65
BS
13781 op0 = copy_to_mode_reg (mode0, op0);
13782 }
13783
13784 pat = GEN_FCN (icode) (target, op0);
13785 if (! pat)
13786 return 0;
13787 emit_insn (pat);
13788 return target;
13789}
13790
13791/* Subroutine of ix86_expand_builtin to take care of three special unop insns:
13792 sqrtss, rsqrtss, rcpss. */
13793
13794static rtx
b96a374d 13795ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
bd793c65
BS
13796{
13797 rtx pat;
13798 tree arg0 = TREE_VALUE (arglist);
59bef189 13799 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
bd793c65
BS
13800 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13801 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13802
e358acde 13803 if (optimize || !target
bd793c65
BS
13804 || GET_MODE (target) != tmode
13805 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13806 target = gen_reg_rtx (tmode);
13807
13808 if (VECTOR_MODE_P (mode0))
13809 op0 = safe_vector_operand (op0, mode0);
13810
e358acde
RH
13811 if ((optimize && !register_operand (op0, mode0))
13812 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
bd793c65 13813 op0 = copy_to_mode_reg (mode0, op0);
fce5a9f2 13814
59bef189
RH
13815 op1 = op0;
13816 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
13817 op1 = copy_to_mode_reg (mode0, op1);
fce5a9f2 13818
59bef189 13819 pat = GEN_FCN (icode) (target, op0, op1);
bd793c65
BS
13820 if (! pat)
13821 return 0;
13822 emit_insn (pat);
13823 return target;
13824}
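/* Illustrative sketch: these three insns merge the scalar result into
   the upper elements of the duplicated source, so only element 0 of the
   vector changes:

     typedef float v4sf __attribute__ ((vector_size (16)));

     v4sf
     sqrt_low (v4sf x)
     {
       return __builtin_ia32_sqrtss (x);
     }
*/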
13825
13826/* Subroutine of ix86_expand_builtin to take care of comparison insns. */
13827
13828static rtx
b96a374d
AJ
13829ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
13830 rtx target)
bd793c65
BS
13831{
13832 rtx pat;
13833 tree arg0 = TREE_VALUE (arglist);
13834 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13835 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13836 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13837 rtx op2;
13838 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
13839 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
13840 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
13841 enum rtx_code comparison = d->comparison;
13842
13843 if (VECTOR_MODE_P (mode0))
13844 op0 = safe_vector_operand (op0, mode0);
13845 if (VECTOR_MODE_P (mode1))
13846 op1 = safe_vector_operand (op1, mode1);
13847
13848 /* Swap operands if we have a comparison that isn't available in
13849 hardware. */
e358acde 13850 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
bd793c65 13851 {
21e1b5f1
BS
13852 rtx tmp = gen_reg_rtx (mode1);
13853 emit_move_insn (tmp, op1);
bd793c65 13854 op1 = op0;
21e1b5f1 13855 op0 = tmp;
bd793c65 13856 }
21e1b5f1 13857
e358acde 13858 if (optimize || !target
21e1b5f1
BS
13859 || GET_MODE (target) != tmode
13860 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
bd793c65
BS
13861 target = gen_reg_rtx (tmode);
13862
e358acde
RH
13863 if ((optimize && !register_operand (op0, mode0))
13864 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
bd793c65 13865 op0 = copy_to_mode_reg (mode0, op0);
e358acde
RH
13866 if ((optimize && !register_operand (op1, mode1))
13867 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
bd793c65
BS
13868 op1 = copy_to_mode_reg (mode1, op1);
13869
13870 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13871 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
13872 if (! pat)
13873 return 0;
13874 emit_insn (pat);
13875 return target;
13876}
13877
13878/* Subroutine of ix86_expand_builtin to take care of comi insns. */
13879
13880static rtx
b96a374d
AJ
13881ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
13882 rtx target)
bd793c65
BS
13883{
13884 rtx pat;
13885 tree arg0 = TREE_VALUE (arglist);
13886 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13887 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13888 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13889 rtx op2;
13890 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
13891 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
13892 enum rtx_code comparison = d->comparison;
13893
13894 if (VECTOR_MODE_P (mode0))
13895 op0 = safe_vector_operand (op0, mode0);
13896 if (VECTOR_MODE_P (mode1))
13897 op1 = safe_vector_operand (op1, mode1);
13898
13899 /* Swap operands if we have a comparison that isn't available in
13900 hardware. */
e358acde 13901 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
bd793c65
BS
13902 {
13903 rtx tmp = op1;
13904 op1 = op0;
13905 op0 = tmp;
bd793c65
BS
13906 }
13907
13908 target = gen_reg_rtx (SImode);
13909 emit_move_insn (target, const0_rtx);
13910 target = gen_rtx_SUBREG (QImode, target, 0);
13911
e358acde
RH
13912 if ((optimize && !register_operand (op0, mode0))
13913 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
bd793c65 13914 op0 = copy_to_mode_reg (mode0, op0);
e358acde
RH
13915 if ((optimize && !register_operand (op1, mode1))
13916 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
bd793c65
BS
13917 op1 = copy_to_mode_reg (mode1, op1);
13918
13919 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
1194ca05 13920 pat = GEN_FCN (d->icode) (op0, op1);
bd793c65
BS
13921 if (! pat)
13922 return 0;
13923 emit_insn (pat);
29628f27
BS
13924 emit_insn (gen_rtx_SET (VOIDmode,
13925 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
13926 gen_rtx_fmt_ee (comparison, QImode,
1194ca05 13927 SET_DEST (pat),
29628f27 13928 const0_rtx)));
bd793c65 13929
6f1a6c5b 13930 return SUBREG_REG (target);
bd793c65
BS
13931}
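/* Illustrative sketch, with the builtin name assumed from the usual
   bdesc_comi contents: a comi builtin returns an int derived from the
   flags the scalar comparison sets, via the SUBREG/STRICT_LOW_PART
   sequence above:

     typedef float v4sf __attribute__ ((vector_size (16)));

     int
     low_equal (v4sf a, v4sf b)
     {
       return __builtin_ia32_comieq (a, b);
     }
*/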
13932
eb701deb
RH
13933/* Return the integer constant in ARG. Constrain it to be in the range
13934 of the subparts of VEC_TYPE; issue an error if not. */
13935
13936static int
13937get_element_number (tree vec_type, tree arg)
13938{
13939 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
13940
13941 if (!host_integerp (arg, 1)
13942 || (elt = tree_low_cst (arg, 1), elt > max))
13943 {
13944      error ("selector must be an integer constant in the range 0..%i",
	     (int) max);
13945 return 0;
13946 }
13947
13948 return elt;
13949}
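/* Illustrative sketch: the selector argument must fold to an integer
   constant by expansion time, so a call like

     __builtin_ia32_vec_ext_v4sf (x, 2)

   is accepted, while passing a run-time variable as the selector reaches
   the error above. */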
13950
13951/* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
13952 ix86_expand_vector_init. We DO have language-level syntax for this, in
13953 the form of (type){ init-list }. Except that since we can't place emms
13954 instructions from inside the compiler, we can't allow the use of MMX
13955 registers unless the user explicitly asks for it. So we do *not* define
13956 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
13957   we have builtins invoked by mmintrin.h that give us license to emit
13958 these sorts of instructions. */
13959
13960static rtx
13961ix86_expand_vec_init_builtin (tree type, tree arglist, rtx target)
13962{
13963 enum machine_mode tmode = TYPE_MODE (type);
13964 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
13965 int i, n_elt = GET_MODE_NUNITS (tmode);
13966 rtvec v = rtvec_alloc (n_elt);
13967
13968 gcc_assert (VECTOR_MODE_P (tmode));
13969
13970 for (i = 0; i < n_elt; ++i, arglist = TREE_CHAIN (arglist))
13971 {
13972 rtx x = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
13973 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
13974 }
13975
13976 gcc_assert (arglist == NULL);
13977
13978 if (!target || !register_operand (target, tmode))
13979 target = gen_reg_rtx (tmode);
13980
13981 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
13982 return target;
13983}
13984
13985/* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
13986 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
13987 had a language-level syntax for referencing vector elements. */
13988
13989static rtx
13990ix86_expand_vec_ext_builtin (tree arglist, rtx target)
13991{
13992 enum machine_mode tmode, mode0;
13993 tree arg0, arg1;
13994 int elt;
13995 rtx op0;
13996
13997 arg0 = TREE_VALUE (arglist);
13998 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13999
14000 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14001 elt = get_element_number (TREE_TYPE (arg0), arg1);
14002
14003 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
14004 mode0 = TYPE_MODE (TREE_TYPE (arg0));
14005 gcc_assert (VECTOR_MODE_P (mode0));
14006
14007 op0 = force_reg (mode0, op0);
14008
14009 if (optimize || !target || !register_operand (target, tmode))
14010 target = gen_reg_rtx (tmode);
14011
14012 ix86_expand_vector_extract (true, target, op0, elt);
14013
14014 return target;
14015}
14016
14017/* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
14018 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
14019 a language-level syntax for referencing vector elements. */
14020
14021static rtx
14022ix86_expand_vec_set_builtin (tree arglist)
14023{
14024 enum machine_mode tmode, mode1;
14025 tree arg0, arg1, arg2;
14026 int elt;
14027 rtx op0, op1;
14028
14029 arg0 = TREE_VALUE (arglist);
14030 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14031 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14032
14033 tmode = TYPE_MODE (TREE_TYPE (arg0));
14034 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
14035 gcc_assert (VECTOR_MODE_P (tmode));
14036
14037 op0 = expand_expr (arg0, NULL_RTX, tmode, 0);
14038 op1 = expand_expr (arg1, NULL_RTX, mode1, 0);
14039 elt = get_element_number (TREE_TYPE (arg0), arg2);
14040
14041 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
14042 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
14043
14044 op0 = force_reg (tmode, op0);
14045 op1 = force_reg (mode1, op1);
14046
14047 ix86_expand_vector_set (true, op0, op1, elt);
14048
14049 return op0;
14050}
14051
bd793c65
BS
14052/* Expand an expression EXP that calls a built-in function,
14053 with result going to TARGET if that's convenient
14054 (and in mode MODE if that's convenient).
14055 SUBTARGET may be used as the target for computing one of EXP's operands.
14056 IGNORE is nonzero if the value is to be ignored. */
14057
eb701deb 14058static rtx
b96a374d
AJ
14059ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
14060 enum machine_mode mode ATTRIBUTE_UNUSED,
14061 int ignore ATTRIBUTE_UNUSED)
bd793c65 14062{
8b60264b 14063 const struct builtin_description *d;
77ebd435 14064 size_t i;
bd793c65
BS
14065 enum insn_code icode;
14066 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
14067 tree arglist = TREE_OPERAND (exp, 1);
e37af218 14068 tree arg0, arg1, arg2;
bd793c65
BS
14069 rtx op0, op1, op2, pat;
14070 enum machine_mode tmode, mode0, mode1, mode2;
8752c357 14071 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
bd793c65
BS
14072
14073 switch (fcode)
14074 {
14075 case IX86_BUILTIN_EMMS:
80e8bb90 14076 emit_insn (gen_mmx_emms ());
bd793c65
BS
14077 return 0;
14078
14079 case IX86_BUILTIN_SFENCE:
80e8bb90 14080 emit_insn (gen_sse_sfence ());
bd793c65
BS
14081 return 0;
14082
bd793c65 14083 case IX86_BUILTIN_MASKMOVQ:
077084dd 14084 case IX86_BUILTIN_MASKMOVDQU:
fbe5eb6d 14085 icode = (fcode == IX86_BUILTIN_MASKMOVQ
80e8bb90 14086 ? CODE_FOR_mmx_maskmovq
ef719a44 14087 : CODE_FOR_sse2_maskmovdqu);
bd793c65
BS
14088 /* Note the arg order is different from the operand order. */
14089 arg1 = TREE_VALUE (arglist);
14090 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
14091 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14092 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14093 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14094 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14095 mode0 = insn_data[icode].operand[0].mode;
14096 mode1 = insn_data[icode].operand[1].mode;
14097 mode2 = insn_data[icode].operand[2].mode;
14098
80e8bb90
RH
14099 op0 = force_reg (Pmode, op0);
14100 op0 = gen_rtx_MEM (mode1, op0);
ef719a44 14101
5c464583 14102 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
bd793c65
BS
14103 op0 = copy_to_mode_reg (mode0, op0);
14104 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
14105 op1 = copy_to_mode_reg (mode1, op1);
14106 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
14107 op2 = copy_to_mode_reg (mode2, op2);
14108 pat = GEN_FCN (icode) (op0, op1, op2);
14109 if (! pat)
14110 return 0;
14111 emit_insn (pat);
14112 return 0;
14113
14114 case IX86_BUILTIN_SQRTSS:
ef719a44 14115 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, arglist, target);
bd793c65 14116 case IX86_BUILTIN_RSQRTSS:
ef719a44 14117 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, arglist, target);
bd793c65 14118 case IX86_BUILTIN_RCPSS:
ef719a44 14119 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, arglist, target);
bd793c65 14120
bd793c65
BS
14121 case IX86_BUILTIN_LOADUPS:
14122 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
14123
bd793c65 14124 case IX86_BUILTIN_STOREUPS:
e37af218 14125 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
bd793c65 14126
0f290768 14127 case IX86_BUILTIN_LOADHPS:
bd793c65 14128 case IX86_BUILTIN_LOADLPS:
fbe5eb6d
BS
14129 case IX86_BUILTIN_LOADHPD:
14130 case IX86_BUILTIN_LOADLPD:
2cdb3148
RH
14131 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
14132 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
1c47af84
RH
14133 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
14134 : CODE_FOR_sse2_loadlpd);
bd793c65
BS
14135 arg0 = TREE_VALUE (arglist);
14136 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14137 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14138 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14139 tmode = insn_data[icode].operand[0].mode;
14140 mode0 = insn_data[icode].operand[1].mode;
14141 mode1 = insn_data[icode].operand[2].mode;
14142
e358acde 14143 op0 = force_reg (mode0, op0);
bd793c65 14144 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
e358acde 14145 if (optimize || target == 0
bd793c65 14146 || GET_MODE (target) != tmode
e358acde 14147 || !register_operand (target, tmode))
14148 target = gen_reg_rtx (tmode);
14149 pat = GEN_FCN (icode) (target, op0, op1);
14150 if (! pat)
14151 return 0;
14152 emit_insn (pat);
14153 return target;
0f290768 14154
14155 case IX86_BUILTIN_STOREHPS:
14156 case IX86_BUILTIN_STORELPS:
2cdb3148 14157 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
eb701deb 14158 : CODE_FOR_sse_storelps);
14159 arg0 = TREE_VALUE (arglist);
14160 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14161 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14162 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14163 mode0 = insn_data[icode].operand[0].mode;
14164 mode1 = insn_data[icode].operand[1].mode;
14165
14166 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
e358acde 14167 op1 = force_reg (mode1, op1);
14168
14169 pat = GEN_FCN (icode) (op0, op1);
14170 if (! pat)
14171 return 0;
14172 emit_insn (pat);
14173 return const0_rtx;
14174
14175 case IX86_BUILTIN_MOVNTPS:
e37af218 14176 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
bd793c65 14177 case IX86_BUILTIN_MOVNTQ:
e37af218 14178 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
14179
14180 case IX86_BUILTIN_LDMXCSR:
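      /* The ldmxcsr instruction takes only a memory operand, so pass
         the argument through a stack slot.  */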
14181 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
14182 target = assign_386_stack_local (SImode, 0);
14183 emit_move_insn (target, op0);
80e8bb90 14184 emit_insn (gen_sse_ldmxcsr (target));
14185 return 0;
14186
14187 case IX86_BUILTIN_STMXCSR:
14188 target = assign_386_stack_local (SImode, 0);
80e8bb90 14189 emit_insn (gen_sse_stmxcsr (target));
14190 return copy_to_mode_reg (SImode, target);
14191
bd793c65 14192 case IX86_BUILTIN_SHUFPS:
14193 case IX86_BUILTIN_SHUFPD:
14194 icode = (fcode == IX86_BUILTIN_SHUFPS
14195 ? CODE_FOR_sse_shufps
14196 : CODE_FOR_sse2_shufpd);
14197 arg0 = TREE_VALUE (arglist);
14198 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14199 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14200 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14201 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14202 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14203 tmode = insn_data[icode].operand[0].mode;
14204 mode0 = insn_data[icode].operand[1].mode;
14205 mode1 = insn_data[icode].operand[2].mode;
14206 mode2 = insn_data[icode].operand[3].mode;
14207
14208 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14209 op0 = copy_to_mode_reg (mode0, op0);
14210 if ((optimize && !register_operand (op1, mode1))
14211 || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
14212 op1 = copy_to_mode_reg (mode1, op1);
14213 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14214 {
14215 /* @@@ better error message */
14216 error ("mask must be an immediate");
6f1a6c5b 14217 return gen_reg_rtx (tmode);
bd793c65 14218 }
e358acde 14219 if (optimize || target == 0
14220 || GET_MODE (target) != tmode
14221 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14222 target = gen_reg_rtx (tmode);
14223 pat = GEN_FCN (icode) (target, op0, op1, op2);
14224 if (! pat)
14225 return 0;
14226 emit_insn (pat);
14227 return target;
14228
14229 case IX86_BUILTIN_PSHUFW:
14230 case IX86_BUILTIN_PSHUFD:
14231 case IX86_BUILTIN_PSHUFHW:
14232 case IX86_BUILTIN_PSHUFLW:
14233 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
14234 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
14235 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
14236 : CODE_FOR_mmx_pshufw);
14237 arg0 = TREE_VALUE (arglist);
14238 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14239 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14240 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14241 tmode = insn_data[icode].operand[0].mode;
14242 mode1 = insn_data[icode].operand[1].mode;
14243 mode2 = insn_data[icode].operand[2].mode;
bd793c65 14244
14245 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14246 op0 = copy_to_mode_reg (mode1, op0);
14247 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14248 {
14249 /* @@@ better error message */
14250 error ("mask must be an immediate");
14251 return const0_rtx;
14252 }
14253 if (target == 0
14254 || GET_MODE (target) != tmode
14255 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14256 target = gen_reg_rtx (tmode);
29628f27 14257 pat = GEN_FCN (icode) (target, op0, op1);
14258 if (! pat)
14259 return 0;
14260 emit_insn (pat);
14261 return target;
14262
14263 case IX86_BUILTIN_PSLLDQI128:
14264 case IX86_BUILTIN_PSRLDQI128:
14265 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
14266 : CODE_FOR_sse2_lshrti3);
14267 arg0 = TREE_VALUE (arglist);
14268 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14269 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14270 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14271 tmode = insn_data[icode].operand[0].mode;
14272 mode1 = insn_data[icode].operand[1].mode;
14273 mode2 = insn_data[icode].operand[2].mode;
14274
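      /* The sse2_ashlti3/sse2_lshrti3 patterns operate on TImode, so the
         V2DImode operand and result are accessed through subregs.  */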
14275 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14276 {
14277 op0 = copy_to_reg (op0);
14278 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
14279 }
14280 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14281 {
14282 error ("shift must be an immediate");
14283 return const0_rtx;
14284 }
14285 target = gen_reg_rtx (V2DImode);
14286 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
14287 if (! pat)
14288 return 0;
14289 emit_insn (pat);
14290 return target;
14291
47f339cf 14292 case IX86_BUILTIN_FEMMS:
80e8bb90 14293 emit_insn (gen_mmx_femms ());
14294 return NULL_RTX;
14295
14296 case IX86_BUILTIN_PAVGUSB:
80e8bb90 14297 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, arglist, target);
14298
14299 case IX86_BUILTIN_PF2ID:
80e8bb90 14300 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, arglist, target, 0);
14301
14302 case IX86_BUILTIN_PFACC:
80e8bb90 14303 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, arglist, target);
14304
14305 case IX86_BUILTIN_PFADD:
80e8bb90 14306 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, arglist, target);
14307
14308 case IX86_BUILTIN_PFCMPEQ:
80e8bb90 14309 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, arglist, target);
14310
14311 case IX86_BUILTIN_PFCMPGE:
80e8bb90 14312 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, arglist, target);
14313
14314 case IX86_BUILTIN_PFCMPGT:
80e8bb90 14315 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, arglist, target);
14316
14317 case IX86_BUILTIN_PFMAX:
80e8bb90 14318 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, arglist, target);
14319
14320 case IX86_BUILTIN_PFMIN:
80e8bb90 14321 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, arglist, target);
14322
14323 case IX86_BUILTIN_PFMUL:
80e8bb90 14324 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, arglist, target);
14325
14326 case IX86_BUILTIN_PFRCP:
80e8bb90 14327 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, arglist, target, 0);
14328
14329 case IX86_BUILTIN_PFRCPIT1:
80e8bb90 14330 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, arglist, target);
14331
14332 case IX86_BUILTIN_PFRCPIT2:
80e8bb90 14333 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, arglist, target);
14334
14335 case IX86_BUILTIN_PFRSQIT1:
80e8bb90 14336 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, arglist, target);
14337
14338 case IX86_BUILTIN_PFRSQRT:
80e8bb90 14339 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, arglist, target, 0);
14340
14341 case IX86_BUILTIN_PFSUB:
80e8bb90 14342 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, arglist, target);
14343
14344 case IX86_BUILTIN_PFSUBR:
80e8bb90 14345 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, arglist, target);
14346
14347 case IX86_BUILTIN_PI2FD:
80e8bb90 14348 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, arglist, target, 0);
14349
14350 case IX86_BUILTIN_PMULHRW:
80e8bb90 14351 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, arglist, target);
47f339cf 14352
47f339cf 14353 case IX86_BUILTIN_PF2IW:
80e8bb90 14354 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, arglist, target, 0);
14355
14356 case IX86_BUILTIN_PFNACC:
80e8bb90 14357 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, arglist, target);
14358
14359 case IX86_BUILTIN_PFPNACC:
80e8bb90 14360 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, arglist, target);
14361
14362 case IX86_BUILTIN_PI2FW:
80e8bb90 14363 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, arglist, target, 0);
14364
14365 case IX86_BUILTIN_PSWAPDSI:
80e8bb90 14366 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, arglist, target, 0);
14367
14368 case IX86_BUILTIN_PSWAPDSF:
80e8bb90 14369 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, arglist, target, 0);
47f339cf 14370
fbe5eb6d 14371 case IX86_BUILTIN_SQRTSD:
ef719a44 14372 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, arglist, target);
14373 case IX86_BUILTIN_LOADUPD:
14374 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
14375 case IX86_BUILTIN_STOREUPD:
14376 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
14377
14378 case IX86_BUILTIN_MFENCE:
14379 emit_insn (gen_sse2_mfence ());
14380 return 0;
14381 case IX86_BUILTIN_LFENCE:
14382 emit_insn (gen_sse2_lfence ());
14383 return 0;
14384
14385 case IX86_BUILTIN_CLFLUSH:
14386 arg0 = TREE_VALUE (arglist);
14387 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14388 icode = CODE_FOR_sse2_clflush;
14389 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
14390 op0 = copy_to_mode_reg (Pmode, op0);
14391
14392 emit_insn (gen_sse2_clflush (op0));
14393 return 0;
14394
14395 case IX86_BUILTIN_MOVNTPD:
14396 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
14397 case IX86_BUILTIN_MOVNTDQ:
916b60b7 14398 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
14399 case IX86_BUILTIN_MOVNTI:
14400 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
14401
14402 case IX86_BUILTIN_LOADDQU:
14403 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
14404 case IX86_BUILTIN_STOREDQU:
14405 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
f02e1358 14406
14407 case IX86_BUILTIN_MONITOR:
14408 arg0 = TREE_VALUE (arglist);
14409 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14410 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14411 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14412 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14413 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14414 if (!REG_P (op0))
14415 op0 = copy_to_mode_reg (SImode, op0);
14416 if (!REG_P (op1))
14417 op1 = copy_to_mode_reg (SImode, op1);
14418 if (!REG_P (op2))
14419 op2 = copy_to_mode_reg (SImode, op2);
ef719a44 14420 emit_insn (gen_sse3_monitor (op0, op1, op2));
14421 return 0;
14422
14423 case IX86_BUILTIN_MWAIT:
14424 arg0 = TREE_VALUE (arglist);
14425 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14426 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14427 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14428 if (!REG_P (op0))
14429 op0 = copy_to_mode_reg (SImode, op0);
14430 if (!REG_P (op1))
14431 op1 = copy_to_mode_reg (SImode, op1);
ef719a44 14432 emit_insn (gen_sse3_mwait (op0, op1));
14433 return 0;
14434
22c7c85e 14435 case IX86_BUILTIN_LDDQU:
14436 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, arglist,
14437 target, 1);
14438
14439 case IX86_BUILTIN_VEC_INIT_V2SI:
14440 case IX86_BUILTIN_VEC_INIT_V4HI:
14441 case IX86_BUILTIN_VEC_INIT_V8QI:
14442 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), arglist, target);
14443
14444 case IX86_BUILTIN_VEC_EXT_V2DF:
14445 case IX86_BUILTIN_VEC_EXT_V2DI:
14446 case IX86_BUILTIN_VEC_EXT_V4SF:
ed9b5396 14447 case IX86_BUILTIN_VEC_EXT_V4SI:
14448 case IX86_BUILTIN_VEC_EXT_V8HI:
14449 case IX86_BUILTIN_VEC_EXT_V4HI:
14450 return ix86_expand_vec_ext_builtin (arglist, target);
14451
14452 case IX86_BUILTIN_VEC_SET_V8HI:
14453 case IX86_BUILTIN_VEC_SET_V4HI:
14454 return ix86_expand_vec_set_builtin (arglist);
22c7c85e 14455
14456 default:
14457 break;
14458 }
14459
ca7558fc 14460 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
14461 if (d->code == fcode)
14462 {
14463 /* Compares are treated specially. */
14464 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
14465 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
14466 || d->icode == CODE_FOR_sse2_maskcmpv2df3
14467 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
14468 return ix86_expand_sse_compare (d, arglist, target);
14469
14470 return ix86_expand_binop_builtin (d->icode, arglist, target);
14471 }
14472
ca7558fc 14473 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
14474 if (d->code == fcode)
14475 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
0f290768 14476
ca7558fc 14477 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
14478 if (d->code == fcode)
14479 return ix86_expand_sse_comi (d, arglist, target);
0f290768 14480
ed9b5396 14481 gcc_unreachable ();
bd793c65 14482}
14483
 14484/* Store OPERAND to memory after reload is completed.  This means
f710504c 14485 that we can't easily use assign_stack_local. */
4211a8fb 14486rtx
b96a374d 14487ix86_force_to_memory (enum machine_mode mode, rtx operand)
4211a8fb 14488{
898d374d 14489 rtx result;
14490 if (!reload_completed)
14491 abort ();
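  /* Three strategies follow: store into the red zone below the stack
     pointer when the target provides one, push onto the stack on 64-bit
     targets without a red zone, and push (DImode split into two SImode
     words) on 32-bit targets.  */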
a5b378d6 14492 if (TARGET_RED_ZONE)
14493 {
14494 result = gen_rtx_MEM (mode,
14495 gen_rtx_PLUS (Pmode,
14496 stack_pointer_rtx,
14497 GEN_INT (-RED_ZONE_SIZE)));
14498 emit_move_insn (result, operand);
14499 }
a5b378d6 14500 else if (!TARGET_RED_ZONE && TARGET_64BIT)
4211a8fb 14501 {
898d374d 14502 switch (mode)
4211a8fb 14503 {
14504 case HImode:
14505 case SImode:
14506 operand = gen_lowpart (DImode, operand);
5efb1046 14507 /* FALLTHRU */
898d374d 14508 case DImode:
4211a8fb 14509 emit_insn (
14510 gen_rtx_SET (VOIDmode,
14511 gen_rtx_MEM (DImode,
14512 gen_rtx_PRE_DEC (DImode,
14513 stack_pointer_rtx)),
14514 operand));
14515 break;
14516 default:
14517 abort ();
14518 }
14519 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14520 }
14521 else
14522 {
14523 switch (mode)
14524 {
14525 case DImode:
14526 {
14527 rtx operands[2];
14528 split_di (&operand, 1, operands, operands + 1);
14529 emit_insn (
14530 gen_rtx_SET (VOIDmode,
14531 gen_rtx_MEM (SImode,
14532 gen_rtx_PRE_DEC (Pmode,
14533 stack_pointer_rtx)),
14534 operands[1]));
14535 emit_insn (
14536 gen_rtx_SET (VOIDmode,
14537 gen_rtx_MEM (SImode,
14538 gen_rtx_PRE_DEC (Pmode,
14539 stack_pointer_rtx)),
14540 operands[0]));
14541 }
14542 break;
14543 case HImode:
14544 /* It is better to store HImodes as SImodes. */
14545 if (!TARGET_PARTIAL_REG_STALL)
14546 operand = gen_lowpart (SImode, operand);
5efb1046 14547 /* FALLTHRU */
898d374d 14548 case SImode:
4211a8fb 14549 emit_insn (
14550 gen_rtx_SET (VOIDmode,
14551 gen_rtx_MEM (GET_MODE (operand),
14552 gen_rtx_PRE_DEC (SImode,
14553 stack_pointer_rtx)),
14554 operand));
14555 break;
14556 default:
14557 abort ();
4211a8fb 14558 }
898d374d 14559 result = gen_rtx_MEM (mode, stack_pointer_rtx);
4211a8fb 14560 }
898d374d 14561 return result;
14562}
14563
 14564/* Free the operand from memory.  */
14565void
b96a374d 14566ix86_free_from_memory (enum machine_mode mode)
4211a8fb 14567{
a5b378d6 14568 if (!TARGET_RED_ZONE)
14569 {
14570 int size;
14571
14572 if (mode == DImode || TARGET_64BIT)
14573 size = 8;
14574 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
14575 size = 2;
14576 else
14577 size = 4;
14578 /* Use LEA to deallocate stack space. In peephole2 it will be converted
14579 to pop or add instruction if registers are available. */
14580 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
14581 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14582 GEN_INT (size))));
14583 }
4211a8fb 14584}
a946dd00 14585
14586/* Put float CONST_DOUBLE in the constant pool instead of fp regs.
14587 QImode must go into class Q_REGS.
14588 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
892a2d68 14589 movdf to do mem-to-mem moves through integer regs. */
f84aa48a 14590enum reg_class
b96a374d 14591ix86_preferred_reload_class (rtx x, enum reg_class class)
f84aa48a 14592{
14593 if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x)))
14594 return NO_REGS;
14595 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
14596 {
14597 /* SSE can't load any constant directly yet. */
14598 if (SSE_CLASS_P (class))
14599 return NO_REGS;
14600 /* Floats can load 0 and 1. */
14601 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
14602 {
14603 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
14604 if (MAYBE_SSE_CLASS_P (class))
14605 return (reg_class_subset_p (class, GENERAL_REGS)
14606 ? GENERAL_REGS : FLOAT_REGS);
14607 else
14608 return class;
14609 }
14610 /* General regs can load everything. */
14611 if (reg_class_subset_p (class, GENERAL_REGS))
14612 return GENERAL_REGS;
14613 /* In case we haven't resolved FLOAT or SSE yet, give up. */
14614 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
14615 return NO_REGS;
14616 }
14617 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
14618 return NO_REGS;
14619 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
14620 return Q_REGS;
14621 return class;
14622}
14623
14624/* If we are copying between general and FP registers, we need a memory
14625 location. The same is true for SSE and MMX registers.
14626
 14627   The macro can't work reliably when one of the CLASSES is a class containing
 14628   registers from multiple units (SSE, MMX, integer).  We avoid this by never
 14629   combining those units in a single alternative in the machine description.
14630 Ensure that this constraint holds to avoid unexpected surprises.
14631
14632 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
14633 enforce these sanity checks. */
14634int
14635ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
14636 enum machine_mode mode, int strict)
14637{
14638 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
14639 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
14640 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
14641 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
14642 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
14643 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
14644 {
14645 if (strict)
14646 abort ();
14647 else
14648 return 1;
14649 }
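  /* A move needs secondary memory when it crosses the x87 boundary, or
     when it crosses the SSE or MMX boundary and either the mode is not
     SImode (or DImode on 64-bit) or inter-unit moves are disabled and we
     are not optimizing for size.  */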
14650 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
14651 || ((SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
14652 || MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
14653 && ((mode != SImode && (mode != DImode || !TARGET_64BIT))
14654 || (!TARGET_INTER_UNIT_MOVES && !optimize_size))));
14655}
14656/* Return the cost of moving data from a register in class CLASS1 to
a4f31c00 14657 one in class CLASS2.
14658
14659 It is not required that the cost always equal 2 when FROM is the same as TO;
14660 on some machines it is expensive to move between registers if they are not
14661 general registers. */
14662int
14663ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
14664 enum reg_class class2)
14665{
 14666  /* In case we require secondary memory, compute the cost of the store
b96a374d 14667     followed by a load.  In order to avoid bad register allocation choices,
 14668     we need this to be *at least* as high as the symmetric MEMORY_MOVE_COST.  */
14669
f84aa48a
JH
14670 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
14671 {
14672 int cost = 1;
14673
14674 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
14675 MEMORY_MOVE_COST (mode, class1, 1));
14676 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
14677 MEMORY_MOVE_COST (mode, class2, 1));
b96a374d 14678
 14679      /* In case of copying from a general purpose register we may emit multiple
 14680         stores followed by a single load, causing a memory size mismatch stall.
d1f87653 14681         Count this as an arbitrarily high cost of 20.  */
62415523 14682 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
14683 cost += 20;
14684
14685 /* In the case of FP/MMX moves, the registers actually overlap, and we
14686 have to switch modes in order to treat them differently. */
14687 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
14688 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
14689 cost += 20;
14690
14691 return cost;
f84aa48a 14692 }
d631b80a 14693
92d0fb09 14694 /* Moves between SSE/MMX and integer unit are expensive. */
14695 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
14696 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
14697 return ix86_cost->mmxsse_to_integer;
14698 if (MAYBE_FLOAT_CLASS_P (class1))
14699 return ix86_cost->fp_move;
14700 if (MAYBE_SSE_CLASS_P (class1))
14701 return ix86_cost->sse_move;
14702 if (MAYBE_MMX_CLASS_P (class1))
14703 return ix86_cost->mmx_move;
14704 return 2;
14705}
14706
14707/* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
14708int
b96a374d 14709ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
14710{
 14711  /* Flags, and only flags, can hold CCmode values.  */
14712 if (CC_REGNO_P (regno))
14713 return GET_MODE_CLASS (mode) == MODE_CC;
14714 if (GET_MODE_CLASS (mode) == MODE_CC
14715 || GET_MODE_CLASS (mode) == MODE_RANDOM
14716 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
14717 return 0;
14718 if (FP_REGNO_P (regno))
14719 return VALID_FP_MODE_P (mode);
14720 if (SSE_REGNO_P (regno))
dcbca208 14721 {
14722 /* We implement the move patterns for all vector modes into and
14723 out of SSE registers, even when no operation instructions
14724 are available. */
14725 return (VALID_SSE_REG_MODE (mode)
14726 || VALID_SSE2_REG_MODE (mode)
14727 || VALID_MMX_REG_MODE (mode)
14728 || VALID_MMX_REG_MODE_3DNOW (mode));
dcbca208 14729 }
a946dd00 14730 if (MMX_REGNO_P (regno))
dcbca208 14731 {
14732 /* We implement the move patterns for 3DNOW modes even in MMX mode,
14733 so if the register is available at all, then we can move data of
14734 the given mode into or out of it. */
14735 return (VALID_MMX_REG_MODE (mode)
14736 || VALID_MMX_REG_MODE_3DNOW (mode));
dcbca208 14737 }
 14738  /* We handle both integers and floats in the general purpose registers.
 14739     In the future we should be able to handle vector modes as well.  */
14740 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
14741 return 0;
14742 /* Take care for QImode values - they can be in non-QI regs, but then
14743 they do cause partial register stalls. */
d2836273 14744 if (regno < 4 || mode != QImode || TARGET_64BIT)
14745 return 1;
14746 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
14747}
14748
14749/* Return the cost of moving data of mode M between a
14750 register and memory. A value of 2 is the default; this cost is
14751 relative to those in `REGISTER_MOVE_COST'.
14752
14753 If moving between registers and memory is more expensive than
14754 between two registers, you should define this macro to express the
14755 relative cost.
14756
 14757   Also model the increased cost of moving QImode registers in
 14758   non Q_REGS classes.
 14759 */
14760int
b96a374d 14761ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
14762{
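  /* Each per-unit cost table below is indexed by operand size: entries
     0, 1 and 2 correspond to 4, 8 and 16 byte values (SFmode, DFmode
     and XFmode for the x87 tables).  */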
14763 if (FLOAT_CLASS_P (class))
14764 {
14765 int index;
14766 switch (mode)
14767 {
14768 case SFmode:
14769 index = 0;
14770 break;
14771 case DFmode:
14772 index = 1;
14773 break;
14774 case XFmode:
14775 index = 2;
14776 break;
14777 default:
14778 return 100;
14779 }
14780 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
14781 }
14782 if (SSE_CLASS_P (class))
14783 {
14784 int index;
14785 switch (GET_MODE_SIZE (mode))
14786 {
14787 case 4:
14788 index = 0;
14789 break;
14790 case 8:
14791 index = 1;
14792 break;
14793 case 16:
14794 index = 2;
14795 break;
14796 default:
14797 return 100;
14798 }
14799 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
14800 }
14801 if (MMX_CLASS_P (class))
14802 {
14803 int index;
14804 switch (GET_MODE_SIZE (mode))
14805 {
14806 case 4:
14807 index = 0;
14808 break;
14809 case 8:
14810 index = 1;
14811 break;
14812 default:
14813 return 100;
14814 }
14815 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
14816 }
14817 switch (GET_MODE_SIZE (mode))
14818 {
14819 case 1:
14820 if (in)
14821 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
14822 : ix86_cost->movzbl_load);
14823 else
14824 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
14825 : ix86_cost->int_store[0] + 4);
14826 break;
14827 case 2:
14828 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
14829 default:
14830 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
14831 if (mode == TFmode)
14832 mode = XFmode;
3bb7e126 14833 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
14834 * (((int) GET_MODE_SIZE (mode)
14835 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
14836 }
14837}
0ecf09f9 14838
14839/* Compute a (partial) cost for rtx X. Return true if the complete
14840 cost has been computed, and false if subexpressions should be
14841 scanned. In either case, *TOTAL contains the cost result. */
14842
14843static bool
b96a374d 14844ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
14845{
14846 enum machine_mode mode = GET_MODE (x);
14847
14848 switch (code)
14849 {
14850 case CONST_INT:
14851 case CONST:
14852 case LABEL_REF:
14853 case SYMBOL_REF:
8fe75e43 14854 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
3c50106f 14855 *total = 3;
8fe75e43 14856 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
3c50106f 14857 *total = 2;
 14858      else if (flag_pic && SYMBOLIC_CONST (x)
 14859	       && (!TARGET_64BIT
 14860		   || (GET_CODE (x) != LABEL_REF
14861 && (GET_CODE (x) != SYMBOL_REF
12969f45 14862 || !SYMBOL_REF_LOCAL_P (x)))))
14863 *total = 1;
14864 else
14865 *total = 0;
14866 return true;
14867
14868 case CONST_DOUBLE:
14869 if (mode == VOIDmode)
14870 *total = 0;
14871 else
14872 switch (standard_80387_constant_p (x))
14873 {
14874 case 1: /* 0.0 */
14875 *total = 1;
14876 break;
881b2a96 14877 default: /* Other constants */
14878 *total = 2;
14879 break;
14880 case 0:
14881 case -1:
14882 /* Start with (MEM (SYMBOL_REF)), since that's where
14883 it'll probably end up. Add a penalty for size. */
14884 *total = (COSTS_N_INSNS (1)
3504dad3 14885 + (flag_pic != 0 && !TARGET_64BIT)
14886 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
14887 break;
14888 }
14889 return true;
14890
14891 case ZERO_EXTEND:
 14892      /* The zero extension is often completely free on x86_64, so make
14893 it as cheap as possible. */
14894 if (TARGET_64BIT && mode == DImode
14895 && GET_MODE (XEXP (x, 0)) == SImode)
14896 *total = 1;
14897 else if (TARGET_ZERO_EXTEND_WITH_AND)
14898 *total = COSTS_N_INSNS (ix86_cost->add);
14899 else
14900 *total = COSTS_N_INSNS (ix86_cost->movzx);
14901 return false;
14902
14903 case SIGN_EXTEND:
14904 *total = COSTS_N_INSNS (ix86_cost->movsx);
14905 return false;
14906
14907 case ASHIFT:
14908 if (GET_CODE (XEXP (x, 1)) == CONST_INT
14909 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
14910 {
14911 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
14912 if (value == 1)
14913 {
14914 *total = COSTS_N_INSNS (ix86_cost->add);
14915 return false;
14916 }
14917 if ((value == 2 || value == 3)
14918 && ix86_cost->lea <= ix86_cost->shift_const)
14919 {
14920 *total = COSTS_N_INSNS (ix86_cost->lea);
14921 return false;
14922 }
14923 }
5efb1046 14924 /* FALLTHRU */
14925
14926 case ROTATE:
14927 case ASHIFTRT:
14928 case LSHIFTRT:
14929 case ROTATERT:
14930 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
14931 {
14932 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14933 {
14934 if (INTVAL (XEXP (x, 1)) > 32)
 14935	        *total = COSTS_N_INSNS (ix86_cost->shift_const + 2);
 14936	      else
 14937	        *total = COSTS_N_INSNS (ix86_cost->shift_const * 2);
14938 }
14939 else
14940 {
14941 if (GET_CODE (XEXP (x, 1)) == AND)
 14942	        *total = COSTS_N_INSNS (ix86_cost->shift_var * 2);
 14943	      else
 14944	        *total = COSTS_N_INSNS (ix86_cost->shift_var * 6 + 2);
14945 }
14946 }
14947 else
14948 {
14949 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14950 *total = COSTS_N_INSNS (ix86_cost->shift_const);
14951 else
14952 *total = COSTS_N_INSNS (ix86_cost->shift_var);
14953 }
14954 return false;
14955
14956 case MULT:
14957 if (FLOAT_MODE_P (mode))
3c50106f 14958 {
14959 *total = COSTS_N_INSNS (ix86_cost->fmul);
14960 return false;
14961 }
14962 else
14963 {
14964 rtx op0 = XEXP (x, 0);
14965 rtx op1 = XEXP (x, 1);
14966 int nbits;
14967 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14968 {
14969 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
14970 for (nbits = 0; value != 0; value &= value - 1)
14971 nbits++;
14972 }
14973 else
14974 /* This is arbitrary. */
14975 nbits = 7;
14976
14977 /* Compute costs correctly for widening multiplication. */
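	  /* E.g. (mult:DI (sign_extend:DI (reg:SI))
			   (sign_extend:DI (reg:SI)))
	     is a single widening imul, so it is costed below as an
	     SImode multiply.  */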
 14978	  if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
14979 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
14980 == GET_MODE_SIZE (mode))
14981 {
14982 int is_mulwiden = 0;
14983 enum machine_mode inner_mode = GET_MODE (op0);
14984
14985 if (GET_CODE (op0) == GET_CODE (op1))
14986 is_mulwiden = 1, op1 = XEXP (op1, 0);
14987 else if (GET_CODE (op1) == CONST_INT)
14988 {
14989 if (GET_CODE (op0) == SIGN_EXTEND)
14990 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
14991 == INTVAL (op1);
14992 else
14993 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
14994 }
14995
14996 if (is_mulwiden)
14997 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
14998 }
f676971a 14999
15000 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
15001 + nbits * ix86_cost->mult_bit)
15002 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code);
15003
15004 return true;
3c50106f 15005 }
15006
15007 case DIV:
15008 case UDIV:
15009 case MOD:
15010 case UMOD:
15011 if (FLOAT_MODE_P (mode))
15012 *total = COSTS_N_INSNS (ix86_cost->fdiv);
15013 else
15014 *total = COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)]);
15015 return false;
15016
15017 case PLUS:
15018 if (FLOAT_MODE_P (mode))
15019 *total = COSTS_N_INSNS (ix86_cost->fadd);
e0c00392 15020 else if (GET_MODE_CLASS (mode) == MODE_INT
3c50106f
RH
15021 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
15022 {
15023 if (GET_CODE (XEXP (x, 0)) == PLUS
15024 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
15025 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
15026 && CONSTANT_P (XEXP (x, 1)))
15027 {
15028 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
15029 if (val == 2 || val == 4 || val == 8)
15030 {
15031 *total = COSTS_N_INSNS (ix86_cost->lea);
15032 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
15033 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
15034 outer_code);
15035 *total += rtx_cost (XEXP (x, 1), outer_code);
15036 return true;
15037 }
15038 }
15039 else if (GET_CODE (XEXP (x, 0)) == MULT
15040 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
15041 {
15042 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
15043 if (val == 2 || val == 4 || val == 8)
15044 {
15045 *total = COSTS_N_INSNS (ix86_cost->lea);
15046 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
15047 *total += rtx_cost (XEXP (x, 1), outer_code);
15048 return true;
15049 }
15050 }
15051 else if (GET_CODE (XEXP (x, 0)) == PLUS)
15052 {
15053 *total = COSTS_N_INSNS (ix86_cost->lea);
15054 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
15055 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
15056 *total += rtx_cost (XEXP (x, 1), outer_code);
15057 return true;
15058 }
15059 }
5efb1046 15060 /* FALLTHRU */
15061
15062 case MINUS:
15063 if (FLOAT_MODE_P (mode))
15064 {
15065 *total = COSTS_N_INSNS (ix86_cost->fadd);
15066 return false;
15067 }
5efb1046 15068 /* FALLTHRU */
15069
15070 case AND:
15071 case IOR:
15072 case XOR:
15073 if (!TARGET_64BIT && mode == DImode)
15074 {
15075 *total = (COSTS_N_INSNS (ix86_cost->add) * 2
15076 + (rtx_cost (XEXP (x, 0), outer_code)
15077 << (GET_MODE (XEXP (x, 0)) != DImode))
15078 + (rtx_cost (XEXP (x, 1), outer_code)
b96a374d 15079 << (GET_MODE (XEXP (x, 1)) != DImode)));
15080 return true;
15081 }
5efb1046 15082 /* FALLTHRU */
15083
15084 case NEG:
15085 if (FLOAT_MODE_P (mode))
15086 {
15087 *total = COSTS_N_INSNS (ix86_cost->fchs);
15088 return false;
15089 }
5efb1046 15090 /* FALLTHRU */
15091
15092 case NOT:
15093 if (!TARGET_64BIT && mode == DImode)
15094 *total = COSTS_N_INSNS (ix86_cost->add * 2);
15095 else
15096 *total = COSTS_N_INSNS (ix86_cost->add);
15097 return false;
15098
15099 case COMPARE:
15100 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
15101 && XEXP (XEXP (x, 0), 1) == const1_rtx
15102 && GET_CODE (XEXP (XEXP (x, 0), 2)) == CONST_INT
15103 && XEXP (x, 1) == const0_rtx)
15104 {
15105 /* This kind of construct is implemented using test[bwl].
15106 Treat it as if we had an AND. */
15107 *total = (COSTS_N_INSNS (ix86_cost->add)
15108 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
15109 + rtx_cost (const1_rtx, outer_code));
15110 return true;
15111 }
15112 return false;
15113
3c50106f 15114 case FLOAT_EXTEND:
15115 if (!TARGET_SSE_MATH
15116 || mode == XFmode
15117 || (mode == DFmode && !TARGET_SSE2))
15118 *total = 0;
15119 return false;
15120
15121 case ABS:
15122 if (FLOAT_MODE_P (mode))
15123 *total = COSTS_N_INSNS (ix86_cost->fabs);
15124 return false;
15125
15126 case SQRT:
15127 if (FLOAT_MODE_P (mode))
15128 *total = COSTS_N_INSNS (ix86_cost->fsqrt);
15129 return false;
15130
15131 case UNSPEC:
15132 if (XINT (x, 1) == UNSPEC_TP)
15133 *total = 0;
15134 return false;
15135
15136 default:
15137 return false;
15138 }
15139}
15140
15141#if TARGET_MACHO
15142
15143static int current_machopic_label_num;
15144
15145/* Given a symbol name and its associated stub, write out the
15146 definition of the stub. */
15147
15148void
b96a374d 15149machopic_output_stub (FILE *file, const char *symb, const char *stub)
15150{
15151 unsigned int length;
15152 char *binder_name, *symbol_name, lazy_ptr_name[32];
15153 int label = ++current_machopic_label_num;
15154
15155 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
15156 symb = (*targetm.strip_name_encoding) (symb);
15157
15158 length = strlen (stub);
15159 binder_name = alloca (length + 32);
15160 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
15161
15162 length = strlen (symb);
15163 symbol_name = alloca (length + 32);
15164 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
15165
15166 sprintf (lazy_ptr_name, "L%d$lz", label);
15167
15168 if (MACHOPIC_PURE)
15169 machopic_picsymbol_stub_section ();
15170 else
15171 machopic_symbol_stub_section ();
15172
15173 fprintf (file, "%s:\n", stub);
15174 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
15175
15176 if (MACHOPIC_PURE)
15177 {
15178 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
15179 fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
15180 fprintf (file, "\tjmp %%edx\n");
15181 }
15182 else
15183 fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
b96a374d 15184
b069de3b 15185 fprintf (file, "%s:\n", binder_name);
b96a374d 15186
15187 if (MACHOPIC_PURE)
15188 {
15189 fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
15190 fprintf (file, "\tpushl %%eax\n");
15191 }
15192 else
15193 fprintf (file, "\t pushl $%s\n", lazy_ptr_name);
15194
15195 fprintf (file, "\tjmp dyld_stub_binding_helper\n");
15196
15197 machopic_lazy_symbol_ptr_section ();
15198 fprintf (file, "%s:\n", lazy_ptr_name);
15199 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
15200 fprintf (file, "\t.long %s\n", binder_name);
15201}
15202#endif /* TARGET_MACHO */
15203
 15204/* Order the registers for the register allocator.  */
15205
15206void
b96a374d 15207x86_order_regs_for_local_alloc (void)
15208{
15209 int pos = 0;
15210 int i;
15211
15212 /* First allocate the local general purpose registers. */
15213 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15214 if (GENERAL_REGNO_P (i) && call_used_regs[i])
15215 reg_alloc_order [pos++] = i;
15216
15217 /* Global general purpose registers. */
15218 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15219 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
15220 reg_alloc_order [pos++] = i;
15221
15222 /* x87 registers come first in case we are doing FP math
15223 using them. */
15224 if (!TARGET_SSE_MATH)
15225 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15226 reg_alloc_order [pos++] = i;
fce5a9f2 15227
15228 /* SSE registers. */
15229 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
15230 reg_alloc_order [pos++] = i;
15231 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
15232 reg_alloc_order [pos++] = i;
15233
d1f87653 15234 /* x87 registers. */
15235 if (TARGET_SSE_MATH)
15236 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15237 reg_alloc_order [pos++] = i;
15238
15239 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
15240 reg_alloc_order [pos++] = i;
15241
 15242  /* Initialize the rest of the array, as we do not allocate some registers
15243 at all. */
15244 while (pos < FIRST_PSEUDO_REGISTER)
15245 reg_alloc_order [pos++] = 0;
15246}
194734e9 15247
15248#ifndef TARGET_USE_MS_BITFIELD_LAYOUT
15249#define TARGET_USE_MS_BITFIELD_LAYOUT 0
15250#endif
15251
15252/* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
15253 struct attribute_spec.handler. */
15254static tree
15255ix86_handle_struct_attribute (tree *node, tree name,
15256 tree args ATTRIBUTE_UNUSED,
15257 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
15258{
15259 tree *type = NULL;
15260 if (DECL_P (*node))
15261 {
15262 if (TREE_CODE (*node) == TYPE_DECL)
15263 type = &TREE_TYPE (*node);
15264 }
15265 else
15266 type = node;
15267
15268 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
15269 || TREE_CODE (*type) == UNION_TYPE)))
15270 {
9e637a26 15271 warning ("%qs attribute ignored", IDENTIFIER_POINTER (name));
15272 *no_add_attrs = true;
15273 }
15274
15275 else if ((is_attribute_p ("ms_struct", name)
15276 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
15277 || ((is_attribute_p ("gcc_struct", name)
15278 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
15279 {
9e637a26 15280 warning ("%qs incompatible attribute ignored",
15281 IDENTIFIER_POINTER (name));
15282 *no_add_attrs = true;
15283 }
15284
15285 return NULL_TREE;
15286}
15287
4977bab6 15288static bool
b96a374d 15289ix86_ms_bitfield_layout_p (tree record_type)
4977bab6 15290{
fe77449a 15291 return (TARGET_USE_MS_BITFIELD_LAYOUT &&
021bad8e 15292 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
fe77449a 15293 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
15294}
15295
15296/* Returns an expression indicating where the this parameter is
15297 located on entry to the FUNCTION. */
15298
15299static rtx
b96a374d 15300x86_this_parameter (tree function)
15301{
15302 tree type = TREE_TYPE (function);
15303
15304 if (TARGET_64BIT)
15305 {
61f71b34 15306 int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
15307 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
15308 }
15309
e767b5be 15310 if (ix86_function_regparm (type, function) > 0)
15311 {
15312 tree parm;
15313
15314 parm = TYPE_ARG_TYPES (type);
15315 /* Figure out whether or not the function has a variable number of
15316 arguments. */
3961e8fe 15317 for (; parm; parm = TREE_CHAIN (parm))
15318 if (TREE_VALUE (parm) == void_type_node)
15319 break;
e767b5be 15320 /* If not, the this parameter is in the first argument. */
483ab821 15321 if (parm)
15322 {
15323 int regno = 0;
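	  /* Hard register 0 is %eax, where regparm passes the first
	     argument; with fastcall, 'this' instead arrives in %ecx,
	     hard register 2.  */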
15324 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
15325 regno = 2;
02e02343 15326 return gen_rtx_REG (SImode, regno);
e767b5be 15327 }
15328 }
15329
61f71b34 15330 if (aggregate_value_p (TREE_TYPE (type), type))
15331 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
15332 else
15333 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
15334}
15335
15336/* Determine whether x86_output_mi_thunk can succeed. */
15337
15338static bool
15339x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
15340 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
15341 HOST_WIDE_INT vcall_offset, tree function)
15342{
15343 /* 64-bit can handle anything. */
15344 if (TARGET_64BIT)
15345 return true;
15346
15347 /* For 32-bit, everything's fine if we have one free register. */
e767b5be 15348 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
15349 return true;
15350
15351 /* Need a free register for vcall_offset. */
15352 if (vcall_offset)
15353 return false;
15354
15355 /* Need a free register for GOT references. */
15356 if (flag_pic && !(*targetm.binds_local_p) (function))
15357 return false;
15358
15359 /* Otherwise ok. */
15360 return true;
15361}
15362
15363/* Output the assembler code for a thunk function. THUNK_DECL is the
15364 declaration for the thunk function itself, FUNCTION is the decl for
15365 the target function. DELTA is an immediate constant offset to be
272d0bee 15366 added to THIS. If VCALL_OFFSET is nonzero, the word at
3961e8fe 15367 *(*this + vcall_offset) should be added to THIS. */
483ab821 15368
c590b625 15369static void
15370x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
15371 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
15372 HOST_WIDE_INT vcall_offset, tree function)
194734e9 15373{
194734e9 15374 rtx xops[3];
15375 rtx this = x86_this_parameter (function);
15376 rtx this_reg, tmp;
194734e9 15377
15378 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
15379 pull it in now and let DELTA benefit. */
15380 if (REG_P (this))
15381 this_reg = this;
15382 else if (vcall_offset)
15383 {
15384 /* Put the this parameter into %eax. */
15385 xops[0] = this;
15386 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
15387 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15388 }
15389 else
15390 this_reg = NULL_RTX;
15391
15392 /* Adjust the this parameter by a fixed constant. */
15393 if (delta)
194734e9 15394 {
483ab821 15395 xops[0] = GEN_INT (delta);
3961e8fe
RH
15396 xops[1] = this_reg ? this_reg : this;
15397 if (TARGET_64BIT)
194734e9 15398 {
3961e8fe
RH
15399 if (!x86_64_general_operand (xops[0], DImode))
15400 {
15401 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15402 xops[1] = tmp;
15403 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
15404 xops[0] = tmp;
15405 xops[1] = this;
15406 }
15407 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15408 }
15409 else
3961e8fe 15410 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
194734e9 15411 }
15412
15413 /* Adjust the this parameter by a value stored in the vtable. */
15414 if (vcall_offset)
194734e9 15415 {
3961e8fe
RH
15416 if (TARGET_64BIT)
15417 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15418 else
e767b5be
JH
15419 {
15420 int tmp_regno = 2 /* ECX */;
15421 if (lookup_attribute ("fastcall",
15422 TYPE_ATTRIBUTES (TREE_TYPE (function))))
15423 tmp_regno = 0 /* EAX */;
15424 tmp = gen_rtx_REG (SImode, tmp_regno);
15425 }
483ab821 15426
15427 xops[0] = gen_rtx_MEM (Pmode, this_reg);
15428 xops[1] = tmp;
15429 if (TARGET_64BIT)
15430 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15431 else
15432 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
483ab821 15433
15434 /* Adjust the this parameter. */
15435 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
15436 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
15437 {
15438 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
15439 xops[0] = GEN_INT (vcall_offset);
15440 xops[1] = tmp2;
15441 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15442 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
483ab821 15443 }
15444 xops[1] = this_reg;
15445 if (TARGET_64BIT)
15446 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15447 else
15448 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15449 }
194734e9 15450
15451 /* If necessary, drop THIS back to its stack slot. */
15452 if (this_reg && this_reg != this)
15453 {
15454 xops[0] = this_reg;
15455 xops[1] = this;
15456 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15457 }
194734e9 15458
89ce1c8f 15459 xops[0] = XEXP (DECL_RTL (function), 0);
15460 if (TARGET_64BIT)
15461 {
15462 if (!flag_pic || (*targetm.binds_local_p) (function))
15463 output_asm_insn ("jmp\t%P0", xops);
15464 else
fcbe3b89 15465 {
89ce1c8f 15466 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
fcbe3b89
RH
15467 tmp = gen_rtx_CONST (Pmode, tmp);
15468 tmp = gen_rtx_MEM (QImode, tmp);
15469 xops[0] = tmp;
15470 output_asm_insn ("jmp\t%A0", xops);
15471 }
15472 }
15473 else
15474 {
15475 if (!flag_pic || (*targetm.binds_local_p) (function))
15476 output_asm_insn ("jmp\t%P0", xops);
194734e9 15477 else
21ff35fb 15478#if TARGET_MACHO
15479 if (TARGET_MACHO)
15480 {
11abc112 15481 rtx sym_ref = XEXP (DECL_RTL (function), 0);
15482 tmp = (gen_rtx_SYMBOL_REF
15483 (Pmode,
11abc112 15484 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
15485 tmp = gen_rtx_MEM (QImode, tmp);
15486 xops[0] = tmp;
15487 output_asm_insn ("jmp\t%0", xops);
15488 }
15489 else
15490#endif /* TARGET_MACHO */
194734e9 15491 {
15492 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
15493 output_set_got (tmp);
15494
15495 xops[1] = tmp;
15496 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
15497 output_asm_insn ("jmp\t{*}%1", xops);
15498 }
15499 }
15500}
e2500fed 15501
1bc7c5b6 15502static void
b96a374d 15503x86_file_start (void)
1bc7c5b6
ZW
15504{
15505 default_file_start ();
15506 if (X86_FILE_START_VERSION_DIRECTIVE)
15507 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
15508 if (X86_FILE_START_FLTUSED)
15509 fputs ("\t.global\t__fltused\n", asm_out_file);
15510 if (ix86_asm_dialect == ASM_INTEL)
15511 fputs ("\t.intel_syntax\n", asm_out_file);
15512}
15513
e932b21b 15514int
b96a374d 15515x86_field_alignment (tree field, int computed)
15516{
15517 enum machine_mode mode;
15518 tree type = TREE_TYPE (field);
15519
15520 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
e932b21b 15521 return computed;
15522 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
15523 ? get_inner_array_type (type) : type);
15524 if (mode == DFmode || mode == DCmode
15525 || GET_MODE_CLASS (mode) == MODE_INT
15526 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
15527 return MIN (32, computed);
15528 return computed;
15529}
15530
15531/* Output assembler code to FILE to increment profiler label # LABELNO
15532 for profiling a function entry. */
15533void
b96a374d 15534x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
15535{
15536 if (TARGET_64BIT)
15537 if (flag_pic)
15538 {
15539#ifndef NO_PROFILE_COUNTERS
15540 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
15541#endif
15542 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
15543 }
15544 else
15545 {
15546#ifndef NO_PROFILE_COUNTERS
15547 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
15548#endif
15549 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15550 }
15551 else if (flag_pic)
15552 {
15553#ifndef NO_PROFILE_COUNTERS
15554 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
15555 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
15556#endif
15557 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
15558 }
15559 else
15560 {
15561#ifndef NO_PROFILE_COUNTERS
ff6e2d3e 15562 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
15563 PROFILE_COUNT_REGISTER);
15564#endif
15565 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15566 }
15567}
15568
15569/* We don't have exact information about the insn sizes, but we may assume
15570 quite safely that we are informed about all 1 byte insns and memory
c51e6d85 15571 address sizes. This is enough to eliminate unnecessary padding in
15572 99% of cases. */
15573
15574static int
b96a374d 15575min_insn_size (rtx insn)
15576{
15577 int l = 0;
15578
15579 if (!INSN_P (insn) || !active_insn_p (insn))
15580 return 0;
15581
 15582  /* Discard alignments we've emitted and jump instructions.  */
15583 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
15584 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
15585 return 0;
15586 if (GET_CODE (insn) == JUMP_INSN
15587 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
15588 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
15589 return 0;
15590
15591 /* Important case - calls are always 5 bytes.
 15592     It is common to have many calls in a row.  */
15593 if (GET_CODE (insn) == CALL_INSN
15594 && symbolic_reference_mentioned_p (PATTERN (insn))
15595 && !SIBLING_CALL_P (insn))
15596 return 5;
15597 if (get_attr_length (insn) <= 1)
15598 return 1;
15599
15600 /* For normal instructions we may rely on the sizes of addresses
 15601     and the presence of a symbol to require 4 bytes of encoding.
15602 This is not the case for jumps where references are PC relative. */
15603 if (GET_CODE (insn) != JUMP_INSN)
15604 {
15605 l = get_attr_length_address (insn);
15606 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
15607 l = 4;
15608 }
15609 if (l)
15610 return 1+l;
15611 else
15612 return 2;
15613}
15614
c51e6d85 15615/* The AMD K8 core mispredicts jumps when there are more than 3 jumps in a
 15616   16 byte window.  */
15617
15618static void
be04394b 15619ix86_avoid_jump_misspredicts (void)
15620{
15621 rtx insn, start = get_insns ();
15622 int nbytes = 0, njumps = 0;
15623 int isjump = 0;
15624
15625 /* Look for all minimal intervals of instructions containing 4 jumps.
15626 The intervals are bounded by START and INSN. NBYTES is the total
15627 size of instructions in the interval including INSN and not including
15628 START. When the NBYTES is smaller than 16 bytes, it is possible
15629 that the end of START and INSN ends up in the same 16byte page.
15630
15631 The smallest offset in the page INSN can start is the case where START
15632 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
15633 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
15634 */
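  /* For example, if the three preceding jumps plus INSN together occupy
     nbytes = 8 < 16 bytes and INSN itself is 2 bytes, we pad by
     15 - 8 + 2 = 9 bytes so that INSN can no longer share a 16 byte
     window with those jumps.  */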
15635 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
15636 {
15637
15638 nbytes += min_insn_size (insn);
15639 if (dump_file)
 15640	fprintf (dump_file, "Insn %i estimated to %i bytes\n",
15641 INSN_UID (insn), min_insn_size (insn));
15642 if ((GET_CODE (insn) == JUMP_INSN
15643 && GET_CODE (PATTERN (insn)) != ADDR_VEC
15644 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
15645 || GET_CODE (insn) == CALL_INSN)
15646 njumps++;
15647 else
15648 continue;
15649
15650 while (njumps > 3)
15651 {
15652 start = NEXT_INSN (start);
15653 if ((GET_CODE (start) == JUMP_INSN
15654 && GET_CODE (PATTERN (start)) != ADDR_VEC
15655 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
15656 || GET_CODE (start) == CALL_INSN)
15657 njumps--, isjump = 1;
15658 else
15659 isjump = 0;
15660 nbytes -= min_insn_size (start);
15661 }
15662 if (njumps < 0)
15663 abort ();
15664 if (dump_file)
15665 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
15666 INSN_UID (start), INSN_UID (insn), nbytes);
15667
15668 if (njumps == 3 && isjump && nbytes < 16)
15669 {
15670 int padsize = 15 - nbytes + min_insn_size (insn);
15671
15672 if (dump_file)
15673 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
15674 INSN_UID (insn), padsize);
15675 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
15676 }
15677 }
15678}
15679
be04394b 15680/* AMD Athlon works faster
d1f87653 15681   when RET is not the destination of a conditional jump or directly preceded
 15682   by another jump instruction.  We avoid the penalty by replacing such a RET
 15683   with the longer return_internal_long form.  */
18dbd950 15684static void
be04394b 15685ix86_pad_returns (void)
2a500b9e
JH
15686{
15687 edge e;
628f6a4e 15688 edge_iterator ei;
2a500b9e 15689
628f6a4e
BE
15690 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
15691 {
15692 basic_block bb = e->src;
15693 rtx ret = BB_END (bb);
15694 rtx prev;
15695 bool replace = false;
15696
15697 if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
15698 || !maybe_hot_bb_p (bb))
15699 continue;
15700 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
15701 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
15702 break;
15703 if (prev && GET_CODE (prev) == CODE_LABEL)
15704 {
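	  /* The return follows a label: it needs the long form if any
	     frequent predecessor edge reaches that label by a jump
	     rather than by falling through.  */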
15705 edge e;
15706 edge_iterator ei;
15707
15708 FOR_EACH_EDGE (e, ei, bb->preds)
15709 if (EDGE_FREQUENCY (e) && e->src->index >= 0
15710 && !(e->flags & EDGE_FALLTHRU))
15711 replace = true;
15712 }
15713 if (!replace)
15714 {
15715 prev = prev_active_insn (ret);
15716 if (prev
15717 && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
15718 || GET_CODE (prev) == CALL_INSN))
253c7a00 15719 replace = true;
 15720	  /* Empty functions get a branch mispredict even when the jump destination
15721 is not visible to us. */
15722 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
15723 replace = true;
15724 }
15725 if (replace)
15726 {
15727 emit_insn_before (gen_return_internal_long (), ret);
15728 delete_insn (ret);
15729 }
15730 }
15731}
15732
15733/* Implement machine specific optimizations. We implement padding of returns
15734 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
15735static void
15736ix86_reorg (void)
15737{
15738 if (TARGET_ATHLON_K8 && optimize && !optimize_size)
15739 ix86_pad_returns ();
15740 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
15741 ix86_avoid_jump_misspredicts ();
15742}
15743
15744/* Return nonzero when QImode register that must be represented via REX prefix
15745 is used. */
15746bool
b96a374d 15747x86_extended_QIreg_mentioned_p (rtx insn)
15748{
15749 int i;
15750 extract_insn_cached (insn);
15751 for (i = 0; i < recog_data.n_operands; i++)
15752 if (REG_P (recog_data.operand[i])
15753 && REGNO (recog_data.operand[i]) >= 4)
15754 return true;
15755 return false;
15756}
15757
15758/* Return nonzero when P points to register encoded via REX prefix.
15759 Called via for_each_rtx. */
15760static int
b96a374d 15761extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
15762{
15763 unsigned int regno;
15764 if (!REG_P (*p))
15765 return 0;
15766 regno = REGNO (*p);
15767 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
15768}

/* Return true when INSN mentions a register that must be encoded using
   a REX prefix.  */
bool
x86_extended_reg_mentioned_p (rtx insn)
{
  return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
}

/* Generate an unsigned DImode/SImode to FP conversion.  This is the same
   code optabs would emit if we didn't have TFmode patterns.  */

void
x86_emit_floatuns (rtx operands[2])
{
  rtx neglab, donelab, i0, i1, f0, in, out;
  enum machine_mode mode, inmode;

  inmode = GET_MODE (operands[1]);
  if (inmode != SImode
      && inmode != DImode)
    abort ();

  out = operands[0];
  in = force_reg (inmode, operands[1]);
  mode = GET_MODE (out);
  neglab = gen_label_rtx ();
  donelab = gen_label_rtx ();
  i1 = gen_reg_rtx (inmode);
  f0 = gen_reg_rtx (mode);

  emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);

  emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
  emit_jump_insn (gen_jump (donelab));
  emit_barrier ();

  emit_label (neglab);

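  /* The input has its top bit set, so a straight signed conversion would
     see a negative value.  Halve it, folding the shifted-out low bit
     back in so the final rounding is unaffected, convert, and double the
     result.  Illustrative sketch for the DImode input 0xFFFFFFFFFFFFFFFF:
       i0 = in >> 1   ->  0x7FFFFFFFFFFFFFFF
       i1 = in & 1    ->  0x0000000000000001
       i0 = i0 | i1   ->  0x7FFFFFFFFFFFFFFF
     so converting I0 is an ordinary signed conversion, and F0 + F0
     recovers the original magnitude to within rounding.  */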
  i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL, 1,
                            OPTAB_DIRECT);
  i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL, 1,
                            OPTAB_DIRECT);
  i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
  expand_float (f0, i0, 0);
  emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));

  emit_label (donelab);
}
\f
/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   with all elements equal to VAR.  Return true if successful.  */

static bool
ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
                                   rtx target, rtx val)
{
  enum machine_mode smode, wsmode, wvmode;
  rtx x;

  switch (mode)
    {
    case V2SImode:
    case V2SFmode:
      if (!mmx_ok && !TARGET_SSE)
        return false;
      /* FALLTHRU */

    case V2DFmode:
    case V2DImode:
    case V4SFmode:
    case V4SImode:
      val = force_reg (GET_MODE_INNER (mode), val);
      x = gen_rtx_VEC_DUPLICATE (mode, val);
      emit_insn (gen_rtx_SET (VOIDmode, target, x));
      return true;

    case V4HImode:
      if (!mmx_ok)
        return false;
      val = gen_lowpart (SImode, val);
      x = gen_rtx_TRUNCATE (HImode, val);
      x = gen_rtx_VEC_DUPLICATE (mode, x);
      emit_insn (gen_rtx_SET (VOIDmode, target, x));
      return true;

    case V8QImode:
      if (!mmx_ok)
        return false;
      smode = QImode;
      wsmode = HImode;
      wvmode = V4HImode;
      goto widen;
    case V8HImode:
      smode = HImode;
      wsmode = SImode;
      wvmode = V4SImode;
      goto widen;
    case V16QImode:
      smode = QImode;
      wsmode = HImode;
      wvmode = V8HImode;
      goto widen;
    widen:
      /* Replicate the value once into the next wider mode and recurse.  */
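      /* E.g. (illustrative) for V16QImode: an element value of 0xAB is
         widened to the HImode value 0xABAB, and the recursive call then
         builds a V8HImode duplicate -- the same 128-bit pattern.  */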
      val = convert_modes (wsmode, smode, val, true);
      x = expand_simple_binop (wsmode, ASHIFT, val,
                               GEN_INT (GET_MODE_BITSIZE (smode)),
                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
      val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);

      x = gen_reg_rtx (wvmode);
      if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
        gcc_unreachable ();
      emit_move_insn (target, gen_lowpart (mode, x));
      return true;

    default:
      return false;
    }
}

/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   whose low element is VAR, and other elements are zero.  Return true
   if successful.  */

static bool
ix86_expand_vector_init_low_nonzero (bool mmx_ok, enum machine_mode mode,
                                     rtx target, rtx var)
{
  enum machine_mode vsimode;
  rtx x;

  switch (mode)
    {
    case V2SFmode:
    case V2SImode:
      if (!mmx_ok && !TARGET_SSE)
        return false;
      /* FALLTHRU */

    case V2DFmode:
    case V2DImode:
      var = force_reg (GET_MODE_INNER (mode), var);
      x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
      emit_insn (gen_rtx_SET (VOIDmode, target, x));
      return true;

    case V4SFmode:
    case V4SImode:
      var = force_reg (GET_MODE_INNER (mode), var);
      x = gen_rtx_VEC_DUPLICATE (mode, var);
      x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, target, x));
      return true;

    case V8HImode:
    case V16QImode:
      vsimode = V4SImode;
      goto widen;
    case V4HImode:
    case V8QImode:
      if (!mmx_ok)
        return false;
      vsimode = V2SImode;
      goto widen;
    widen:
      /* Zero extend the variable element to SImode and recurse.  */
      var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);

      x = gen_reg_rtx (vsimode);
      if (!ix86_expand_vector_init_low_nonzero (mmx_ok, vsimode, x, var))
        gcc_unreachable ();

      emit_move_insn (target, gen_lowpart (mode, x));
      return true;

    default:
      return false;
    }
}

/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   consisting of the values in VALS.  It is known that all elements
   except ONE_VAR are constants.  Return true if successful.  */

static bool
ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
                                 rtx target, rtx vals, int one_var)
{
  rtx var = XVECEXP (vals, 0, one_var);
  enum machine_mode wmode;
  rtx const_vec, x;

  /* Zero the variable element in a copy of VALS, so that VALS itself is
     left intact for the caller's fallback paths.  */
  const_vec = copy_rtx (vals);
  XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
  const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));

  switch (mode)
    {
    case V2DFmode:
    case V2DImode:
    case V2SFmode:
    case V2SImode:
      /* For the two element vectors, it's just as easy to use
         the general case.  */
      return false;

    case V4SFmode:
    case V4SImode:
    case V8HImode:
    case V4HImode:
      break;

    case V16QImode:
      wmode = V8HImode;
      goto widen;
    case V8QImode:
      wmode = V4HImode;
      goto widen;
    widen:
      /* There's no way to set one QImode entry easily.  Combine
         the variable value with its adjacent constant value, and
         promote to an HImode set.  */
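      /* E.g. (illustrative) for ONE_VAR == 5 in a V16QImode build: pair
         the variable byte with constant element 4, form
         (var << 8) | elt4 in HImode, and store that as element 2
         (ONE_VAR >> 1) of the V8HImode view of the vector.  */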
      x = XVECEXP (vals, 0, one_var ^ 1);
      if (one_var & 1)
        {
          var = convert_modes (HImode, QImode, var, true);
          var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
                                     NULL_RTX, 1, OPTAB_LIB_WIDEN);
          x = GEN_INT (INTVAL (x) & 0xff);
        }
      else
        {
          var = convert_modes (HImode, QImode, var, true);
          x = gen_int_mode (INTVAL (x) << 8, HImode);
        }
      if (x != const0_rtx)
        var = expand_simple_binop (HImode, IOR, var, x, var,
                                   1, OPTAB_LIB_WIDEN);

      x = gen_reg_rtx (wmode);
      emit_move_insn (x, gen_lowpart (wmode, const_vec));
      /* Insert the combined HImode element into the widened copy, not
         into TARGET, which is not initialized yet.  */
      ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);

      emit_move_insn (target, gen_lowpart (mode, x));
      return true;

    default:
      return false;
    }

  emit_move_insn (target, const_vec);
  ix86_expand_vector_set (mmx_ok, target, var, one_var);
  return true;
}

/* A subroutine of ix86_expand_vector_init.  Handle the most general case:
   all values variable, and none identical.  */

static void
ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
                                 rtx target, rtx vals)
{
  enum machine_mode half_mode = GET_MODE_INNER (mode);
  rtx op0 = NULL, op1 = NULL;
  bool use_vec_concat = false;

  switch (mode)
    {
    case V2SFmode:
    case V2SImode:
      if (!mmx_ok && !TARGET_SSE)
        break;
      /* FALLTHRU */

    case V2DFmode:
    case V2DImode:
      /* For the two element vectors, we always implement VEC_CONCAT.  */
      op0 = XVECEXP (vals, 0, 0);
      op1 = XVECEXP (vals, 0, 1);
      use_vec_concat = true;
      break;

    case V4SFmode:
      half_mode = V2SFmode;
      goto half;
    case V4SImode:
      half_mode = V2SImode;
      goto half;
    half:
      {
        rtvec v;

        /* For V4SF and V4SI, we implement a concat of two V2 vectors.
           Recurse to load the two halves.  */

        op0 = gen_reg_rtx (half_mode);
        v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
        ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));

        op1 = gen_reg_rtx (half_mode);
        v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
        ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));

        use_vec_concat = true;
      }
      break;

    case V8HImode:
    case V16QImode:
    case V4HImode:
    case V8QImode:
      break;

    default:
      gcc_unreachable ();
    }

  if (use_vec_concat)
    {
      if (!register_operand (op0, half_mode))
        op0 = force_reg (half_mode, op0);
      if (!register_operand (op1, half_mode))
        op1 = force_reg (half_mode, op1);

      emit_insn (gen_rtx_SET (VOIDmode, target,
                              gen_rtx_VEC_CONCAT (mode, op0, op1)));
    }
  else
    {
      int i, j, n_elts, n_words, n_elt_per_word;
      enum machine_mode inner_mode;
      rtx words[4], shift;

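      /* Pack the vector elements into integer words, filling each word
         from its most significant element down.  E.g. (illustrative)
         for V8HImode on a 32-bit target, word I ends up as
         (elt[2*I + 1] << 16) | elt[2*I].  */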
      inner_mode = GET_MODE_INNER (mode);
      n_elts = GET_MODE_NUNITS (mode);
      n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
      n_elt_per_word = n_elts / n_words;
      shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));

      for (i = 0; i < n_words; ++i)
        {
          rtx word = NULL_RTX;

          for (j = 0; j < n_elt_per_word; ++j)
            {
              rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
              elt = convert_modes (word_mode, inner_mode, elt, true);

              if (j == 0)
                word = elt;
              else
                {
                  word = expand_simple_binop (word_mode, ASHIFT, word, shift,
                                              word, 1, OPTAB_LIB_WIDEN);
                  word = expand_simple_binop (word_mode, IOR, word, elt,
                                              word, 1, OPTAB_LIB_WIDEN);
                }
            }

          words[i] = word;
        }

      if (n_words == 1)
        emit_move_insn (target, gen_lowpart (mode, words[0]));
      else if (n_words == 2)
        {
          rtx tmp = gen_reg_rtx (mode);
          emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
          emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
          emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
          emit_move_insn (target, tmp);
        }
      else if (n_words == 4)
        {
          rtx tmp = gen_reg_rtx (V4SImode);
          vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
          /* Build the V4SImode vector into TMP, then view it in MODE.  */
          ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
          emit_move_insn (target, gen_lowpart (mode, tmp));
        }
      else
        gcc_unreachable ();
    }
}

/* Initialize vector TARGET via VALS.  Suppress the use of MMX
   instructions unless MMX_OK is true.  */

void
ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
{
  enum machine_mode mode = GET_MODE (target);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  int n_var = 0, one_var = -1;
  bool all_same = true, all_const_zero = true;
  int i;
  rtx x;

  for (i = 0; i < n_elts; ++i)
    {
      x = XVECEXP (vals, 0, i);
      if (!CONSTANT_P (x))
        n_var++, one_var = i;
      else if (x != CONST0_RTX (inner_mode))
        all_const_zero = false;
      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
        all_same = false;
    }

  /* Constants are best loaded from the constant pool.  */
  if (n_var == 0)
    {
      emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
      return;
    }

  /* If all values are identical, broadcast the value.  */
  if (all_same
      && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
                                            XVECEXP (vals, 0, 0)))
    return;

  /* Values where only one field is non-constant are best loaded from
     the pool and overwritten via move later.  */
  if (n_var == 1)
    {
      if (all_const_zero && one_var == 0
          && ix86_expand_vector_init_low_nonzero (mmx_ok, mode, target,
                                                  XVECEXP (vals, 0, 0)))
        return;

      if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
        return;
    }

  ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
}

void
ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
{
  enum machine_mode mode = GET_MODE (target);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  bool use_vec_merge = false;
  rtx tmp;

  switch (mode)
    {
    case V2SFmode:
    case V2SImode:
      if (!mmx_ok)
        break;
      /* FALLTHRU */

    case V2DFmode:
    case V2DImode:
      {
        rtx op0, op1;

        /* For the two element vectors, we implement a VEC_CONCAT with
           the extraction of the other element.  */

        tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
        tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);

        if (elt == 0)
          op0 = val, op1 = tmp;
        else
          op0 = tmp, op1 = val;

        tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
        emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
      }
      return;

    case V4SFmode:
      switch (elt)
        {
        case 0:
          use_vec_merge = true;
          break;

        case 1:
          /* In the element diagrams below, vectors are written from the
             highest element down to the lowest.  */
          /* tmp = target = A B C D */
          tmp = copy_to_reg (target);

          /* target = C C D D */
          emit_insn (gen_sse_unpcklps (target, target, target));

          /* target = C C D X */
          ix86_expand_vector_set (false, target, val, 0);

          /* target = A B X D */
          emit_insn (gen_sse_shufps_1 (target, target, tmp,
                                       GEN_INT (1), GEN_INT (0),
                                       GEN_INT (2), GEN_INT (3)));
          return;

        case 2:
          /* tmp = target = A B C D */
          tmp = copy_to_reg (target);

          /* tmp = A B C X; the new value must come in through TMP, since
             the shuffle takes the low two elements from TARGET.  */
          ix86_expand_vector_set (false, tmp, val, 0);

          /* target = A X C D */
          emit_insn (gen_sse_shufps_1 (target, target, tmp,
                                       GEN_INT (0), GEN_INT (1),
                                       GEN_INT (0), GEN_INT (3)));
          return;

        case 3:
          /* tmp = target = A B C D */
          tmp = copy_to_reg (target);

          /* tmp = A B C X */
          ix86_expand_vector_set (false, tmp, val, 0);

          /* target = X B C D */
          emit_insn (gen_sse_shufps_1 (target, target, tmp,
                                       GEN_INT (0), GEN_INT (1),
                                       GEN_INT (2), GEN_INT (0)));
          return;

        default:
          gcc_unreachable ();
        }
      break;

    case V4SImode:
      /* Element 0 handled by vec_merge below.  */
      if (elt == 0)
        {
          use_vec_merge = true;
          break;
        }

      if (TARGET_SSE2)
        {
          /* With SSE2, use integer shuffles to swap element 0 and ELT,
             store into element 0, then shuffle them back.  */
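          /* E.g. (illustrative) for ELT == 2 the permutation ORDER is
             { 2, 1, 0, 3 }, which swaps elements 0 and 2; applying the
             same pshufd twice is the identity, so the second shuffle
             undoes the first once the new value is in element 0.  */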

          rtx order[4];

          order[0] = GEN_INT (elt);
          order[1] = const1_rtx;
          order[2] = const2_rtx;
          order[3] = GEN_INT (3);
          order[elt] = const0_rtx;

          emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
                                        order[1], order[2], order[3]));

          ix86_expand_vector_set (false, target, val, 0);

          emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
                                        order[1], order[2], order[3]));
        }
      else
        {
          /* For SSE1, we have to reuse the V4SF code.  */
          ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
                                  gen_lowpart (SFmode, val), elt);
        }
      return;

    case V8HImode:
      use_vec_merge = TARGET_SSE2;
      break;
    case V4HImode:
      use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
      break;

    case V16QImode:
    case V8QImode:
    default:
      break;
    }

  if (use_vec_merge)
    {
      tmp = gen_rtx_VEC_DUPLICATE (mode, val);
      tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
    }
  else
    {
      /* No direct insertion is available; go through memory.  */
      rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);

      emit_move_insn (mem, target);

      tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
      emit_move_insn (tmp, val);

      emit_move_insn (target, mem);
    }
}

void
ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
{
  enum machine_mode mode = GET_MODE (vec);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  bool use_vec_extr = false;
  rtx tmp;

  switch (mode)
    {
    case V2SImode:
    case V2SFmode:
      if (!mmx_ok)
        break;
      /* FALLTHRU */

    case V2DFmode:
    case V2DImode:
      use_vec_extr = true;
      break;

    case V4SFmode:
      /* Shuffle the requested element into element 0 of a scratch
         vector, then extract element 0 below.  */
      switch (elt)
        {
        case 0:
          tmp = vec;
          break;

        case 1:
        case 3:
          tmp = gen_reg_rtx (mode);
          emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
                                       GEN_INT (elt), GEN_INT (elt),
                                       GEN_INT (elt), GEN_INT (elt)));
          break;

        case 2:
          tmp = gen_reg_rtx (mode);
          emit_insn (gen_sse_unpckhps (tmp, vec, vec));
          break;

        default:
          gcc_unreachable ();
        }
      vec = tmp;
      use_vec_extr = true;
      elt = 0;
      break;

    case V4SImode:
      if (TARGET_SSE2)
        {
          switch (elt)
            {
            case 0:
              tmp = vec;
              break;

            case 1:
            case 3:
              tmp = gen_reg_rtx (mode);
              emit_insn (gen_sse2_pshufd_1 (tmp, vec,
                                            GEN_INT (elt), GEN_INT (elt),
                                            GEN_INT (elt), GEN_INT (elt)));
              break;

            case 2:
              tmp = gen_reg_rtx (mode);
              emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
              break;

            default:
              gcc_unreachable ();
            }
          vec = tmp;
          use_vec_extr = true;
          elt = 0;
        }
      else
        {
          /* For SSE1, we have to reuse the V4SF code.  */
          ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
                                      gen_lowpart (V4SFmode, vec), elt);
          return;
        }
      break;

    case V8HImode:
      use_vec_extr = TARGET_SSE2;
      break;
    case V4HImode:
      use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
      break;

    case V16QImode:
    case V8QImode:
      /* ??? Could extract the appropriate HImode element and shift.  */
    default:
      break;
    }

  if (use_vec_extr)
    {
      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
      tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);

      /* Let the rtl optimizers know about the zero extension performed.  */
      if (inner_mode == HImode)
        {
          tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
          target = gen_lowpart (SImode, target);
        }

      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
    }
  else
    {
      rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);

      emit_move_insn (mem, vec);

      tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
      emit_move_insn (target, tmp);
    }
}
\f
/* Implements target hook vector_mode_supported_p.  */
static bool
ix86_vector_mode_supported_p (enum machine_mode mode)
{
  if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
    return true;
  if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
    return true;
  if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
    return true;
  if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
    return true;
  return false;
}

/* Worker function for TARGET_MD_ASM_CLOBBERS.

   We do this in the new i386 backend to maintain source compatibility
   with the old cc0-based compiler.  */

static tree
ix86_md_asm_clobbers (tree clobbers)
{
  clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
                        clobbers);
  clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
                        clobbers);
  clobbers = tree_cons (NULL_TREE, build_string (7, "dirflag"),
                        clobbers);
  return clobbers;
}

/* Worker function for REVERSE_CONDITION.  */

enum rtx_code
ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
{
  return (mode != CCFPmode && mode != CCFPUmode
          ? reverse_condition (code)
          : reverse_condition_maybe_unordered (code));
}

/* Output code to perform an x87 FP register move, from OPERANDS[1]
   to OPERANDS[0].  */

const char *
output_387_reg_move (rtx insn, rtx *operands)
{
  if (REG_P (operands[1])
      && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
    {
      if (REGNO (operands[0]) == FIRST_STACK_REG
          && TARGET_USE_FFREEP)
        return "ffreep\t%y0";
      return "fstp\t%y0";
    }
  if (STACK_TOP_P (operands[0]))
    return "fld%z1\t%y1";
  return "fst\t%y0";
}
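
/* Summary of the cases above (illustrative): a dying source is popped,
   either with ffreep (a cheap free-and-pop when the destination is the
   top of stack and TARGET_USE_FFREEP allows it) or with fstp, which
   stores and pops in one instruction; a live source is either loaded on
   top of the stack (fld) or copied into the destination (fst).  */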

/* Output code to perform a conditional jump to LABEL, if the C2 flag in
   the FP status register is set.  */

void
ix86_emit_fp_unordered_jump (rtx label)
{
  rtx reg = gen_reg_rtx (HImode);
  rtx temp;

  emit_insn (gen_x86_fnstsw_1 (reg));

  if (TARGET_USE_SAHF)
    {
      emit_insn (gen_x86_sahf_1 (reg));

      temp = gen_rtx_REG (CCmode, FLAGS_REG);
      temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
    }
  else
    {
      emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));

      temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
    }

  temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
                               gen_rtx_LABEL_REF (VOIDmode, label),
                               pc_rtx);
  temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
  emit_jump_insn (temp);
}
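
/* Background for the two paths above (illustrative): fnstsw leaves the
   x87 status word in a general register with C2 in bit 2 of the high
   byte (mask 0x04).  sahf copies that byte into EFLAGS, where C2 lands
   in the parity flag, which the UNORDERED test inspects; without sahf,
   a test of the high byte against 0x04 checks C2 directly.  */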

/* Output code to perform a log1p XFmode calculation.  */

void
ix86_emit_i387_log1p (rtx op0, rtx op1)
{
  rtx label1 = gen_label_rtx ();
  rtx label2 = gen_label_rtx ();

  rtx tmp = gen_reg_rtx (XFmode);
  rtx tmp2 = gen_reg_rtx (XFmode);

  emit_insn (gen_absxf2 (tmp, op1));
  emit_insn (gen_cmpxf (tmp,
             CONST_DOUBLE_FROM_REAL_VALUE (
                REAL_VALUE_ATOF ("0.29289321881345247561810596348408353",
                                 XFmode),
                XFmode)));
  emit_jump_insn (gen_bge (label1));

  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
  emit_insn (gen_fyl2xp1_xf3 (op0, tmp2, op1));
  emit_jump (label2);

  emit_label (label1);
  emit_move_insn (tmp, CONST1_RTX (XFmode));
  emit_insn (gen_addxf3 (tmp, op1, tmp));
  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
  emit_insn (gen_fyl2x_xf3 (op0, tmp2, tmp));

  emit_label (label2);
}
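
/* Background on the threshold above (illustrative): fyl2xp1 computes
   y * log2(x + 1) but is only specified for |x| < 1 - sqrt(2)/2, roughly
   0.29289321881; within that range it avoids the cancellation incurred
   by forming 1 + x explicitly.  Outside it, computing y * log2(1 + x)
   with fyl2x is accurate enough.  With y = ln 2 loaded via fldln2, both
   branches yield log1p (x).  */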

/* Solaris named-section hook.  Parameters are as for
   named_section_real.  */

static void
i386_solaris_elf_named_section (const char *name, unsigned int flags,
                                tree decl)
{
  /* With Binutils 2.15, the "@unwind" marker must be specified on
     every occurrence of the ".eh_frame" section, not just the first
     one.  */
  if (TARGET_64BIT
      && strcmp (name, ".eh_frame") == 0)
    {
      fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
               flags & SECTION_WRITE ? "aw" : "a");
      return;
    }
  default_elf_asm_named_section (name, flags, decl);
}

#include "gt-i386.h"