/* gcc/config/i386/i386.c — IA-32/x86-64 back-end subroutines (GCC).  */
/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */
2a2ab3f9 22#include "config.h"
bb5177ac 23#include "system.h"
4977bab6
ZW
24#include "coretypes.h"
25#include "tm.h"
2a2ab3f9 26#include "rtl.h"
6baf1cc8
BS
27#include "tree.h"
28#include "tm_p.h"
2a2ab3f9
JVA
29#include "regs.h"
30#include "hard-reg-set.h"
31#include "real.h"
32#include "insn-config.h"
33#include "conditions.h"
2a2ab3f9 34#include "output.h"
8bc527af 35#include "insn-codes.h"
2a2ab3f9 36#include "insn-attr.h"
2a2ab3f9 37#include "flags.h"
a8ffcc81 38#include "except.h"
ecbc4695 39#include "function.h"
00c79232 40#include "recog.h"
ced8dd8c 41#include "expr.h"
e78d8e51 42#include "optabs.h"
f103890b 43#include "toplev.h"
e075ae69 44#include "basic-block.h"
1526a060 45#include "ggc.h"
672a6f42
NB
46#include "target.h"
47#include "target-def.h"
f1e639b1 48#include "langhooks.h"
dafc5b82 49#include "cgraph.h"
cd3ce9b4 50#include "tree-gimple.h"
2a2ab3f9 51
/* Fallback stack-limit value when the target headers do not provide one.
   -1 means "no explicit stack limit checking".  */
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.
   QImode -> 0, HImode -> 1, SImode -> 2, DImode -> 3, anything else -> 4.  */
#define MODE_INDEX(mode)     \
  ((mode) == QImode ? 0      \
   : (mode) == HImode ? 1    \
   : (mode) == SImode ? 2    \
   : (mode) == DImode ? 3    \
   : 4)
2ab0437e 64/* Processor costs (relative to an add) */
fce5a9f2 65static const
2ab0437e
JH
66struct processor_costs size_cost = { /* costs for tunning for size */
67 2, /* cost of an add instruction */
68 3, /* cost of a lea instruction */
69 2, /* variable shift costs */
70 3, /* constant shift costs */
4977bab6 71 {3, 3, 3, 3, 5}, /* cost of starting a multiply */
2ab0437e 72 0, /* cost of multiply per each bit set */
4977bab6 73 {3, 3, 3, 3, 5}, /* cost of a divide/mod */
44cf5b6a
JH
74 3, /* cost of movsx */
75 3, /* cost of movzx */
2ab0437e
JH
76 0, /* "large" insn */
77 2, /* MOVE_RATIO */
78 2, /* cost for loading QImode using movzbl */
79 {2, 2, 2}, /* cost of loading integer registers
80 in QImode, HImode and SImode.
81 Relative to reg-reg move (2). */
82 {2, 2, 2}, /* cost of storing integer registers */
83 2, /* cost of reg,reg fld/fst */
84 {2, 2, 2}, /* cost of loading fp registers
85 in SFmode, DFmode and XFmode */
86 {2, 2, 2}, /* cost of loading integer registers */
87 3, /* cost of moving MMX register */
88 {3, 3}, /* cost of loading MMX registers
89 in SImode and DImode */
90 {3, 3}, /* cost of storing MMX registers
91 in SImode and DImode */
92 3, /* cost of moving SSE register */
93 {3, 3, 3}, /* cost of loading SSE registers
94 in SImode, DImode and TImode */
95 {3, 3, 3}, /* cost of storing SSE registers
96 in SImode, DImode and TImode */
97 3, /* MMX or SSE register to integer */
f4365627
JH
98 0, /* size of prefetch block */
99 0, /* number of parallel prefetches */
4977bab6 100 1, /* Branch cost */
229b303a
RS
101 2, /* cost of FADD and FSUB insns. */
102 2, /* cost of FMUL instruction. */
103 2, /* cost of FDIV instruction. */
104 2, /* cost of FABS instruction. */
105 2, /* cost of FCHS instruction. */
106 2, /* cost of FSQRT instruction. */
2ab0437e 107};
229b303a 108
32b5b1aa 109/* Processor costs (relative to an add) */
fce5a9f2 110static const
32b5b1aa 111struct processor_costs i386_cost = { /* 386 specific costs */
e9a25f70 112 1, /* cost of an add instruction */
32b5b1aa
SC
113 1, /* cost of a lea instruction */
114 3, /* variable shift costs */
115 2, /* constant shift costs */
4977bab6 116 {6, 6, 6, 6, 6}, /* cost of starting a multiply */
32b5b1aa 117 1, /* cost of multiply per each bit set */
4977bab6 118 {23, 23, 23, 23, 23}, /* cost of a divide/mod */
44cf5b6a
JH
119 3, /* cost of movsx */
120 2, /* cost of movzx */
96e7ae40 121 15, /* "large" insn */
e2e52e1b 122 3, /* MOVE_RATIO */
7c6b971d 123 4, /* cost for loading QImode using movzbl */
96e7ae40
JH
124 {2, 4, 2}, /* cost of loading integer registers
125 in QImode, HImode and SImode.
0f290768 126 Relative to reg-reg move (2). */
96e7ae40
JH
127 {2, 4, 2}, /* cost of storing integer registers */
128 2, /* cost of reg,reg fld/fst */
129 {8, 8, 8}, /* cost of loading fp registers
130 in SFmode, DFmode and XFmode */
fa79946e
JH
131 {8, 8, 8}, /* cost of loading integer registers */
132 2, /* cost of moving MMX register */
133 {4, 8}, /* cost of loading MMX registers
134 in SImode and DImode */
135 {4, 8}, /* cost of storing MMX registers
136 in SImode and DImode */
137 2, /* cost of moving SSE register */
138 {4, 8, 16}, /* cost of loading SSE registers
139 in SImode, DImode and TImode */
140 {4, 8, 16}, /* cost of storing SSE registers
141 in SImode, DImode and TImode */
142 3, /* MMX or SSE register to integer */
f4365627
JH
143 0, /* size of prefetch block */
144 0, /* number of parallel prefetches */
4977bab6 145 1, /* Branch cost */
229b303a
RS
146 23, /* cost of FADD and FSUB insns. */
147 27, /* cost of FMUL instruction. */
148 88, /* cost of FDIV instruction. */
149 22, /* cost of FABS instruction. */
150 24, /* cost of FCHS instruction. */
151 122, /* cost of FSQRT instruction. */
32b5b1aa
SC
152};
153
fce5a9f2 154static const
32b5b1aa
SC
155struct processor_costs i486_cost = { /* 486 specific costs */
156 1, /* cost of an add instruction */
157 1, /* cost of a lea instruction */
158 3, /* variable shift costs */
159 2, /* constant shift costs */
4977bab6 160 {12, 12, 12, 12, 12}, /* cost of starting a multiply */
32b5b1aa 161 1, /* cost of multiply per each bit set */
4977bab6 162 {40, 40, 40, 40, 40}, /* cost of a divide/mod */
44cf5b6a
JH
163 3, /* cost of movsx */
164 2, /* cost of movzx */
96e7ae40 165 15, /* "large" insn */
e2e52e1b 166 3, /* MOVE_RATIO */
7c6b971d 167 4, /* cost for loading QImode using movzbl */
96e7ae40
JH
168 {2, 4, 2}, /* cost of loading integer registers
169 in QImode, HImode and SImode.
0f290768 170 Relative to reg-reg move (2). */
96e7ae40
JH
171 {2, 4, 2}, /* cost of storing integer registers */
172 2, /* cost of reg,reg fld/fst */
173 {8, 8, 8}, /* cost of loading fp registers
174 in SFmode, DFmode and XFmode */
fa79946e
JH
175 {8, 8, 8}, /* cost of loading integer registers */
176 2, /* cost of moving MMX register */
177 {4, 8}, /* cost of loading MMX registers
178 in SImode and DImode */
179 {4, 8}, /* cost of storing MMX registers
180 in SImode and DImode */
181 2, /* cost of moving SSE register */
182 {4, 8, 16}, /* cost of loading SSE registers
183 in SImode, DImode and TImode */
184 {4, 8, 16}, /* cost of storing SSE registers
185 in SImode, DImode and TImode */
f4365627
JH
186 3, /* MMX or SSE register to integer */
187 0, /* size of prefetch block */
188 0, /* number of parallel prefetches */
4977bab6 189 1, /* Branch cost */
229b303a
RS
190 8, /* cost of FADD and FSUB insns. */
191 16, /* cost of FMUL instruction. */
192 73, /* cost of FDIV instruction. */
193 3, /* cost of FABS instruction. */
194 3, /* cost of FCHS instruction. */
195 83, /* cost of FSQRT instruction. */
32b5b1aa
SC
196};
197
fce5a9f2 198static const
e5cb57e8 199struct processor_costs pentium_cost = {
32b5b1aa
SC
200 1, /* cost of an add instruction */
201 1, /* cost of a lea instruction */
856b07a1 202 4, /* variable shift costs */
e5cb57e8 203 1, /* constant shift costs */
4977bab6 204 {11, 11, 11, 11, 11}, /* cost of starting a multiply */
856b07a1 205 0, /* cost of multiply per each bit set */
4977bab6 206 {25, 25, 25, 25, 25}, /* cost of a divide/mod */
44cf5b6a
JH
207 3, /* cost of movsx */
208 2, /* cost of movzx */
96e7ae40 209 8, /* "large" insn */
e2e52e1b 210 6, /* MOVE_RATIO */
7c6b971d 211 6, /* cost for loading QImode using movzbl */
96e7ae40
JH
212 {2, 4, 2}, /* cost of loading integer registers
213 in QImode, HImode and SImode.
0f290768 214 Relative to reg-reg move (2). */
96e7ae40
JH
215 {2, 4, 2}, /* cost of storing integer registers */
216 2, /* cost of reg,reg fld/fst */
217 {2, 2, 6}, /* cost of loading fp registers
218 in SFmode, DFmode and XFmode */
fa79946e
JH
219 {4, 4, 6}, /* cost of loading integer registers */
220 8, /* cost of moving MMX register */
221 {8, 8}, /* cost of loading MMX registers
222 in SImode and DImode */
223 {8, 8}, /* cost of storing MMX registers
224 in SImode and DImode */
225 2, /* cost of moving SSE register */
226 {4, 8, 16}, /* cost of loading SSE registers
227 in SImode, DImode and TImode */
228 {4, 8, 16}, /* cost of storing SSE registers
229 in SImode, DImode and TImode */
f4365627
JH
230 3, /* MMX or SSE register to integer */
231 0, /* size of prefetch block */
232 0, /* number of parallel prefetches */
4977bab6 233 2, /* Branch cost */
229b303a
RS
234 3, /* cost of FADD and FSUB insns. */
235 3, /* cost of FMUL instruction. */
236 39, /* cost of FDIV instruction. */
237 1, /* cost of FABS instruction. */
238 1, /* cost of FCHS instruction. */
239 70, /* cost of FSQRT instruction. */
32b5b1aa
SC
240};
241
fce5a9f2 242static const
856b07a1
SC
243struct processor_costs pentiumpro_cost = {
244 1, /* cost of an add instruction */
245 1, /* cost of a lea instruction */
e075ae69 246 1, /* variable shift costs */
856b07a1 247 1, /* constant shift costs */
4977bab6 248 {4, 4, 4, 4, 4}, /* cost of starting a multiply */
856b07a1 249 0, /* cost of multiply per each bit set */
4977bab6 250 {17, 17, 17, 17, 17}, /* cost of a divide/mod */
44cf5b6a
JH
251 1, /* cost of movsx */
252 1, /* cost of movzx */
96e7ae40 253 8, /* "large" insn */
e2e52e1b 254 6, /* MOVE_RATIO */
7c6b971d 255 2, /* cost for loading QImode using movzbl */
96e7ae40
JH
256 {4, 4, 4}, /* cost of loading integer registers
257 in QImode, HImode and SImode.
0f290768 258 Relative to reg-reg move (2). */
96e7ae40
JH
259 {2, 2, 2}, /* cost of storing integer registers */
260 2, /* cost of reg,reg fld/fst */
261 {2, 2, 6}, /* cost of loading fp registers
262 in SFmode, DFmode and XFmode */
fa79946e
JH
263 {4, 4, 6}, /* cost of loading integer registers */
264 2, /* cost of moving MMX register */
265 {2, 2}, /* cost of loading MMX registers
266 in SImode and DImode */
267 {2, 2}, /* cost of storing MMX registers
268 in SImode and DImode */
269 2, /* cost of moving SSE register */
270 {2, 2, 8}, /* cost of loading SSE registers
271 in SImode, DImode and TImode */
272 {2, 2, 8}, /* cost of storing SSE registers
273 in SImode, DImode and TImode */
f4365627
JH
274 3, /* MMX or SSE register to integer */
275 32, /* size of prefetch block */
276 6, /* number of parallel prefetches */
4977bab6 277 2, /* Branch cost */
229b303a
RS
278 3, /* cost of FADD and FSUB insns. */
279 5, /* cost of FMUL instruction. */
280 56, /* cost of FDIV instruction. */
281 2, /* cost of FABS instruction. */
282 2, /* cost of FCHS instruction. */
283 56, /* cost of FSQRT instruction. */
856b07a1
SC
284};
285
fce5a9f2 286static const
a269a03c
JC
287struct processor_costs k6_cost = {
288 1, /* cost of an add instruction */
e075ae69 289 2, /* cost of a lea instruction */
a269a03c
JC
290 1, /* variable shift costs */
291 1, /* constant shift costs */
4977bab6 292 {3, 3, 3, 3, 3}, /* cost of starting a multiply */
a269a03c 293 0, /* cost of multiply per each bit set */
4977bab6 294 {18, 18, 18, 18, 18}, /* cost of a divide/mod */
44cf5b6a
JH
295 2, /* cost of movsx */
296 2, /* cost of movzx */
96e7ae40 297 8, /* "large" insn */
e2e52e1b 298 4, /* MOVE_RATIO */
7c6b971d 299 3, /* cost for loading QImode using movzbl */
96e7ae40
JH
300 {4, 5, 4}, /* cost of loading integer registers
301 in QImode, HImode and SImode.
0f290768 302 Relative to reg-reg move (2). */
96e7ae40
JH
303 {2, 3, 2}, /* cost of storing integer registers */
304 4, /* cost of reg,reg fld/fst */
305 {6, 6, 6}, /* cost of loading fp registers
306 in SFmode, DFmode and XFmode */
fa79946e
JH
307 {4, 4, 4}, /* cost of loading integer registers */
308 2, /* cost of moving MMX register */
309 {2, 2}, /* cost of loading MMX registers
310 in SImode and DImode */
311 {2, 2}, /* cost of storing MMX registers
312 in SImode and DImode */
313 2, /* cost of moving SSE register */
314 {2, 2, 8}, /* cost of loading SSE registers
315 in SImode, DImode and TImode */
316 {2, 2, 8}, /* cost of storing SSE registers
317 in SImode, DImode and TImode */
f4365627
JH
318 6, /* MMX or SSE register to integer */
319 32, /* size of prefetch block */
320 1, /* number of parallel prefetches */
4977bab6 321 1, /* Branch cost */
229b303a
RS
322 2, /* cost of FADD and FSUB insns. */
323 2, /* cost of FMUL instruction. */
4f770e7b
RS
324 56, /* cost of FDIV instruction. */
325 2, /* cost of FABS instruction. */
229b303a
RS
326 2, /* cost of FCHS instruction. */
327 56, /* cost of FSQRT instruction. */
a269a03c
JC
328};
329
fce5a9f2 330static const
309ada50
JH
331struct processor_costs athlon_cost = {
332 1, /* cost of an add instruction */
0b5107cf 333 2, /* cost of a lea instruction */
309ada50
JH
334 1, /* variable shift costs */
335 1, /* constant shift costs */
4977bab6 336 {5, 5, 5, 5, 5}, /* cost of starting a multiply */
309ada50 337 0, /* cost of multiply per each bit set */
4977bab6 338 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
44cf5b6a
JH
339 1, /* cost of movsx */
340 1, /* cost of movzx */
309ada50 341 8, /* "large" insn */
e2e52e1b 342 9, /* MOVE_RATIO */
309ada50 343 4, /* cost for loading QImode using movzbl */
b72b1c29 344 {3, 4, 3}, /* cost of loading integer registers
309ada50 345 in QImode, HImode and SImode.
0f290768 346 Relative to reg-reg move (2). */
b72b1c29 347 {3, 4, 3}, /* cost of storing integer registers */
309ada50 348 4, /* cost of reg,reg fld/fst */
b72b1c29 349 {4, 4, 12}, /* cost of loading fp registers
309ada50 350 in SFmode, DFmode and XFmode */
b72b1c29 351 {6, 6, 8}, /* cost of loading integer registers */
fa79946e 352 2, /* cost of moving MMX register */
b72b1c29 353 {4, 4}, /* cost of loading MMX registers
fa79946e 354 in SImode and DImode */
b72b1c29 355 {4, 4}, /* cost of storing MMX registers
fa79946e
JH
356 in SImode and DImode */
357 2, /* cost of moving SSE register */
b72b1c29 358 {4, 4, 6}, /* cost of loading SSE registers
fa79946e 359 in SImode, DImode and TImode */
b72b1c29 360 {4, 4, 5}, /* cost of storing SSE registers
fa79946e 361 in SImode, DImode and TImode */
b72b1c29 362 5, /* MMX or SSE register to integer */
f4365627
JH
363 64, /* size of prefetch block */
364 6, /* number of parallel prefetches */
8c1e80e9 365 5, /* Branch cost */
229b303a
RS
366 4, /* cost of FADD and FSUB insns. */
367 4, /* cost of FMUL instruction. */
368 24, /* cost of FDIV instruction. */
369 2, /* cost of FABS instruction. */
370 2, /* cost of FCHS instruction. */
371 35, /* cost of FSQRT instruction. */
309ada50
JH
372};
373
4977bab6
ZW
374static const
375struct processor_costs k8_cost = {
376 1, /* cost of an add instruction */
377 2, /* cost of a lea instruction */
378 1, /* variable shift costs */
379 1, /* constant shift costs */
380 {3, 4, 3, 4, 5}, /* cost of starting a multiply */
381 0, /* cost of multiply per each bit set */
382 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
383 1, /* cost of movsx */
384 1, /* cost of movzx */
385 8, /* "large" insn */
386 9, /* MOVE_RATIO */
387 4, /* cost for loading QImode using movzbl */
388 {3, 4, 3}, /* cost of loading integer registers
389 in QImode, HImode and SImode.
390 Relative to reg-reg move (2). */
391 {3, 4, 3}, /* cost of storing integer registers */
392 4, /* cost of reg,reg fld/fst */
393 {4, 4, 12}, /* cost of loading fp registers
394 in SFmode, DFmode and XFmode */
395 {6, 6, 8}, /* cost of loading integer registers */
396 2, /* cost of moving MMX register */
397 {3, 3}, /* cost of loading MMX registers
398 in SImode and DImode */
399 {4, 4}, /* cost of storing MMX registers
400 in SImode and DImode */
401 2, /* cost of moving SSE register */
402 {4, 3, 6}, /* cost of loading SSE registers
403 in SImode, DImode and TImode */
404 {4, 4, 5}, /* cost of storing SSE registers
405 in SImode, DImode and TImode */
406 5, /* MMX or SSE register to integer */
407 64, /* size of prefetch block */
408 6, /* number of parallel prefetches */
8c1e80e9 409 5, /* Branch cost */
4977bab6
ZW
410 4, /* cost of FADD and FSUB insns. */
411 4, /* cost of FMUL instruction. */
412 19, /* cost of FDIV instruction. */
413 2, /* cost of FABS instruction. */
414 2, /* cost of FCHS instruction. */
415 35, /* cost of FSQRT instruction. */
416};
417
fce5a9f2 418static const
b4e89e2d
JH
419struct processor_costs pentium4_cost = {
420 1, /* cost of an add instruction */
e0c00392 421 3, /* cost of a lea instruction */
4977bab6
ZW
422 4, /* variable shift costs */
423 4, /* constant shift costs */
424 {15, 15, 15, 15, 15}, /* cost of starting a multiply */
b4e89e2d 425 0, /* cost of multiply per each bit set */
4977bab6 426 {56, 56, 56, 56, 56}, /* cost of a divide/mod */
44cf5b6a
JH
427 1, /* cost of movsx */
428 1, /* cost of movzx */
b4e89e2d
JH
429 16, /* "large" insn */
430 6, /* MOVE_RATIO */
431 2, /* cost for loading QImode using movzbl */
432 {4, 5, 4}, /* cost of loading integer registers
433 in QImode, HImode and SImode.
434 Relative to reg-reg move (2). */
435 {2, 3, 2}, /* cost of storing integer registers */
436 2, /* cost of reg,reg fld/fst */
437 {2, 2, 6}, /* cost of loading fp registers
438 in SFmode, DFmode and XFmode */
439 {4, 4, 6}, /* cost of loading integer registers */
440 2, /* cost of moving MMX register */
441 {2, 2}, /* cost of loading MMX registers
442 in SImode and DImode */
443 {2, 2}, /* cost of storing MMX registers
444 in SImode and DImode */
445 12, /* cost of moving SSE register */
446 {12, 12, 12}, /* cost of loading SSE registers
447 in SImode, DImode and TImode */
448 {2, 2, 8}, /* cost of storing SSE registers
449 in SImode, DImode and TImode */
450 10, /* MMX or SSE register to integer */
f4365627
JH
451 64, /* size of prefetch block */
452 6, /* number of parallel prefetches */
4977bab6 453 2, /* Branch cost */
229b303a
RS
454 5, /* cost of FADD and FSUB insns. */
455 7, /* cost of FMUL instruction. */
456 43, /* cost of FDIV instruction. */
457 2, /* cost of FABS instruction. */
458 2, /* cost of FCHS instruction. */
459 43, /* cost of FSQRT instruction. */
b4e89e2d
JH
460};
461
89c43c0a
VM
462static const
463struct processor_costs nocona_cost = {
464 1, /* cost of an add instruction */
465 1, /* cost of a lea instruction */
466 1, /* variable shift costs */
467 1, /* constant shift costs */
468 {10, 10, 10, 10, 10}, /* cost of starting a multiply */
469 0, /* cost of multiply per each bit set */
470 {66, 66, 66, 66, 66}, /* cost of a divide/mod */
471 1, /* cost of movsx */
472 1, /* cost of movzx */
473 16, /* "large" insn */
474 9, /* MOVE_RATIO */
475 4, /* cost for loading QImode using movzbl */
476 {4, 4, 4}, /* cost of loading integer registers
477 in QImode, HImode and SImode.
478 Relative to reg-reg move (2). */
479 {4, 4, 4}, /* cost of storing integer registers */
480 3, /* cost of reg,reg fld/fst */
481 {12, 12, 12}, /* cost of loading fp registers
482 in SFmode, DFmode and XFmode */
483 {4, 4, 4}, /* cost of loading integer registers */
484 6, /* cost of moving MMX register */
485 {12, 12}, /* cost of loading MMX registers
486 in SImode and DImode */
487 {12, 12}, /* cost of storing MMX registers
488 in SImode and DImode */
489 6, /* cost of moving SSE register */
490 {12, 12, 12}, /* cost of loading SSE registers
491 in SImode, DImode and TImode */
492 {12, 12, 12}, /* cost of storing SSE registers
493 in SImode, DImode and TImode */
494 8, /* MMX or SSE register to integer */
495 128, /* size of prefetch block */
496 8, /* number of parallel prefetches */
497 1, /* Branch cost */
498 6, /* cost of FADD and FSUB insns. */
499 8, /* cost of FMUL instruction. */
500 40, /* cost of FDIV instruction. */
501 3, /* cost of FABS instruction. */
502 3, /* cost of FCHS instruction. */
503 44, /* cost of FSQRT instruction. */
504};
505
8b60264b 506const struct processor_costs *ix86_cost = &pentium_cost;
32b5b1aa 507
/* Processor feature/optimization bitmasks.  One bit per PROCESSOR_* value;
   m_ATHLON_K8 is a convenience union of the two AMD cores.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_K6 (1<<PROCESSOR_K6)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
#define m_NOCONA (1<<PROCESSOR_NOCONA)
4977bab6 520const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
89c43c0a 521const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
a269a03c 522const int x86_zero_extend_with_and = m_486 | m_PENT;
89c43c0a 523const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA /* m_386 | m_K6 */;
e075ae69 524const int x86_double_with_add = ~m_386;
a269a03c 525const int x86_use_bit_test = m_386;
4977bab6 526const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
89c43c0a 527const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
4977bab6 528const int x86_3dnow_a = m_ATHLON_K8;
89c43c0a 529const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
d20bf446
L
530/* Branch hints were put in P4 based on simulation result. But
531 after P4 was made, no performance benefit was observed with
532 branch hints. It also increases the code size. As the result,
533 icc never generates branch hints. */
534const int x86_branch_hints = 0;
89c43c0a 535const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4 | m_NOCONA;
e075ae69
RH
536const int x86_partial_reg_stall = m_PPRO;
537const int x86_use_loop = m_K6;
4977bab6 538const int x86_use_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
e075ae69
RH
539const int x86_use_mov0 = m_K6;
540const int x86_use_cltd = ~(m_PENT | m_K6);
541const int x86_read_modify_write = ~m_PENT;
542const int x86_read_modify = ~(m_PENT | m_PPRO);
543const int x86_split_long_moves = m_PPRO;
4977bab6 544const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
285464d0 545const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
89c43c0a 546const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
d9f32422
JH
547const int x86_qimode_math = ~(0);
548const int x86_promote_qi_regs = 0;
549const int x86_himode_math = ~(m_PPRO);
550const int x86_promote_hi_regs = m_PPRO;
89c43c0a
VM
551const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA;
552const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA;
553const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4 | m_NOCONA;
554const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4 | m_NOCONA;
555const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO);
556const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA;
557const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA;
558const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO;
7b50a809
JH
559const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO;
560const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO;
89c43c0a 561const int x86_decompose_lea = m_PENT4 | m_NOCONA;
495333a6 562const int x86_shift1 = ~m_486;
89c43c0a
VM
563const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
564const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO;
4977bab6 565/* Set for machines where the type and dependencies are resolved on SSE register
d1f87653 566 parts instead of whole registers, so we may maintain just lower part of
4977bab6
ZW
567 scalar values in proper format leaving the upper part undefined. */
568const int x86_sse_partial_regs = m_ATHLON_K8;
569/* Athlon optimizes partial-register FPS special case, thus avoiding the
570 need for extra instructions beforehand */
571const int x86_sse_partial_regs_for_cvtsd2ss = 0;
572const int x86_sse_typeless_stores = m_ATHLON_K8;
89c43c0a 573const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
4977bab6
ZW
574const int x86_use_ffreep = m_ATHLON_K8;
575const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
8f62128d 576const int x86_inter_unit_moves = ~(m_ATHLON_K8);
89c43c0a 577const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_NOCONA | m_PPRO;
be04394b
JH
578/* Some CPU cores are not able to predict more than 4 branch instructions in
579 the 16 byte window. */
89c43c0a 580const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
03e00d30 581const int x86_schedule = m_PPRO | m_ATHLON_K8 | m_K6 | m_PENT;
7cacf53e 582const int x86_use_bt = m_ATHLON_K8;
a269a03c 583
/* In case the average insn count for single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 20
5bf0ebab
RH
589/* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
590static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
591static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
592static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
4c0d89b5
RS
593
594/* Array of the smallest class containing reg number REGNO, indexed by
0f290768 595 REGNO. Used by REGNO_REG_CLASS in i386.h. */
4c0d89b5 596
e075ae69 597enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
4c0d89b5
RS
598{
599 /* ax, dx, cx, bx */
ab408a86 600 AREG, DREG, CREG, BREG,
4c0d89b5 601 /* si, di, bp, sp */
e075ae69 602 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
4c0d89b5
RS
603 /* FP registers */
604 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
79325812 605 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
4c0d89b5 606 /* arg pointer */
83774849 607 NON_Q_REGS,
564d80f4 608 /* flags, fpsr, dirflag, frame */
a7180f70
BS
609 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
610 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
611 SSE_REGS, SSE_REGS,
612 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
3d117b30
JH
613 MMX_REGS, MMX_REGS,
614 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
615 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
616 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
617 SSE_REGS, SSE_REGS,
4c0d89b5 618};
c572e5ba 619
3d117b30 620/* The "default" register map used in 32bit mode. */
83774849 621
0f290768 622int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
83774849
RH
623{
624 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
625 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
3d117b30 626 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
a7180f70
BS
627 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
628 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
3d117b30
JH
629 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
630 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
83774849
RH
631};
632
5bf0ebab
RH
633static int const x86_64_int_parameter_registers[6] =
634{
635 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
636 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
637};
638
/* Hard register numbers used to return integer values in 64-bit mode.
   Note: gcc register number 1 is %rdx (register order is ax, dx, cx, bx —
   see the parameter-register table above, where 1 is labeled RDX);
   the original comment here mislabeled it RDI.  */
static int const x86_64_int_return_registers[4] =
{
  0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
};
0f7fa3d0
JH
644/* The "default" register map used in 64bit mode. */
645int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
646{
647 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
3de72741 648 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
0f7fa3d0
JH
649 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
650 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
651 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
652 8,9,10,11,12,13,14,15, /* extended integer registers */
653 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
654};
655
83774849
RH
656/* Define the register numbers to be used in Dwarf debugging information.
657 The SVR4 reference port C compiler uses the following register numbers
658 in its Dwarf output code:
659 0 for %eax (gcc regno = 0)
660 1 for %ecx (gcc regno = 2)
661 2 for %edx (gcc regno = 1)
662 3 for %ebx (gcc regno = 3)
663 4 for %esp (gcc regno = 7)
664 5 for %ebp (gcc regno = 6)
665 6 for %esi (gcc regno = 4)
666 7 for %edi (gcc regno = 5)
667 The following three DWARF register numbers are never generated by
668 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
669 believes these numbers have these meanings.
670 8 for %eip (no gcc equivalent)
671 9 for %eflags (gcc regno = 17)
672 10 for %trapno (no gcc equivalent)
673 It is not at all clear how we should number the FP stack registers
674 for the x86 architecture. If the version of SDB on x86/svr4 were
675 a bit less brain dead with respect to floating-point then we would
676 have a precedent to follow with respect to DWARF register numbers
677 for x86 FP registers, but the SDB on x86/svr4 is so completely
678 broken with respect to FP registers that it is hardly worth thinking
679 of it as something to strive for compatibility with.
680 The version of x86/svr4 SDB I have at the moment does (partially)
681 seem to believe that DWARF register number 11 is associated with
682 the x86 register %st(0), but that's about all. Higher DWARF
683 register numbers don't seem to be associated with anything in
684 particular, and even for DWARF regno 11, SDB only seems to under-
685 stand that it should say that a variable lives in %st(0) (when
686 asked via an `=' command) if we said it was in DWARF regno 11,
687 but SDB still prints garbage when asked for the value of the
688 variable in question (via a `/' command).
689 (Also note that the labels SDB prints for various FP stack regs
690 when doing an `x' command are all wrong.)
691 Note that these problems generally don't affect the native SVR4
692 C compiler because it doesn't allow the use of -O with -g and
693 because when it is *not* optimizing, it allocates a memory
694 location for each floating-point variable, and the memory
695 location is what gets described in the DWARF AT_location
696 attribute for the variable in question.
697 Regardless of the severe mental illness of the x86/svr4 SDB, we
698 do something sensible here and we use the following DWARF
699 register numbers. Note that these are all stack-top-relative
700 numbers.
701 11 for %st(0) (gcc regno = 8)
702 12 for %st(1) (gcc regno = 9)
703 13 for %st(2) (gcc regno = 10)
704 14 for %st(3) (gcc regno = 11)
705 15 for %st(4) (gcc regno = 12)
706 16 for %st(5) (gcc regno = 13)
707 17 for %st(6) (gcc regno = 14)
708 18 for %st(7) (gcc regno = 15)
709*/
0f290768 710int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
83774849
RH
711{
712 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
713 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
3f3f2124 714 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
a7180f70
BS
715 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
716 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
d1f87653
KH
717 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
718 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
83774849
RH
719};
720
c572e5ba
JVA
721/* Test and compare insns in i386.md store the information needed to
722 generate branch and scc insns here. */
723
07933f72
GS
724rtx ix86_compare_op0 = NULL_RTX;
725rtx ix86_compare_op1 = NULL_RTX;
f5316dfe 726
7a2e09f4 727#define MAX_386_STACK_LOCALS 3
8362f420
JH
728/* Size of the register save area. */
729#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
36edd3cc
BS
730
731/* Define the structure for the machine field in struct function. */
ddb0ae00
ZW
732
733struct stack_local_entry GTY(())
734{
735 unsigned short mode;
736 unsigned short n;
737 rtx rtl;
738 struct stack_local_entry *next;
739};
740
4dd2ac2c
JH
741/* Structure describing stack frame layout.
742 Stack grows downward:
743
744 [arguments]
745 <- ARG_POINTER
746 saved pc
747
748 saved frame pointer if frame_pointer_needed
749 <- HARD_FRAME_POINTER
750 [saved regs]
751
752 [padding1] \
753 )
754 [va_arg registers] (
755 > to_allocate <- FRAME_POINTER
756 [frame] (
757 )
758 [padding2] /
759 */
760struct ix86_frame
761{
762 int nregs;
763 int padding1;
8362f420 764 int va_arg_size;
4dd2ac2c
JH
765 HOST_WIDE_INT frame;
766 int padding2;
767 int outgoing_arguments_size;
8362f420 768 int red_zone_size;
4dd2ac2c
JH
769
770 HOST_WIDE_INT to_allocate;
771 /* The offsets relative to ARG_POINTER. */
772 HOST_WIDE_INT frame_pointer_offset;
773 HOST_WIDE_INT hard_frame_pointer_offset;
774 HOST_WIDE_INT stack_pointer_offset;
d9b40e8d
JH
775
776 /* When save_regs_using_mov is set, emit prologue using
777 move instead of push instructions. */
778 bool save_regs_using_mov;
4dd2ac2c
JH
779};
780
c93e80a5
JH
781/* Used to enable/disable debugging features. */
782const char *ix86_debug_arg_string, *ix86_debug_addr_string;
6189a572
JH
783/* Code model option as passed by user. */
784const char *ix86_cmodel_string;
785/* Parsed value. */
786enum cmodel ix86_cmodel;
80f33d06
GS
787/* Asm dialect. */
788const char *ix86_asm_string;
789enum asm_dialect ix86_asm_dialect = ASM_ATT;
f996902d
RH
790/* TLS dialext. */
791const char *ix86_tls_dialect_string;
792enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
6189a572 793
5bf0ebab 794/* Which unit we are generating floating point math for. */
965f5423
JH
795enum fpmath_unit ix86_fpmath;
796
5bf0ebab 797/* Which cpu are we scheduling for. */
9e555526 798enum processor_type ix86_tune;
5bf0ebab
RH
799/* Which instruction set architecture to use. */
800enum processor_type ix86_arch;
c8c5cb99
SC
801
802/* Strings to hold which cpu and instruction set architecture to use. */
9e555526 803const char *ix86_tune_string; /* for -mtune=<xxx> */
9c23aa47 804const char *ix86_arch_string; /* for -march=<xxx> */
965f5423 805const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
c8c5cb99 806
0f290768 807/* # of registers to use to pass arguments. */
e075ae69 808const char *ix86_regparm_string;
e9a25f70 809
f4365627
JH
810/* true if sse prefetch instruction is not NOOP. */
811int x86_prefetch_sse;
812
e075ae69
RH
813/* ix86_regparm_string as a number */
814int ix86_regparm;
e9a25f70
JL
815
816/* Alignment to use for loops and jumps: */
817
0f290768 818/* Power of two alignment for loops. */
e075ae69 819const char *ix86_align_loops_string;
e9a25f70 820
0f290768 821/* Power of two alignment for non-loop jumps. */
e075ae69 822const char *ix86_align_jumps_string;
e9a25f70 823
3af4bd89 824/* Power of two alignment for stack boundary in bytes. */
e075ae69 825const char *ix86_preferred_stack_boundary_string;
3af4bd89
JH
826
827/* Preferred alignment for stack boundary in bits. */
95899b34 828unsigned int ix86_preferred_stack_boundary;
3af4bd89 829
e9a25f70 830/* Values 1-5: see jump.c */
e075ae69
RH
831int ix86_branch_cost;
832const char *ix86_branch_cost_string;
e9a25f70 833
0f290768 834/* Power of two alignment for functions. */
e075ae69 835const char *ix86_align_funcs_string;
623fe810
RH
836
837/* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
8fe75e43
RH
838char internal_label_prefix[16];
839int internal_label_prefix_len;
e075ae69 840\f
b96a374d
AJ
841static void output_pic_addr_const (FILE *, rtx, int);
842static void put_condition_code (enum rtx_code, enum machine_mode,
843 int, int, FILE *);
844static const char *get_some_local_dynamic_name (void);
845static int get_some_local_dynamic_name_1 (rtx *, void *);
b96a374d
AJ
846static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
847static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
848 rtx *);
e129d93a
ILT
849static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
850static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
851 enum machine_mode);
b96a374d
AJ
852static rtx get_thread_pointer (int);
853static rtx legitimize_tls_address (rtx, enum tls_model, int);
854static void get_pc_thunk_name (char [32], unsigned int);
855static rtx gen_push (rtx);
b96a374d
AJ
856static int ix86_flags_dependant (rtx, rtx, enum attr_type);
857static int ix86_agi_dependant (rtx, rtx, enum attr_type);
b96a374d
AJ
858static struct machine_function * ix86_init_machine_status (void);
859static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
860static int ix86_nsaved_regs (void);
861static void ix86_emit_save_regs (void);
862static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
72613dfa 863static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
b96a374d 864static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
b96a374d
AJ
865static HOST_WIDE_INT ix86_GOT_alias_set (void);
866static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
867static rtx ix86_expand_aligntest (rtx, int);
4e44c1ef 868static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
b96a374d
AJ
869static int ix86_issue_rate (void);
870static int ix86_adjust_cost (rtx, rtx, rtx, int);
b96a374d 871static int ia32_multipass_dfa_lookahead (void);
7ccf35ed 872static bool ix86_misaligned_mem_ok (enum machine_mode);
b96a374d
AJ
873static void ix86_init_mmx_sse_builtins (void);
874static rtx x86_this_parameter (tree);
875static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
876 HOST_WIDE_INT, tree);
877static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
878static void x86_file_start (void);
879static void ix86_reorg (void);
c35d187f
RH
880static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
881static tree ix86_build_builtin_va_list (void);
a0524eb3
KH
882static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
883 tree, int *, int);
23a60a04 884static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
f676971a 885static bool ix86_vector_mode_supported_p (enum machine_mode);
e075ae69 886
b96a374d
AJ
887static int ix86_address_cost (rtx);
888static bool ix86_cannot_force_const_mem (rtx);
889static rtx ix86_delegitimize_address (rtx);
bd793c65
BS
890
891struct builtin_description;
b96a374d
AJ
892static rtx ix86_expand_sse_comi (const struct builtin_description *,
893 tree, rtx);
894static rtx ix86_expand_sse_compare (const struct builtin_description *,
895 tree, rtx);
896static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
897static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
898static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
899static rtx ix86_expand_store_builtin (enum insn_code, tree);
900static rtx safe_vector_operand (rtx, enum machine_mode);
b96a374d
AJ
901static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
902static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
903static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
904static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
905static int ix86_fp_comparison_cost (enum rtx_code code);
906static unsigned int ix86_select_alt_pic_regnum (void);
907static int ix86_save_reg (unsigned int, int);
908static void ix86_compute_frame_layout (struct ix86_frame *);
909static int ix86_comp_type_attributes (tree, tree);
e767b5be 910static int ix86_function_regparm (tree, tree);
91d231cb 911const struct attribute_spec ix86_attribute_table[];
b96a374d
AJ
912static bool ix86_function_ok_for_sibcall (tree, tree);
913static tree ix86_handle_cdecl_attribute (tree *, tree, tree, int, bool *);
914static tree ix86_handle_regparm_attribute (tree *, tree, tree, int, bool *);
915static int ix86_value_regno (enum machine_mode);
916static bool contains_128bit_aligned_vector_p (tree);
0397ac35 917static rtx ix86_struct_value_rtx (tree, int);
b96a374d
AJ
918static bool ix86_ms_bitfield_layout_p (tree);
919static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
920static int extended_reg_mentioned_1 (rtx *, void *);
921static bool ix86_rtx_costs (rtx, int, int, int *);
922static int min_insn_size (rtx);
67dfe110 923static tree ix86_md_asm_clobbers (tree clobbers);
fe984136 924static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type);
8cd5a4e0
RH
925static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
926 tree, bool);
7c262518 927
7915fbaa
MM
928/* This function is only used on Solaris. */
929static void i386_solaris_elf_named_section (const char *, unsigned int, tree)
930 ATTRIBUTE_UNUSED;
e56feed6 931
53c17031
JH
/* Register class used for passing a given 64bit part of the argument.
   These represent classes as documented by the PS ABI, with the exception
   of SSESF, SSEDF classes, that are basically SSE class, just gcc will
   use SF or DFmode move instead of DImode to avoid reformatting penalties.

   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
   whenever possible (upper half does contain padding).  */
enum x86_64_reg_class
  {
    X86_64_NO_CLASS,		/* no register needed */
    X86_64_INTEGER_CLASS,	/* general-purpose register */
    X86_64_INTEGERSI_CLASS,	/* general-purpose register, SImode moves */
    X86_64_SSE_CLASS,		/* SSE register */
    X86_64_SSESF_CLASS,		/* SSE register, SFmode moves */
    X86_64_SSEDF_CLASS,		/* SSE register, DFmode moves */
    X86_64_SSEUP_CLASS,		/* upper half of an SSE register */
    X86_64_X87_CLASS,		/* x87 stack register */
    X86_64_X87UP_CLASS,		/* upper part of an x87 value */
    X86_64_MEMORY_CLASS		/* argument passed in memory */
  };

/* Names for the classes above, in the same order.  */
static const char * const x86_64_reg_class_name[] =
  {
    "no", "integer", "integerSI", "sse", "sseSF",
    "sseDF", "sseup", "x87", "x87up", "no"
  };
955
956#define MAX_CLASSES 4
b96a374d
AJ
957static int classify_argument (enum machine_mode, tree,
958 enum x86_64_reg_class [MAX_CLASSES], int);
959static int examine_argument (enum machine_mode, tree, int, int *, int *);
960static rtx construct_container (enum machine_mode, tree, int, int, int,
961 const int *, int);
962static enum x86_64_reg_class merge_classes (enum x86_64_reg_class,
963 enum x86_64_reg_class);
881b2a96 964
43f3a59d 965/* Table of constants used by fldpi, fldln2, etc.... */
881b2a96
RS
966static REAL_VALUE_TYPE ext_80387_constants_table [5];
967static bool ext_80387_constants_init = 0;
b96a374d 968static void init_ext_80387_constants (void);
672a6f42
NB
969\f
970/* Initialize the GCC target structure. */
91d231cb
JM
971#undef TARGET_ATTRIBUTE_TABLE
972#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
b2ca3702 973#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
f5f4be42
NB
974# undef TARGET_MERGE_DECL_ATTRIBUTES
975# define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
672a6f42
NB
976#endif
977
8d8e52be
JM
978#undef TARGET_COMP_TYPE_ATTRIBUTES
979#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
980
f6155fda
SS
981#undef TARGET_INIT_BUILTINS
982#define TARGET_INIT_BUILTINS ix86_init_builtins
983
984#undef TARGET_EXPAND_BUILTIN
985#define TARGET_EXPAND_BUILTIN ix86_expand_builtin
986
bd09bdeb
RH
987#undef TARGET_ASM_FUNCTION_EPILOGUE
988#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
08c148a8 989
17b53c33
NB
990#undef TARGET_ASM_OPEN_PAREN
991#define TARGET_ASM_OPEN_PAREN ""
992#undef TARGET_ASM_CLOSE_PAREN
993#define TARGET_ASM_CLOSE_PAREN ""
994
301d03af
RS
995#undef TARGET_ASM_ALIGNED_HI_OP
996#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
997#undef TARGET_ASM_ALIGNED_SI_OP
998#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
999#ifdef ASM_QUAD
1000#undef TARGET_ASM_ALIGNED_DI_OP
1001#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
1002#endif
1003
1004#undef TARGET_ASM_UNALIGNED_HI_OP
1005#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
1006#undef TARGET_ASM_UNALIGNED_SI_OP
1007#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1008#undef TARGET_ASM_UNALIGNED_DI_OP
1009#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1010
c237e94a
ZW
1011#undef TARGET_SCHED_ADJUST_COST
1012#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1013#undef TARGET_SCHED_ISSUE_RATE
1014#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
9b690711
RH
1015#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1016#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1017 ia32_multipass_dfa_lookahead
c237e94a 1018
7ccf35ed
DN
1019#undef TARGET_VECTORIZE_MISALIGNED_MEM_OK
1020#define TARGET_VECTORIZE_MISALIGNED_MEM_OK ix86_misaligned_mem_ok
1021
4977bab6
ZW
1022#undef TARGET_FUNCTION_OK_FOR_SIBCALL
1023#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1024
f996902d
RH
1025#ifdef HAVE_AS_TLS
1026#undef TARGET_HAVE_TLS
1027#define TARGET_HAVE_TLS true
1028#endif
3a04ff64
RH
1029#undef TARGET_CANNOT_FORCE_CONST_MEM
1030#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
f996902d 1031
7daebb7a 1032#undef TARGET_DELEGITIMIZE_ADDRESS
69bd9368 1033#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
7daebb7a 1034
4977bab6
ZW
1035#undef TARGET_MS_BITFIELD_LAYOUT_P
1036#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1037
c590b625
RH
1038#undef TARGET_ASM_OUTPUT_MI_THUNK
1039#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
3961e8fe
RH
1040#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1041#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
c590b625 1042
1bc7c5b6
ZW
1043#undef TARGET_ASM_FILE_START
1044#define TARGET_ASM_FILE_START x86_file_start
1045
3c50106f
RH
1046#undef TARGET_RTX_COSTS
1047#define TARGET_RTX_COSTS ix86_rtx_costs
dcefdf67
RH
1048#undef TARGET_ADDRESS_COST
1049#define TARGET_ADDRESS_COST ix86_address_cost
3c50106f 1050
e129d93a
ILT
1051#undef TARGET_FIXED_CONDITION_CODE_REGS
1052#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1053#undef TARGET_CC_MODES_COMPATIBLE
1054#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1055
18dbd950
RS
1056#undef TARGET_MACHINE_DEPENDENT_REORG
1057#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1058
c35d187f
RH
1059#undef TARGET_BUILD_BUILTIN_VA_LIST
1060#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1061
67dfe110
KH
1062#undef TARGET_MD_ASM_CLOBBERS
1063#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1064
9184f892
KH
1065#undef TARGET_PROMOTE_PROTOTYPES
1066#define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
0397ac35
RH
1067#undef TARGET_STRUCT_VALUE_RTX
1068#define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
a0524eb3
KH
1069#undef TARGET_SETUP_INCOMING_VARARGS
1070#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
fe984136
RH
1071#undef TARGET_MUST_PASS_IN_STACK
1072#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
8cd5a4e0
RH
1073#undef TARGET_PASS_BY_REFERENCE
1074#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
a0524eb3 1075
cd3ce9b4
JM
1076#undef TARGET_GIMPLIFY_VA_ARG_EXPR
1077#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1078
f676971a
EC
1079#undef TARGET_VECTOR_MODE_SUPPORTED_P
1080#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
1081
07a43492
DJ
1082#ifdef SUBTARGET_INSERT_ATTRIBUTES
1083#undef TARGET_INSERT_ATTRIBUTES
1084#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
1085#endif
1086
f6897b10 1087struct gcc_target targetm = TARGET_INITIALIZER;
89c43c0a 1088
e075ae69 1089\f
67c2b45f
JS
/* The svr4 ABI for the i386 says that records and unions are returned
   in memory, so default the flag on unless a subtarget overrode it.  */
#ifndef DEFAULT_PCC_STRUCT_RETURN
#define DEFAULT_PCC_STRUCT_RETURN 1
#endif
1095
f5316dfe
MM
1096/* Sometimes certain combinations of command options do not make
1097 sense on a particular target machine. You can define a macro
1098 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1099 defined, is executed once just after all the command options have
1100 been parsed.
1101
1102 Don't use this macro to turn on various extra optimizations for
1103 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1104
1105void
b96a374d 1106override_options (void)
f5316dfe 1107{
400500c4 1108 int i;
3326f410
DJ
1109 int ix86_tune_defaulted = 0;
1110
e075ae69
RH
1111 /* Comes from final.c -- no real reason to change it. */
1112#define MAX_CODE_ALIGN 16
f5316dfe 1113
c8c5cb99
SC
1114 static struct ptt
1115 {
8b60264b
KG
1116 const struct processor_costs *cost; /* Processor costs */
1117 const int target_enable; /* Target flags to enable. */
1118 const int target_disable; /* Target flags to disable. */
1119 const int align_loop; /* Default alignments. */
2cca7283 1120 const int align_loop_max_skip;
8b60264b 1121 const int align_jump;
2cca7283 1122 const int align_jump_max_skip;
8b60264b 1123 const int align_func;
e075ae69 1124 }
0f290768 1125 const processor_target_table[PROCESSOR_max] =
e075ae69 1126 {
4977bab6
ZW
1127 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1128 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1129 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1130 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1131 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1132 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1133 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
89c43c0a
VM
1134 {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
1135 {&nocona_cost, 0, 0, 0, 0, 0, 0, 0}
e075ae69
RH
1136 };
1137
f4365627 1138 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
e075ae69
RH
1139 static struct pta
1140 {
8b60264b
KG
1141 const char *const name; /* processor name or nickname. */
1142 const enum processor_type processor;
0dd0e980
JH
1143 const enum pta_flags
1144 {
1145 PTA_SSE = 1,
1146 PTA_SSE2 = 2,
5bbeea44
JH
1147 PTA_SSE3 = 4,
1148 PTA_MMX = 8,
1149 PTA_PREFETCH_SSE = 16,
1150 PTA_3DNOW = 32,
4977bab6
ZW
1151 PTA_3DNOW_A = 64,
1152 PTA_64BIT = 128
0dd0e980 1153 } flags;
e075ae69 1154 }
0f290768 1155 const processor_alias_table[] =
e075ae69 1156 {
0dd0e980
JH
1157 {"i386", PROCESSOR_I386, 0},
1158 {"i486", PROCESSOR_I486, 0},
1159 {"i586", PROCESSOR_PENTIUM, 0},
1160 {"pentium", PROCESSOR_PENTIUM, 0},
1161 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
3780101d
JG
1162 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1163 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1164 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
3462df62 1165 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
0dd0e980
JH
1166 {"i686", PROCESSOR_PENTIUMPRO, 0},
1167 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1168 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
f4365627 1169 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
5bbeea44
JH
1170 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1171 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1172 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1173 | PTA_MMX | PTA_PREFETCH_SSE},
1174 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1175 | PTA_MMX | PTA_PREFETCH_SSE},
89c43c0a
VM
1176 {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
1177 | PTA_MMX | PTA_PREFETCH_SSE},
1178 {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
5bbeea44 1179 | PTA_MMX | PTA_PREFETCH_SSE},
0dd0e980
JH
1180 {"k6", PROCESSOR_K6, PTA_MMX},
1181 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1182 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
f4365627 1183 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
0dd0e980 1184 | PTA_3DNOW_A},
f4365627 1185 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
0dd0e980 1186 | PTA_3DNOW | PTA_3DNOW_A},
f4365627 1187 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
0dd0e980 1188 | PTA_3DNOW_A | PTA_SSE},
f4365627 1189 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
0dd0e980 1190 | PTA_3DNOW_A | PTA_SSE},
f4365627 1191 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
0dd0e980 1192 | PTA_3DNOW_A | PTA_SSE},
3fec9fa9
JJ
1193 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1194 | PTA_SSE | PTA_SSE2 },
4977bab6
ZW
1195 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1196 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
9a609388
JH
1197 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1198 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1199 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1200 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1201 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1202 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
3af4bd89 1203 };
c8c5cb99 1204
ca7558fc 1205 int const pta_size = ARRAY_SIZE (processor_alias_table);
c8c5cb99 1206
554707bd
DJ
1207#ifdef SUBTARGET_OVERRIDE_OPTIONS
1208 SUBTARGET_OVERRIDE_OPTIONS;
1209#endif
1210
41ed2237 1211 /* Set the default values for switches whose default depends on TARGET_64BIT
d1f87653 1212 in case they weren't overwritten by command line options. */
55ba61f3
JH
1213 if (TARGET_64BIT)
1214 {
1215 if (flag_omit_frame_pointer == 2)
1216 flag_omit_frame_pointer = 1;
1217 if (flag_asynchronous_unwind_tables == 2)
1218 flag_asynchronous_unwind_tables = 1;
1219 if (flag_pcc_struct_return == 2)
1220 flag_pcc_struct_return = 0;
1221 }
1222 else
1223 {
1224 if (flag_omit_frame_pointer == 2)
1225 flag_omit_frame_pointer = 0;
1226 if (flag_asynchronous_unwind_tables == 2)
1227 flag_asynchronous_unwind_tables = 0;
1228 if (flag_pcc_struct_return == 2)
7c712dcc 1229 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
55ba61f3
JH
1230 }
1231
9e555526
RH
1232 if (!ix86_tune_string && ix86_arch_string)
1233 ix86_tune_string = ix86_arch_string;
1234 if (!ix86_tune_string)
3326f410
DJ
1235 {
1236 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1237 ix86_tune_defaulted = 1;
1238 }
f4365627 1239 if (!ix86_arch_string)
3fec9fa9 1240 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
e075ae69 1241
6189a572
JH
1242 if (ix86_cmodel_string != 0)
1243 {
1244 if (!strcmp (ix86_cmodel_string, "small"))
1245 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1246 else if (flag_pic)
c725bd79 1247 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
6189a572
JH
1248 else if (!strcmp (ix86_cmodel_string, "32"))
1249 ix86_cmodel = CM_32;
1250 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1251 ix86_cmodel = CM_KERNEL;
1252 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
1253 ix86_cmodel = CM_MEDIUM;
1254 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1255 ix86_cmodel = CM_LARGE;
1256 else
1257 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1258 }
1259 else
1260 {
1261 ix86_cmodel = CM_32;
1262 if (TARGET_64BIT)
1263 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1264 }
c93e80a5
JH
1265 if (ix86_asm_string != 0)
1266 {
1267 if (!strcmp (ix86_asm_string, "intel"))
1268 ix86_asm_dialect = ASM_INTEL;
1269 else if (!strcmp (ix86_asm_string, "att"))
1270 ix86_asm_dialect = ASM_ATT;
1271 else
1272 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1273 }
6189a572 1274 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
9e637a26 1275 error ("code model %qs not supported in the %s bit mode",
6189a572
JH
1276 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1277 if (ix86_cmodel == CM_LARGE)
9e637a26 1278 sorry ("code model %<large%> not supported yet");
0c2dc519 1279 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
c725bd79 1280 sorry ("%i-bit mode not compiled in",
0c2dc519 1281 (target_flags & MASK_64BIT) ? 64 : 32);
6189a572 1282
f4365627
JH
1283 for (i = 0; i < pta_size; i++)
1284 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1285 {
1286 ix86_arch = processor_alias_table[i].processor;
1287 /* Default cpu tuning to the architecture. */
9e555526 1288 ix86_tune = ix86_arch;
f4365627 1289 if (processor_alias_table[i].flags & PTA_MMX
9ef1b13a 1290 && !(target_flags_explicit & MASK_MMX))
f4365627
JH
1291 target_flags |= MASK_MMX;
1292 if (processor_alias_table[i].flags & PTA_3DNOW
9ef1b13a 1293 && !(target_flags_explicit & MASK_3DNOW))
f4365627
JH
1294 target_flags |= MASK_3DNOW;
1295 if (processor_alias_table[i].flags & PTA_3DNOW_A
9ef1b13a 1296 && !(target_flags_explicit & MASK_3DNOW_A))
f4365627
JH
1297 target_flags |= MASK_3DNOW_A;
1298 if (processor_alias_table[i].flags & PTA_SSE
9ef1b13a 1299 && !(target_flags_explicit & MASK_SSE))
f4365627
JH
1300 target_flags |= MASK_SSE;
1301 if (processor_alias_table[i].flags & PTA_SSE2
9ef1b13a 1302 && !(target_flags_explicit & MASK_SSE2))
f4365627 1303 target_flags |= MASK_SSE2;
5bbeea44
JH
1304 if (processor_alias_table[i].flags & PTA_SSE3
1305 && !(target_flags_explicit & MASK_SSE3))
1306 target_flags |= MASK_SSE3;
f4365627
JH
1307 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1308 x86_prefetch_sse = true;
6716ecbc
JM
1309 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1310 error ("CPU you selected does not support x86-64 "
1311 "instruction set");
1312 break;
1313 }
1314
1315 if (i == pta_size)
1316 error ("bad value (%s) for -march= switch", ix86_arch_string);
1317
1318 for (i = 0; i < pta_size; i++)
1319 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1320 {
1321 ix86_tune = processor_alias_table[i].processor;
4977bab6 1322 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
3326f410
DJ
1323 {
1324 if (ix86_tune_defaulted)
1325 {
1326 ix86_tune_string = "x86-64";
1327 for (i = 0; i < pta_size; i++)
1328 if (! strcmp (ix86_tune_string,
1329 processor_alias_table[i].name))
1330 break;
1331 ix86_tune = processor_alias_table[i].processor;
1332 }
1333 else
1334 error ("CPU you selected does not support x86-64 "
1335 "instruction set");
1336 }
c618c6ec
JJ
1337 /* Intel CPUs have always interpreted SSE prefetch instructions as
1338 NOPs; so, we can enable SSE prefetch instructions even when
1339 -mtune (rather than -march) points us to a processor that has them.
1340 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
1341 higher processors. */
1342 if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
1343 x86_prefetch_sse = true;
f4365627
JH
1344 break;
1345 }
f4365627 1346 if (i == pta_size)
9e555526 1347 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
e075ae69 1348
2ab0437e
JH
1349 if (optimize_size)
1350 ix86_cost = &size_cost;
1351 else
9e555526
RH
1352 ix86_cost = processor_target_table[ix86_tune].cost;
1353 target_flags |= processor_target_table[ix86_tune].target_enable;
1354 target_flags &= ~processor_target_table[ix86_tune].target_disable;
e075ae69 1355
36edd3cc
BS
1356 /* Arrange to set up i386_stack_locals for all functions. */
1357 init_machine_status = ix86_init_machine_status;
fce5a9f2 1358
0f290768 1359 /* Validate -mregparm= value. */
e075ae69 1360 if (ix86_regparm_string)
b08de47e 1361 {
400500c4
RK
1362 i = atoi (ix86_regparm_string);
1363 if (i < 0 || i > REGPARM_MAX)
1364 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1365 else
1366 ix86_regparm = i;
b08de47e 1367 }
0d7d98ee
JH
1368 else
1369 if (TARGET_64BIT)
1370 ix86_regparm = REGPARM_MAX;
b08de47e 1371
3e18fdf6 1372 /* If the user has provided any of the -malign-* options,
a4f31c00 1373 warn and use that value only if -falign-* is not set.
3e18fdf6 1374 Remove this code in GCC 3.2 or later. */
e075ae69 1375 if (ix86_align_loops_string)
b08de47e 1376 {
3e18fdf6
GK
1377 warning ("-malign-loops is obsolete, use -falign-loops");
1378 if (align_loops == 0)
1379 {
1380 i = atoi (ix86_align_loops_string);
1381 if (i < 0 || i > MAX_CODE_ALIGN)
1382 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1383 else
1384 align_loops = 1 << i;
1385 }
b08de47e 1386 }
3af4bd89 1387
e075ae69 1388 if (ix86_align_jumps_string)
b08de47e 1389 {
3e18fdf6
GK
1390 warning ("-malign-jumps is obsolete, use -falign-jumps");
1391 if (align_jumps == 0)
1392 {
1393 i = atoi (ix86_align_jumps_string);
1394 if (i < 0 || i > MAX_CODE_ALIGN)
1395 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1396 else
1397 align_jumps = 1 << i;
1398 }
b08de47e 1399 }
b08de47e 1400
e075ae69 1401 if (ix86_align_funcs_string)
b08de47e 1402 {
3e18fdf6
GK
1403 warning ("-malign-functions is obsolete, use -falign-functions");
1404 if (align_functions == 0)
1405 {
1406 i = atoi (ix86_align_funcs_string);
1407 if (i < 0 || i > MAX_CODE_ALIGN)
1408 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1409 else
1410 align_functions = 1 << i;
1411 }
b08de47e 1412 }
3af4bd89 1413
3e18fdf6 1414 /* Default align_* from the processor table. */
3e18fdf6 1415 if (align_loops == 0)
2cca7283 1416 {
9e555526
RH
1417 align_loops = processor_target_table[ix86_tune].align_loop;
1418 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
2cca7283 1419 }
3e18fdf6 1420 if (align_jumps == 0)
2cca7283 1421 {
9e555526
RH
1422 align_jumps = processor_target_table[ix86_tune].align_jump;
1423 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
2cca7283 1424 }
3e18fdf6 1425 if (align_functions == 0)
2cca7283 1426 {
9e555526 1427 align_functions = processor_target_table[ix86_tune].align_func;
2cca7283 1428 }
3e18fdf6 1429
e4c0478d 1430 /* Validate -mpreferred-stack-boundary= value, or provide default.
fbb83b43
AO
1431 The default of 128 bits is for Pentium III's SSE __m128, but we
1432 don't want additional code to keep the stack aligned when
1433 optimizing for code size. */
1434 ix86_preferred_stack_boundary = (optimize_size
ef49d42e 1435 ? TARGET_64BIT ? 128 : 32
fbb83b43 1436 : 128);
e075ae69 1437 if (ix86_preferred_stack_boundary_string)
3af4bd89 1438 {
400500c4 1439 i = atoi (ix86_preferred_stack_boundary_string);
ef49d42e 1440 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
c6257c5d 1441 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
ef49d42e 1442 TARGET_64BIT ? 4 : 2);
400500c4
RK
1443 else
1444 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
3af4bd89 1445 }
77a989d1 1446
0f290768 1447 /* Validate -mbranch-cost= value, or provide default. */
9e555526 1448 ix86_branch_cost = processor_target_table[ix86_tune].cost->branch_cost;
e075ae69 1449 if (ix86_branch_cost_string)
804a8ee0 1450 {
400500c4
RK
1451 i = atoi (ix86_branch_cost_string);
1452 if (i < 0 || i > 5)
1453 error ("-mbranch-cost=%d is not between 0 and 5", i);
1454 else
1455 ix86_branch_cost = i;
804a8ee0 1456 }
804a8ee0 1457
f996902d
RH
1458 if (ix86_tls_dialect_string)
1459 {
1460 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1461 ix86_tls_dialect = TLS_DIALECT_GNU;
1462 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1463 ix86_tls_dialect = TLS_DIALECT_SUN;
1464 else
1465 error ("bad value (%s) for -mtls-dialect= switch",
1466 ix86_tls_dialect_string);
1467 }
1468
e9a25f70 1469 /* Keep nonleaf frame pointers. */
14c473b9
RS
1470 if (flag_omit_frame_pointer)
1471 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
1472 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
77a989d1 1473 flag_omit_frame_pointer = 1;
e075ae69
RH
1474
1475 /* If we're doing fast math, we don't care about comparison order
1476 wrt NaNs. This lets us use a shorter comparison sequence. */
de6c5979 1477 if (flag_unsafe_math_optimizations)
e075ae69
RH
1478 target_flags &= ~MASK_IEEE_FP;
1479
30c99a84
RH
1480 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1481 since the insns won't need emulation. */
1482 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1483 target_flags &= ~MASK_NO_FANCY_MATH_387;
1484
ba2baa55
RS
1485 /* Likewise, if the target doesn't have a 387, or we've specified
1486 software floating point, don't use 387 inline instrinsics. */
1487 if (!TARGET_80387)
1488 target_flags |= MASK_NO_FANCY_MATH_387;
1489
9e200aaf
KC
1490 /* Turn on SSE2 builtins for -msse3. */
1491 if (TARGET_SSE3)
22c7c85e
L
1492 target_flags |= MASK_SSE2;
1493
1494 /* Turn on SSE builtins for -msse2. */
1495 if (TARGET_SSE2)
1496 target_flags |= MASK_SSE;
1497
14f73b5a
JH
1498 if (TARGET_64BIT)
1499 {
1500 if (TARGET_ALIGN_DOUBLE)
c725bd79 1501 error ("-malign-double makes no sense in the 64bit mode");
14f73b5a 1502 if (TARGET_RTD)
c725bd79 1503 error ("-mrtd calling convention not supported in the 64bit mode");
14f73b5a 1504 /* Enable by default the SSE and MMX builtins. */
965f5423
JH
1505 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1506 ix86_fpmath = FPMATH_SSE;
14f73b5a 1507 }
965f5423 1508 else
a5b378d6
JH
1509 {
1510 ix86_fpmath = FPMATH_387;
1511 /* i386 ABI does not specify red zone. It still makes sense to use it
 1512 when the programmer takes care to prevent the stack from being destroyed. */
1513 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
1514 target_flags |= MASK_NO_RED_ZONE;
1515 }
965f5423
JH
1516
1517 if (ix86_fpmath_string != 0)
1518 {
1519 if (! strcmp (ix86_fpmath_string, "387"))
1520 ix86_fpmath = FPMATH_387;
1521 else if (! strcmp (ix86_fpmath_string, "sse"))
1522 {
1523 if (!TARGET_SSE)
1524 {
1525 warning ("SSE instruction set disabled, using 387 arithmetics");
1526 ix86_fpmath = FPMATH_387;
1527 }
1528 else
1529 ix86_fpmath = FPMATH_SSE;
1530 }
1531 else if (! strcmp (ix86_fpmath_string, "387,sse")
1532 || ! strcmp (ix86_fpmath_string, "sse,387"))
1533 {
1534 if (!TARGET_SSE)
1535 {
1536 warning ("SSE instruction set disabled, using 387 arithmetics");
1537 ix86_fpmath = FPMATH_387;
1538 }
1539 else if (!TARGET_80387)
1540 {
1541 warning ("387 instruction set disabled, using SSE arithmetics");
1542 ix86_fpmath = FPMATH_SSE;
1543 }
1544 else
1545 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1546 }
fce5a9f2 1547 else
965f5423
JH
1548 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1549 }
14f73b5a 1550
ec0641f6
RS
1551 /* If fpmath doesn't include 387, disable use of x87 intrinsics. */
1552 if (! (ix86_fpmath & FPMATH_387))
1553 target_flags |= MASK_NO_FANCY_MATH_387;
1554
a7180f70
BS
1555 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1556 on by -msse. */
1557 if (TARGET_SSE)
e37af218
RH
1558 {
1559 target_flags |= MASK_MMX;
1560 x86_prefetch_sse = true;
1561 }
c6036a37 1562
47f339cf
BS
1563 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
1564 if (TARGET_3DNOW)
1565 {
1566 target_flags |= MASK_MMX;
d1f87653 1567 /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
47f339cf
BS
1568 extensions it adds. */
1569 if (x86_3dnow_a & (1 << ix86_arch))
1570 target_flags |= MASK_3DNOW_A;
1571 }
9e555526 1572 if ((x86_accumulate_outgoing_args & TUNEMASK)
9ef1b13a 1573 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
c6036a37
JH
1574 && !optimize_size)
1575 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
623fe810
RH
1576
1577 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1578 {
1579 char *p;
1580 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1581 p = strchr (internal_label_prefix, 'X');
1582 internal_label_prefix_len = p - internal_label_prefix;
1583 *p = '\0';
1584 }
ad7b96a9
JH
1585 /* When scheduling description is not available, disable scheduler pass so it
1586 won't slow down the compilation and make x87 code slower. */
1587 if (!TARGET_SCHEDULE)
1588 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
f5316dfe
MM
1589}
1590\f
32b5b1aa 1591void
b96a374d 1592optimization_options (int level, int size ATTRIBUTE_UNUSED)
32b5b1aa 1593{
e9a25f70
JL
1594 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1595 make the problem with not enough registers even worse. */
32b5b1aa
SC
1596#ifdef INSN_SCHEDULING
1597 if (level > 1)
1598 flag_schedule_insns = 0;
1599#endif
55ba61f3
JH
1600
1601 /* The default values of these switches depend on the TARGET_64BIT
1602 that is not known at this moment. Mark these values with 2 and
1603 let user the to override these. In case there is no command line option
1604 specifying them, we will set the defaults in override_options. */
1605 if (optimize >= 1)
1606 flag_omit_frame_pointer = 2;
1607 flag_pcc_struct_return = 2;
1608 flag_asynchronous_unwind_tables = 2;
4f514514
JM
1609#ifdef SUBTARGET_OPTIMIZATION_OPTIONS
1610 SUBTARGET_OPTIMIZATION_OPTIONS;
1611#endif
32b5b1aa 1612}
b08de47e 1613\f
91d231cb
JM
1614/* Table of valid machine attributes. */
1615const struct attribute_spec ix86_attribute_table[] =
b08de47e 1616{
91d231cb 1617 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
b08de47e
MM
1618 /* Stdcall attribute says callee is responsible for popping arguments
1619 if they are not variable. */
91d231cb 1620 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
e91f04de
CH
1621 /* Fastcall attribute says callee is responsible for popping arguments
1622 if they are not variable. */
1623 { "fastcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
91d231cb
JM
1624 /* Cdecl attribute says the callee is a normal C declaration */
1625 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
b08de47e 1626 /* Regparm attribute specifies how many integer arguments are to be
0f290768 1627 passed in registers. */
91d231cb 1628 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
b2ca3702
MM
1629#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1630 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
1631 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
3da1eb0b 1632 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
91d231cb 1633#endif
fe77449a
DR
1634 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1635 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
07a43492
DJ
1636#ifdef SUBTARGET_ATTRIBUTE_TABLE
1637 SUBTARGET_ATTRIBUTE_TABLE,
1638#endif
91d231cb
JM
1639 { NULL, 0, 0, false, false, false, NULL }
1640};
1641
5fbf0217
EB
1642/* Decide whether we can make a sibling call to a function. DECL is the
1643 declaration of the function being targeted by the call and EXP is the
1644 CALL_EXPR representing the call. */
4977bab6
ZW
1645
1646static bool
b96a374d 1647ix86_function_ok_for_sibcall (tree decl, tree exp)
4977bab6
ZW
1648{
1649 /* If we are generating position-independent code, we cannot sibcall
1650 optimize any indirect call, or a direct call to a global function,
1651 as the PLT requires %ebx be live. */
1652 if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
1653 return false;
1654
1655 /* If we are returning floats on the 80387 register stack, we cannot
1656 make a sibcall from a function that doesn't return a float to a
5fbf0217
EB
1657 function that does or, conversely, from a function that does return
1658 a float to a function that doesn't; the necessary stack adjustment
1659 would not be executed. */
4977bab6 1660 if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp)))
5fbf0217 1661 != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)))))
4977bab6
ZW
1662 return false;
1663
1664 /* If this call is indirect, we'll need to be able to use a call-clobbered
b96a374d 1665 register for the address of the target function. Make sure that all
4977bab6
ZW
1666 such registers are not used for passing parameters. */
1667 if (!decl && !TARGET_64BIT)
1668 {
e767b5be 1669 tree type;
4977bab6
ZW
1670
1671 /* We're looking at the CALL_EXPR, we need the type of the function. */
1672 type = TREE_OPERAND (exp, 0); /* pointer expression */
1673 type = TREE_TYPE (type); /* pointer type */
1674 type = TREE_TYPE (type); /* function type */
1675
e767b5be 1676 if (ix86_function_regparm (type, NULL) >= 3)
4977bab6
ZW
1677 {
1678 /* ??? Need to count the actual number of registers to be used,
1679 not the possible number of registers. Fix later. */
1680 return false;
1681 }
1682 }
1683
1684 /* Otherwise okay. That also includes certain types of indirect calls. */
1685 return true;
1686}
1687
e91f04de 1688/* Handle a "cdecl", "stdcall", or "fastcall" attribute;
91d231cb
JM
1689 arguments as in struct attribute_spec.handler. */
1690static tree
b96a374d
AJ
1691ix86_handle_cdecl_attribute (tree *node, tree name,
1692 tree args ATTRIBUTE_UNUSED,
1693 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
91d231cb
JM
1694{
1695 if (TREE_CODE (*node) != FUNCTION_TYPE
1696 && TREE_CODE (*node) != METHOD_TYPE
1697 && TREE_CODE (*node) != FIELD_DECL
1698 && TREE_CODE (*node) != TYPE_DECL)
b08de47e 1699 {
9e637a26 1700 warning ("%qs attribute only applies to functions",
91d231cb
JM
1701 IDENTIFIER_POINTER (name));
1702 *no_add_attrs = true;
1703 }
e91f04de
CH
1704 else
1705 {
1706 if (is_attribute_p ("fastcall", name))
1707 {
1708 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
1709 {
1710 error ("fastcall and stdcall attributes are not compatible");
1711 }
1712 else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
1713 {
1714 error ("fastcall and regparm attributes are not compatible");
1715 }
1716 }
1717 else if (is_attribute_p ("stdcall", name))
1718 {
1719 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1720 {
1721 error ("fastcall and stdcall attributes are not compatible");
1722 }
1723 }
1724 }
b08de47e 1725
91d231cb
JM
1726 if (TARGET_64BIT)
1727 {
9e637a26 1728 warning ("%qs attribute ignored", IDENTIFIER_POINTER (name));
91d231cb
JM
1729 *no_add_attrs = true;
1730 }
b08de47e 1731
91d231cb
JM
1732 return NULL_TREE;
1733}
b08de47e 1734
91d231cb
JM
1735/* Handle a "regparm" attribute;
1736 arguments as in struct attribute_spec.handler. */
1737static tree
b96a374d
AJ
1738ix86_handle_regparm_attribute (tree *node, tree name, tree args,
1739 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
91d231cb
JM
1740{
1741 if (TREE_CODE (*node) != FUNCTION_TYPE
1742 && TREE_CODE (*node) != METHOD_TYPE
1743 && TREE_CODE (*node) != FIELD_DECL
1744 && TREE_CODE (*node) != TYPE_DECL)
1745 {
9e637a26 1746 warning ("%qs attribute only applies to functions",
91d231cb
JM
1747 IDENTIFIER_POINTER (name));
1748 *no_add_attrs = true;
1749 }
1750 else
1751 {
1752 tree cst;
b08de47e 1753
91d231cb
JM
1754 cst = TREE_VALUE (args);
1755 if (TREE_CODE (cst) != INTEGER_CST)
1756 {
9e637a26 1757 warning ("%qs attribute requires an integer constant argument",
91d231cb
JM
1758 IDENTIFIER_POINTER (name));
1759 *no_add_attrs = true;
1760 }
1761 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1762 {
9e637a26 1763 warning ("argument to %qs attribute larger than %d",
91d231cb
JM
1764 IDENTIFIER_POINTER (name), REGPARM_MAX);
1765 *no_add_attrs = true;
1766 }
e91f04de
CH
1767
1768 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
e767b5be
JH
1769 {
1770 error ("fastcall and regparm attributes are not compatible");
1771 }
b08de47e
MM
1772 }
1773
91d231cb 1774 return NULL_TREE;
b08de47e
MM
1775}
1776
1777/* Return 0 if the attributes for two types are incompatible, 1 if they
1778 are compatible, and 2 if they are nearly compatible (which causes a
1779 warning to be generated). */
1780
8d8e52be 1781static int
b96a374d 1782ix86_comp_type_attributes (tree type1, tree type2)
b08de47e 1783{
0f290768 1784 /* Check for mismatch of non-default calling convention. */
27c38fbe 1785 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
afcfe58c
MM
1786
1787 if (TREE_CODE (type1) != FUNCTION_TYPE)
1788 return 1;
1789
b96a374d 1790 /* Check for mismatched fastcall types */
e91f04de
CH
1791 if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
1792 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
b96a374d 1793 return 0;
e91f04de 1794
afcfe58c 1795 /* Check for mismatched return types (cdecl vs stdcall). */
6093f019
RH
1796 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1797 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
265d94ac
MM
1798 return 0;
1799 if (ix86_function_regparm (type1, NULL)
1800 != ix86_function_regparm (type2, NULL))
afcfe58c 1801 return 0;
b08de47e
MM
1802 return 1;
1803}
b08de47e 1804\f
e767b5be
JH
1805/* Return the regparm value for a fuctio with the indicated TYPE and DECL.
1806 DECL may be NULL when calling function indirectly
839a4992 1807 or considering a libcall. */
483ab821
MM
1808
1809static int
e767b5be 1810ix86_function_regparm (tree type, tree decl)
483ab821
MM
1811{
1812 tree attr;
e767b5be
JH
1813 int regparm = ix86_regparm;
1814 bool user_convention = false;
483ab821 1815
e767b5be
JH
1816 if (!TARGET_64BIT)
1817 {
1818 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1819 if (attr)
1820 {
1821 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1822 user_convention = true;
1823 }
1824
1825 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
1826 {
1827 regparm = 2;
1828 user_convention = true;
1829 }
1830
1831 /* Use register calling convention for local functions when possible. */
1832 if (!TARGET_64BIT && !user_convention && decl
cb0bc263 1833 && flag_unit_at_a_time && !profile_flag)
e767b5be
JH
1834 {
1835 struct cgraph_local_info *i = cgraph_local_info (decl);
1836 if (i && i->local)
1837 {
1838 /* We can't use regparm(3) for nested functions as these use
1839 static chain pointer in third argument. */
1840 if (DECL_CONTEXT (decl) && !DECL_NO_STATIC_CHAIN (decl))
1841 regparm = 2;
1842 else
1843 regparm = 3;
1844 }
1845 }
1846 }
1847 return regparm;
483ab821
MM
1848}
1849
f676971a 1850/* Return true if EAX is live at the start of the function. Used by
fe9f516f
RH
1851 ix86_expand_prologue to determine if we need special help before
1852 calling allocate_stack_worker. */
1853
1854static bool
1855ix86_eax_live_at_start_p (void)
1856{
1857 /* Cheat. Don't bother working forward from ix86_function_regparm
1858 to the function type to whether an actual argument is located in
1859 eax. Instead just look at cfg info, which is still close enough
1860 to correct at this point. This gives false positives for broken
1861 functions that might use uninitialized data that happens to be
1862 allocated in eax, but who cares? */
1863 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->global_live_at_end, 0);
1864}
1865
b08de47e
MM
1866/* Value is the number of bytes of arguments automatically
1867 popped when returning from a subroutine call.
1868 FUNDECL is the declaration node of the function (as a tree),
1869 FUNTYPE is the data type of the function (as a tree),
1870 or for a library call it is an identifier node for the subroutine name.
1871 SIZE is the number of bytes of arguments passed on the stack.
1872
1873 On the 80386, the RTD insn may be used to pop them if the number
1874 of args is fixed, but if the number is variable then the caller
1875 must pop them all. RTD can't be used for library calls now
1876 because the library is compiled with the Unix compiler.
1877 Use of RTD is a selectable option, since it is incompatible with
1878 standard Unix calling sequences. If the option is not selected,
1879 the caller must always pop the args.
1880
1881 The attribute stdcall is equivalent to RTD on a per module basis. */
1882
1883int
b96a374d 1884ix86_return_pops_args (tree fundecl, tree funtype, int size)
79325812 1885{
3345ee7d 1886 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
b08de47e 1887
43f3a59d 1888 /* Cdecl functions override -mrtd, and never pop the stack. */
e9a25f70 1889 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
79325812 1890
43f3a59d
KH
1891 /* Stdcall and fastcall functions will pop the stack if not
1892 variable args. */
e91f04de
CH
1893 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
1894 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
698cdd84 1895 rtd = 1;
79325812 1896
698cdd84
SC
1897 if (rtd
1898 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
e9a25f70
JL
1899 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1900 == void_type_node)))
698cdd84
SC
1901 return size;
1902 }
79325812 1903
232b8f52 1904 /* Lose any fake structure return argument if it is passed on the stack. */
61f71b34 1905 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
61fec9ff
JB
1906 && !TARGET_64BIT
1907 && !KEEP_AGGREGATE_RETURN_POINTER)
232b8f52 1908 {
e767b5be 1909 int nregs = ix86_function_regparm (funtype, fundecl);
232b8f52
JJ
1910
1911 if (!nregs)
1912 return GET_MODE_SIZE (Pmode);
1913 }
1914
1915 return 0;
b08de47e 1916}
b08de47e
MM
1917\f
1918/* Argument support functions. */
1919
53c17031
JH
1920/* Return true when register may be used to pass function parameters. */
1921bool
b96a374d 1922ix86_function_arg_regno_p (int regno)
53c17031
JH
1923{
1924 int i;
1925 if (!TARGET_64BIT)
0333394e
JJ
1926 return (regno < REGPARM_MAX
1927 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
53c17031
JH
1928 if (SSE_REGNO_P (regno) && TARGET_SSE)
1929 return true;
1930 /* RAX is used as hidden argument to va_arg functions. */
1931 if (!regno)
1932 return true;
1933 for (i = 0; i < REGPARM_MAX; i++)
1934 if (regno == x86_64_int_parameter_registers[i])
1935 return true;
1936 return false;
1937}
1938
fe984136
RH
1939/* Return if we do not know how to pass TYPE solely in registers. */
1940
1941static bool
1942ix86_must_pass_in_stack (enum machine_mode mode, tree type)
1943{
1944 if (must_pass_in_stack_var_size_or_pad (mode, type))
1945 return true;
dcbca208
RH
1946
1947 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
1948 The layout_type routine is crafty and tries to trick us into passing
1949 currently unsupported vector types on the stack by using TImode. */
1950 return (!TARGET_64BIT && mode == TImode
1951 && type && TREE_CODE (type) != VECTOR_TYPE);
fe984136
RH
1952}
1953
b08de47e
MM
1954/* Initialize a variable CUM of type CUMULATIVE_ARGS
1955 for a call to a function whose data type is FNTYPE.
1956 For a library call, FNTYPE is 0. */
1957
1958void
b96a374d
AJ
1959init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
1960 tree fntype, /* tree ptr for function decl */
1961 rtx libname, /* SYMBOL_REF of library name or 0 */
1962 tree fndecl)
b08de47e
MM
1963{
1964 static CUMULATIVE_ARGS zero_cum;
1965 tree param, next_param;
1966
1967 if (TARGET_DEBUG_ARG)
1968 {
1969 fprintf (stderr, "\ninit_cumulative_args (");
1970 if (fntype)
e9a25f70
JL
1971 fprintf (stderr, "fntype code = %s, ret code = %s",
1972 tree_code_name[(int) TREE_CODE (fntype)],
1973 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
b08de47e
MM
1974 else
1975 fprintf (stderr, "no fntype");
1976
1977 if (libname)
1978 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1979 }
1980
1981 *cum = zero_cum;
1982
1983 /* Set up the number of registers to use for passing arguments. */
e767b5be
JH
1984 if (fntype)
1985 cum->nregs = ix86_function_regparm (fntype, fndecl);
1986 else
1987 cum->nregs = ix86_regparm;
78fbfc4b
JB
1988 if (TARGET_SSE)
1989 cum->sse_nregs = SSE_REGPARM_MAX;
1990 if (TARGET_MMX)
1991 cum->mmx_nregs = MMX_REGPARM_MAX;
e1be55d0
JH
1992 cum->warn_sse = true;
1993 cum->warn_mmx = true;
53c17031 1994 cum->maybe_vaarg = false;
b08de47e 1995
e91f04de
CH
1996 /* Use ecx and edx registers if function has fastcall attribute */
1997 if (fntype && !TARGET_64BIT)
1998 {
1999 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
2000 {
2001 cum->nregs = 2;
2002 cum->fastcall = 1;
2003 }
2004 }
2005
b08de47e
MM
2006 /* Determine if this function has variable arguments. This is
2007 indicated by the last argument being 'void_type_mode' if there
2008 are no variable arguments. If there are variable arguments, then
78fbfc4b 2009 we won't pass anything in registers in 32-bit mode. */
b08de47e 2010
78fbfc4b 2011 if (cum->nregs || cum->mmx_nregs || cum->sse_nregs)
b08de47e
MM
2012 {
2013 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
e9a25f70 2014 param != 0; param = next_param)
b08de47e
MM
2015 {
2016 next_param = TREE_CHAIN (param);
e9a25f70 2017 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
53c17031
JH
2018 {
2019 if (!TARGET_64BIT)
e91f04de
CH
2020 {
2021 cum->nregs = 0;
e1be55d0
JH
2022 cum->sse_nregs = 0;
2023 cum->mmx_nregs = 0;
2024 cum->warn_sse = 0;
2025 cum->warn_mmx = 0;
e91f04de
CH
2026 cum->fastcall = 0;
2027 }
53c17031
JH
2028 cum->maybe_vaarg = true;
2029 }
b08de47e
MM
2030 }
2031 }
53c17031
JH
2032 if ((!fntype && !libname)
2033 || (fntype && !TYPE_ARG_TYPES (fntype)))
2034 cum->maybe_vaarg = 1;
b08de47e
MM
2035
2036 if (TARGET_DEBUG_ARG)
2037 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
2038
2039 return;
2040}
2041
d1f87653 2042/* x86-64 register passing implementation. See x86-64 ABI for details. Goal
f710504c 2043 of this code is to classify each 8bytes of incoming argument by the register
53c17031
JH
2044 class and assign registers accordingly. */
2045
2046/* Return the union class of CLASS1 and CLASS2.
2047 See the x86-64 PS ABI for details. */
2048
2049static enum x86_64_reg_class
b96a374d 2050merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
53c17031
JH
2051{
2052 /* Rule #1: If both classes are equal, this is the resulting class. */
2053 if (class1 == class2)
2054 return class1;
2055
2056 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
2057 the other class. */
2058 if (class1 == X86_64_NO_CLASS)
2059 return class2;
2060 if (class2 == X86_64_NO_CLASS)
2061 return class1;
2062
2063 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
2064 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
2065 return X86_64_MEMORY_CLASS;
2066
2067 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
2068 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
2069 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
2070 return X86_64_INTEGERSI_CLASS;
2071 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
2072 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
2073 return X86_64_INTEGER_CLASS;
2074
2075 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
2076 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
2077 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
2078 return X86_64_MEMORY_CLASS;
2079
2080 /* Rule #6: Otherwise class SSE is used. */
2081 return X86_64_SSE_CLASS;
2082}
2083
2084/* Classify the argument of type TYPE and mode MODE.
2085 CLASSES will be filled by the register class used to pass each word
2086 of the operand. The number of words is returned. In case the parameter
2087 should be passed in memory, 0 is returned. As a special case for zero
2088 sized containers, classes[0] will be NO_CLASS and 1 is returned.
2089
2090 BIT_OFFSET is used internally for handling records and specifies offset
2091 of the offset in bits modulo 256 to avoid overflow cases.
2092
2093 See the x86-64 PS ABI for details.
2094*/
2095
2096static int
b96a374d
AJ
2097classify_argument (enum machine_mode mode, tree type,
2098 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
53c17031 2099{
296e4ae8 2100 HOST_WIDE_INT bytes =
53c17031 2101 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
23327dae 2102 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
53c17031 2103
c60ee6f5
JH
2104 /* Variable sized entities are always passed/returned in memory. */
2105 if (bytes < 0)
2106 return 0;
2107
dafc5b82 2108 if (mode != VOIDmode
fe984136 2109 && targetm.calls.must_pass_in_stack (mode, type))
dafc5b82
JH
2110 return 0;
2111
53c17031
JH
2112 if (type && AGGREGATE_TYPE_P (type))
2113 {
2114 int i;
2115 tree field;
2116 enum x86_64_reg_class subclasses[MAX_CLASSES];
2117
2118 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
2119 if (bytes > 16)
2120 return 0;
2121
2122 for (i = 0; i < words; i++)
2123 classes[i] = X86_64_NO_CLASS;
2124
2125 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
2126 signalize memory class, so handle it as special case. */
2127 if (!words)
2128 {
2129 classes[0] = X86_64_NO_CLASS;
2130 return 1;
2131 }
2132
2133 /* Classify each field of record and merge classes. */
2134 if (TREE_CODE (type) == RECORD_TYPE)
2135 {
91ea38f9 2136 /* For classes first merge in the field of the subclasses. */
fa743e8c 2137 if (TYPE_BINFO (type))
91ea38f9 2138 {
fa743e8c 2139 tree binfo, base_binfo;
e8112eac 2140 int basenum;
91ea38f9 2141
e8112eac
ZK
2142 for (binfo = TYPE_BINFO (type), basenum = 0;
2143 BINFO_BASE_ITERATE (binfo, basenum, base_binfo); basenum++)
91ea38f9 2144 {
91ea38f9 2145 int num;
fa743e8c
NS
2146 int offset = tree_low_cst (BINFO_OFFSET (base_binfo), 0) * 8;
2147 tree type = BINFO_TYPE (base_binfo);
91ea38f9
JH
2148
2149 num = classify_argument (TYPE_MODE (type),
2150 type, subclasses,
2151 (offset + bit_offset) % 256);
2152 if (!num)
2153 return 0;
2154 for (i = 0; i < num; i++)
2155 {
db01f480 2156 int pos = (offset + (bit_offset % 64)) / 8 / 8;
91ea38f9
JH
2157 classes[i + pos] =
2158 merge_classes (subclasses[i], classes[i + pos]);
2159 }
2160 }
2161 }
43f3a59d 2162 /* And now merge the fields of structure. */
53c17031
JH
2163 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2164 {
2165 if (TREE_CODE (field) == FIELD_DECL)
2166 {
2167 int num;
2168
2169 /* Bitfields are always classified as integer. Handle them
2170 early, since later code would consider them to be
2171 misaligned integers. */
2172 if (DECL_BIT_FIELD (field))
2173 {
2174 for (i = int_bit_position (field) / 8 / 8;
2175 i < (int_bit_position (field)
2176 + tree_low_cst (DECL_SIZE (field), 0)
b96a374d 2177 + 63) / 8 / 8; i++)
53c17031
JH
2178 classes[i] =
2179 merge_classes (X86_64_INTEGER_CLASS,
2180 classes[i]);
2181 }
2182 else
2183 {
2184 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2185 TREE_TYPE (field), subclasses,
2186 (int_bit_position (field)
2187 + bit_offset) % 256);
2188 if (!num)
2189 return 0;
2190 for (i = 0; i < num; i++)
2191 {
2192 int pos =
db01f480 2193 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
53c17031
JH
2194 classes[i + pos] =
2195 merge_classes (subclasses[i], classes[i + pos]);
2196 }
2197 }
2198 }
2199 }
2200 }
2201 /* Arrays are handled as small records. */
2202 else if (TREE_CODE (type) == ARRAY_TYPE)
2203 {
2204 int num;
2205 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2206 TREE_TYPE (type), subclasses, bit_offset);
2207 if (!num)
2208 return 0;
2209
2210 /* The partial classes are now full classes. */
2211 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2212 subclasses[0] = X86_64_SSE_CLASS;
2213 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
2214 subclasses[0] = X86_64_INTEGER_CLASS;
2215
2216 for (i = 0; i < words; i++)
2217 classes[i] = subclasses[i % num];
2218 }
2219 /* Unions are similar to RECORD_TYPE but offset is always 0. */
e4dbaed5
AS
2220 else if (TREE_CODE (type) == UNION_TYPE
2221 || TREE_CODE (type) == QUAL_UNION_TYPE)
53c17031 2222 {
91ea38f9 2223 /* For classes first merge in the field of the subclasses. */
fa743e8c 2224 if (TYPE_BINFO (type))
91ea38f9 2225 {
fa743e8c 2226 tree binfo, base_binfo;
e8112eac 2227 int basenum;
91ea38f9 2228
e8112eac
ZK
2229 for (binfo = TYPE_BINFO (type), basenum = 0;
2230 BINFO_BASE_ITERATE (binfo, basenum, base_binfo); basenum++)
91ea38f9 2231 {
91ea38f9 2232 int num;
fa743e8c
NS
2233 int offset = tree_low_cst (BINFO_OFFSET (base_binfo), 0) * 8;
2234 tree type = BINFO_TYPE (base_binfo);
91ea38f9
JH
2235
2236 num = classify_argument (TYPE_MODE (type),
2237 type, subclasses,
db01f480 2238 (offset + (bit_offset % 64)) % 256);
91ea38f9
JH
2239 if (!num)
2240 return 0;
2241 for (i = 0; i < num; i++)
2242 {
c16576e6 2243 int pos = (offset + (bit_offset % 64)) / 8 / 8;
91ea38f9
JH
2244 classes[i + pos] =
2245 merge_classes (subclasses[i], classes[i + pos]);
2246 }
2247 }
2248 }
53c17031
JH
2249 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2250 {
2251 if (TREE_CODE (field) == FIELD_DECL)
2252 {
2253 int num;
2254 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2255 TREE_TYPE (field), subclasses,
2256 bit_offset);
2257 if (!num)
2258 return 0;
2259 for (i = 0; i < num; i++)
2260 classes[i] = merge_classes (subclasses[i], classes[i]);
2261 }
2262 }
2263 }
2264 else
2265 abort ();
2266
2267 /* Final merger cleanup. */
2268 for (i = 0; i < words; i++)
2269 {
2270 /* If one class is MEMORY, everything should be passed in
2271 memory. */
2272 if (classes[i] == X86_64_MEMORY_CLASS)
2273 return 0;
2274
d6a7951f 2275 /* The X86_64_SSEUP_CLASS should be always preceded by
53c17031
JH
2276 X86_64_SSE_CLASS. */
2277 if (classes[i] == X86_64_SSEUP_CLASS
2278 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
2279 classes[i] = X86_64_SSE_CLASS;
2280
d6a7951f 2281 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
53c17031
JH
2282 if (classes[i] == X86_64_X87UP_CLASS
2283 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
2284 classes[i] = X86_64_SSE_CLASS;
2285 }
2286 return words;
2287 }
2288
2289 /* Compute alignment needed. We align all types to natural boundaries with
2290 exception of XFmode that is aligned to 64bits. */
2291 if (mode != VOIDmode && mode != BLKmode)
2292 {
2293 int mode_alignment = GET_MODE_BITSIZE (mode);
2294
2295 if (mode == XFmode)
2296 mode_alignment = 128;
2297 else if (mode == XCmode)
2298 mode_alignment = 256;
2c6b27c3
JH
2299 if (COMPLEX_MODE_P (mode))
2300 mode_alignment /= 2;
f5143c46 2301 /* Misaligned fields are always returned in memory. */
53c17031
JH
2302 if (bit_offset % mode_alignment)
2303 return 0;
2304 }
2305
9e9fb0ce
JB
2306 /* for V1xx modes, just use the base mode */
2307 if (VECTOR_MODE_P (mode)
2308 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
2309 mode = GET_MODE_INNER (mode);
2310
53c17031
JH
2311 /* Classification of atomic types. */
2312 switch (mode)
2313 {
2314 case DImode:
2315 case SImode:
2316 case HImode:
2317 case QImode:
2318 case CSImode:
2319 case CHImode:
2320 case CQImode:
2321 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2322 classes[0] = X86_64_INTEGERSI_CLASS;
2323 else
2324 classes[0] = X86_64_INTEGER_CLASS;
2325 return 1;
2326 case CDImode:
2327 case TImode:
2328 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2329 return 2;
2330 case CTImode:
9e9fb0ce 2331 return 0;
53c17031
JH
2332 case SFmode:
2333 if (!(bit_offset % 64))
2334 classes[0] = X86_64_SSESF_CLASS;
2335 else
2336 classes[0] = X86_64_SSE_CLASS;
2337 return 1;
2338 case DFmode:
2339 classes[0] = X86_64_SSEDF_CLASS;
2340 return 1;
f8a1ebc6 2341 case XFmode:
53c17031
JH
2342 classes[0] = X86_64_X87_CLASS;
2343 classes[1] = X86_64_X87UP_CLASS;
2344 return 2;
f8a1ebc6 2345 case TFmode:
9e9fb0ce
JB
2346 classes[0] = X86_64_SSE_CLASS;
2347 classes[1] = X86_64_SSEUP_CLASS;
53c17031
JH
2348 return 2;
2349 case SCmode:
2350 classes[0] = X86_64_SSE_CLASS;
2351 return 1;
9e9fb0ce
JB
2352 case DCmode:
2353 classes[0] = X86_64_SSEDF_CLASS;
2354 classes[1] = X86_64_SSEDF_CLASS;
2355 return 2;
2356 case XCmode:
2357 case TCmode:
2358 /* These modes are larger than 16 bytes. */
2359 return 0;
e95d6b23
JH
2360 case V4SFmode:
2361 case V4SImode:
495333a6
JH
2362 case V16QImode:
2363 case V8HImode:
2364 case V2DFmode:
2365 case V2DImode:
e95d6b23
JH
2366 classes[0] = X86_64_SSE_CLASS;
2367 classes[1] = X86_64_SSEUP_CLASS;
2368 return 2;
2369 case V2SFmode:
2370 case V2SImode:
2371 case V4HImode:
2372 case V8QImode:
9e9fb0ce
JB
2373 classes[0] = X86_64_SSE_CLASS;
2374 return 1;
53c17031 2375 case BLKmode:
e95d6b23 2376 case VOIDmode:
53c17031
JH
2377 return 0;
2378 default:
9e9fb0ce
JB
2379 if (VECTOR_MODE_P (mode))
2380 {
2381 if (bytes > 16)
2382 return 0;
2383 if (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT)
2384 {
2385 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2386 classes[0] = X86_64_INTEGERSI_CLASS;
2387 else
2388 classes[0] = X86_64_INTEGER_CLASS;
2389 classes[1] = X86_64_INTEGER_CLASS;
2390 return 1 + (bytes > 8);
2391 }
2392 }
53c17031
JH
2393 abort ();
2394 }
2395}
2396
2397/* Examine the argument and return set number of register required in each
f5143c46 2398 class. Return 0 iff parameter should be passed in memory. */
53c17031 2399static int
b96a374d
AJ
2400examine_argument (enum machine_mode mode, tree type, int in_return,
2401 int *int_nregs, int *sse_nregs)
53c17031
JH
2402{
2403 enum x86_64_reg_class class[MAX_CLASSES];
2404 int n = classify_argument (mode, type, class, 0);
2405
2406 *int_nregs = 0;
2407 *sse_nregs = 0;
2408 if (!n)
2409 return 0;
2410 for (n--; n >= 0; n--)
2411 switch (class[n])
2412 {
2413 case X86_64_INTEGER_CLASS:
2414 case X86_64_INTEGERSI_CLASS:
2415 (*int_nregs)++;
2416 break;
2417 case X86_64_SSE_CLASS:
2418 case X86_64_SSESF_CLASS:
2419 case X86_64_SSEDF_CLASS:
2420 (*sse_nregs)++;
2421 break;
2422 case X86_64_NO_CLASS:
2423 case X86_64_SSEUP_CLASS:
2424 break;
2425 case X86_64_X87_CLASS:
2426 case X86_64_X87UP_CLASS:
2427 if (!in_return)
2428 return 0;
2429 break;
2430 case X86_64_MEMORY_CLASS:
2431 abort ();
2432 }
2433 return 1;
2434}
2435/* Construct container for the argument used by GCC interface. See
2436 FUNCTION_ARG for the detailed description. */
2437static rtx
b96a374d
AJ
2438construct_container (enum machine_mode mode, tree type, int in_return,
2439 int nintregs, int nsseregs, const int * intreg,
2440 int sse_regno)
53c17031
JH
2441{
2442 enum machine_mode tmpmode;
2443 int bytes =
2444 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2445 enum x86_64_reg_class class[MAX_CLASSES];
2446 int n;
2447 int i;
2448 int nexps = 0;
2449 int needed_sseregs, needed_intregs;
2450 rtx exp[MAX_CLASSES];
2451 rtx ret;
2452
2453 n = classify_argument (mode, type, class, 0);
2454 if (TARGET_DEBUG_ARG)
2455 {
2456 if (!n)
2457 fprintf (stderr, "Memory class\n");
2458 else
2459 {
2460 fprintf (stderr, "Classes:");
2461 for (i = 0; i < n; i++)
2462 {
2463 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2464 }
2465 fprintf (stderr, "\n");
2466 }
2467 }
2468 if (!n)
2469 return NULL;
2470 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
2471 return NULL;
2472 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2473 return NULL;
2474
2475 /* First construct simple cases. Avoid SCmode, since we want to use
2476 single register to pass this type. */
2477 if (n == 1 && mode != SCmode)
2478 switch (class[0])
2479 {
2480 case X86_64_INTEGER_CLASS:
2481 case X86_64_INTEGERSI_CLASS:
2482 return gen_rtx_REG (mode, intreg[0]);
2483 case X86_64_SSE_CLASS:
2484 case X86_64_SSESF_CLASS:
2485 case X86_64_SSEDF_CLASS:
2486 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2487 case X86_64_X87_CLASS:
2488 return gen_rtx_REG (mode, FIRST_STACK_REG);
2489 case X86_64_NO_CLASS:
2490 /* Zero sized array, struct or class. */
2491 return NULL;
2492 default:
2493 abort ();
2494 }
2c6b27c3
JH
2495 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
2496 && mode != BLKmode)
e95d6b23 2497 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
53c17031
JH
2498 if (n == 2
2499 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
f8a1ebc6 2500 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
53c17031
JH
2501 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2502 && class[1] == X86_64_INTEGER_CLASS
f8a1ebc6 2503 && (mode == CDImode || mode == TImode || mode == TFmode)
53c17031
JH
2504 && intreg[0] + 1 == intreg[1])
2505 return gen_rtx_REG (mode, intreg[0]);
2506 if (n == 4
2507 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
2c6b27c3
JH
2508 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS
2509 && mode != BLKmode)
f8a1ebc6 2510 return gen_rtx_REG (XCmode, FIRST_STACK_REG);
53c17031
JH
2511
2512 /* Otherwise figure out the entries of the PARALLEL. */
2513 for (i = 0; i < n; i++)
2514 {
2515 switch (class[i])
2516 {
2517 case X86_64_NO_CLASS:
2518 break;
2519 case X86_64_INTEGER_CLASS:
2520 case X86_64_INTEGERSI_CLASS:
d1f87653 2521 /* Merge TImodes on aligned occasions here too. */
53c17031
JH
2522 if (i * 8 + 8 > bytes)
2523 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2524 else if (class[i] == X86_64_INTEGERSI_CLASS)
2525 tmpmode = SImode;
2526 else
2527 tmpmode = DImode;
2528 /* We've requested 24 bytes we don't have mode for. Use DImode. */
2529 if (tmpmode == BLKmode)
2530 tmpmode = DImode;
2531 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2532 gen_rtx_REG (tmpmode, *intreg),
2533 GEN_INT (i*8));
2534 intreg++;
2535 break;
2536 case X86_64_SSESF_CLASS:
2537 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2538 gen_rtx_REG (SFmode,
2539 SSE_REGNO (sse_regno)),
2540 GEN_INT (i*8));
2541 sse_regno++;
2542 break;
2543 case X86_64_SSEDF_CLASS:
2544 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2545 gen_rtx_REG (DFmode,
2546 SSE_REGNO (sse_regno)),
2547 GEN_INT (i*8));
2548 sse_regno++;
2549 break;
2550 case X86_64_SSE_CLASS:
12f5c45e
JH
2551 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2552 tmpmode = TImode;
53c17031
JH
2553 else
2554 tmpmode = DImode;
2555 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2556 gen_rtx_REG (tmpmode,
2557 SSE_REGNO (sse_regno)),
2558 GEN_INT (i*8));
12f5c45e
JH
2559 if (tmpmode == TImode)
2560 i++;
53c17031
JH
2561 sse_regno++;
2562 break;
2563 default:
2564 abort ();
2565 }
2566 }
2567 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2568 for (i = 0; i < nexps; i++)
2569 XVECEXP (ret, 0, i) = exp [i];
2570 return ret;
2571}
2572
b08de47e
MM
2573/* Update the data in CUM to advance over an argument
2574 of mode MODE and data type TYPE.
2575 (TYPE is null for libcalls where that information may not be available.) */
2576
2577void
b96a374d
AJ
2578function_arg_advance (CUMULATIVE_ARGS *cum, /* current arg information */
2579 enum machine_mode mode, /* current arg mode */
2580 tree type, /* type of the argument or 0 if lib support */
2581 int named) /* whether or not the argument was named */
b08de47e 2582{
5ac9118e
KG
2583 int bytes =
2584 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
b08de47e
MM
2585 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2586
2587 if (TARGET_DEBUG_ARG)
2588 fprintf (stderr,
bcf17554
JH
2589 "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, mode=%s, named=%d)\n\n",
2590 words, cum->words, cum->nregs, cum->sse_nregs, GET_MODE_NAME (mode), named);
53c17031 2591 if (TARGET_64BIT)
b08de47e 2592 {
53c17031
JH
2593 int int_nregs, sse_nregs;
2594 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2595 cum->words += words;
2596 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
82a127a9 2597 {
53c17031
JH
2598 cum->nregs -= int_nregs;
2599 cum->sse_nregs -= sse_nregs;
2600 cum->regno += int_nregs;
2601 cum->sse_regno += sse_nregs;
82a127a9 2602 }
53c17031
JH
2603 else
2604 cum->words += words;
b08de47e 2605 }
a4f31c00 2606 else
82a127a9 2607 {
bcf17554
JH
2608 if (TARGET_SSE && SSE_REG_MODE_P (mode)
2609 && (!type || !AGGREGATE_TYPE_P (type)))
53c17031
JH
2610 {
2611 cum->sse_words += words;
2612 cum->sse_nregs -= 1;
2613 cum->sse_regno += 1;
2614 if (cum->sse_nregs <= 0)
2615 {
2616 cum->sse_nregs = 0;
2617 cum->sse_regno = 0;
2618 }
2619 }
bcf17554
JH
2620 else if (TARGET_MMX && MMX_REG_MODE_P (mode)
2621 && (!type || !AGGREGATE_TYPE_P (type)))
2622 {
2623 cum->mmx_words += words;
2624 cum->mmx_nregs -= 1;
2625 cum->mmx_regno += 1;
2626 if (cum->mmx_nregs <= 0)
2627 {
2628 cum->mmx_nregs = 0;
2629 cum->mmx_regno = 0;
2630 }
2631 }
53c17031 2632 else
82a127a9 2633 {
53c17031
JH
2634 cum->words += words;
2635 cum->nregs -= words;
2636 cum->regno += words;
2637
2638 if (cum->nregs <= 0)
2639 {
2640 cum->nregs = 0;
2641 cum->regno = 0;
2642 }
82a127a9
CM
2643 }
2644 }
b08de47e
MM
2645 return;
2646}
2647
dcbca208
RH
2648/* A subroutine of function_arg. We want to pass a parameter whose nominal
2649 type is MODE in REGNO. We try to minimize ABI variation, so MODE may not
2650 actually be valid for REGNO with the current ISA. In this case, ALT_MODE
2651 is used instead. It must be the same size as MODE, and must be known to
2652 be valid for REGNO. Finally, ORIG_MODE is the original mode of the
2653 parameter, as seen by the type system. This may be different from MODE
2654 when we're mucking with things minimizing ABI variations.
2655
2656 Returns a REG or a PARALLEL as appropriate. */
2657
2658static rtx
2659gen_reg_or_parallel (enum machine_mode mode, enum machine_mode alt_mode,
2660 enum machine_mode orig_mode, unsigned int regno)
2661{
2662 rtx tmp;
2663
2664 if (HARD_REGNO_MODE_OK (regno, mode))
2665 tmp = gen_rtx_REG (mode, regno);
2666 else
2667 {
2668 tmp = gen_rtx_REG (alt_mode, regno);
2669 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
2670 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
2671 }
2672
2673 return tmp;
2674}
2675
b08de47e
MM
2676/* Define where to put the arguments to a function.
2677 Value is zero to push the argument on the stack,
2678 or a hard register in which to store the argument.
2679
2680 MODE is the argument's machine mode.
2681 TYPE is the data type of the argument (as a tree).
2682 This is null for libcalls where that information may
2683 not be available.
2684 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2685 the preceding args and about the function being called.
2686 NAMED is nonzero if this argument is a named parameter
2687 (otherwise it is an extra parameter matching an ellipsis). */
2688
07933f72 2689rtx
dcbca208
RH
2690function_arg (CUMULATIVE_ARGS *cum, enum machine_mode orig_mode,
2691 tree type, int named)
b08de47e 2692{
dcbca208
RH
2693 enum machine_mode mode = orig_mode;
2694 rtx ret = NULL_RTX;
5ac9118e
KG
2695 int bytes =
2696 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
b08de47e 2697 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
bcf17554 2698 static bool warnedsse, warnedmmx;
b08de47e 2699
90d5887b
PB
2700 /* To simplify the code below, represent vector types with a vector mode
2701 even if MMX/SSE are not active. */
2702 if (type
2703 && TREE_CODE (type) == VECTOR_TYPE
2704 && (bytes == 8 || bytes == 16)
2705 && GET_MODE_CLASS (TYPE_MODE (type)) != MODE_VECTOR_INT
2706 && GET_MODE_CLASS (TYPE_MODE (type)) != MODE_VECTOR_FLOAT)
2707 {
2708 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
7c27e184
PB
2709 enum machine_mode newmode
2710 = TREE_CODE (TREE_TYPE (type)) == REAL_TYPE
2711 ? MIN_MODE_VECTOR_FLOAT : MIN_MODE_VECTOR_INT;
90d5887b
PB
2712
2713 /* Get the mode which has this inner mode and number of units. */
7c27e184
PB
2714 for (; newmode != VOIDmode; newmode = GET_MODE_WIDER_MODE (newmode))
2715 if (GET_MODE_NUNITS (newmode) == TYPE_VECTOR_SUBPARTS (type)
2716 && GET_MODE_INNER (newmode) == innermode)
2717 {
2718 mode = newmode;
2719 break;
2720 }
90d5887b
PB
2721 }
2722
5bdc5878 2723 /* Handle a hidden AL argument containing number of registers for varargs
53c17031
JH
2724 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
2725 any AL settings. */
32ee7d1d 2726 if (mode == VOIDmode)
b08de47e 2727 {
53c17031
JH
2728 if (TARGET_64BIT)
2729 return GEN_INT (cum->maybe_vaarg
2730 ? (cum->sse_nregs < 0
2731 ? SSE_REGPARM_MAX
2732 : cum->sse_regno)
2733 : -1);
2734 else
2735 return constm1_rtx;
b08de47e 2736 }
53c17031
JH
2737 if (TARGET_64BIT)
2738 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2739 &x86_64_int_parameter_registers [cum->regno],
2740 cum->sse_regno);
2741 else
2742 switch (mode)
2743 {
2744 /* For now, pass fp/complex values on the stack. */
2745 default:
2746 break;
2747
2748 case BLKmode:
8d454008
RH
2749 if (bytes < 0)
2750 break;
5efb1046 2751 /* FALLTHRU */
53c17031
JH
2752 case DImode:
2753 case SImode:
2754 case HImode:
2755 case QImode:
2756 if (words <= cum->nregs)
b96a374d
AJ
2757 {
2758 int regno = cum->regno;
2759
2760 /* Fastcall allocates the first two DWORD (SImode) or
2761 smaller arguments to ECX and EDX. */
2762 if (cum->fastcall)
2763 {
2764 if (mode == BLKmode || mode == DImode)
2765 break;
2766
2767 /* ECX not EAX is the first allocated register. */
2768 if (regno == 0)
e767b5be 2769 regno = 2;
b96a374d
AJ
2770 }
2771 ret = gen_rtx_REG (mode, regno);
2772 }
53c17031
JH
2773 break;
2774 case TImode:
bcf17554
JH
2775 case V16QImode:
2776 case V8HImode:
2777 case V4SImode:
2778 case V2DImode:
2779 case V4SFmode:
2780 case V2DFmode:
2781 if (!type || !AGGREGATE_TYPE_P (type))
2782 {
78fbfc4b 2783 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
bcf17554
JH
2784 {
2785 warnedsse = true;
2786 warning ("SSE vector argument without SSE enabled "
2787 "changes the ABI");
2788 }
2789 if (cum->sse_nregs)
dcbca208
RH
2790 ret = gen_reg_or_parallel (mode, TImode, orig_mode,
2791 cum->sse_regno + FIRST_SSE_REG);
bcf17554
JH
2792 }
2793 break;
2794 case V8QImode:
2795 case V4HImode:
2796 case V2SImode:
2797 case V2SFmode:
2798 if (!type || !AGGREGATE_TYPE_P (type))
2799 {
e1be55d0 2800 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
bcf17554
JH
2801 {
2802 warnedmmx = true;
2803 warning ("MMX vector argument without MMX enabled "
2804 "changes the ABI");
2805 }
2806 if (cum->mmx_nregs)
dcbca208
RH
2807 ret = gen_reg_or_parallel (mode, DImode, orig_mode,
2808 cum->mmx_regno + FIRST_MMX_REG);
bcf17554 2809 }
53c17031
JH
2810 break;
2811 }
b08de47e
MM
2812
2813 if (TARGET_DEBUG_ARG)
2814 {
2815 fprintf (stderr,
91ea38f9 2816 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
b08de47e
MM
2817 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2818
2819 if (ret)
91ea38f9 2820 print_simple_rtl (stderr, ret);
b08de47e
MM
2821 else
2822 fprintf (stderr, ", stack");
2823
2824 fprintf (stderr, " )\n");
2825 }
2826
2827 return ret;
2828}
53c17031 2829
09b2e78d
ZD
2830/* A C expression that indicates when an argument must be passed by
2831 reference. If nonzero for an argument, a copy of that argument is
2832 made in memory and a pointer to the argument is passed instead of
2833 the argument itself. The pointer is passed in whatever way is
2834 appropriate for passing a pointer to that type. */
2835
8cd5a4e0
RH
2836static bool
2837ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
2838 enum machine_mode mode ATTRIBUTE_UNUSED,
2839 tree type, bool named ATTRIBUTE_UNUSED)
09b2e78d
ZD
2840{
2841 if (!TARGET_64BIT)
2842 return 0;
2843
2844 if (type && int_size_in_bytes (type) == -1)
2845 {
2846 if (TARGET_DEBUG_ARG)
2847 fprintf (stderr, "function_arg_pass_by_reference\n");
2848 return 1;
2849 }
2850
2851 return 0;
2852}
2853
8b978a57 2854/* Return true when TYPE should be 128bit aligned for 32bit argument passing
90d5887b 2855 ABI. Only called if TARGET_SSE. */
8b978a57 2856static bool
b96a374d 2857contains_128bit_aligned_vector_p (tree type)
8b978a57
JH
2858{
2859 enum machine_mode mode = TYPE_MODE (type);
2860 if (SSE_REG_MODE_P (mode)
2861 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
2862 return true;
2863 if (TYPE_ALIGN (type) < 128)
2864 return false;
2865
2866 if (AGGREGATE_TYPE_P (type))
2867 {
2a43945f 2868 /* Walk the aggregates recursively. */
8b978a57
JH
2869 if (TREE_CODE (type) == RECORD_TYPE
2870 || TREE_CODE (type) == UNION_TYPE
2871 || TREE_CODE (type) == QUAL_UNION_TYPE)
2872 {
2873 tree field;
2874
fa743e8c 2875 if (TYPE_BINFO (type))
8b978a57 2876 {
fa743e8c 2877 tree binfo, base_binfo;
8b978a57
JH
2878 int i;
2879
fa743e8c
NS
2880 for (binfo = TYPE_BINFO (type), i = 0;
2881 BINFO_BASE_ITERATE (binfo, i, base_binfo); i++)
2882 if (contains_128bit_aligned_vector_p (BINFO_TYPE (base_binfo)))
2883 return true;
8b978a57 2884 }
43f3a59d 2885 /* And now merge the fields of structure. */
8b978a57
JH
2886 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2887 {
2888 if (TREE_CODE (field) == FIELD_DECL
2889 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
2890 return true;
2891 }
2892 }
2893 /* Just for use if some languages passes arrays by value. */
2894 else if (TREE_CODE (type) == ARRAY_TYPE)
2895 {
2896 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
2897 return true;
2898 }
2899 else
2900 abort ();
2901 }
2902 return false;
2903}
2904
bb498ea3
AH
2905/* Gives the alignment boundary, in bits, of an argument with the
2906 specified mode and type. */
53c17031
JH
2907
2908int
b96a374d 2909ix86_function_arg_boundary (enum machine_mode mode, tree type)
53c17031
JH
2910{
2911 int align;
53c17031
JH
2912 if (type)
2913 align = TYPE_ALIGN (type);
2914 else
2915 align = GET_MODE_ALIGNMENT (mode);
2916 if (align < PARM_BOUNDARY)
2917 align = PARM_BOUNDARY;
8b978a57
JH
2918 if (!TARGET_64BIT)
2919 {
2920 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
2921 make an exception for SSE modes since these require 128bit
b96a374d 2922 alignment.
8b978a57
JH
2923
2924 The handling here differs from field_alignment. ICC aligns MMX
2925 arguments to 4 byte boundaries, while structure fields are aligned
2926 to 8 byte boundaries. */
78fbfc4b
JB
2927 if (!TARGET_SSE)
2928 align = PARM_BOUNDARY;
2929 else if (!type)
8b978a57
JH
2930 {
2931 if (!SSE_REG_MODE_P (mode))
2932 align = PARM_BOUNDARY;
2933 }
2934 else
2935 {
2936 if (!contains_128bit_aligned_vector_p (type))
2937 align = PARM_BOUNDARY;
2938 }
8b978a57 2939 }
53c17031
JH
2940 if (align > 128)
2941 align = 128;
2942 return align;
2943}
2944
2945/* Return true if N is a possible register number of function value. */
2946bool
b96a374d 2947ix86_function_value_regno_p (int regno)
53c17031
JH
2948{
2949 if (!TARGET_64BIT)
2950 {
2951 return ((regno) == 0
2952 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2953 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2954 }
2955 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2956 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2957 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2958}
2959
2960/* Define how to find the value returned by a function.
2961 VALTYPE is the data type of the value (as a tree).
2962 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2963 otherwise, FUNC is 0. */
2964rtx
b96a374d 2965ix86_function_value (tree valtype)
53c17031
JH
2966{
2967 if (TARGET_64BIT)
2968 {
2969 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2970 REGPARM_MAX, SSE_REGPARM_MAX,
2971 x86_64_int_return_registers, 0);
d1f87653
KH
2972 /* For zero sized structures, construct_container return NULL, but we need
2973 to keep rest of compiler happy by returning meaningful value. */
53c17031
JH
2974 if (!ret)
2975 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2976 return ret;
2977 }
2978 else
b069de3b
SS
2979 return gen_rtx_REG (TYPE_MODE (valtype),
2980 ix86_value_regno (TYPE_MODE (valtype)));
53c17031
JH
2981}
2982
f5143c46 2983/* Return false iff type is returned in memory. */
53c17031 2984int
b96a374d 2985ix86_return_in_memory (tree type)
53c17031 2986{
a30b6839
RH
2987 int needed_intregs, needed_sseregs, size;
2988 enum machine_mode mode = TYPE_MODE (type);
2989
53c17031 2990 if (TARGET_64BIT)
a30b6839
RH
2991 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
2992
2993 if (mode == BLKmode)
2994 return 1;
2995
2996 size = int_size_in_bytes (type);
2997
2998 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
2999 return 0;
3000
3001 if (VECTOR_MODE_P (mode) || mode == TImode)
53c17031 3002 {
a30b6839
RH
3003 /* User-created vectors small enough to fit in EAX. */
3004 if (size < 8)
5e062767 3005 return 0;
a30b6839
RH
3006
3007 /* MMX/3dNow values are returned on the stack, since we've
3008 got to EMMS/FEMMS before returning. */
3009 if (size == 8)
53c17031 3010 return 1;
a30b6839 3011
0397ac35 3012 /* SSE values are returned in XMM0, except when it doesn't exist. */
a30b6839 3013 if (size == 16)
0397ac35 3014 return (TARGET_SSE ? 0 : 1);
53c17031 3015 }
a30b6839 3016
cf2348cb 3017 if (mode == XFmode)
a30b6839 3018 return 0;
f8a1ebc6 3019
a30b6839
RH
3020 if (size > 12)
3021 return 1;
3022 return 0;
53c17031
JH
3023}
3024
0397ac35
RH
3025/* When returning SSE vector types, we have a choice of either
3026 (1) being abi incompatible with a -march switch, or
3027 (2) generating an error.
3028 Given no good solution, I think the safest thing is one warning.
3029 The user won't be able to use -Werror, but....
3030
3031 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
3032 called in response to actually generating a caller or callee that
3033 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
3034 via aggregate_value_p for general type probing from tree-ssa. */
3035
3036static rtx
3037ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
3038{
3039 static bool warned;
3040
3041 if (!TARGET_SSE && type && !warned)
3042 {
3043 /* Look at the return type of the function, not the function type. */
3044 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
3045
3046 if (mode == TImode
3047 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3048 {
3049 warned = true;
3050 warning ("SSE vector return without SSE enabled changes the ABI");
3051 }
3052 }
3053
3054 return NULL;
3055}
3056
53c17031
JH
3057/* Define how to find the value returned by a library function
3058 assuming the value has mode MODE. */
3059rtx
b96a374d 3060ix86_libcall_value (enum machine_mode mode)
53c17031
JH
3061{
3062 if (TARGET_64BIT)
3063 {
3064 switch (mode)
3065 {
f8a1ebc6
JH
3066 case SFmode:
3067 case SCmode:
3068 case DFmode:
3069 case DCmode:
9e9fb0ce 3070 case TFmode:
f8a1ebc6
JH
3071 return gen_rtx_REG (mode, FIRST_SSE_REG);
3072 case XFmode:
f8a1ebc6 3073 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
9e9fb0ce 3074 case XCmode:
f8a1ebc6
JH
3075 case TCmode:
3076 return NULL;
3077 default:
3078 return gen_rtx_REG (mode, 0);
53c17031
JH
3079 }
3080 }
3081 else
f8a1ebc6 3082 return gen_rtx_REG (mode, ix86_value_regno (mode));
b069de3b
SS
3083}
3084
3085/* Given a mode, return the register to use for a return value. */
3086
3087static int
b96a374d 3088ix86_value_regno (enum machine_mode mode)
b069de3b 3089{
a30b6839 3090 /* Floating point return values in %st(0). */
b069de3b
SS
3091 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
3092 return FIRST_FLOAT_REG;
a30b6839
RH
3093 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
3094 we prevent this case when sse is not available. */
3095 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
b069de3b 3096 return FIRST_SSE_REG;
a30b6839 3097 /* Everything else in %eax. */
b069de3b 3098 return 0;
53c17031 3099}
ad919812
JH
3100\f
3101/* Create the va_list data type. */
53c17031 3102
c35d187f
RH
3103static tree
3104ix86_build_builtin_va_list (void)
ad919812
JH
3105{
3106 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
53c17031 3107
ad919812
JH
3108 /* For i386 we use plain pointer to argument area. */
3109 if (!TARGET_64BIT)
3110 return build_pointer_type (char_type_node);
3111
f1e639b1 3112 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
ad919812
JH
3113 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
3114
fce5a9f2 3115 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
ad919812 3116 unsigned_type_node);
fce5a9f2 3117 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
ad919812
JH
3118 unsigned_type_node);
3119 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
3120 ptr_type_node);
3121 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
3122 ptr_type_node);
3123
3124 DECL_FIELD_CONTEXT (f_gpr) = record;
3125 DECL_FIELD_CONTEXT (f_fpr) = record;
3126 DECL_FIELD_CONTEXT (f_ovf) = record;
3127 DECL_FIELD_CONTEXT (f_sav) = record;
3128
3129 TREE_CHAIN (record) = type_decl;
3130 TYPE_NAME (record) = type_decl;
3131 TYPE_FIELDS (record) = f_gpr;
3132 TREE_CHAIN (f_gpr) = f_fpr;
3133 TREE_CHAIN (f_fpr) = f_ovf;
3134 TREE_CHAIN (f_ovf) = f_sav;
3135
3136 layout_type (record);
3137
3138 /* The correct type is an array type of one element. */
3139 return build_array_type (record, build_index_type (size_zero_node));
3140}
3141
a0524eb3 3142/* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
ad919812 3143
a0524eb3 3144static void
b96a374d
AJ
3145ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3146 tree type, int *pretend_size ATTRIBUTE_UNUSED,
3147 int no_rtl)
ad919812
JH
3148{
3149 CUMULATIVE_ARGS next_cum;
3150 rtx save_area = NULL_RTX, mem;
3151 rtx label;
3152 rtx label_ref;
3153 rtx tmp_reg;
3154 rtx nsse_reg;
3155 int set;
3156 tree fntype;
3157 int stdarg_p;
3158 int i;
3159
3160 if (!TARGET_64BIT)
3161 return;
3162
3163 /* Indicate to allocate space on the stack for varargs save area. */
3164 ix86_save_varrargs_registers = 1;
3165
5474eed5
JH
3166 cfun->stack_alignment_needed = 128;
3167
ad919812
JH
3168 fntype = TREE_TYPE (current_function_decl);
3169 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
3170 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
3171 != void_type_node));
3172
3173 /* For varargs, we do not want to skip the dummy va_dcl argument.
3174 For stdargs, we do want to skip the last named argument. */
3175 next_cum = *cum;
3176 if (stdarg_p)
3177 function_arg_advance (&next_cum, mode, type, 1);
3178
3179 if (!no_rtl)
3180 save_area = frame_pointer_rtx;
3181
3182 set = get_varargs_alias_set ();
3183
5496b36f 3184 for (i = next_cum.regno; i < ix86_regparm; i++)
ad919812
JH
3185 {
3186 mem = gen_rtx_MEM (Pmode,
3187 plus_constant (save_area, i * UNITS_PER_WORD));
0692acba 3188 set_mem_alias_set (mem, set);
ad919812
JH
3189 emit_move_insn (mem, gen_rtx_REG (Pmode,
3190 x86_64_int_parameter_registers[i]));
3191 }
3192
5496b36f 3193 if (next_cum.sse_nregs)
ad919812
JH
3194 {
3195 /* Now emit code to save SSE registers. The AX parameter contains number
d1f87653 3196 of SSE parameter registers used to call this function. We use
ad919812
JH
3197 sse_prologue_save insn template that produces computed jump across
3198 SSE saves. We need some preparation work to get this working. */
3199
3200 label = gen_label_rtx ();
3201 label_ref = gen_rtx_LABEL_REF (Pmode, label);
3202
3203 /* Compute address to jump to :
3204 label - 5*eax + nnamed_sse_arguments*5 */
3205 tmp_reg = gen_reg_rtx (Pmode);
3206 nsse_reg = gen_reg_rtx (Pmode);
3207 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
3208 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
44cf5b6a 3209 gen_rtx_MULT (Pmode, nsse_reg,
ad919812
JH
3210 GEN_INT (4))));
3211 if (next_cum.sse_regno)
3212 emit_move_insn
3213 (nsse_reg,
3214 gen_rtx_CONST (DImode,
3215 gen_rtx_PLUS (DImode,
3216 label_ref,
3217 GEN_INT (next_cum.sse_regno * 4))));
3218 else
3219 emit_move_insn (nsse_reg, label_ref);
3220 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
3221
3222 /* Compute address of memory block we save into. We always use pointer
3223 pointing 127 bytes after first byte to store - this is needed to keep
3224 instruction size limited by 4 bytes. */
3225 tmp_reg = gen_reg_rtx (Pmode);
8ac61af7
RK
3226 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3227 plus_constant (save_area,
3228 8 * REGPARM_MAX + 127)));
ad919812 3229 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
14f73b5a 3230 set_mem_alias_set (mem, set);
8ac61af7 3231 set_mem_align (mem, BITS_PER_WORD);
ad919812
JH
3232
3233 /* And finally do the dirty job! */
8ac61af7
RK
3234 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
3235 GEN_INT (next_cum.sse_regno), label));
ad919812
JH
3236 }
3237
3238}
3239
3240/* Implement va_start. */
3241
3242void
b96a374d 3243ix86_va_start (tree valist, rtx nextarg)
ad919812
JH
3244{
3245 HOST_WIDE_INT words, n_gpr, n_fpr;
3246 tree f_gpr, f_fpr, f_ovf, f_sav;
3247 tree gpr, fpr, ovf, sav, t;
3248
3249 /* Only 64bit target needs something special. */
3250 if (!TARGET_64BIT)
3251 {
e5faf155 3252 std_expand_builtin_va_start (valist, nextarg);
ad919812
JH
3253 return;
3254 }
3255
3256 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3257 f_fpr = TREE_CHAIN (f_gpr);
3258 f_ovf = TREE_CHAIN (f_fpr);
3259 f_sav = TREE_CHAIN (f_ovf);
3260
3261 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
44de5aeb
RK
3262 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
3263 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
3264 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
3265 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
ad919812
JH
3266
3267 /* Count number of gp and fp argument registers used. */
3268 words = current_function_args_info.words;
3269 n_gpr = current_function_args_info.regno;
3270 n_fpr = current_function_args_info.sse_regno;
3271
3272 if (TARGET_DEBUG_ARG)
3273 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
b531087a 3274 (int) words, (int) n_gpr, (int) n_fpr);
ad919812 3275
5496b36f
JJ
3276 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
3277 build_int_cst (NULL_TREE, n_gpr * 8));
3278 TREE_SIDE_EFFECTS (t) = 1;
3279 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
ad919812 3280
5496b36f
JJ
3281 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
3282 build_int_cst (NULL_TREE, n_fpr * 16 + 8*REGPARM_MAX));
3283 TREE_SIDE_EFFECTS (t) = 1;
3284 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
ad919812
JH
3285
3286 /* Find the overflow area. */
3287 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
3288 if (words != 0)
3289 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
7d60be94 3290 build_int_cst (NULL_TREE, words * UNITS_PER_WORD));
ad919812
JH
3291 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3292 TREE_SIDE_EFFECTS (t) = 1;
3293 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3294
5496b36f
JJ
3295 /* Find the register save area.
3296 Prologue of the function save it right above stack frame. */
3297 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
3298 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
3299 TREE_SIDE_EFFECTS (t) = 1;
3300 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
ad919812
JH
3301}
3302
/* Implement va_arg via the gimplification target hook: lower one
   va_arg (VALIST, TYPE) access into GIMPLE appended to *PRE_P / *POST_P
   and return a tree for the fetched value.  On 64-bit targets this
   implements the SysV x86-64 ABI register-save-area protocol; 32-bit
   targets fall back to the generic stack-based implementation.  */

tree
ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
{
  static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;
  int size, rsize;
  tree lab_false, lab_over = NULL_TREE;
  tree addr, t2;
  rtx container;
  int indirect_p = 0;
  tree ptrtype;

  /* Only 64bit target needs something special.  */
  if (!TARGET_64BIT)
    return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);

  /* The four va_list fields, in declaration order: gp_offset,
     fp_offset, overflow_arg_area, reg_save_area.  */
  f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
  f_fpr = TREE_CHAIN (f_gpr);
  f_ovf = TREE_CHAIN (f_fpr);
  f_sav = TREE_CHAIN (f_ovf);

  valist = build_va_arg_indirect_ref (valist);
  gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
  fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
  ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
  sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);

  /* Arguments passed by reference are fetched as a pointer and
     dereferenced at the end.  */
  indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
  if (indirect_p)
    type = build_pointer_type (type);
  size = int_size_in_bytes (type);
  rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  /* CONTAINER is NULL when the argument is passed in memory only;
     otherwise it describes which registers (and at what offsets)
     hold the value.  */
  container = construct_container (TYPE_MODE (type), type, 0,
				   REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
  /*
   * Pull the value out of the saved registers ...
   */

  addr = create_tmp_var (ptr_type_node, "addr");
  DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();

  if (container)
    {
      int needed_intregs, needed_sseregs;
      bool need_temp;
      tree int_addr, sse_addr;

      lab_false = create_artificial_label ();
      lab_over = create_artificial_label ();

      examine_argument (TYPE_MODE (type), type, 0,
			&needed_intregs, &needed_sseregs);

      /* A temporary is required when the register pieces cannot be
	 read directly out of the save area with sufficient alignment.  */
      need_temp = (!REG_P (container)
		   && ((needed_intregs && TYPE_ALIGN (type) > 64)
		       || TYPE_ALIGN (type) > 128));

      /* In case we are passing structure, verify that it is consecutive block
         on the register save area.  If not we need to do moves.  */
      if (!need_temp && !REG_P (container))
	{
	  /* Verify that all registers are strictly consecutive  */
	  if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
	    {
	      int i;

	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
		{
		  rtx slot = XVECEXP (container, 0, i);
		  if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
		      || INTVAL (XEXP (slot, 1)) != i * 16)
		    need_temp = 1;
		}
	    }
	  else
	    {
	      int i;

	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
		{
		  rtx slot = XVECEXP (container, 0, i);
		  if (REGNO (XEXP (slot, 0)) != (unsigned int) i
		      || INTVAL (XEXP (slot, 1)) != i * 8)
		    need_temp = 1;
		}
	    }
	}
      if (!need_temp)
	{
	  int_addr = addr;
	  sse_addr = addr;
	}
      else
	{
	  int_addr = create_tmp_var (ptr_type_node, "int_addr");
	  DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
	  sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
	  DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
	}

      /* First ensure that we fit completely in registers.  */
      if (needed_intregs)
	{
	  t = build_int_cst (TREE_TYPE (gpr),
			     (REGPARM_MAX - needed_intregs + 1) * 8);
	  t = build2 (GE_EXPR, boolean_type_node, gpr, t);
	  t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
	  t = build (COND_EXPR, void_type_node, t, t2, NULL_TREE);
	  gimplify_and_add (t, pre_p);
	}
      if (needed_sseregs)
	{
	  /* SSE slots follow the REGPARM_MAX integer slots in the
	     save area, hence the REGPARM_MAX * 8 bias.  */
	  t = build_int_cst (TREE_TYPE (fpr),
			     (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
			     + REGPARM_MAX * 8);
	  t = build2 (GE_EXPR, boolean_type_node, fpr, t);
	  t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
	  t = build (COND_EXPR, void_type_node, t, t2, NULL_TREE);
	  gimplify_and_add (t, pre_p);
	}

      /* Compute index to start of area used for integer regs.  */
      if (needed_intregs)
	{
	  /* int_addr = gpr + sav; */
	  t = fold_convert (ptr_type_node, gpr);
	  t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
	  t = build2 (MODIFY_EXPR, void_type_node, int_addr, t);
	  gimplify_and_add (t, pre_p);
	}
      if (needed_sseregs)
	{
	  /* sse_addr = fpr + sav; */
	  t = fold_convert (ptr_type_node, fpr);
	  t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
	  t = build2 (MODIFY_EXPR, void_type_node, sse_addr, t);
	  gimplify_and_add (t, pre_p);
	}
      if (need_temp)
	{
	  int i;
	  tree temp = create_tmp_var (type, "va_arg_tmp");

	  /* addr = &temp; */
	  t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
	  t = build2 (MODIFY_EXPR, void_type_node, addr, t);
	  gimplify_and_add (t, pre_p);

	  /* Copy each register-sized piece out of the save area into
	     the temporary, at the offset recorded in CONTAINER.  */
	  for (i = 0; i < XVECLEN (container, 0); i++)
	    {
	      rtx slot = XVECEXP (container, 0, i);
	      rtx reg = XEXP (slot, 0);
	      enum machine_mode mode = GET_MODE (reg);
	      tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
	      tree addr_type = build_pointer_type (piece_type);
	      tree src_addr, src;
	      int src_offset;
	      tree dest_addr, dest;

	      if (SSE_REGNO_P (REGNO (reg)))
		{
		  src_addr = sse_addr;
		  src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
		}
	      else
		{
		  src_addr = int_addr;
		  src_offset = REGNO (reg) * 8;
		}
	      src_addr = fold_convert (addr_type, src_addr);
	      src_addr = fold (build2 (PLUS_EXPR, addr_type, src_addr,
				       size_int (src_offset)));
	      src = build_va_arg_indirect_ref (src_addr);

	      dest_addr = fold_convert (addr_type, addr);
	      dest_addr = fold (build2 (PLUS_EXPR, addr_type, dest_addr,
					size_int (INTVAL (XEXP (slot, 1)))));
	      dest = build_va_arg_indirect_ref (dest_addr);

	      t = build2 (MODIFY_EXPR, void_type_node, dest, src);
	      gimplify_and_add (t, pre_p);
	    }
	}

      /* Advance the gp_offset / fp_offset counters past the slots
	 just consumed.  */
      if (needed_intregs)
	{
	  t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
		      build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
	  t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
	  gimplify_and_add (t, pre_p);
	}
      if (needed_sseregs)
	{
	  t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
		      build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
	  t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
	  gimplify_and_add (t, pre_p);
	}

      t = build1 (GOTO_EXPR, void_type_node, lab_over);
      gimplify_and_add (t, pre_p);

      t = build1 (LABEL_EXPR, void_type_node, lab_false);
      append_to_statement_list (t, pre_p);
    }

  /* ... otherwise out of the overflow area.  */

  /* Care for on-stack alignment if needed.  */
  if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
    t = ovf;
  else
    {
      /* Round the overflow pointer up to the argument's alignment.  */
      HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
      t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf,
		 build_int_cst (TREE_TYPE (ovf), align - 1));
      t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
		 build_int_cst (TREE_TYPE (t), -align));
    }
  gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);

  t2 = build2 (MODIFY_EXPR, void_type_node, addr, t);
  gimplify_and_add (t2, pre_p);

  /* Bump overflow_arg_area past the argument just read.  */
  t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
	      build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD));
  t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
  gimplify_and_add (t, pre_p);

  if (container)
    {
      t = build1 (LABEL_EXPR, void_type_node, lab_over);
      append_to_statement_list (t, pre_p);
    }

  ptrtype = build_pointer_type (type);
  addr = fold_convert (ptrtype, addr);

  /* For pass-by-reference arguments, ADDR points at a pointer to the
     real datum: dereference one extra time.  */
  if (indirect_p)
    addr = build_va_arg_indirect_ref (addr);
  return build_va_arg_indirect_ref (addr);
}
8fe75e43
RH
3549\f
3550/* Return nonzero if OPNUM's MEM should be matched
3551 in movabs* patterns. */
fee2770d
RS
3552
3553int
8fe75e43 3554ix86_check_movabs (rtx insn, int opnum)
4f2c8ebb 3555{
8fe75e43 3556 rtx set, mem;
e075ae69 3557
8fe75e43
RH
3558 set = PATTERN (insn);
3559 if (GET_CODE (set) == PARALLEL)
3560 set = XVECEXP (set, 0, 0);
3561 if (GET_CODE (set) != SET)
e075ae69 3562 abort ();
8fe75e43
RH
3563 mem = XEXP (set, opnum);
3564 while (GET_CODE (mem) == SUBREG)
3565 mem = SUBREG_REG (mem);
3566 if (GET_CODE (mem) != MEM)
2247f6ed 3567 abort ();
8fe75e43 3568 return (volatile_ok || !MEM_VOLATILE_P (mem));
2247f6ed 3569}
e075ae69 3570\f
881b2a96
RS
3571/* Initialize the table of extra 80387 mathematical constants. */
3572
3573static void
b96a374d 3574init_ext_80387_constants (void)
881b2a96
RS
3575{
3576 static const char * cst[5] =
3577 {
3578 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
3579 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
3580 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
3581 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
3582 "3.1415926535897932385128089594061862044", /* 4: fldpi */
3583 };
3584 int i;
3585
3586 for (i = 0; i < 5; i++)
3587 {
3588 real_from_string (&ext_80387_constants_table[i], cst[i]);
3589 /* Ensure each constant is rounded to XFmode precision. */
1f48e56d 3590 real_convert (&ext_80387_constants_table[i],
f8a1ebc6 3591 XFmode, &ext_80387_constants_table[i]);
881b2a96
RS
3592 }
3593
3594 ext_80387_constants_init = 1;
3595}
3596
/* Return true if the constant is something that can be loaded with
   a special instruction.

   Return values:
     -1   X is not a floating-point CONST_DOUBLE at all;
      0   no special instruction loads X;
      1   fldz (zero), 2 fld1 (one);
      3-7 one of the extended constants (fldlg2, fldln2, fldl2e,
          fldl2t, fldpi) -- see standard_80387_constant_opcode.  */

int
standard_80387_constant_p (rtx x)
{
  if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
    return -1;

  if (x == CONST0_RTX (GET_MODE (x)))
    return 1;
  if (x == CONST1_RTX (GET_MODE (x)))
    return 2;

  /* For XFmode constants, try to find a special 80387 instruction when
     optimizing for size or on those CPUs that benefit from them.  */
  if (GET_MODE (x) == XFmode
      && (optimize_size || x86_ext_80387_constants & TUNEMASK))
    {
      REAL_VALUE_TYPE r;
      int i;

      /* Lazily build the table of extended constants.  */
      if (! ext_80387_constants_init)
	init_ext_80387_constants ();

      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
      for (i = 0; i < 5; i++)
	if (real_identical (&r, &ext_80387_constants_table[i]))
	  return i + 3;
    }

  return 0;
}
3630
881b2a96
RS
3631/* Return the opcode of the special instruction to be used to load
3632 the constant X. */
3633
3634const char *
b96a374d 3635standard_80387_constant_opcode (rtx x)
881b2a96
RS
3636{
3637 switch (standard_80387_constant_p (x))
3638 {
b96a374d 3639 case 1:
881b2a96
RS
3640 return "fldz";
3641 case 2:
3642 return "fld1";
b96a374d 3643 case 3:
881b2a96
RS
3644 return "fldlg2";
3645 case 4:
3646 return "fldln2";
b96a374d 3647 case 5:
881b2a96
RS
3648 return "fldl2e";
3649 case 6:
3650 return "fldl2t";
b96a374d 3651 case 7:
881b2a96
RS
3652 return "fldpi";
3653 }
3654 abort ();
3655}
3656
3657/* Return the CONST_DOUBLE representing the 80387 constant that is
3658 loaded by the specified special instruction. The argument IDX
3659 matches the return value from standard_80387_constant_p. */
3660
3661rtx
b96a374d 3662standard_80387_constant_rtx (int idx)
881b2a96
RS
3663{
3664 int i;
3665
3666 if (! ext_80387_constants_init)
3667 init_ext_80387_constants ();
3668
3669 switch (idx)
3670 {
3671 case 3:
3672 case 4:
3673 case 5:
3674 case 6:
3675 case 7:
3676 i = idx - 3;
3677 break;
3678
3679 default:
3680 abort ();
3681 }
3682
1f48e56d 3683 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
f8a1ebc6 3684 XFmode);
881b2a96
RS
3685}
3686
2b04e52b
JH
3687/* Return 1 if X is FP constant we can load to SSE register w/o using memory.
3688 */
3689int
b96a374d 3690standard_sse_constant_p (rtx x)
2b04e52b 3691{
0e67d460
JH
3692 if (x == const0_rtx)
3693 return 1;
2b04e52b
JH
3694 return (x == CONST0_RTX (GET_MODE (x)));
3695}
3696
2a2ab3f9
JVA
3697/* Returns 1 if OP contains a symbol reference */
3698
3699int
b96a374d 3700symbolic_reference_mentioned_p (rtx op)
2a2ab3f9 3701{
8d531ab9
KH
3702 const char *fmt;
3703 int i;
2a2ab3f9
JVA
3704
3705 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
3706 return 1;
3707
3708 fmt = GET_RTX_FORMAT (GET_CODE (op));
3709 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3710 {
3711 if (fmt[i] == 'E')
3712 {
8d531ab9 3713 int j;
2a2ab3f9
JVA
3714
3715 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3716 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3717 return 1;
3718 }
e9a25f70 3719
2a2ab3f9
JVA
3720 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3721 return 1;
3722 }
3723
3724 return 0;
3725}
e075ae69
RH
3726
/* Return 1 if it is appropriate to emit `ret' instructions in the
   body of a function.  Do this only if the epilogue is simple, needing a
   couple of insns.  Prior to reloading, we can't tell how many registers
   must be saved, so return 0 then.  Return 0 if there is no frame
   marker to de-allocate.  */

int
ix86_can_use_return_insn_p (void)
{
  struct ix86_frame frame;

  if (! reload_completed || frame_pointer_needed)
    return 0;

  /* Don't allow more than 32 pop, since that's all we can do
     with one instruction.  (NOTE(review): the actual guard is a
     32768-byte args-size limit -- presumably the `ret N' immediate
     range; confirm against the pattern in i386.md.)  */
  if (current_function_pops_args
      && current_function_args_size >= 32768)
    return 0;

  /* Only a frame with nothing to deallocate and no saved registers
     can be torn down by a bare `ret'.  */
  ix86_compute_frame_layout (&frame);
  return frame.to_allocate == 0 && frame.nregs == 0;
}
6189a572 3750\f
6fca22eb
RH
3751/* Value should be nonzero if functions must have frame pointers.
3752 Zero means the frame pointer need not be set up (and parms may
3753 be accessed via the stack pointer) in functions that seem suitable. */
3754
3755int
b96a374d 3756ix86_frame_pointer_required (void)
6fca22eb
RH
3757{
3758 /* If we accessed previous frames, then the generated code expects
3759 to be able to access the saved ebp value in our frame. */
3760 if (cfun->machine->accesses_prev_frame)
3761 return 1;
a4f31c00 3762
6fca22eb
RH
3763 /* Several x86 os'es need a frame pointer for other reasons,
3764 usually pertaining to setjmp. */
3765 if (SUBTARGET_FRAME_POINTER_REQUIRED)
3766 return 1;
3767
3768 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
3769 the frame pointer by default. Turn it back on now if we've not
3770 got a leaf function. */
a7943381 3771 if (TARGET_OMIT_LEAF_FRAME_POINTER
55ba61f3
JH
3772 && (!current_function_is_leaf))
3773 return 1;
3774
3775 if (current_function_profile)
6fca22eb
RH
3776 return 1;
3777
3778 return 0;
3779}
3780
/* Record that the current function accesses previous call frames.
   The flag is read by ix86_frame_pointer_required to force a frame
   pointer in such functions.  */

void
ix86_setup_frame_addresses (void)
{
  cfun->machine->accesses_prev_frame = 1;
}
e075ae69 3788\f
145aacc2
RH
3789#if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
3790# define USE_HIDDEN_LINKONCE 1
3791#else
3792# define USE_HIDDEN_LINKONCE 0
3793#endif
3794
bd09bdeb 3795static int pic_labels_used;
e9a25f70 3796
145aacc2
RH
/* Fills in the label name that should be used for a pc thunk for
   the given register.  NAME must provide at least 32 bytes.  With
   hidden-linkonce support the name is the ABI-shared
   "__i686.get_pc_thunk.<reg>"; otherwise a file-local "LPR" label
   keyed on the register number is generated.  */

static void
get_pc_thunk_name (char name[32], unsigned int regno)
{
  if (USE_HIDDEN_LINKONCE)
    sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
  else
    ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
}
3808
3809
e075ae69
RH
/* This function generates code for -fpic that loads %ebx with
   the return address of the caller and then returns.  Emitted at
   end-of-file: one get-pc thunk per register whose bit is set in
   pic_labels_used, followed by the executable-stack marker when the
   target needs one.  */

void
ix86_file_end (void)
{
  rtx xops[2];
  int regno;

  for (regno = 0; regno < 8; ++regno)
    {
      char name[32];

      /* Skip registers for which output_set_got never requested
	 a thunk.  */
      if (! ((pic_labels_used >> regno) & 1))
	continue;

      get_pc_thunk_name (name, regno);

      if (USE_HIDDEN_LINKONCE)
	{
	  /* Emit the thunk as a public, hidden, one-only function so
	     copies from different object files collapse at link time.  */
	  tree decl;

	  decl = build_decl (FUNCTION_DECL, get_identifier (name),
			     error_mark_node);
	  TREE_PUBLIC (decl) = 1;
	  TREE_STATIC (decl) = 1;
	  DECL_ONE_ONLY (decl) = 1;

	  (*targetm.asm_out.unique_section) (decl, 0);
	  named_section (decl, NULL, 0);

	  (*targetm.asm_out.globalize_label) (asm_out_file, name);
	  fputs ("\t.hidden\t", asm_out_file);
	  assemble_name (asm_out_file, name);
	  fputc ('\n', asm_out_file);
	  ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
	}
      else
	{
	  text_section ();
	  ASM_OUTPUT_LABEL (asm_out_file, name);
	}

      /* Thunk body: load the return address (at the top of the stack)
	 into the register, then return.  */
      xops[0] = gen_rtx_REG (SImode, regno);
      xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
      output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
      output_asm_insn ("ret", xops);
    }

  if (NEED_INDICATE_EXEC_STACK)
    file_end_indicate_exec_stack ();
}
32b5b1aa 3862
/* Emit code for the SET_GOT patterns: load DEST with the address of
   the GOT (or, without deep branch prediction, the pc) and return the
   (empty) assembler template string.  */

const char *
output_set_got (rtx dest)
{
  rtx xops[3];

  xops[0] = dest;
  xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);

  if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
    {
      /* Inline call/pop (or plain mov when not PIC) instead of a
	 shared thunk.  */
      xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());

      if (!flag_pic)
	output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
      else
	output_asm_insn ("call\t%a2", xops);

#if TARGET_MACHO
      /* Output the "canonical" label name ("Lxx$pb") here too.  This
         is what will be referred to by the Mach-O PIC subsystem.  */
      ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
#endif
      (*targetm.asm_out.internal_label) (asm_out_file, "L",
				 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));

      if (flag_pic)
	output_asm_insn ("pop{l}\t%0", xops);
    }
  else
    {
      /* Call the per-register get-pc thunk; record its use so
	 ix86_file_end emits the thunk body.  */
      char name[32];
      get_pc_thunk_name (name, REGNO (dest));
      pic_labels_used |= 1 << REGNO (dest);

      xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
      xops[2] = gen_rtx_MEM (QImode, xops[2]);
      output_asm_insn ("call\t%X2", xops);
    }

  /* Adjust the loaded pc by the GOT displacement.  */
  if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
    output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
  else if (!TARGET_MACHO)
    output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);

  return "";
}
8dfe5673 3911
0d7d98ee 3912/* Generate an "push" pattern for input ARG. */
e9a25f70 3913
e075ae69 3914static rtx
b96a374d 3915gen_push (rtx arg)
e9a25f70 3916{
c5c76735 3917 return gen_rtx_SET (VOIDmode,
0d7d98ee
JH
3918 gen_rtx_MEM (Pmode,
3919 gen_rtx_PRE_DEC (Pmode,
c5c76735
JL
3920 stack_pointer_rtx)),
3921 arg);
e9a25f70
JL
3922}
3923
bd09bdeb
RH
3924/* Return >= 0 if there is an unused call-clobbered register available
3925 for the entire function. */
3926
3927static unsigned int
b96a374d 3928ix86_select_alt_pic_regnum (void)
bd09bdeb
RH
3929{
3930 if (current_function_is_leaf && !current_function_profile)
3931 {
3932 int i;
3933 for (i = 2; i >= 0; --i)
3934 if (!regs_ever_live[i])
3935 return i;
3936 }
3937
3938 return INVALID_REGNUM;
3939}
fce5a9f2 3940
4dd2ac2c
JH
/* Return 1 if we need to save REGNO.  MAYBE_EH_RETURN additionally
   treats the EH return data registers as needing a save when the
   function calls __builtin_eh_return.  */
static int
ix86_save_reg (unsigned int regno, int maybe_eh_return)
{
  /* The PIC register must be preserved when it is actually needed,
     unless an otherwise-unused call-clobbered register can hold the
     PIC pointer instead.  */
  if (pic_offset_table_rtx
      && regno == REAL_PIC_OFFSET_TABLE_REGNUM
      && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
	  || current_function_profile
	  || current_function_calls_eh_return
	  || current_function_uses_const_pool))
    {
      if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
	return 0;
      return 1;
    }

  /* EH return data registers (terminated by INVALID_REGNUM) must be
     saved so the unwinder can overwrite them.  */
  if (current_function_calls_eh_return && maybe_eh_return)
    {
      unsigned i;
      for (i = 0; ; i++)
	{
	  unsigned test = EH_RETURN_DATA_REGNO (i);
	  if (test == INVALID_REGNUM)
	    break;
	  if (test == regno)
	    return 1;
	}
    }

  /* Otherwise: save call-saved, non-fixed registers that are live,
     except the hard frame pointer when it is already maintained by
     the frame-pointer setup code.  */
  return (regs_ever_live[regno]
	  && !call_used_regs[regno]
	  && !fixed_regs[regno]
	  && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
}
3975
0903fcab
JH
3976/* Return number of registers to be saved on the stack. */
3977
3978static int
b96a374d 3979ix86_nsaved_regs (void)
0903fcab
JH
3980{
3981 int nregs = 0;
0903fcab
JH
3982 int regno;
3983
4dd2ac2c 3984 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
1020a5ab 3985 if (ix86_save_reg (regno, true))
4dd2ac2c 3986 nregs++;
0903fcab
JH
3987 return nregs;
3988}
3989
/* Return the offset between two registers, one to be eliminated, and the other
   its replacement, at the start of a routine.  Only the eliminations
   arg-pointer/frame-pointer -> hard-frame-pointer/stack-pointer are
   valid; any other FROM/TO pair aborts.  */

HOST_WIDE_INT
ix86_initial_elimination_offset (int from, int to)
{
  struct ix86_frame frame;
  ix86_compute_frame_layout (&frame);

  if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    return frame.hard_frame_pointer_offset;
  else if (from == FRAME_POINTER_REGNUM
	   && to == HARD_FRAME_POINTER_REGNUM)
    return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
  else
    {
      /* The only remaining legal target is the stack pointer.  */
      if (to != STACK_POINTER_REGNUM)
	abort ();
      else if (from == ARG_POINTER_REGNUM)
	return frame.stack_pointer_offset;
      else if (from != FRAME_POINTER_REGNUM)
	abort ();
      else
	return frame.stack_pointer_offset - frame.frame_pointer_offset;
    }
}
4016
/* Fill structure ix86_frame about frame of currently computed function.
   Computes, in order: the saved-register count and whether to save via
   mov or push, then the cumulative offsets of the register save area,
   va-arg area, local frame, outgoing arguments and alignment paddings,
   and finally the red-zone adjustment on 64-bit leaf functions.  */

static void
ix86_compute_frame_layout (struct ix86_frame *frame)
{
  HOST_WIDE_INT total_size;
  unsigned int stack_alignment_needed;
  HOST_WIDE_INT offset;
  unsigned int preferred_alignment;
  HOST_WIDE_INT size = get_frame_size ();

  frame->nregs = ix86_nsaved_regs ();
  /* NOTE(review): total_size is set but not otherwise used below.  */
  total_size = size;

  stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
  preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;

  /* During reload iteration the amount of registers saved can change.
     Recompute the value as needed.  Do not recompute when amount of registers
     didn't change as reload does mutiple calls to the function and does not
     expect the decision to change within single iteration.  */
  if (!optimize_size
      && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
    {
      int count = frame->nregs;

      cfun->machine->use_fast_prologue_epilogue_nregs = count;
      /* The fast prologue uses move instead of push to save registers.  This
         is significantly longer, but also executes faster as modern hardware
         can execute the moves in parallel, but can't do that for push/pop.

	 Be careful about choosing what prologue to emit:  When function takes
	 many instructions to execute we may use slow version as well as in
	 case function is known to be outside hot spot (this is known with
	 feedback only).  Weight the size of function by number of registers
	 to save as it is cheap to use one or two push instructions but very
	 slow to use many of them.  */
      if (count)
	count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
      if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
	  || (flag_branch_probabilities
	      && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
	cfun->machine->use_fast_prologue_epilogue = false;
      else
	cfun->machine->use_fast_prologue_epilogue
	  = !expensive_function_p (count);
    }
  if (TARGET_PROLOGUE_USING_MOVE
      && cfun->machine->use_fast_prologue_epilogue)
    frame->save_regs_using_mov = true;
  else
    frame->save_regs_using_mov = false;


  /* Skip return address and saved base pointer.  */
  offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;

  frame->hard_frame_pointer_offset = offset;

  /* Do some sanity checking of stack_alignment_needed and
     preferred_alignment, since i386 port is the only using those features
     that may break easily.  */

  if (size && !stack_alignment_needed)
    abort ();
  if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
    abort ();
  if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
    abort ();
  if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
    abort ();

  if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
    stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;

  /* Register save area */
  offset += frame->nregs * UNITS_PER_WORD;

  /* Va-arg area */
  if (ix86_save_varrargs_registers)
    {
      offset += X86_64_VARARGS_SIZE;
      frame->va_arg_size = X86_64_VARARGS_SIZE;
    }
  else
    frame->va_arg_size = 0;

  /* Align start of frame for local function.  */
  frame->padding1 = ((offset + stack_alignment_needed - 1)
		     & -stack_alignment_needed) - offset;

  offset += frame->padding1;

  /* Frame pointer points here.  */
  frame->frame_pointer_offset = offset;

  offset += size;

  /* Add outgoing arguments area.  Can be skipped if we eliminated
     all the function calls as dead code.
     Skipping is however impossible when function calls alloca.  Alloca
     expander assumes that last current_function_outgoing_args_size
     of stack frame are unused.  */
  if (ACCUMULATE_OUTGOING_ARGS
      && (!current_function_is_leaf || current_function_calls_alloca))
    {
      offset += current_function_outgoing_args_size;
      frame->outgoing_arguments_size = current_function_outgoing_args_size;
    }
  else
    frame->outgoing_arguments_size = 0;

  /* Align stack boundary.  Only needed if we're calling another function
     or using alloca.  */
  if (!current_function_is_leaf || current_function_calls_alloca)
    frame->padding2 = ((offset + preferred_alignment - 1)
		       & -preferred_alignment) - offset;
  else
    frame->padding2 = 0;

  offset += frame->padding2;

  /* We've reached end of stack frame.  */
  frame->stack_pointer_offset = offset;

  /* Size prologue needs to allocate.  */
  frame->to_allocate =
    (size + frame->padding1 + frame->padding2
     + frame->outgoing_arguments_size + frame->va_arg_size);

  /* Moves are pointless for a tiny frame, and impossible when the
     allocation does not fit a 32-bit displacement on 64-bit.  */
  if ((!frame->to_allocate && frame->nregs <= 1)
      || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
    frame->save_regs_using_mov = false;

  /* On 64-bit leaf functions with an unchanging stack pointer, part of
     the frame can live in the red zone below the stack pointer.  */
  if (TARGET_RED_ZONE && current_function_sp_is_unchanging
      && current_function_is_leaf)
    {
      frame->red_zone_size = frame->to_allocate;
      if (frame->save_regs_using_mov)
	frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
      if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
	frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
    }
  else
    frame->red_zone_size = 0;
  frame->to_allocate -= frame->red_zone_size;
  frame->stack_pointer_offset -= frame->red_zone_size;
#if 0
  fprintf (stderr, "nregs: %i\n", frame->nregs);
  fprintf (stderr, "size: %i\n", size);
  fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
  fprintf (stderr, "padding1: %i\n", frame->padding1);
  fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
  fprintf (stderr, "padding2: %i\n", frame->padding2);
  fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
  fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
  fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
  fprintf (stderr, "hard_frame_pointer_offset: %i\n",
	   frame->hard_frame_pointer_offset);
  fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
#endif
}
4179
0903fcab
JH
/* Emit code to save registers in the prologue, one push per register
   that ix86_save_reg reports as needing a save, in descending regno
   order.  Each push is marked RTX_FRAME_RELATED_P so it is recognized
   as prologue frame setup.  */

static void
ix86_emit_save_regs (void)
{
  int regno;
  rtx insn;

  for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
    if (ix86_save_reg (regno, true))
      {
	insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
	RTX_FRAME_RELATED_P (insn) = 1;
      }
}
4195
c6036a37
JH
/* Emit code to save registers using MOV insns.  First register
   is saved at POINTER + OFFSET; each subsequent saved register goes
   one word higher.  Each store is marked RTX_FRAME_RELATED_P.  */
static void
ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
{
  int regno;
  rtx insn;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (ix86_save_reg (regno, true))
      {
	insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
					       Pmode, offset),
			       gen_rtx_REG (Pmode, regno));
	RTX_FRAME_RELATED_P (insn) = 1;
	offset += UNITS_PER_WORD;
      }
}
4214
/* Expand prologue or epilogue stack adjustment.
   The pattern exist to put a dependency on all ebp-based memory accesses.
   STYLE should be negative if instructions should be marked as frame related,
   zero if %r11 register is live and cannot be freely used and positive
   otherwise.  */

static void
pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
{
  rtx insn;

  if (! TARGET_64BIT)
    insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
  else if (x86_64_immediate_operand (offset, DImode))
    insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
  else
    {
      /* A 64-bit offset outside the sign-extended-imm32 range must be
	 staged through a scratch register; %r11 is the designated one.  */
      rtx r11;
      /* r11 is used by indirect sibcall return as well, set before the
	 epilogue and used after the epilogue.  ATM indirect sibcall
	 shouldn't be used together with huge frame sizes in one
	 function because of the frame_size check in sibcall.c.  */
      if (style == 0)
	abort ();
      r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
      insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
      if (style < 0)
	RTX_FRAME_RELATED_P (insn) = 1;
      insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
							       offset));
    }
  if (style < 0)
    RTX_FRAME_RELATED_P (insn) = 1;
}
4249
0f290768 4250/* Expand the prologue into a bunch of separate insns. */
e075ae69
RH
4251
4252void
b96a374d 4253ix86_expand_prologue (void)
2a2ab3f9 4254{
564d80f4 4255 rtx insn;
bd09bdeb 4256 bool pic_reg_used;
4dd2ac2c 4257 struct ix86_frame frame;
c6036a37 4258 HOST_WIDE_INT allocate;
4dd2ac2c 4259
4977bab6 4260 ix86_compute_frame_layout (&frame);
79325812 4261
e075ae69
RH
4262 /* Note: AT&T enter does NOT have reversed args. Enter is probably
4263 slower on all targets. Also sdb doesn't like it. */
e9a25f70 4264
2a2ab3f9
JVA
4265 if (frame_pointer_needed)
4266 {
564d80f4 4267 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
e075ae69 4268 RTX_FRAME_RELATED_P (insn) = 1;
e9a25f70 4269
564d80f4 4270 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
e075ae69 4271 RTX_FRAME_RELATED_P (insn) = 1;
2a2ab3f9
JVA
4272 }
4273
c6036a37 4274 allocate = frame.to_allocate;
c6036a37 4275
d9b40e8d 4276 if (!frame.save_regs_using_mov)
c6036a37
JH
4277 ix86_emit_save_regs ();
4278 else
4279 allocate += frame.nregs * UNITS_PER_WORD;
564d80f4 4280
d9b40e8d
JH
4281 /* When using red zone we may start register saving before allocating
4282 the stack frame saving one cycle of the prologue. */
4283 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
4284 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
4285 : stack_pointer_rtx,
4286 -frame.nregs * UNITS_PER_WORD);
4287
c6036a37 4288 if (allocate == 0)
8dfe5673 4289 ;
e323735c 4290 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
b19ee4bd
JJ
4291 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
4292 GEN_INT (-allocate), -1);
79325812 4293 else
8dfe5673 4294 {
fe9f516f
RH
4295 /* Only valid for Win32. */
4296 rtx eax = gen_rtx_REG (SImode, 0);
4297 bool eax_live = ix86_eax_live_at_start_p ();
e9a25f70 4298
8362f420 4299 if (TARGET_64BIT)
b1177d69 4300 abort ();
e075ae69 4301
fe9f516f
RH
4302 if (eax_live)
4303 {
4304 emit_insn (gen_push (eax));
4305 allocate -= 4;
4306 }
4307
4308 insn = emit_move_insn (eax, GEN_INT (allocate));
b1177d69 4309 RTX_FRAME_RELATED_P (insn) = 1;
98417968 4310
b1177d69
KC
4311 insn = emit_insn (gen_allocate_stack_worker (eax));
4312 RTX_FRAME_RELATED_P (insn) = 1;
fe9f516f
RH
4313
4314 if (eax_live)
4315 {
ea5f7a19
RS
4316 rtx t;
4317 if (frame_pointer_needed)
4318 t = plus_constant (hard_frame_pointer_rtx,
4319 allocate
4320 - frame.to_allocate
4321 - frame.nregs * UNITS_PER_WORD);
4322 else
4323 t = plus_constant (stack_pointer_rtx, allocate);
fe9f516f
RH
4324 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
4325 }
e075ae69 4326 }
fe9f516f 4327
d9b40e8d 4328 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
c6036a37
JH
4329 {
4330 if (!frame_pointer_needed || !frame.to_allocate)
4331 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
4332 else
4333 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
4334 -frame.nregs * UNITS_PER_WORD);
4335 }
e9a25f70 4336
bd09bdeb
RH
4337 pic_reg_used = false;
4338 if (pic_offset_table_rtx
4339 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4340 || current_function_profile))
4341 {
4342 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
4343
4344 if (alt_pic_reg_used != INVALID_REGNUM)
4345 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
4346
4347 pic_reg_used = true;
4348 }
4349
e9a25f70 4350 if (pic_reg_used)
c8c03509
RH
4351 {
4352 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
4353
66edd3b4
RH
4354 /* Even with accurate pre-reload life analysis, we can wind up
4355 deleting all references to the pic register after reload.
4356 Consider if cross-jumping unifies two sides of a branch
d1f87653 4357 controlled by a comparison vs the only read from a global.
66edd3b4
RH
4358 In which case, allow the set_got to be deleted, though we're
4359 too late to do anything about the ebx save in the prologue. */
c8c03509
RH
4360 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
4361 }
77a989d1 4362
66edd3b4
RH
4363 /* Prevent function calls from be scheduled before the call to mcount.
4364 In the pic_reg_used case, make sure that the got load isn't deleted. */
4365 if (current_function_profile)
4366 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
77a989d1
SC
4367}
4368
da2d1d3a
JH
4369/* Emit code to restore saved registers using MOV insns. First register
4370 is restored from POINTER + OFFSET. */
4371static void
72613dfa
JH
4372ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
4373 int maybe_eh_return)
da2d1d3a
JH
4374{
4375 int regno;
72613dfa 4376 rtx base_address = gen_rtx_MEM (Pmode, pointer);
da2d1d3a 4377
4dd2ac2c 4378 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
1020a5ab 4379 if (ix86_save_reg (regno, maybe_eh_return))
da2d1d3a 4380 {
72613dfa
JH
4381 /* Ensure that adjust_address won't be forced to produce pointer
4382 out of range allowed by x86-64 instruction set. */
4383 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
4384 {
4385 rtx r11;
4386
4387 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
4388 emit_move_insn (r11, GEN_INT (offset));
4389 emit_insn (gen_adddi3 (r11, r11, pointer));
4390 base_address = gen_rtx_MEM (Pmode, r11);
4391 offset = 0;
4392 }
4dd2ac2c 4393 emit_move_insn (gen_rtx_REG (Pmode, regno),
72613dfa 4394 adjust_address (base_address, Pmode, offset));
4dd2ac2c 4395 offset += UNITS_PER_WORD;
da2d1d3a
JH
4396 }
4397}
4398
0f290768 4399/* Restore function stack, frame, and registers. */
e9a25f70 4400
2a2ab3f9 4401void
b96a374d 4402ix86_expand_epilogue (int style)
2a2ab3f9 4403{
1c71e60e 4404 int regno;
fdb8a883 4405 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
4dd2ac2c 4406 struct ix86_frame frame;
65954bd8 4407 HOST_WIDE_INT offset;
4dd2ac2c
JH
4408
4409 ix86_compute_frame_layout (&frame);
2a2ab3f9 4410
a4f31c00 4411 /* Calculate start of saved registers relative to ebp. Special care
84e306b4
RH
4412 must be taken for the normal return case of a function using
4413 eh_return: the eax and edx registers are marked as saved, but not
4414 restored along this path. */
4415 offset = frame.nregs;
4416 if (current_function_calls_eh_return && style != 2)
4417 offset -= 2;
4418 offset *= -UNITS_PER_WORD;
2a2ab3f9 4419
fdb8a883
JW
4420 /* If we're only restoring one register and sp is not valid then
4421 using a move instruction to restore the register since it's
0f290768 4422 less work than reloading sp and popping the register.
da2d1d3a
JH
4423
4424 The default code result in stack adjustment using add/lea instruction,
4425 while this code results in LEAVE instruction (or discrete equivalent),
4426 so it is profitable in some other cases as well. Especially when there
4427 are no registers to restore. We also use this code when TARGET_USE_LEAVE
d1f87653 4428 and there is exactly one register to pop. This heuristic may need some
da2d1d3a 4429 tuning in future. */
4dd2ac2c 4430 if ((!sp_valid && frame.nregs <= 1)
2ab0437e 4431 || (TARGET_EPILOGUE_USING_MOVE
d9b40e8d 4432 && cfun->machine->use_fast_prologue_epilogue
c6036a37 4433 && (frame.nregs > 1 || frame.to_allocate))
4dd2ac2c 4434 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
2ab0437e 4435 || (frame_pointer_needed && TARGET_USE_LEAVE
d9b40e8d
JH
4436 && cfun->machine->use_fast_prologue_epilogue
4437 && frame.nregs == 1)
2ab0437e 4438 || current_function_calls_eh_return)
2a2ab3f9 4439 {
da2d1d3a
JH
4440 /* Restore registers. We can use ebp or esp to address the memory
4441 locations. If both are available, default to ebp, since offsets
4442 are known to be small. Only exception is esp pointing directly to the
4443 end of block of saved registers, where we may simplify addressing
4444 mode. */
4445
4dd2ac2c 4446 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
1020a5ab
RH
4447 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
4448 frame.to_allocate, style == 2);
da2d1d3a 4449 else
1020a5ab
RH
4450 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
4451 offset, style == 2);
4452
4453 /* eh_return epilogues need %ecx added to the stack pointer. */
4454 if (style == 2)
4455 {
4456 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
2a2ab3f9 4457
1020a5ab
RH
4458 if (frame_pointer_needed)
4459 {
4460 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
4461 tmp = plus_constant (tmp, UNITS_PER_WORD);
4462 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
4463
4464 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
4465 emit_move_insn (hard_frame_pointer_rtx, tmp);
4466
b19ee4bd
JJ
4467 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
4468 const0_rtx, style);
1020a5ab
RH
4469 }
4470 else
4471 {
4472 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
4473 tmp = plus_constant (tmp, (frame.to_allocate
4474 + frame.nregs * UNITS_PER_WORD));
4475 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
4476 }
4477 }
4478 else if (!frame_pointer_needed)
b19ee4bd
JJ
4479 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
4480 GEN_INT (frame.to_allocate
4481 + frame.nregs * UNITS_PER_WORD),
4482 style);
0f290768 4483 /* If not an i386, mov & pop is faster than "leave". */
d9b40e8d
JH
4484 else if (TARGET_USE_LEAVE || optimize_size
4485 || !cfun->machine->use_fast_prologue_epilogue)
8362f420 4486 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
c8c5cb99 4487 else
2a2ab3f9 4488 {
b19ee4bd
JJ
4489 pro_epilogue_adjust_stack (stack_pointer_rtx,
4490 hard_frame_pointer_rtx,
4491 const0_rtx, style);
8362f420
JH
4492 if (TARGET_64BIT)
4493 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4494 else
4495 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
e9a25f70
JL
4496 }
4497 }
1c71e60e 4498 else
68f654ec 4499 {
1c71e60e
JH
4500 /* First step is to deallocate the stack frame so that we can
4501 pop the registers. */
4502 if (!sp_valid)
4503 {
4504 if (!frame_pointer_needed)
4505 abort ();
b19ee4bd
JJ
4506 pro_epilogue_adjust_stack (stack_pointer_rtx,
4507 hard_frame_pointer_rtx,
4508 GEN_INT (offset), style);
1c71e60e 4509 }
4dd2ac2c 4510 else if (frame.to_allocate)
b19ee4bd
JJ
4511 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
4512 GEN_INT (frame.to_allocate), style);
1c71e60e 4513
4dd2ac2c 4514 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
1020a5ab 4515 if (ix86_save_reg (regno, false))
8362f420
JH
4516 {
4517 if (TARGET_64BIT)
4518 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
4519 else
4520 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
4521 }
4dd2ac2c 4522 if (frame_pointer_needed)
8362f420 4523 {
f5143c46 4524 /* Leave results in shorter dependency chains on CPUs that are
2ab0437e
JH
4525 able to grok it fast. */
4526 if (TARGET_USE_LEAVE)
4527 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4528 else if (TARGET_64BIT)
8362f420
JH
4529 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4530 else
4531 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4532 }
68f654ec 4533 }
68f654ec 4534
cbbf65e0 4535 /* Sibcall epilogues don't want a return instruction. */
1020a5ab 4536 if (style == 0)
cbbf65e0
RH
4537 return;
4538
2a2ab3f9
JVA
4539 if (current_function_pops_args && current_function_args_size)
4540 {
e075ae69 4541 rtx popc = GEN_INT (current_function_pops_args);
2a2ab3f9 4542
b8c752c8
UD
4543 /* i386 can only pop 64K bytes. If asked to pop more, pop
4544 return address, do explicit add, and jump indirectly to the
0f290768 4545 caller. */
2a2ab3f9 4546
b8c752c8 4547 if (current_function_pops_args >= 65536)
2a2ab3f9 4548 {
e075ae69 4549 rtx ecx = gen_rtx_REG (SImode, 2);
e9a25f70 4550
b19ee4bd 4551 /* There is no "pascal" calling convention in 64bit ABI. */
8362f420 4552 if (TARGET_64BIT)
b531087a 4553 abort ();
8362f420 4554
e075ae69
RH
4555 emit_insn (gen_popsi1 (ecx));
4556 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
11837777 4557 emit_jump_insn (gen_return_indirect_internal (ecx));
e9a25f70 4558 }
79325812 4559 else
e075ae69
RH
4560 emit_jump_insn (gen_return_pop_internal (popc));
4561 }
4562 else
4563 emit_jump_insn (gen_return_internal ());
4564}
bd09bdeb
RH
4565
4566/* Reset from the function's potential modifications. */
4567
4568static void
b96a374d
AJ
4569ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
4570 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
bd09bdeb
RH
4571{
4572 if (pic_offset_table_rtx)
4573 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
4574}
e075ae69
RH
4575\f
4576/* Extract the parts of an RTL expression that is a valid memory address
b446e5a2
JH
4577 for an instruction. Return 0 if the structure of the address is
4578 grossly off. Return -1 if the address contains ASHIFT, so it is not
74dc3e94 4579 strictly valid, but still used for computing length of lea instruction. */
e075ae69 4580
8fe75e43 4581int
8d531ab9 4582ix86_decompose_address (rtx addr, struct ix86_address *out)
e075ae69
RH
4583{
4584 rtx base = NULL_RTX;
4585 rtx index = NULL_RTX;
4586 rtx disp = NULL_RTX;
4587 HOST_WIDE_INT scale = 1;
4588 rtx scale_rtx = NULL_RTX;
b446e5a2 4589 int retval = 1;
74dc3e94 4590 enum ix86_address_seg seg = SEG_DEFAULT;
e075ae69 4591
90e4e4c5 4592 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
e075ae69
RH
4593 base = addr;
4594 else if (GET_CODE (addr) == PLUS)
4595 {
74dc3e94
RH
4596 rtx addends[4], op;
4597 int n = 0, i;
e075ae69 4598
74dc3e94
RH
4599 op = addr;
4600 do
e075ae69 4601 {
74dc3e94
RH
4602 if (n >= 4)
4603 return 0;
4604 addends[n++] = XEXP (op, 1);
4605 op = XEXP (op, 0);
2a2ab3f9 4606 }
74dc3e94
RH
4607 while (GET_CODE (op) == PLUS);
4608 if (n >= 4)
4609 return 0;
4610 addends[n] = op;
4611
4612 for (i = n; i >= 0; --i)
e075ae69 4613 {
74dc3e94
RH
4614 op = addends[i];
4615 switch (GET_CODE (op))
4616 {
4617 case MULT:
4618 if (index)
4619 return 0;
4620 index = XEXP (op, 0);
4621 scale_rtx = XEXP (op, 1);
4622 break;
4623
4624 case UNSPEC:
4625 if (XINT (op, 1) == UNSPEC_TP
4626 && TARGET_TLS_DIRECT_SEG_REFS
4627 && seg == SEG_DEFAULT)
4628 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
4629 else
4630 return 0;
4631 break;
4632
4633 case REG:
4634 case SUBREG:
4635 if (!base)
4636 base = op;
4637 else if (!index)
4638 index = op;
4639 else
4640 return 0;
4641 break;
4642
4643 case CONST:
4644 case CONST_INT:
4645 case SYMBOL_REF:
4646 case LABEL_REF:
4647 if (disp)
4648 return 0;
4649 disp = op;
4650 break;
4651
4652 default:
4653 return 0;
4654 }
e075ae69 4655 }
e075ae69
RH
4656 }
4657 else if (GET_CODE (addr) == MULT)
4658 {
4659 index = XEXP (addr, 0); /* index*scale */
4660 scale_rtx = XEXP (addr, 1);
4661 }
4662 else if (GET_CODE (addr) == ASHIFT)
4663 {
4664 rtx tmp;
4665
4666 /* We're called for lea too, which implements ashift on occasion. */
4667 index = XEXP (addr, 0);
4668 tmp = XEXP (addr, 1);
4669 if (GET_CODE (tmp) != CONST_INT)
b446e5a2 4670 return 0;
e075ae69
RH
4671 scale = INTVAL (tmp);
4672 if ((unsigned HOST_WIDE_INT) scale > 3)
b446e5a2 4673 return 0;
e075ae69 4674 scale = 1 << scale;
b446e5a2 4675 retval = -1;
2a2ab3f9 4676 }
2a2ab3f9 4677 else
e075ae69
RH
4678 disp = addr; /* displacement */
4679
4680 /* Extract the integral value of scale. */
4681 if (scale_rtx)
e9a25f70 4682 {
e075ae69 4683 if (GET_CODE (scale_rtx) != CONST_INT)
b446e5a2 4684 return 0;
e075ae69 4685 scale = INTVAL (scale_rtx);
e9a25f70 4686 }
3b3c6a3f 4687
74dc3e94 4688 /* Allow arg pointer and stack pointer as index if there is not scaling. */
e075ae69 4689 if (base && index && scale == 1
74dc3e94
RH
4690 && (index == arg_pointer_rtx
4691 || index == frame_pointer_rtx
4692 || (REG_P (index) && REGNO (index) == STACK_POINTER_REGNUM)))
e075ae69
RH
4693 {
4694 rtx tmp = base;
4695 base = index;
4696 index = tmp;
4697 }
4698
4699 /* Special case: %ebp cannot be encoded as a base without a displacement. */
564d80f4
JH
4700 if ((base == hard_frame_pointer_rtx
4701 || base == frame_pointer_rtx
4702 || base == arg_pointer_rtx) && !disp)
e075ae69
RH
4703 disp = const0_rtx;
4704
4705 /* Special case: on K6, [%esi] makes the instruction vector decoded.
4706 Avoid this by transforming to [%esi+0]. */
9e555526 4707 if (ix86_tune == PROCESSOR_K6 && !optimize_size
e075ae69 4708 && base && !index && !disp
329e1d01 4709 && REG_P (base)
e075ae69
RH
4710 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
4711 disp = const0_rtx;
4712
4713 /* Special case: encode reg+reg instead of reg*2. */
4714 if (!base && index && scale && scale == 2)
4715 base = index, scale = 1;
0f290768 4716
e075ae69
RH
4717 /* Special case: scaling cannot be encoded without base or displacement. */
4718 if (!base && !disp && index && scale != 1)
4719 disp = const0_rtx;
4720
4721 out->base = base;
4722 out->index = index;
4723 out->disp = disp;
4724 out->scale = scale;
74dc3e94 4725 out->seg = seg;
3b3c6a3f 4726
b446e5a2 4727 return retval;
e075ae69 4728}
01329426
JH
4729\f
4730/* Return cost of the memory address x.
4731 For i386, it is better to use a complex address than let gcc copy
4732 the address into a reg and make a new pseudo. But not if the address
4733 requires to two regs - that would mean more pseudos with longer
4734 lifetimes. */
dcefdf67 4735static int
b96a374d 4736ix86_address_cost (rtx x)
01329426
JH
4737{
4738 struct ix86_address parts;
4739 int cost = 1;
3b3c6a3f 4740
01329426
JH
4741 if (!ix86_decompose_address (x, &parts))
4742 abort ();
4743
4744 /* More complex memory references are better. */
4745 if (parts.disp && parts.disp != const0_rtx)
4746 cost--;
74dc3e94
RH
4747 if (parts.seg != SEG_DEFAULT)
4748 cost--;
01329426
JH
4749
4750 /* Attempt to minimize number of registers in the address. */
4751 if ((parts.base
4752 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
4753 || (parts.index
4754 && (!REG_P (parts.index)
4755 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
4756 cost++;
4757
4758 if (parts.base
4759 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
4760 && parts.index
4761 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
4762 && parts.base != parts.index)
4763 cost++;
4764
4765 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
4766 since it's predecode logic can't detect the length of instructions
4767 and it degenerates to vector decoded. Increase cost of such
4768 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
0f290768 4769 to split such addresses or even refuse such addresses at all.
01329426
JH
4770
4771 Following addressing modes are affected:
4772 [base+scale*index]
4773 [scale*index+disp]
4774 [base+index]
0f290768 4775
01329426
JH
4776 The first and last case may be avoidable by explicitly coding the zero in
4777 memory address, but I don't have AMD-K6 machine handy to check this
4778 theory. */
4779
4780 if (TARGET_K6
4781 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
4782 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
4783 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
4784 cost += 10;
0f290768 4785
01329426
JH
4786 return cost;
4787}
4788\f
b949ea8b
JW
4789/* If X is a machine specific address (i.e. a symbol or label being
4790 referenced as a displacement from the GOT implemented using an
4791 UNSPEC), then return the base term. Otherwise return X. */
4792
4793rtx
b96a374d 4794ix86_find_base_term (rtx x)
b949ea8b
JW
4795{
4796 rtx term;
4797
6eb791fc
JH
4798 if (TARGET_64BIT)
4799 {
4800 if (GET_CODE (x) != CONST)
4801 return x;
4802 term = XEXP (x, 0);
4803 if (GET_CODE (term) == PLUS
4804 && (GET_CODE (XEXP (term, 1)) == CONST_INT
4805 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
4806 term = XEXP (term, 0);
4807 if (GET_CODE (term) != UNSPEC
8ee41eaf 4808 || XINT (term, 1) != UNSPEC_GOTPCREL)
6eb791fc
JH
4809 return x;
4810
4811 term = XVECEXP (term, 0, 0);
4812
4813 if (GET_CODE (term) != SYMBOL_REF
4814 && GET_CODE (term) != LABEL_REF)
4815 return x;
4816
4817 return term;
4818 }
4819
69bd9368 4820 term = ix86_delegitimize_address (x);
b949ea8b
JW
4821
4822 if (GET_CODE (term) != SYMBOL_REF
4823 && GET_CODE (term) != LABEL_REF)
4824 return x;
4825
4826 return term;
4827}
828a4fe4
MS
4828
4829/* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
4830 this is used for to form addresses to local data when -fPIC is in
4831 use. */
4832
4833static bool
4834darwin_local_data_pic (rtx disp)
4835{
4836 if (GET_CODE (disp) == MINUS)
4837 {
4838 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
4839 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
4840 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
4841 {
4842 const char *sym_name = XSTR (XEXP (disp, 1), 0);
4843 if (! strcmp (sym_name, "<pic base>"))
4844 return true;
4845 }
4846 }
4847
4848 return false;
4849}
b949ea8b 4850\f
f996902d
RH
4851/* Determine if a given RTX is a valid constant. We already know this
4852 satisfies CONSTANT_P. */
4853
4854bool
b96a374d 4855legitimate_constant_p (rtx x)
f996902d 4856{
f996902d
RH
4857 switch (GET_CODE (x))
4858 {
f996902d 4859 case CONST:
1e19ac74 4860 x = XEXP (x, 0);
f996902d 4861
1e19ac74 4862 if (GET_CODE (x) == PLUS)
828a4fe4 4863 {
1e19ac74 4864 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
828a4fe4 4865 return false;
1e19ac74 4866 x = XEXP (x, 0);
828a4fe4
MS
4867 }
4868
1e19ac74 4869 if (TARGET_MACHO && darwin_local_data_pic (x))
828a4fe4
MS
4870 return true;
4871
f996902d 4872 /* Only some unspecs are valid as "constants". */
1e19ac74
RH
4873 if (GET_CODE (x) == UNSPEC)
4874 switch (XINT (x, 1))
f996902d
RH
4875 {
4876 case UNSPEC_TPOFF:
cb0e3e3f 4877 case UNSPEC_NTPOFF:
1e19ac74 4878 return local_exec_symbolic_operand (XVECEXP (x, 0, 0), Pmode);
cb0e3e3f 4879 case UNSPEC_DTPOFF:
1e19ac74 4880 return local_dynamic_symbolic_operand (XVECEXP (x, 0, 0), Pmode);
f996902d
RH
4881 default:
4882 return false;
4883 }
1e19ac74
RH
4884
4885 /* We must have drilled down to a symbol. */
4886 if (!symbolic_operand (x, Pmode))
4887 return false;
4888 /* FALLTHRU */
4889
4890 case SYMBOL_REF:
4891 /* TLS symbols are never valid. */
4892 if (tls_symbolic_operand (x, Pmode))
4893 return false;
f996902d
RH
4894 break;
4895
4896 default:
4897 break;
4898 }
4899
4900 /* Otherwise we handle everything else in the move patterns. */
4901 return true;
4902}
4903
3a04ff64
RH
4904/* Determine if it's legal to put X into the constant pool. This
4905 is not possible for the address of thread-local symbols, which
4906 is checked above. */
4907
4908static bool
b96a374d 4909ix86_cannot_force_const_mem (rtx x)
3a04ff64
RH
4910{
4911 return !legitimate_constant_p (x);
4912}
4913
f996902d
RH
4914/* Determine if a given RTX is a valid constant address. */
4915
4916bool
b96a374d 4917constant_address_p (rtx x)
f996902d 4918{
a94f136b 4919 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
f996902d
RH
4920}
4921
4922/* Nonzero if the constant value X is a legitimate general operand
fce5a9f2 4923 when generating PIC code. It is given that flag_pic is on and
f996902d
RH
4924 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
4925
4926bool
b96a374d 4927legitimate_pic_operand_p (rtx x)
f996902d
RH
4928{
4929 rtx inner;
4930
4931 switch (GET_CODE (x))
4932 {
4933 case CONST:
4934 inner = XEXP (x, 0);
4935
4936 /* Only some unspecs are valid as "constants". */
4937 if (GET_CODE (inner) == UNSPEC)
4938 switch (XINT (inner, 1))
4939 {
4940 case UNSPEC_TPOFF:
4941 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
f996902d
RH
4942 default:
4943 return false;
4944 }
5efb1046 4945 /* FALLTHRU */
f996902d
RH
4946
4947 case SYMBOL_REF:
4948 case LABEL_REF:
4949 return legitimate_pic_address_disp_p (x);
4950
4951 default:
4952 return true;
4953 }
4954}
4955
e075ae69
RH
4956/* Determine if a given CONST RTX is a valid memory displacement
4957 in PIC mode. */
0f290768 4958
59be65f6 4959int
8d531ab9 4960legitimate_pic_address_disp_p (rtx disp)
91bb873f 4961{
f996902d
RH
4962 bool saw_plus;
4963
6eb791fc
JH
4964 /* In 64bit mode we can allow direct addresses of symbols and labels
4965 when they are not dynamic symbols. */
c05dbe81
JH
4966 if (TARGET_64BIT)
4967 {
4968 /* TLS references should always be enclosed in UNSPEC. */
4969 if (tls_symbolic_operand (disp, GET_MODE (disp)))
4970 return 0;
4971 if (GET_CODE (disp) == SYMBOL_REF
4972 && ix86_cmodel == CM_SMALL_PIC
2ae5ae57 4973 && SYMBOL_REF_LOCAL_P (disp))
c05dbe81
JH
4974 return 1;
4975 if (GET_CODE (disp) == LABEL_REF)
4976 return 1;
4977 if (GET_CODE (disp) == CONST
a132b6a8
JJ
4978 && GET_CODE (XEXP (disp, 0)) == PLUS)
4979 {
4980 rtx op0 = XEXP (XEXP (disp, 0), 0);
4981 rtx op1 = XEXP (XEXP (disp, 0), 1);
4982
4983 /* TLS references should always be enclosed in UNSPEC. */
4984 if (tls_symbolic_operand (op0, GET_MODE (op0)))
4985 return 0;
4986 if (((GET_CODE (op0) == SYMBOL_REF
4987 && ix86_cmodel == CM_SMALL_PIC
4988 && SYMBOL_REF_LOCAL_P (op0))
4989 || GET_CODE (op0) == LABEL_REF)
4990 && GET_CODE (op1) == CONST_INT
4991 && INTVAL (op1) < 16*1024*1024
4992 && INTVAL (op1) >= -16*1024*1024)
4993 return 1;
4994 }
c05dbe81 4995 }
91bb873f
RH
4996 if (GET_CODE (disp) != CONST)
4997 return 0;
4998 disp = XEXP (disp, 0);
4999
6eb791fc
JH
5000 if (TARGET_64BIT)
5001 {
5002 /* We are unsafe to allow PLUS expressions. This limit allowed distance
5003 of GOT tables. We should not need these anyway. */
5004 if (GET_CODE (disp) != UNSPEC
8ee41eaf 5005 || XINT (disp, 1) != UNSPEC_GOTPCREL)
6eb791fc
JH
5006 return 0;
5007
5008 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
5009 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
5010 return 0;
5011 return 1;
5012 }
5013
f996902d 5014 saw_plus = false;
91bb873f
RH
5015 if (GET_CODE (disp) == PLUS)
5016 {
5017 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
5018 return 0;
5019 disp = XEXP (disp, 0);
f996902d 5020 saw_plus = true;
91bb873f
RH
5021 }
5022
828a4fe4
MS
5023 if (TARGET_MACHO && darwin_local_data_pic (disp))
5024 return 1;
b069de3b 5025
8ee41eaf 5026 if (GET_CODE (disp) != UNSPEC)
91bb873f
RH
5027 return 0;
5028
623fe810
RH
5029 switch (XINT (disp, 1))
5030 {
8ee41eaf 5031 case UNSPEC_GOT:
f996902d
RH
5032 if (saw_plus)
5033 return false;
623fe810 5034 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
8ee41eaf 5035 case UNSPEC_GOTOFF:
799b33a0
JH
5036 if (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
5037 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
5038 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5039 return false;
f996902d 5040 case UNSPEC_GOTTPOFF:
dea73790
JJ
5041 case UNSPEC_GOTNTPOFF:
5042 case UNSPEC_INDNTPOFF:
f996902d
RH
5043 if (saw_plus)
5044 return false;
5045 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5046 case UNSPEC_NTPOFF:
f996902d
RH
5047 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5048 case UNSPEC_DTPOFF:
f996902d 5049 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
623fe810 5050 }
fce5a9f2 5051
623fe810 5052 return 0;
91bb873f
RH
5053}
5054
e075ae69
RH
5055/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5056 memory address for an instruction. The MODE argument is the machine mode
5057 for the MEM expression that wants to use this address.
5058
5059 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
5060 convert common non-canonical forms to canonical form so that they will
5061 be recognized. */
5062
3b3c6a3f 5063int
8d531ab9 5064legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
3b3c6a3f 5065{
e075ae69
RH
5066 struct ix86_address parts;
5067 rtx base, index, disp;
5068 HOST_WIDE_INT scale;
5069 const char *reason = NULL;
5070 rtx reason_rtx = NULL_RTX;
3b3c6a3f
MM
5071
5072 if (TARGET_DEBUG_ADDR)
5073 {
5074 fprintf (stderr,
e9a25f70 5075 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
3b3c6a3f 5076 GET_MODE_NAME (mode), strict);
3b3c6a3f
MM
5077 debug_rtx (addr);
5078 }
5079
b446e5a2 5080 if (ix86_decompose_address (addr, &parts) <= 0)
3b3c6a3f 5081 {
e075ae69 5082 reason = "decomposition failed";
50e60bc3 5083 goto report_error;
3b3c6a3f
MM
5084 }
5085
e075ae69
RH
5086 base = parts.base;
5087 index = parts.index;
5088 disp = parts.disp;
5089 scale = parts.scale;
91f0226f 5090
e075ae69 5091 /* Validate base register.
e9a25f70
JL
5092
5093 Don't allow SUBREG's here, it can lead to spill failures when the base
3d771dfd
MM
5094 is one word out of a two word structure, which is represented internally
5095 as a DImode int. */
e9a25f70 5096
3b3c6a3f
MM
5097 if (base)
5098 {
e075ae69
RH
5099 reason_rtx = base;
5100
90e4e4c5 5101 if (GET_CODE (base) != REG)
3b3c6a3f 5102 {
e075ae69 5103 reason = "base is not a register";
50e60bc3 5104 goto report_error;
3b3c6a3f
MM
5105 }
5106
c954bd01
RH
5107 if (GET_MODE (base) != Pmode)
5108 {
e075ae69 5109 reason = "base is not in Pmode";
50e60bc3 5110 goto report_error;
c954bd01
RH
5111 }
5112
90e4e4c5
RH
5113 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
5114 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
3b3c6a3f 5115 {
e075ae69 5116 reason = "base is not valid";
50e60bc3 5117 goto report_error;
3b3c6a3f
MM
5118 }
5119 }
5120
e075ae69 5121 /* Validate index register.
e9a25f70
JL
5122
5123 Don't allow SUBREG's here, it can lead to spill failures when the index
3d771dfd
MM
5124 is one word out of a two word structure, which is represented internally
5125 as a DImode int. */
e075ae69
RH
5126
5127 if (index)
3b3c6a3f 5128 {
e075ae69
RH
5129 reason_rtx = index;
5130
90e4e4c5 5131 if (GET_CODE (index) != REG)
3b3c6a3f 5132 {
e075ae69 5133 reason = "index is not a register";
50e60bc3 5134 goto report_error;
3b3c6a3f
MM
5135 }
5136
e075ae69 5137 if (GET_MODE (index) != Pmode)
c954bd01 5138 {
e075ae69 5139 reason = "index is not in Pmode";
50e60bc3 5140 goto report_error;
c954bd01
RH
5141 }
5142
90e4e4c5
RH
5143 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
5144 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
3b3c6a3f 5145 {
e075ae69 5146 reason = "index is not valid";
50e60bc3 5147 goto report_error;
3b3c6a3f
MM
5148 }
5149 }
3b3c6a3f 5150
e075ae69
RH
5151 /* Validate scale factor. */
5152 if (scale != 1)
3b3c6a3f 5153 {
e075ae69
RH
5154 reason_rtx = GEN_INT (scale);
5155 if (!index)
3b3c6a3f 5156 {
e075ae69 5157 reason = "scale without index";
50e60bc3 5158 goto report_error;
3b3c6a3f
MM
5159 }
5160
e075ae69 5161 if (scale != 2 && scale != 4 && scale != 8)
3b3c6a3f 5162 {
e075ae69 5163 reason = "scale is not a valid multiplier";
50e60bc3 5164 goto report_error;
3b3c6a3f
MM
5165 }
5166 }
5167
91bb873f 5168 /* Validate displacement. */
3b3c6a3f
MM
5169 if (disp)
5170 {
e075ae69
RH
5171 reason_rtx = disp;
5172
f996902d
RH
5173 if (GET_CODE (disp) == CONST
5174 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
5175 switch (XINT (XEXP (disp, 0), 1))
5176 {
5177 case UNSPEC_GOT:
5178 case UNSPEC_GOTOFF:
5179 case UNSPEC_GOTPCREL:
5180 if (!flag_pic)
5181 abort ();
5182 goto is_legitimate_pic;
5183
5184 case UNSPEC_GOTTPOFF:
dea73790
JJ
5185 case UNSPEC_GOTNTPOFF:
5186 case UNSPEC_INDNTPOFF:
f996902d
RH
5187 case UNSPEC_NTPOFF:
5188 case UNSPEC_DTPOFF:
5189 break;
5190
5191 default:
5192 reason = "invalid address unspec";
5193 goto report_error;
5194 }
5195
b069de3b
SS
5196 else if (flag_pic && (SYMBOLIC_CONST (disp)
5197#if TARGET_MACHO
5198 && !machopic_operand_p (disp)
5199#endif
5200 ))
3b3c6a3f 5201 {
f996902d 5202 is_legitimate_pic:
0d7d98ee
JH
5203 if (TARGET_64BIT && (index || base))
5204 {
75d38379
JJ
5205 /* foo@dtpoff(%rX) is ok. */
5206 if (GET_CODE (disp) != CONST
5207 || GET_CODE (XEXP (disp, 0)) != PLUS
5208 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
5209 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
5210 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
5211 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
5212 {
5213 reason = "non-constant pic memory reference";
5214 goto report_error;
5215 }
0d7d98ee 5216 }
75d38379 5217 else if (! legitimate_pic_address_disp_p (disp))
91bb873f 5218 {
e075ae69 5219 reason = "displacement is an invalid pic construct";
50e60bc3 5220 goto report_error;
91bb873f
RH
5221 }
5222
4e9efe54 5223 /* This code used to verify that a symbolic pic displacement
0f290768
KH
5224 includes the pic_offset_table_rtx register.
5225
4e9efe54
JH
5226 While this is good idea, unfortunately these constructs may
5227 be created by "adds using lea" optimization for incorrect
5228 code like:
5229
5230 int a;
5231 int foo(int i)
5232 {
5233 return *(&a+i);
5234 }
5235
50e60bc3 5236 This code is nonsensical, but results in addressing
4e9efe54 5237 GOT table with pic_offset_table_rtx base. We can't
f710504c 5238 just refuse it easily, since it gets matched by
4e9efe54
JH
5239 "addsi3" pattern, that later gets split to lea in the
5240 case output register differs from input. While this
5241 can be handled by separate addsi pattern for this case
5242 that never results in lea, this seems to be easier and
5243 correct fix for crash to disable this test. */
3b3c6a3f 5244 }
a94f136b
JH
5245 else if (GET_CODE (disp) != LABEL_REF
5246 && GET_CODE (disp) != CONST_INT
5247 && (GET_CODE (disp) != CONST
5248 || !legitimate_constant_p (disp))
5249 && (GET_CODE (disp) != SYMBOL_REF
5250 || !legitimate_constant_p (disp)))
f996902d
RH
5251 {
5252 reason = "displacement is not constant";
5253 goto report_error;
5254 }
8fe75e43
RH
5255 else if (TARGET_64BIT
5256 && !x86_64_immediate_operand (disp, VOIDmode))
c05dbe81
JH
5257 {
5258 reason = "displacement is out of range";
5259 goto report_error;
5260 }
3b3c6a3f
MM
5261 }
5262
e075ae69 5263 /* Everything looks valid. */
3b3c6a3f 5264 if (TARGET_DEBUG_ADDR)
e075ae69 5265 fprintf (stderr, "Success.\n");
3b3c6a3f 5266 return TRUE;
e075ae69 5267
5bf0ebab 5268 report_error:
e075ae69
RH
5269 if (TARGET_DEBUG_ADDR)
5270 {
5271 fprintf (stderr, "Error: %s\n", reason);
5272 debug_rtx (reason_rtx);
5273 }
5274 return FALSE;
3b3c6a3f 5275}
3b3c6a3f 5276\f
55efb413
JW
5277/* Return an unique alias set for the GOT. */
5278
0f290768 5279static HOST_WIDE_INT
b96a374d 5280ix86_GOT_alias_set (void)
55efb413 5281{
5bf0ebab
RH
5282 static HOST_WIDE_INT set = -1;
5283 if (set == -1)
5284 set = new_alias_set ();
5285 return set;
0f290768 5286}
55efb413 5287
3b3c6a3f
MM
5288/* Return a legitimate reference for ORIG (an address) using the
5289 register REG. If REG is 0, a new pseudo is generated.
5290
91bb873f 5291 There are two types of references that must be handled:
3b3c6a3f
MM
5292
5293 1. Global data references must load the address from the GOT, via
5294 the PIC reg. An insn is emitted to do this load, and the reg is
5295 returned.
5296
91bb873f
RH
5297 2. Static data references, constant pool addresses, and code labels
5298 compute the address as an offset from the GOT, whose base is in
2ae5ae57 5299 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
91bb873f
RH
5300 differentiate them from global data objects. The returned
5301 address is the PIC reg + an unspec constant.
3b3c6a3f
MM
5302
5303 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
91bb873f 5304 reg also appears in the address. */
3b3c6a3f 5305
b39edae3 5306static rtx
b96a374d 5307legitimize_pic_address (rtx orig, rtx reg)
3b3c6a3f
MM
5308{
5309 rtx addr = orig;
5310 rtx new = orig;
91bb873f 5311 rtx base;
3b3c6a3f 5312
b069de3b
SS
5313#if TARGET_MACHO
5314 if (reg == 0)
5315 reg = gen_reg_rtx (Pmode);
5316 /* Use the generic Mach-O PIC machinery. */
5317 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
5318#endif
5319
c05dbe81
JH
5320 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
5321 new = addr;
5322 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
3b3c6a3f 5323 {
c05dbe81
JH
5324 /* This symbol may be referenced via a displacement from the PIC
5325 base address (@GOTOFF). */
3b3c6a3f 5326
c05dbe81
JH
5327 if (reload_in_progress)
5328 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
799b33a0
JH
5329 if (GET_CODE (addr) == CONST)
5330 addr = XEXP (addr, 0);
5331 if (GET_CODE (addr) == PLUS)
5332 {
5333 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
5334 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
5335 }
5336 else
5337 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
c05dbe81
JH
5338 new = gen_rtx_CONST (Pmode, new);
5339 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
3b3c6a3f 5340
c05dbe81
JH
5341 if (reg != 0)
5342 {
5343 emit_move_insn (reg, new);
5344 new = reg;
5345 }
3b3c6a3f 5346 }
91bb873f 5347 else if (GET_CODE (addr) == SYMBOL_REF)
3b3c6a3f 5348 {
14f73b5a
JH
5349 if (TARGET_64BIT)
5350 {
8ee41eaf 5351 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
14f73b5a 5352 new = gen_rtx_CONST (Pmode, new);
542a8afa 5353 new = gen_const_mem (Pmode, new);
14f73b5a
JH
5354 set_mem_alias_set (new, ix86_GOT_alias_set ());
5355
5356 if (reg == 0)
5357 reg = gen_reg_rtx (Pmode);
5358 /* Use directly gen_movsi, otherwise the address is loaded
5359 into register for CSE. We don't want to CSE this addresses,
5360 instead we CSE addresses from the GOT table, so skip this. */
5361 emit_insn (gen_movsi (reg, new));
5362 new = reg;
5363 }
5364 else
5365 {
5366 /* This symbol must be referenced via a load from the
5367 Global Offset Table (@GOT). */
3b3c6a3f 5368
66edd3b4
RH
5369 if (reload_in_progress)
5370 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
8ee41eaf 5371 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
14f73b5a
JH
5372 new = gen_rtx_CONST (Pmode, new);
5373 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
542a8afa 5374 new = gen_const_mem (Pmode, new);
14f73b5a 5375 set_mem_alias_set (new, ix86_GOT_alias_set ());
3b3c6a3f 5376
14f73b5a
JH
5377 if (reg == 0)
5378 reg = gen_reg_rtx (Pmode);
5379 emit_move_insn (reg, new);
5380 new = reg;
5381 }
0f290768 5382 }
91bb873f
RH
5383 else
5384 {
5385 if (GET_CODE (addr) == CONST)
3b3c6a3f 5386 {
91bb873f 5387 addr = XEXP (addr, 0);
e3c8ea67
RH
5388
5389 /* We must match stuff we generate before. Assume the only
5390 unspecs that can get here are ours. Not that we could do
43f3a59d 5391 anything with them anyway.... */
e3c8ea67
RH
5392 if (GET_CODE (addr) == UNSPEC
5393 || (GET_CODE (addr) == PLUS
5394 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
5395 return orig;
5396 if (GET_CODE (addr) != PLUS)
564d80f4 5397 abort ();
3b3c6a3f 5398 }
91bb873f
RH
5399 if (GET_CODE (addr) == PLUS)
5400 {
5401 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
e9a25f70 5402
91bb873f
RH
5403 /* Check first to see if this is a constant offset from a @GOTOFF
5404 symbol reference. */
623fe810 5405 if (local_symbolic_operand (op0, Pmode)
91bb873f
RH
5406 && GET_CODE (op1) == CONST_INT)
5407 {
6eb791fc
JH
5408 if (!TARGET_64BIT)
5409 {
66edd3b4
RH
5410 if (reload_in_progress)
5411 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
8ee41eaf
RH
5412 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
5413 UNSPEC_GOTOFF);
6eb791fc
JH
5414 new = gen_rtx_PLUS (Pmode, new, op1);
5415 new = gen_rtx_CONST (Pmode, new);
5416 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
91bb873f 5417
6eb791fc
JH
5418 if (reg != 0)
5419 {
5420 emit_move_insn (reg, new);
5421 new = reg;
5422 }
5423 }
5424 else
91bb873f 5425 {
75d38379
JJ
5426 if (INTVAL (op1) < -16*1024*1024
5427 || INTVAL (op1) >= 16*1024*1024)
b8771ace 5428 new = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
91bb873f
RH
5429 }
5430 }
5431 else
5432 {
5433 base = legitimize_pic_address (XEXP (addr, 0), reg);
5434 new = legitimize_pic_address (XEXP (addr, 1),
5435 base == reg ? NULL_RTX : reg);
5436
5437 if (GET_CODE (new) == CONST_INT)
5438 new = plus_constant (base, INTVAL (new));
5439 else
5440 {
5441 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
5442 {
5443 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
5444 new = XEXP (new, 1);
5445 }
5446 new = gen_rtx_PLUS (Pmode, base, new);
5447 }
5448 }
5449 }
3b3c6a3f
MM
5450 }
5451 return new;
5452}
5453\f
74dc3e94 5454/* Load the thread pointer. If TO_REG is true, force it into a register. */
f996902d
RH
5455
5456static rtx
b96a374d 5457get_thread_pointer (int to_reg)
f996902d 5458{
74dc3e94 5459 rtx tp, reg, insn;
f996902d
RH
5460
5461 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
74dc3e94
RH
5462 if (!to_reg)
5463 return tp;
f996902d 5464
74dc3e94
RH
5465 reg = gen_reg_rtx (Pmode);
5466 insn = gen_rtx_SET (VOIDmode, reg, tp);
5467 insn = emit_insn (insn);
5468
5469 return reg;
5470}
5471
5472/* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
5473 false if we expect this to be used for a memory address and true if
5474 we expect to load the address into a register. */
5475
5476static rtx
b96a374d 5477legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
74dc3e94
RH
5478{
5479 rtx dest, base, off, pic;
5480 int type;
5481
5482 switch (model)
5483 {
5484 case TLS_MODEL_GLOBAL_DYNAMIC:
5485 dest = gen_reg_rtx (Pmode);
5486 if (TARGET_64BIT)
5487 {
5488 rtx rax = gen_rtx_REG (Pmode, 0), insns;
5489
5490 start_sequence ();
5491 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
5492 insns = get_insns ();
5493 end_sequence ();
5494
5495 emit_libcall_block (insns, dest, rax, x);
5496 }
5497 else
5498 emit_insn (gen_tls_global_dynamic_32 (dest, x));
5499 break;
5500
5501 case TLS_MODEL_LOCAL_DYNAMIC:
5502 base = gen_reg_rtx (Pmode);
5503 if (TARGET_64BIT)
5504 {
5505 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
5506
5507 start_sequence ();
5508 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
5509 insns = get_insns ();
5510 end_sequence ();
5511
5512 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
5513 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
5514 emit_libcall_block (insns, base, rax, note);
5515 }
5516 else
5517 emit_insn (gen_tls_local_dynamic_base_32 (base));
5518
5519 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
5520 off = gen_rtx_CONST (Pmode, off);
5521
5522 return gen_rtx_PLUS (Pmode, base, off);
5523
5524 case TLS_MODEL_INITIAL_EXEC:
5525 if (TARGET_64BIT)
5526 {
5527 pic = NULL;
5528 type = UNSPEC_GOTNTPOFF;
5529 }
5530 else if (flag_pic)
5531 {
5532 if (reload_in_progress)
5533 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5534 pic = pic_offset_table_rtx;
5535 type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
5536 }
5537 else if (!TARGET_GNU_TLS)
5538 {
5539 pic = gen_reg_rtx (Pmode);
5540 emit_insn (gen_set_got (pic));
5541 type = UNSPEC_GOTTPOFF;
5542 }
5543 else
5544 {
5545 pic = NULL;
5546 type = UNSPEC_INDNTPOFF;
5547 }
5548
5549 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
5550 off = gen_rtx_CONST (Pmode, off);
5551 if (pic)
5552 off = gen_rtx_PLUS (Pmode, pic, off);
542a8afa 5553 off = gen_const_mem (Pmode, off);
74dc3e94
RH
5554 set_mem_alias_set (off, ix86_GOT_alias_set ());
5555
5556 if (TARGET_64BIT || TARGET_GNU_TLS)
5557 {
5558 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
5559 off = force_reg (Pmode, off);
5560 return gen_rtx_PLUS (Pmode, base, off);
5561 }
5562 else
5563 {
5564 base = get_thread_pointer (true);
5565 dest = gen_reg_rtx (Pmode);
5566 emit_insn (gen_subsi3 (dest, base, off));
5567 }
5568 break;
5569
5570 case TLS_MODEL_LOCAL_EXEC:
5571 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
5572 (TARGET_64BIT || TARGET_GNU_TLS)
5573 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
5574 off = gen_rtx_CONST (Pmode, off);
5575
5576 if (TARGET_64BIT || TARGET_GNU_TLS)
5577 {
5578 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
5579 return gen_rtx_PLUS (Pmode, base, off);
5580 }
5581 else
5582 {
5583 base = get_thread_pointer (true);
5584 dest = gen_reg_rtx (Pmode);
5585 emit_insn (gen_subsi3 (dest, base, off));
5586 }
5587 break;
5588
5589 default:
5590 abort ();
5591 }
5592
5593 return dest;
f996902d 5594}
fce5a9f2 5595
3b3c6a3f
MM
5596/* Try machine-dependent ways of modifying an illegitimate address
5597 to be legitimate. If we find one, return the new, valid address.
5598 This macro is used in only one place: `memory_address' in explow.c.
5599
5600 OLDX is the address as it was before break_out_memory_refs was called.
5601 In some cases it is useful to look at this to decide what needs to be done.
5602
5603 MODE and WIN are passed so that this macro can use
5604 GO_IF_LEGITIMATE_ADDRESS.
5605
5606 It is always safe for this macro to do nothing. It exists to recognize
5607 opportunities to optimize the output.
5608
5609 For the 80386, we handle X+REG by loading X into a register R and
5610 using R+REG. R will go in a general reg and indexing will be used.
5611 However, if REG is a broken-out memory address or multiplication,
5612 nothing needs to be done because REG can certainly go in a general reg.
5613
5614 When -fpic is used, special handling is needed for symbolic references.
5615 See comments by legitimize_pic_address in i386.c for details. */
5616
5617rtx
8d531ab9 5618legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
3b3c6a3f
MM
5619{
5620 int changed = 0;
5621 unsigned log;
5622
5623 if (TARGET_DEBUG_ADDR)
5624 {
e9a25f70
JL
5625 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
5626 GET_MODE_NAME (mode));
3b3c6a3f
MM
5627 debug_rtx (x);
5628 }
5629
8fe75e43 5630 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
f996902d 5631 if (log)
74dc3e94 5632 return legitimize_tls_address (x, log, false);
b39edae3
RH
5633 if (GET_CODE (x) == CONST
5634 && GET_CODE (XEXP (x, 0)) == PLUS
8fe75e43
RH
5635 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
5636 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
b39edae3
RH
5637 {
5638 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
5639 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
5640 }
f996902d 5641
3b3c6a3f
MM
5642 if (flag_pic && SYMBOLIC_CONST (x))
5643 return legitimize_pic_address (x, 0);
5644
5645 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
5646 if (GET_CODE (x) == ASHIFT
5647 && GET_CODE (XEXP (x, 1)) == CONST_INT
b531087a 5648 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
3b3c6a3f
MM
5649 {
5650 changed = 1;
a269a03c
JC
5651 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
5652 GEN_INT (1 << log));
3b3c6a3f
MM
5653 }
5654
5655 if (GET_CODE (x) == PLUS)
5656 {
0f290768 5657 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
e9a25f70 5658
3b3c6a3f
MM
5659 if (GET_CODE (XEXP (x, 0)) == ASHIFT
5660 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
b531087a 5661 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
3b3c6a3f
MM
5662 {
5663 changed = 1;
c5c76735
JL
5664 XEXP (x, 0) = gen_rtx_MULT (Pmode,
5665 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
5666 GEN_INT (1 << log));
3b3c6a3f
MM
5667 }
5668
5669 if (GET_CODE (XEXP (x, 1)) == ASHIFT
5670 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
b531087a 5671 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
3b3c6a3f
MM
5672 {
5673 changed = 1;
c5c76735
JL
5674 XEXP (x, 1) = gen_rtx_MULT (Pmode,
5675 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
5676 GEN_INT (1 << log));
3b3c6a3f
MM
5677 }
5678
0f290768 5679 /* Put multiply first if it isn't already. */
3b3c6a3f
MM
5680 if (GET_CODE (XEXP (x, 1)) == MULT)
5681 {
5682 rtx tmp = XEXP (x, 0);
5683 XEXP (x, 0) = XEXP (x, 1);
5684 XEXP (x, 1) = tmp;
5685 changed = 1;
5686 }
5687
5688 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
5689 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
5690 created by virtual register instantiation, register elimination, and
5691 similar optimizations. */
5692 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
5693 {
5694 changed = 1;
c5c76735
JL
5695 x = gen_rtx_PLUS (Pmode,
5696 gen_rtx_PLUS (Pmode, XEXP (x, 0),
5697 XEXP (XEXP (x, 1), 0)),
5698 XEXP (XEXP (x, 1), 1));
3b3c6a3f
MM
5699 }
5700
e9a25f70
JL
5701 /* Canonicalize
5702 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
3b3c6a3f
MM
5703 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
5704 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
5705 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5706 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
5707 && CONSTANT_P (XEXP (x, 1)))
5708 {
00c79232
ML
5709 rtx constant;
5710 rtx other = NULL_RTX;
3b3c6a3f
MM
5711
5712 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5713 {
5714 constant = XEXP (x, 1);
5715 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
5716 }
5717 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
5718 {
5719 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
5720 other = XEXP (x, 1);
5721 }
5722 else
5723 constant = 0;
5724
5725 if (constant)
5726 {
5727 changed = 1;
c5c76735
JL
5728 x = gen_rtx_PLUS (Pmode,
5729 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
5730 XEXP (XEXP (XEXP (x, 0), 1), 0)),
5731 plus_constant (other, INTVAL (constant)));
3b3c6a3f
MM
5732 }
5733 }
5734
5735 if (changed && legitimate_address_p (mode, x, FALSE))
5736 return x;
5737
5738 if (GET_CODE (XEXP (x, 0)) == MULT)
5739 {
5740 changed = 1;
5741 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
5742 }
5743
5744 if (GET_CODE (XEXP (x, 1)) == MULT)
5745 {
5746 changed = 1;
5747 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
5748 }
5749
5750 if (changed
5751 && GET_CODE (XEXP (x, 1)) == REG
5752 && GET_CODE (XEXP (x, 0)) == REG)
5753 return x;
5754
5755 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
5756 {
5757 changed = 1;
5758 x = legitimize_pic_address (x, 0);
5759 }
5760
5761 if (changed && legitimate_address_p (mode, x, FALSE))
5762 return x;
5763
5764 if (GET_CODE (XEXP (x, 0)) == REG)
5765 {
8d531ab9
KH
5766 rtx temp = gen_reg_rtx (Pmode);
5767 rtx val = force_operand (XEXP (x, 1), temp);
3b3c6a3f
MM
5768 if (val != temp)
5769 emit_move_insn (temp, val);
5770
5771 XEXP (x, 1) = temp;
5772 return x;
5773 }
5774
5775 else if (GET_CODE (XEXP (x, 1)) == REG)
5776 {
8d531ab9
KH
5777 rtx temp = gen_reg_rtx (Pmode);
5778 rtx val = force_operand (XEXP (x, 0), temp);
3b3c6a3f
MM
5779 if (val != temp)
5780 emit_move_insn (temp, val);
5781
5782 XEXP (x, 0) = temp;
5783 return x;
5784 }
5785 }
5786
5787 return x;
5788}
2a2ab3f9
JVA
5789\f
5790/* Print an integer constant expression in assembler syntax. Addition
5791 and subtraction are the only arithmetic that may appear in these
5792 expressions. FILE is the stdio stream to write to, X is the rtx, and
5793 CODE is the operand print code from the output string. */
5794
5795static void
b96a374d 5796output_pic_addr_const (FILE *file, rtx x, int code)
2a2ab3f9
JVA
5797{
5798 char buf[256];
5799
5800 switch (GET_CODE (x))
5801 {
5802 case PC:
5803 if (flag_pic)
5804 putc ('.', file);
5805 else
5806 abort ();
5807 break;
5808
5809 case SYMBOL_REF:
79bba51c
AP
5810 /* Mark the decl as referenced so that cgraph will output the function. */
5811 if (SYMBOL_REF_DECL (x))
5812 mark_decl_referenced (SYMBOL_REF_DECL (x));
5813
91bb873f 5814 assemble_name (file, XSTR (x, 0));
12969f45 5815 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
91bb873f 5816 fputs ("@PLT", file);
2a2ab3f9
JVA
5817 break;
5818
91bb873f
RH
5819 case LABEL_REF:
5820 x = XEXP (x, 0);
5efb1046 5821 /* FALLTHRU */
2a2ab3f9
JVA
5822 case CODE_LABEL:
5823 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
5824 assemble_name (asm_out_file, buf);
5825 break;
5826
5827 case CONST_INT:
f64cecad 5828 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
2a2ab3f9
JVA
5829 break;
5830
5831 case CONST:
5832 /* This used to output parentheses around the expression,
5833 but that does not work on the 386 (either ATT or BSD assembler). */
5834 output_pic_addr_const (file, XEXP (x, 0), code);
5835 break;
5836
5837 case CONST_DOUBLE:
5838 if (GET_MODE (x) == VOIDmode)
5839 {
5840 /* We can use %d if the number is <32 bits and positive. */
5841 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
f64cecad
JC
5842 fprintf (file, "0x%lx%08lx",
5843 (unsigned long) CONST_DOUBLE_HIGH (x),
5844 (unsigned long) CONST_DOUBLE_LOW (x));
2a2ab3f9 5845 else
f64cecad 5846 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
2a2ab3f9
JVA
5847 }
5848 else
5849 /* We can't handle floating point constants;
5850 PRINT_OPERAND must handle them. */
5851 output_operand_lossage ("floating constant misused");
5852 break;
5853
5854 case PLUS:
e9a25f70 5855 /* Some assemblers need integer constants to appear first. */
2a2ab3f9
JVA
5856 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
5857 {
2a2ab3f9 5858 output_pic_addr_const (file, XEXP (x, 0), code);
e075ae69 5859 putc ('+', file);
e9a25f70 5860 output_pic_addr_const (file, XEXP (x, 1), code);
2a2ab3f9 5861 }
91bb873f 5862 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
2a2ab3f9 5863 {
2a2ab3f9 5864 output_pic_addr_const (file, XEXP (x, 1), code);
e075ae69 5865 putc ('+', file);
e9a25f70 5866 output_pic_addr_const (file, XEXP (x, 0), code);
2a2ab3f9 5867 }
91bb873f
RH
5868 else
5869 abort ();
2a2ab3f9
JVA
5870 break;
5871
5872 case MINUS:
b069de3b
SS
5873 if (!TARGET_MACHO)
5874 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
2a2ab3f9 5875 output_pic_addr_const (file, XEXP (x, 0), code);
e075ae69 5876 putc ('-', file);
2a2ab3f9 5877 output_pic_addr_const (file, XEXP (x, 1), code);
b069de3b
SS
5878 if (!TARGET_MACHO)
5879 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
2a2ab3f9
JVA
5880 break;
5881
91bb873f
RH
5882 case UNSPEC:
5883 if (XVECLEN (x, 0) != 1)
5bf0ebab 5884 abort ();
91bb873f
RH
5885 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
5886 switch (XINT (x, 1))
77ebd435 5887 {
8ee41eaf 5888 case UNSPEC_GOT:
77ebd435
AJ
5889 fputs ("@GOT", file);
5890 break;
8ee41eaf 5891 case UNSPEC_GOTOFF:
77ebd435
AJ
5892 fputs ("@GOTOFF", file);
5893 break;
8ee41eaf 5894 case UNSPEC_GOTPCREL:
edfe8595 5895 fputs ("@GOTPCREL(%rip)", file);
6eb791fc 5896 break;
f996902d 5897 case UNSPEC_GOTTPOFF:
dea73790 5898 /* FIXME: This might be @TPOFF in Sun ld too. */
f996902d
RH
5899 fputs ("@GOTTPOFF", file);
5900 break;
5901 case UNSPEC_TPOFF:
5902 fputs ("@TPOFF", file);
5903 break;
5904 case UNSPEC_NTPOFF:
75d38379
JJ
5905 if (TARGET_64BIT)
5906 fputs ("@TPOFF", file);
5907 else
5908 fputs ("@NTPOFF", file);
f996902d
RH
5909 break;
5910 case UNSPEC_DTPOFF:
5911 fputs ("@DTPOFF", file);
5912 break;
dea73790 5913 case UNSPEC_GOTNTPOFF:
75d38379
JJ
5914 if (TARGET_64BIT)
5915 fputs ("@GOTTPOFF(%rip)", file);
5916 else
5917 fputs ("@GOTNTPOFF", file);
dea73790
JJ
5918 break;
5919 case UNSPEC_INDNTPOFF:
5920 fputs ("@INDNTPOFF", file);
5921 break;
77ebd435
AJ
5922 default:
5923 output_operand_lossage ("invalid UNSPEC as operand");
5924 break;
5925 }
91bb873f
RH
5926 break;
5927
2a2ab3f9
JVA
5928 default:
5929 output_operand_lossage ("invalid expression as operand");
5930 }
5931}
1865dbb5 5932
b9203463
RH
5933/* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
5934 We need to emit DTP-relative relocations. */
5935
5936void
b96a374d 5937i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
b9203463 5938{
75d38379
JJ
5939 fputs (ASM_LONG, file);
5940 output_addr_const (file, x);
5941 fputs ("@DTPOFF", file);
b9203463
RH
5942 switch (size)
5943 {
5944 case 4:
b9203463
RH
5945 break;
5946 case 8:
75d38379 5947 fputs (", 0", file);
b9203463 5948 break;
b9203463
RH
5949 default:
5950 abort ();
5951 }
b9203463
RH
5952}
5953
1865dbb5
JM
5954/* In the name of slightly smaller debug output, and to cater to
5955 general assembler losage, recognize PIC+GOTOFF and turn it back
5956 into a direct symbol reference. */
5957
69bd9368 5958static rtx
b96a374d 5959ix86_delegitimize_address (rtx orig_x)
1865dbb5 5960{
ec65b2e3 5961 rtx x = orig_x, y;
1865dbb5 5962
4c8c0dec
JJ
5963 if (GET_CODE (x) == MEM)
5964 x = XEXP (x, 0);
5965
6eb791fc
JH
5966 if (TARGET_64BIT)
5967 {
5968 if (GET_CODE (x) != CONST
5969 || GET_CODE (XEXP (x, 0)) != UNSPEC
8ee41eaf 5970 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
4c8c0dec 5971 || GET_CODE (orig_x) != MEM)
6eb791fc
JH
5972 return orig_x;
5973 return XVECEXP (XEXP (x, 0), 0, 0);
5974 }
5975
1865dbb5 5976 if (GET_CODE (x) != PLUS
1865dbb5
JM
5977 || GET_CODE (XEXP (x, 1)) != CONST)
5978 return orig_x;
5979
ec65b2e3
JJ
5980 if (GET_CODE (XEXP (x, 0)) == REG
5981 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
5982 /* %ebx + GOT/GOTOFF */
5983 y = NULL;
5984 else if (GET_CODE (XEXP (x, 0)) == PLUS)
5985 {
5986 /* %ebx + %reg * scale + GOT/GOTOFF */
5987 y = XEXP (x, 0);
5988 if (GET_CODE (XEXP (y, 0)) == REG
5989 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
5990 y = XEXP (y, 1);
5991 else if (GET_CODE (XEXP (y, 1)) == REG
5992 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
5993 y = XEXP (y, 0);
5994 else
5995 return orig_x;
5996 if (GET_CODE (y) != REG
5997 && GET_CODE (y) != MULT
5998 && GET_CODE (y) != ASHIFT)
5999 return orig_x;
6000 }
6001 else
6002 return orig_x;
6003
1865dbb5
JM
6004 x = XEXP (XEXP (x, 1), 0);
6005 if (GET_CODE (x) == UNSPEC
8ee41eaf
RH
6006 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6007 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
ec65b2e3
JJ
6008 {
6009 if (y)
6010 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
6011 return XVECEXP (x, 0, 0);
6012 }
1865dbb5
JM
6013
6014 if (GET_CODE (x) == PLUS
6015 && GET_CODE (XEXP (x, 0)) == UNSPEC
6016 && GET_CODE (XEXP (x, 1)) == CONST_INT
8ee41eaf
RH
6017 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6018 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
6019 && GET_CODE (orig_x) != MEM)))
ec65b2e3
JJ
6020 {
6021 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
6022 if (y)
6023 return gen_rtx_PLUS (Pmode, y, x);
6024 return x;
6025 }
1865dbb5
JM
6026
6027 return orig_x;
6028}
2a2ab3f9 6029\f
a269a03c 6030static void
b96a374d
AJ
6031put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
6032 int fp, FILE *file)
a269a03c 6033{
a269a03c
JC
6034 const char *suffix;
6035
9a915772
JH
6036 if (mode == CCFPmode || mode == CCFPUmode)
6037 {
6038 enum rtx_code second_code, bypass_code;
6039 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
f822d252 6040 if (bypass_code != UNKNOWN || second_code != UNKNOWN)
b531087a 6041 abort ();
9a915772
JH
6042 code = ix86_fp_compare_code_to_integer (code);
6043 mode = CCmode;
6044 }
a269a03c
JC
6045 if (reverse)
6046 code = reverse_condition (code);
e075ae69 6047
a269a03c
JC
6048 switch (code)
6049 {
6050 case EQ:
6051 suffix = "e";
6052 break;
a269a03c
JC
6053 case NE:
6054 suffix = "ne";
6055 break;
a269a03c 6056 case GT:
7e08e190 6057 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
e075ae69
RH
6058 abort ();
6059 suffix = "g";
a269a03c 6060 break;
a269a03c 6061 case GTU:
e075ae69
RH
6062 /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
6063 Those same assemblers have the same but opposite losage on cmov. */
7e08e190 6064 if (mode != CCmode)
0f290768 6065 abort ();
e075ae69 6066 suffix = fp ? "nbe" : "a";
a269a03c 6067 break;
a269a03c 6068 case LT:
9076b9c1 6069 if (mode == CCNOmode || mode == CCGOCmode)
a269a03c 6070 suffix = "s";
7e08e190 6071 else if (mode == CCmode || mode == CCGCmode)
e075ae69 6072 suffix = "l";
9076b9c1 6073 else
0f290768 6074 abort ();
a269a03c 6075 break;
a269a03c 6076 case LTU:
9076b9c1 6077 if (mode != CCmode)
0f290768 6078 abort ();
a269a03c
JC
6079 suffix = "b";
6080 break;
a269a03c 6081 case GE:
9076b9c1 6082 if (mode == CCNOmode || mode == CCGOCmode)
a269a03c 6083 suffix = "ns";
7e08e190 6084 else if (mode == CCmode || mode == CCGCmode)
e075ae69 6085 suffix = "ge";
9076b9c1 6086 else
0f290768 6087 abort ();
a269a03c 6088 break;
a269a03c 6089 case GEU:
e075ae69 6090 /* ??? As above. */
7e08e190 6091 if (mode != CCmode)
0f290768 6092 abort ();
7e08e190 6093 suffix = fp ? "nb" : "ae";
a269a03c 6094 break;
a269a03c 6095 case LE:
7e08e190 6096 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
e075ae69
RH
6097 abort ();
6098 suffix = "le";
a269a03c 6099 break;
a269a03c 6100 case LEU:
9076b9c1
JH
6101 if (mode != CCmode)
6102 abort ();
7e08e190 6103 suffix = "be";
a269a03c 6104 break;
3a3677ff 6105 case UNORDERED:
9e7adcb3 6106 suffix = fp ? "u" : "p";
3a3677ff
RH
6107 break;
6108 case ORDERED:
9e7adcb3 6109 suffix = fp ? "nu" : "np";
3a3677ff 6110 break;
a269a03c
JC
6111 default:
6112 abort ();
6113 }
6114 fputs (suffix, file);
6115}
6116
a55f4481
RK
6117/* Print the name of register X to FILE based on its machine mode and number.
6118 If CODE is 'w', pretend the mode is HImode.
6119 If CODE is 'b', pretend the mode is QImode.
6120 If CODE is 'k', pretend the mode is SImode.
6121 If CODE is 'q', pretend the mode is DImode.
6122 If CODE is 'h', pretend the reg is the `high' byte register.
6123 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
6124
e075ae69 6125void
b96a374d 6126print_reg (rtx x, int code, FILE *file)
e5cb57e8 6127{
a55f4481
RK
6128 if (REGNO (x) == ARG_POINTER_REGNUM
6129 || REGNO (x) == FRAME_POINTER_REGNUM
6130 || REGNO (x) == FLAGS_REG
6131 || REGNO (x) == FPSR_REG)
480feac0
ZW
6132 abort ();
6133
5bf0ebab 6134 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
e075ae69
RH
6135 putc ('%', file);
6136
ef6257cd 6137 if (code == 'w' || MMX_REG_P (x))
e075ae69
RH
6138 code = 2;
6139 else if (code == 'b')
6140 code = 1;
6141 else if (code == 'k')
6142 code = 4;
3f3f2124
JH
6143 else if (code == 'q')
6144 code = 8;
e075ae69
RH
6145 else if (code == 'y')
6146 code = 3;
6147 else if (code == 'h')
6148 code = 0;
6149 else
6150 code = GET_MODE_SIZE (GET_MODE (x));
e9a25f70 6151
3f3f2124
JH
6152 /* Irritatingly, AMD extended registers use different naming convention
6153 from the normal registers. */
6154 if (REX_INT_REG_P (x))
6155 {
885a70fd
JH
6156 if (!TARGET_64BIT)
6157 abort ();
3f3f2124
JH
6158 switch (code)
6159 {
ef6257cd 6160 case 0:
c725bd79 6161 error ("extended registers have no high halves");
3f3f2124
JH
6162 break;
6163 case 1:
6164 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
6165 break;
6166 case 2:
6167 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
6168 break;
6169 case 4:
6170 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
6171 break;
6172 case 8:
6173 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
6174 break;
6175 default:
c725bd79 6176 error ("unsupported operand size for extended register");
3f3f2124
JH
6177 break;
6178 }
6179 return;
6180 }
e075ae69
RH
6181 switch (code)
6182 {
6183 case 3:
6184 if (STACK_TOP_P (x))
6185 {
6186 fputs ("st(0)", file);
6187 break;
6188 }
5efb1046 6189 /* FALLTHRU */
e075ae69 6190 case 8:
3f3f2124 6191 case 4:
e075ae69 6192 case 12:
446988df 6193 if (! ANY_FP_REG_P (x))
885a70fd 6194 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
5efb1046 6195 /* FALLTHRU */
a7180f70 6196 case 16:
e075ae69 6197 case 2:
d4c32b6f 6198 normal:
e075ae69
RH
6199 fputs (hi_reg_name[REGNO (x)], file);
6200 break;
6201 case 1:
d4c32b6f
RH
6202 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
6203 goto normal;
e075ae69
RH
6204 fputs (qi_reg_name[REGNO (x)], file);
6205 break;
6206 case 0:
d4c32b6f
RH
6207 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
6208 goto normal;
e075ae69
RH
6209 fputs (qi_high_reg_name[REGNO (x)], file);
6210 break;
6211 default:
6212 abort ();
fe25fea3 6213 }
e5cb57e8
SC
6214}
6215
f996902d
RH
6216/* Locate some local-dynamic symbol still in use by this function
6217 so that we can print its name in some tls_local_dynamic_base
6218 pattern. */
6219
6220static const char *
b96a374d 6221get_some_local_dynamic_name (void)
f996902d
RH
6222{
6223 rtx insn;
6224
6225 if (cfun->machine->some_ld_name)
6226 return cfun->machine->some_ld_name;
6227
6228 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
6229 if (INSN_P (insn)
6230 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
6231 return cfun->machine->some_ld_name;
6232
6233 abort ();
6234}
6235
6236static int
b96a374d 6237get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
f996902d
RH
6238{
6239 rtx x = *px;
6240
6241 if (GET_CODE (x) == SYMBOL_REF
6242 && local_dynamic_symbolic_operand (x, Pmode))
6243 {
6244 cfun->machine->some_ld_name = XSTR (x, 0);
6245 return 1;
6246 }
6247
6248 return 0;
6249}
6250
2a2ab3f9 6251/* Meaning of CODE:
fe25fea3 6252 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
e5cb57e8 6253 C -- print opcode suffix for set/cmov insn.
fe25fea3 6254 c -- like C, but print reversed condition
ef6257cd 6255 F,f -- likewise, but for floating-point.
f6f5dff2
RO
6256 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
6257 otherwise nothing
2a2ab3f9
JVA
6258 R -- print the prefix for register names.
6259 z -- print the opcode suffix for the size of the current operand.
6260 * -- print a star (in certain assembler syntax)
fb204271 6261 A -- print an absolute memory reference.
2a2ab3f9 6262 w -- print the operand as if it's a "word" (HImode) even if it isn't.
2d49677f
SC
6263 s -- print a shift double count, followed by the assemblers argument
6264 delimiter.
fe25fea3
SC
6265 b -- print the QImode name of the register for the indicated operand.
6266 %b0 would print %al if operands[0] is reg 0.
6267 w -- likewise, print the HImode name of the register.
6268 k -- likewise, print the SImode name of the register.
3f3f2124 6269 q -- likewise, print the DImode name of the register.
ef6257cd
JH
6270 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
6271 y -- print "st(0)" instead of "st" as a register.
a46d1d38 6272 D -- print condition for SSE cmp instruction.
ef6257cd
JH
6273 P -- if PIC, print an @PLT suffix.
6274 X -- don't print any sort of PIC '@' suffix for a symbol.
f996902d 6275 & -- print some in-use local-dynamic symbol name.
a46d1d38 6276 */
2a2ab3f9
JVA
6277
6278void
b96a374d 6279print_operand (FILE *file, rtx x, int code)
2a2ab3f9
JVA
6280{
6281 if (code)
6282 {
6283 switch (code)
6284 {
6285 case '*':
80f33d06 6286 if (ASSEMBLER_DIALECT == ASM_ATT)
2a2ab3f9
JVA
6287 putc ('*', file);
6288 return;
6289
f996902d
RH
6290 case '&':
6291 assemble_name (file, get_some_local_dynamic_name ());
6292 return;
6293
fb204271 6294 case 'A':
80f33d06 6295 if (ASSEMBLER_DIALECT == ASM_ATT)
fb204271 6296 putc ('*', file);
80f33d06 6297 else if (ASSEMBLER_DIALECT == ASM_INTEL)
fb204271
DN
6298 {
6299 /* Intel syntax. For absolute addresses, registers should not
6300 be surrounded by braces. */
6301 if (GET_CODE (x) != REG)
6302 {
6303 putc ('[', file);
6304 PRINT_OPERAND (file, x, 0);
6305 putc (']', file);
6306 return;
6307 }
6308 }
80f33d06
GS
6309 else
6310 abort ();
fb204271
DN
6311
6312 PRINT_OPERAND (file, x, 0);
6313 return;
6314
6315
2a2ab3f9 6316 case 'L':
80f33d06 6317 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 6318 putc ('l', file);
2a2ab3f9
JVA
6319 return;
6320
6321 case 'W':
80f33d06 6322 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 6323 putc ('w', file);
2a2ab3f9
JVA
6324 return;
6325
6326 case 'B':
80f33d06 6327 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 6328 putc ('b', file);
2a2ab3f9
JVA
6329 return;
6330
6331 case 'Q':
80f33d06 6332 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 6333 putc ('l', file);
2a2ab3f9
JVA
6334 return;
6335
6336 case 'S':
80f33d06 6337 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 6338 putc ('s', file);
2a2ab3f9
JVA
6339 return;
6340
5f1ec3e6 6341 case 'T':
80f33d06 6342 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 6343 putc ('t', file);
5f1ec3e6
JVA
6344 return;
6345
2a2ab3f9
JVA
6346 case 'z':
6347 /* 387 opcodes don't get size suffixes if the operands are
0f290768 6348 registers. */
2a2ab3f9
JVA
6349 if (STACK_REG_P (x))
6350 return;
6351
831c4e87
KC
6352 /* Likewise if using Intel opcodes. */
6353 if (ASSEMBLER_DIALECT == ASM_INTEL)
6354 return;
6355
6356 /* This is the size of op from size of operand. */
2a2ab3f9
JVA
6357 switch (GET_MODE_SIZE (GET_MODE (x)))
6358 {
2a2ab3f9 6359 case 2:
155d8a47
JW
6360#ifdef HAVE_GAS_FILDS_FISTS
6361 putc ('s', file);
6362#endif
2a2ab3f9
JVA
6363 return;
6364
6365 case 4:
6366 if (GET_MODE (x) == SFmode)
6367 {
e075ae69 6368 putc ('s', file);
2a2ab3f9
JVA
6369 return;
6370 }
6371 else
e075ae69 6372 putc ('l', file);
2a2ab3f9
JVA
6373 return;
6374
5f1ec3e6 6375 case 12:
2b589241 6376 case 16:
e075ae69
RH
6377 putc ('t', file);
6378 return;
5f1ec3e6 6379
2a2ab3f9
JVA
6380 case 8:
6381 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
56c0e8fa
JVA
6382 {
6383#ifdef GAS_MNEMONICS
e075ae69 6384 putc ('q', file);
56c0e8fa 6385#else
e075ae69
RH
6386 putc ('l', file);
6387 putc ('l', file);
56c0e8fa
JVA
6388#endif
6389 }
e075ae69
RH
6390 else
6391 putc ('l', file);
2a2ab3f9 6392 return;
155d8a47
JW
6393
6394 default:
6395 abort ();
2a2ab3f9 6396 }
4af3895e
JVA
6397
6398 case 'b':
6399 case 'w':
6400 case 'k':
3f3f2124 6401 case 'q':
4af3895e
JVA
6402 case 'h':
6403 case 'y':
5cb6195d 6404 case 'X':
e075ae69 6405 case 'P':
4af3895e
JVA
6406 break;
6407
2d49677f
SC
6408 case 's':
6409 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
6410 {
6411 PRINT_OPERAND (file, x, 0);
e075ae69 6412 putc (',', file);
2d49677f 6413 }
a269a03c
JC
6414 return;
6415
a46d1d38
JH
6416 case 'D':
6417 /* Little bit of braindamage here. The SSE compare instructions
6418 does use completely different names for the comparisons that the
6419 fp conditional moves. */
6420 switch (GET_CODE (x))
6421 {
6422 case EQ:
6423 case UNEQ:
6424 fputs ("eq", file);
6425 break;
6426 case LT:
6427 case UNLT:
6428 fputs ("lt", file);
6429 break;
6430 case LE:
6431 case UNLE:
6432 fputs ("le", file);
6433 break;
6434 case UNORDERED:
6435 fputs ("unord", file);
6436 break;
6437 case NE:
6438 case LTGT:
6439 fputs ("neq", file);
6440 break;
6441 case UNGE:
6442 case GE:
6443 fputs ("nlt", file);
6444 break;
6445 case UNGT:
6446 case GT:
6447 fputs ("nle", file);
6448 break;
6449 case ORDERED:
6450 fputs ("ord", file);
6451 break;
6452 default:
6453 abort ();
6454 break;
6455 }
6456 return;
048b1c95 6457 case 'O':
f6f5dff2 6458#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
048b1c95
JJ
6459 if (ASSEMBLER_DIALECT == ASM_ATT)
6460 {
6461 switch (GET_MODE (x))
6462 {
6463 case HImode: putc ('w', file); break;
6464 case SImode:
6465 case SFmode: putc ('l', file); break;
6466 case DImode:
6467 case DFmode: putc ('q', file); break;
6468 default: abort ();
6469 }
6470 putc ('.', file);
6471 }
6472#endif
6473 return;
1853aadd 6474 case 'C':
e075ae69 6475 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
1853aadd 6476 return;
fe25fea3 6477 case 'F':
f6f5dff2 6478#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
048b1c95
JJ
6479 if (ASSEMBLER_DIALECT == ASM_ATT)
6480 putc ('.', file);
6481#endif
e075ae69 6482 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
fe25fea3
SC
6483 return;
6484
e9a25f70 6485 /* Like above, but reverse condition */
e075ae69 6486 case 'c':
fce5a9f2 6487 /* Check to see if argument to %c is really a constant
c1d5afc4 6488 and not a condition code which needs to be reversed. */
ec8e098d 6489 if (!COMPARISON_P (x))
c1d5afc4
CR
6490 {
6491 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
6492 return;
6493 }
e075ae69
RH
6494 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
6495 return;
fe25fea3 6496 case 'f':
f6f5dff2 6497#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
048b1c95
JJ
6498 if (ASSEMBLER_DIALECT == ASM_ATT)
6499 putc ('.', file);
6500#endif
e075ae69 6501 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
1853aadd 6502 return;
ef6257cd
JH
6503 case '+':
6504 {
6505 rtx x;
e5cb57e8 6506
ef6257cd
JH
6507 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
6508 return;
a4f31c00 6509
ef6257cd
JH
6510 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
6511 if (x)
6512 {
6513 int pred_val = INTVAL (XEXP (x, 0));
6514
6515 if (pred_val < REG_BR_PROB_BASE * 45 / 100
6516 || pred_val > REG_BR_PROB_BASE * 55 / 100)
6517 {
6518 int taken = pred_val > REG_BR_PROB_BASE / 2;
6519 int cputaken = final_forward_branch_p (current_output_insn) == 0;
6520
6521 /* Emit hints only in the case default branch prediction
d1f87653 6522 heuristics would fail. */
ef6257cd
JH
6523 if (taken != cputaken)
6524 {
6525 /* We use 3e (DS) prefix for taken branches and
6526 2e (CS) prefix for not taken branches. */
6527 if (taken)
6528 fputs ("ds ; ", file);
6529 else
6530 fputs ("cs ; ", file);
6531 }
6532 }
6533 }
6534 return;
6535 }
4af3895e 6536 default:
9e637a26 6537 output_operand_lossage ("invalid operand code '%c'", code);
2a2ab3f9
JVA
6538 }
6539 }
e9a25f70 6540
2a2ab3f9 6541 if (GET_CODE (x) == REG)
a55f4481 6542 print_reg (x, code, file);
e9a25f70 6543
2a2ab3f9
JVA
6544 else if (GET_CODE (x) == MEM)
6545 {
e075ae69 6546 /* No `byte ptr' prefix for call instructions. */
80f33d06 6547 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
2a2ab3f9 6548 {
69ddee61 6549 const char * size;
e075ae69
RH
6550 switch (GET_MODE_SIZE (GET_MODE (x)))
6551 {
6552 case 1: size = "BYTE"; break;
6553 case 2: size = "WORD"; break;
6554 case 4: size = "DWORD"; break;
6555 case 8: size = "QWORD"; break;
6556 case 12: size = "XWORD"; break;
a7180f70 6557 case 16: size = "XMMWORD"; break;
e075ae69 6558 default:
564d80f4 6559 abort ();
e075ae69 6560 }
fb204271
DN
6561
6562 /* Check for explicit size override (codes 'b', 'w' and 'k') */
6563 if (code == 'b')
6564 size = "BYTE";
6565 else if (code == 'w')
6566 size = "WORD";
6567 else if (code == 'k')
6568 size = "DWORD";
6569
e075ae69
RH
6570 fputs (size, file);
6571 fputs (" PTR ", file);
2a2ab3f9 6572 }
e075ae69
RH
6573
6574 x = XEXP (x, 0);
0d7d98ee 6575 /* Avoid (%rip) for call operands. */
d10f5ecf 6576 if (CONSTANT_ADDRESS_P (x) && code == 'P'
0d7d98ee
JH
6577 && GET_CODE (x) != CONST_INT)
6578 output_addr_const (file, x);
c8b94768
RH
6579 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
6580 output_operand_lossage ("invalid constraints for operand");
2a2ab3f9 6581 else
e075ae69 6582 output_address (x);
2a2ab3f9 6583 }
e9a25f70 6584
2a2ab3f9
JVA
6585 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
6586 {
e9a25f70
JL
6587 REAL_VALUE_TYPE r;
6588 long l;
6589
5f1ec3e6
JVA
6590 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6591 REAL_VALUE_TO_TARGET_SINGLE (r, l);
e075ae69 6592
80f33d06 6593 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69 6594 putc ('$', file);
781f4ec1 6595 fprintf (file, "0x%08lx", l);
5f1ec3e6 6596 }
e9a25f70 6597
74dc3e94
RH
6598 /* These float cases don't actually occur as immediate operands. */
6599 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
5f1ec3e6 6600 {
e9a25f70
JL
6601 char dstr[30];
6602
da6eec72 6603 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
5f1ec3e6 6604 fprintf (file, "%s", dstr);
2a2ab3f9 6605 }
e9a25f70 6606
2b589241 6607 else if (GET_CODE (x) == CONST_DOUBLE
f8a1ebc6 6608 && GET_MODE (x) == XFmode)
2a2ab3f9 6609 {
e9a25f70
JL
6610 char dstr[30];
6611
da6eec72 6612 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
5f1ec3e6 6613 fprintf (file, "%s", dstr);
2a2ab3f9 6614 }
f996902d 6615
79325812 6616 else
2a2ab3f9 6617 {
4af3895e 6618 if (code != 'P')
2a2ab3f9 6619 {
695dac07 6620 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
e075ae69 6621 {
80f33d06 6622 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69
RH
6623 putc ('$', file);
6624 }
2a2ab3f9
JVA
6625 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
6626 || GET_CODE (x) == LABEL_REF)
e075ae69 6627 {
80f33d06 6628 if (ASSEMBLER_DIALECT == ASM_ATT)
e075ae69
RH
6629 putc ('$', file);
6630 else
6631 fputs ("OFFSET FLAT:", file);
6632 }
2a2ab3f9 6633 }
e075ae69
RH
6634 if (GET_CODE (x) == CONST_INT)
6635 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6636 else if (flag_pic)
2a2ab3f9
JVA
6637 output_pic_addr_const (file, x, code);
6638 else
6639 output_addr_const (file, x);
6640 }
6641}
6642\f
6643/* Print a memory operand whose address is ADDR. */
6644
6645void
8d531ab9 6646print_operand_address (FILE *file, rtx addr)
2a2ab3f9 6647{
e075ae69
RH
6648 struct ix86_address parts;
6649 rtx base, index, disp;
6650 int scale;
e9a25f70 6651
e075ae69
RH
6652 if (! ix86_decompose_address (addr, &parts))
6653 abort ();
e9a25f70 6654
e075ae69
RH
6655 base = parts.base;
6656 index = parts.index;
6657 disp = parts.disp;
6658 scale = parts.scale;
e9a25f70 6659
74dc3e94
RH
6660 switch (parts.seg)
6661 {
6662 case SEG_DEFAULT:
6663 break;
6664 case SEG_FS:
6665 case SEG_GS:
6666 if (USER_LABEL_PREFIX[0] == 0)
6667 putc ('%', file);
6668 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
6669 break;
6670 default:
6671 abort ();
6672 }
6673
e075ae69
RH
6674 if (!base && !index)
6675 {
6676 /* Displacement only requires special attention. */
e9a25f70 6677
e075ae69 6678 if (GET_CODE (disp) == CONST_INT)
2a2ab3f9 6679 {
74dc3e94 6680 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
fb204271
DN
6681 {
6682 if (USER_LABEL_PREFIX[0] == 0)
6683 putc ('%', file);
6684 fputs ("ds:", file);
6685 }
74dc3e94 6686 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
2a2ab3f9 6687 }
e075ae69 6688 else if (flag_pic)
74dc3e94 6689 output_pic_addr_const (file, disp, 0);
e075ae69 6690 else
74dc3e94 6691 output_addr_const (file, disp);
0d7d98ee
JH
6692
6693 /* Use one byte shorter RIP relative addressing for 64bit mode. */
edfe8595 6694 if (TARGET_64BIT
74dc3e94
RH
6695 && ((GET_CODE (disp) == SYMBOL_REF
6696 && ! tls_symbolic_operand (disp, GET_MODE (disp)))
6697 || GET_CODE (disp) == LABEL_REF
6698 || (GET_CODE (disp) == CONST
6699 && GET_CODE (XEXP (disp, 0)) == PLUS
6700 && (GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
6701 || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
6702 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)))
0d7d98ee 6703 fputs ("(%rip)", file);
e075ae69
RH
6704 }
6705 else
6706 {
80f33d06 6707 if (ASSEMBLER_DIALECT == ASM_ATT)
2a2ab3f9 6708 {
e075ae69 6709 if (disp)
2a2ab3f9 6710 {
c399861d 6711 if (flag_pic)
e075ae69
RH
6712 output_pic_addr_const (file, disp, 0);
6713 else if (GET_CODE (disp) == LABEL_REF)
6714 output_asm_label (disp);
2a2ab3f9 6715 else
e075ae69 6716 output_addr_const (file, disp);
2a2ab3f9
JVA
6717 }
6718
e075ae69
RH
6719 putc ('(', file);
6720 if (base)
a55f4481 6721 print_reg (base, 0, file);
e075ae69 6722 if (index)
2a2ab3f9 6723 {
e075ae69 6724 putc (',', file);
a55f4481 6725 print_reg (index, 0, file);
e075ae69
RH
6726 if (scale != 1)
6727 fprintf (file, ",%d", scale);
2a2ab3f9 6728 }
e075ae69 6729 putc (')', file);
2a2ab3f9 6730 }
2a2ab3f9
JVA
6731 else
6732 {
e075ae69 6733 rtx offset = NULL_RTX;
e9a25f70 6734
e075ae69
RH
6735 if (disp)
6736 {
6737 /* Pull out the offset of a symbol; print any symbol itself. */
6738 if (GET_CODE (disp) == CONST
6739 && GET_CODE (XEXP (disp, 0)) == PLUS
6740 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
6741 {
6742 offset = XEXP (XEXP (disp, 0), 1);
6743 disp = gen_rtx_CONST (VOIDmode,
6744 XEXP (XEXP (disp, 0), 0));
6745 }
ce193852 6746
e075ae69
RH
6747 if (flag_pic)
6748 output_pic_addr_const (file, disp, 0);
6749 else if (GET_CODE (disp) == LABEL_REF)
6750 output_asm_label (disp);
6751 else if (GET_CODE (disp) == CONST_INT)
6752 offset = disp;
6753 else
6754 output_addr_const (file, disp);
6755 }
e9a25f70 6756
e075ae69
RH
6757 putc ('[', file);
6758 if (base)
a8620236 6759 {
a55f4481 6760 print_reg (base, 0, file);
e075ae69
RH
6761 if (offset)
6762 {
6763 if (INTVAL (offset) >= 0)
6764 putc ('+', file);
6765 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6766 }
a8620236 6767 }
e075ae69
RH
6768 else if (offset)
6769 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
2247a58c 6770 else
e075ae69 6771 putc ('0', file);
e9a25f70 6772
e075ae69
RH
6773 if (index)
6774 {
6775 putc ('+', file);
a55f4481 6776 print_reg (index, 0, file);
e075ae69
RH
6777 if (scale != 1)
6778 fprintf (file, "*%d", scale);
6779 }
6780 putc (']', file);
6781 }
2a2ab3f9
JVA
6782 }
6783}
f996902d
RH
6784
6785bool
b96a374d 6786output_addr_const_extra (FILE *file, rtx x)
f996902d
RH
6787{
6788 rtx op;
6789
6790 if (GET_CODE (x) != UNSPEC)
6791 return false;
6792
6793 op = XVECEXP (x, 0, 0);
6794 switch (XINT (x, 1))
6795 {
6796 case UNSPEC_GOTTPOFF:
6797 output_addr_const (file, op);
dea73790 6798 /* FIXME: This might be @TPOFF in Sun ld. */
f996902d
RH
6799 fputs ("@GOTTPOFF", file);
6800 break;
6801 case UNSPEC_TPOFF:
6802 output_addr_const (file, op);
6803 fputs ("@TPOFF", file);
6804 break;
6805 case UNSPEC_NTPOFF:
6806 output_addr_const (file, op);
75d38379
JJ
6807 if (TARGET_64BIT)
6808 fputs ("@TPOFF", file);
6809 else
6810 fputs ("@NTPOFF", file);
f996902d
RH
6811 break;
6812 case UNSPEC_DTPOFF:
6813 output_addr_const (file, op);
6814 fputs ("@DTPOFF", file);
6815 break;
dea73790
JJ
6816 case UNSPEC_GOTNTPOFF:
6817 output_addr_const (file, op);
75d38379
JJ
6818 if (TARGET_64BIT)
6819 fputs ("@GOTTPOFF(%rip)", file);
6820 else
6821 fputs ("@GOTNTPOFF", file);
dea73790
JJ
6822 break;
6823 case UNSPEC_INDNTPOFF:
6824 output_addr_const (file, op);
6825 fputs ("@INDNTPOFF", file);
6826 break;
f996902d
RH
6827
6828 default:
6829 return false;
6830 }
6831
6832 return true;
6833}
2a2ab3f9
JVA
6834\f
6835/* Split one or more DImode RTL references into pairs of SImode
6836 references. The RTL can be REG, offsettable MEM, integer constant, or
6837 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
6838 split and "num" is its length. lo_half and hi_half are output arrays
0f290768 6839 that parallel "operands". */
2a2ab3f9
JVA
6840
6841void
b96a374d 6842split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
2a2ab3f9
JVA
6843{
6844 while (num--)
6845 {
57dbca5e 6846 rtx op = operands[num];
b932f770
JH
6847
6848 /* simplify_subreg refuse to split volatile memory addresses,
6849 but we still have to handle it. */
6850 if (GET_CODE (op) == MEM)
2a2ab3f9 6851 {
f4ef873c 6852 lo_half[num] = adjust_address (op, SImode, 0);
b72f00af 6853 hi_half[num] = adjust_address (op, SImode, 4);
2a2ab3f9
JVA
6854 }
6855 else
b932f770 6856 {
38ca929b
JH
6857 lo_half[num] = simplify_gen_subreg (SImode, op,
6858 GET_MODE (op) == VOIDmode
6859 ? DImode : GET_MODE (op), 0);
6860 hi_half[num] = simplify_gen_subreg (SImode, op,
6861 GET_MODE (op) == VOIDmode
6862 ? DImode : GET_MODE (op), 4);
b932f770 6863 }
2a2ab3f9
JVA
6864 }
6865}
44cf5b6a
JH
6866/* Split one or more TImode RTL references into pairs of SImode
6867 references. The RTL can be REG, offsettable MEM, integer constant, or
6868 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
6869 split and "num" is its length. lo_half and hi_half are output arrays
6870 that parallel "operands". */
6871
6872void
b96a374d 6873split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
44cf5b6a
JH
6874{
6875 while (num--)
6876 {
6877 rtx op = operands[num];
b932f770
JH
6878
6879 /* simplify_subreg refuse to split volatile memory addresses, but we
6880 still have to handle it. */
6881 if (GET_CODE (op) == MEM)
44cf5b6a
JH
6882 {
6883 lo_half[num] = adjust_address (op, DImode, 0);
6884 hi_half[num] = adjust_address (op, DImode, 8);
6885 }
6886 else
b932f770
JH
6887 {
6888 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
6889 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
6890 }
44cf5b6a
JH
6891 }
6892}
2a2ab3f9 6893\f
2a2ab3f9
JVA
6894/* Output code to perform a 387 binary operation in INSN, one of PLUS,
6895 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
6896 is the expression of the binary operation. The output may either be
6897 emitted here, or returned to the caller, like all output_* functions.
6898
6899 There is no guarantee that the operands are the same mode, as they
0f290768 6900 might be within FLOAT or FLOAT_EXTEND expressions. */
2a2ab3f9 6901
e3c2afab
AM
6902#ifndef SYSV386_COMPAT
6903/* Set to 1 for compatibility with brain-damaged assemblers. No-one
6904 wants to fix the assemblers because that causes incompatibility
6905 with gcc. No-one wants to fix gcc because that causes
6906 incompatibility with assemblers... You can use the option of
6907 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
6908#define SYSV386_COMPAT 1
6909#endif
6910
69ddee61 6911const char *
b96a374d 6912output_387_binary_op (rtx insn, rtx *operands)
2a2ab3f9 6913{
e3c2afab 6914 static char buf[30];
69ddee61 6915 const char *p;
1deaa899 6916 const char *ssep;
89b17498 6917 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
2a2ab3f9 6918
e3c2afab
AM
6919#ifdef ENABLE_CHECKING
6920 /* Even if we do not want to check the inputs, this documents input
6921 constraints. Which helps in understanding the following code. */
6922 if (STACK_REG_P (operands[0])
6923 && ((REG_P (operands[1])
6924 && REGNO (operands[0]) == REGNO (operands[1])
6925 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
6926 || (REG_P (operands[2])
6927 && REGNO (operands[0]) == REGNO (operands[2])
6928 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
6929 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
6930 ; /* ok */
1deaa899 6931 else if (!is_sse)
e3c2afab
AM
6932 abort ();
6933#endif
6934
2a2ab3f9
JVA
6935 switch (GET_CODE (operands[3]))
6936 {
6937 case PLUS:
e075ae69
RH
6938 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6939 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6940 p = "fiadd";
6941 else
6942 p = "fadd";
1deaa899 6943 ssep = "add";
2a2ab3f9
JVA
6944 break;
6945
6946 case MINUS:
e075ae69
RH
6947 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6948 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6949 p = "fisub";
6950 else
6951 p = "fsub";
1deaa899 6952 ssep = "sub";
2a2ab3f9
JVA
6953 break;
6954
6955 case MULT:
e075ae69
RH
6956 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6957 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6958 p = "fimul";
6959 else
6960 p = "fmul";
1deaa899 6961 ssep = "mul";
2a2ab3f9
JVA
6962 break;
6963
6964 case DIV:
e075ae69
RH
6965 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6966 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6967 p = "fidiv";
6968 else
6969 p = "fdiv";
1deaa899 6970 ssep = "div";
2a2ab3f9
JVA
6971 break;
6972
6973 default:
6974 abort ();
6975 }
6976
1deaa899
JH
6977 if (is_sse)
6978 {
6979 strcpy (buf, ssep);
6980 if (GET_MODE (operands[0]) == SFmode)
6981 strcat (buf, "ss\t{%2, %0|%0, %2}");
6982 else
6983 strcat (buf, "sd\t{%2, %0|%0, %2}");
6984 return buf;
6985 }
e075ae69 6986 strcpy (buf, p);
2a2ab3f9
JVA
6987
6988 switch (GET_CODE (operands[3]))
6989 {
6990 case MULT:
6991 case PLUS:
6992 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
6993 {
e3c2afab 6994 rtx temp = operands[2];
2a2ab3f9
JVA
6995 operands[2] = operands[1];
6996 operands[1] = temp;
6997 }
6998
e3c2afab
AM
6999 /* know operands[0] == operands[1]. */
7000
2a2ab3f9 7001 if (GET_CODE (operands[2]) == MEM)
e075ae69
RH
7002 {
7003 p = "%z2\t%2";
7004 break;
7005 }
2a2ab3f9
JVA
7006
7007 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6b28fd63
JL
7008 {
7009 if (STACK_TOP_P (operands[0]))
e3c2afab
AM
7010 /* How is it that we are storing to a dead operand[2]?
7011 Well, presumably operands[1] is dead too. We can't
7012 store the result to st(0) as st(0) gets popped on this
7013 instruction. Instead store to operands[2] (which I
7014 think has to be st(1)). st(1) will be popped later.
7015 gcc <= 2.8.1 didn't have this check and generated
7016 assembly code that the Unixware assembler rejected. */
7017 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6b28fd63 7018 else
e3c2afab 7019 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
e075ae69 7020 break;
6b28fd63 7021 }
2a2ab3f9
JVA
7022
7023 if (STACK_TOP_P (operands[0]))
e3c2afab 7024 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
2a2ab3f9 7025 else
e3c2afab 7026 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
e075ae69 7027 break;
2a2ab3f9
JVA
7028
7029 case MINUS:
7030 case DIV:
7031 if (GET_CODE (operands[1]) == MEM)
e075ae69
RH
7032 {
7033 p = "r%z1\t%1";
7034 break;
7035 }
2a2ab3f9
JVA
7036
7037 if (GET_CODE (operands[2]) == MEM)
e075ae69
RH
7038 {
7039 p = "%z2\t%2";
7040 break;
7041 }
2a2ab3f9 7042
2a2ab3f9 7043 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6b28fd63 7044 {
e3c2afab
AM
7045#if SYSV386_COMPAT
7046 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7047 derived assemblers, confusingly reverse the direction of
7048 the operation for fsub{r} and fdiv{r} when the
7049 destination register is not st(0). The Intel assembler
7050 doesn't have this brain damage. Read !SYSV386_COMPAT to
7051 figure out what the hardware really does. */
7052 if (STACK_TOP_P (operands[0]))
7053 p = "{p\t%0, %2|rp\t%2, %0}";
7054 else
7055 p = "{rp\t%2, %0|p\t%0, %2}";
7056#else
6b28fd63 7057 if (STACK_TOP_P (operands[0]))
e3c2afab
AM
7058 /* As above for fmul/fadd, we can't store to st(0). */
7059 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6b28fd63 7060 else
e3c2afab
AM
7061 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7062#endif
e075ae69 7063 break;
6b28fd63 7064 }
2a2ab3f9
JVA
7065
7066 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
6b28fd63 7067 {
e3c2afab 7068#if SYSV386_COMPAT
6b28fd63 7069 if (STACK_TOP_P (operands[0]))
e3c2afab 7070 p = "{rp\t%0, %1|p\t%1, %0}";
6b28fd63 7071 else
e3c2afab
AM
7072 p = "{p\t%1, %0|rp\t%0, %1}";
7073#else
7074 if (STACK_TOP_P (operands[0]))
7075 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7076 else
7077 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7078#endif
e075ae69 7079 break;
6b28fd63 7080 }
2a2ab3f9
JVA
7081
7082 if (STACK_TOP_P (operands[0]))
7083 {
7084 if (STACK_TOP_P (operands[1]))
e3c2afab 7085 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
2a2ab3f9 7086 else
e3c2afab 7087 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
e075ae69 7088 break;
2a2ab3f9
JVA
7089 }
7090 else if (STACK_TOP_P (operands[1]))
e3c2afab
AM
7091 {
7092#if SYSV386_COMPAT
7093 p = "{\t%1, %0|r\t%0, %1}";
7094#else
7095 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
7096#endif
7097 }
2a2ab3f9 7098 else
e3c2afab
AM
7099 {
7100#if SYSV386_COMPAT
7101 p = "{r\t%2, %0|\t%0, %2}";
7102#else
7103 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7104#endif
7105 }
e075ae69 7106 break;
2a2ab3f9
JVA
7107
7108 default:
7109 abort ();
7110 }
e075ae69
RH
7111
7112 strcat (buf, p);
7113 return buf;
2a2ab3f9 7114}
e075ae69 7115
edeacc14
UB
7116/* Output code to initialize control word copies used by trunc?f?i and
7117 rounding patterns. CURRENT_MODE is set to current control word,
7118 while NEW_MODE is set to new control word. */
7119
7a2e09f4 7120void
edeacc14 7121emit_i387_cw_initialization (rtx current_mode, rtx new_mode, int mode)
7a2e09f4
JH
7122{
7123 rtx reg = gen_reg_rtx (HImode);
7124
edeacc14
UB
7125 emit_insn (gen_x86_fnstcw_1 (current_mode));
7126 emit_move_insn (reg, current_mode);
7127
7a2e09f4
JH
7128 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
7129 && !TARGET_64BIT)
edeacc14
UB
7130 {
7131 switch (mode)
7132 {
7133 case I387_CW_FLOOR:
7134 /* round down toward -oo */
7135 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
7136 break;
7137
7138 case I387_CW_CEIL:
7139 /* round up toward +oo */
7140 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
7141 break;
7142
7143 case I387_CW_TRUNC:
7144 /* round toward zero (truncate) */
7145 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
7146 break;
7147
7148 case I387_CW_MASK_PM:
7149 /* mask precision exception for nearbyint() */
7150 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
7151 break;
7152
7153 default:
7154 abort();
7155 }
7156 }
7a2e09f4 7157 else
edeacc14
UB
7158 {
7159 switch (mode)
7160 {
7161 case I387_CW_FLOOR:
7162 /* round down toward -oo */
7163 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
7164 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
7165 break;
7166
7167 case I387_CW_CEIL:
7168 /* round up toward +oo */
7169 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
7170 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
7171 break;
7172
7173 case I387_CW_TRUNC:
7174 /* round toward zero (truncate) */
7175 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
7176 break;
7177
7178 case I387_CW_MASK_PM:
7179 /* mask precision exception for nearbyint() */
7180 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
7181 break;
7182
7183 default:
7184 abort();
7185 }
7186 }
7187
7188 emit_move_insn (new_mode, reg);
7a2e09f4
JH
7189}
7190
2a2ab3f9 7191/* Output code for INSN to convert a float to a signed int. OPERANDS
46d21d2c 7192 are the insn operands. The output may be [HSD]Imode and the input
e075ae69 7193 operand may be [SDX]Fmode. */
2a2ab3f9 7194
69ddee61 7195const char *
b96a374d 7196output_fix_trunc (rtx insn, rtx *operands)
2a2ab3f9
JVA
7197{
7198 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
e075ae69 7199 int dimode_p = GET_MODE (operands[0]) == DImode;
2a2ab3f9 7200
e075ae69
RH
7201 /* Jump through a hoop or two for DImode, since the hardware has no
7202 non-popping instruction. We used to do this a different way, but
7203 that was somewhat fragile and broke with post-reload splitters. */
a05924f9
JH
7204 if (dimode_p && !stack_top_dies)
7205 output_asm_insn ("fld\t%y1", operands);
e075ae69 7206
7a2e09f4 7207 if (!STACK_TOP_P (operands[1]))
10195bd8
JW
7208 abort ();
7209
e075ae69 7210 if (GET_CODE (operands[0]) != MEM)
7a2e09f4 7211 abort ();
e9a25f70 7212
7a2e09f4 7213 output_asm_insn ("fldcw\t%3", operands);
e075ae69 7214 if (stack_top_dies || dimode_p)
7a2e09f4 7215 output_asm_insn ("fistp%z0\t%0", operands);
10195bd8 7216 else
7a2e09f4 7217 output_asm_insn ("fist%z0\t%0", operands);
e075ae69 7218 output_asm_insn ("fldcw\t%2", operands);
10195bd8 7219
e075ae69 7220 return "";
2a2ab3f9 7221}
cda749b1 7222
e075ae69 7223/* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
7c82106f 7224 should be used. UNORDERED_P is true when fucom should be used. */
e075ae69 7225
69ddee61 7226const char *
b96a374d 7227output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
cda749b1 7228{
e075ae69 7229 int stack_top_dies;
869d095e 7230 rtx cmp_op0, cmp_op1;
7c82106f 7231 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
e075ae69 7232
7c82106f 7233 if (eflags_p)
e075ae69 7234 {
7c82106f
UB
7235 cmp_op0 = operands[0];
7236 cmp_op1 = operands[1];
e075ae69 7237 }
869d095e
UB
7238 else
7239 {
7c82106f
UB
7240 cmp_op0 = operands[1];
7241 cmp_op1 = operands[2];
869d095e
UB
7242 }
7243
0644b628
JH
7244 if (is_sse)
7245 {
7246 if (GET_MODE (operands[0]) == SFmode)
7247 if (unordered_p)
7248 return "ucomiss\t{%1, %0|%0, %1}";
7249 else
a5cf80f0 7250 return "comiss\t{%1, %0|%0, %1}";
0644b628
JH
7251 else
7252 if (unordered_p)
7253 return "ucomisd\t{%1, %0|%0, %1}";
7254 else
a5cf80f0 7255 return "comisd\t{%1, %0|%0, %1}";
0644b628 7256 }
cda749b1 7257
e075ae69 7258 if (! STACK_TOP_P (cmp_op0))
cda749b1
JW
7259 abort ();
7260
e075ae69 7261 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
cda749b1 7262
869d095e
UB
7263 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
7264 {
7265 if (stack_top_dies)
7266 {
7267 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
7268 return TARGET_USE_FFREEP ? "ffreep\t%y1" : "fstp\t%y1";
7269 }
7270 else
7271 return "ftst\n\tfnstsw\t%0";
7272 }
7273
e075ae69
RH
7274 if (STACK_REG_P (cmp_op1)
7275 && stack_top_dies
7276 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
7277 && REGNO (cmp_op1) != FIRST_STACK_REG)
cda749b1 7278 {
e075ae69
RH
7279 /* If both the top of the 387 stack dies, and the other operand
7280 is also a stack register that dies, then this must be a
7281 `fcompp' float compare */
7282
7c82106f 7283 if (eflags_p)
e075ae69
RH
7284 {
7285 /* There is no double popping fcomi variant. Fortunately,
7286 eflags is immune from the fstp's cc clobbering. */
7287 if (unordered_p)
7288 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
7289 else
7290 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
fb364dc4 7291 return TARGET_USE_FFREEP ? "ffreep\t%y0" : "fstp\t%y0";
e075ae69
RH
7292 }
7293 else
cda749b1 7294 {
7c82106f
UB
7295 if (unordered_p)
7296 return "fucompp\n\tfnstsw\t%0";
cda749b1 7297 else
7c82106f 7298 return "fcompp\n\tfnstsw\t%0";
cda749b1 7299 }
cda749b1
JW
7300 }
7301 else
7302 {
e075ae69 7303 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
cda749b1 7304
7c82106f 7305 static const char * const alt[16] =
e075ae69 7306 {
7c82106f
UB
7307 "fcom%z2\t%y2\n\tfnstsw\t%0",
7308 "fcomp%z2\t%y2\n\tfnstsw\t%0",
7309 "fucom%z2\t%y2\n\tfnstsw\t%0",
7310 "fucomp%z2\t%y2\n\tfnstsw\t%0",
0f290768 7311
7c82106f
UB
7312 "ficom%z2\t%y2\n\tfnstsw\t%0",
7313 "ficomp%z2\t%y2\n\tfnstsw\t%0",
e075ae69
RH
7314 NULL,
7315 NULL,
7316
7317 "fcomi\t{%y1, %0|%0, %y1}",
7318 "fcomip\t{%y1, %0|%0, %y1}",
7319 "fucomi\t{%y1, %0|%0, %y1}",
7320 "fucomip\t{%y1, %0|%0, %y1}",
7321
7322 NULL,
7323 NULL,
7324 NULL,
e075ae69
RH
7325 NULL
7326 };
7327
7328 int mask;
69ddee61 7329 const char *ret;
e075ae69
RH
7330
7331 mask = eflags_p << 3;
7c82106f 7332 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
e075ae69
RH
7333 mask |= unordered_p << 1;
7334 mask |= stack_top_dies;
7335
7c82106f 7336 if (mask >= 16)
e075ae69
RH
7337 abort ();
7338 ret = alt[mask];
7339 if (ret == NULL)
7340 abort ();
cda749b1 7341
e075ae69 7342 return ret;
cda749b1
JW
7343 }
7344}
2a2ab3f9 7345
f88c65f7 7346void
b96a374d 7347ix86_output_addr_vec_elt (FILE *file, int value)
f88c65f7
RH
7348{
7349 const char *directive = ASM_LONG;
7350
7351 if (TARGET_64BIT)
7352 {
7353#ifdef ASM_QUAD
7354 directive = ASM_QUAD;
7355#else
7356 abort ();
7357#endif
7358 }
7359
7360 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
7361}
7362
/* Emit to FILE one element of an ADDR_DIFF_VEC jump table for local
   label VALUE.  REL is the label number of the reference point and is
   only consumed in the 64-bit branch; the other branches express the
   entry relative to the GOT or the Mach-O function base instead.  */
void
ix86_output_addr_diff_elt (FILE *file, int value, int rel)
{
  if (TARGET_64BIT)
    /* 64-bit: entry is the difference of two local labels.  */
    fprintf (file, "%s%s%d-%s%d\n",
	     ASM_LONG, LPREFIX, value, LPREFIX, rel);
  else if (HAVE_AS_GOTOFF_IN_DATA)
    /* Assembler supports @GOTOFF relocations in data sections.  */
    fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
#if TARGET_MACHO
  else if (TARGET_MACHO)
    {
      /* Mach-O: emit the label minus the picbase of the function.  */
      fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
      machopic_output_function_base_name (file);
      fprintf(file, "\n");
    }
#endif
  else
    /* Fallback: express the entry via the GOT symbol and the current
       location (`.`).  %U strips any user-label prefix.  */
    asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
		 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
}
32b5b1aa 7383\f
a8bac9ab
RH
7384/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
7385 for the target. */
7386
7387void
b96a374d 7388ix86_expand_clear (rtx dest)
a8bac9ab
RH
7389{
7390 rtx tmp;
7391
7392 /* We play register width games, which are only valid after reload. */
7393 if (!reload_completed)
7394 abort ();
7395
7396 /* Avoid HImode and its attendant prefix byte. */
7397 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
7398 dest = gen_rtx_REG (SImode, REGNO (dest));
7399
7400 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
7401
7402 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
7403 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
7404 {
7405 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
7406 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
7407 }
7408
7409 emit_insn (tmp);
7410}
7411
f996902d
RH
7412/* X is an unchanging MEM. If it is a constant pool reference, return
7413 the constant pool rtx, else NULL. */
7414
8fe75e43 7415rtx
b96a374d 7416maybe_get_pool_constant (rtx x)
f996902d 7417{
69bd9368 7418 x = ix86_delegitimize_address (XEXP (x, 0));
f996902d
RH
7419
7420 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7421 return get_pool_constant (x);
7422
7423 return NULL_RTX;
7424}
7425
/* Expand a scalar move of MODE from operands[1] to operands[0],
   legitimizing TLS, PIC and constant operands as needed, and emit the
   resulting SET insn(s).  */
void
ix86_expand_move (enum machine_mode mode, rtx operands[])
{
  int strict = (reload_in_progress || reload_completed);
  rtx op0, op1;
  enum tls_model model;

  op0 = operands[0];
  op1 = operands[1];

  /* Thread-local symbols must go through the TLS legitimizer first.  */
  model = GET_CODE (op1) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (op1) : 0;
  if (model)
    {
      op1 = legitimize_tls_address (op1, model, true);
      op1 = force_operand (op1, op0);
      /* force_operand may already have stored into op0.  */
      if (op1 == op0)
	return;
    }

  if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
    {
#if TARGET_MACHO
      if (MACHOPIC_PURE)
	{
	  /* Reuse op0 as the scratch when it is a Pmode register or we
	     may not create new pseudos (during reload).  */
	  rtx temp = ((reload_in_progress
		       || ((op0 && GET_CODE (op0) == REG)
			   && mode == Pmode))
		      ? op0 : gen_reg_rtx (Pmode));
	  op1 = machopic_indirect_data_reference (op1, temp);
	  op1 = machopic_legitimize_pic_address (op1, mode,
						 temp == op1 ? 0 : temp);
	}
      else if (MACHOPIC_INDIRECT)
	op1 = machopic_indirect_data_reference (op1, 0);
      if (op0 == op1)
	return;
#else
      /* Symbolic PIC addresses cannot be stored directly to memory;
	 otherwise rewrite them into a legitimate PIC form.  */
      if (GET_CODE (op0) == MEM)
	op1 = force_reg (Pmode, op1);
      else
	op1 = legitimize_address (op1, op1, Pmode);
#endif /* TARGET_MACHO */
    }
  else
    {
      /* mem <- mem moves need an intermediate register, except for a
	 push whose rounding matches the mode size (push can take a
	 memory operand directly in that case).  */
      if (GET_CODE (op0) == MEM
	  && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
	      || !push_operand (op0, mode))
	  && GET_CODE (op1) == MEM)
	op1 = force_reg (mode, op1);

      /* Pushed values must not need eliminable registers.  */
      if (push_operand (op0, mode)
	  && ! general_no_elim_operand (op1, mode))
	op1 = copy_to_mode_reg (mode, op1);

      /* Force large constants in 64bit compilation into register
	 to get them CSEed.  */
      if (TARGET_64BIT && mode == DImode
	  && immediate_operand (op1, mode)
	  && !x86_64_zext_immediate_operand (op1, VOIDmode)
	  && !register_operand (op0, mode)
	  && optimize && !reload_completed && !reload_in_progress)
	op1 = copy_to_mode_reg (mode, op1);

      if (FLOAT_MODE_P (mode))
	{
	  /* If we are loading a floating point constant to a register,
	     force the value to memory now, since we'll get better code
	     out the back end.  */

	  if (strict)
	    ;  /* During/after reload: leave the operands untouched.  */
	  else if (GET_CODE (op1) == CONST_DOUBLE)
	    {
	      op1 = validize_mem (force_const_mem (mode, op1));
	      if (!register_operand (op0, mode))
		{
		  /* Load through a temporary register so the final
		     store to op0 is a simple move.  */
		  rtx temp = gen_reg_rtx (mode);
		  emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
		  emit_move_insn (op0, temp);
		  return;
		}
	    }
	}
    }

  emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
}
e9a25f70 7514
/* Expand a vector-mode move of MODE from operands[1] to operands[0]
   and emit the resulting SET insn.  */
void
ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
{
  /* Force constants other than zero into memory.  We do not know how
     the instructions used to build constants modify the upper 64 bits
     of the register, once we have that information we may be able
     to handle some of them more efficiently.  */
  if ((reload_in_progress | reload_completed) == 0
      && register_operand (operands[0], mode)
      && CONSTANT_P (operands[1]) && operands[1] != CONST0_RTX (mode))
    operands[1] = validize_mem (force_const_mem (mode, operands[1]));

  /* Make operand1 a register if it isn't already.
     NOTE(review): the guard only fires when NEITHER operand is a
     register, i.e. it is breaking up a mem-to-mem move — confirm the
     comment/condition mismatch is intentional.  */
  if (!no_new_pseudos
      && !register_operand (operands[0], mode)
      && !register_operand (operands[1], mode))
    {
      rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
      emit_move_insn (operands[0], temp);
      return;
    }

  emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
}
e37af218 7539
/* Attempt to expand a binary operator.  Make the expansion closer to the
   actual machine, then just general_operand, which will allow 3 separate
   memory references (one output, two input) in a single insn.  */

void
ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
			     rtx operands[])
{
  int matching_memory;
  rtx src1, src2, dst, op, clob;

  dst = operands[0];
  src1 = operands[1];
  src2 = operands[2];

  /* Recognize <var1> = <value> <op> <var1> for commutative operators:
     swap the sources so the matching/constant operand is second.  */
  if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
      && (rtx_equal_p (dst, src2)
	  || immediate_operand (src1, mode)))
    {
      rtx temp = src1;
      src1 = src2;
      src2 = temp;
    }

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  matching_memory records which
     source (1 or 2) equals the memory destination.  */
  matching_memory = 0;
  if (GET_CODE (dst) == MEM)
    {
      if (rtx_equal_p (dst, src1))
	matching_memory = 1;
      else if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
	       && rtx_equal_p (dst, src2))
	matching_memory = 2;
      else
	dst = gen_reg_rtx (mode);
    }

  /* Both source operands cannot be in memory.  Keep the one that
     matches the destination in memory, force the other to a reg.  */
  if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
    {
      if (matching_memory != 2)
	src2 = force_reg (mode, src2);
      else
	src1 = force_reg (mode, src1);
    }

  /* If the operation is not commutable, source 1 cannot be a constant
     or non-matching memory.  */
  if ((CONSTANT_P (src1)
       || (!matching_memory && GET_CODE (src1) == MEM))
      && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
    src1 = force_reg (mode, src1);

  /* If optimizing, copy to regs to improve CSE */
  if (optimize && ! no_new_pseudos)
    {
      if (GET_CODE (dst) == MEM)
	dst = gen_reg_rtx (mode);
      if (GET_CODE (src1) == MEM)
	src1 = force_reg (mode, src1);
      if (GET_CODE (src2) == MEM)
	src2 = force_reg (mode, src2);
    }

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
  if (reload_in_progress)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
	 it doesn't want to clobber it.  We can only do this with PLUS.  */
      if (code != PLUS)
	abort ();
      emit_insn (op);
    }
  else
    {
      /* Arithmetic insns clobber the flags; make that explicit.  */
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}
7627
7628/* Return TRUE or FALSE depending on whether the binary operator meets the
7629 appropriate constraints. */
7630
7631int
b96a374d
AJ
7632ix86_binary_operator_ok (enum rtx_code code,
7633 enum machine_mode mode ATTRIBUTE_UNUSED,
7634 rtx operands[3])
e075ae69
RH
7635{
7636 /* Both source operands cannot be in memory. */
7637 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
7638 return 0;
7639 /* If the operation is not commutable, source 1 cannot be a constant. */
ec8e098d 7640 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
e075ae69
RH
7641 return 0;
7642 /* If the destination is memory, we must have a matching source operand. */
7643 if (GET_CODE (operands[0]) == MEM
7644 && ! (rtx_equal_p (operands[0], operands[1])
ec8e098d 7645 || (GET_RTX_CLASS (code) == RTX_COMM_ARITH
e075ae69
RH
7646 && rtx_equal_p (operands[0], operands[2]))))
7647 return 0;
06a964de 7648 /* If the operation is not commutable and the source 1 is memory, we must
d6a7951f 7649 have a matching destination. */
06a964de 7650 if (GET_CODE (operands[1]) == MEM
ec8e098d 7651 && GET_RTX_CLASS (code) != RTX_COMM_ARITH
06a964de
JH
7652 && ! rtx_equal_p (operands[0], operands[1]))
7653 return 0;
e075ae69
RH
7654 return 1;
7655}
7656
/* Attempt to expand a unary operator.  Make the expansion closer to the
   actual machine, then just general_operand, which will allow 2 separate
   memory references (one output, one input) in a single insn.  */

void
ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
			    rtx operands[])
{
  int matching_memory;
  rtx src, dst, op, clob;

  dst = operands[0];
  src = operands[1];

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  matching_memory = 0;
  if (MEM_P (dst))
    {
      if (rtx_equal_p (dst, src))
	matching_memory = 1;
      else
	dst = gen_reg_rtx (mode);
    }

  /* When source operand is memory, destination must match.  */
  if (MEM_P (src) && !matching_memory)
    src = force_reg (mode, src);

  /* If optimizing, copy to regs to improve CSE.  */
  if (optimize && ! no_new_pseudos)
    {
      if (GET_CODE (dst) == MEM)
	dst = gen_reg_rtx (mode);
      if (GET_CODE (src) == MEM)
	src = force_reg (mode, src);
    }

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
  if (reload_in_progress || code == NOT)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
	 it doesn't want to clobber it.  NOT does not touch the flags, so
	 it needs no clobber either.  */
      if (code != NOT)
	abort ();
      emit_insn (op);
    }
  else
    {
      /* Other unary operations clobber the flags.  */
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}
7716
7717/* Return TRUE or FALSE depending on whether the unary operator meets the
7718 appropriate constraints. */
7719
7720int
b96a374d
AJ
7721ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
7722 enum machine_mode mode ATTRIBUTE_UNUSED,
7723 rtx operands[2] ATTRIBUTE_UNUSED)
e075ae69 7724{
06a964de
JH
7725 /* If one of operands is memory, source and destination must match. */
7726 if ((GET_CODE (operands[0]) == MEM
7727 || GET_CODE (operands[1]) == MEM)
7728 && ! rtx_equal_p (operands[0], operands[1]))
7729 return FALSE;
e075ae69
RH
7730 return TRUE;
7731}
7cacf53e
RH

/* Generate code for floating point ABS or NEG.  CODE is ABS or NEG,
   MODE the scalar FP mode, operands[0]/operands[1] the dest/source.  */

void
ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
				rtx operands[])
{
  rtx mask, set, use, clob, dst, src;
  bool matching_memory;
  bool use_sse = false;

  /* Decide whether to do the operation in SSE registers.  */
  if (TARGET_SSE_MATH)
    {
      if (mode == SFmode)
	use_sse = true;
      else if (mode == DFmode && TARGET_SSE2)
	use_sse = true;
    }

  /* NEG and ABS performed with SSE use bitwise mask operations.
     Create the appropriate mask now.  */
  if (use_sse)
    {
      HOST_WIDE_INT hi, lo;
      int shift = 63;

      /* Find the sign bit, sign extended to 2*HWI.  */
      if (mode == SFmode)
	lo = 0x80000000, hi = lo < 0;
      else if (HOST_BITS_PER_WIDE_INT >= 64)
	lo = (HOST_WIDE_INT)1 << shift, hi = -1;
      else
	lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);

      /* If we're looking for the absolute value, then we want
	 the complement (ABS clears the sign bit via AND; NEG flips
	 it via XOR).  */
      if (code == ABS)
	lo = ~lo, hi = ~hi;

      /* Force this value into the low part of a fp vector constant.  */
      mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
      mask = gen_lowpart (mode, mask);
      if (mode == SFmode)
	mask = gen_rtx_CONST_VECTOR (V4SFmode,
				     gen_rtvec (4, mask, CONST0_RTX (SFmode),
						CONST0_RTX (SFmode),
						CONST0_RTX (SFmode)));
      else
	mask = gen_rtx_CONST_VECTOR (V2DFmode,
				     gen_rtvec (2, mask, CONST0_RTX (DFmode)));
      mask = force_reg (GET_MODE (mask), mask);
    }
  else
    {
      /* When not using SSE, we don't use the mask, but prefer to keep the
	 same general form of the insn pattern to reduce duplication when
	 it comes time to split.  */
      mask = const0_rtx;
    }

  dst = operands[0];
  src = operands[1];

  /* If the destination is memory, and we don't have matching source
     operands, do things in registers.  */
  matching_memory = false;
  if (MEM_P (dst))
    {
      if (rtx_equal_p (dst, src) && (!optimize || no_new_pseudos))
	matching_memory = true;
      else
	dst = gen_reg_rtx (mode);
    }
  if (MEM_P (src) && !matching_memory)
    src = force_reg (mode, src);

  /* Emit (set dst (code src)) with the mask USE and a flags clobber,
     matching the pattern this is later split against.  */
  set = gen_rtx_fmt_e (code, mode, src);
  set = gen_rtx_SET (VOIDmode, dst, set);
  use = gen_rtx_USE (VOIDmode, mask);
  clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
  emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (3, set, use, clob)));

  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}
e075ae69 7817
16189740
RH
/* Return TRUE or FALSE depending on whether the first SET in INSN
   has source and destination with matching CC modes, and that the
   CC mode is at least as constrained as REQ_MODE.  */

int
ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
{
  rtx set;
  enum machine_mode set_mode;

  /* Dig the (set (reg:CC) (compare ...)) out of the pattern.  */
  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  if (GET_CODE (set) != SET)
    abort ();
  if (GET_CODE (SET_SRC (set)) != COMPARE)
    abort ();

  /* Each case falls through to the less constrained modes below it,
     so a mode accepts REQ_MODE if no earlier case rejected it.  */
  set_mode = GET_MODE (SET_DEST (set));
  switch (set_mode)
    {
    case CCNOmode:
      /* CCNOmode also satisfies a CCmode request when comparing
	 against zero.  */
      if (req_mode != CCNOmode
	  && (req_mode != CCmode
	      || XEXP (SET_SRC (set), 1) != const0_rtx))
	return 0;
      break;
    case CCmode:
      if (req_mode == CCGCmode)
	return 0;
      /* FALLTHRU */
    case CCGCmode:
      if (req_mode == CCGOCmode || req_mode == CCNOmode)
	return 0;
      /* FALLTHRU */
    case CCGOCmode:
      if (req_mode == CCZmode)
	return 0;
      /* FALLTHRU */
    case CCZmode:
      break;

    default:
      abort ();
    }

  /* Finally the compare itself must carry the same CC mode.  */
  return (GET_MODE (SET_SRC (set)) == set_mode);
}
7866
e075ae69
RH
/* Generate insn patterns to do an integer compare of OPERANDS.
   Emits the flags-setting compare and returns the comparison rtx
   (CODE applied to the flags register) for the flags consumer.  */

static rtx
ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
{
  enum machine_mode cmpmode;
  rtx tmp, flags;

  /* Pick the least constrained CC mode that CODE needs.  */
  cmpmode = SELECT_CC_MODE (code, op0, op1);
  flags = gen_rtx_REG (cmpmode, FLAGS_REG);

  /* This is very simple, but making the interface the same as in the
     FP case makes the rest of the code easier.  */
  tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
  emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
}
7887
3a3677ff
RH
7888/* Figure out whether to use ordered or unordered fp comparisons.
7889 Return the appropriate mode to use. */
e075ae69 7890
b1cdafbb 7891enum machine_mode
b96a374d 7892ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
e075ae69 7893{
9e7adcb3
JH
7894 /* ??? In order to make all comparisons reversible, we do all comparisons
7895 non-trapping when compiling for IEEE. Once gcc is able to distinguish
7896 all forms trapping and nontrapping comparisons, we can make inequality
7897 comparisons trapping again, since it results in better code when using
7898 FCOM based compares. */
7899 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
3a3677ff
RH
7900}
7901
/* Return the CC mode that a comparison CODE of OP0 and OP1 requires;
   the chosen mode records exactly which flags the comparison needs.  */
enum machine_mode
ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
{
  /* Floating point comparisons use their own CC modes.  */
  if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
    return ix86_fp_compare_mode (code);
  switch (code)
    {
      /* Only zero flag is needed.  */
    case EQ:			/* ZF=0 */
    case NE:			/* ZF!=0 */
      return CCZmode;
      /* Codes needing carry flag.  */
    case GEU:			/* CF=0 */
    case GTU:			/* CF=0 & ZF=0 */
    case LTU:			/* CF=1 */
    case LEU:			/* CF=1 | ZF=1 */
      return CCmode;
      /* Codes possibly doable only with sign flag when
         comparing against zero.  */
    case GE:			/* SF=OF   or   SF=0 */
    case LT:			/* SF<>OF  or   SF=1 */
      if (op1 == const0_rtx)
	return CCGOCmode;
      else
	/* For other cases Carry flag is not required.  */
	return CCGCmode;
      /* Codes doable only with sign flag when comparing
         against zero, but we miss jump instruction for it
         so we need to use relational tests against overflow
         that thus needs to be zero.  */
    case GT:			/* ZF=0 & SF=OF */
    case LE:			/* ZF=1 | SF<>OF */
      if (op1 == const0_rtx)
	return CCNOmode;
      else
	return CCGCmode;
      /* strcmp pattern do (use flags) and combine may ask us for proper
	 mode.  */
    case USE:
      return CCmode;
    default:
      abort ();
    }
}
7946
e129d93a
ILT
/* Return the fixed registers used for condition codes.
   *P1 gets the integer flags register, *P2 the FP status word
   register; always succeeds.  */

static bool
ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
{
  *p1 = FLAGS_REG;
  *p2 = FPSR_REG;
  return true;
}
7956
/* If two condition code modes are compatible, return a condition code
   mode which is compatible with both.  Otherwise, return
   VOIDmode.  */

static enum machine_mode
ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
{
  if (m1 == m2)
    return m1;

  /* Only CC modes can be merged at all.  */
  if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
    return VOIDmode;

  /* CCGC and CCGOC merge to the more constrained CCGC.  */
  if ((m1 == CCGCmode && m2 == CCGOCmode)
      || (m1 == CCGOCmode && m2 == CCGCmode))
    return CCGCmode;

  switch (m1)
    {
    default:
      abort ();

      /* Any pair of the integer CC modes below merges to full CCmode,
	 which constrains all the flags.  */
    case CCmode:
    case CCGCmode:
    case CCGOCmode:
    case CCNOmode:
    case CCZmode:
      switch (m2)
	{
	default:
	  return VOIDmode;

	case CCmode:
	case CCGCmode:
	case CCGOCmode:
	case CCNOmode:
	case CCZmode:
	  return CCmode;
	}

    case CCFPmode:
    case CCFPUmode:
      /* These are only compatible with themselves, which we already
	 checked above.  */
      return VOIDmode;
    }
}
8004
3a3677ff
RH
8005/* Return true if we should use an FCOMI instruction for this fp comparison. */
8006
a940d8bd 8007int
b96a374d 8008ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
3a3677ff 8009{
9e7adcb3
JH
8010 enum rtx_code swapped_code = swap_condition (code);
8011 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
8012 || (ix86_fp_comparison_cost (swapped_code)
8013 == ix86_fp_comparison_fcomi_cost (swapped_code)));
3a3677ff
RH
8014}
8015
/* Swap, force into registers, or otherwise massage the two operands
   to a fp comparison.  The operands are updated in place; the new
   comparison code is returned.  */

static enum rtx_code
ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
{
  enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
  rtx op0 = *pop0, op1 = *pop1;
  enum machine_mode op_mode = GET_MODE (op0);
  int is_sse = SSE_REG_P (op0) || SSE_REG_P (op1);

  /* All of the unordered compare instructions only work on registers.
     The same is true of the fcomi compare instructions.  The same is
     true of the XFmode compare instructions if not comparing with
     zero (ftst insn is used in this case).  */

  if (!is_sse
      && (fpcmp_mode == CCFPUmode
	  || (op_mode == XFmode
	      && ! (standard_80387_constant_p (op0) == 1
		    || standard_80387_constant_p (op1) == 1))
	  || ix86_use_fcomi_compare (code)))
    {
      op0 = force_reg (op_mode, op0);
      op1 = force_reg (op_mode, op1);
    }
  else
    {
      /* %%% We only allow op1 in memory; op0 must be st(0).  So swap
	 things around if they appear profitable, otherwise force op0
	 into a register.  */

      if (standard_80387_constant_p (op0) == 0
	  || (GET_CODE (op0) == MEM
	      && ! (standard_80387_constant_p (op1) == 0
		    || GET_CODE (op1) == MEM)))
	{
	  rtx tmp;
	  tmp = op0, op0 = op1, op1 = tmp;
	  code = swap_condition (code);
	}

      if (GET_CODE (op0) != REG)
	op0 = force_reg (op_mode, op0);

      if (CONSTANT_P (op1))
	{
	  /* standard_80387_constant_p: 0 = not a load-immediate
	     constant (spill to the constant pool); 1 = fldz-style
	     constant (may stay as-is unless cmov needs a register);
	     otherwise force to a register.  */
	  int tmp = standard_80387_constant_p (op1);
	  if (tmp == 0)
	    op1 = validize_mem (force_const_mem (op_mode, op1));
	  else if (tmp == 1)
	    {
	      if (TARGET_CMOVE)
		op1 = force_reg (op_mode, op1);
	    }
	  else
	    op1 = force_reg (op_mode, op1);
	}
    }

  /* Try to rearrange the comparison to make it cheaper.  */
  if (ix86_fp_comparison_cost (code)
      > ix86_fp_comparison_cost (swap_condition (code))
      && (GET_CODE (op1) == REG || !no_new_pseudos))
    {
      rtx tmp;
      tmp = op0, op0 = op1, op1 = tmp;
      code = swap_condition (code);
      /* After the swap op0 may no longer be a register; fix that.  */
      if (GET_CODE (op0) != REG)
	op0 = force_reg (op_mode, op0);
    }

  *pop0 = op0;
  *pop1 = op1;
  return code;
}
8093
c0c102a9
JH
8094/* Convert comparison codes we use to represent FP comparison to integer
8095 code that will result in proper branch. Return UNKNOWN if no such code
8096 is available. */
8fe75e43
RH
8097
8098enum rtx_code
b96a374d 8099ix86_fp_compare_code_to_integer (enum rtx_code code)
c0c102a9
JH
8100{
8101 switch (code)
8102 {
8103 case GT:
8104 return GTU;
8105 case GE:
8106 return GEU;
8107 case ORDERED:
8108 case UNORDERED:
8109 return code;
8110 break;
8111 case UNEQ:
8112 return EQ;
8113 break;
8114 case UNLT:
8115 return LTU;
8116 break;
8117 case UNLE:
8118 return LEU;
8119 break;
8120 case LTGT:
8121 return NE;
8122 break;
8123 default:
8124 return UNKNOWN;
8125 }
8126}
8127
/* Split comparison code CODE into comparisons we can do using branch
   instructions.  BYPASS_CODE is comparison code for branch that will
   branch around FIRST_CODE and SECOND_CODE.  If some of branches
   is not required, set value to UNKNOWN.
   We never require more than two branches.  */

void
ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
			  enum rtx_code *first_code,
			  enum rtx_code *second_code)
{
  *first_code = code;
  *bypass_code = UNKNOWN;
  *second_code = UNKNOWN;

  /* The fcomi comparison sets flags as follows:

     cmp    ZF PF CF
     >      0  0  0
     <      0  0  1
     =      1  0  0
     un     1  1  1 */

  switch (code)
    {
      /* These map directly to one flag test; no extra branch.  */
    case GT:			/* GTU - CF=0 & ZF=0 */
    case GE:			/* GEU - CF=0 */
    case ORDERED:		/* PF=0 */
    case UNORDERED:		/* PF=1 */
    case UNEQ:			/* EQ - ZF=1 */
    case UNLT:			/* LTU - CF=1 */
    case UNLE:			/* LEU - CF=1 | ZF=1 */
    case LTGT:			/* EQ - ZF=0 */
      break;
      /* The following would accept unordered inputs by mistake, so
	 branch around them on UNORDERED first.  */
    case LT:			/* LTU - CF=1 - fails on unordered */
      *first_code = UNLT;
      *bypass_code = UNORDERED;
      break;
    case LE:			/* LEU - CF=1 | ZF=1 - fails on unordered */
      *first_code = UNLE;
      *bypass_code = UNORDERED;
      break;
    case EQ:			/* EQ - ZF=1 - fails on unordered */
      *first_code = UNEQ;
      *bypass_code = UNORDERED;
      break;
      /* The following would reject unordered inputs by mistake, so
	 also branch on UNORDERED afterwards.  */
    case NE:			/* NE - ZF=0 - fails on unordered */
      *first_code = LTGT;
      *second_code = UNORDERED;
      break;
    case UNGE:			/* GEU - CF=0 - fails on unordered */
      *first_code = GE;
      *second_code = UNORDERED;
      break;
    case UNGT:			/* GTU - CF=0 & ZF=0 - fails on unordered */
      *first_code = GT;
      *second_code = UNORDERED;
      break;
    default:
      abort ();
    }
  /* Without IEEE semantics we need not distinguish unordered inputs,
     so the extra branches can be dropped.  */
  if (!TARGET_IEEE_FP)
    {
      *second_code = UNKNOWN;
      *bypass_code = UNKNOWN;
    }
}
8195
9e7adcb3 8196/* Return cost of comparison done fcom + arithmetics operations on AX.
5bdc5878 8197 All following functions do use number of instructions as a cost metrics.
9e7adcb3
JH
8198 In future this should be tweaked to compute bytes for optimize_size and
8199 take into account performance of various instructions on various CPUs. */
8200static int
b96a374d 8201ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
9e7adcb3
JH
8202{
8203 if (!TARGET_IEEE_FP)
8204 return 4;
8205 /* The cost of code output by ix86_expand_fp_compare. */
8206 switch (code)
8207 {
8208 case UNLE:
8209 case UNLT:
8210 case LTGT:
8211 case GT:
8212 case GE:
8213 case UNORDERED:
8214 case ORDERED:
8215 case UNEQ:
8216 return 4;
8217 break;
8218 case LT:
8219 case NE:
8220 case EQ:
8221 case UNGE:
8222 return 5;
8223 break;
8224 case LE:
8225 case UNGT:
8226 return 6;
8227 break;
8228 default:
8229 abort ();
8230 }
8231}
8232
8233/* Return cost of comparison done using fcomi operation.
8234 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8235static int
b96a374d 8236ix86_fp_comparison_fcomi_cost (enum rtx_code code)
9e7adcb3
JH
8237{
8238 enum rtx_code bypass_code, first_code, second_code;
d1f87653 8239 /* Return arbitrarily high cost when instruction is not supported - this
9e7adcb3
JH
8240 prevents gcc from using it. */
8241 if (!TARGET_CMOVE)
8242 return 1024;
8243 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
f822d252 8244 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
9e7adcb3
JH
8245}
8246
8247/* Return cost of comparison done using sahf operation.
8248 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8249static int
b96a374d 8250ix86_fp_comparison_sahf_cost (enum rtx_code code)
9e7adcb3
JH
8251{
8252 enum rtx_code bypass_code, first_code, second_code;
d1f87653 8253 /* Return arbitrarily high cost when instruction is not preferred - this
9e7adcb3
JH
8254 avoids gcc from using it. */
8255 if (!TARGET_USE_SAHF && !optimize_size)
8256 return 1024;
8257 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
f822d252 8258 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
9e7adcb3
JH
8259}
8260
8261/* Compute cost of the comparison done using any method.
8262 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8263static int
b96a374d 8264ix86_fp_comparison_cost (enum rtx_code code)
9e7adcb3
JH
8265{
8266 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
8267 int min;
8268
8269 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
8270 sahf_cost = ix86_fp_comparison_sahf_cost (code);
8271
8272 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
8273 if (min > sahf_cost)
8274 min = sahf_cost;
8275 if (min > fcomi_cost)
8276 min = fcomi_cost;
8277 return min;
8278}
c0c102a9 8279
3a3677ff
RH
/* Generate insn patterns to do a floating point compare of OPERANDS.

   CODE is the comparison to perform on OP0/OP1; SCRATCH, when non-NULL,
   is an HImode register used to receive the FPU status word (a fresh
   pseudo is allocated when it is NULL and one is needed).

   Some FP comparisons cannot be expressed as a single flags test.  In
   that case *SECOND_TEST receives an additional test to be IORed with
   the main result, and *BYPASS_TEST a test which, when true, makes the
   main result invalid (used to branch around it).  Both are set to
   NULL_RTX when unused.  Returns the comparison rtx to be placed in the
   flags user (bcc, scc or cmov).  */

static rtx
ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
			rtx *second_test, rtx *bypass_test)
{
  enum machine_mode fpcmp_mode, intcmp_mode;
  rtx tmp, tmp2;
  int cost = ix86_fp_comparison_cost (code);
  enum rtx_code bypass_code, first_code, second_code;

  fpcmp_mode = ix86_fp_compare_mode (code);
  code = ix86_prepare_fp_compare_args (code, &op0, &op1);

  if (second_test)
    *second_test = NULL_RTX;
  if (bypass_test)
    *bypass_test = NULL_RTX;

  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);

  /* Do fcomi/sahf based test when profitable.  This branch is only
     taken when the caller can accept any bypass/second test the
     decomposition requires (or none is required) and the arithmetic
     fallback below would be more expensive.  */
  if ((bypass_code == UNKNOWN || bypass_test)
      && (second_code == UNKNOWN || second_test)
      && ix86_fp_comparison_arithmetics_cost (code) > cost)
    {
      if (TARGET_CMOVE)
	{
	  /* fcomi: compare and set EFLAGS directly.  */
	  tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
	  tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
			     tmp);
	  emit_insn (tmp);
	}
      else
	{
	  /* fnstsw + sahf: copy the FPU status word into AH and from
	     there into EFLAGS.  */
	  tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
	  tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
	  if (!scratch)
	    scratch = gen_reg_rtx (HImode);
	  emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
	  emit_insn (gen_x86_sahf_1 (scratch));
	}

      /* The FP codes work out to act like unsigned.  */
      intcmp_mode = fpcmp_mode;
      code = first_code;
      if (bypass_code != UNKNOWN)
	*bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
				       gen_rtx_REG (intcmp_mode, FLAGS_REG),
				       const0_rtx);
      if (second_code != UNKNOWN)
	*second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
				       gen_rtx_REG (intcmp_mode, FLAGS_REG),
				       const0_rtx);
    }
  else
    {
      /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first.  */
      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
      tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
      if (!scratch)
	scratch = gen_reg_rtx (HImode);
      emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));

      /* In the unordered case, we have to check C2 for NaN's, which
	 doesn't happen to work out to anything nice combination-wise.
	 So do some bit twiddling on the value we've got in AH to come
	 up with an appropriate set of condition codes.

	 The masks below address the x87 condition bits as they appear
	 in AH after fnstsw: 0x01 = C0 (below), 0x04 = C2 (unordered),
	 0x40 = C3 (equal); 0x45 covers all three.  */

      intcmp_mode = CCNOmode;
      switch (code)
	{
	case GT:
	case UNGT:
	  if (code == GT || !TARGET_IEEE_FP)
	    {
	      /* GT: none of C0/C2/C3 may be set.  */
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
	      code = EQ;
	    }
	  else
	    {
	      /* IEEE UNGT: isolate C0/C2/C3, then use the carry from
		 decrementing to distinguish the acceptable patterns.  */
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
	      intcmp_mode = CCmode;
	      code = GEU;
	    }
	  break;
	case LT:
	case UNLT:
	  if (code == LT && TARGET_IEEE_FP)
	    {
	      /* IEEE LT: exactly C0 set, C2/C3 clear.  */
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
	      intcmp_mode = CCmode;
	      code = EQ;
	    }
	  else
	    {
	      /* Non-IEEE: C0 alone decides.  */
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
	      code = NE;
	    }
	  break;
	case GE:
	case UNGE:
	  if (code == GE || !TARGET_IEEE_FP)
	    {
	      /* GE: neither C0 nor C2 set.  */
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
	      code = EQ;
	    }
	  else
	    {
	      /* IEEE UNGE: flip C0 after masking, then test it.  */
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
					     GEN_INT (0x01)));
	      code = NE;
	    }
	  break;
	case LE:
	case UNLE:
	  if (code == LE && TARGET_IEEE_FP)
	    {
	      /* IEEE LE: C0 or C3 set, but not the unordered pattern.  */
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
	      intcmp_mode = CCmode;
	      code = LTU;
	    }
	  else
	    {
	      /* Non-IEEE: any of C0/C2/C3.  */
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
	      code = NE;
	    }
	  break;
	case EQ:
	case UNEQ:
	  if (code == EQ && TARGET_IEEE_FP)
	    {
	      /* IEEE EQ: exactly C3 set within C0/C2/C3.  */
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
	      intcmp_mode = CCmode;
	      code = EQ;
	    }
	  else
	    {
	      /* Non-IEEE / UNEQ: C3 alone decides.  */
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
	      code = NE;
	      break;
	    }
	  break;
	case NE:
	case LTGT:
	  if (code == NE && TARGET_IEEE_FP)
	    {
	      /* IEEE NE: mask C0/C2/C3 and flip C3 so "equal and
		 ordered" becomes the only zero pattern.  */
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
					     GEN_INT (0x40)));
	      code = NE;
	    }
	  else
	    {
	      /* Non-IEEE / LTGT: C3 clear means not-equal.  */
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
	      code = EQ;
	    }
	  break;

	case UNORDERED:
	  /* C2 set signals an unordered result.  */
	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
	  code = NE;
	  break;
	case ORDERED:
	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
	  code = EQ;
	  break;

	default:
	  abort ();
	}
    }

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode,
			 gen_rtx_REG (intcmp_mode, FLAGS_REG),
			 const0_rtx);
}
8466
9e3e266c 8467rtx
b96a374d 8468ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
e075ae69
RH
8469{
8470 rtx op0, op1, ret;
8471 op0 = ix86_compare_op0;
8472 op1 = ix86_compare_op1;
8473
a1b8572c
JH
8474 if (second_test)
8475 *second_test = NULL_RTX;
8476 if (bypass_test)
8477 *bypass_test = NULL_RTX;
8478
e075ae69 8479 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
bf71a4f8 8480 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
77ebd435 8481 second_test, bypass_test);
32b5b1aa 8482 else
e075ae69
RH
8483 ret = ix86_expand_int_compare (code, op0, op1);
8484
8485 return ret;
8486}
8487
03598dea
JH
8488/* Return true if the CODE will result in nontrivial jump sequence. */
8489bool
b96a374d 8490ix86_fp_jump_nontrivial_p (enum rtx_code code)
03598dea
JH
8491{
8492 enum rtx_code bypass_code, first_code, second_code;
8493 if (!TARGET_CMOVE)
8494 return true;
8495 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
f822d252 8496 return bypass_code != UNKNOWN || second_code != UNKNOWN;
03598dea
JH
8497}
8498
/* Emit a conditional jump to LABEL for comparison CODE on the operands
   stashed in ix86_compare_op0/op1.  Dispatches on the operand mode:
   narrow integer modes emit a single compare+jump, FP modes go through
   the fcomi/fnstsw machinery, and 32-bit DImode is decomposed into
   word-sized compares.  Note: the DImode path mutates the
   ix86_compare_op0/op1 globals before recursing.  */
void
ix86_expand_branch (enum rtx_code code, rtx label)
{
  rtx tmp;

  switch (GET_MODE (ix86_compare_op0))
    {
    case QImode:
    case HImode:
    case SImode:
      simple:
      /* Single compare + conditional jump.  */
      tmp = ix86_expand_compare (code, NULL, NULL);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				  gen_rtx_LABEL_REF (VOIDmode, label),
				  pc_rtx);
      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
      return;

    case SFmode:
    case DFmode:
    case XFmode:
      {
	rtvec vec;
	int use_fcomi;
	enum rtx_code bypass_code, first_code, second_code;

	code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
					     &ix86_compare_op1);

	ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);

	/* Check whether we will use the natural sequence with one jump.  If
	   so, we can expand jump early.  Otherwise delay expansion by
	   creating compound insn to not confuse optimizers.  */
	if (bypass_code == UNKNOWN && second_code == UNKNOWN
	    && TARGET_CMOVE)
	  {
	    ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
				  gen_rtx_LABEL_REF (VOIDmode, label),
				  pc_rtx, NULL_RTX, NULL_RTX);
	  }
	else
	  {
	    /* Build a PARALLEL carrying the jump plus clobbers of the
	       two flag registers (and a scratch HImode reg when sahf
	       will be used), to be split after optimization.  */
	    tmp = gen_rtx_fmt_ee (code, VOIDmode,
				  ix86_compare_op0, ix86_compare_op1);
	    tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
					gen_rtx_LABEL_REF (VOIDmode, label),
					pc_rtx);
	    tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);

	    use_fcomi = ix86_use_fcomi_compare (code);
	    vec = rtvec_alloc (3 + !use_fcomi);
	    RTVEC_ELT (vec, 0) = tmp;
	    RTVEC_ELT (vec, 1)
	      = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
	    RTVEC_ELT (vec, 2)
	      = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
	    if (! use_fcomi)
	      RTVEC_ELT (vec, 3)
		= gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));

	    emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
	  }
	return;
      }

    case DImode:
      if (TARGET_64BIT)
	goto simple;
      /* Expand DImode branch into multiple compare+branch.  */
      {
	rtx lo[2], hi[2], label2;
	enum rtx_code code1, code2, code3;

	/* Canonicalize so a constant, if any, is the second operand.  */
	if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
	  {
	    tmp = ix86_compare_op0;
	    ix86_compare_op0 = ix86_compare_op1;
	    ix86_compare_op1 = tmp;
	    code = swap_condition (code);
	  }
	split_di (&ix86_compare_op0, 1, lo+0, hi+0);
	split_di (&ix86_compare_op1, 1, lo+1, hi+1);

	/* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
	   avoid two branches.  This costs one extra insn, so disable when
	   optimizing for size.  */

	if ((code == EQ || code == NE)
	    && (!optimize_size
		|| hi[1] == const0_rtx || lo[1] == const0_rtx))
	  {
	    rtx xor0, xor1;

	    xor1 = hi[0];
	    if (hi[1] != const0_rtx)
	      xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
				   NULL_RTX, 0, OPTAB_WIDEN);

	    xor0 = lo[0];
	    if (lo[1] != const0_rtx)
	      xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
				   NULL_RTX, 0, OPTAB_WIDEN);

	    tmp = expand_binop (SImode, ior_optab, xor1, xor0,
				NULL_RTX, 0, OPTAB_WIDEN);

	    /* Recurse as a word-sized compare against zero.  */
	    ix86_compare_op0 = tmp;
	    ix86_compare_op1 = const0_rtx;
	    ix86_expand_branch (code, label);
	    return;
	  }

	/* Otherwise, if we are doing less-than or greater-or-equal-than,
	   op1 is a constant and the low word is zero, then we can just
	   examine the high word.  */

	if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
	  switch (code)
	    {
	    case LT: case LTU: case GE: case GEU:
	      ix86_compare_op0 = hi[0];
	      ix86_compare_op1 = hi[1];
	      ix86_expand_branch (code, label);
	      return;
	    default:
	      break;
	    }

	/* Otherwise, we need two or three jumps.  */

	label2 = gen_label_rtx ();

	/* code1 branches on the high words, code2 branches around the
	   low-word test (inverse sense), code3 is the unsigned variant
	   used for the low words.  */
	code1 = code;
	code2 = swap_condition (code);
	code3 = unsigned_condition (code);

	switch (code)
	  {
	  case LT: case GT: case LTU: case GTU:
	    break;

	  case LE:   code1 = LT;  code2 = GT;  break;
	  case GE:   code1 = GT;  code2 = LT;  break;
	  case LEU:  code1 = LTU; code2 = GTU; break;
	  case GEU:  code1 = GTU; code2 = LTU; break;

	  case EQ:   code1 = UNKNOWN; code2 = NE; break;
	  case NE:   code2 = UNKNOWN; break;

	  default:
	    abort ();
	  }

	/*
	 * a < b =>
	 *    if (hi(a) < hi(b)) goto true;
	 *    if (hi(a) > hi(b)) goto false;
	 *    if (lo(a) < lo(b)) goto true;
	 *  false:
	 */

	ix86_compare_op0 = hi[0];
	ix86_compare_op1 = hi[1];

	if (code1 != UNKNOWN)
	  ix86_expand_branch (code1, label);
	if (code2 != UNKNOWN)
	  ix86_expand_branch (code2, label2);

	ix86_compare_op0 = lo[0];
	ix86_compare_op1 = lo[1];
	ix86_expand_branch (code3, label);

	if (code2 != UNKNOWN)
	  emit_label (label2);
	return;
      }

    default:
      abort ();
    }
}
e075ae69 8682
9e7adcb3
JH
/* Split branch based on floating point condition.

   Emits the one-to-three jump sequence for an FP comparison of OP1 and
   OP2 with condition CODE, targeting TARGET1 when true and TARGET2
   otherwise (one of them is pc_rtx, i.e. fall-through).  TMP is an
   optional HImode scratch for fnstsw; PUSHED, when non-NULL, is a
   stack-temporary operand to release after the compare.  Branch
   probabilities from split_branch_probability are attached as
   REG_BR_PROB notes.  */
void
ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
		      rtx target1, rtx target2, rtx tmp, rtx pushed)
{
  rtx second, bypass;
  rtx label = NULL_RTX;
  rtx condition;
  int bypass_probability = -1, second_probability = -1, probability = -1;
  rtx i;

  /* Normalize so that TARGET2 is the fall-through (pc_rtx), reversing
     the condition when the caller gave them the other way round.  */
  if (target2 != pc_rtx)
    {
      rtx tmp = target2;
      code = reverse_condition_maybe_unordered (code);
      target2 = target1;
      target1 = tmp;
    }

  condition = ix86_expand_fp_compare (code, op1, op2,
				      tmp, &second, &bypass);

  /* Remove pushed operand from stack.  */
  if (pushed)
    ix86_free_from_memory (GET_MODE (pushed));

  if (split_branch_probability >= 0)
    {
      /* Distribute the probabilities across the jumps.
	 Assume the BYPASS and SECOND to be always test
	 for UNORDERED.  */
      probability = split_branch_probability;

      /* Value of 1 is low enough to make no need for probability
	 to be updated.  Later we may run some experiments and see
	 if unordered values are more frequent in practice.  */
      if (bypass)
	bypass_probability = 1;
      if (second)
	second_probability = 1;
    }
  /* The bypass jump skips the main conditional jump when its result
     would be invalid (unordered operands).  */
  if (bypass != NULL_RTX)
    {
      label = gen_label_rtx ();
      i = emit_jump_insn (gen_rtx_SET
			  (VOIDmode, pc_rtx,
			   gen_rtx_IF_THEN_ELSE (VOIDmode,
						 bypass,
						 gen_rtx_LABEL_REF (VOIDmode,
								    label),
						 pc_rtx)));
      if (bypass_probability >= 0)
	REG_NOTES (i)
	  = gen_rtx_EXPR_LIST (REG_BR_PROB,
			       GEN_INT (bypass_probability),
			       REG_NOTES (i));
    }
  /* The main conditional jump.  */
  i = emit_jump_insn (gen_rtx_SET
		      (VOIDmode, pc_rtx,
		       gen_rtx_IF_THEN_ELSE (VOIDmode,
					     condition, target1, target2)));
  if (probability >= 0)
    REG_NOTES (i)
      = gen_rtx_EXPR_LIST (REG_BR_PROB,
			   GEN_INT (probability),
			   REG_NOTES (i));
  /* An optional second jump ORs in the remaining condition.  */
  if (second != NULL_RTX)
    {
      i = emit_jump_insn (gen_rtx_SET
			  (VOIDmode, pc_rtx,
			   gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
						 target2)));
      if (second_probability >= 0)
	REG_NOTES (i)
	  = gen_rtx_EXPR_LIST (REG_BR_PROB,
			       GEN_INT (second_probability),
			       REG_NOTES (i));
    }
  if (label != NULL_RTX)
    emit_label (label);
}
8764
/* Expand a setcc of the comparison held in ix86_compare_op0/op1 into
   QImode register DEST.  Returns 1 on success ("DONE"), 0 when the
   expander must fall back ("FAIL" — 32-bit DImode compares are not
   handled here).  When the FP comparison decomposes into two tests,
   the partial results are combined with and/or into DEST.  */
int
ix86_expand_setcc (enum rtx_code code, rtx dest)
{
  rtx ret, tmp, tmpreg, equiv;
  rtx second_test, bypass_test;

  if (GET_MODE (ix86_compare_op0) == DImode
      && !TARGET_64BIT)
    return 0; /* FAIL */

  if (GET_MODE (dest) != QImode)
    abort ();

  ret = ix86_expand_compare (code, &second_test, &bypass_test);
  PUT_MODE (ret, QImode);

  tmp = dest;
  tmpreg = dest;

  emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
  if (bypass_test || second_test)
    {
      /* At most one auxiliary test can be present; a bypass test is
	 combined with AND (its reversed sense must also hold), a
	 second test with IOR.  */
      rtx test = second_test;
      int bypass = 0;
      rtx tmp2 = gen_reg_rtx (QImode);
      if (bypass_test)
	{
	  if (second_test)
	    abort ();
	  test = bypass_test;
	  bypass = 1;
	  /* NOTE: mutates the test rtx in place.  */
	  PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
	}
      PUT_MODE (test, QImode);
      emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));

      if (bypass)
	emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
      else
	emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
    }

  /* Attach a REG_EQUAL note describing the comparison result.  */
  equiv = simplify_gen_relational (code, QImode,
				   GET_MODE (ix86_compare_op0),
				   ix86_compare_op0, ix86_compare_op1);
  set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);

  return 1; /* DONE */
}
e075ae69 8815
c35d187f
RH
/* Expand comparison setting or clearing carry flag.  Return true when
   successful and set *POP to the resulting LTU/GEU flags test.

   The integer cases rewrite CODE into an equivalent unsigned
   comparison that the hardware answers directly in the carry flag
   (e.g. a==0 becomes (unsigned)a<1); FP cases succeed only when the
   expanded compare already reduces to a plain LTU/GEU test.  */
static bool
ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
{
  enum machine_mode mode =
    GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);

  /* Do not handle DImode compares that go through the special path.
     Also we can't deal with FP compares yet.  This is possible to add.  */
  if ((mode == DImode && !TARGET_64BIT))
    return false;
  if (FLOAT_MODE_P (mode))
    {
      rtx second_test = NULL, bypass_test = NULL;
      rtx compare_op, compare_seq;

      /* Shortcut: the following common codes never translate into
	 carry flag compares.  */
      if (code == EQ || code == NE || code == UNEQ || code == LTGT
	  || code == ORDERED || code == UNORDERED)
	return false;

      /* These comparisons require zero flag; swap operands so they won't.  */
      if ((code == GT || code == UNLE || code == LE || code == UNGT)
	  && !TARGET_IEEE_FP)
	{
	  rtx tmp = op0;
	  op0 = op1;
	  op1 = tmp;
	  code = swap_condition (code);
	}

      /* Try to expand the comparison and verify that we end up with a
	 carry flag based comparison.  This fails to be true only when
	 we decide to expand the comparison using arithmetic, which is
	 not a common scenario.  */
      start_sequence ();
      compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
					   &second_test, &bypass_test);
      compare_seq = get_insns ();
      end_sequence ();

      if (second_test || bypass_test)
	return false;
      if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
	  || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
	code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
      else
	code = GET_CODE (compare_op);
      if (code != LTU && code != GEU)
	return false;
      /* Only now commit the compare sequence to the insn stream.  */
      emit_insn (compare_seq);
      *pop = compare_op;
      return true;
    }
  if (!INTEGRAL_MODE_P (mode))
    return false;
  switch (code)
    {
    case LTU:
    case GEU:
      /* Already carry-flag comparisons.  */
      break;

    /* Convert a==0 into (unsigned)a<1.  */
    case EQ:
    case NE:
      if (op1 != const0_rtx)
	return false;
      op1 = const1_rtx;
      code = (code == EQ ? LTU : GEU);
      break;

    /* Convert a>b into b<a or a>=b-1.  */
    case GTU:
    case LEU:
      if (GET_CODE (op1) == CONST_INT)
	{
	  op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
	  /* Bail out on overflow.  We still can swap operands but that
	     would force loading of the constant into register.  */
	  if (op1 == const0_rtx
	      || !x86_64_immediate_operand (op1, GET_MODE (op1)))
	    return false;
	  code = (code == GTU ? GEU : LTU);
	}
      else
	{
	  rtx tmp = op1;
	  op1 = op0;
	  op0 = tmp;
	  code = (code == GTU ? LTU : GEU);
	}
      break;

    /* Convert a>=0 into (unsigned)a<0x80000000.  */
    case LT:
    case GE:
      if (mode == DImode || op1 != const0_rtx)
	return false;
      op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
      code = (code == LT ? GEU : LTU);
      break;
    case LE:
    case GT:
      /* Likewise a<=-1 / a>-1 via the sign bit.  */
      if (mode == DImode || op1 != constm1_rtx)
	return false;
      op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
      code = (code == LE ? GEU : LTU);
      break;

    default:
      return false;
    }
  /* Swapping operands may cause constant to appear as first operand.  */
  if (!nonimmediate_operand (op0, VOIDmode))
    {
      if (no_new_pseudos)
	return false;
      op0 = force_reg (mode, op0);
    }
  ix86_compare_op0 = op0;
  ix86_compare_op1 = op1;
  *pop = ix86_expand_compare (code, NULL, NULL);
  if (GET_CODE (*pop) != LTU && GET_CODE (*pop) != GEU)
    abort ();
  return true;
}
8942
32b5b1aa 8943int
b96a374d 8944ix86_expand_int_movcc (rtx operands[])
32b5b1aa 8945{
e075ae69
RH
8946 enum rtx_code code = GET_CODE (operands[1]), compare_code;
8947 rtx compare_seq, compare_op;
a1b8572c 8948 rtx second_test, bypass_test;
635559ab 8949 enum machine_mode mode = GET_MODE (operands[0]);
4977bab6 8950 bool sign_bit_compare_p = false;;
3a3677ff 8951
e075ae69 8952 start_sequence ();
a1b8572c 8953 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
2f937369 8954 compare_seq = get_insns ();
e075ae69
RH
8955 end_sequence ();
8956
8957 compare_code = GET_CODE (compare_op);
8958
4977bab6
ZW
8959 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
8960 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
8961 sign_bit_compare_p = true;
8962
e075ae69
RH
8963 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
8964 HImode insns, we'd be swallowed in word prefix ops. */
8965
4977bab6 8966 if ((mode != HImode || TARGET_FAST_PREFIX)
635559ab 8967 && (mode != DImode || TARGET_64BIT)
0f290768 8968 && GET_CODE (operands[2]) == CONST_INT
e075ae69
RH
8969 && GET_CODE (operands[3]) == CONST_INT)
8970 {
8971 rtx out = operands[0];
8972 HOST_WIDE_INT ct = INTVAL (operands[2]);
8973 HOST_WIDE_INT cf = INTVAL (operands[3]);
8974 HOST_WIDE_INT diff;
8975
4977bab6
ZW
8976 diff = ct - cf;
8977 /* Sign bit compares are better done using shifts than we do by using
b96a374d 8978 sbb. */
4977bab6
ZW
8979 if (sign_bit_compare_p
8980 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
8981 ix86_compare_op1, &compare_op))
e075ae69 8982 {
e075ae69
RH
8983 /* Detect overlap between destination and compare sources. */
8984 rtx tmp = out;
8985
4977bab6 8986 if (!sign_bit_compare_p)
36583fea 8987 {
e6e81735
JH
8988 bool fpcmp = false;
8989
4977bab6
ZW
8990 compare_code = GET_CODE (compare_op);
8991
e6e81735
JH
8992 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
8993 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
8994 {
8995 fpcmp = true;
8996 compare_code = ix86_fp_compare_code_to_integer (compare_code);
8997 }
8998
4977bab6
ZW
8999 /* To simplify rest of code, restrict to the GEU case. */
9000 if (compare_code == LTU)
9001 {
9002 HOST_WIDE_INT tmp = ct;
9003 ct = cf;
9004 cf = tmp;
9005 compare_code = reverse_condition (compare_code);
9006 code = reverse_condition (code);
9007 }
e6e81735
JH
9008 else
9009 {
9010 if (fpcmp)
9011 PUT_CODE (compare_op,
9012 reverse_condition_maybe_unordered
9013 (GET_CODE (compare_op)));
9014 else
9015 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
9016 }
4977bab6 9017 diff = ct - cf;
36583fea 9018
4977bab6
ZW
9019 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
9020 || reg_overlap_mentioned_p (out, ix86_compare_op1))
9021 tmp = gen_reg_rtx (mode);
e075ae69 9022
4977bab6 9023 if (mode == DImode)
e6e81735 9024 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
4977bab6 9025 else
e6e81735 9026 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
4977bab6 9027 }
14f73b5a 9028 else
4977bab6
ZW
9029 {
9030 if (code == GT || code == GE)
9031 code = reverse_condition (code);
9032 else
9033 {
9034 HOST_WIDE_INT tmp = ct;
9035 ct = cf;
9036 cf = tmp;
5fb48685 9037 diff = ct - cf;
4977bab6
ZW
9038 }
9039 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
9040 ix86_compare_op1, VOIDmode, 0, -1);
9041 }
e075ae69 9042
36583fea
JH
9043 if (diff == 1)
9044 {
9045 /*
9046 * cmpl op0,op1
9047 * sbbl dest,dest
9048 * [addl dest, ct]
9049 *
9050 * Size 5 - 8.
9051 */
9052 if (ct)
b96a374d 9053 tmp = expand_simple_binop (mode, PLUS,
635559ab 9054 tmp, GEN_INT (ct),
4977bab6 9055 copy_rtx (tmp), 1, OPTAB_DIRECT);
36583fea
JH
9056 }
9057 else if (cf == -1)
9058 {
9059 /*
9060 * cmpl op0,op1
9061 * sbbl dest,dest
9062 * orl $ct, dest
9063 *
9064 * Size 8.
9065 */
635559ab
JH
9066 tmp = expand_simple_binop (mode, IOR,
9067 tmp, GEN_INT (ct),
4977bab6 9068 copy_rtx (tmp), 1, OPTAB_DIRECT);
36583fea
JH
9069 }
9070 else if (diff == -1 && ct)
9071 {
9072 /*
9073 * cmpl op0,op1
9074 * sbbl dest,dest
06ec023f 9075 * notl dest
36583fea
JH
9076 * [addl dest, cf]
9077 *
9078 * Size 8 - 11.
9079 */
4977bab6 9080 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
635559ab 9081 if (cf)
b96a374d 9082 tmp = expand_simple_binop (mode, PLUS,
4977bab6
ZW
9083 copy_rtx (tmp), GEN_INT (cf),
9084 copy_rtx (tmp), 1, OPTAB_DIRECT);
36583fea
JH
9085 }
9086 else
9087 {
9088 /*
9089 * cmpl op0,op1
9090 * sbbl dest,dest
06ec023f 9091 * [notl dest]
36583fea
JH
9092 * andl cf - ct, dest
9093 * [addl dest, ct]
9094 *
9095 * Size 8 - 11.
9096 */
06ec023f
RB
9097
9098 if (cf == 0)
9099 {
9100 cf = ct;
9101 ct = 0;
4977bab6 9102 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
06ec023f
RB
9103 }
9104
635559ab 9105 tmp = expand_simple_binop (mode, AND,
4977bab6 9106 copy_rtx (tmp),
d8bf17f9 9107 gen_int_mode (cf - ct, mode),
4977bab6 9108 copy_rtx (tmp), 1, OPTAB_DIRECT);
635559ab 9109 if (ct)
b96a374d 9110 tmp = expand_simple_binop (mode, PLUS,
4977bab6
ZW
9111 copy_rtx (tmp), GEN_INT (ct),
9112 copy_rtx (tmp), 1, OPTAB_DIRECT);
36583fea 9113 }
e075ae69 9114
4977bab6
ZW
9115 if (!rtx_equal_p (tmp, out))
9116 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
e075ae69
RH
9117
9118 return 1; /* DONE */
9119 }
9120
e075ae69
RH
9121 if (diff < 0)
9122 {
9123 HOST_WIDE_INT tmp;
9124 tmp = ct, ct = cf, cf = tmp;
9125 diff = -diff;
734dba19
JH
9126 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9127 {
9128 /* We may be reversing unordered compare to normal compare, that
9129 is not valid in general (we may convert non-trapping condition
9130 to trapping one), however on i386 we currently emit all
9131 comparisons unordered. */
9132 compare_code = reverse_condition_maybe_unordered (compare_code);
9133 code = reverse_condition_maybe_unordered (code);
9134 }
9135 else
9136 {
9137 compare_code = reverse_condition (compare_code);
9138 code = reverse_condition (code);
9139 }
e075ae69 9140 }
0f2a3457 9141
f822d252 9142 compare_code = UNKNOWN;
0f2a3457
JJ
9143 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
9144 && GET_CODE (ix86_compare_op1) == CONST_INT)
9145 {
9146 if (ix86_compare_op1 == const0_rtx
9147 && (code == LT || code == GE))
9148 compare_code = code;
9149 else if (ix86_compare_op1 == constm1_rtx)
9150 {
9151 if (code == LE)
9152 compare_code = LT;
9153 else if (code == GT)
9154 compare_code = GE;
9155 }
9156 }
9157
9158 /* Optimize dest = (op0 < 0) ? -1 : cf. */
f822d252 9159 if (compare_code != UNKNOWN
0f2a3457
JJ
9160 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
9161 && (cf == -1 || ct == -1))
9162 {
9163 /* If lea code below could be used, only optimize
9164 if it results in a 2 insn sequence. */
9165
9166 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
9167 || diff == 3 || diff == 5 || diff == 9)
9168 || (compare_code == LT && ct == -1)
9169 || (compare_code == GE && cf == -1))
9170 {
9171 /*
9172 * notl op1 (if necessary)
9173 * sarl $31, op1
9174 * orl cf, op1
9175 */
9176 if (ct != -1)
9177 {
9178 cf = ct;
b96a374d 9179 ct = -1;
0f2a3457
JJ
9180 code = reverse_condition (code);
9181 }
9182
9183 out = emit_store_flag (out, code, ix86_compare_op0,
9184 ix86_compare_op1, VOIDmode, 0, -1);
9185
9186 out = expand_simple_binop (mode, IOR,
9187 out, GEN_INT (cf),
9188 out, 1, OPTAB_DIRECT);
9189 if (out != operands[0])
9190 emit_move_insn (operands[0], out);
9191
9192 return 1; /* DONE */
9193 }
9194 }
9195
4977bab6 9196
635559ab
JH
9197 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
9198 || diff == 3 || diff == 5 || diff == 9)
4977bab6 9199 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
8fe75e43
RH
9200 && (mode != DImode
9201 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
e075ae69
RH
9202 {
9203 /*
9204 * xorl dest,dest
9205 * cmpl op1,op2
9206 * setcc dest
9207 * lea cf(dest*(ct-cf)),dest
9208 *
9209 * Size 14.
9210 *
9211 * This also catches the degenerate setcc-only case.
9212 */
9213
9214 rtx tmp;
9215 int nops;
9216
9217 out = emit_store_flag (out, code, ix86_compare_op0,
9218 ix86_compare_op1, VOIDmode, 0, 1);
9219
9220 nops = 0;
97f51ac4
RB
9221 /* On x86_64 the lea instruction operates on Pmode, so we need
9222 to get arithmetics done in proper mode to match. */
e075ae69 9223 if (diff == 1)
068f5dea 9224 tmp = copy_rtx (out);
e075ae69
RH
9225 else
9226 {
885a70fd 9227 rtx out1;
068f5dea 9228 out1 = copy_rtx (out);
635559ab 9229 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
e075ae69
RH
9230 nops++;
9231 if (diff & 1)
9232 {
635559ab 9233 tmp = gen_rtx_PLUS (mode, tmp, out1);
e075ae69
RH
9234 nops++;
9235 }
9236 }
9237 if (cf != 0)
9238 {
635559ab 9239 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
e075ae69
RH
9240 nops++;
9241 }
4977bab6 9242 if (!rtx_equal_p (tmp, out))
e075ae69 9243 {
14f73b5a 9244 if (nops == 1)
a5cf80f0 9245 out = force_operand (tmp, copy_rtx (out));
e075ae69 9246 else
4977bab6 9247 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
e075ae69 9248 }
4977bab6 9249 if (!rtx_equal_p (out, operands[0]))
1985ef90 9250 emit_move_insn (operands[0], copy_rtx (out));
e075ae69
RH
9251
9252 return 1; /* DONE */
9253 }
9254
9255 /*
9256 * General case: Jumpful:
9257 * xorl dest,dest cmpl op1, op2
9258 * cmpl op1, op2 movl ct, dest
9259 * setcc dest jcc 1f
9260 * decl dest movl cf, dest
9261 * andl (cf-ct),dest 1:
9262 * addl ct,dest
0f290768 9263 *
e075ae69
RH
9264 * Size 20. Size 14.
9265 *
9266 * This is reasonably steep, but branch mispredict costs are
9267 * high on modern cpus, so consider failing only if optimizing
9268 * for space.
e075ae69
RH
9269 */
9270
4977bab6
ZW
9271 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
9272 && BRANCH_COST >= 2)
e075ae69 9273 {
97f51ac4 9274 if (cf == 0)
e075ae69 9275 {
97f51ac4
RB
9276 cf = ct;
9277 ct = 0;
734dba19 9278 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
0f2a3457
JJ
9279 /* We may be reversing unordered compare to normal compare,
9280 that is not valid in general (we may convert non-trapping
9281 condition to trapping one), however on i386 we currently
9282 emit all comparisons unordered. */
9283 code = reverse_condition_maybe_unordered (code);
9284 else
9285 {
9286 code = reverse_condition (code);
f822d252 9287 if (compare_code != UNKNOWN)
0f2a3457
JJ
9288 compare_code = reverse_condition (compare_code);
9289 }
9290 }
9291
f822d252 9292 if (compare_code != UNKNOWN)
0f2a3457
JJ
9293 {
9294 /* notl op1 (if needed)
9295 sarl $31, op1
9296 andl (cf-ct), op1
b96a374d 9297 addl ct, op1
0f2a3457
JJ
9298
9299 For x < 0 (resp. x <= -1) there will be no notl,
9300 so if possible swap the constants to get rid of the
9301 complement.
9302 True/false will be -1/0 while code below (store flag
9303 followed by decrement) is 0/-1, so the constants need
9304 to be exchanged once more. */
9305
9306 if (compare_code == GE || !cf)
734dba19 9307 {
b96a374d 9308 code = reverse_condition (code);
0f2a3457 9309 compare_code = LT;
734dba19
JH
9310 }
9311 else
9312 {
0f2a3457 9313 HOST_WIDE_INT tmp = cf;
b96a374d 9314 cf = ct;
0f2a3457 9315 ct = tmp;
734dba19 9316 }
0f2a3457
JJ
9317
9318 out = emit_store_flag (out, code, ix86_compare_op0,
9319 ix86_compare_op1, VOIDmode, 0, -1);
e075ae69 9320 }
0f2a3457
JJ
9321 else
9322 {
9323 out = emit_store_flag (out, code, ix86_compare_op0,
9324 ix86_compare_op1, VOIDmode, 0, 1);
e075ae69 9325
4977bab6
ZW
9326 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
9327 copy_rtx (out), 1, OPTAB_DIRECT);
0f2a3457 9328 }
e075ae69 9329
4977bab6 9330 out = expand_simple_binop (mode, AND, copy_rtx (out),
d8bf17f9 9331 gen_int_mode (cf - ct, mode),
4977bab6 9332 copy_rtx (out), 1, OPTAB_DIRECT);
97f51ac4 9333 if (ct)
4977bab6
ZW
9334 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
9335 copy_rtx (out), 1, OPTAB_DIRECT);
9336 if (!rtx_equal_p (out, operands[0]))
9337 emit_move_insn (operands[0], copy_rtx (out));
e075ae69
RH
9338
9339 return 1; /* DONE */
9340 }
9341 }
9342
4977bab6 9343 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
e075ae69
RH
9344 {
9345 /* Try a few things more with specific constants and a variable. */
9346
78a0d70c 9347 optab op;
e075ae69
RH
9348 rtx var, orig_out, out, tmp;
9349
4977bab6 9350 if (BRANCH_COST <= 2)
e075ae69
RH
9351 return 0; /* FAIL */
9352
0f290768 9353 /* If one of the two operands is an interesting constant, load a
e075ae69 9354 constant with the above and mask it in with a logical operation. */
0f290768 9355
e075ae69
RH
9356 if (GET_CODE (operands[2]) == CONST_INT)
9357 {
9358 var = operands[3];
4977bab6 9359 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
e075ae69 9360 operands[3] = constm1_rtx, op = and_optab;
4977bab6 9361 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
e075ae69 9362 operands[3] = const0_rtx, op = ior_optab;
78a0d70c
ZW
9363 else
9364 return 0; /* FAIL */
e075ae69
RH
9365 }
9366 else if (GET_CODE (operands[3]) == CONST_INT)
9367 {
9368 var = operands[2];
4977bab6 9369 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
e075ae69 9370 operands[2] = constm1_rtx, op = and_optab;
4977bab6 9371 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
e075ae69 9372 operands[2] = const0_rtx, op = ior_optab;
78a0d70c
ZW
9373 else
9374 return 0; /* FAIL */
e075ae69 9375 }
78a0d70c 9376 else
e075ae69
RH
9377 return 0; /* FAIL */
9378
9379 orig_out = operands[0];
635559ab 9380 tmp = gen_reg_rtx (mode);
e075ae69
RH
9381 operands[0] = tmp;
9382
9383 /* Recurse to get the constant loaded. */
9384 if (ix86_expand_int_movcc (operands) == 0)
9385 return 0; /* FAIL */
9386
9387 /* Mask in the interesting variable. */
635559ab 9388 out = expand_binop (mode, op, var, tmp, orig_out, 0,
e075ae69 9389 OPTAB_WIDEN);
4977bab6
ZW
9390 if (!rtx_equal_p (out, orig_out))
9391 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
e075ae69
RH
9392
9393 return 1; /* DONE */
9394 }
9395
9396 /*
9397 * For comparison with above,
9398 *
9399 * movl cf,dest
9400 * movl ct,tmp
9401 * cmpl op1,op2
9402 * cmovcc tmp,dest
9403 *
9404 * Size 15.
9405 */
9406
635559ab
JH
9407 if (! nonimmediate_operand (operands[2], mode))
9408 operands[2] = force_reg (mode, operands[2]);
9409 if (! nonimmediate_operand (operands[3], mode))
9410 operands[3] = force_reg (mode, operands[3]);
e075ae69 9411
a1b8572c
JH
9412 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9413 {
635559ab 9414 rtx tmp = gen_reg_rtx (mode);
a1b8572c
JH
9415 emit_move_insn (tmp, operands[3]);
9416 operands[3] = tmp;
9417 }
9418 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9419 {
635559ab 9420 rtx tmp = gen_reg_rtx (mode);
a1b8572c
JH
9421 emit_move_insn (tmp, operands[2]);
9422 operands[2] = tmp;
9423 }
4977bab6 9424
c9682caf 9425 if (! register_operand (operands[2], VOIDmode)
b96a374d 9426 && (mode == QImode
4977bab6 9427 || ! register_operand (operands[3], VOIDmode)))
635559ab 9428 operands[2] = force_reg (mode, operands[2]);
a1b8572c 9429
4977bab6
ZW
9430 if (mode == QImode
9431 && ! register_operand (operands[3], VOIDmode))
9432 operands[3] = force_reg (mode, operands[3]);
9433
e075ae69
RH
9434 emit_insn (compare_seq);
9435 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
635559ab 9436 gen_rtx_IF_THEN_ELSE (mode,
e075ae69
RH
9437 compare_op, operands[2],
9438 operands[3])));
a1b8572c 9439 if (bypass_test)
4977bab6 9440 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
635559ab 9441 gen_rtx_IF_THEN_ELSE (mode,
a1b8572c 9442 bypass_test,
4977bab6
ZW
9443 copy_rtx (operands[3]),
9444 copy_rtx (operands[0]))));
a1b8572c 9445 if (second_test)
4977bab6 9446 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
635559ab 9447 gen_rtx_IF_THEN_ELSE (mode,
a1b8572c 9448 second_test,
4977bab6
ZW
9449 copy_rtx (operands[2]),
9450 copy_rtx (operands[0]))));
e075ae69
RH
9451
9452 return 1; /* DONE */
e9a25f70 9453}
e075ae69 9454
32b5b1aa 9455int
b96a374d 9456ix86_expand_fp_movcc (rtx operands[])
32b5b1aa 9457{
e075ae69 9458 enum rtx_code code;
e075ae69 9459 rtx tmp;
a1b8572c 9460 rtx compare_op, second_test, bypass_test;
32b5b1aa 9461
0073023d
JH
9462 /* For SF/DFmode conditional moves based on comparisons
9463 in same mode, we may want to use SSE min/max instructions. */
965f5423
JH
9464 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
9465 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
0073023d 9466 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
fa9f36a1
JH
9467 /* The SSE comparisons does not support the LTGT/UNEQ pair. */
9468 && (!TARGET_IEEE_FP
9469 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
0073023d
JH
9470 /* We may be called from the post-reload splitter. */
9471 && (!REG_P (operands[0])
9472 || SSE_REG_P (operands[0])
52a661a6 9473 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
0073023d
JH
9474 {
9475 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
9476 code = GET_CODE (operands[1]);
9477
9478 /* See if we have (cross) match between comparison operands and
9479 conditional move operands. */
9480 if (rtx_equal_p (operands[2], op1))
9481 {
9482 rtx tmp = op0;
9483 op0 = op1;
9484 op1 = tmp;
9485 code = reverse_condition_maybe_unordered (code);
9486 }
9487 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
9488 {
9489 /* Check for min operation. */
4977bab6 9490 if (code == LT || code == UNLE)
0073023d 9491 {
4977bab6
ZW
9492 if (code == UNLE)
9493 {
9494 rtx tmp = op0;
9495 op0 = op1;
9496 op1 = tmp;
9497 }
0073023d
JH
9498 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9499 if (memory_operand (op0, VOIDmode))
9500 op0 = force_reg (GET_MODE (operands[0]), op0);
9501 if (GET_MODE (operands[0]) == SFmode)
9502 emit_insn (gen_minsf3 (operands[0], op0, op1));
9503 else
9504 emit_insn (gen_mindf3 (operands[0], op0, op1));
9505 return 1;
9506 }
9507 /* Check for max operation. */
4977bab6 9508 if (code == GT || code == UNGE)
0073023d 9509 {
4977bab6
ZW
9510 if (code == UNGE)
9511 {
9512 rtx tmp = op0;
9513 op0 = op1;
9514 op1 = tmp;
9515 }
0073023d
JH
9516 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9517 if (memory_operand (op0, VOIDmode))
9518 op0 = force_reg (GET_MODE (operands[0]), op0);
9519 if (GET_MODE (operands[0]) == SFmode)
9520 emit_insn (gen_maxsf3 (operands[0], op0, op1));
9521 else
9522 emit_insn (gen_maxdf3 (operands[0], op0, op1));
9523 return 1;
9524 }
9525 }
9526 /* Manage condition to be sse_comparison_operator. In case we are
9527 in non-ieee mode, try to canonicalize the destination operand
9528 to be first in the comparison - this helps reload to avoid extra
9529 moves. */
9530 if (!sse_comparison_operator (operands[1], VOIDmode)
9531 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
9532 {
9533 rtx tmp = ix86_compare_op0;
9534 ix86_compare_op0 = ix86_compare_op1;
9535 ix86_compare_op1 = tmp;
9536 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
9537 VOIDmode, ix86_compare_op0,
9538 ix86_compare_op1);
9539 }
d1f87653 9540 /* Similarly try to manage result to be first operand of conditional
fa9f36a1
JH
9541 move. We also don't support the NE comparison on SSE, so try to
9542 avoid it. */
037f20f1
JH
9543 if ((rtx_equal_p (operands[0], operands[3])
9544 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
9545 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
0073023d
JH
9546 {
9547 rtx tmp = operands[2];
9548 operands[2] = operands[3];
92d0fb09 9549 operands[3] = tmp;
0073023d
JH
9550 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
9551 (GET_CODE (operands[1])),
9552 VOIDmode, ix86_compare_op0,
9553 ix86_compare_op1);
9554 }
9555 if (GET_MODE (operands[0]) == SFmode)
9556 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
9557 operands[2], operands[3],
9558 ix86_compare_op0, ix86_compare_op1));
9559 else
9560 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
9561 operands[2], operands[3],
9562 ix86_compare_op0, ix86_compare_op1));
9563 return 1;
9564 }
9565
e075ae69 9566 /* The floating point conditional move instructions don't directly
0f290768 9567 support conditions resulting from a signed integer comparison. */
32b5b1aa 9568
e075ae69 9569 code = GET_CODE (operands[1]);
a1b8572c 9570 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9e7adcb3
JH
9571
9572 /* The floating point conditional move instructions don't directly
9573 support signed integer comparisons. */
9574
a1b8572c 9575 if (!fcmov_comparison_operator (compare_op, VOIDmode))
e075ae69 9576 {
a1b8572c 9577 if (second_test != NULL || bypass_test != NULL)
b531087a 9578 abort ();
e075ae69 9579 tmp = gen_reg_rtx (QImode);
3a3677ff 9580 ix86_expand_setcc (code, tmp);
e075ae69
RH
9581 code = NE;
9582 ix86_compare_op0 = tmp;
9583 ix86_compare_op1 = const0_rtx;
a1b8572c
JH
9584 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9585 }
9586 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9587 {
9588 tmp = gen_reg_rtx (GET_MODE (operands[0]));
9589 emit_move_insn (tmp, operands[3]);
9590 operands[3] = tmp;
9591 }
9592 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9593 {
9594 tmp = gen_reg_rtx (GET_MODE (operands[0]));
9595 emit_move_insn (tmp, operands[2]);
9596 operands[2] = tmp;
e075ae69 9597 }
e9a25f70 9598
e075ae69
RH
9599 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9600 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9e7adcb3 9601 compare_op,
e075ae69
RH
9602 operands[2],
9603 operands[3])));
a1b8572c
JH
9604 if (bypass_test)
9605 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9606 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9607 bypass_test,
9608 operands[3],
9609 operands[0])));
9610 if (second_test)
9611 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9612 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9613 second_test,
9614 operands[2],
9615 operands[0])));
32b5b1aa 9616
e075ae69 9617 return 1;
32b5b1aa
SC
9618}
9619
7b52eede
JH
9620/* Expand conditional increment or decrement using adb/sbb instructions.
9621 The default case using setcc followed by the conditional move can be
9622 done by generic code. */
9623int
b96a374d 9624ix86_expand_int_addcc (rtx operands[])
7b52eede
JH
9625{
9626 enum rtx_code code = GET_CODE (operands[1]);
9627 rtx compare_op;
9628 rtx val = const0_rtx;
e6e81735 9629 bool fpcmp = false;
e6e81735 9630 enum machine_mode mode = GET_MODE (operands[0]);
7b52eede
JH
9631
9632 if (operands[3] != const1_rtx
9633 && operands[3] != constm1_rtx)
9634 return 0;
9635 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
9636 ix86_compare_op1, &compare_op))
9637 return 0;
e6e81735
JH
9638 code = GET_CODE (compare_op);
9639
9640 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9641 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9642 {
9643 fpcmp = true;
9644 code = ix86_fp_compare_code_to_integer (code);
9645 }
9646
9647 if (code != LTU)
9648 {
9649 val = constm1_rtx;
9650 if (fpcmp)
9651 PUT_CODE (compare_op,
9652 reverse_condition_maybe_unordered
9653 (GET_CODE (compare_op)));
9654 else
9655 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
9656 }
9657 PUT_MODE (compare_op, mode);
9658
9659 /* Construct either adc or sbb insn. */
9660 if ((code == LTU) == (operands[3] == constm1_rtx))
7b52eede
JH
9661 {
9662 switch (GET_MODE (operands[0]))
9663 {
9664 case QImode:
e6e81735 9665 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
7b52eede
JH
9666 break;
9667 case HImode:
e6e81735 9668 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
7b52eede
JH
9669 break;
9670 case SImode:
e6e81735 9671 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
7b52eede
JH
9672 break;
9673 case DImode:
e6e81735 9674 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
7b52eede
JH
9675 break;
9676 default:
9677 abort ();
9678 }
9679 }
9680 else
9681 {
9682 switch (GET_MODE (operands[0]))
9683 {
9684 case QImode:
e6e81735 9685 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
7b52eede
JH
9686 break;
9687 case HImode:
e6e81735 9688 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
7b52eede
JH
9689 break;
9690 case SImode:
e6e81735 9691 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
7b52eede
JH
9692 break;
9693 case DImode:
e6e81735 9694 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
7b52eede
JH
9695 break;
9696 default:
9697 abort ();
9698 }
9699 }
9700 return 1; /* DONE */
9701}
9702
9703
2450a057
JH
9704/* Split operands 0 and 1 into SImode parts. Similar to split_di, but
9705 works for floating pointer parameters and nonoffsetable memories.
9706 For pushes, it returns just stack offsets; the values will be saved
9707 in the right order. Maximally three parts are generated. */
9708
2b589241 9709static int
b96a374d 9710ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
32b5b1aa 9711{
26e5b205
JH
9712 int size;
9713
9714 if (!TARGET_64BIT)
f8a1ebc6 9715 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
26e5b205
JH
9716 else
9717 size = (GET_MODE_SIZE (mode) + 4) / 8;
2450a057 9718
a7180f70
BS
9719 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
9720 abort ();
2450a057
JH
9721 if (size < 2 || size > 3)
9722 abort ();
9723
f996902d
RH
9724 /* Optimize constant pool reference to immediates. This is used by fp
9725 moves, that force all constants to memory to allow combining. */
389fdba0 9726 if (GET_CODE (operand) == MEM && MEM_READONLY_P (operand))
f996902d
RH
9727 {
9728 rtx tmp = maybe_get_pool_constant (operand);
9729 if (tmp)
9730 operand = tmp;
9731 }
d7a29404 9732
2450a057 9733 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
e075ae69 9734 {
2450a057
JH
9735 /* The only non-offsetable memories we handle are pushes. */
9736 if (! push_operand (operand, VOIDmode))
9737 abort ();
9738
26e5b205
JH
9739 operand = copy_rtx (operand);
9740 PUT_MODE (operand, Pmode);
2450a057
JH
9741 parts[0] = parts[1] = parts[2] = operand;
9742 }
26e5b205 9743 else if (!TARGET_64BIT)
2450a057
JH
9744 {
9745 if (mode == DImode)
9746 split_di (&operand, 1, &parts[0], &parts[1]);
9747 else
e075ae69 9748 {
2450a057
JH
9749 if (REG_P (operand))
9750 {
9751 if (!reload_completed)
9752 abort ();
9753 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
9754 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
9755 if (size == 3)
9756 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
9757 }
9758 else if (offsettable_memref_p (operand))
9759 {
f4ef873c 9760 operand = adjust_address (operand, SImode, 0);
2450a057 9761 parts[0] = operand;
b72f00af 9762 parts[1] = adjust_address (operand, SImode, 4);
2450a057 9763 if (size == 3)
b72f00af 9764 parts[2] = adjust_address (operand, SImode, 8);
2450a057
JH
9765 }
9766 else if (GET_CODE (operand) == CONST_DOUBLE)
9767 {
9768 REAL_VALUE_TYPE r;
2b589241 9769 long l[4];
2450a057
JH
9770
9771 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
9772 switch (mode)
9773 {
9774 case XFmode:
9775 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
d8bf17f9 9776 parts[2] = gen_int_mode (l[2], SImode);
2450a057
JH
9777 break;
9778 case DFmode:
9779 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
9780 break;
9781 default:
9782 abort ();
9783 }
d8bf17f9
LB
9784 parts[1] = gen_int_mode (l[1], SImode);
9785 parts[0] = gen_int_mode (l[0], SImode);
2450a057
JH
9786 }
9787 else
9788 abort ();
e075ae69 9789 }
2450a057 9790 }
26e5b205
JH
9791 else
9792 {
44cf5b6a
JH
9793 if (mode == TImode)
9794 split_ti (&operand, 1, &parts[0], &parts[1]);
26e5b205
JH
9795 if (mode == XFmode || mode == TFmode)
9796 {
f8a1ebc6 9797 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
26e5b205
JH
9798 if (REG_P (operand))
9799 {
9800 if (!reload_completed)
9801 abort ();
9802 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
f8a1ebc6 9803 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
26e5b205
JH
9804 }
9805 else if (offsettable_memref_p (operand))
9806 {
b72f00af 9807 operand = adjust_address (operand, DImode, 0);
26e5b205 9808 parts[0] = operand;
f8a1ebc6 9809 parts[1] = adjust_address (operand, upper_mode, 8);
26e5b205
JH
9810 }
9811 else if (GET_CODE (operand) == CONST_DOUBLE)
9812 {
9813 REAL_VALUE_TYPE r;
9814 long l[3];
9815
9816 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
9953b5e1 9817 real_to_target (l, &r, mode);
26e5b205
JH
9818 /* Do not use shift by 32 to avoid warning on 32bit systems. */
9819 if (HOST_BITS_PER_WIDE_INT >= 64)
523fbd9d 9820 parts[0]
d8bf17f9 9821 = gen_int_mode
44cf5b6a 9822 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
b531087a 9823 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
d8bf17f9 9824 DImode);
26e5b205
JH
9825 else
9826 parts[0] = immed_double_const (l[0], l[1], DImode);
f8a1ebc6
JH
9827 if (upper_mode == SImode)
9828 parts[1] = gen_int_mode (l[2], SImode);
9829 else if (HOST_BITS_PER_WIDE_INT >= 64)
9830 parts[1]
9831 = gen_int_mode
9832 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
9833 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
9834 DImode);
9835 else
9836 parts[1] = immed_double_const (l[2], l[3], DImode);
26e5b205
JH
9837 }
9838 else
9839 abort ();
9840 }
9841 }
2450a057 9842
2b589241 9843 return size;
2450a057
JH
9844}
9845
9846/* Emit insns to perform a move or push of DI, DF, and XF values.
9847 Return false when normal moves are needed; true when all required
9848 insns have been emitted. Operands 2-4 contain the input values
9849 int the correct order; operands 5-7 contain the output values. */
9850
26e5b205 9851void
b96a374d 9852ix86_split_long_move (rtx operands[])
2450a057
JH
9853{
9854 rtx part[2][3];
26e5b205 9855 int nparts;
2450a057
JH
9856 int push = 0;
9857 int collisions = 0;
26e5b205
JH
9858 enum machine_mode mode = GET_MODE (operands[0]);
9859
9860 /* The DFmode expanders may ask us to move double.
9861 For 64bit target this is single move. By hiding the fact
9862 here we simplify i386.md splitters. */
9863 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
9864 {
8cdfa312
RH
9865 /* Optimize constant pool reference to immediates. This is used by
9866 fp moves, that force all constants to memory to allow combining. */
26e5b205
JH
9867
9868 if (GET_CODE (operands[1]) == MEM
9869 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
9870 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
9871 operands[1] = get_pool_constant (XEXP (operands[1], 0));
9872 if (push_operand (operands[0], VOIDmode))
b47b4f21
JH
9873 {
9874 operands[0] = copy_rtx (operands[0]);
9875 PUT_MODE (operands[0], Pmode);
9876 }
26e5b205
JH
9877 else
9878 operands[0] = gen_lowpart (DImode, operands[0]);
9879 operands[1] = gen_lowpart (DImode, operands[1]);
9880 emit_move_insn (operands[0], operands[1]);
9881 return;
9882 }
2450a057 9883
2450a057
JH
9884 /* The only non-offsettable memory we handle is push. */
9885 if (push_operand (operands[0], VOIDmode))
9886 push = 1;
9887 else if (GET_CODE (operands[0]) == MEM
9888 && ! offsettable_memref_p (operands[0]))
9889 abort ();
9890
26e5b205
JH
9891 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
9892 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
2450a057
JH
9893
9894 /* When emitting push, take care for source operands on the stack. */
9895 if (push && GET_CODE (operands[1]) == MEM
9896 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
9897 {
26e5b205 9898 if (nparts == 3)
886cbb88
JH
9899 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
9900 XEXP (part[1][2], 0));
9901 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
9902 XEXP (part[1][1], 0));
2450a057
JH
9903 }
9904
0f290768 9905 /* We need to do copy in the right order in case an address register
2450a057
JH
9906 of the source overlaps the destination. */
9907 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
9908 {
9909 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
9910 collisions++;
9911 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
9912 collisions++;
26e5b205 9913 if (nparts == 3
2450a057
JH
9914 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
9915 collisions++;
9916
9917 /* Collision in the middle part can be handled by reordering. */
26e5b205 9918 if (collisions == 1 && nparts == 3
2450a057 9919 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
e075ae69 9920 {
2450a057
JH
9921 rtx tmp;
9922 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
9923 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
9924 }
e075ae69 9925
2450a057
JH
9926 /* If there are more collisions, we can't handle it by reordering.
9927 Do an lea to the last part and use only one colliding move. */
9928 else if (collisions > 1)
9929 {
8231b3f9
RH
9930 rtx base;
9931
2450a057 9932 collisions = 1;
8231b3f9
RH
9933
9934 base = part[0][nparts - 1];
9935
9936 /* Handle the case when the last part isn't valid for lea.
9937 Happens in 64-bit mode storing the 12-byte XFmode. */
9938 if (GET_MODE (base) != Pmode)
9939 base = gen_rtx_REG (Pmode, REGNO (base));
9940
9941 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
9942 part[1][0] = replace_equiv_address (part[1][0], base);
9943 part[1][1] = replace_equiv_address (part[1][1],
9944 plus_constant (base, UNITS_PER_WORD));
26e5b205 9945 if (nparts == 3)
8231b3f9
RH
9946 part[1][2] = replace_equiv_address (part[1][2],
9947 plus_constant (base, 8));
2450a057
JH
9948 }
9949 }
9950
9951 if (push)
9952 {
26e5b205 9953 if (!TARGET_64BIT)
2b589241 9954 {
26e5b205
JH
9955 if (nparts == 3)
9956 {
f8a1ebc6
JH
9957 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
9958 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
26e5b205
JH
9959 emit_move_insn (part[0][2], part[1][2]);
9960 }
2b589241 9961 }
26e5b205
JH
9962 else
9963 {
9964 /* In 64bit mode we don't have 32bit push available. In case this is
9965 register, it is OK - we will just use larger counterpart. We also
9966 retype memory - these comes from attempt to avoid REX prefix on
9967 moving of second half of TFmode value. */
9968 if (GET_MODE (part[1][1]) == SImode)
9969 {
9970 if (GET_CODE (part[1][1]) == MEM)
f4ef873c 9971 part[1][1] = adjust_address (part[1][1], DImode, 0);
26e5b205
JH
9972 else if (REG_P (part[1][1]))
9973 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
9974 else
b531087a 9975 abort ();
886cbb88
JH
9976 if (GET_MODE (part[1][0]) == SImode)
9977 part[1][0] = part[1][1];
26e5b205
JH
9978 }
9979 }
9980 emit_move_insn (part[0][1], part[1][1]);
9981 emit_move_insn (part[0][0], part[1][0]);
9982 return;
2450a057
JH
9983 }
9984
9985 /* Choose correct order to not overwrite the source before it is copied. */
9986 if ((REG_P (part[0][0])
9987 && REG_P (part[1][1])
9988 && (REGNO (part[0][0]) == REGNO (part[1][1])
26e5b205 9989 || (nparts == 3
2450a057
JH
9990 && REGNO (part[0][0]) == REGNO (part[1][2]))))
9991 || (collisions > 0
9992 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
9993 {
26e5b205 9994 if (nparts == 3)
2450a057 9995 {
26e5b205
JH
9996 operands[2] = part[0][2];
9997 operands[3] = part[0][1];
9998 operands[4] = part[0][0];
9999 operands[5] = part[1][2];
10000 operands[6] = part[1][1];
10001 operands[7] = part[1][0];
2450a057
JH
10002 }
10003 else
10004 {
26e5b205
JH
10005 operands[2] = part[0][1];
10006 operands[3] = part[0][0];
10007 operands[5] = part[1][1];
10008 operands[6] = part[1][0];
2450a057
JH
10009 }
10010 }
10011 else
10012 {
26e5b205 10013 if (nparts == 3)
2450a057 10014 {
26e5b205
JH
10015 operands[2] = part[0][0];
10016 operands[3] = part[0][1];
10017 operands[4] = part[0][2];
10018 operands[5] = part[1][0];
10019 operands[6] = part[1][1];
10020 operands[7] = part[1][2];
2450a057
JH
10021 }
10022 else
10023 {
26e5b205
JH
10024 operands[2] = part[0][0];
10025 operands[3] = part[0][1];
10026 operands[5] = part[1][0];
10027 operands[6] = part[1][1];
e075ae69
RH
10028 }
10029 }
903a5059 10030
0e40b5f2 10031 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
903a5059
RS
10032 if (optimize_size)
10033 {
10034 if (GET_CODE (operands[5]) == CONST_INT
10035 && operands[5] != const0_rtx
10036 && REG_P (operands[2]))
10037 {
10038 if (GET_CODE (operands[6]) == CONST_INT
10039 && INTVAL (operands[6]) == INTVAL (operands[5]))
10040 operands[6] = operands[2];
10041
10042 if (nparts == 3
10043 && GET_CODE (operands[7]) == CONST_INT
10044 && INTVAL (operands[7]) == INTVAL (operands[5]))
10045 operands[7] = operands[2];
10046 }
10047
10048 if (nparts == 3
10049 && GET_CODE (operands[6]) == CONST_INT
10050 && operands[6] != const0_rtx
10051 && REG_P (operands[3])
10052 && GET_CODE (operands[7]) == CONST_INT
10053 && INTVAL (operands[7]) == INTVAL (operands[6]))
10054 operands[7] = operands[3];
10055 }
10056
26e5b205
JH
10057 emit_move_insn (operands[2], operands[5]);
10058 emit_move_insn (operands[3], operands[6]);
10059 if (nparts == 3)
10060 emit_move_insn (operands[4], operands[7]);
32b5b1aa 10061
26e5b205 10062 return;
32b5b1aa 10063}
32b5b1aa 10064
1b83d209
RS
10065/* Helper function of ix86_split_ashldi used to generate an SImode
10066 left shift by a constant, either using a single shift or
10067 a sequence of add instructions. */
10068
10069static void
10070ix86_expand_ashlsi3_const (rtx operand, int count)
10071{
10072 if (count == 1)
10073 emit_insn (gen_addsi3 (operand, operand, operand));
10074 else if (!optimize_size
10075 && count * ix86_cost->add <= ix86_cost->shift_const)
10076 {
10077 int i;
10078 for (i=0; i<count; i++)
10079 emit_insn (gen_addsi3 (operand, operand, operand));
10080 }
10081 else
10082 emit_insn (gen_ashlsi3 (operand, operand, GEN_INT (count)));
10083}
10084
e075ae69 10085void
b96a374d 10086ix86_split_ashldi (rtx *operands, rtx scratch)
32b5b1aa 10087{
e075ae69
RH
10088 rtx low[2], high[2];
10089 int count;
b985a30f 10090
e075ae69
RH
10091 if (GET_CODE (operands[2]) == CONST_INT)
10092 {
10093 split_di (operands, 2, low, high);
10094 count = INTVAL (operands[2]) & 63;
32b5b1aa 10095
e075ae69
RH
10096 if (count >= 32)
10097 {
10098 emit_move_insn (high[0], low[1]);
10099 emit_move_insn (low[0], const0_rtx);
b985a30f 10100
e075ae69 10101 if (count > 32)
1b83d209 10102 ix86_expand_ashlsi3_const (high[0], count - 32);
e075ae69
RH
10103 }
10104 else
10105 {
10106 if (!rtx_equal_p (operands[0], operands[1]))
10107 emit_move_insn (operands[0], operands[1]);
10108 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
1b83d209 10109 ix86_expand_ashlsi3_const (low[0], count);
e075ae69 10110 }
93330ea1 10111 return;
e075ae69 10112 }
93330ea1
RH
10113
10114 split_di (operands, 1, low, high);
10115
10116 if (operands[1] == const1_rtx)
e075ae69 10117 {
93330ea1
RH
10118 /* Assuming we've chosen a QImode capable registers, then 1LL << N
10119 can be done with two 32-bit shifts, no branches, no cmoves. */
10120 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
10121 {
10122 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
b985a30f 10123
93330ea1
RH
10124 ix86_expand_clear (low[0]);
10125 ix86_expand_clear (high[0]);
10126 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (32)));
10127
10128 d = gen_lowpart (QImode, low[0]);
10129 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
10130 s = gen_rtx_EQ (QImode, flags, const0_rtx);
10131 emit_insn (gen_rtx_SET (VOIDmode, d, s));
b985a30f 10132
93330ea1
RH
10133 d = gen_lowpart (QImode, high[0]);
10134 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
10135 s = gen_rtx_NE (QImode, flags, const0_rtx);
10136 emit_insn (gen_rtx_SET (VOIDmode, d, s));
10137 }
32b5b1aa 10138
93330ea1
RH
10139 /* Otherwise, we can get the same results by manually performing
10140 a bit extract operation on bit 5, and then performing the two
10141 shifts. The two methods of getting 0/1 into low/high are exactly
10142 the same size. Avoiding the shift in the bit extract case helps
10143 pentium4 a bit; no one else seems to care much either way. */
10144 else
e075ae69 10145 {
93330ea1
RH
10146 rtx x;
10147
10148 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
10149 x = gen_rtx_ZERO_EXTEND (SImode, operands[2]);
e075ae69 10150 else
93330ea1
RH
10151 x = gen_lowpart (SImode, operands[2]);
10152 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
e075ae69 10153
93330ea1
RH
10154 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (5)));
10155 emit_insn (gen_andsi3 (high[0], high[0], GEN_INT (1)));
10156 emit_move_insn (low[0], high[0]);
10157 emit_insn (gen_xorsi3 (low[0], low[0], GEN_INT (1)));
e075ae69 10158 }
93330ea1
RH
10159
10160 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
10161 emit_insn (gen_ashlsi3 (high[0], high[0], operands[2]));
10162 return;
10163 }
10164
10165 if (operands[1] == constm1_rtx)
10166 {
10167 /* For -1LL << N, we can avoid the shld instruction, because we
10168 know that we're shifting 0...31 ones into a -1. */
10169 emit_move_insn (low[0], constm1_rtx);
10170 if (optimize_size)
10171 emit_move_insn (high[0], low[0]);
e075ae69 10172 else
93330ea1 10173 emit_move_insn (high[0], constm1_rtx);
e075ae69 10174 }
93330ea1
RH
10175 else
10176 {
10177 if (!rtx_equal_p (operands[0], operands[1]))
10178 emit_move_insn (operands[0], operands[1]);
10179
10180 split_di (operands, 1, low, high);
10181 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
10182 }
10183
10184 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
10185
10186 if (TARGET_CMOVE && scratch)
10187 {
10188 ix86_expand_clear (scratch);
10189 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
10190 }
10191 else
10192 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
e9a25f70 10193}
32b5b1aa 10194
e075ae69 10195void
b96a374d 10196ix86_split_ashrdi (rtx *operands, rtx scratch)
32b5b1aa 10197{
e075ae69
RH
10198 rtx low[2], high[2];
10199 int count;
32b5b1aa 10200
e075ae69
RH
10201 if (GET_CODE (operands[2]) == CONST_INT)
10202 {
10203 split_di (operands, 2, low, high);
10204 count = INTVAL (operands[2]) & 63;
32b5b1aa 10205
8937b6a2
RS
10206 if (count == 63)
10207 {
10208 emit_move_insn (high[0], high[1]);
10209 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10210 emit_move_insn (low[0], high[0]);
10211
10212 }
10213 else if (count >= 32)
e075ae69
RH
10214 {
10215 emit_move_insn (low[0], high[1]);
93330ea1
RH
10216 emit_move_insn (high[0], low[0]);
10217 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
e075ae69
RH
10218 if (count > 32)
10219 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
10220 }
10221 else
10222 {
10223 if (!rtx_equal_p (operands[0], operands[1]))
10224 emit_move_insn (operands[0], operands[1]);
10225 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10226 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
10227 }
10228 }
10229 else
32b5b1aa 10230 {
e075ae69
RH
10231 if (!rtx_equal_p (operands[0], operands[1]))
10232 emit_move_insn (operands[0], operands[1]);
10233
10234 split_di (operands, 1, low, high);
10235
10236 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10237 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
10238
93330ea1 10239 if (TARGET_CMOVE && scratch)
e075ae69 10240 {
e075ae69
RH
10241 emit_move_insn (scratch, high[0]);
10242 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
10243 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10244 scratch));
10245 }
10246 else
10247 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
32b5b1aa 10248 }
e075ae69 10249}
32b5b1aa 10250
/* Split a DImode logical right shift into a pair of SImode operations.

   OPERANDS[0] is the destination, OPERANDS[1] the source and OPERANDS[2]
   the shift count (either a CONST_INT or a register).  SCRATCH, when
   non-null and cmov is available, lets the variable-count path adjust
   the result without a conditional branch.  */
void
ix86_split_lshrdi (rtx *operands, rtx scratch)
{
  rtx low[2], high[2];
  int count;

  if (GET_CODE (operands[2]) == CONST_INT)
    {
      split_di (operands, 2, low, high);
      /* Hardware shift counts are taken modulo 64.  */
      count = INTVAL (operands[2]) & 63;

      if (count >= 32)
	{
	  /* Shifting by >= 32: low word becomes the old high word,
	     high word becomes zero.  */
	  emit_move_insn (low[0], high[1]);
	  ix86_expand_clear (high[0]);

	  if (count > 32)
	    emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
	}
      else
	{
	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);
	  /* shrd shifts bits from the high word into the low word.  */
	  emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
	  emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
	}
    }
  else
    {
      /* Variable count: emit the 0..31 sequence unconditionally, then
	 fix up the result when the count turns out to be >= 32.  */
      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      split_di (operands, 1, low, high);

      emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
      emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));

      /* Heh.  By reversing the arguments, we can reuse this pattern.  */
      if (TARGET_CMOVE && scratch)
	{
	  ix86_expand_clear (scratch);
	  emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
					  scratch));
	}
      else
	emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
    }
}
3f803cd9 10299
0407c02b 10300/* Helper function for the string operations below. Dest VARIABLE whether
0945b39d
JH
10301 it is aligned to VALUE bytes. If true, jump to the label. */
10302static rtx
b96a374d 10303ix86_expand_aligntest (rtx variable, int value)
0945b39d
JH
10304{
10305 rtx label = gen_label_rtx ();
10306 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
10307 if (GET_MODE (variable) == DImode)
10308 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
10309 else
10310 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
10311 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
d43e0b7d 10312 1, label);
0945b39d
JH
10313 return label;
10314}
10315
10316/* Adjust COUNTER by the VALUE. */
10317static void
b96a374d 10318ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
0945b39d
JH
10319{
10320 if (GET_MODE (countreg) == DImode)
10321 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
10322 else
10323 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
10324}
10325
10326/* Zero extend possibly SImode EXP to Pmode register. */
d24b3457 10327rtx
b96a374d 10328ix86_zero_extend_to_Pmode (rtx exp)
0945b39d
JH
10329{
10330 rtx r;
10331 if (GET_MODE (exp) == VOIDmode)
10332 return force_reg (Pmode, exp);
10333 if (GET_MODE (exp) == Pmode)
10334 return copy_to_mode_reg (Pmode, exp);
10335 r = gen_reg_rtx (Pmode);
10336 emit_insn (gen_zero_extendsidi2 (r, exp));
10337 return r;
10338}
10339
/* Expand string move (memcpy) operation.  Use i386 string operations when
   profitable.  expand_clrmem contains similar code.

   DST and SRC are MEMs, COUNT_EXP the byte count and ALIGN_EXP the known
   alignment.  Returns 1 when the copy was expanded inline, 0 to make the
   caller fall back to a library call.

   Three inline strategies are used, in order of preference:
     1. rep movsb for size-optimized copies with a residue,
     2. rep movs{l,q} plus open-coded residue moves for constant counts,
     3. the generic glibc-style sequence: align the destination, rep movs,
	then copy the tail.  */
int
ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp)
{
  rtx srcreg, destreg, countreg, srcexp, destexp;
  enum machine_mode counter_mode;
  HOST_WIDE_INT align = 0;
  unsigned HOST_WIDE_INT count = 0;

  if (GET_CODE (align_exp) == CONST_INT)
    align = INTVAL (align_exp);

  /* Can't use any of this if the user has appropriated esi or edi.  */
  if (global_regs[4] || global_regs[5])
    return 0;

  /* This simple hack avoids all inlining code and simplifies code below.  */
  if (!TARGET_ALIGN_STRINGOPS)
    align = 64;

  if (GET_CODE (count_exp) == CONST_INT)
    {
      count = INTVAL (count_exp);
      if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
	return 0;
    }

  /* Figure out proper mode for counter.  For 32bits it is always SImode,
     for 64bits use SImode when possible, otherwise DImode.
     Set count to number of bytes copied when known at compile time.  */
  if (!TARGET_64BIT
      || GET_MODE (count_exp) == SImode
      || x86_64_zext_immediate_operand (count_exp, VOIDmode))
    counter_mode = SImode;
  else
    counter_mode = DImode;

  /* Paranoia: the two branches above are exhaustive.  */
  if (counter_mode != SImode && counter_mode != DImode)
    abort ();

  /* Force both addresses into registers and rewrite the MEMs to use
     them, so the rep/strmov patterns see plain register addresses.  */
  destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
  if (destreg != XEXP (dst, 0))
    dst = replace_equiv_address_nv (dst, destreg);
  srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
  if (srcreg != XEXP (src, 0))
    src = replace_equiv_address_nv (src, srcreg);

  /* When optimizing for size emit simple rep ; movsb instruction for
     counts not divisible by 4.  */

  if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
    {
      emit_insn (gen_cld ());
      countreg = ix86_zero_extend_to_Pmode (count_exp);
      /* destexp/srcexp describe the final pointer values for the
	 rep_mov pattern.  */
      destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
      srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
      emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
			      destexp, srcexp));
    }

  /* For constant aligned (or small unaligned) copies use rep movsl
     followed by code copying the rest.  For PentiumPro ensure 8 byte
     alignment to allow rep movsl acceleration.  */

  else if (count != 0
	   && (align >= 8
	       || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
	       || optimize_size || count < (unsigned int) 64))
    {
      unsigned HOST_WIDE_INT offset = 0;
      /* Word size of the rep move: 8 (movsq) on 64-bit unless
	 optimizing for size, else 4 (movsl).  */
      int size = TARGET_64BIT && !optimize_size ? 8 : 4;
      rtx srcmem, dstmem;

      emit_insn (gen_cld ());
      if (count & ~(size - 1))
	{
	  /* NOTE(review): the 0x3fffffff mask truncates the 32-bit rep
	     count to 30 bits — presumably a guard against pathological
	     constant sizes; confirm before relying on larger copies.  */
	  countreg = copy_to_mode_reg (counter_mode,
				       GEN_INT ((count >> (size == 4 ? 2 : 3))
						& (TARGET_64BIT ? -1 : 0x3fffffff)));
	  countreg = ix86_zero_extend_to_Pmode (countreg);

	  destexp = gen_rtx_ASHIFT (Pmode, countreg,
				    GEN_INT (size == 4 ? 2 : 3));
	  srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
	  destexp = gen_rtx_PLUS (Pmode, destexp, destreg);

	  emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
				  countreg, destexp, srcexp));
	  offset = count & ~(size - 1);
	}
      /* Copy the residue (< size bytes) with single moves, largest
	 pieces first.  */
      if (size == 8 && (count & 0x04))
	{
	  srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
						 offset);
	  dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
						 offset);
	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
	  offset += 4;
	}
      if (count & 0x02)
	{
	  srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
						 offset);
	  dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
						 offset);
	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
	  offset += 2;
	}
      if (count & 0x01)
	{
	  srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
						 offset);
	  dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
						 offset);
	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
	}
    }
  /* The generic code based on the glibc implementation:
     - align destination to 4 bytes (8 byte alignment is used for PentiumPro
     allowing accelerated copying there)
     - copy the data using rep movsl
     - copy the rest.  */
  else
    {
      rtx countreg2;
      rtx label = NULL;
      rtx srcmem, dstmem;
      int desired_alignment = (TARGET_PENTIUMPRO
			       && (count == 0 || count >= (unsigned int) 260)
			       ? 8 : UNITS_PER_WORD);
      /* Get rid of MEM_OFFSETs, they won't be accurate.  */
      dst = change_address (dst, BLKmode, destreg);
      src = change_address (src, BLKmode, srcreg);

      /* In case we don't know anything about the alignment, default to
	 library version, since it is usually equally fast and result in
	 shorter code.

	 Also emit call when we know that the count is large and call overhead
	 will not be important.  */
      if (!TARGET_INLINE_ALL_STRINGOPS
	  && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
	return 0;

      if (TARGET_SINGLE_STRINGOP)
	emit_insn (gen_cld ());

      countreg2 = gen_reg_rtx (Pmode);
      countreg = copy_to_mode_reg (counter_mode, count_exp);

      /* We don't use loops to align destination and to copy parts smaller
	 than 4 bytes, because gcc is able to optimize such code better (in
	 the case the destination or the count really is aligned, gcc is often
	 able to predict the branches) and also it is friendlier to the
	 hardware branch prediction.

	 Using loops is beneficial for generic case, because we can
	 handle small counts using the loops.  Many CPUs (such as Athlon)
	 have large REP prefix setup costs.

	 This is quite costly.  Maybe we can revisit this decision later or
	 add some customizability to this code.  */

      /* If the count is unknown and may be smaller than the alignment
	 prologue, skip the whole alignment dance.  */
      if (count == 0 && align < desired_alignment)
	{
	  label = gen_label_rtx ();
	  emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
				   LEU, 0, counter_mode, 1, label);
	}
      /* Alignment prologue: copy 1, 2, then 4 bytes as needed until the
	 destination reaches desired_alignment.  */
      if (align <= 1)
	{
	  rtx label = ix86_expand_aligntest (destreg, 1);
	  srcmem = change_address (src, QImode, srcreg);
	  dstmem = change_address (dst, QImode, destreg);
	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
	  ix86_adjust_counter (countreg, 1);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align <= 2)
	{
	  rtx label = ix86_expand_aligntest (destreg, 2);
	  srcmem = change_address (src, HImode, srcreg);
	  dstmem = change_address (dst, HImode, destreg);
	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
	  ix86_adjust_counter (countreg, 2);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align <= 4 && desired_alignment > 4)
	{
	  rtx label = ix86_expand_aligntest (destreg, 4);
	  srcmem = change_address (src, SImode, srcreg);
	  dstmem = change_address (dst, SImode, destreg);
	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
	  ix86_adjust_counter (countreg, 4);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}

      if (label && desired_alignment > 4 && !TARGET_64BIT)
	{
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	  label = NULL_RTX;
	}
      if (!TARGET_SINGLE_STRINGOP)
	emit_insn (gen_cld ());
      /* Main body: rep movs by words; countreg2 = byte count / word size.  */
      if (TARGET_64BIT)
	{
	  emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
				  GEN_INT (3)));
	  destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
	}
      else
	{
	  emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
	  destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
	}
      srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
      destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
      emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
			      countreg2, destexp, srcexp));

      if (label)
	{
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      /* Epilogue: copy the remaining 0-7 bytes, using runtime alignment
	 tests on countreg when the count is not a compile-time constant.  */
      if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
	{
	  srcmem = change_address (src, SImode, srcreg);
	  dstmem = change_address (dst, SImode, destreg);
	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
	}
      if ((align <= 4 || count == 0) && TARGET_64BIT)
	{
	  rtx label = ix86_expand_aligntest (countreg, 4);
	  srcmem = change_address (src, SImode, srcreg);
	  dstmem = change_address (dst, SImode, destreg);
	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align > 2 && count != 0 && (count & 2))
	{
	  srcmem = change_address (src, HImode, srcreg);
	  dstmem = change_address (dst, HImode, destreg);
	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
	}
      if (align <= 2 || count == 0)
	{
	  rtx label = ix86_expand_aligntest (countreg, 2);
	  srcmem = change_address (src, HImode, srcreg);
	  dstmem = change_address (dst, HImode, destreg);
	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align > 1 && count != 0 && (count & 1))
	{
	  srcmem = change_address (src, QImode, srcreg);
	  dstmem = change_address (dst, QImode, destreg);
	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
	}
      if (align <= 1 || count == 0)
	{
	  rtx label = ix86_expand_aligntest (countreg, 1);
	  srcmem = change_address (src, QImode, srcreg);
	  dstmem = change_address (dst, QImode, destreg);
	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
    }

  return 1;
}
10619
/* Expand string clear operation (bzero).  Use i386 string operations when
   profitable.  expand_movmem contains similar code.

   DST is the destination MEM, COUNT_EXP the byte count and ALIGN_EXP the
   known alignment.  Returns 1 when expanded inline, 0 to make the caller
   fall back to a library call.  The structure mirrors ix86_expand_movmem:
   rep stosb for size-optimized clears, rep stos{l,q} plus open-coded
   residue for constant counts, and a generic align/rep/tail sequence
   otherwise.  */
int
ix86_expand_clrmem (rtx dst, rtx count_exp, rtx align_exp)
{
  rtx destreg, zeroreg, countreg, destexp;
  enum machine_mode counter_mode;
  HOST_WIDE_INT align = 0;
  unsigned HOST_WIDE_INT count = 0;

  if (GET_CODE (align_exp) == CONST_INT)
    align = INTVAL (align_exp);

  /* Can't use any of this if the user has appropriated esi.  */
  if (global_regs[4])
    return 0;

  /* This simple hack avoids all inlining code and simplifies code below.  */
  if (!TARGET_ALIGN_STRINGOPS)
    align = 32;

  if (GET_CODE (count_exp) == CONST_INT)
    {
      count = INTVAL (count_exp);
      if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
	return 0;
    }
  /* Figure out proper mode for counter.  For 32bits it is always SImode,
     for 64bits use SImode when possible, otherwise DImode.
     Set count to number of bytes copied when known at compile time.  */
  if (!TARGET_64BIT
      || GET_MODE (count_exp) == SImode
      || x86_64_zext_immediate_operand (count_exp, VOIDmode))
    counter_mode = SImode;
  else
    counter_mode = DImode;

  /* Force the destination address into a register for the patterns.  */
  destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
  if (destreg != XEXP (dst, 0))
    dst = replace_equiv_address_nv (dst, destreg);


  /* When optimizing for size emit simple rep ; movsb instruction for
     counts not divisible by 4.  The movl $N, %ecx; rep; stosb
     sequence is 7 bytes long, so if optimizing for size and count is
     small enough that some stosl, stosw and stosb instructions without
     rep are shorter, fall back into the next if.  */

  if ((!optimize || optimize_size)
      && (count == 0
	  || ((count & 0x03)
	      && (!optimize_size || (count & 0x03) + (count >> 2) > 7))))
    {
      emit_insn (gen_cld ());

      countreg = ix86_zero_extend_to_Pmode (count_exp);
      zeroreg = copy_to_mode_reg (QImode, const0_rtx);
      /* destexp is the final pointer value for the rep_stos pattern.  */
      destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
      emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
    }
  else if (count != 0
	   && (align >= 8
	       || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
	       || optimize_size || count < (unsigned int) 64))
    {
      /* Word size of the store: 8 (stosq) on 64-bit unless optimizing
	 for size, else 4 (stosl).  */
      int size = TARGET_64BIT && !optimize_size ? 8 : 4;
      unsigned HOST_WIDE_INT offset = 0;

      emit_insn (gen_cld ());

      zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
      if (count & ~(size - 1))
	{
	  unsigned HOST_WIDE_INT repcount;
	  unsigned int max_nonrep;

	  repcount = count >> (size == 4 ? 2 : 3);
	  if (!TARGET_64BIT)
	    repcount &= 0x3fffffff;

	  /* movl $N, %ecx; rep; stosl is 7 bytes, while N x stosl is N bytes.
	     movl $N, %ecx; rep; stosq is 8 bytes, while N x stosq is 2xN
	     bytes.  In both cases the latter seems to be faster for small
	     values of N.  */
	  max_nonrep = size == 4 ? 7 : 4;
	  if (!optimize_size)
	    switch (ix86_tune)
	      {
	      case PROCESSOR_PENTIUM4:
	      case PROCESSOR_NOCONA:
		max_nonrep = 3;
		break;
	      default:
		break;
	      }

	  /* For small counts, a short run of plain stos instructions
	     beats the rep prefix setup cost.  */
	  if (repcount <= max_nonrep)
	    while (repcount-- > 0)
	      {
		rtx mem = adjust_automodify_address_nv (dst,
							GET_MODE (zeroreg),
							destreg, offset);
		emit_insn (gen_strset (destreg, mem, zeroreg));
		offset += size;
	      }
	  else
	    {
	      countreg = copy_to_mode_reg (counter_mode, GEN_INT (repcount));
	      countreg = ix86_zero_extend_to_Pmode (countreg);
	      destexp = gen_rtx_ASHIFT (Pmode, countreg,
					GEN_INT (size == 4 ? 2 : 3));
	      destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
	      emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg,
				       destexp));
	      offset = count & ~(size - 1);
	    }
	}
      /* Clear the residue (< size bytes), largest pieces first.  */
      if (size == 8 && (count & 0x04))
	{
	  rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
						  offset);
	  emit_insn (gen_strset (destreg, mem,
				 gen_rtx_SUBREG (SImode, zeroreg, 0)));
	  offset += 4;
	}
      if (count & 0x02)
	{
	  rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
						  offset);
	  emit_insn (gen_strset (destreg, mem,
				 gen_rtx_SUBREG (HImode, zeroreg, 0)));
	  offset += 2;
	}
      if (count & 0x01)
	{
	  rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
						  offset);
	  emit_insn (gen_strset (destreg, mem,
				 gen_rtx_SUBREG (QImode, zeroreg, 0)));
	}
    }
  else
    {
      rtx countreg2;
      rtx label = NULL;
      /* Compute desired alignment of the string operation.  */
      int desired_alignment = (TARGET_PENTIUMPRO
			       && (count == 0 || count >= (unsigned int) 260)
			       ? 8 : UNITS_PER_WORD);

      /* In case we don't know anything about the alignment, default to
	 library version, since it is usually equally fast and result in
	 shorter code.

	 Also emit call when we know that the count is large and call overhead
	 will not be important.  */
      if (!TARGET_INLINE_ALL_STRINGOPS
	  && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
	return 0;

      if (TARGET_SINGLE_STRINGOP)
	emit_insn (gen_cld ());

      countreg2 = gen_reg_rtx (Pmode);
      countreg = copy_to_mode_reg (counter_mode, count_exp);
      zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
      /* Get rid of MEM_OFFSET, it won't be accurate.  */
      dst = change_address (dst, BLKmode, destreg);

      /* If the count is unknown and may be smaller than the alignment
	 prologue, skip the alignment code entirely.  */
      if (count == 0 && align < desired_alignment)
	{
	  label = gen_label_rtx ();
	  emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
				   LEU, 0, counter_mode, 1, label);
	}
      /* Alignment prologue: store 1, 2, then 4 bytes as needed.  */
      if (align <= 1)
	{
	  rtx label = ix86_expand_aligntest (destreg, 1);
	  emit_insn (gen_strset (destreg, dst,
				 gen_rtx_SUBREG (QImode, zeroreg, 0)));
	  ix86_adjust_counter (countreg, 1);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align <= 2)
	{
	  rtx label = ix86_expand_aligntest (destreg, 2);
	  emit_insn (gen_strset (destreg, dst,
				 gen_rtx_SUBREG (HImode, zeroreg, 0)));
	  ix86_adjust_counter (countreg, 2);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align <= 4 && desired_alignment > 4)
	{
	  rtx label = ix86_expand_aligntest (destreg, 4);
	  emit_insn (gen_strset (destreg, dst,
				 (TARGET_64BIT
				  ? gen_rtx_SUBREG (SImode, zeroreg, 0)
				  : zeroreg)));
	  ix86_adjust_counter (countreg, 4);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}

      if (label && desired_alignment > 4 && !TARGET_64BIT)
	{
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	  label = NULL_RTX;
	}

      if (!TARGET_SINGLE_STRINGOP)
	emit_insn (gen_cld ());
      /* Main body: rep stos by words; countreg2 = byte count / word size.  */
      if (TARGET_64BIT)
	{
	  emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
				  GEN_INT (3)));
	  destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
	}
      else
	{
	  emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
	  destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
	}
      destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
      emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));

      if (label)
	{
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}

      /* Epilogue: clear the remaining 0-7 bytes, testing countreg at
	 runtime when the count is not a compile-time constant.  */
      if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
	emit_insn (gen_strset (destreg, dst,
			       gen_rtx_SUBREG (SImode, zeroreg, 0)));
      if (TARGET_64BIT && (align <= 4 || count == 0))
	{
	  rtx label = ix86_expand_aligntest (countreg, 4);
	  emit_insn (gen_strset (destreg, dst,
				 gen_rtx_SUBREG (SImode, zeroreg, 0)));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align > 2 && count != 0 && (count & 2))
	emit_insn (gen_strset (destreg, dst,
			       gen_rtx_SUBREG (HImode, zeroreg, 0)));
      if (align <= 2 || count == 0)
	{
	  rtx label = ix86_expand_aligntest (countreg, 2);
	  emit_insn (gen_strset (destreg, dst,
				 gen_rtx_SUBREG (HImode, zeroreg, 0)));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align > 1 && count != 0 && (count & 1))
	emit_insn (gen_strset (destreg, dst,
			       gen_rtx_SUBREG (QImode, zeroreg, 0)));
      if (align <= 1 || count == 0)
	{
	  rtx label = ix86_expand_aligntest (countreg, 1);
	  emit_insn (gen_strset (destreg, dst,
				 gen_rtx_SUBREG (QImode, zeroreg, 0)));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
    }
  return 1;
}
4e44c1ef 10890
/* Expand strlen.  OUT receives the length, SRC is the string MEM,
   EOSCHAR the terminator (const0_rtx for plain strlen) and ALIGN the
   known alignment of SRC.  Returns 1 when expanded inline, 0 to fall
   back to a library call.  */
int
ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
{
  rtx addr, scratch1, scratch2, scratch3, scratch4;

  /* The generic case of strlen expander is long.  Avoid expanding it
     unless TARGET_INLINE_ALL_STRINGOPS.  */

  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
      && !TARGET_INLINE_ALL_STRINGOPS
      && !optimize_size
      && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
    return 0;

  addr = force_reg (Pmode, XEXP (src, 0));
  scratch1 = gen_reg_rtx (Pmode);

  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
      && !optimize_size)
    {
      /* Well it seems that some optimizer does not combine a call like
	 foo(strlen(bar), strlen(bar));
	 when the move and the subtraction is done here.  It does calculate
	 the length just once when these instructions are done inside of
	 output_strlen_unroll().  But I think since &bar[strlen(bar)] is
	 often used and I use one fewer register for the lifetime of
	 output_strlen_unroll() this is better.  */

      emit_move_insn (out, addr);

      ix86_expand_strlensi_unroll_1 (out, src, align);

      /* strlensi_unroll_1 returns the address of the zero at the end of
	 the string, like memchr(), so compute the length by subtracting
	 the start address.  */
      if (TARGET_64BIT)
	emit_insn (gen_subdi3 (out, out, addr));
      else
	emit_insn (gen_subsi3 (out, out, addr));
    }
  else
    {
      /* Fall back to repnz; scasb: scan for EOSCHAR with a count of -1,
	 then recover the length from the decremented counter.  */
      rtx unspec;
      scratch2 = gen_reg_rtx (Pmode);
      scratch3 = gen_reg_rtx (Pmode);
      scratch4 = force_reg (Pmode, constm1_rtx);

      emit_move_insn (scratch3, addr);
      eoschar = force_reg (QImode, eoschar);

      emit_insn (gen_cld ());
      src = replace_equiv_address_nv (src, scratch3);

      /* If .md starts supporting :P, this can be done in .md.  */
      unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
						 scratch4), UNSPEC_SCAS);
      emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
      /* Length = ~counter - 1, i.e. -counter - 2, matching the scasb
	 counter semantics above.  */
      if (TARGET_64BIT)
	{
	  emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
	  emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
	}
      else
	{
	  emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
	  emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
	}
    }
  return 1;
}
10962
/* Expand the appropriate insns for doing strlen if not just doing
   repnz; scasb

   OUT = result, initialized with the start address
   SRC = the source memory reference
   ALIGN_RTX = alignment of the address (CONST_INT when known)

   On return OUT holds the address of the terminating zero byte, like
   memchr(); the caller (ix86_expand_strlen) converts that to a length
   by subtracting the start address.

   This is just the body.  It needs the initializations mentioned above and
   some address computing at the end.  These things are done in i386.md.  */

static void
ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
{
  int align;
  rtx tmp;
  rtx align_2_label = NULL_RTX;
  rtx align_3_label = NULL_RTX;
  rtx align_4_label = gen_label_rtx ();
  rtx end_0_label = gen_label_rtx ();
  rtx mem;
  rtx tmpreg = gen_reg_rtx (SImode);
  rtx scratch = gen_reg_rtx (SImode);
  rtx cmp;

  align = 0;
  if (GET_CODE (align_rtx) == CONST_INT)
    align = INTVAL (align_rtx);

  /* Loop to check 1..3 bytes for null to get an aligned pointer.  */

  /* Is there a known alignment and is it less than 4?  */
  if (align < 4)
    {
      rtx scratch1 = gen_reg_rtx (Pmode);
      emit_move_insn (scratch1, out);
      /* Is there a known alignment and is it not 2?  */
      if (align != 2)
	{
	  align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
	  align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */

	  /* Leave just the 3 lower bits.  */
	  align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
				    NULL_RTX, 0, OPTAB_WIDEN);

	  /* Dispatch on (address & 3): 0 -> already aligned, 2 -> two
	     bytes to check, 3 -> one byte, 1 -> fall through to check
	     three bytes.  */
	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
				   Pmode, 1, align_4_label);
	  emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
				   Pmode, 1, align_2_label);
	  emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
				   Pmode, 1, align_3_label);
	}
      else
	{
	  /* Since the alignment is 2, we have to check 2 or 0 bytes;
	     check if is aligned to 4 - byte.  */

	  align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
				    NULL_RTX, 0, OPTAB_WIDEN);

	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
				   Pmode, 1, align_4_label);
	}

      mem = change_address (src, QImode, out);

      /* Now compare the bytes.  */

      /* Compare the first n unaligned byte on a byte per byte basis.  */
      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
			       QImode, 1, end_0_label);

      /* Increment the address.  */
      if (TARGET_64BIT)
	emit_insn (gen_adddi3 (out, out, const1_rtx));
      else
	emit_insn (gen_addsi3 (out, out, const1_rtx));

      /* Not needed with an alignment of 2 */
      if (align != 2)
	{
	  emit_label (align_2_label);

	  emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
				   end_0_label);

	  if (TARGET_64BIT)
	    emit_insn (gen_adddi3 (out, out, const1_rtx));
	  else
	    emit_insn (gen_addsi3 (out, out, const1_rtx));

	  emit_label (align_3_label);
	}

      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
			       end_0_label);

      if (TARGET_64BIT)
	emit_insn (gen_adddi3 (out, out, const1_rtx));
      else
	emit_insn (gen_addsi3 (out, out, const1_rtx));
    }

  /* Generate loop to check 4 bytes at a time.  It is not a good idea to
     align this loop.  It gives only huge programs, but does not help to
     speed up.  */
  emit_label (align_4_label);

  mem = change_address (src, SImode, out);
  emit_move_insn (scratch, mem);
  if (TARGET_64BIT)
    emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
  else
    emit_insn (gen_addsi3 (out, out, GEN_INT (4)));

  /* This formula yields a nonzero result iff one of the bytes is zero.
     This saves three branches inside loop and many cycles.  */

  /* tmpreg = (word - 0x01010101) & ~word & 0x80808080; a set high bit
     in a byte of tmpreg marks a zero byte in the word.  */
  emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
  emit_insn (gen_one_cmplsi2 (scratch, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg,
			 gen_int_mode (0x80808080, SImode)));
  emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
			   align_4_label);

  /* A zero byte was found; locate it within the word.  OUT currently
     points one word past it.  */
  if (TARGET_CMOVE)
    {
      rtx reg = gen_reg_rtx (SImode);
      rtx reg2 = gen_reg_rtx (Pmode);
      emit_move_insn (reg, tmpreg);
      emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));

      /* If zero is not in the first two bytes, move two bytes forward.  */
      emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
			      gen_rtx_IF_THEN_ELSE (SImode, tmp,
						    reg,
						    tmpreg)));
      /* Emit lea manually to avoid clobbering of flags.  */
      emit_insn (gen_rtx_SET (SImode, reg2,
			      gen_rtx_PLUS (Pmode, out, const2_rtx)));

      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, out,
			      gen_rtx_IF_THEN_ELSE (Pmode, tmp,
						    reg2,
						    out)));

    }
  else
    {
      rtx end_2_label = gen_label_rtx ();
      /* Is zero in the first two bytes? */

      emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				  gen_rtx_LABEL_REF (VOIDmode, end_2_label),
				  pc_rtx);
      tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
      JUMP_LABEL (tmp) = end_2_label;

      /* Not in the first two.  Move two bytes forward.  */
      emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
      if (TARGET_64BIT)
	emit_insn (gen_adddi3 (out, out, const2_rtx));
      else
	emit_insn (gen_addsi3 (out, out, const2_rtx));

      emit_label (end_2_label);

    }

  /* Avoid branch in fixing the byte.  The add above sets the carry iff
     the zero byte is in the lower of the two remaining bytes; subtract
     3 with borrow so OUT ends up pointing exactly at the zero byte.
     NOTE(review): hard register 17 here is the flags register
     (FLAGS_REG elsewhere in this file).  */
  tmpreg = gen_lowpart (QImode, tmpreg);
  emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
  cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
  if (TARGET_64BIT)
    emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
  else
    emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));

  emit_label (end_0_label);
}
0e07aff3
RH
11153
/* Expand RTL for a call.  RETVAL, when non-null, receives the call's
   result.  FNADDR is a MEM wrapping the function address.  CALLARG1 is
   passed through as the second operand of the CALL rtx.  CALLARG2, on
   64-bit targets when non-negative, is loaded into %al before the call
   (presumably the x86-64 varargs SSE-register count -- confirm against
   callers).  POP is the number of bytes the callee pops (const0_rtx or
   NULL for none).  SIBCALL is nonzero when expanding a sibling (tail)
   call.  */
void
ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
		  rtx callarg2 ATTRIBUTE_UNUSED,
		  rtx pop, int sibcall)
{
  rtx use = NULL, call;

  /* Normalize "pop zero bytes" to no callee-pop at all.  */
  if (pop == const0_rtx)
    pop = NULL;
  /* Callee-pop is not supported on 64-bit targets.  */
  if (TARGET_64BIT && pop)
    abort ();

#if TARGET_MACHO
  if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
    fnaddr = machopic_indirect_call_target (fnaddr);
#else
  /* Static functions and indirect calls don't need the pic register.  */
  if (! TARGET_64BIT && flag_pic
      && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
      && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
    use_reg (&use, pic_offset_table_rtx);

  /* Load CALLARG2 into %al (hard register 0 in QImode) and mark it
     as used by the call.  */
  if (TARGET_64BIT && INTVAL (callarg2) >= 0)
    {
      rtx al = gen_rtx_REG (QImode, 0);
      emit_move_insn (al, callarg2);
      use_reg (&use, al);
    }
#endif /* TARGET_MACHO */

  /* Force an address the call patterns cannot accept directly into a
     register.  */
  if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
    {
      fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
      fnaddr = gen_rtx_MEM (QImode, fnaddr);
    }
  /* 64-bit sibcalls to a non-constant address are routed through R11.  */
  if (sibcall && TARGET_64BIT
      && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
    {
      rtx addr;
      addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
      fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
      emit_move_insn (fnaddr, addr);
      fnaddr = gen_rtx_MEM (QImode, fnaddr);
    }

  call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
  if (retval)
    call = gen_rtx_SET (VOIDmode, retval, call);
  /* A callee-pop call is a PARALLEL of the call itself and the stack
     pointer adjustment.  */
  if (pop)
    {
      pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
      pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
      call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
    }

  call = emit_call_insn (call);
  if (use)
    CALL_INSN_FUNCTION_USAGE (call) = use;
}
fce5a9f2 11213
e075ae69 11214\f
e075ae69
RH
11215/* Clear stack slot assignments remembered from previous functions.
11216 This is called from INIT_EXPANDERS once before RTL is emitted for each
11217 function. */
11218
e2500fed 11219static struct machine_function *
b96a374d 11220ix86_init_machine_status (void)
37b15744 11221{
d7394366
JH
11222 struct machine_function *f;
11223
11224 f = ggc_alloc_cleared (sizeof (struct machine_function));
11225 f->use_fast_prologue_epilogue_nregs = -1;
8330e2c6
AJ
11226
11227 return f;
1526a060
BS
11228}
11229
e075ae69
RH
11230/* Return a MEM corresponding to a stack slot with mode MODE.
11231 Allocate a new slot if necessary.
11232
11233 The RTL for a function can have several slots available: N is
11234 which slot to use. */
11235
11236rtx
b96a374d 11237assign_386_stack_local (enum machine_mode mode, int n)
e075ae69 11238{
ddb0ae00
ZW
11239 struct stack_local_entry *s;
11240
e075ae69
RH
11241 if (n < 0 || n >= MAX_386_STACK_LOCALS)
11242 abort ();
11243
ddb0ae00
ZW
11244 for (s = ix86_stack_locals; s; s = s->next)
11245 if (s->mode == mode && s->n == n)
11246 return s->rtl;
11247
11248 s = (struct stack_local_entry *)
11249 ggc_alloc (sizeof (struct stack_local_entry));
11250 s->n = n;
11251 s->mode = mode;
11252 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
e075ae69 11253
ddb0ae00
ZW
11254 s->next = ix86_stack_locals;
11255 ix86_stack_locals = s;
11256 return s->rtl;
e075ae69 11257}
f996902d
RH
11258
11259/* Construct the SYMBOL_REF for the tls_get_addr function. */
11260
e2500fed 11261static GTY(()) rtx ix86_tls_symbol;
f996902d 11262rtx
b96a374d 11263ix86_tls_get_addr (void)
f996902d 11264{
f996902d 11265
e2500fed 11266 if (!ix86_tls_symbol)
f996902d 11267 {
75d38379
JJ
11268 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
11269 (TARGET_GNU_TLS && !TARGET_64BIT)
11270 ? "___tls_get_addr"
11271 : "__tls_get_addr");
f996902d
RH
11272 }
11273
e2500fed 11274 return ix86_tls_symbol;
f996902d 11275}
e075ae69
RH
11276\f
11277/* Calculate the length of the memory address in the instruction
11278 encoding. Does not include the one-byte modrm, opcode, or prefix. */
11279
8fe75e43 11280int
b96a374d 11281memory_address_length (rtx addr)
e075ae69
RH
11282{
11283 struct ix86_address parts;
11284 rtx base, index, disp;
11285 int len;
11286
11287 if (GET_CODE (addr) == PRE_DEC
0d7d98ee
JH
11288 || GET_CODE (addr) == POST_INC
11289 || GET_CODE (addr) == PRE_MODIFY
11290 || GET_CODE (addr) == POST_MODIFY)
e075ae69 11291 return 0;
3f803cd9 11292
e075ae69
RH
11293 if (! ix86_decompose_address (addr, &parts))
11294 abort ();
3f803cd9 11295
e075ae69
RH
11296 base = parts.base;
11297 index = parts.index;
11298 disp = parts.disp;
11299 len = 0;
3f803cd9 11300
7b65ed54
EB
11301 /* Rule of thumb:
11302 - esp as the base always wants an index,
11303 - ebp as the base always wants a displacement. */
11304
e075ae69
RH
11305 /* Register Indirect. */
11306 if (base && !index && !disp)
11307 {
7b65ed54
EB
11308 /* esp (for its index) and ebp (for its displacement) need
11309 the two-byte modrm form. */
e075ae69
RH
11310 if (addr == stack_pointer_rtx
11311 || addr == arg_pointer_rtx
564d80f4
JH
11312 || addr == frame_pointer_rtx
11313 || addr == hard_frame_pointer_rtx)
e075ae69 11314 len = 1;
3f803cd9 11315 }
e9a25f70 11316
e075ae69
RH
11317 /* Direct Addressing. */
11318 else if (disp && !base && !index)
11319 len = 4;
11320
3f803cd9
SC
11321 else
11322 {
e075ae69
RH
11323 /* Find the length of the displacement constant. */
11324 if (disp)
11325 {
11326 if (GET_CODE (disp) == CONST_INT
9b73c90a
EB
11327 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K')
11328 && base)
e075ae69
RH
11329 len = 1;
11330 else
11331 len = 4;
11332 }
7b65ed54
EB
11333 /* ebp always wants a displacement. */
11334 else if (base == hard_frame_pointer_rtx)
11335 len = 1;
3f803cd9 11336
43f3a59d 11337 /* An index requires the two-byte modrm form.... */
7b65ed54
EB
11338 if (index
11339 /* ...like esp, which always wants an index. */
11340 || base == stack_pointer_rtx
11341 || base == arg_pointer_rtx
11342 || base == frame_pointer_rtx)
e075ae69 11343 len += 1;
3f803cd9
SC
11344 }
11345
e075ae69
RH
11346 return len;
11347}
79325812 11348
5bf0ebab
RH
11349/* Compute default value for "length_immediate" attribute. When SHORTFORM
11350 is set, expect that insn have 8bit immediate alternative. */
e075ae69 11351int
b96a374d 11352ix86_attr_length_immediate_default (rtx insn, int shortform)
e075ae69 11353{
6ef67412
JH
11354 int len = 0;
11355 int i;
6c698a6d 11356 extract_insn_cached (insn);
6ef67412
JH
11357 for (i = recog_data.n_operands - 1; i >= 0; --i)
11358 if (CONSTANT_P (recog_data.operand[i]))
3071fab5 11359 {
6ef67412 11360 if (len)
3071fab5 11361 abort ();
6ef67412
JH
11362 if (shortform
11363 && GET_CODE (recog_data.operand[i]) == CONST_INT
11364 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
11365 len = 1;
11366 else
11367 {
11368 switch (get_attr_mode (insn))
11369 {
11370 case MODE_QI:
11371 len+=1;
11372 break;
11373 case MODE_HI:
11374 len+=2;
11375 break;
11376 case MODE_SI:
11377 len+=4;
11378 break;
14f73b5a
JH
11379 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
11380 case MODE_DI:
11381 len+=4;
11382 break;
6ef67412 11383 default:
c725bd79 11384 fatal_insn ("unknown insn mode", insn);
6ef67412
JH
11385 }
11386 }
3071fab5 11387 }
6ef67412
JH
11388 return len;
11389}
11390/* Compute default value for "length_address" attribute. */
11391int
b96a374d 11392ix86_attr_length_address_default (rtx insn)
6ef67412
JH
11393{
11394 int i;
9b73c90a
EB
11395
11396 if (get_attr_type (insn) == TYPE_LEA)
11397 {
11398 rtx set = PATTERN (insn);
11399 if (GET_CODE (set) == SET)
11400 ;
11401 else if (GET_CODE (set) == PARALLEL
11402 && GET_CODE (XVECEXP (set, 0, 0)) == SET)
11403 set = XVECEXP (set, 0, 0);
11404 else
11405 {
11406#ifdef ENABLE_CHECKING
11407 abort ();
11408#endif
11409 return 0;
11410 }
11411
11412 return memory_address_length (SET_SRC (set));
11413 }
11414
6c698a6d 11415 extract_insn_cached (insn);
1ccbefce
RH
11416 for (i = recog_data.n_operands - 1; i >= 0; --i)
11417 if (GET_CODE (recog_data.operand[i]) == MEM)
e075ae69 11418 {
6ef67412 11419 return memory_address_length (XEXP (recog_data.operand[i], 0));
e075ae69
RH
11420 break;
11421 }
6ef67412 11422 return 0;
3f803cd9 11423}
e075ae69
RH
11424\f
11425/* Return the maximum number of instructions a cpu can issue. */
b657fc39 11426
c237e94a 11427static int
b96a374d 11428ix86_issue_rate (void)
b657fc39 11429{
9e555526 11430 switch (ix86_tune)
b657fc39 11431 {
e075ae69
RH
11432 case PROCESSOR_PENTIUM:
11433 case PROCESSOR_K6:
11434 return 2;
79325812 11435
e075ae69 11436 case PROCESSOR_PENTIUMPRO:
b4e89e2d
JH
11437 case PROCESSOR_PENTIUM4:
11438 case PROCESSOR_ATHLON:
4977bab6 11439 case PROCESSOR_K8:
89c43c0a 11440 case PROCESSOR_NOCONA:
e075ae69 11441 return 3;
b657fc39 11442
b657fc39 11443 default:
e075ae69 11444 return 1;
b657fc39 11445 }
b657fc39
L
11446}
11447
e075ae69
RH
11448/* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
11449 by DEP_INSN and nothing set by DEP_INSN. */
b657fc39 11450
e075ae69 11451static int
b96a374d 11452ix86_flags_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
e075ae69
RH
11453{
11454 rtx set, set2;
b657fc39 11455
e075ae69
RH
11456 /* Simplify the test for uninteresting insns. */
11457 if (insn_type != TYPE_SETCC
11458 && insn_type != TYPE_ICMOV
11459 && insn_type != TYPE_FCMOV
11460 && insn_type != TYPE_IBR)
11461 return 0;
b657fc39 11462
e075ae69
RH
11463 if ((set = single_set (dep_insn)) != 0)
11464 {
11465 set = SET_DEST (set);
11466 set2 = NULL_RTX;
11467 }
11468 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
11469 && XVECLEN (PATTERN (dep_insn), 0) == 2
11470 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
11471 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
11472 {
11473 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
11474 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
11475 }
78a0d70c
ZW
11476 else
11477 return 0;
b657fc39 11478
78a0d70c
ZW
11479 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
11480 return 0;
b657fc39 11481
f5143c46 11482 /* This test is true if the dependent insn reads the flags but
78a0d70c
ZW
11483 not any other potentially set register. */
11484 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
11485 return 0;
11486
11487 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
11488 return 0;
11489
11490 return 1;
e075ae69 11491}
b657fc39 11492
e075ae69
RH
11493/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
11494 address with operands set by DEP_INSN. */
11495
11496static int
b96a374d 11497ix86_agi_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
e075ae69
RH
11498{
11499 rtx addr;
11500
6ad48e84
JH
11501 if (insn_type == TYPE_LEA
11502 && TARGET_PENTIUM)
5fbdde42
RH
11503 {
11504 addr = PATTERN (insn);
11505 if (GET_CODE (addr) == SET)
11506 ;
11507 else if (GET_CODE (addr) == PARALLEL
11508 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
11509 addr = XVECEXP (addr, 0, 0);
11510 else
11511 abort ();
11512 addr = SET_SRC (addr);
11513 }
e075ae69
RH
11514 else
11515 {
11516 int i;
6c698a6d 11517 extract_insn_cached (insn);
1ccbefce
RH
11518 for (i = recog_data.n_operands - 1; i >= 0; --i)
11519 if (GET_CODE (recog_data.operand[i]) == MEM)
e075ae69 11520 {
1ccbefce 11521 addr = XEXP (recog_data.operand[i], 0);
e075ae69
RH
11522 goto found;
11523 }
11524 return 0;
11525 found:;
b657fc39
L
11526 }
11527
e075ae69 11528 return modified_in_p (addr, dep_insn);
b657fc39 11529}
a269a03c 11530
c237e94a 11531static int
b96a374d 11532ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
a269a03c 11533{
e075ae69 11534 enum attr_type insn_type, dep_insn_type;
8695f61e 11535 enum attr_memory memory;
e075ae69 11536 rtx set, set2;
9b00189f 11537 int dep_insn_code_number;
a269a03c 11538
d1f87653 11539 /* Anti and output dependencies have zero cost on all CPUs. */
e075ae69 11540 if (REG_NOTE_KIND (link) != 0)
309ada50 11541 return 0;
a269a03c 11542
9b00189f
JH
11543 dep_insn_code_number = recog_memoized (dep_insn);
11544
e075ae69 11545 /* If we can't recognize the insns, we can't really do anything. */
9b00189f 11546 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
e075ae69 11547 return cost;
a269a03c 11548
1c71e60e
JH
11549 insn_type = get_attr_type (insn);
11550 dep_insn_type = get_attr_type (dep_insn);
9b00189f 11551
9e555526 11552 switch (ix86_tune)
a269a03c
JC
11553 {
11554 case PROCESSOR_PENTIUM:
e075ae69
RH
11555 /* Address Generation Interlock adds a cycle of latency. */
11556 if (ix86_agi_dependant (insn, dep_insn, insn_type))
11557 cost += 1;
11558
11559 /* ??? Compares pair with jump/setcc. */
11560 if (ix86_flags_dependant (insn, dep_insn, insn_type))
11561 cost = 0;
11562
d1f87653 11563 /* Floating point stores require value to be ready one cycle earlier. */
0f290768 11564 if (insn_type == TYPE_FMOV
e075ae69
RH
11565 && get_attr_memory (insn) == MEMORY_STORE
11566 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11567 cost += 1;
11568 break;
a269a03c 11569
e075ae69 11570 case PROCESSOR_PENTIUMPRO:
6ad48e84 11571 memory = get_attr_memory (insn);
e075ae69
RH
11572
11573 /* INT->FP conversion is expensive. */
11574 if (get_attr_fp_int_src (dep_insn))
11575 cost += 5;
11576
11577 /* There is one cycle extra latency between an FP op and a store. */
11578 if (insn_type == TYPE_FMOV
11579 && (set = single_set (dep_insn)) != NULL_RTX
11580 && (set2 = single_set (insn)) != NULL_RTX
11581 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
11582 && GET_CODE (SET_DEST (set2)) == MEM)
11583 cost += 1;
6ad48e84
JH
11584
11585 /* Show ability of reorder buffer to hide latency of load by executing
11586 in parallel with previous instruction in case
11587 previous instruction is not needed to compute the address. */
11588 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11589 && !ix86_agi_dependant (insn, dep_insn, insn_type))
b96a374d 11590 {
6ad48e84
JH
11591 /* Claim moves to take one cycle, as core can issue one load
11592 at time and the next load can start cycle later. */
11593 if (dep_insn_type == TYPE_IMOV
11594 || dep_insn_type == TYPE_FMOV)
11595 cost = 1;
11596 else if (cost > 1)
11597 cost--;
11598 }
e075ae69 11599 break;
a269a03c 11600
e075ae69 11601 case PROCESSOR_K6:
6ad48e84 11602 memory = get_attr_memory (insn);
8695f61e 11603
e075ae69
RH
11604 /* The esp dependency is resolved before the instruction is really
11605 finished. */
11606 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
11607 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
11608 return 1;
a269a03c 11609
e075ae69
RH
11610 /* INT->FP conversion is expensive. */
11611 if (get_attr_fp_int_src (dep_insn))
11612 cost += 5;
6ad48e84
JH
11613
11614 /* Show ability of reorder buffer to hide latency of load by executing
11615 in parallel with previous instruction in case
11616 previous instruction is not needed to compute the address. */
11617 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11618 && !ix86_agi_dependant (insn, dep_insn, insn_type))
b96a374d 11619 {
6ad48e84
JH
11620 /* Claim moves to take one cycle, as core can issue one load
11621 at time and the next load can start cycle later. */
11622 if (dep_insn_type == TYPE_IMOV
11623 || dep_insn_type == TYPE_FMOV)
11624 cost = 1;
11625 else if (cost > 2)
11626 cost -= 2;
11627 else
11628 cost = 1;
11629 }
a14003ee 11630 break;
e075ae69 11631
309ada50 11632 case PROCESSOR_ATHLON:
4977bab6 11633 case PROCESSOR_K8:
6ad48e84 11634 memory = get_attr_memory (insn);
6ad48e84 11635
6ad48e84
JH
11636 /* Show ability of reorder buffer to hide latency of load by executing
11637 in parallel with previous instruction in case
11638 previous instruction is not needed to compute the address. */
11639 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11640 && !ix86_agi_dependant (insn, dep_insn, insn_type))
b96a374d 11641 {
26f74aa3
JH
11642 enum attr_unit unit = get_attr_unit (insn);
11643 int loadcost = 3;
11644
11645 /* Because of the difference between the length of integer and
11646 floating unit pipeline preparation stages, the memory operands
b96a374d 11647 for floating point are cheaper.
26f74aa3 11648
c51e6d85 11649 ??? For Athlon it the difference is most probably 2. */
26f74aa3
JH
11650 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
11651 loadcost = 3;
11652 else
11653 loadcost = TARGET_ATHLON ? 2 : 0;
11654
11655 if (cost >= loadcost)
11656 cost -= loadcost;
6ad48e84
JH
11657 else
11658 cost = 0;
11659 }
309ada50 11660
a269a03c 11661 default:
a269a03c
JC
11662 break;
11663 }
11664
11665 return cost;
11666}
0a726ef1 11667
9b690711
RH
11668/* How many alternative schedules to try. This should be as wide as the
11669 scheduling freedom in the DFA, but no wider. Making this value too
11670 large results extra work for the scheduler. */
11671
11672static int
b96a374d 11673ia32_multipass_dfa_lookahead (void)
9b690711 11674{
9e555526 11675 if (ix86_tune == PROCESSOR_PENTIUM)
9b690711 11676 return 2;
56bab446 11677
8695f61e
SB
11678 if (ix86_tune == PROCESSOR_PENTIUMPRO
11679 || ix86_tune == PROCESSOR_K6)
56bab446
SB
11680 return 1;
11681
9b690711 11682 else
56bab446 11683 return 0;
9b690711
RH
11684}
11685
0e4970d7 11686\f
7ccf35ed
DN
11687/* Implement the target hook targetm.vectorize.misaligned_mem_ok. */
11688
11689static bool
11690ix86_misaligned_mem_ok (enum machine_mode mode)
11691{
11692 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
11693 return true;
11694 else
11695 return false;
11696}
11697
a7180f70
BS
11698/* Compute the alignment given to a constant that is being placed in memory.
11699 EXP is the constant and ALIGN is the alignment that the object would
11700 ordinarily have.
11701 The value of this function is used instead of that alignment to align
11702 the object. */
11703
11704int
b96a374d 11705ix86_constant_alignment (tree exp, int align)
a7180f70
BS
11706{
11707 if (TREE_CODE (exp) == REAL_CST)
11708 {
11709 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
11710 return 64;
11711 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
11712 return 128;
11713 }
4137ba7a
JJ
11714 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
11715 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
11716 return BITS_PER_WORD;
a7180f70
BS
11717
11718 return align;
11719}
11720
11721/* Compute the alignment for a static variable.
11722 TYPE is the data type, and ALIGN is the alignment that
11723 the object would ordinarily have. The value of this function is used
11724 instead of that alignment to align the object. */
11725
11726int
b96a374d 11727ix86_data_alignment (tree type, int align)
a7180f70
BS
11728{
11729 if (AGGREGATE_TYPE_P (type)
11730 && TYPE_SIZE (type)
11731 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11732 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
11733 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
11734 return 256;
11735
0d7d98ee
JH
11736 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
11737 to 16byte boundary. */
11738 if (TARGET_64BIT)
11739 {
11740 if (AGGREGATE_TYPE_P (type)
11741 && TYPE_SIZE (type)
11742 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11743 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
11744 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
11745 return 128;
11746 }
11747
a7180f70
BS
11748 if (TREE_CODE (type) == ARRAY_TYPE)
11749 {
11750 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
11751 return 64;
11752 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
11753 return 128;
11754 }
11755 else if (TREE_CODE (type) == COMPLEX_TYPE)
11756 {
0f290768 11757
a7180f70
BS
11758 if (TYPE_MODE (type) == DCmode && align < 64)
11759 return 64;
11760 if (TYPE_MODE (type) == XCmode && align < 128)
11761 return 128;
11762 }
11763 else if ((TREE_CODE (type) == RECORD_TYPE
11764 || TREE_CODE (type) == UNION_TYPE
11765 || TREE_CODE (type) == QUAL_UNION_TYPE)
11766 && TYPE_FIELDS (type))
11767 {
11768 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
11769 return 64;
11770 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
11771 return 128;
11772 }
11773 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
11774 || TREE_CODE (type) == INTEGER_TYPE)
11775 {
11776 if (TYPE_MODE (type) == DFmode && align < 64)
11777 return 64;
11778 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
11779 return 128;
11780 }
11781
11782 return align;
11783}
11784
11785/* Compute the alignment for a local variable.
11786 TYPE is the data type, and ALIGN is the alignment that
11787 the object would ordinarily have. The value of this macro is used
11788 instead of that alignment to align the object. */
11789
11790int
b96a374d 11791ix86_local_alignment (tree type, int align)
a7180f70 11792{
0d7d98ee
JH
11793 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
11794 to 16byte boundary. */
11795 if (TARGET_64BIT)
11796 {
11797 if (AGGREGATE_TYPE_P (type)
11798 && TYPE_SIZE (type)
11799 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11800 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
11801 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
11802 return 128;
11803 }
a7180f70
BS
11804 if (TREE_CODE (type) == ARRAY_TYPE)
11805 {
11806 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
11807 return 64;
11808 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
11809 return 128;
11810 }
11811 else if (TREE_CODE (type) == COMPLEX_TYPE)
11812 {
11813 if (TYPE_MODE (type) == DCmode && align < 64)
11814 return 64;
11815 if (TYPE_MODE (type) == XCmode && align < 128)
11816 return 128;
11817 }
11818 else if ((TREE_CODE (type) == RECORD_TYPE
11819 || TREE_CODE (type) == UNION_TYPE
11820 || TREE_CODE (type) == QUAL_UNION_TYPE)
11821 && TYPE_FIELDS (type))
11822 {
11823 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
11824 return 64;
11825 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
11826 return 128;
11827 }
11828 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
11829 || TREE_CODE (type) == INTEGER_TYPE)
11830 {
0f290768 11831
a7180f70
BS
11832 if (TYPE_MODE (type) == DFmode && align < 64)
11833 return 64;
11834 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
11835 return 128;
11836 }
11837 return align;
11838}
0ed08620
JH
11839\f
/* Emit RTL insns to initialize the variable parts of a trampoline.
   FNADDR is an RTX for the address of the function's pure code.
   CXT is an RTX for the static chain value for the function.

   The byte stores below hand-assemble machine code into the trampoline
   (note that multi-byte constants like 0xbb41 are laid down
   little-endian, so 0x41 is the first byte).  */
void
x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
{
  if (!TARGET_64BIT)
    {
      /* Compute offset from the end of the jmp to the target function.  */
      rtx disp = expand_binop (SImode, sub_optab, fnaddr,
			       plus_constant (tramp, 10),
			       NULL_RTX, 1, OPTAB_DIRECT);
      /* 0xb9 = movl $imm32, %ecx -- load the static chain.  */
      emit_move_insn (gen_rtx_MEM (QImode, tramp),
		      gen_int_mode (0xb9, QImode));
      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
      /* 0xe9 = jmp rel32 -- jump to the target function.  */
      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
		      gen_int_mode (0xe9, QImode));
      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
    }
  else
    {
      int offset = 0;
      /* Try to load address using shorter movl instead of movabs.
	 We may want to support movq for kernel mode, but kernel does not use
	 trampolines at the moment.  */
      if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
	{
	  fnaddr = copy_to_mode_reg (DImode, fnaddr);
	  /* 0x41 0xbb = movl $imm32, %r11d (zero-extends into r11).  */
	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
			  gen_int_mode (0xbb41, HImode));
	  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
			  gen_lowpart (SImode, fnaddr));
	  offset += 6;
	}
      else
	{
	  /* 0x49 0xbb = movabs $imm64, %r11.  */
	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
			  gen_int_mode (0xbb49, HImode));
	  emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
			  fnaddr);
	  offset += 10;
	}
      /* Load static chain using movabs to r10 (0x49 0xba).  */
      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
		      gen_int_mode (0xba49, HImode));
      emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
		      cxt);
      offset += 10;
      /* Jump to the r11: 0x49 0xff 0xe3 = jmp *%r11.  */
      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
		      gen_int_mode (0xff49, HImode));
      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
		      gen_int_mode (0xe3, QImode));
      offset += 3;
      /* Sanity-check that we stayed within the trampoline's buffer.  */
      if (offset > TRAMPOLINE_SIZE)
	abort ();
    }

#ifdef ENABLE_EXECUTE_STACK
  /* Targets with non-executable stacks need a runtime call to make the
     trampoline's page executable.  */
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
		     LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
#endif
}
eeb06b1b 11903\f
6e34d3a3
JM
/* Register a target builtin NAME of type TYPE with builtin code CODE,
   but only when MASK overlaps the enabled target_flags -- and, when
   MASK includes MASK_64BIT, only on an actual 64-bit target.  */
#define def_builtin(MASK, NAME, TYPE, CODE)				\
do {									\
  if ((MASK) & target_flags						\
      && (!((MASK) & MASK_64BIT) || TARGET_64BIT))			\
    lang_hooks.builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD,	\
				 NULL, NULL_TREE);			\
} while (0)
bd793c65 11911
bd793c65
BS
/* One row of the builtin tables below: which target flags enable the
   builtin, the insn pattern that implements it, its user-visible name,
   and its IX86_BUILTIN_* code.  For comparison builtins, COMPARISON
   holds the rtx comparison code and FLAG an extra per-entry bit
   (apparently marking swapped-operand variants -- verify against the
   expanders).  */
struct builtin_description
{
  const unsigned int mask;		/* target_flags bits required */
  const enum insn_code icode;		/* implementing insn pattern */
  const char *const name;		/* __builtin_ia32_* name */
  const enum ix86_builtins code;
  const enum rtx_code comparison;
  const unsigned int flag;
};
11921
/* Scalar comparison builtins expanded through the comi/ucomi patterns.
   MASK_SSE entries use CODE_FOR_sse_comi / CODE_FOR_sse_ucomi;
   MASK_SSE2 entries the sse2 equivalents.  Each row maps a
   __builtin_ia32_* name to the rtx comparison code used to expand it.  */
static const struct builtin_description bdesc_comi[] =
{
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
};
11949
8b60264b 11950static const struct builtin_description bdesc_2arg[] =
bd793c65
BS
11951{
11952 /* SSE */
37f22004
L
11953 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
11954 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
11955 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
11956 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
11957 { MASK_SSE, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
11958 { MASK_SSE, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
11959 { MASK_SSE, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
11960 { MASK_SSE, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
11961
11962 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
11963 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
11964 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
11965 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
11966 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
11967 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
11968 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
11969 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
11970 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
11971 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
11972 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
11973 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
11974 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
11975 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
11976 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
11977 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
11978 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
11979 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
11980 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
11981 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
11982
11983 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
11984 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
11985 { MASK_SSE, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
11986 { MASK_SSE, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
11987
11988 { MASK_SSE, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
11989 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
11990 { MASK_SSE, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
11991 { MASK_SSE, CODE_FOR_sse_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
11992
11993 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
11994 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
11995 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
11996 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
11997 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
bd793c65
BS
11998
11999 /* MMX */
eeb06b1b
BS
12000 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
12001 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
12002 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
d50672ef 12003 { MASK_MMX, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
eeb06b1b
BS
12004 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
12005 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
12006 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
d50672ef 12007 { MASK_MMX, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
eeb06b1b
BS
12008
12009 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
12010 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
12011 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
12012 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
12013 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
12014 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
12015 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
12016 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
12017
12018 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
12019 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
37f22004 12020 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
eeb06b1b
BS
12021
12022 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
12023 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
12024 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
12025 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
12026
37f22004
L
12027 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
12028 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
eeb06b1b
BS
12029
12030 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
12031 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
12032 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
12033 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
12034 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
12035 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
12036
37f22004
L
12037 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
12038 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
12039 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
12040 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
eeb06b1b
BS
12041
12042 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
12043 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
12044 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
12045 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
12046 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
12047 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
bd793c65
BS
12048
12049 /* Special. */
eeb06b1b
BS
12050 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
12051 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
12052 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
12053
37f22004
L
12054 { MASK_SSE, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
12055 { MASK_SSE, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
12056 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
eeb06b1b
BS
12057
12058 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
12059 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
12060 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
12061 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
12062 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
12063 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
12064
12065 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
12066 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
12067 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
12068 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
12069 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
12070 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
12071
12072 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
12073 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
12074 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
12075 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
12076
37f22004 12077 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
fbe5eb6d
BS
12078 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
12079
12080 /* SSE2 */
12081 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
12082 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
12083 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
12084 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
12085 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
12086 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
12087 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
12088 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
12089
12090 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
12091 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
12092 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
12093 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
12094 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
12095 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
12096 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
12097 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
12098 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
12099 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
12100 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
12101 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
12102 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
12103 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
12104 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
fbe5eb6d
BS
12105 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
12106 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
12107 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
12108 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
fbe5eb6d
BS
12109 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
12110
12111 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
12112 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
12113 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
12114 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
12115
1877be45
JH
12116 { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
12117 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
12118 { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
12119 { MASK_SSE2, CODE_FOR_sse2_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
fbe5eb6d
BS
12120
12121 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
12122 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
12123 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
12124
12125 /* SSE2 MMX */
12126 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
12127 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
12128 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
d50672ef 12129 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
fbe5eb6d
BS
12130 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
12131 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
12132 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
d50672ef 12133 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
fbe5eb6d
BS
12134
12135 { MASK_MMX, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
12136 { MASK_MMX, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
12137 { MASK_MMX, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
12138 { MASK_MMX, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
12139 { MASK_MMX, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
12140 { MASK_MMX, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
12141 { MASK_MMX, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
12142 { MASK_MMX, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
12143
12144 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
12145 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
fbe5eb6d 12146
916b60b7
BS
12147 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
12148 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
12149 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
12150 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
fbe5eb6d
BS
12151
12152 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
12153 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
12154
12155 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
12156 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
12157 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
12158 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
12159 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
12160 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
12161
12162 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
12163 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
12164 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
12165 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
12166
12167 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
12168 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
12169 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
077084dd 12170 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
fbe5eb6d
BS
12171 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
12172 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
12173 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
f02e1358 12174 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
fbe5eb6d 12175
916b60b7
BS
12176 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
12177 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
12178 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
12179
12180 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
12181 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
12182
9e9fb0ce
JB
12183 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
12184 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },
12185
916b60b7
BS
12186 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
12187 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
12188 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
12189 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
12190 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
12191 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
12192
12193 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
12194 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
12195 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
12196 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
12197 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
12198 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
12199
12200 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
12201 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
12202 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
12203 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
12204
12205 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
12206
fbe5eb6d 12207 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
37f22004 12208 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
fbe5eb6d 12209 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
22c7c85e
L
12210 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
12211
9e200aaf
KC
12212 /* SSE3 MMX */
12213 { MASK_SSE3, CODE_FOR_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
12214 { MASK_SSE3, CODE_FOR_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
12215 { MASK_SSE3, CODE_FOR_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
12216 { MASK_SSE3, CODE_FOR_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
12217 { MASK_SSE3, CODE_FOR_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
12218 { MASK_SSE3, CODE_FOR_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
bd793c65
BS
12219};
12220
8b60264b 12221static const struct builtin_description bdesc_1arg[] =
bd793c65 12222{
37f22004
L
12223 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
12224 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
fbe5eb6d 12225
37f22004
L
12226 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
12227 { MASK_SSE, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
12228 { MASK_SSE, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
fbe5eb6d 12229
37f22004
L
12230 { MASK_SSE, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
12231 { MASK_SSE, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
12232 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
12233 { MASK_SSE, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
12234 { MASK_SSE, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
12235 { MASK_SSE | MASK_64BIT, CODE_FOR_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
fbe5eb6d
BS
12236
12237 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
12238 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
12239 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
f02e1358 12240 { MASK_SSE2, CODE_FOR_sse2_movdq2q, 0, IX86_BUILTIN_MOVDQ2Q, 0, 0 },
fbe5eb6d
BS
12241
12242 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
12243
12244 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
12245 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
bd793c65 12246
fbe5eb6d
BS
12247 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
12248 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
12249 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
12250 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
12251 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
bd793c65 12252
fbe5eb6d 12253 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
bd793c65 12254
fbe5eb6d
BS
12255 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
12256 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
37f22004
L
12257 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
12258 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
fbe5eb6d
BS
12259
12260 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
12261 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
f02e1358
JH
12262 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
12263
22c7c85e
L
12264 { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 },
12265
9e200aaf
KC
12266 /* SSE3 */
12267 { MASK_SSE3, CODE_FOR_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
12268 { MASK_SSE3, CODE_FOR_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
12269 { MASK_SSE3, CODE_FOR_movddup, 0, IX86_BUILTIN_MOVDDUP, 0, 0 }
bd793c65
BS
12270};
12271
f6155fda 12272void
b96a374d 12273ix86_init_builtins (void)
f6155fda
SS
12274{
12275 if (TARGET_MMX)
12276 ix86_init_mmx_sse_builtins ();
12277}
12278
12279/* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
bd793c65
BS
12280 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
12281 builtins. */
e37af218 12282static void
b96a374d 12283ix86_init_mmx_sse_builtins (void)
bd793c65 12284{
8b60264b 12285 const struct builtin_description * d;
77ebd435 12286 size_t i;
bd793c65 12287
4a5eab38
PB
12288 tree V16QI_type_node = build_vector_type_for_mode (intQI_type_node, V16QImode);
12289 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
12290 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
12291 tree V2DI_type_node = build_vector_type_for_mode (intDI_type_node, V2DImode);
12292 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
12293 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
12294 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
12295 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
12296 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
12297 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
12298
bd793c65 12299 tree pchar_type_node = build_pointer_type (char_type_node);
068f5dea
JH
12300 tree pcchar_type_node = build_pointer_type (
12301 build_type_variant (char_type_node, 1, 0));
bd793c65 12302 tree pfloat_type_node = build_pointer_type (float_type_node);
068f5dea
JH
12303 tree pcfloat_type_node = build_pointer_type (
12304 build_type_variant (float_type_node, 1, 0));
bd793c65 12305 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
916b60b7 12306 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
bd793c65
BS
12307 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
12308
12309 /* Comparisons. */
12310 tree int_ftype_v4sf_v4sf
b4de2f7d
AH
12311 = build_function_type_list (integer_type_node,
12312 V4SF_type_node, V4SF_type_node, NULL_TREE);
bd793c65 12313 tree v4si_ftype_v4sf_v4sf
b4de2f7d
AH
12314 = build_function_type_list (V4SI_type_node,
12315 V4SF_type_node, V4SF_type_node, NULL_TREE);
bd793c65 12316 /* MMX/SSE/integer conversions. */
bd793c65 12317 tree int_ftype_v4sf
b4de2f7d
AH
12318 = build_function_type_list (integer_type_node,
12319 V4SF_type_node, NULL_TREE);
453ee231
JH
12320 tree int64_ftype_v4sf
12321 = build_function_type_list (long_long_integer_type_node,
12322 V4SF_type_node, NULL_TREE);
bd793c65 12323 tree int_ftype_v8qi
b4de2f7d 12324 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
bd793c65 12325 tree v4sf_ftype_v4sf_int
b4de2f7d
AH
12326 = build_function_type_list (V4SF_type_node,
12327 V4SF_type_node, integer_type_node, NULL_TREE);
453ee231
JH
12328 tree v4sf_ftype_v4sf_int64
12329 = build_function_type_list (V4SF_type_node,
12330 V4SF_type_node, long_long_integer_type_node,
12331 NULL_TREE);
bd793c65 12332 tree v4sf_ftype_v4sf_v2si
b4de2f7d
AH
12333 = build_function_type_list (V4SF_type_node,
12334 V4SF_type_node, V2SI_type_node, NULL_TREE);
bd793c65 12335 tree int_ftype_v4hi_int
b4de2f7d
AH
12336 = build_function_type_list (integer_type_node,
12337 V4HI_type_node, integer_type_node, NULL_TREE);
bd793c65 12338 tree v4hi_ftype_v4hi_int_int
e7a60f56 12339 = build_function_type_list (V4HI_type_node, V4HI_type_node,
b4de2f7d
AH
12340 integer_type_node, integer_type_node,
12341 NULL_TREE);
bd793c65
BS
12342 /* Miscellaneous. */
12343 tree v8qi_ftype_v4hi_v4hi
b4de2f7d
AH
12344 = build_function_type_list (V8QI_type_node,
12345 V4HI_type_node, V4HI_type_node, NULL_TREE);
bd793c65 12346 tree v4hi_ftype_v2si_v2si
b4de2f7d
AH
12347 = build_function_type_list (V4HI_type_node,
12348 V2SI_type_node, V2SI_type_node, NULL_TREE);
bd793c65 12349 tree v4sf_ftype_v4sf_v4sf_int
b4de2f7d
AH
12350 = build_function_type_list (V4SF_type_node,
12351 V4SF_type_node, V4SF_type_node,
12352 integer_type_node, NULL_TREE);
bd793c65 12353 tree v2si_ftype_v4hi_v4hi
b4de2f7d
AH
12354 = build_function_type_list (V2SI_type_node,
12355 V4HI_type_node, V4HI_type_node, NULL_TREE);
bd793c65 12356 tree v4hi_ftype_v4hi_int
b4de2f7d 12357 = build_function_type_list (V4HI_type_node,
e7a60f56 12358 V4HI_type_node, integer_type_node, NULL_TREE);
bd793c65 12359 tree v4hi_ftype_v4hi_di
b4de2f7d
AH
12360 = build_function_type_list (V4HI_type_node,
12361 V4HI_type_node, long_long_unsigned_type_node,
12362 NULL_TREE);
bd793c65 12363 tree v2si_ftype_v2si_di
b4de2f7d
AH
12364 = build_function_type_list (V2SI_type_node,
12365 V2SI_type_node, long_long_unsigned_type_node,
12366 NULL_TREE);
bd793c65 12367 tree void_ftype_void
b4de2f7d 12368 = build_function_type (void_type_node, void_list_node);
bd793c65 12369 tree void_ftype_unsigned
b4de2f7d 12370 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
22c7c85e
L
12371 tree void_ftype_unsigned_unsigned
12372 = build_function_type_list (void_type_node, unsigned_type_node,
12373 unsigned_type_node, NULL_TREE);
12374 tree void_ftype_pcvoid_unsigned_unsigned
12375 = build_function_type_list (void_type_node, const_ptr_type_node,
12376 unsigned_type_node, unsigned_type_node,
12377 NULL_TREE);
bd793c65 12378 tree unsigned_ftype_void
b4de2f7d 12379 = build_function_type (unsigned_type_node, void_list_node);
bd793c65 12380 tree di_ftype_void
b4de2f7d 12381 = build_function_type (long_long_unsigned_type_node, void_list_node);
e37af218 12382 tree v4sf_ftype_void
b4de2f7d 12383 = build_function_type (V4SF_type_node, void_list_node);
bd793c65 12384 tree v2si_ftype_v4sf
b4de2f7d 12385 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
bd793c65 12386 /* Loads/stores. */
bd793c65 12387 tree void_ftype_v8qi_v8qi_pchar
b4de2f7d
AH
12388 = build_function_type_list (void_type_node,
12389 V8QI_type_node, V8QI_type_node,
12390 pchar_type_node, NULL_TREE);
068f5dea
JH
12391 tree v4sf_ftype_pcfloat
12392 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
bd793c65
BS
12393 /* @@@ the type is bogus */
12394 tree v4sf_ftype_v4sf_pv2si
b4de2f7d 12395 = build_function_type_list (V4SF_type_node,
f8ca7923 12396 V4SF_type_node, pv2si_type_node, NULL_TREE);
1255c85c 12397 tree void_ftype_pv2si_v4sf
b4de2f7d 12398 = build_function_type_list (void_type_node,
f8ca7923 12399 pv2si_type_node, V4SF_type_node, NULL_TREE);
bd793c65 12400 tree void_ftype_pfloat_v4sf
b4de2f7d
AH
12401 = build_function_type_list (void_type_node,
12402 pfloat_type_node, V4SF_type_node, NULL_TREE);
bd793c65 12403 tree void_ftype_pdi_di
b4de2f7d
AH
12404 = build_function_type_list (void_type_node,
12405 pdi_type_node, long_long_unsigned_type_node,
12406 NULL_TREE);
916b60b7 12407 tree void_ftype_pv2di_v2di
b4de2f7d
AH
12408 = build_function_type_list (void_type_node,
12409 pv2di_type_node, V2DI_type_node, NULL_TREE);
bd793c65
BS
12410 /* Normal vector unops. */
12411 tree v4sf_ftype_v4sf
b4de2f7d 12412 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
0f290768 12413
bd793c65
BS
12414 /* Normal vector binops. */
12415 tree v4sf_ftype_v4sf_v4sf
b4de2f7d
AH
12416 = build_function_type_list (V4SF_type_node,
12417 V4SF_type_node, V4SF_type_node, NULL_TREE);
bd793c65 12418 tree v8qi_ftype_v8qi_v8qi
b4de2f7d
AH
12419 = build_function_type_list (V8QI_type_node,
12420 V8QI_type_node, V8QI_type_node, NULL_TREE);
bd793c65 12421 tree v4hi_ftype_v4hi_v4hi
b4de2f7d
AH
12422 = build_function_type_list (V4HI_type_node,
12423 V4HI_type_node, V4HI_type_node, NULL_TREE);
bd793c65 12424 tree v2si_ftype_v2si_v2si
b4de2f7d
AH
12425 = build_function_type_list (V2SI_type_node,
12426 V2SI_type_node, V2SI_type_node, NULL_TREE);
bd793c65 12427 tree di_ftype_di_di
b4de2f7d
AH
12428 = build_function_type_list (long_long_unsigned_type_node,
12429 long_long_unsigned_type_node,
12430 long_long_unsigned_type_node, NULL_TREE);
bd793c65 12431
47f339cf 12432 tree v2si_ftype_v2sf
ae3aa00d 12433 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
47f339cf 12434 tree v2sf_ftype_v2si
b4de2f7d 12435 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
47f339cf 12436 tree v2si_ftype_v2si
b4de2f7d 12437 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
47f339cf 12438 tree v2sf_ftype_v2sf
b4de2f7d 12439 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
47f339cf 12440 tree v2sf_ftype_v2sf_v2sf
b4de2f7d
AH
12441 = build_function_type_list (V2SF_type_node,
12442 V2SF_type_node, V2SF_type_node, NULL_TREE);
47f339cf 12443 tree v2si_ftype_v2sf_v2sf
b4de2f7d
AH
12444 = build_function_type_list (V2SI_type_node,
12445 V2SF_type_node, V2SF_type_node, NULL_TREE);
fbe5eb6d 12446 tree pint_type_node = build_pointer_type (integer_type_node);
068f5dea
JH
12447 tree pcint_type_node = build_pointer_type (
12448 build_type_variant (integer_type_node, 1, 0));
fbe5eb6d 12449 tree pdouble_type_node = build_pointer_type (double_type_node);
068f5dea
JH
12450 tree pcdouble_type_node = build_pointer_type (
12451 build_type_variant (double_type_node, 1, 0));
fbe5eb6d 12452 tree int_ftype_v2df_v2df
b4de2f7d
AH
12453 = build_function_type_list (integer_type_node,
12454 V2DF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d
BS
12455
12456 tree ti_ftype_void
b4de2f7d 12457 = build_function_type (intTI_type_node, void_list_node);
f02e1358
JH
12458 tree v2di_ftype_void
12459 = build_function_type (V2DI_type_node, void_list_node);
fbe5eb6d 12460 tree ti_ftype_ti_ti
b4de2f7d
AH
12461 = build_function_type_list (intTI_type_node,
12462 intTI_type_node, intTI_type_node, NULL_TREE);
068f5dea
JH
12463 tree void_ftype_pcvoid
12464 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
fbe5eb6d 12465 tree v2di_ftype_di
b4de2f7d
AH
12466 = build_function_type_list (V2DI_type_node,
12467 long_long_unsigned_type_node, NULL_TREE);
f02e1358
JH
12468 tree di_ftype_v2di
12469 = build_function_type_list (long_long_unsigned_type_node,
12470 V2DI_type_node, NULL_TREE);
fbe5eb6d 12471 tree v4sf_ftype_v4si
b4de2f7d 12472 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
fbe5eb6d 12473 tree v4si_ftype_v4sf
b4de2f7d 12474 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
fbe5eb6d 12475 tree v2df_ftype_v4si
b4de2f7d 12476 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
fbe5eb6d 12477 tree v4si_ftype_v2df
b4de2f7d 12478 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 12479 tree v2si_ftype_v2df
b4de2f7d 12480 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 12481 tree v4sf_ftype_v2df
b4de2f7d 12482 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 12483 tree v2df_ftype_v2si
b4de2f7d 12484 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
fbe5eb6d 12485 tree v2df_ftype_v4sf
b4de2f7d 12486 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
fbe5eb6d 12487 tree int_ftype_v2df
b4de2f7d 12488 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
453ee231
JH
12489 tree int64_ftype_v2df
12490 = build_function_type_list (long_long_integer_type_node,
b96a374d 12491 V2DF_type_node, NULL_TREE);
fbe5eb6d 12492 tree v2df_ftype_v2df_int
b4de2f7d
AH
12493 = build_function_type_list (V2DF_type_node,
12494 V2DF_type_node, integer_type_node, NULL_TREE);
453ee231
JH
12495 tree v2df_ftype_v2df_int64
12496 = build_function_type_list (V2DF_type_node,
12497 V2DF_type_node, long_long_integer_type_node,
12498 NULL_TREE);
fbe5eb6d 12499 tree v4sf_ftype_v4sf_v2df
b4de2f7d
AH
12500 = build_function_type_list (V4SF_type_node,
12501 V4SF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 12502 tree v2df_ftype_v2df_v4sf
b4de2f7d
AH
12503 = build_function_type_list (V2DF_type_node,
12504 V2DF_type_node, V4SF_type_node, NULL_TREE);
fbe5eb6d 12505 tree v2df_ftype_v2df_v2df_int
b4de2f7d
AH
12506 = build_function_type_list (V2DF_type_node,
12507 V2DF_type_node, V2DF_type_node,
12508 integer_type_node,
12509 NULL_TREE);
fbe5eb6d 12510 tree v2df_ftype_v2df_pv2si
b4de2f7d
AH
12511 = build_function_type_list (V2DF_type_node,
12512 V2DF_type_node, pv2si_type_node, NULL_TREE);
fbe5eb6d 12513 tree void_ftype_pv2si_v2df
b4de2f7d
AH
12514 = build_function_type_list (void_type_node,
12515 pv2si_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 12516 tree void_ftype_pdouble_v2df
b4de2f7d
AH
12517 = build_function_type_list (void_type_node,
12518 pdouble_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 12519 tree void_ftype_pint_int
b4de2f7d
AH
12520 = build_function_type_list (void_type_node,
12521 pint_type_node, integer_type_node, NULL_TREE);
fbe5eb6d 12522 tree void_ftype_v16qi_v16qi_pchar
b4de2f7d
AH
12523 = build_function_type_list (void_type_node,
12524 V16QI_type_node, V16QI_type_node,
12525 pchar_type_node, NULL_TREE);
068f5dea
JH
12526 tree v2df_ftype_pcdouble
12527 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
fbe5eb6d 12528 tree v2df_ftype_v2df_v2df
b4de2f7d
AH
12529 = build_function_type_list (V2DF_type_node,
12530 V2DF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 12531 tree v16qi_ftype_v16qi_v16qi
b4de2f7d
AH
12532 = build_function_type_list (V16QI_type_node,
12533 V16QI_type_node, V16QI_type_node, NULL_TREE);
fbe5eb6d 12534 tree v8hi_ftype_v8hi_v8hi
b4de2f7d
AH
12535 = build_function_type_list (V8HI_type_node,
12536 V8HI_type_node, V8HI_type_node, NULL_TREE);
fbe5eb6d 12537 tree v4si_ftype_v4si_v4si
b4de2f7d
AH
12538 = build_function_type_list (V4SI_type_node,
12539 V4SI_type_node, V4SI_type_node, NULL_TREE);
fbe5eb6d 12540 tree v2di_ftype_v2di_v2di
b4de2f7d
AH
12541 = build_function_type_list (V2DI_type_node,
12542 V2DI_type_node, V2DI_type_node, NULL_TREE);
fbe5eb6d 12543 tree v2di_ftype_v2df_v2df
b4de2f7d
AH
12544 = build_function_type_list (V2DI_type_node,
12545 V2DF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 12546 tree v2df_ftype_v2df
b4de2f7d 12547 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 12548 tree v2df_ftype_double
b4de2f7d 12549 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
fbe5eb6d 12550 tree v2df_ftype_double_double
b4de2f7d
AH
12551 = build_function_type_list (V2DF_type_node,
12552 double_type_node, double_type_node, NULL_TREE);
fbe5eb6d 12553 tree int_ftype_v8hi_int
b4de2f7d
AH
12554 = build_function_type_list (integer_type_node,
12555 V8HI_type_node, integer_type_node, NULL_TREE);
fbe5eb6d 12556 tree v8hi_ftype_v8hi_int_int
b4de2f7d
AH
12557 = build_function_type_list (V8HI_type_node,
12558 V8HI_type_node, integer_type_node,
12559 integer_type_node, NULL_TREE);
916b60b7 12560 tree v2di_ftype_v2di_int
b4de2f7d
AH
12561 = build_function_type_list (V2DI_type_node,
12562 V2DI_type_node, integer_type_node, NULL_TREE);
fbe5eb6d 12563 tree v4si_ftype_v4si_int
b4de2f7d
AH
12564 = build_function_type_list (V4SI_type_node,
12565 V4SI_type_node, integer_type_node, NULL_TREE);
fbe5eb6d 12566 tree v8hi_ftype_v8hi_int
b4de2f7d
AH
12567 = build_function_type_list (V8HI_type_node,
12568 V8HI_type_node, integer_type_node, NULL_TREE);
916b60b7 12569 tree v8hi_ftype_v8hi_v2di
b4de2f7d
AH
12570 = build_function_type_list (V8HI_type_node,
12571 V8HI_type_node, V2DI_type_node, NULL_TREE);
916b60b7 12572 tree v4si_ftype_v4si_v2di
b4de2f7d
AH
12573 = build_function_type_list (V4SI_type_node,
12574 V4SI_type_node, V2DI_type_node, NULL_TREE);
916b60b7 12575 tree v4si_ftype_v8hi_v8hi
b4de2f7d
AH
12576 = build_function_type_list (V4SI_type_node,
12577 V8HI_type_node, V8HI_type_node, NULL_TREE);
916b60b7 12578 tree di_ftype_v8qi_v8qi
b4de2f7d
AH
12579 = build_function_type_list (long_long_unsigned_type_node,
12580 V8QI_type_node, V8QI_type_node, NULL_TREE);
9e9fb0ce
JB
12581 tree di_ftype_v2si_v2si
12582 = build_function_type_list (long_long_unsigned_type_node,
12583 V2SI_type_node, V2SI_type_node, NULL_TREE);
916b60b7 12584 tree v2di_ftype_v16qi_v16qi
b4de2f7d
AH
12585 = build_function_type_list (V2DI_type_node,
12586 V16QI_type_node, V16QI_type_node, NULL_TREE);
9e9fb0ce
JB
12587 tree v2di_ftype_v4si_v4si
12588 = build_function_type_list (V2DI_type_node,
12589 V4SI_type_node, V4SI_type_node, NULL_TREE);
916b60b7 12590 tree int_ftype_v16qi
b4de2f7d 12591 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
068f5dea
JH
12592 tree v16qi_ftype_pcchar
12593 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
f02e1358
JH
12594 tree void_ftype_pchar_v16qi
12595 = build_function_type_list (void_type_node,
12596 pchar_type_node, V16QI_type_node, NULL_TREE);
068f5dea
JH
12597 tree v4si_ftype_pcint
12598 = build_function_type_list (V4SI_type_node, pcint_type_node, NULL_TREE);
12599 tree void_ftype_pcint_v4si
f02e1358 12600 = build_function_type_list (void_type_node,
068f5dea 12601 pcint_type_node, V4SI_type_node, NULL_TREE);
f02e1358
JH
12602 tree v2di_ftype_v2di
12603 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
47f339cf 12604
f8a1ebc6
JH
12605 tree float80_type;
12606 tree float128_type;
12607
12608 /* The __float80 type. */
12609 if (TYPE_MODE (long_double_type_node) == XFmode)
12610 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
12611 "__float80");
12612 else
12613 {
12614 /* The __float80 type. */
12615 float80_type = make_node (REAL_TYPE);
968a7562 12616 TYPE_PRECISION (float80_type) = 80;
f8a1ebc6
JH
12617 layout_type (float80_type);
12618 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
12619 }
12620
12621 float128_type = make_node (REAL_TYPE);
12622 TYPE_PRECISION (float128_type) = 128;
12623 layout_type (float128_type);
12624 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
12625
bd793c65
BS
12626 /* Add all builtins that are more or less simple operations on two
12627 operands. */
ca7558fc 12628 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
bd793c65
BS
12629 {
12630 /* Use one of the operands; the target can have a different mode for
12631 mask-generating compares. */
12632 enum machine_mode mode;
12633 tree type;
12634
12635 if (d->name == 0)
12636 continue;
12637 mode = insn_data[d->icode].operand[1].mode;
12638
bd793c65
BS
12639 switch (mode)
12640 {
fbe5eb6d
BS
12641 case V16QImode:
12642 type = v16qi_ftype_v16qi_v16qi;
12643 break;
12644 case V8HImode:
12645 type = v8hi_ftype_v8hi_v8hi;
12646 break;
12647 case V4SImode:
12648 type = v4si_ftype_v4si_v4si;
12649 break;
12650 case V2DImode:
12651 type = v2di_ftype_v2di_v2di;
12652 break;
12653 case V2DFmode:
12654 type = v2df_ftype_v2df_v2df;
12655 break;
12656 case TImode:
12657 type = ti_ftype_ti_ti;
12658 break;
bd793c65
BS
12659 case V4SFmode:
12660 type = v4sf_ftype_v4sf_v4sf;
12661 break;
12662 case V8QImode:
12663 type = v8qi_ftype_v8qi_v8qi;
12664 break;
12665 case V4HImode:
12666 type = v4hi_ftype_v4hi_v4hi;
12667 break;
12668 case V2SImode:
12669 type = v2si_ftype_v2si_v2si;
12670 break;
bd793c65
BS
12671 case DImode:
12672 type = di_ftype_di_di;
12673 break;
12674
12675 default:
12676 abort ();
12677 }
0f290768 12678
bd793c65
BS
12679 /* Override for comparisons. */
12680 if (d->icode == CODE_FOR_maskcmpv4sf3
12681 || d->icode == CODE_FOR_maskncmpv4sf3
12682 || d->icode == CODE_FOR_vmmaskcmpv4sf3
12683 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
12684 type = v4si_ftype_v4sf_v4sf;
12685
fbe5eb6d
BS
12686 if (d->icode == CODE_FOR_maskcmpv2df3
12687 || d->icode == CODE_FOR_maskncmpv2df3
12688 || d->icode == CODE_FOR_vmmaskcmpv2df3
12689 || d->icode == CODE_FOR_vmmaskncmpv2df3)
12690 type = v2di_ftype_v2df_v2df;
12691
eeb06b1b 12692 def_builtin (d->mask, d->name, type, d->code);
bd793c65
BS
12693 }
12694
12695 /* Add the remaining MMX insns with somewhat more complicated types. */
eeb06b1b
BS
12696 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
12697 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
eeb06b1b
BS
12698 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
12699 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
12700 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
12701
12702 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
12703 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
12704 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
12705
12706 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
12707 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
12708
12709 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
12710 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
0f290768 12711
bd793c65 12712 /* comi/ucomi insns. */
ca7558fc 12713 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
fbe5eb6d
BS
12714 if (d->mask == MASK_SSE2)
12715 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
12716 else
12717 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
bd793c65 12718
1255c85c
BS
12719 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
12720 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
12721 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
bd793c65 12722
37f22004
L
12723 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
12724 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
12725 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
12726 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
12727 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
12728 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
12729 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
12730 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
12731 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
12732 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
12733 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
12734
12735 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
12736 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
12737
12738 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
12739
12740 def_builtin (MASK_SSE, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADAPS);
12741 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
12742 def_builtin (MASK_SSE, "__builtin_ia32_loadss", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADSS);
12743 def_builtin (MASK_SSE, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
12744 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
12745 def_builtin (MASK_SSE, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
12746
12747 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
12748 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
12749 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
12750 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
12751
12752 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
12753 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
12754 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
12755 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
12756
12757 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
12758
12759 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
12760
12761 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
12762 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
12763 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
12764 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
12765 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
12766 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
12767
12768 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
bd793c65 12769
47f339cf
BS
12770 /* Original 3DNow! */
12771 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
12772 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
12773 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
12774 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
12775 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
12776 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
12777 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
12778 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
12779 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
12780 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
12781 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
12782 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
12783 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
12784 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
12785 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
12786 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
12787 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
12788 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
12789 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
12790 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
47f339cf
BS
12791
12792 /* 3DNow! extension as used in the Athlon CPU. */
12793 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
12794 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
12795 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
12796 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
12797 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
12798 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
12799
37f22004 12800 def_builtin (MASK_SSE, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
fbe5eb6d
BS
12801
12802 /* SSE2 */
12803 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
12804 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
12805
12806 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
12807 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
f02e1358 12808 def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q);
fbe5eb6d 12809
068f5dea
JH
12810 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADAPD);
12811 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
12812 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADSD);
fbe5eb6d
BS
12813 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
12814 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
12815 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
12816
12817 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
12818 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
12819 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
12820 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
12821
12822 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
916b60b7 12823 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
fbe5eb6d
BS
12824 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
12825 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
916b60b7 12826 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
fbe5eb6d
BS
12827
12828 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
12829 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
12830 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
916b60b7 12831 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
fbe5eb6d
BS
12832
12833 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
12834 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
12835
12836 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
12837
12838 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
916b60b7 12839 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
fbe5eb6d
BS
12840
12841 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
12842 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
12843 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
12844 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
12845 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
12846
12847 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
12848
12849 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
12850 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
37f22004
L
12851 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
12852 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
fbe5eb6d
BS
12853
12854 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
12855 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
12856 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
12857
12858 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
37f22004 12859 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
fbe5eb6d
BS
12860 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
12861 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
12862
12863 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
12864 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
12865 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
068f5dea
JH
12866 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pcdouble, IX86_BUILTIN_LOADPD1);
12867 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADRPD);
fbe5eb6d
BS
12868 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
12869 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
12870
068f5dea 12871 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
fbe5eb6d
BS
12872 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
12873 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
916b60b7 12874
068f5dea
JH
12875 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQA);
12876 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
12877 def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pcint, IX86_BUILTIN_LOADD);
f02e1358
JH
12878 def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA);
12879 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
068f5dea 12880 def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pcint_v4si, IX86_BUILTIN_STORED);
f02e1358
JH
12881 def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ);
12882
37f22004 12883 def_builtin (MASK_SSE, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI);
f02e1358 12884
9e9fb0ce
JB
12885 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
12886 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
12887
916b60b7
BS
12888 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
12889 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
12890 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
12891
12892 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
12893 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
12894 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
12895
12896 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
12897 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
12898
ab3146fd 12899 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
916b60b7
BS
12900 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
12901 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
12902 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
12903
ab3146fd 12904 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
916b60b7
BS
12905 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
12906 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
12907 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
12908
12909 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
12910 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
12911
12912 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
22c7c85e
L
12913
12914 /* Prescott New Instructions. */
9e200aaf 12915 def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
22c7c85e
L
12916 void_ftype_pcvoid_unsigned_unsigned,
12917 IX86_BUILTIN_MONITOR);
9e200aaf 12918 def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
22c7c85e
L
12919 void_ftype_unsigned_unsigned,
12920 IX86_BUILTIN_MWAIT);
9e200aaf 12921 def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
22c7c85e
L
12922 v4sf_ftype_v4sf,
12923 IX86_BUILTIN_MOVSHDUP);
9e200aaf 12924 def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
22c7c85e
L
12925 v4sf_ftype_v4sf,
12926 IX86_BUILTIN_MOVSLDUP);
9e200aaf 12927 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
22c7c85e 12928 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
9e200aaf 12929 def_builtin (MASK_SSE3, "__builtin_ia32_loadddup",
22c7c85e 12930 v2df_ftype_pcdouble, IX86_BUILTIN_LOADDDUP);
9e200aaf 12931 def_builtin (MASK_SSE3, "__builtin_ia32_movddup",
22c7c85e 12932 v2df_ftype_v2df, IX86_BUILTIN_MOVDDUP);
bd793c65
BS
12933}
12934
12935/* Errors in the source file can cause expand_expr to return const0_rtx
12936 where we expect a vector. To avoid crashing, use one of the vector
12937 clear instructions. */
12938static rtx
b96a374d 12939safe_vector_operand (rtx x, enum machine_mode mode)
bd793c65
BS
12940{
12941 if (x != const0_rtx)
12942 return x;
12943 x = gen_reg_rtx (mode);
12944
47f339cf 12945 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
bd793c65
BS
12946 emit_insn (gen_mmx_clrdi (mode == DImode ? x
12947 : gen_rtx_SUBREG (DImode, x, 0)));
12948 else
e37af218 12949 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
4977bab6
ZW
12950 : gen_rtx_SUBREG (V4SFmode, x, 0),
12951 CONST0_RTX (V4SFmode)));
bd793c65
BS
12952 return x;
12953}
12954
12955/* Subroutine of ix86_expand_builtin to take care of binop insns. */
12956
12957static rtx
b96a374d 12958ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
bd793c65
BS
12959{
12960 rtx pat;
12961 tree arg0 = TREE_VALUE (arglist);
12962 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12963 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12964 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12965 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12966 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12967 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
12968
12969 if (VECTOR_MODE_P (mode0))
12970 op0 = safe_vector_operand (op0, mode0);
12971 if (VECTOR_MODE_P (mode1))
12972 op1 = safe_vector_operand (op1, mode1);
12973
12974 if (! target
12975 || GET_MODE (target) != tmode
12976 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12977 target = gen_reg_rtx (tmode);
12978
d9deed68
JH
12979 if (GET_MODE (op1) == SImode && mode1 == TImode)
12980 {
12981 rtx x = gen_reg_rtx (V4SImode);
12982 emit_insn (gen_sse2_loadd (x, op1));
12983 op1 = gen_lowpart (TImode, x);
12984 }
12985
bd793c65
BS
12986 /* In case the insn wants input operands in modes different from
12987 the result, abort. */
ebe75517
JH
12988 if ((GET_MODE (op0) != mode0 && GET_MODE (op0) != VOIDmode)
12989 || (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode))
bd793c65
BS
12990 abort ();
12991
12992 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12993 op0 = copy_to_mode_reg (mode0, op0);
12994 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12995 op1 = copy_to_mode_reg (mode1, op1);
12996
59bef189
RH
12997 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
12998 yet one of the two must not be a memory. This is normally enforced
12999 by expanders, but we didn't bother to create one here. */
13000 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
13001 op0 = copy_to_mode_reg (mode0, op0);
13002
bd793c65
BS
13003 pat = GEN_FCN (icode) (target, op0, op1);
13004 if (! pat)
13005 return 0;
13006 emit_insn (pat);
13007 return target;
13008}
13009
13010/* Subroutine of ix86_expand_builtin to take care of stores. */
13011
13012static rtx
b96a374d 13013ix86_expand_store_builtin (enum insn_code icode, tree arglist)
bd793c65
BS
13014{
13015 rtx pat;
13016 tree arg0 = TREE_VALUE (arglist);
13017 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13018 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13019 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13020 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
13021 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
13022
13023 if (VECTOR_MODE_P (mode1))
13024 op1 = safe_vector_operand (op1, mode1);
13025
13026 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
7f0e57bd 13027 op1 = copy_to_mode_reg (mode1, op1);
59bef189 13028
bd793c65
BS
13029 pat = GEN_FCN (icode) (op0, op1);
13030 if (pat)
13031 emit_insn (pat);
13032 return 0;
13033}
13034
13035/* Subroutine of ix86_expand_builtin to take care of unop insns. */
13036
13037static rtx
b96a374d
AJ
13038ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
13039 rtx target, int do_load)
bd793c65
BS
13040{
13041 rtx pat;
13042 tree arg0 = TREE_VALUE (arglist);
13043 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13044 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13045 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13046
13047 if (! target
13048 || GET_MODE (target) != tmode
13049 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13050 target = gen_reg_rtx (tmode);
13051 if (do_load)
13052 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13053 else
13054 {
13055 if (VECTOR_MODE_P (mode0))
13056 op0 = safe_vector_operand (op0, mode0);
13057
13058 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13059 op0 = copy_to_mode_reg (mode0, op0);
13060 }
13061
13062 pat = GEN_FCN (icode) (target, op0);
13063 if (! pat)
13064 return 0;
13065 emit_insn (pat);
13066 return target;
13067}
13068
13069/* Subroutine of ix86_expand_builtin to take care of three special unop insns:
13070 sqrtss, rsqrtss, rcpss. */
13071
13072static rtx
b96a374d 13073ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
bd793c65
BS
13074{
13075 rtx pat;
13076 tree arg0 = TREE_VALUE (arglist);
59bef189 13077 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
bd793c65
BS
13078 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13079 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13080
13081 if (! target
13082 || GET_MODE (target) != tmode
13083 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13084 target = gen_reg_rtx (tmode);
13085
13086 if (VECTOR_MODE_P (mode0))
13087 op0 = safe_vector_operand (op0, mode0);
13088
13089 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13090 op0 = copy_to_mode_reg (mode0, op0);
fce5a9f2 13091
59bef189
RH
13092 op1 = op0;
13093 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
13094 op1 = copy_to_mode_reg (mode0, op1);
fce5a9f2 13095
59bef189 13096 pat = GEN_FCN (icode) (target, op0, op1);
bd793c65
BS
13097 if (! pat)
13098 return 0;
13099 emit_insn (pat);
13100 return target;
13101}
13102
/* Subroutine of ix86_expand_builtin to take care of comparison insns.

   D describes the builtin: D->icode is the insn pattern, D->comparison
   the rtx comparison code embedded in the pattern, and D->flag is
   nonzero when the requested comparison must be expressed by swapping
   the two operands.  Returns the result register, or 0 if the pattern
   could not be generated.  */

static rtx
ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
			 rtx target)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  rtx op2;
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  NOTE(review): op1 is copied into a fresh register before
     the swap (rather than swapped in place as in ix86_expand_sse_comi) —
     presumably so the swapped first operand is always a register; confirm
     against the insn's operand constraints before simplifying.  */
  if (d->flag)
    {
      rtx tmp = gen_reg_rtx (mode1);
      emit_move_insn (tmp, op1);
      op1 = op0;
      op0 = tmp;
    }

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  /* The comparison code itself is passed as a fourth, embedded rtx
     operand of the pattern.  */
  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
13152
13153/* Subroutine of ix86_expand_builtin to take care of comi insns. */
13154
13155static rtx
b96a374d
AJ
13156ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
13157 rtx target)
bd793c65
BS
13158{
13159 rtx pat;
13160 tree arg0 = TREE_VALUE (arglist);
13161 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13162 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13163 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13164 rtx op2;
13165 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
13166 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
13167 enum rtx_code comparison = d->comparison;
13168
13169 if (VECTOR_MODE_P (mode0))
13170 op0 = safe_vector_operand (op0, mode0);
13171 if (VECTOR_MODE_P (mode1))
13172 op1 = safe_vector_operand (op1, mode1);
13173
13174 /* Swap operands if we have a comparison that isn't available in
13175 hardware. */
13176 if (d->flag)
13177 {
13178 rtx tmp = op1;
13179 op1 = op0;
13180 op0 = tmp;
bd793c65
BS
13181 }
13182
13183 target = gen_reg_rtx (SImode);
13184 emit_move_insn (target, const0_rtx);
13185 target = gen_rtx_SUBREG (QImode, target, 0);
13186
13187 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
13188 op0 = copy_to_mode_reg (mode0, op0);
13189 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
13190 op1 = copy_to_mode_reg (mode1, op1);
13191
13192 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
1194ca05 13193 pat = GEN_FCN (d->icode) (op0, op1);
bd793c65
BS
13194 if (! pat)
13195 return 0;
13196 emit_insn (pat);
29628f27
BS
13197 emit_insn (gen_rtx_SET (VOIDmode,
13198 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
13199 gen_rtx_fmt_ee (comparison, QImode,
1194ca05 13200 SET_DEST (pat),
29628f27 13201 const0_rtx)));
bd793c65 13202
6f1a6c5b 13203 return SUBREG_REG (target);
bd793c65
BS
13204}
13205
/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.

   Builtins that need special handling are expanded by the switch below;
   everything else falls through to the generic two-operand, one-operand
   and comi tables (bdesc_2arg, bdesc_1arg, bdesc_comi).  */

rtx
ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
		     enum machine_mode mode ATTRIBUTE_UNUSED,
		     int ignore ATTRIBUTE_UNUSED)
{
  const struct builtin_description *d;
  size_t i;
  enum insn_code icode;
  tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
  tree arglist = TREE_OPERAND (exp, 1);
  tree arg0, arg1, arg2;
  rtx op0, op1, op2, pat;
  enum machine_mode tmode, mode0, mode1, mode2;
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);

  switch (fcode)
    {
    case IX86_BUILTIN_EMMS:
      emit_insn (gen_emms ());
      return 0;

    case IX86_BUILTIN_SFENCE:
      emit_insn (gen_sfence ());
      return 0;

    case IX86_BUILTIN_PEXTRW:
    case IX86_BUILTIN_PEXTRW128:
      icode = (fcode == IX86_BUILTIN_PEXTRW
	       ? CODE_FOR_mmx_pextrw
	       : CODE_FOR_sse2_pextrw);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	{
	  /* The selector must be an immediate; diagnose and return a
	     dummy register so expansion can continue.  */
	  error ("selector must be an integer constant in the range 0..%i",
		 fcode == IX86_BUILTIN_PEXTRW ? 3:7);
	  return gen_reg_rtx (tmode);
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_PINSRW:
    case IX86_BUILTIN_PINSRW128:
      icode = (fcode == IX86_BUILTIN_PINSRW
	       ? CODE_FOR_mmx_pinsrw
	       : CODE_FOR_sse2_pinsrw);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
	{
	  error ("selector must be an integer constant in the range 0..%i",
		 fcode == IX86_BUILTIN_PINSRW ? 15:255);
	  return const0_rtx;
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_MASKMOVQ:
    case IX86_BUILTIN_MASKMOVDQU:
      icode = (fcode == IX86_BUILTIN_MASKMOVQ
	       ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
	       : (TARGET_64BIT ? CODE_FOR_sse2_maskmovdqu_rex64
		  : CODE_FOR_sse2_maskmovdqu));
      /* Note the arg order is different from the operand order.  */
      arg1 = TREE_VALUE (arglist);
      arg2 = TREE_VALUE (TREE_CHAIN (arglist));
      arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      mode0 = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
	op2 = copy_to_mode_reg (mode2, op2);
      pat = GEN_FCN (icode) (op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return 0;

    case IX86_BUILTIN_SQRTSS:
      return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
    case IX86_BUILTIN_RSQRTSS:
      return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
    case IX86_BUILTIN_RCPSS:
      return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);

    case IX86_BUILTIN_LOADAPS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);

    case IX86_BUILTIN_LOADUPS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);

    case IX86_BUILTIN_STOREAPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);

    case IX86_BUILTIN_STOREUPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);

    case IX86_BUILTIN_LOADSS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);

    case IX86_BUILTIN_STORESS:
      return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);

    case IX86_BUILTIN_LOADHPS:
    case IX86_BUILTIN_LOADLPS:
    case IX86_BUILTIN_LOADHPD:
    case IX86_BUILTIN_LOADLPD:
      /* Merge a vector register (arg 0) with a memory half (arg 1).  */
      icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
	       : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
	       : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
	       : CODE_FOR_sse2_movsd);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_STOREHPS:
    case IX86_BUILTIN_STORELPS:
    case IX86_BUILTIN_STOREHPD:
    case IX86_BUILTIN_STORELPD:
      /* Store half of a vector (arg 1) through a pointer (arg 0).  */
      icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
	       : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
	       : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
	       : CODE_FOR_sse2_movsd);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);

      /* The memory is both destination and first source of the merge.  */
      pat = GEN_FCN (icode) (op0, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return 0;

    case IX86_BUILTIN_MOVNTPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
    case IX86_BUILTIN_MOVNTQ:
      return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);

    case IX86_BUILTIN_LDMXCSR:
      /* LDMXCSR reads from memory, so spill the value to a stack slot.  */
      op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
      target = assign_386_stack_local (SImode, 0);
      emit_move_insn (target, op0);
      emit_insn (gen_ldmxcsr (target));
      return 0;

    case IX86_BUILTIN_STMXCSR:
      /* STMXCSR writes to memory; read the result back from the slot.  */
      target = assign_386_stack_local (SImode, 0);
      emit_insn (gen_stmxcsr (target));
      return copy_to_mode_reg (SImode, target);

    case IX86_BUILTIN_SHUFPS:
    case IX86_BUILTIN_SHUFPD:
      icode = (fcode == IX86_BUILTIN_SHUFPS
	       ? CODE_FOR_sse_shufps
	       : CODE_FOR_sse2_shufpd);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
	{
	  /* @@@ better error message */
	  error ("mask must be an immediate");
	  return gen_reg_rtx (tmode);
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_PSHUFW:
    case IX86_BUILTIN_PSHUFD:
    case IX86_BUILTIN_PSHUFHW:
    case IX86_BUILTIN_PSHUFLW:
      icode = (  fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
	       : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
	       : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
	       : CODE_FOR_mmx_pshufw);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
	op0 = copy_to_mode_reg (mode1, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
	{
	  /* @@@ better error message */
	  error ("mask must be an immediate");
	  return const0_rtx;
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_PSLLDQI128:
    case IX86_BUILTIN_PSRLDQI128:
      /* Whole-register byte shifts are done as TImode shifts on a
	 V2DImode value viewed through subregs.  */
      icode = (fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
	       : CODE_FOR_sse2_lshrti3);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
	{
	  op0 = copy_to_reg (op0);
	  op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
	}
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
	{
	  error ("shift must be an immediate");
	  return const0_rtx;
	}
      target = gen_reg_rtx (V2DImode);
      pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_FEMMS:
      emit_insn (gen_femms ());
      return NULL_RTX;

    /* 3DNow! builtins dispatch through the generic unop/binop helpers.  */
    case IX86_BUILTIN_PAVGUSB:
      return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);

    case IX86_BUILTIN_PF2ID:
      return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);

    case IX86_BUILTIN_PFACC:
      return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);

    case IX86_BUILTIN_PFADD:
      return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPEQ:
      return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPGE:
      return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPGT:
      return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);

    case IX86_BUILTIN_PFMAX:
      return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);

    case IX86_BUILTIN_PFMIN:
      return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);

    case IX86_BUILTIN_PFMUL:
      return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);

    case IX86_BUILTIN_PFRCP:
      return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);

    case IX86_BUILTIN_PFRCPIT1:
      return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);

    case IX86_BUILTIN_PFRCPIT2:
      return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);

    case IX86_BUILTIN_PFRSQIT1:
      return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);

    case IX86_BUILTIN_PFRSQRT:
      return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);

    case IX86_BUILTIN_PFSUB:
      return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);

    case IX86_BUILTIN_PFSUBR:
      return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);

    case IX86_BUILTIN_PI2FD:
      return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);

    case IX86_BUILTIN_PMULHRW:
      return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);

    case IX86_BUILTIN_PF2IW:
      return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);

    case IX86_BUILTIN_PFNACC:
      return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);

    case IX86_BUILTIN_PFPNACC:
      return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);

    case IX86_BUILTIN_PI2FW:
      return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);

    case IX86_BUILTIN_PSWAPDSI:
      return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);

    case IX86_BUILTIN_PSWAPDSF:
      return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);

    case IX86_BUILTIN_SSE_ZERO:
      target = gen_reg_rtx (V4SFmode);
      emit_insn (gen_sse_clrv4sf (target, CONST0_RTX (V4SFmode)));
      return target;

    case IX86_BUILTIN_MMX_ZERO:
      target = gen_reg_rtx (DImode);
      emit_insn (gen_mmx_clrdi (target));
      return target;

    case IX86_BUILTIN_CLRTI:
      target = gen_reg_rtx (V2DImode);
      emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode, target, V2DImode, 0)));
      return target;


    case IX86_BUILTIN_SQRTSD:
      return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
    case IX86_BUILTIN_LOADAPD:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
    case IX86_BUILTIN_LOADUPD:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);

    case IX86_BUILTIN_STOREAPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
    case IX86_BUILTIN_STOREUPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);

    case IX86_BUILTIN_LOADSD:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);

    case IX86_BUILTIN_STORESD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);

    case IX86_BUILTIN_SETPD1:
      /* Broadcast one double: spill it, load into the low half, then
	 duplicate it with shufpd selector 0.  */
      target = assign_386_stack_local (DFmode, 0);
      arg0 = TREE_VALUE (arglist);
      emit_move_insn (adjust_address (target, DFmode, 0),
		      expand_expr (arg0, NULL_RTX, VOIDmode, 0));
      op0 = gen_reg_rtx (V2DFmode);
      emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
      emit_insn (gen_sse2_shufpd (op0, op0, op0, const0_rtx));
      return op0;

    case IX86_BUILTIN_SETPD:
      /* Build a V2DF from two doubles through a stack temporary.  */
      target = assign_386_stack_local (V2DFmode, 0);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      emit_move_insn (adjust_address (target, DFmode, 0),
		      expand_expr (arg0, NULL_RTX, VOIDmode, 0));
      emit_move_insn (adjust_address (target, DFmode, 8),
		      expand_expr (arg1, NULL_RTX, VOIDmode, 0));
      op0 = gen_reg_rtx (V2DFmode);
      emit_insn (gen_sse2_movapd (op0, target));
      return op0;

    case IX86_BUILTIN_LOADRPD:
      /* Load, then swap the two halves (shufpd selector 1).  */
      target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
					 gen_reg_rtx (V2DFmode), 1);
      emit_insn (gen_sse2_shufpd (target, target, target, const1_rtx));
      return target;

    case IX86_BUILTIN_LOADPD1:
      /* Load one double and duplicate it into both halves.  */
      target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
					 gen_reg_rtx (V2DFmode), 1);
      emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
      return target;

    case IX86_BUILTIN_STOREPD1:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
    case IX86_BUILTIN_STORERPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);

    case IX86_BUILTIN_CLRPD:
      target = gen_reg_rtx (V2DFmode);
      emit_insn (gen_sse_clrv2df (target));
      return target;

    case IX86_BUILTIN_MFENCE:
      emit_insn (gen_sse2_mfence ());
      return 0;
    case IX86_BUILTIN_LFENCE:
      emit_insn (gen_sse2_lfence ());
      return 0;

    case IX86_BUILTIN_CLFLUSH:
      arg0 = TREE_VALUE (arglist);
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      icode = CODE_FOR_sse2_clflush;
      if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
	op0 = copy_to_mode_reg (Pmode, op0);

      emit_insn (gen_sse2_clflush (op0));
      return 0;

    case IX86_BUILTIN_MOVNTPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
    case IX86_BUILTIN_MOVNTDQ:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
    case IX86_BUILTIN_MOVNTI:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);

    case IX86_BUILTIN_LOADDQA:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa, arglist, target, 1);
    case IX86_BUILTIN_LOADDQU:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
    case IX86_BUILTIN_LOADD:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd, arglist, target, 1);

    case IX86_BUILTIN_STOREDQA:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa, arglist);
    case IX86_BUILTIN_STOREDQU:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
    case IX86_BUILTIN_STORED:
      return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist);

    case IX86_BUILTIN_MONITOR:
      /* All three operands must be registers (SImode per the pattern).  */
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      if (!REG_P (op0))
	op0 = copy_to_mode_reg (SImode, op0);
      if (!REG_P (op1))
	op1 = copy_to_mode_reg (SImode, op1);
      if (!REG_P (op2))
	op2 = copy_to_mode_reg (SImode, op2);
      emit_insn (gen_monitor (op0, op1, op2));
      return 0;

    case IX86_BUILTIN_MWAIT:
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      if (!REG_P (op0))
	op0 = copy_to_mode_reg (SImode, op0);
      if (!REG_P (op1))
	op1 = copy_to_mode_reg (SImode, op1);
      emit_insn (gen_mwait (op0, op1));
      return 0;

    case IX86_BUILTIN_LOADDDUP:
      return ix86_expand_unop_builtin (CODE_FOR_loadddup, arglist, target, 1);

    case IX86_BUILTIN_LDDQU:
      return ix86_expand_unop_builtin (CODE_FOR_lddqu, arglist, target,
				       1);

    default:
      break;
    }

  /* Fall back to the generic builtin tables.  */
  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    if (d->code == fcode)
      {
	/* Compares are treated specially.  */
	if (d->icode == CODE_FOR_maskcmpv4sf3
	    || d->icode == CODE_FOR_vmmaskcmpv4sf3
	    || d->icode == CODE_FOR_maskncmpv4sf3
	    || d->icode == CODE_FOR_vmmaskncmpv4sf3
	    || d->icode == CODE_FOR_maskcmpv2df3
	    || d->icode == CODE_FOR_vmmaskcmpv2df3
	    || d->icode == CODE_FOR_maskncmpv2df3
	    || d->icode == CODE_FOR_vmmaskncmpv2df3)
	  return ix86_expand_sse_compare (d, arglist, target);

	return ix86_expand_binop_builtin (d->icode, arglist, target);
      }

  for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
    if (d->code == fcode)
      return ix86_expand_unop_builtin (d->icode, arglist, target, 0);

  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_comi (d, arglist, target);

  /* @@@ Should really do something sensible here.  */
  return 0;
}
4211a8fb
JH
13790
/* Store OPERAND to the memory after reload is completed.  This means
   that we can't easily use assign_stack_local.

   Three strategies, depending on the target:
     - red zone available: store below the stack pointer without
       adjusting it;
     - 64-bit without red zone: push as a DImode pre-decrement store
       (narrower modes are widened first);
     - 32-bit: push as one or two SImode pre-decrement stores.
   Returns a MEM rtx addressing the stored value.  The slot is released
   later by ix86_free_from_memory.  */
rtx
ix86_force_to_memory (enum machine_mode mode, rtx operand)
{
  rtx result;
  /* assign_stack_local cannot be used after reload; this helper must
     only run then.  */
  if (!reload_completed)
    abort ();
  if (TARGET_RED_ZONE)
    {
      /* Store into the red zone below the stack pointer; no stack
	 adjustment is needed.  */
      result = gen_rtx_MEM (mode,
			    gen_rtx_PLUS (Pmode,
					  stack_pointer_rtx,
					  GEN_INT (-RED_ZONE_SIZE)));
      emit_move_insn (result, operand);
    }
  else if (!TARGET_RED_ZONE && TARGET_64BIT)
    {
      switch (mode)
	{
	case HImode:
	case SImode:
	  /* Widen to DImode so a single 64-bit push suffices.  */
	  operand = gen_lowpart (DImode, operand);
	  /* FALLTHRU */
	case DImode:
	  emit_insn (
	    gen_rtx_SET (VOIDmode,
			 gen_rtx_MEM (DImode,
				      gen_rtx_PRE_DEC (DImode,
						       stack_pointer_rtx)),
			 operand));
	  break;
	default:
	  abort ();
	}
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  else
    {
      switch (mode)
	{
	case DImode:
	  {
	    /* 32-bit target: push the two SImode halves, high word
	       first so the value ends up little-endian in memory.  */
	    rtx operands[2];
	    split_di (&operand, 1, operands, operands + 1);
	    emit_insn (
	      gen_rtx_SET (VOIDmode,
			   gen_rtx_MEM (SImode,
					gen_rtx_PRE_DEC (Pmode,
							 stack_pointer_rtx)),
			   operands[1]));
	    emit_insn (
	      gen_rtx_SET (VOIDmode,
			   gen_rtx_MEM (SImode,
					gen_rtx_PRE_DEC (Pmode,
							 stack_pointer_rtx)),
			   operands[0]));
	  }
	  break;
	case HImode:
	  /* It is better to store HImodes as SImodes.  */
	  if (!TARGET_PARTIAL_REG_STALL)
	    operand = gen_lowpart (SImode, operand);
	  /* FALLTHRU */
	case SImode:
	  emit_insn (
	    gen_rtx_SET (VOIDmode,
			 gen_rtx_MEM (GET_MODE (operand),
				      gen_rtx_PRE_DEC (SImode,
						       stack_pointer_rtx)),
			 operand));
	  break;
	default:
	  abort ();
	}
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  return result;
}
13870
/* Free operand from the memory.

   Releases the scratch stack slot created by ix86_force_to_memory for a
   value of machine-mode MODE.  On red-zone targets the earlier store did
   not move the stack pointer, so nothing needs to be undone.  The slot
   size computed here must mirror the size chosen when the slot was
   allocated.  */
void
ix86_free_from_memory (enum machine_mode mode)
{
  if (!TARGET_RED_ZONE)
    {
      int size;

      /* 8 bytes for DImode or any mode in 64-bit mode; 2 bytes only when
	 HImode was stored narrow (partial-register-stall targets);
	 otherwise a 4-byte slot.  */
      if (mode == DImode || TARGET_64BIT)
	size = 8;
      else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
	size = 2;
      else
	size = 4;
      /* Use LEA to deallocate stack space.  In peephole2 it will be converted
	 to pop or add instruction if registers are available.  */
      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
			      gen_rtx_PLUS (Pmode, stack_pointer_rtx,
					    GEN_INT (size))));
    }
}
a946dd00 13892
f84aa48a
JH
/* Put float CONST_DOUBLE in the constant pool instead of fp regs.
   QImode must go into class Q_REGS.
   Narrow ALL_REGS to GENERAL_REGS.  This supports allowing movsf and
   movdf to do mem-to-mem moves through integer regs.

   Implements PREFERRED_RELOAD_CLASS: given constant or register X about
   to be reloaded into CLASS, return the class actually to be used, or
   NO_REGS to force the value into memory.  */
enum reg_class
ix86_preferred_reload_class (rtx x, enum reg_class class)
{
  /* Non-zero vector constants cannot be materialized in registers;
     send them to the constant pool.  */
  if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x)))
    return NO_REGS;
  if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
    {
      /* SSE can't load any constant directly yet.  */
      if (SSE_CLASS_P (class))
	return NO_REGS;
      /* Floats can load 0 and 1 (via fldz/fld1).  */
      if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
	{
	  /* Limit class to non-SSE.  Use GENERAL_REGS if possible.  */
	  if (MAYBE_SSE_CLASS_P (class))
	    return (reg_class_subset_p (class, GENERAL_REGS)
		    ? GENERAL_REGS : FLOAT_REGS);
	  else
	    return class;
	}
      /* General regs can load everything.  */
      if (reg_class_subset_p (class, GENERAL_REGS))
	return GENERAL_REGS;
      /* In case we haven't resolved FLOAT or SSE yet, give up.  */
      if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
	return NO_REGS;
    }
  /* MMX registers cannot hold any constant.  */
  if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
    return NO_REGS;
  /* QImode values need registers with QImode sub-registers (%al etc.).  */
  if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
    return Q_REGS;
  return class;
}
13930
/* If we are copying between general and FP registers, we need a memory
   location.  The same is true for SSE and MMX registers.

   The macro can't work reliably when one of the CLASSES is class containing
   registers from multiple units (SSE, MMX, integer).  We avoid this by never
   combining those units in single alternative in the machine description.
   Ensure that this constraint holds to avoid unexpected surprises.

   When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
   enforce these sanity checks.

   Returns nonzero if a MODE move between CLASS1 and CLASS2 must go
   through memory.  */
int
ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
			      enum machine_mode mode, int strict)
{
  /* A "MAYBE" predicate differing from its exact counterpart means the
     class mixes units — invalid per the comment above.  */
  if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
      || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
      || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
      || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
      || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
      || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
    {
      if (strict)
	abort ();
      else
	/* For cost estimation, pessimistically claim memory is needed.  */
	return 1;
    }
  /* x87<->anything always needs memory.  SSE/MMX<->integer needs memory
     unless the mode is a word mode and direct inter-unit moves are
     enabled (or we optimize for size).  */
  return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
	  || ((SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
	       || MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
	      && ((mode != SImode && (mode != DImode || !TARGET_64BIT))
		  || (!TARGET_INTER_UNIT_MOVES && !optimize_size))));
}
/* Return the cost of moving data from a register in class CLASS1 to
   one in class CLASS2.

   It is not required that the cost always equal 2 when FROM is the same as TO;
   on some machines it is expensive to move between registers if they are not
   general registers.  */
int
ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
			 enum reg_class class2)
{
  /* In case we require secondary memory, compute cost of the store followed
     by load.  In order to avoid bad register allocation choices, we need
     for this to be *at least* as high as the symmetric MEMORY_MOVE_COST.  */

  if (ix86_secondary_memory_needed (class1, class2, mode, 0))
    {
      int cost = 1;

      /* Take the worse of the in/out directions for each class so the
	 estimate never undercuts MEMORY_MOVE_COST.  */
      cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
		   MEMORY_MOVE_COST (mode, class1, 1));
      cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
		   MEMORY_MOVE_COST (mode, class2, 1));

      /* In case of copying from general_purpose_register we may emit multiple
	 stores followed by single load causing memory size mismatch stall.
	 Count this as arbitrarily high cost of 20.  */
      if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
	cost += 20;

      /* In the case of FP/MMX moves, the registers actually overlap, and we
	 have to switch modes in order to treat them differently.  */
      if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
	  || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
	cost += 20;

      return cost;
    }

  /* Moves between SSE/MMX and integer unit are expensive.  */
  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
      || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
    return ix86_cost->mmxsse_to_integer;
  if (MAYBE_FLOAT_CLASS_P (class1))
    return ix86_cost->fp_move;
  if (MAYBE_SSE_CLASS_P (class1))
    return ix86_cost->sse_move;
  if (MAYBE_MMX_CLASS_P (class1))
    return ix86_cost->mmx_move;
  /* General-register to general-register move.  */
  return 2;
}
14013
/* Return 1 if hard register REGNO can hold a value of machine-mode MODE.  */
int
ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
{
  /* Flags and only flags can only hold CCmode values.  */
  if (CC_REGNO_P (regno))
    return GET_MODE_CLASS (mode) == MODE_CC;
  if (GET_MODE_CLASS (mode) == MODE_CC
      || GET_MODE_CLASS (mode) == MODE_RANDOM
      || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
    return 0;
  if (FP_REGNO_P (regno))
    return VALID_FP_MODE_P (mode);
  if (SSE_REGNO_P (regno))
    {
      /* SSE2 widens the set of modes (integer vectors, DFmode).  */
      if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
	return 1;
      if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
	return 1;
      return 0;
    }
  if (MMX_REGNO_P (regno))
    {
      if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
	return 1;
      if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
	return 1;
      /* NOTE(review): no return 0 here — an MMX register with a mode
	 rejected above falls through to the general-purpose checks
	 below.  Looks intentional to share the VALID_INT/FP test, but
	 confirm; later GCC restructured this branch.  */
    }
  /* We handle both integer and floats in the general purpose registers.
     In future we should be able to handle vector modes as well.  */
  if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
    return 0;
  /* Take care for QImode values - they can be in non-QI regs, but then
     they do cause partial register stalls.  Registers 0-3 (a, b, c, d)
     have QImode sub-registers everywhere; in 64-bit mode all registers
     do.  */
  if (regno < 4 || mode != QImode || TARGET_64BIT)
    return 1;
  /* During/after reload we must accept the placement; otherwise avoid
     it on partial-register-stall targets.  */
  return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
}
/* Return the cost of moving data of mode M between a
   register and memory.  A value of 2 is the default; this cost is
   relative to those in `REGISTER_MOVE_COST'.

   If moving between registers and memory is more expensive than
   between two registers, you should define this macro to express the
   relative cost.

   Model also increased moving costs of QImode registers in non
   Q_REGS classes.

   IN is nonzero for a load (memory -> register), zero for a store.  */
int
ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
{
  if (FLOAT_CLASS_P (class))
    {
      /* x87 loads/stores are costed per precision: SF, DF, XF.  */
      int index;
      switch (mode)
	{
	case SFmode:
	  index = 0;
	  break;
	case DFmode:
	  index = 1;
	  break;
	case XFmode:
	  index = 2;
	  break;
	default:
	  /* Any other mode in an x87 register is bogus; make it very
	     expensive so it is never chosen.  */
	  return 100;
	}
      return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
    }
  if (SSE_CLASS_P (class))
    {
      /* SSE moves are costed by size: 4, 8 or 16 bytes.  */
      int index;
      switch (GET_MODE_SIZE (mode))
	{
	case 4:
	  index = 0;
	  break;
	case 8:
	  index = 1;
	  break;
	case 16:
	  index = 2;
	  break;
	default:
	  return 100;
	}
      return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
    }
  if (MMX_CLASS_P (class))
    {
      /* MMX moves are 4 or 8 bytes.  */
      int index;
      switch (GET_MODE_SIZE (mode))
	{
	case 4:
	  index = 0;
	  break;
	case 8:
	  index = 1;
	  break;
	default:
	  return 100;
	}
      return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
    }
  /* General purpose registers: cost by size.  */
  switch (GET_MODE_SIZE (mode))
    {
    case 1:
      if (in)
	/* Loading a byte into a non-Q register needs movzbl.  */
	return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
		: ix86_cost->movzbl_load);
      else
	/* Storing from a non-Q register is penalized by 4.  */
	return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
		: ix86_cost->int_store[0] + 4);
      break;
    case 2:
      return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
    default:
      /* Compute number of 32bit moves needed.  TFmode is moved as XFmode.  */
      if (mode == TFmode)
	mode = XFmode;
      return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
	      * (((int) GET_MODE_SIZE (mode)
		  + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
    }
}
0ecf09f9 14142
3c50106f
RH
14143/* Compute a (partial) cost for rtx X. Return true if the complete
14144 cost has been computed, and false if subexpressions should be
14145 scanned. In either case, *TOTAL contains the cost result. */
14146
14147static bool
b96a374d 14148ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
3c50106f
RH
14149{
14150 enum machine_mode mode = GET_MODE (x);
14151
14152 switch (code)
14153 {
14154 case CONST_INT:
14155 case CONST:
14156 case LABEL_REF:
14157 case SYMBOL_REF:
8fe75e43 14158 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
3c50106f 14159 *total = 3;
8fe75e43 14160 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
3c50106f 14161 *total = 2;
3504dad3
JH
14162 else if (flag_pic && SYMBOLIC_CONST (x)
14163 && (!TARGET_64BIT
14164 || (!GET_CODE (x) != LABEL_REF
14165 && (GET_CODE (x) != SYMBOL_REF
12969f45 14166 || !SYMBOL_REF_LOCAL_P (x)))))
3c50106f
RH
14167 *total = 1;
14168 else
14169 *total = 0;
14170 return true;
14171
14172 case CONST_DOUBLE:
14173 if (mode == VOIDmode)
14174 *total = 0;
14175 else
14176 switch (standard_80387_constant_p (x))
14177 {
14178 case 1: /* 0.0 */
14179 *total = 1;
14180 break;
881b2a96 14181 default: /* Other constants */
3c50106f
RH
14182 *total = 2;
14183 break;
881b2a96
RS
14184 case 0:
14185 case -1:
3c50106f
RH
14186 /* Start with (MEM (SYMBOL_REF)), since that's where
14187 it'll probably end up. Add a penalty for size. */
14188 *total = (COSTS_N_INSNS (1)
3504dad3 14189 + (flag_pic != 0 && !TARGET_64BIT)
3c50106f
RH
14190 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
14191 break;
14192 }
14193 return true;
14194
14195 case ZERO_EXTEND:
14196 /* The zero extensions is often completely free on x86_64, so make
14197 it as cheap as possible. */
14198 if (TARGET_64BIT && mode == DImode
14199 && GET_MODE (XEXP (x, 0)) == SImode)
14200 *total = 1;
14201 else if (TARGET_ZERO_EXTEND_WITH_AND)
14202 *total = COSTS_N_INSNS (ix86_cost->add);
14203 else
14204 *total = COSTS_N_INSNS (ix86_cost->movzx);
14205 return false;
14206
14207 case SIGN_EXTEND:
14208 *total = COSTS_N_INSNS (ix86_cost->movsx);
14209 return false;
14210
14211 case ASHIFT:
14212 if (GET_CODE (XEXP (x, 1)) == CONST_INT
14213 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
14214 {
14215 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
14216 if (value == 1)
14217 {
14218 *total = COSTS_N_INSNS (ix86_cost->add);
14219 return false;
14220 }
14221 if ((value == 2 || value == 3)
3c50106f
RH
14222 && ix86_cost->lea <= ix86_cost->shift_const)
14223 {
14224 *total = COSTS_N_INSNS (ix86_cost->lea);
14225 return false;
14226 }
14227 }
5efb1046 14228 /* FALLTHRU */
3c50106f
RH
14229
14230 case ROTATE:
14231 case ASHIFTRT:
14232 case LSHIFTRT:
14233 case ROTATERT:
14234 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
14235 {
14236 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14237 {
14238 if (INTVAL (XEXP (x, 1)) > 32)
14239 *total = COSTS_N_INSNS(ix86_cost->shift_const + 2);
14240 else
14241 *total = COSTS_N_INSNS(ix86_cost->shift_const * 2);
14242 }
14243 else
14244 {
14245 if (GET_CODE (XEXP (x, 1)) == AND)
14246 *total = COSTS_N_INSNS(ix86_cost->shift_var * 2);
14247 else
14248 *total = COSTS_N_INSNS(ix86_cost->shift_var * 6 + 2);
14249 }
14250 }
14251 else
14252 {
14253 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14254 *total = COSTS_N_INSNS (ix86_cost->shift_const);
14255 else
14256 *total = COSTS_N_INSNS (ix86_cost->shift_var);
14257 }
14258 return false;
14259
14260 case MULT:
14261 if (FLOAT_MODE_P (mode))
3c50106f 14262 {
4a5eab38
PB
14263 *total = COSTS_N_INSNS (ix86_cost->fmul);
14264 return false;
3c50106f
RH
14265 }
14266 else
14267 {
4a5eab38
PB
14268 rtx op0 = XEXP (x, 0);
14269 rtx op1 = XEXP (x, 1);
14270 int nbits;
14271 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14272 {
14273 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
14274 for (nbits = 0; value != 0; value &= value - 1)
14275 nbits++;
14276 }
14277 else
14278 /* This is arbitrary. */
14279 nbits = 7;
14280
14281 /* Compute costs correctly for widening multiplication. */
14282 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op1) == ZERO_EXTEND)
14283 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
14284 == GET_MODE_SIZE (mode))
14285 {
14286 int is_mulwiden = 0;
14287 enum machine_mode inner_mode = GET_MODE (op0);
14288
14289 if (GET_CODE (op0) == GET_CODE (op1))
14290 is_mulwiden = 1, op1 = XEXP (op1, 0);
14291 else if (GET_CODE (op1) == CONST_INT)
14292 {
14293 if (GET_CODE (op0) == SIGN_EXTEND)
14294 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
14295 == INTVAL (op1);
14296 else
14297 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
14298 }
14299
14300 if (is_mulwiden)
14301 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
14302 }
f676971a 14303
4a5eab38
PB
14304 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
14305 + nbits * ix86_cost->mult_bit)
14306 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code);
14307
14308 return true;
3c50106f 14309 }
3c50106f
RH
14310
14311 case DIV:
14312 case UDIV:
14313 case MOD:
14314 case UMOD:
14315 if (FLOAT_MODE_P (mode))
14316 *total = COSTS_N_INSNS (ix86_cost->fdiv);
14317 else
14318 *total = COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)]);
14319 return false;
14320
14321 case PLUS:
14322 if (FLOAT_MODE_P (mode))
14323 *total = COSTS_N_INSNS (ix86_cost->fadd);
e0c00392 14324 else if (GET_MODE_CLASS (mode) == MODE_INT
3c50106f
RH
14325 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
14326 {
14327 if (GET_CODE (XEXP (x, 0)) == PLUS
14328 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
14329 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
14330 && CONSTANT_P (XEXP (x, 1)))
14331 {
14332 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
14333 if (val == 2 || val == 4 || val == 8)
14334 {
14335 *total = COSTS_N_INSNS (ix86_cost->lea);
14336 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
14337 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
14338 outer_code);
14339 *total += rtx_cost (XEXP (x, 1), outer_code);
14340 return true;
14341 }
14342 }
14343 else if (GET_CODE (XEXP (x, 0)) == MULT
14344 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
14345 {
14346 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
14347 if (val == 2 || val == 4 || val == 8)
14348 {
14349 *total = COSTS_N_INSNS (ix86_cost->lea);
14350 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
14351 *total += rtx_cost (XEXP (x, 1), outer_code);
14352 return true;
14353 }
14354 }
14355 else if (GET_CODE (XEXP (x, 0)) == PLUS)
14356 {
14357 *total = COSTS_N_INSNS (ix86_cost->lea);
14358 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
14359 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
14360 *total += rtx_cost (XEXP (x, 1), outer_code);
14361 return true;
14362 }
14363 }
5efb1046 14364 /* FALLTHRU */
3c50106f
RH
14365
14366 case MINUS:
14367 if (FLOAT_MODE_P (mode))
14368 {
14369 *total = COSTS_N_INSNS (ix86_cost->fadd);
14370 return false;
14371 }
5efb1046 14372 /* FALLTHRU */
3c50106f
RH
14373
14374 case AND:
14375 case IOR:
14376 case XOR:
14377 if (!TARGET_64BIT && mode == DImode)
14378 {
14379 *total = (COSTS_N_INSNS (ix86_cost->add) * 2
14380 + (rtx_cost (XEXP (x, 0), outer_code)
14381 << (GET_MODE (XEXP (x, 0)) != DImode))
14382 + (rtx_cost (XEXP (x, 1), outer_code)
b96a374d 14383 << (GET_MODE (XEXP (x, 1)) != DImode)));
3c50106f
RH
14384 return true;
14385 }
5efb1046 14386 /* FALLTHRU */
3c50106f
RH
14387
14388 case NEG:
14389 if (FLOAT_MODE_P (mode))
14390 {
14391 *total = COSTS_N_INSNS (ix86_cost->fchs);
14392 return false;
14393 }
5efb1046 14394 /* FALLTHRU */
3c50106f
RH
14395
14396 case NOT:
14397 if (!TARGET_64BIT && mode == DImode)
14398 *total = COSTS_N_INSNS (ix86_cost->add * 2);
14399 else
14400 *total = COSTS_N_INSNS (ix86_cost->add);
14401 return false;
14402
c271ba77
KH
14403 case COMPARE:
14404 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
14405 && XEXP (XEXP (x, 0), 1) == const1_rtx
14406 && GET_CODE (XEXP (XEXP (x, 0), 2)) == CONST_INT
14407 && XEXP (x, 1) == const0_rtx)
14408 {
14409 /* This kind of construct is implemented using test[bwl].
14410 Treat it as if we had an AND. */
14411 *total = (COSTS_N_INSNS (ix86_cost->add)
14412 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
14413 + rtx_cost (const1_rtx, outer_code));
14414 return true;
14415 }
14416 return false;
14417
3c50106f 14418 case FLOAT_EXTEND:
dcbca208
RH
14419 if (!TARGET_SSE_MATH
14420 || mode == XFmode
14421 || (mode == DFmode && !TARGET_SSE2))
3c50106f
RH
14422 *total = 0;
14423 return false;
14424
14425 case ABS:
14426 if (FLOAT_MODE_P (mode))
14427 *total = COSTS_N_INSNS (ix86_cost->fabs);
14428 return false;
14429
14430 case SQRT:
14431 if (FLOAT_MODE_P (mode))
14432 *total = COSTS_N_INSNS (ix86_cost->fsqrt);
14433 return false;
14434
74dc3e94
RH
14435 case UNSPEC:
14436 if (XINT (x, 1) == UNSPEC_TP)
14437 *total = 0;
14438 return false;
14439
3c50106f
RH
14440 default:
14441 return false;
14442 }
14443}
14444
#if TARGET_MACHO

/* Counter used to generate unique lazy-pointer/stub labels.  */
static int current_machopic_label_num;

/* Given a symbol name and its associated stub, write out the
   definition of the stub.

   Emits the Mach-O lazy symbol stub: the stub entry that jumps through
   the lazy pointer, the binder entry that pushes the lazy pointer and
   tail-calls dyld_stub_binding_helper, and the lazy pointer itself
   (initially pointing at the binder).  */

void
machopic_output_stub (FILE *file, const char *symb, const char *stub)
{
  unsigned int length;
  char *binder_name, *symbol_name, lazy_ptr_name[32];
  int label = ++current_machopic_label_num;

  /* Lose our funky encoding stuff so it doesn't contaminate the stub.  */
  symb = (*targetm.strip_name_encoding) (symb);

  length = strlen (stub);
  binder_name = alloca (length + 32);
  GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);

  length = strlen (symb);
  symbol_name = alloca (length + 32);
  GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);

  sprintf (lazy_ptr_name, "L%d$lz", label);

  if (MACHOPIC_PURE)
    machopic_picsymbol_stub_section ();
  else
    machopic_symbol_stub_section ();

  fprintf (file, "%s:\n", stub);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);

  if (MACHOPIC_PURE)
    {
      /* PIC stub: compute the current address with call/pop, then jump
	 through the lazy pointer PC-relatively.  */
      fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
      fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
      fprintf (file, "\tjmp %%edx\n");
    }
  else
    fprintf (file, "\tjmp *%s\n", lazy_ptr_name);

  fprintf (file, "%s:\n", binder_name);

  if (MACHOPIC_PURE)
    {
      fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
      fprintf (file, "\tpushl %%eax\n");
    }
  else
    fprintf (file, "\t pushl $%s\n", lazy_ptr_name);

  fprintf (file, "\tjmp dyld_stub_binding_helper\n");

  /* The lazy pointer initially targets the binder; dyld rewrites it to
     the real symbol address on first use.  */
  machopic_lazy_symbol_ptr_section ();
  fprintf (file, "%s:\n", lazy_ptr_name);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
  fprintf (file, "\t.long %s\n", binder_name);
}
#endif /* TARGET_MACHO */
14507
162f023b
JH
14508/* Order the registers for register allocator. */
14509
14510void
b96a374d 14511x86_order_regs_for_local_alloc (void)
162f023b
JH
14512{
14513 int pos = 0;
14514 int i;
14515
14516 /* First allocate the local general purpose registers. */
14517 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
14518 if (GENERAL_REGNO_P (i) && call_used_regs[i])
14519 reg_alloc_order [pos++] = i;
14520
14521 /* Global general purpose registers. */
14522 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
14523 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
14524 reg_alloc_order [pos++] = i;
14525
14526 /* x87 registers come first in case we are doing FP math
14527 using them. */
14528 if (!TARGET_SSE_MATH)
14529 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
14530 reg_alloc_order [pos++] = i;
fce5a9f2 14531
162f023b
JH
14532 /* SSE registers. */
14533 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
14534 reg_alloc_order [pos++] = i;
14535 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
14536 reg_alloc_order [pos++] = i;
14537
d1f87653 14538 /* x87 registers. */
162f023b
JH
14539 if (TARGET_SSE_MATH)
14540 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
14541 reg_alloc_order [pos++] = i;
14542
14543 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
14544 reg_alloc_order [pos++] = i;
14545
14546 /* Initialize the rest of array as we do not allocate some registers
14547 at all. */
14548 while (pos < FIRST_PSEUDO_REGISTER)
14549 reg_alloc_order [pos++] = 0;
14550}
194734e9 14551
4977bab6
ZW
14552#ifndef TARGET_USE_MS_BITFIELD_LAYOUT
14553#define TARGET_USE_MS_BITFIELD_LAYOUT 0
14554#endif
14555
/* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
   struct attribute_spec.handler.

   NODE is the decl or type the attribute is applied to, NAME the
   attribute identifier.  Rejects the attribute (setting *NO_ADD_ATTRS)
   when it is applied to anything but a struct/union type, or when the
   opposite layout attribute is already present.  */
static tree
ix86_handle_struct_attribute (tree *node, tree name,
			      tree args ATTRIBUTE_UNUSED,
			      int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  tree *type = NULL;
  if (DECL_P (*node))
    {
      /* For a typedef, the attribute attaches to the underlying type.  */
      if (TREE_CODE (*node) == TYPE_DECL)
	type = &TREE_TYPE (*node);
    }
  else
    type = node;

  if (!(type && (TREE_CODE (*type) == RECORD_TYPE
		 || TREE_CODE (*type) == UNION_TYPE)))
    {
      warning ("%qs attribute ignored", IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }

  /* ms_struct and gcc_struct are mutually exclusive.  */
  else if ((is_attribute_p ("ms_struct", name)
	    && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
	   || ((is_attribute_p ("gcc_struct", name)
		&& lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
    {
      warning ("%qs incompatible attribute ignored",
	       IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }

  return NULL_TREE;
}
14591
4977bab6 14592static bool
b96a374d 14593ix86_ms_bitfield_layout_p (tree record_type)
4977bab6 14594{
fe77449a 14595 return (TARGET_USE_MS_BITFIELD_LAYOUT &&
021bad8e 14596 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
fe77449a 14597 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
4977bab6
ZW
14598}
14599
/* Returns an expression indicating where the this parameter is
   located on entry to the FUNCTION.

   Returns either a hard REG (64-bit ABI, or 32-bit regparm/fastcall
   conventions) or a MEM addressing the incoming stack slot.  */

static rtx
x86_this_parameter (tree function)
{
  tree type = TREE_TYPE (function);

  if (TARGET_64BIT)
    {
      /* When the aggregate return value takes the first integer
	 register, THIS moves to the second one.  */
      int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
      return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
    }

  if (ix86_function_regparm (type, function) > 0)
    {
      tree parm;

      parm = TYPE_ARG_TYPES (type);
      /* Figure out whether or not the function has a variable number of
	 arguments.  */
      for (; parm; parm = TREE_CHAIN (parm))
	if (TREE_VALUE (parm) == void_type_node)
	  break;
      /* If not, the this parameter is in the first argument.  */
      if (parm)
	{
	  int regno = 0;
	  /* fastcall passes the first argument in %ecx, not %eax.  */
	  if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
	    regno = 2;
	  return gen_rtx_REG (SImode, regno);
	}
    }

  /* Stack convention: THIS is at 4(%esp), or 8(%esp) when a hidden
     aggregate-return pointer occupies the first slot.  */
  if (aggregate_value_p (TREE_TYPE (type), type))
    return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
  else
    return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
}
14639
3961e8fe
RH
14640/* Determine whether x86_output_mi_thunk can succeed. */
14641
14642static bool
b96a374d
AJ
14643x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
14644 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
14645 HOST_WIDE_INT vcall_offset, tree function)
3961e8fe
RH
14646{
14647 /* 64-bit can handle anything. */
14648 if (TARGET_64BIT)
14649 return true;
14650
14651 /* For 32-bit, everything's fine if we have one free register. */
e767b5be 14652 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
3961e8fe
RH
14653 return true;
14654
14655 /* Need a free register for vcall_offset. */
14656 if (vcall_offset)
14657 return false;
14658
14659 /* Need a free register for GOT references. */
14660 if (flag_pic && !(*targetm.binds_local_p) (function))
14661 return false;
14662
14663 /* Otherwise ok. */
14664 return true;
14665}
14666
/* Output the assembler code for a thunk function.  THUNK_DECL is the
   declaration for the thunk function itself, FUNCTION is the decl for
   the target function.  DELTA is an immediate constant offset to be
   added to THIS.  If VCALL_OFFSET is nonzero, the word at
   *(*this + vcall_offset) should be added to THIS.

   The thunk is emitted as raw assembler text via output_asm_insn; it
   adjusts the incoming this pointer and tail-jumps to FUNCTION.  */

static void
x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
		     tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
		     HOST_WIDE_INT vcall_offset, tree function)
{
  rtx xops[3];
  rtx this = x86_this_parameter (function);
  rtx this_reg, tmp;

  /* If VCALL_OFFSET, we'll need THIS in a register.  Might as well
     pull it in now and let DELTA benefit.  */
  if (REG_P (this))
    this_reg = this;
  else if (vcall_offset)
    {
      /* Put the this parameter into %eax.  */
      xops[0] = this;
      xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
      output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
    }
  else
    this_reg = NULL_RTX;

  /* Adjust the this parameter by a fixed constant.  */
  if (delta)
    {
      xops[0] = GEN_INT (delta);
      xops[1] = this_reg ? this_reg : this;
      if (TARGET_64BIT)
	{
	  /* A delta outside the signed-32-bit immediate range must be
	     staged through scratch register %r10 first.  */
	  if (!x86_64_general_operand (xops[0], DImode))
	    {
	      tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
	      xops[1] = tmp;
	      output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
	      xops[0] = tmp;
	      xops[1] = this;
	    }
	  output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
	}
      else
	output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
    }

  /* Adjust the this parameter by a value stored in the vtable.  */
  if (vcall_offset)
    {
      if (TARGET_64BIT)
	tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
      else
	{
	  /* %ecx is the usual scratch; under fastcall it carries an
	     argument, so fall back to %eax.  */
	  int tmp_regno = 2 /* ECX */;
	  if (lookup_attribute ("fastcall",
				TYPE_ATTRIBUTES (TREE_TYPE (function))))
	    tmp_regno = 0 /* EAX */;
	  tmp = gen_rtx_REG (SImode, tmp_regno);
	}

      /* Load the vtable pointer: tmp = *this.  */
      xops[0] = gen_rtx_MEM (Pmode, this_reg);
      xops[1] = tmp;
      if (TARGET_64BIT)
	output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
      else
	output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);

      /* Adjust the this parameter.  */
      xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
      if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
	{
	  /* Offset too big for a displacement: materialize it in %r11
	     and use an indexed address instead.  */
	  rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
	  xops[0] = GEN_INT (vcall_offset);
	  xops[1] = tmp2;
	  output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
	  xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
	}
      xops[1] = this_reg;
      if (TARGET_64BIT)
	output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
      else
	output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
    }

  /* If necessary, drop THIS back to its stack slot.  */
  if (this_reg && this_reg != this)
    {
      xops[0] = this_reg;
      xops[1] = this;
      output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
    }

  /* Tail-jump to the real function.  */
  xops[0] = XEXP (DECL_RTL (function), 0);
  if (TARGET_64BIT)
    {
      if (!flag_pic || (*targetm.binds_local_p) (function))
	output_asm_insn ("jmp\t%P0", xops);
      else
	{
	  /* Non-local PIC target: jump through the GOT entry.  */
	  tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
	  tmp = gen_rtx_CONST (Pmode, tmp);
	  tmp = gen_rtx_MEM (QImode, tmp);
	  xops[0] = tmp;
	  output_asm_insn ("jmp\t%A0", xops);
	}
    }
  else
    {
      if (!flag_pic || (*targetm.binds_local_p) (function))
	output_asm_insn ("jmp\t%P0", xops);
      else
#if TARGET_MACHO
	if (TARGET_MACHO)
	  {
	    /* Darwin: jump through the Mach-O symbol stub.  */
	    rtx sym_ref = XEXP (DECL_RTL (function), 0);
	    tmp = (gen_rtx_SYMBOL_REF
		   (Pmode,
		    machopic_indirection_name (sym_ref, /*stub_p=*/true)));
	    tmp = gen_rtx_MEM (QImode, tmp);
	    xops[0] = tmp;
	    output_asm_insn ("jmp\t%0", xops);
	  }
	else
#endif /* TARGET_MACHO */
	{
	  /* 32-bit ELF PIC: set up the GOT pointer in %ecx (free here,
	     per x86_can_output_mi_thunk) and jump indirect.  */
	  tmp = gen_rtx_REG (SImode, 2 /* ECX */);
	  output_set_got (tmp);

	  xops[1] = tmp;
	  output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
	  output_asm_insn ("jmp\t{*}%1", xops);
	}
    }
}
e2500fed 14805
1bc7c5b6 14806static void
b96a374d 14807x86_file_start (void)
1bc7c5b6
ZW
14808{
14809 default_file_start ();
14810 if (X86_FILE_START_VERSION_DIRECTIVE)
14811 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
14812 if (X86_FILE_START_FLTUSED)
14813 fputs ("\t.global\t__fltused\n", asm_out_file);
14814 if (ix86_asm_dialect == ASM_INTEL)
14815 fputs ("\t.intel_syntax\n", asm_out_file);
14816}
14817
/* Implement ADJUST_FIELD_ALIGN: return the alignment to use for FIELD
   whose natural alignment would be COMPUTED (in bits).

   On 32-bit x86 without -malign-double, the psABI caps the alignment of
   double, long long and similar members at 32 bits.  */
int
x86_field_alignment (tree field, int computed)
{
  enum machine_mode mode;
  tree type = TREE_TYPE (field);

  /* The 64-bit ABI (and -malign-double) use the natural alignment.  */
  if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
    return computed;
  /* For arrays, the element type determines the alignment.  */
  mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
		    ? get_inner_array_type (type) : type);
  if (mode == DFmode || mode == DCmode
      || GET_MODE_CLASS (mode) == MODE_INT
      || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
    return MIN (32, computed);
  return computed;
}
14834
/* Output assembler code to FILE to increment profiler label # LABELNO
   for profiling a function entry.

   Emits the mcount call sequence; the counter-address setup is skipped
   when NO_PROFILE_COUNTERS is defined.  Four variants cover the
   64-bit/32-bit x PIC/non-PIC matrix.  */
void
x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
{
  if (TARGET_64BIT)
    if (flag_pic)
      {
#ifndef NO_PROFILE_COUNTERS
	/* Counter address in %r11 (call-clobbered, not used for args).  */
	fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
#endif
	fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
      }
    else
      {
#ifndef NO_PROFILE_COUNTERS
	fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
#endif
	fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
      }
  else if (flag_pic)
    {
#ifndef NO_PROFILE_COUNTERS
      /* 32-bit PIC addresses the counter GOT-relative off %ebx.  */
      fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
	       LPREFIX, labelno, PROFILE_COUNT_REGISTER);
#endif
      fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
    }
  else
    {
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
	       PROFILE_COUNT_REGISTER);
#endif
      fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
    }
}
14872
d2c49530
JH
14873/* We don't have exact information about the insn sizes, but we may assume
14874 quite safely that we are informed about all 1 byte insns and memory
c51e6d85 14875 address sizes. This is enough to eliminate unnecessary padding in
d2c49530
JH
14876 99% of cases. */
14877
14878static int
b96a374d 14879min_insn_size (rtx insn)
d2c49530
JH
14880{
14881 int l = 0;
14882
14883 if (!INSN_P (insn) || !active_insn_p (insn))
14884 return 0;
14885
14886 /* Discard alignments we've emit and jump instructions. */
14887 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
14888 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
14889 return 0;
14890 if (GET_CODE (insn) == JUMP_INSN
14891 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
14892 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
14893 return 0;
14894
14895 /* Important case - calls are always 5 bytes.
14896 It is common to have many calls in the row. */
14897 if (GET_CODE (insn) == CALL_INSN
14898 && symbolic_reference_mentioned_p (PATTERN (insn))
14899 && !SIBLING_CALL_P (insn))
14900 return 5;
14901 if (get_attr_length (insn) <= 1)
14902 return 1;
14903
14904 /* For normal instructions we may rely on the sizes of addresses
14905 and the presence of symbol to require 4 bytes of encoding.
14906 This is not the case for jumps where references are PC relative. */
14907 if (GET_CODE (insn) != JUMP_INSN)
14908 {
14909 l = get_attr_length_address (insn);
14910 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
14911 l = 4;
14912 }
14913 if (l)
14914 return 1+l;
14915 else
14916 return 2;
14917}
14918
c51e6d85 14919/* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
d2c49530
JH
14920 window. */
14921
14922static void
be04394b 14923ix86_avoid_jump_misspredicts (void)
d2c49530
JH
14924{
14925 rtx insn, start = get_insns ();
14926 int nbytes = 0, njumps = 0;
14927 int isjump = 0;
14928
14929 /* Look for all minimal intervals of instructions containing 4 jumps.
14930 The intervals are bounded by START and INSN. NBYTES is the total
14931 size of instructions in the interval including INSN and not including
14932 START. When the NBYTES is smaller than 16 bytes, it is possible
14933 that the end of START and INSN ends up in the same 16byte page.
14934
14935 The smallest offset in the page INSN can start is the case where START
14936 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
14937 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
14938 */
14939 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
14940 {
14941
14942 nbytes += min_insn_size (insn);
c263766c
RH
14943 if (dump_file)
14944 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
d2c49530
JH
14945 INSN_UID (insn), min_insn_size (insn));
14946 if ((GET_CODE (insn) == JUMP_INSN
14947 && GET_CODE (PATTERN (insn)) != ADDR_VEC
14948 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
14949 || GET_CODE (insn) == CALL_INSN)
14950 njumps++;
14951 else
14952 continue;
14953
14954 while (njumps > 3)
14955 {
14956 start = NEXT_INSN (start);
14957 if ((GET_CODE (start) == JUMP_INSN
14958 && GET_CODE (PATTERN (start)) != ADDR_VEC
14959 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
14960 || GET_CODE (start) == CALL_INSN)
14961 njumps--, isjump = 1;
14962 else
14963 isjump = 0;
14964 nbytes -= min_insn_size (start);
14965 }
14966 if (njumps < 0)
14967 abort ();
c263766c
RH
14968 if (dump_file)
14969 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
d2c49530
JH
14970 INSN_UID (start), INSN_UID (insn), nbytes);
14971
14972 if (njumps == 3 && isjump && nbytes < 16)
14973 {
14974 int padsize = 15 - nbytes + min_insn_size (insn);
14975
c263766c
RH
14976 if (dump_file)
14977 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
14978 INSN_UID (insn), padsize);
d2c49530
JH
14979 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
14980 }
14981 }
14982}
14983
be04394b 14984/* AMD Athlon works faster
d1f87653 14985 when RET is not destination of conditional jump or directly preceded
2a500b9e
JH
14986 by other jump instruction. We avoid the penalty by inserting NOP just
14987 before the RET instructions in such cases. */
18dbd950 14988static void
be04394b 14989ix86_pad_returns (void)
2a500b9e
JH
14990{
14991 edge e;
628f6a4e 14992 edge_iterator ei;
2a500b9e 14993
628f6a4e
BE
14994 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
14995 {
14996 basic_block bb = e->src;
14997 rtx ret = BB_END (bb);
14998 rtx prev;
14999 bool replace = false;
15000
15001 if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
15002 || !maybe_hot_bb_p (bb))
15003 continue;
15004 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
15005 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
15006 break;
15007 if (prev && GET_CODE (prev) == CODE_LABEL)
15008 {
15009 edge e;
15010 edge_iterator ei;
15011
15012 FOR_EACH_EDGE (e, ei, bb->preds)
15013 if (EDGE_FREQUENCY (e) && e->src->index >= 0
15014 && !(e->flags & EDGE_FALLTHRU))
15015 replace = true;
15016 }
15017 if (!replace)
15018 {
15019 prev = prev_active_insn (ret);
15020 if (prev
15021 && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
15022 || GET_CODE (prev) == CALL_INSN))
253c7a00 15023 replace = true;
628f6a4e
BE
15024 /* Empty functions get branch mispredict even when the jump destination
15025 is not visible to us. */
15026 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
15027 replace = true;
15028 }
15029 if (replace)
15030 {
15031 emit_insn_before (gen_return_internal_long (), ret);
15032 delete_insn (ret);
15033 }
15034 }
be04394b
JH
15035}
15036
15037/* Implement machine specific optimizations. We implement padding of returns
15038 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
15039static void
15040ix86_reorg (void)
15041{
15042 if (TARGET_ATHLON_K8 && optimize && !optimize_size)
15043 ix86_pad_returns ();
15044 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
15045 ix86_avoid_jump_misspredicts ();
2a500b9e
JH
15046}
15047
4977bab6
ZW
15048/* Return nonzero when QImode register that must be represented via REX prefix
15049 is used. */
15050bool
b96a374d 15051x86_extended_QIreg_mentioned_p (rtx insn)
4977bab6
ZW
15052{
15053 int i;
15054 extract_insn_cached (insn);
15055 for (i = 0; i < recog_data.n_operands; i++)
15056 if (REG_P (recog_data.operand[i])
15057 && REGNO (recog_data.operand[i]) >= 4)
15058 return true;
15059 return false;
15060}
15061
15062/* Return nonzero when P points to register encoded via REX prefix.
15063 Called via for_each_rtx. */
15064static int
b96a374d 15065extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
4977bab6
ZW
15066{
15067 unsigned int regno;
15068 if (!REG_P (*p))
15069 return 0;
15070 regno = REGNO (*p);
15071 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
15072}
15073
15074/* Return true when INSN mentions register that must be encoded using REX
15075 prefix. */
15076bool
b96a374d 15077x86_extended_reg_mentioned_p (rtx insn)
4977bab6
ZW
15078{
15079 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
15080}
15081
1d6ba901 15082/* Generate an unsigned DImode/SImode to FP conversion. This is the same code
8d705469
JH
15083 optabs would emit if we didn't have TFmode patterns. */
15084
15085void
b96a374d 15086x86_emit_floatuns (rtx operands[2])
8d705469
JH
15087{
15088 rtx neglab, donelab, i0, i1, f0, in, out;
1d6ba901
ZD
15089 enum machine_mode mode, inmode;
15090
15091 inmode = GET_MODE (operands[1]);
15092 if (inmode != SImode
15093 && inmode != DImode)
15094 abort ();
8d705469
JH
15095
15096 out = operands[0];
1d6ba901 15097 in = force_reg (inmode, operands[1]);
8d705469
JH
15098 mode = GET_MODE (out);
15099 neglab = gen_label_rtx ();
15100 donelab = gen_label_rtx ();
15101 i1 = gen_reg_rtx (Pmode);
15102 f0 = gen_reg_rtx (mode);
15103
15104 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
15105
15106 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
15107 emit_jump_insn (gen_jump (donelab));
15108 emit_barrier ();
15109
15110 emit_label (neglab);
15111
15112 i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15113 i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15114 i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
15115 expand_float (f0, i0, 0);
15116 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
15117
15118 emit_label (donelab);
15119}
15120
997404de
JH
15121/* Initialize vector TARGET via VALS. */
15122void
15123ix86_expand_vector_init (rtx target, rtx vals)
15124{
15125 enum machine_mode mode = GET_MODE (target);
15126 int elt_size = GET_MODE_SIZE (GET_MODE_INNER (mode));
15127 int n_elts = (GET_MODE_SIZE (mode) / elt_size);
15128 int i;
f676971a 15129
997404de
JH
15130 for (i = n_elts - 1; i >= 0; i--)
15131 if (GET_CODE (XVECEXP (vals, 0, i)) != CONST_INT
15132 && GET_CODE (XVECEXP (vals, 0, i)) != CONST_DOUBLE)
15133 break;
15134
f676971a 15135 /* Few special cases first...
997404de
JH
15136 ... constants are best loaded from constant pool. */
15137 if (i < 0)
15138 {
15139 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
15140 return;
15141 }
15142
15143 /* ... values where only first field is non-constant are best loaded
1ae58c30 15144 from the pool and overwritten via move later. */
997404de
JH
15145 if (!i)
15146 {
15147 rtx op = simplify_gen_subreg (mode, XVECEXP (vals, 0, 0),
15148 GET_MODE_INNER (mode), 0);
15149
15150 op = force_reg (mode, op);
15151 XVECEXP (vals, 0, 0) = CONST0_RTX (GET_MODE_INNER (mode));
15152 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
15153 switch (GET_MODE (target))
15154 {
15155 case V2DFmode:
15156 emit_insn (gen_sse2_movsd (target, target, op));
15157 break;
15158 case V4SFmode:
15159 emit_insn (gen_sse_movss (target, target, op));
15160 break;
15161 default:
15162 break;
15163 }
15164 return;
15165 }
15166
15167 /* And the busy sequence doing rotations. */
15168 switch (GET_MODE (target))
15169 {
15170 case V2DFmode:
15171 {
15172 rtx vecop0 =
15173 simplify_gen_subreg (V2DFmode, XVECEXP (vals, 0, 0), DFmode, 0);
15174 rtx vecop1 =
15175 simplify_gen_subreg (V2DFmode, XVECEXP (vals, 0, 1), DFmode, 0);
15176
15177 vecop0 = force_reg (V2DFmode, vecop0);
15178 vecop1 = force_reg (V2DFmode, vecop1);
15179 emit_insn (gen_sse2_unpcklpd (target, vecop0, vecop1));
15180 }
15181 break;
15182 case V4SFmode:
15183 {
15184 rtx vecop0 =
15185 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 0), SFmode, 0);
15186 rtx vecop1 =
15187 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 1), SFmode, 0);
15188 rtx vecop2 =
15189 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 2), SFmode, 0);
15190 rtx vecop3 =
15191 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 3), SFmode, 0);
15192 rtx tmp1 = gen_reg_rtx (V4SFmode);
15193 rtx tmp2 = gen_reg_rtx (V4SFmode);
15194
15195 vecop0 = force_reg (V4SFmode, vecop0);
15196 vecop1 = force_reg (V4SFmode, vecop1);
15197 vecop2 = force_reg (V4SFmode, vecop2);
15198 vecop3 = force_reg (V4SFmode, vecop3);
15199 emit_insn (gen_sse_unpcklps (tmp1, vecop1, vecop3));
15200 emit_insn (gen_sse_unpcklps (tmp2, vecop0, vecop2));
15201 emit_insn (gen_sse_unpcklps (target, tmp2, tmp1));
15202 }
15203 break;
15204 default:
15205 abort ();
15206 }
15207}
15208
f676971a
EC
15209/* Implements target hook vector_mode_supported_p. */
15210static bool
15211ix86_vector_mode_supported_p (enum machine_mode mode)
15212{
dcbca208 15213 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
f676971a 15214 return true;
dcbca208 15215 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
f676971a 15216 return true;
dcbca208 15217 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
f676971a 15218 return true;
dcbca208
RH
15219 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
15220 return true;
15221 return false;
f676971a
EC
15222}
15223
67dfe110
KH
15224/* Worker function for TARGET_MD_ASM_CLOBBERS.
15225
15226 We do this in the new i386 backend to maintain source compatibility
15227 with the old cc0-based compiler. */
15228
15229static tree
15230ix86_md_asm_clobbers (tree clobbers)
15231{
f676971a
EC
15232 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
15233 clobbers);
15234 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
15235 clobbers);
15236 clobbers = tree_cons (NULL_TREE, build_string (7, "dirflag"),
15237 clobbers);
67dfe110
KH
15238 return clobbers;
15239}
15240
3c5cb3e4
KH
15241/* Worker function for REVERSE_CONDITION. */
15242
15243enum rtx_code
15244ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
15245{
15246 return (mode != CCFPmode && mode != CCFPUmode
15247 ? reverse_condition (code)
15248 : reverse_condition_maybe_unordered (code));
15249}
15250
5ea9cb6e
RS
15251/* Output code to perform an x87 FP register move, from OPERANDS[1]
15252 to OPERANDS[0]. */
15253
15254const char *
15255output_387_reg_move (rtx insn, rtx *operands)
15256{
15257 if (REG_P (operands[1])
15258 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
15259 {
15260 if (REGNO (operands[0]) == FIRST_STACK_REG
15261 && TARGET_USE_FFREEP)
15262 return "ffreep\t%y0";
15263 return "fstp\t%y0";
15264 }
15265 if (STACK_TOP_P (operands[0]))
15266 return "fld%z1\t%y1";
15267 return "fst\t%y0";
15268}
15269
5ae27cfa
UB
15270/* Output code to perform a conditional jump to LABEL, if C2 flag in
15271 FP status register is set. */
15272
15273void
15274ix86_emit_fp_unordered_jump (rtx label)
15275{
15276 rtx reg = gen_reg_rtx (HImode);
15277 rtx temp;
15278
15279 emit_insn (gen_x86_fnstsw_1 (reg));
2484cc35
UB
15280
15281 if (TARGET_USE_SAHF)
15282 {
15283 emit_insn (gen_x86_sahf_1 (reg));
15284
f676971a 15285 temp = gen_rtx_REG (CCmode, FLAGS_REG);
2484cc35
UB
15286 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
15287 }
15288 else
15289 {
15290 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
15291
f676971a 15292 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
2484cc35
UB
15293 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
15294 }
f676971a 15295
5ae27cfa
UB
15296 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
15297 gen_rtx_LABEL_REF (VOIDmode, label),
15298 pc_rtx);
15299 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
15300 emit_jump_insn (temp);
15301}
15302
c2fcfa4f
UB
15303/* Output code to perform a log1p XFmode calculation. */
15304
15305void ix86_emit_i387_log1p (rtx op0, rtx op1)
15306{
15307 rtx label1 = gen_label_rtx ();
15308 rtx label2 = gen_label_rtx ();
15309
15310 rtx tmp = gen_reg_rtx (XFmode);
15311 rtx tmp2 = gen_reg_rtx (XFmode);
15312
15313 emit_insn (gen_absxf2 (tmp, op1));
15314 emit_insn (gen_cmpxf (tmp,
15315 CONST_DOUBLE_FROM_REAL_VALUE (
15316 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
15317 XFmode)));
15318 emit_jump_insn (gen_bge (label1));
15319
15320 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
15321 emit_insn (gen_fyl2xp1_xf3 (op0, tmp2, op1));
15322 emit_jump (label2);
15323
15324 emit_label (label1);
15325 emit_move_insn (tmp, CONST1_RTX (XFmode));
15326 emit_insn (gen_addxf3 (tmp, op1, tmp));
15327 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
15328 emit_insn (gen_fyl2x_xf3 (op0, tmp2, tmp));
15329
15330 emit_label (label2);
15331}
f676971a 15332
a8e68029
DJ
15333/* Solaris named-section hook. Parameters are as for
15334 named_section_real. */
15335
15336static void
15337i386_solaris_elf_named_section (const char *name, unsigned int flags,
15338 tree decl)
15339{
15340 /* With Binutils 2.15, the "@unwind" marker must be specified on
15341 every occurrence of the ".eh_frame" section, not just the first
15342 one. */
15343 if (TARGET_64BIT
15344 && strcmp (name, ".eh_frame") == 0)
15345 {
15346 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
15347 flags & SECTION_WRITE ? "aw" : "a");
15348 return;
15349 }
15350 default_elf_asm_named_section (name, flags, decl);
15351}
15352
e2500fed 15353#include "gt-i386.h"
This page took 4.672492 seconds and 5 git commands to generate.