1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004 Free Software Foundation, Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "tm_p.h"
29 #include "regs.h"
30 #include "hard-reg-set.h"
31 #include "real.h"
32 #include "insn-config.h"
33 #include "conditions.h"
34 #include "output.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
37 #include "flags.h"
38 #include "except.h"
39 #include "function.h"
40 #include "recog.h"
41 #include "expr.h"
42 #include "optabs.h"
43 #include "toplev.h"
44 #include "basic-block.h"
45 #include "ggc.h"
46 #include "target.h"
47 #include "target-def.h"
48 #include "langhooks.h"
49 #include "cgraph.h"
50 #include "tree-gimple.h"
51
52 #ifndef CHECK_STACK_LIMIT
53 #define CHECK_STACK_LIMIT (-1)
54 #endif
55
56 /* Return index of given mode in mult and division cost tables. */
57 #define MODE_INDEX(mode) \
58 ((mode) == QImode ? 0 \
59 : (mode) == HImode ? 1 \
60 : (mode) == SImode ? 2 \
61 : (mode) == DImode ? 3 \
62 : 4)
63
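/* An illustrative sketch of how MODE_INDEX is consumed, assuming the
   mult_init[] and divide[] field names of struct processor_costs in
   i386.h; index 4 is the fallback entry used for TImode and any other mode:

     int mul_cost = ix86_cost->mult_init[MODE_INDEX (SImode)];
     int div_cost = ix86_cost->divide[MODE_INDEX (DImode)];
*/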
64 /* Processor costs (relative to an add) */
65 static const
66 struct processor_costs size_cost = { /* costs for tuning for size */
67 2, /* cost of an add instruction */
68 3, /* cost of a lea instruction */
69 2, /* variable shift costs */
70 3, /* constant shift costs */
71 {3, 3, 3, 3, 5}, /* cost of starting a multiply */
72 0, /* cost of multiply per each bit set */
73 {3, 3, 3, 3, 5}, /* cost of a divide/mod */
74 3, /* cost of movsx */
75 3, /* cost of movzx */
76 0, /* "large" insn */
77 2, /* MOVE_RATIO */
78 2, /* cost for loading QImode using movzbl */
79 {2, 2, 2}, /* cost of loading integer registers
80 in QImode, HImode and SImode.
81 Relative to reg-reg move (2). */
82 {2, 2, 2}, /* cost of storing integer registers */
83 2, /* cost of reg,reg fld/fst */
84 {2, 2, 2}, /* cost of loading fp registers
85 in SFmode, DFmode and XFmode */
86 {2, 2, 2}, /* cost of storing fp registers */
87 3, /* cost of moving MMX register */
88 {3, 3}, /* cost of loading MMX registers
89 in SImode and DImode */
90 {3, 3}, /* cost of storing MMX registers
91 in SImode and DImode */
92 3, /* cost of moving SSE register */
93 {3, 3, 3}, /* cost of loading SSE registers
94 in SImode, DImode and TImode */
95 {3, 3, 3}, /* cost of storing SSE registers
96 in SImode, DImode and TImode */
97 3, /* MMX or SSE register to integer */
98 0, /* size of prefetch block */
99 0, /* number of parallel prefetches */
100 1, /* Branch cost */
101 2, /* cost of FADD and FSUB insns. */
102 2, /* cost of FMUL instruction. */
103 2, /* cost of FDIV instruction. */
104 2, /* cost of FABS instruction. */
105 2, /* cost of FCHS instruction. */
106 2, /* cost of FSQRT instruction. */
107 };
108
109 /* Processor costs (relative to an add) */
110 static const
111 struct processor_costs i386_cost = { /* 386 specific costs */
112 1, /* cost of an add instruction */
113 1, /* cost of a lea instruction */
114 3, /* variable shift costs */
115 2, /* constant shift costs */
116 {6, 6, 6, 6, 6}, /* cost of starting a multiply */
117 1, /* cost of multiply per each bit set */
118 {23, 23, 23, 23, 23}, /* cost of a divide/mod */
119 3, /* cost of movsx */
120 2, /* cost of movzx */
121 15, /* "large" insn */
122 3, /* MOVE_RATIO */
123 4, /* cost for loading QImode using movzbl */
124 {2, 4, 2}, /* cost of loading integer registers
125 in QImode, HImode and SImode.
126 Relative to reg-reg move (2). */
127 {2, 4, 2}, /* cost of storing integer registers */
128 2, /* cost of reg,reg fld/fst */
129 {8, 8, 8}, /* cost of loading fp registers
130 in SFmode, DFmode and XFmode */
131 {8, 8, 8}, /* cost of storing fp registers */
132 2, /* cost of moving MMX register */
133 {4, 8}, /* cost of loading MMX registers
134 in SImode and DImode */
135 {4, 8}, /* cost of storing MMX registers
136 in SImode and DImode */
137 2, /* cost of moving SSE register */
138 {4, 8, 16}, /* cost of loading SSE registers
139 in SImode, DImode and TImode */
140 {4, 8, 16}, /* cost of storing SSE registers
141 in SImode, DImode and TImode */
142 3, /* MMX or SSE register to integer */
143 0, /* size of prefetch block */
144 0, /* number of parallel prefetches */
145 1, /* Branch cost */
146 23, /* cost of FADD and FSUB insns. */
147 27, /* cost of FMUL instruction. */
148 88, /* cost of FDIV instruction. */
149 22, /* cost of FABS instruction. */
150 24, /* cost of FCHS instruction. */
151 122, /* cost of FSQRT instruction. */
152 };
153
154 static const
155 struct processor_costs i486_cost = { /* 486 specific costs */
156 1, /* cost of an add instruction */
157 1, /* cost of a lea instruction */
158 3, /* variable shift costs */
159 2, /* constant shift costs */
160 {12, 12, 12, 12, 12}, /* cost of starting a multiply */
161 1, /* cost of multiply per each bit set */
162 {40, 40, 40, 40, 40}, /* cost of a divide/mod */
163 3, /* cost of movsx */
164 2, /* cost of movzx */
165 15, /* "large" insn */
166 3, /* MOVE_RATIO */
167 4, /* cost for loading QImode using movzbl */
168 {2, 4, 2}, /* cost of loading integer registers
169 in QImode, HImode and SImode.
170 Relative to reg-reg move (2). */
171 {2, 4, 2}, /* cost of storing integer registers */
172 2, /* cost of reg,reg fld/fst */
173 {8, 8, 8}, /* cost of loading fp registers
174 in SFmode, DFmode and XFmode */
175 {8, 8, 8}, /* cost of storing fp registers */
176 2, /* cost of moving MMX register */
177 {4, 8}, /* cost of loading MMX registers
178 in SImode and DImode */
179 {4, 8}, /* cost of storing MMX registers
180 in SImode and DImode */
181 2, /* cost of moving SSE register */
182 {4, 8, 16}, /* cost of loading SSE registers
183 in SImode, DImode and TImode */
184 {4, 8, 16}, /* cost of storing SSE registers
185 in SImode, DImode and TImode */
186 3, /* MMX or SSE register to integer */
187 0, /* size of prefetch block */
188 0, /* number of parallel prefetches */
189 1, /* Branch cost */
190 8, /* cost of FADD and FSUB insns. */
191 16, /* cost of FMUL instruction. */
192 73, /* cost of FDIV instruction. */
193 3, /* cost of FABS instruction. */
194 3, /* cost of FCHS instruction. */
195 83, /* cost of FSQRT instruction. */
196 };
197
198 static const
199 struct processor_costs pentium_cost = {
200 1, /* cost of an add instruction */
201 1, /* cost of a lea instruction */
202 4, /* variable shift costs */
203 1, /* constant shift costs */
204 {11, 11, 11, 11, 11}, /* cost of starting a multiply */
205 0, /* cost of multiply per each bit set */
206 {25, 25, 25, 25, 25}, /* cost of a divide/mod */
207 3, /* cost of movsx */
208 2, /* cost of movzx */
209 8, /* "large" insn */
210 6, /* MOVE_RATIO */
211 6, /* cost for loading QImode using movzbl */
212 {2, 4, 2}, /* cost of loading integer registers
213 in QImode, HImode and SImode.
214 Relative to reg-reg move (2). */
215 {2, 4, 2}, /* cost of storing integer registers */
216 2, /* cost of reg,reg fld/fst */
217 {2, 2, 6}, /* cost of loading fp registers
218 in SFmode, DFmode and XFmode */
219 {4, 4, 6}, /* cost of storing fp registers */
220 8, /* cost of moving MMX register */
221 {8, 8}, /* cost of loading MMX registers
222 in SImode and DImode */
223 {8, 8}, /* cost of storing MMX registers
224 in SImode and DImode */
225 2, /* cost of moving SSE register */
226 {4, 8, 16}, /* cost of loading SSE registers
227 in SImode, DImode and TImode */
228 {4, 8, 16}, /* cost of storing SSE registers
229 in SImode, DImode and TImode */
230 3, /* MMX or SSE register to integer */
231 0, /* size of prefetch block */
232 0, /* number of parallel prefetches */
233 2, /* Branch cost */
234 3, /* cost of FADD and FSUB insns. */
235 3, /* cost of FMUL instruction. */
236 39, /* cost of FDIV instruction. */
237 1, /* cost of FABS instruction. */
238 1, /* cost of FCHS instruction. */
239 70, /* cost of FSQRT instruction. */
240 };
241
242 static const
243 struct processor_costs pentiumpro_cost = {
244 1, /* cost of an add instruction */
245 1, /* cost of a lea instruction */
246 1, /* variable shift costs */
247 1, /* constant shift costs */
248 {4, 4, 4, 4, 4}, /* cost of starting a multiply */
249 0, /* cost of multiply per each bit set */
250 {17, 17, 17, 17, 17}, /* cost of a divide/mod */
251 1, /* cost of movsx */
252 1, /* cost of movzx */
253 8, /* "large" insn */
254 6, /* MOVE_RATIO */
255 2, /* cost for loading QImode using movzbl */
256 {4, 4, 4}, /* cost of loading integer registers
257 in QImode, HImode and SImode.
258 Relative to reg-reg move (2). */
259 {2, 2, 2}, /* cost of storing integer registers */
260 2, /* cost of reg,reg fld/fst */
261 {2, 2, 6}, /* cost of loading fp registers
262 in SFmode, DFmode and XFmode */
263 {4, 4, 6}, /* cost of storing fp registers */
264 2, /* cost of moving MMX register */
265 {2, 2}, /* cost of loading MMX registers
266 in SImode and DImode */
267 {2, 2}, /* cost of storing MMX registers
268 in SImode and DImode */
269 2, /* cost of moving SSE register */
270 {2, 2, 8}, /* cost of loading SSE registers
271 in SImode, DImode and TImode */
272 {2, 2, 8}, /* cost of storing SSE registers
273 in SImode, DImode and TImode */
274 3, /* MMX or SSE register to integer */
275 32, /* size of prefetch block */
276 6, /* number of parallel prefetches */
277 2, /* Branch cost */
278 3, /* cost of FADD and FSUB insns. */
279 5, /* cost of FMUL instruction. */
280 56, /* cost of FDIV instruction. */
281 2, /* cost of FABS instruction. */
282 2, /* cost of FCHS instruction. */
283 56, /* cost of FSQRT instruction. */
284 };
285
286 static const
287 struct processor_costs k6_cost = {
288 1, /* cost of an add instruction */
289 2, /* cost of a lea instruction */
290 1, /* variable shift costs */
291 1, /* constant shift costs */
292 {3, 3, 3, 3, 3}, /* cost of starting a multiply */
293 0, /* cost of multiply per each bit set */
294 {18, 18, 18, 18, 18}, /* cost of a divide/mod */
295 2, /* cost of movsx */
296 2, /* cost of movzx */
297 8, /* "large" insn */
298 4, /* MOVE_RATIO */
299 3, /* cost for loading QImode using movzbl */
300 {4, 5, 4}, /* cost of loading integer registers
301 in QImode, HImode and SImode.
302 Relative to reg-reg move (2). */
303 {2, 3, 2}, /* cost of storing integer registers */
304 4, /* cost of reg,reg fld/fst */
305 {6, 6, 6}, /* cost of loading fp registers
306 in SFmode, DFmode and XFmode */
307 {4, 4, 4}, /* cost of storing fp registers */
308 2, /* cost of moving MMX register */
309 {2, 2}, /* cost of loading MMX registers
310 in SImode and DImode */
311 {2, 2}, /* cost of storing MMX registers
312 in SImode and DImode */
313 2, /* cost of moving SSE register */
314 {2, 2, 8}, /* cost of loading SSE registers
315 in SImode, DImode and TImode */
316 {2, 2, 8}, /* cost of storing SSE registers
317 in SImode, DImode and TImode */
318 6, /* MMX or SSE register to integer */
319 32, /* size of prefetch block */
320 1, /* number of parallel prefetches */
321 1, /* Branch cost */
322 2, /* cost of FADD and FSUB insns. */
323 2, /* cost of FMUL instruction. */
324 56, /* cost of FDIV instruction. */
325 2, /* cost of FABS instruction. */
326 2, /* cost of FCHS instruction. */
327 56, /* cost of FSQRT instruction. */
328 };
329
330 static const
331 struct processor_costs athlon_cost = {
332 1, /* cost of an add instruction */
333 2, /* cost of a lea instruction */
334 1, /* variable shift costs */
335 1, /* constant shift costs */
336 {5, 5, 5, 5, 5}, /* cost of starting a multiply */
337 0, /* cost of multiply per each bit set */
338 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
339 1, /* cost of movsx */
340 1, /* cost of movzx */
341 8, /* "large" insn */
342 9, /* MOVE_RATIO */
343 4, /* cost for loading QImode using movzbl */
344 {3, 4, 3}, /* cost of loading integer registers
345 in QImode, HImode and SImode.
346 Relative to reg-reg move (2). */
347 {3, 4, 3}, /* cost of storing integer registers */
348 4, /* cost of reg,reg fld/fst */
349 {4, 4, 12}, /* cost of loading fp registers
350 in SFmode, DFmode and XFmode */
351 {6, 6, 8}, /* cost of storing fp registers */
352 2, /* cost of moving MMX register */
353 {4, 4}, /* cost of loading MMX registers
354 in SImode and DImode */
355 {4, 4}, /* cost of storing MMX registers
356 in SImode and DImode */
357 2, /* cost of moving SSE register */
358 {4, 4, 6}, /* cost of loading SSE registers
359 in SImode, DImode and TImode */
360 {4, 4, 5}, /* cost of storing SSE registers
361 in SImode, DImode and TImode */
362 5, /* MMX or SSE register to integer */
363 64, /* size of prefetch block */
364 6, /* number of parallel prefetches */
365 5, /* Branch cost */
366 4, /* cost of FADD and FSUB insns. */
367 4, /* cost of FMUL instruction. */
368 24, /* cost of FDIV instruction. */
369 2, /* cost of FABS instruction. */
370 2, /* cost of FCHS instruction. */
371 35, /* cost of FSQRT instruction. */
372 };
373
374 static const
375 struct processor_costs k8_cost = {
376 1, /* cost of an add instruction */
377 2, /* cost of a lea instruction */
378 1, /* variable shift costs */
379 1, /* constant shift costs */
380 {3, 4, 3, 4, 5}, /* cost of starting a multiply */
381 0, /* cost of multiply per each bit set */
382 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
383 1, /* cost of movsx */
384 1, /* cost of movzx */
385 8, /* "large" insn */
386 9, /* MOVE_RATIO */
387 4, /* cost for loading QImode using movzbl */
388 {3, 4, 3}, /* cost of loading integer registers
389 in QImode, HImode and SImode.
390 Relative to reg-reg move (2). */
391 {3, 4, 3}, /* cost of storing integer registers */
392 4, /* cost of reg,reg fld/fst */
393 {4, 4, 12}, /* cost of loading fp registers
394 in SFmode, DFmode and XFmode */
395 {6, 6, 8}, /* cost of storing fp registers */
396 2, /* cost of moving MMX register */
397 {3, 3}, /* cost of loading MMX registers
398 in SImode and DImode */
399 {4, 4}, /* cost of storing MMX registers
400 in SImode and DImode */
401 2, /* cost of moving SSE register */
402 {4, 3, 6}, /* cost of loading SSE registers
403 in SImode, DImode and TImode */
404 {4, 4, 5}, /* cost of storing SSE registers
405 in SImode, DImode and TImode */
406 5, /* MMX or SSE register to integer */
407 64, /* size of prefetch block */
408 6, /* number of parallel prefetches */
409 5, /* Branch cost */
410 4, /* cost of FADD and FSUB insns. */
411 4, /* cost of FMUL instruction. */
412 19, /* cost of FDIV instruction. */
413 2, /* cost of FABS instruction. */
414 2, /* cost of FCHS instruction. */
415 35, /* cost of FSQRT instruction. */
416 };
417
418 static const
419 struct processor_costs pentium4_cost = {
420 1, /* cost of an add instruction */
421 3, /* cost of a lea instruction */
422 4, /* variable shift costs */
423 4, /* constant shift costs */
424 {15, 15, 15, 15, 15}, /* cost of starting a multiply */
425 0, /* cost of multiply per each bit set */
426 {56, 56, 56, 56, 56}, /* cost of a divide/mod */
427 1, /* cost of movsx */
428 1, /* cost of movzx */
429 16, /* "large" insn */
430 6, /* MOVE_RATIO */
431 2, /* cost for loading QImode using movzbl */
432 {4, 5, 4}, /* cost of loading integer registers
433 in QImode, HImode and SImode.
434 Relative to reg-reg move (2). */
435 {2, 3, 2}, /* cost of storing integer registers */
436 2, /* cost of reg,reg fld/fst */
437 {2, 2, 6}, /* cost of loading fp registers
438 in SFmode, DFmode and XFmode */
439 {4, 4, 6}, /* cost of storing fp registers */
440 2, /* cost of moving MMX register */
441 {2, 2}, /* cost of loading MMX registers
442 in SImode and DImode */
443 {2, 2}, /* cost of storing MMX registers
444 in SImode and DImode */
445 12, /* cost of moving SSE register */
446 {12, 12, 12}, /* cost of loading SSE registers
447 in SImode, DImode and TImode */
448 {2, 2, 8}, /* cost of storing SSE registers
449 in SImode, DImode and TImode */
450 10, /* MMX or SSE register to integer */
451 64, /* size of prefetch block */
452 6, /* number of parallel prefetches */
453 2, /* Branch cost */
454 5, /* cost of FADD and FSUB insns. */
455 7, /* cost of FMUL instruction. */
456 43, /* cost of FDIV instruction. */
457 2, /* cost of FABS instruction. */
458 2, /* cost of FCHS instruction. */
459 43, /* cost of FSQRT instruction. */
460 };
461
462 static const
463 struct processor_costs nocona_cost = {
464 1, /* cost of an add instruction */
465 1, /* cost of a lea instruction */
466 1, /* variable shift costs */
467 1, /* constant shift costs */
468 {10, 10, 10, 10, 10}, /* cost of starting a multiply */
469 0, /* cost of multiply per each bit set */
470 {66, 66, 66, 66, 66}, /* cost of a divide/mod */
471 1, /* cost of movsx */
472 1, /* cost of movzx */
473 16, /* "large" insn */
474 9, /* MOVE_RATIO */
475 4, /* cost for loading QImode using movzbl */
476 {4, 4, 4}, /* cost of loading integer registers
477 in QImode, HImode and SImode.
478 Relative to reg-reg move (2). */
479 {4, 4, 4}, /* cost of storing integer registers */
480 3, /* cost of reg,reg fld/fst */
481 {12, 12, 12}, /* cost of loading fp registers
482 in SFmode, DFmode and XFmode */
483 {4, 4, 4}, /* cost of storing fp registers */
484 6, /* cost of moving MMX register */
485 {12, 12}, /* cost of loading MMX registers
486 in SImode and DImode */
487 {12, 12}, /* cost of storing MMX registers
488 in SImode and DImode */
489 6, /* cost of moving SSE register */
490 {12, 12, 12}, /* cost of loading SSE registers
491 in SImode, DImode and TImode */
492 {12, 12, 12}, /* cost of storing SSE registers
493 in SImode, DImode and TImode */
494 8, /* MMX or SSE register to integer */
495 128, /* size of prefetch block */
496 8, /* number of parallel prefetches */
497 1, /* Branch cost */
498 6, /* cost of FADD and FSUB insns. */
499 8, /* cost of FMUL instruction. */
500 40, /* cost of FDIV instruction. */
501 3, /* cost of FABS instruction. */
502 3, /* cost of FCHS instruction. */
503 44, /* cost of FSQRT instruction. */
504 };
505
506 const struct processor_costs *ix86_cost = &pentium_cost;
507
508 /* Processor feature/optimization bitmasks. */
509 #define m_386 (1<<PROCESSOR_I386)
510 #define m_486 (1<<PROCESSOR_I486)
511 #define m_PENT (1<<PROCESSOR_PENTIUM)
512 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
513 #define m_K6 (1<<PROCESSOR_K6)
514 #define m_ATHLON (1<<PROCESSOR_ATHLON)
515 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
516 #define m_K8 (1<<PROCESSOR_K8)
517 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
518 #define m_NOCONA (1<<PROCESSOR_NOCONA)
519
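/* A hedged sketch of how the masks above are consumed: each x86_* tunable
   below is a bitmask over processor ids, and i386.h tests the bit selected
   by -mtune (macro names assumed from i386.h), roughly:

     #define TUNEMASK (1 << ix86_tune)
     #define TARGET_USE_LEAVE (x86_use_leave & TUNEMASK)

   so a tuning applies whenever (x86_feature & (1 << ix86_tune)) is nonzero.  */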
520 const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
521 const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
522 const int x86_zero_extend_with_and = m_486 | m_PENT;
523 const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA /* m_386 | m_K6 */;
524 const int x86_double_with_add = ~m_386;
525 const int x86_use_bit_test = m_386;
526 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
527 const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
528 const int x86_3dnow_a = m_ATHLON_K8;
529 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
530 /* Branch hints were put in P4 based on simulation results. But
531 after P4 was made, no performance benefit was observed with
532 branch hints. They also increase the code size. As a result,
533 icc never generates branch hints. */
534 const int x86_branch_hints = 0;
535 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4 | m_NOCONA;
536 const int x86_partial_reg_stall = m_PPRO;
537 const int x86_use_loop = m_K6;
538 const int x86_use_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
539 const int x86_use_mov0 = m_K6;
540 const int x86_use_cltd = ~(m_PENT | m_K6);
541 const int x86_read_modify_write = ~m_PENT;
542 const int x86_read_modify = ~(m_PENT | m_PPRO);
543 const int x86_split_long_moves = m_PPRO;
544 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
545 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
546 const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
547 const int x86_qimode_math = ~(0);
548 const int x86_promote_qi_regs = 0;
549 const int x86_himode_math = ~(m_PPRO);
550 const int x86_promote_hi_regs = m_PPRO;
551 const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA;
552 const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA;
553 const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4 | m_NOCONA;
554 const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4 | m_NOCONA;
555 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO);
556 const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA;
557 const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA;
558 const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO;
559 const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO;
560 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO;
561 const int x86_decompose_lea = m_PENT4 | m_NOCONA;
562 const int x86_shift1 = ~m_486;
563 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
564 const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO;
565 /* Set for machines where the type and dependencies are resolved on SSE register
566 parts instead of whole registers, so we may maintain just the lower part of
567 scalar values in the proper format, leaving the upper part undefined. */
568 const int x86_sse_partial_regs = m_ATHLON_K8;
569 /* Athlon optimizes the partial-register FPS special case, thus avoiding the
570 need for extra instructions beforehand. */
571 const int x86_sse_partial_regs_for_cvtsd2ss = 0;
572 const int x86_sse_typeless_stores = m_ATHLON_K8;
573 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
574 const int x86_use_ffreep = m_ATHLON_K8;
575 const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
576 const int x86_inter_unit_moves = ~(m_ATHLON_K8);
577 const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_NOCONA | m_PPRO;
578 /* Some CPU cores are not able to predict more than 4 branch instructions in
579 the 16 byte window. */
580 const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
581 const int x86_schedule = m_PPRO | m_ATHLON_K8 | m_K6 | m_PENT;
582 const int x86_use_bt = m_ATHLON_K8;
583
584 /* If the average insn count for a single function invocation is
585 lower than this constant, emit fast (but longer) prologue and
586 epilogue code. */
587 #define FAST_PROLOGUE_INSN_COUNT 20
588
589 /* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
590 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
591 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
592 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
593
594 /* Array of the smallest class containing reg number REGNO, indexed by
595 REGNO. Used by REGNO_REG_CLASS in i386.h. */
596
597 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
598 {
599 /* ax, dx, cx, bx */
600 AREG, DREG, CREG, BREG,
601 /* si, di, bp, sp */
602 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
603 /* FP registers */
604 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
605 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
606 /* arg pointer */
607 NON_Q_REGS,
608 /* flags, fpsr, dirflag, frame */
609 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
610 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
611 SSE_REGS, SSE_REGS,
612 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
613 MMX_REGS, MMX_REGS,
614 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
615 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
616 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
617 SSE_REGS, SSE_REGS,
618 };
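/* Illustrative reading of the table above: REGNO_REG_CLASS simply indexes
   this array by hard register number, so for example
   REGNO_REG_CLASS (0) == AREG for %eax, and REGNO_REG_CLASS (7) == NON_Q_REGS
   because %esp has no addressable QImode parts.  */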
619
620 /* The "default" register map used in 32bit mode. */
621
622 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
623 {
624 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
625 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
626 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
627 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
628 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
629 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
630 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
631 };
632
633 static int const x86_64_int_parameter_registers[6] =
634 {
635 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
636 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
637 };
638
639 static int const x86_64_int_return_registers[4] =
640 {
641 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
642 };
643
644 /* The "default" register map used in 64bit mode. */
645 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
646 {
647 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
648 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
649 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
650 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
651 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
652 8,9,10,11,12,13,14,15, /* extended integer registers */
653 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
654 };
655
656 /* Define the register numbers to be used in Dwarf debugging information.
657 The SVR4 reference port C compiler uses the following register numbers
658 in its Dwarf output code:
659 0 for %eax (gcc regno = 0)
660 1 for %ecx (gcc regno = 2)
661 2 for %edx (gcc regno = 1)
662 3 for %ebx (gcc regno = 3)
663 4 for %esp (gcc regno = 7)
664 5 for %ebp (gcc regno = 6)
665 6 for %esi (gcc regno = 4)
666 7 for %edi (gcc regno = 5)
667 The following three DWARF register numbers are never generated by
668 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
669 believes these numbers have these meanings.
670 8 for %eip (no gcc equivalent)
671 9 for %eflags (gcc regno = 17)
672 10 for %trapno (no gcc equivalent)
673 It is not at all clear how we should number the FP stack registers
674 for the x86 architecture. If the version of SDB on x86/svr4 were
675 a bit less brain dead with respect to floating-point then we would
676 have a precedent to follow with respect to DWARF register numbers
677 for x86 FP registers, but the SDB on x86/svr4 is so completely
678 broken with respect to FP registers that it is hardly worth thinking
679 of it as something to strive for compatibility with.
680 The version of x86/svr4 SDB I have at the moment does (partially)
681 seem to believe that DWARF register number 11 is associated with
682 the x86 register %st(0), but that's about all. Higher DWARF
683 register numbers don't seem to be associated with anything in
684 particular, and even for DWARF regno 11, SDB only seems to under-
685 stand that it should say that a variable lives in %st(0) (when
686 asked via an `=' command) if we said it was in DWARF regno 11,
687 but SDB still prints garbage when asked for the value of the
688 variable in question (via a `/' command).
689 (Also note that the labels SDB prints for various FP stack regs
690 when doing an `x' command are all wrong.)
691 Note that these problems generally don't affect the native SVR4
692 C compiler because it doesn't allow the use of -O with -g and
693 because when it is *not* optimizing, it allocates a memory
694 location for each floating-point variable, and the memory
695 location is what gets described in the DWARF AT_location
696 attribute for the variable in question.
697 Regardless of the severe mental illness of the x86/svr4 SDB, we
698 do something sensible here and we use the following DWARF
699 register numbers. Note that these are all stack-top-relative
700 numbers.
701 11 for %st(0) (gcc regno = 8)
702 12 for %st(1) (gcc regno = 9)
703 13 for %st(2) (gcc regno = 10)
704 14 for %st(3) (gcc regno = 11)
705 15 for %st(4) (gcc regno = 12)
706 16 for %st(5) (gcc regno = 13)
707 17 for %st(6) (gcc regno = 14)
708 18 for %st(7) (gcc regno = 15)
709 */
710 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
711 {
712 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
713 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
714 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
715 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
716 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
717 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
718 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
719 };
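/* A worked example of the mapping above: the array is indexed by gcc register
   number and yields the SVR4 DWARF number, so %esi (gcc regno 4) maps to
   DWARF regno 6, and %st(0) (gcc regno 8) maps to DWARF regno 11:

     svr4_dbx_register_map[4] == 6
     svr4_dbx_register_map[8] == 11  */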
720
721 /* Test and compare insns in i386.md store the information needed to
722 generate branch and scc insns here. */
723
724 rtx ix86_compare_op0 = NULL_RTX;
725 rtx ix86_compare_op1 = NULL_RTX;
726
727 #define MAX_386_STACK_LOCALS 3
728 /* Size of the register save area. */
729 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
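/* A worked instance of the size above, assuming the 64-bit ABI values
   REGPARM_MAX == 6, SSE_REGPARM_MAX == 8 and UNITS_PER_WORD == 8:
   6 * 8 + 8 * 16 == 48 + 128 == 176 bytes of register save area.  */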
730
731 /* Define the structure for the machine field in struct function. */
732
733 struct stack_local_entry GTY(())
734 {
735 unsigned short mode;
736 unsigned short n;
737 rtx rtl;
738 struct stack_local_entry *next;
739 };
740
741 /* Structure describing stack frame layout.
742 Stack grows downward:
743
744 [arguments]
745 <- ARG_POINTER
746 saved pc
747
748 saved frame pointer if frame_pointer_needed
749 <- HARD_FRAME_POINTER
750 [saved regs]
751
752 [padding1] \
753 )
754 [va_arg registers] (
755 > to_allocate <- FRAME_POINTER
756 [frame] (
757 )
758 [padding2] /
759 */
760 struct ix86_frame
761 {
762 int nregs;
763 int padding1;
764 int va_arg_size;
765 HOST_WIDE_INT frame;
766 int padding2;
767 int outgoing_arguments_size;
768 int red_zone_size;
769
770 HOST_WIDE_INT to_allocate;
771 /* The offsets relative to ARG_POINTER. */
772 HOST_WIDE_INT frame_pointer_offset;
773 HOST_WIDE_INT hard_frame_pointer_offset;
774 HOST_WIDE_INT stack_pointer_offset;
775
776 /* When save_regs_using_mov is set, emit prologue using
777 move instead of push instructions. */
778 bool save_regs_using_mov;
779 };
780
781 /* Used to enable/disable debugging features. */
782 const char *ix86_debug_arg_string, *ix86_debug_addr_string;
783 /* Code model option as passed by user. */
784 const char *ix86_cmodel_string;
785 /* Parsed value. */
786 enum cmodel ix86_cmodel;
787 /* Asm dialect. */
788 const char *ix86_asm_string;
789 enum asm_dialect ix86_asm_dialect = ASM_ATT;
790 /* TLS dialect. */
791 const char *ix86_tls_dialect_string;
792 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
793
794 /* Which unit we are generating floating point math for. */
795 enum fpmath_unit ix86_fpmath;
796
797 /* Which cpu we are scheduling for. */
798 enum processor_type ix86_tune;
799 /* Which instruction set architecture to use. */
800 enum processor_type ix86_arch;
801
802 /* Strings to hold which cpu and instruction set architecture to use. */
803 const char *ix86_tune_string; /* for -mtune=<xxx> */
804 const char *ix86_arch_string; /* for -march=<xxx> */
805 const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
806
807 /* # of registers to use to pass arguments. */
808 const char *ix86_regparm_string;
809
810 /* true if sse prefetch instruction is not NOOP. */
811 int x86_prefetch_sse;
812
813 /* ix86_regparm_string as a number */
814 int ix86_regparm;
815
816 /* Alignment to use for loops and jumps: */
817
818 /* Power of two alignment for loops. */
819 const char *ix86_align_loops_string;
820
821 /* Power of two alignment for non-loop jumps. */
822 const char *ix86_align_jumps_string;
823
824 /* Power of two alignment for stack boundary in bytes. */
825 const char *ix86_preferred_stack_boundary_string;
826
827 /* Preferred alignment for stack boundary in bits. */
828 unsigned int ix86_preferred_stack_boundary;
829
830 /* Values 1-5: see jump.c */
831 int ix86_branch_cost;
832 const char *ix86_branch_cost_string;
833
834 /* Power of two alignment for functions. */
835 const char *ix86_align_funcs_string;
836
837 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
838 char internal_label_prefix[16];
839 int internal_label_prefix_len;
840 \f
841 static void output_pic_addr_const (FILE *, rtx, int);
842 static void put_condition_code (enum rtx_code, enum machine_mode,
843 int, int, FILE *);
844 static const char *get_some_local_dynamic_name (void);
845 static int get_some_local_dynamic_name_1 (rtx *, void *);
846 static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
847 static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
848 rtx *);
849 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
850 static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
851 enum machine_mode);
852 static rtx get_thread_pointer (int);
853 static rtx legitimize_tls_address (rtx, enum tls_model, int);
854 static void get_pc_thunk_name (char [32], unsigned int);
855 static rtx gen_push (rtx);
856 static int ix86_flags_dependant (rtx, rtx, enum attr_type);
857 static int ix86_agi_dependant (rtx, rtx, enum attr_type);
858 static struct machine_function * ix86_init_machine_status (void);
859 static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
860 static int ix86_nsaved_regs (void);
861 static void ix86_emit_save_regs (void);
862 static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
863 static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
864 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
865 static HOST_WIDE_INT ix86_GOT_alias_set (void);
866 static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
867 static rtx ix86_expand_aligntest (rtx, int);
868 static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
869 static int ix86_issue_rate (void);
870 static int ix86_adjust_cost (rtx, rtx, rtx, int);
871 static int ia32_multipass_dfa_lookahead (void);
872 static bool ix86_misaligned_mem_ok (enum machine_mode);
873 static void ix86_init_mmx_sse_builtins (void);
874 static rtx x86_this_parameter (tree);
875 static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
876 HOST_WIDE_INT, tree);
877 static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
878 static void x86_file_start (void);
879 static void ix86_reorg (void);
880 static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
881 static tree ix86_build_builtin_va_list (void);
882 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
883 tree, int *, int);
884 static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
885 static bool ix86_vector_mode_supported_p (enum machine_mode);
886
887 static int ix86_address_cost (rtx);
888 static bool ix86_cannot_force_const_mem (rtx);
889 static rtx ix86_delegitimize_address (rtx);
890
891 struct builtin_description;
892 static rtx ix86_expand_sse_comi (const struct builtin_description *,
893 tree, rtx);
894 static rtx ix86_expand_sse_compare (const struct builtin_description *,
895 tree, rtx);
896 static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
897 static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
898 static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
899 static rtx ix86_expand_store_builtin (enum insn_code, tree);
900 static rtx safe_vector_operand (rtx, enum machine_mode);
901 static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
902 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
903 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
904 static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
905 static int ix86_fp_comparison_cost (enum rtx_code code);
906 static unsigned int ix86_select_alt_pic_regnum (void);
907 static int ix86_save_reg (unsigned int, int);
908 static void ix86_compute_frame_layout (struct ix86_frame *);
909 static int ix86_comp_type_attributes (tree, tree);
910 static int ix86_function_regparm (tree, tree);
911 const struct attribute_spec ix86_attribute_table[];
912 static bool ix86_function_ok_for_sibcall (tree, tree);
913 static tree ix86_handle_cdecl_attribute (tree *, tree, tree, int, bool *);
914 static tree ix86_handle_regparm_attribute (tree *, tree, tree, int, bool *);
915 static int ix86_value_regno (enum machine_mode);
916 static bool contains_128bit_aligned_vector_p (tree);
917 static rtx ix86_struct_value_rtx (tree, int);
918 static bool ix86_ms_bitfield_layout_p (tree);
919 static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
920 static int extended_reg_mentioned_1 (rtx *, void *);
921 static bool ix86_rtx_costs (rtx, int, int, int *);
922 static int min_insn_size (rtx);
923 static tree ix86_md_asm_clobbers (tree clobbers);
924 static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type);
925 static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
926 tree, bool);
927
928 /* This function is only used on Solaris. */
929 static void i386_solaris_elf_named_section (const char *, unsigned int, tree)
930 ATTRIBUTE_UNUSED;
931
932 /* Register class used for passing a given 64-bit part of the argument.
933 These represent classes as documented by the psABI, with the exception of
934 the SSESF and SSEDF classes, which are basically the SSE class except that
935 gcc uses SFmode or DFmode moves instead of DImode to avoid reformatting
936 penalties.
937
938 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
939 whenever possible (when the upper half contains only padding). */
940 enum x86_64_reg_class
941 {
942 X86_64_NO_CLASS,
943 X86_64_INTEGER_CLASS,
944 X86_64_INTEGERSI_CLASS,
945 X86_64_SSE_CLASS,
946 X86_64_SSESF_CLASS,
947 X86_64_SSEDF_CLASS,
948 X86_64_SSEUP_CLASS,
949 X86_64_X87_CLASS,
950 X86_64_X87UP_CLASS,
951 X86_64_MEMORY_CLASS
952 };
953 static const char * const x86_64_reg_class_name[] =
954 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
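/* A hedged worked example of this classification: under the x86-64 psABI an
   argument such as

     struct { double d; int i; };

   occupies two eightbytes; the first would classify as X86_64_SSEDF_CLASS
   (passed in an SSE register using a DFmode move) and the second as
   X86_64_INTEGERSI_CLASS (passed in a general register using a cheaper
   SImode move, since its upper half is only padding).  */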
955
956 #define MAX_CLASSES 4
957 static int classify_argument (enum machine_mode, tree,
958 enum x86_64_reg_class [MAX_CLASSES], int);
959 static int examine_argument (enum machine_mode, tree, int, int *, int *);
960 static rtx construct_container (enum machine_mode, tree, int, int, int,
961 const int *, int);
962 static enum x86_64_reg_class merge_classes (enum x86_64_reg_class,
963 enum x86_64_reg_class);
964
965 /* Table of constants used by fldpi, fldln2, etc.... */
966 static REAL_VALUE_TYPE ext_80387_constants_table [5];
967 static bool ext_80387_constants_init = 0;
968 static void init_ext_80387_constants (void);
969 \f
970 /* Initialize the GCC target structure. */
971 #undef TARGET_ATTRIBUTE_TABLE
972 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
973 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
974 # undef TARGET_MERGE_DECL_ATTRIBUTES
975 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
976 #endif
977
978 #undef TARGET_COMP_TYPE_ATTRIBUTES
979 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
980
981 #undef TARGET_INIT_BUILTINS
982 #define TARGET_INIT_BUILTINS ix86_init_builtins
983
984 #undef TARGET_EXPAND_BUILTIN
985 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
986
987 #undef TARGET_ASM_FUNCTION_EPILOGUE
988 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
989
990 #undef TARGET_ASM_OPEN_PAREN
991 #define TARGET_ASM_OPEN_PAREN ""
992 #undef TARGET_ASM_CLOSE_PAREN
993 #define TARGET_ASM_CLOSE_PAREN ""
994
995 #undef TARGET_ASM_ALIGNED_HI_OP
996 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
997 #undef TARGET_ASM_ALIGNED_SI_OP
998 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
999 #ifdef ASM_QUAD
1000 #undef TARGET_ASM_ALIGNED_DI_OP
1001 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
1002 #endif
1003
1004 #undef TARGET_ASM_UNALIGNED_HI_OP
1005 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
1006 #undef TARGET_ASM_UNALIGNED_SI_OP
1007 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1008 #undef TARGET_ASM_UNALIGNED_DI_OP
1009 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1010
1011 #undef TARGET_SCHED_ADJUST_COST
1012 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1013 #undef TARGET_SCHED_ISSUE_RATE
1014 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1015 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1016 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1017 ia32_multipass_dfa_lookahead
1018
1019 #undef TARGET_VECTORIZE_MISALIGNED_MEM_OK
1020 #define TARGET_VECTORIZE_MISALIGNED_MEM_OK ix86_misaligned_mem_ok
1021
1022 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1023 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1024
1025 #ifdef HAVE_AS_TLS
1026 #undef TARGET_HAVE_TLS
1027 #define TARGET_HAVE_TLS true
1028 #endif
1029 #undef TARGET_CANNOT_FORCE_CONST_MEM
1030 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1031
1032 #undef TARGET_DELEGITIMIZE_ADDRESS
1033 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1034
1035 #undef TARGET_MS_BITFIELD_LAYOUT_P
1036 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1037
1038 #undef TARGET_ASM_OUTPUT_MI_THUNK
1039 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1040 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1041 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1042
1043 #undef TARGET_ASM_FILE_START
1044 #define TARGET_ASM_FILE_START x86_file_start
1045
1046 #undef TARGET_RTX_COSTS
1047 #define TARGET_RTX_COSTS ix86_rtx_costs
1048 #undef TARGET_ADDRESS_COST
1049 #define TARGET_ADDRESS_COST ix86_address_cost
1050
1051 #undef TARGET_FIXED_CONDITION_CODE_REGS
1052 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1053 #undef TARGET_CC_MODES_COMPATIBLE
1054 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1055
1056 #undef TARGET_MACHINE_DEPENDENT_REORG
1057 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1058
1059 #undef TARGET_BUILD_BUILTIN_VA_LIST
1060 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1061
1062 #undef TARGET_MD_ASM_CLOBBERS
1063 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1064
1065 #undef TARGET_PROMOTE_PROTOTYPES
1066 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1067 #undef TARGET_STRUCT_VALUE_RTX
1068 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1069 #undef TARGET_SETUP_INCOMING_VARARGS
1070 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1071 #undef TARGET_MUST_PASS_IN_STACK
1072 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
1073 #undef TARGET_PASS_BY_REFERENCE
1074 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
1075
1076 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1077 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1078
1079 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1080 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
1081
1082 #ifdef SUBTARGET_INSERT_ATTRIBUTES
1083 #undef TARGET_INSERT_ATTRIBUTES
1084 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
1085 #endif
1086
1087 struct gcc_target targetm = TARGET_INITIALIZER;
1088
1089 \f
1090 /* The svr4 ABI for the i386 says that records and unions are returned
1091 in memory. */
1092 #ifndef DEFAULT_PCC_STRUCT_RETURN
1093 #define DEFAULT_PCC_STRUCT_RETURN 1
1094 #endif
1095
1096 /* Sometimes certain combinations of command options do not make
1097 sense on a particular target machine. You can define a macro
1098 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1099 defined, is executed once just after all the command options have
1100 been parsed.
1101
1102 Don't use this macro to turn on various extra optimizations for
1103 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1104
1105 void
1106 override_options (void)
1107 {
1108 int i;
1109 int ix86_tune_defaulted = 0;
1110
1111 /* Comes from final.c -- no real reason to change it. */
1112 #define MAX_CODE_ALIGN 16
1113
1114 static struct ptt
1115 {
1116 const struct processor_costs *cost; /* Processor costs */
1117 const int target_enable; /* Target flags to enable. */
1118 const int target_disable; /* Target flags to disable. */
1119 const int align_loop; /* Default alignments. */
1120 const int align_loop_max_skip;
1121 const int align_jump;
1122 const int align_jump_max_skip;
1123 const int align_func;
1124 }
1125 const processor_target_table[PROCESSOR_max] =
1126 {
1127 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1128 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1129 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1130 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1131 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1132 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1133 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1134 {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
1135 {&nocona_cost, 0, 0, 0, 0, 0, 0, 0}
1136 };
1137
1138 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1139 static struct pta
1140 {
1141 const char *const name; /* processor name or nickname. */
1142 const enum processor_type processor;
1143 const enum pta_flags
1144 {
1145 PTA_SSE = 1,
1146 PTA_SSE2 = 2,
1147 PTA_SSE3 = 4,
1148 PTA_MMX = 8,
1149 PTA_PREFETCH_SSE = 16,
1150 PTA_3DNOW = 32,
1151 PTA_3DNOW_A = 64,
1152 PTA_64BIT = 128
1153 } flags;
1154 }
1155 const processor_alias_table[] =
1156 {
1157 {"i386", PROCESSOR_I386, 0},
1158 {"i486", PROCESSOR_I486, 0},
1159 {"i586", PROCESSOR_PENTIUM, 0},
1160 {"pentium", PROCESSOR_PENTIUM, 0},
1161 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1162 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1163 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1164 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1165 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1166 {"i686", PROCESSOR_PENTIUMPRO, 0},
1167 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1168 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1169 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1170 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1171 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1172 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1173 | PTA_MMX | PTA_PREFETCH_SSE},
1174 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1175 | PTA_MMX | PTA_PREFETCH_SSE},
1176 {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
1177 | PTA_MMX | PTA_PREFETCH_SSE},
1178 {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1179 | PTA_MMX | PTA_PREFETCH_SSE},
1180 {"k6", PROCESSOR_K6, PTA_MMX},
1181 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1182 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1183 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1184 | PTA_3DNOW_A},
1185 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1186 | PTA_3DNOW | PTA_3DNOW_A},
1187 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1188 | PTA_3DNOW_A | PTA_SSE},
1189 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1190 | PTA_3DNOW_A | PTA_SSE},
1191 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1192 | PTA_3DNOW_A | PTA_SSE},
1193 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1194 | PTA_SSE | PTA_SSE2 },
1195 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1196 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1197 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1198 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1199 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1200 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1201 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1202 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1203 };
1204
1205 int const pta_size = ARRAY_SIZE (processor_alias_table);
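/* An illustrative reading of the alias table: the loops below match the
   -march=/-mtune= strings against these entries, so for instance
   -march=athlon-xp selects PROCESSOR_ATHLON and, unless the user set them
   explicitly, turns on MASK_MMX, MASK_3DNOW, MASK_3DNOW_A and MASK_SSE as
   well as x86_prefetch_sse, because its flags are
   PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE.  */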
1206
1207 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1208 SUBTARGET_OVERRIDE_OPTIONS;
1209 #endif
1210
1211 /* Set the default values for switches whose default depends on TARGET_64BIT
1212 in case they weren't overwritten by command line options. */
1213 if (TARGET_64BIT)
1214 {
1215 if (flag_omit_frame_pointer == 2)
1216 flag_omit_frame_pointer = 1;
1217 if (flag_asynchronous_unwind_tables == 2)
1218 flag_asynchronous_unwind_tables = 1;
1219 if (flag_pcc_struct_return == 2)
1220 flag_pcc_struct_return = 0;
1221 }
1222 else
1223 {
1224 if (flag_omit_frame_pointer == 2)
1225 flag_omit_frame_pointer = 0;
1226 if (flag_asynchronous_unwind_tables == 2)
1227 flag_asynchronous_unwind_tables = 0;
1228 if (flag_pcc_struct_return == 2)
1229 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1230 }
1231
1232 if (!ix86_tune_string && ix86_arch_string)
1233 ix86_tune_string = ix86_arch_string;
1234 if (!ix86_tune_string)
1235 {
1236 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1237 ix86_tune_defaulted = 1;
1238 }
1239 if (!ix86_arch_string)
1240 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
1241
1242 if (ix86_cmodel_string != 0)
1243 {
1244 if (!strcmp (ix86_cmodel_string, "small"))
1245 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1246 else if (flag_pic)
1247 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1248 else if (!strcmp (ix86_cmodel_string, "32"))
1249 ix86_cmodel = CM_32;
1250 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1251 ix86_cmodel = CM_KERNEL;
1252 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
1253 ix86_cmodel = CM_MEDIUM;
1254 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1255 ix86_cmodel = CM_LARGE;
1256 else
1257 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1258 }
1259 else
1260 {
1261 ix86_cmodel = CM_32;
1262 if (TARGET_64BIT)
1263 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1264 }
1265 if (ix86_asm_string != 0)
1266 {
1267 if (!strcmp (ix86_asm_string, "intel"))
1268 ix86_asm_dialect = ASM_INTEL;
1269 else if (!strcmp (ix86_asm_string, "att"))
1270 ix86_asm_dialect = ASM_ATT;
1271 else
1272 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1273 }
1274 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1275 error ("code model %qs not supported in the %s bit mode",
1276 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1277 if (ix86_cmodel == CM_LARGE)
1278 sorry ("code model %<large%> not supported yet");
1279 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1280 sorry ("%i-bit mode not compiled in",
1281 (target_flags & MASK_64BIT) ? 64 : 32);
1282
1283 for (i = 0; i < pta_size; i++)
1284 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1285 {
1286 ix86_arch = processor_alias_table[i].processor;
1287 /* Default cpu tuning to the architecture. */
1288 ix86_tune = ix86_arch;
1289 if (processor_alias_table[i].flags & PTA_MMX
1290 && !(target_flags_explicit & MASK_MMX))
1291 target_flags |= MASK_MMX;
1292 if (processor_alias_table[i].flags & PTA_3DNOW
1293 && !(target_flags_explicit & MASK_3DNOW))
1294 target_flags |= MASK_3DNOW;
1295 if (processor_alias_table[i].flags & PTA_3DNOW_A
1296 && !(target_flags_explicit & MASK_3DNOW_A))
1297 target_flags |= MASK_3DNOW_A;
1298 if (processor_alias_table[i].flags & PTA_SSE
1299 && !(target_flags_explicit & MASK_SSE))
1300 target_flags |= MASK_SSE;
1301 if (processor_alias_table[i].flags & PTA_SSE2
1302 && !(target_flags_explicit & MASK_SSE2))
1303 target_flags |= MASK_SSE2;
1304 if (processor_alias_table[i].flags & PTA_SSE3
1305 && !(target_flags_explicit & MASK_SSE3))
1306 target_flags |= MASK_SSE3;
1307 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1308 x86_prefetch_sse = true;
1309 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1310 error ("CPU you selected does not support x86-64 "
1311 "instruction set");
1312 break;
1313 }
1314
1315 if (i == pta_size)
1316 error ("bad value (%s) for -march= switch", ix86_arch_string);
1317
1318 for (i = 0; i < pta_size; i++)
1319 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1320 {
1321 ix86_tune = processor_alias_table[i].processor;
1322 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1323 {
1324 if (ix86_tune_defaulted)
1325 {
1326 ix86_tune_string = "x86-64";
1327 for (i = 0; i < pta_size; i++)
1328 if (! strcmp (ix86_tune_string,
1329 processor_alias_table[i].name))
1330 break;
1331 ix86_tune = processor_alias_table[i].processor;
1332 }
1333 else
1334 error ("CPU you selected does not support x86-64 "
1335 "instruction set");
1336 }
1337 /* Intel CPUs have always interpreted SSE prefetch instructions as
1338 NOPs; so, we can enable SSE prefetch instructions even when
1339 -mtune (rather than -march) points us to a processor that has them.
1340 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
1341 higher processors. */
1342 if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
1343 x86_prefetch_sse = true;
1344 break;
1345 }
1346 if (i == pta_size)
1347 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1348
1349 if (optimize_size)
1350 ix86_cost = &size_cost;
1351 else
1352 ix86_cost = processor_target_table[ix86_tune].cost;
1353 target_flags |= processor_target_table[ix86_tune].target_enable;
1354 target_flags &= ~processor_target_table[ix86_tune].target_disable;
1355
1356 /* Arrange to set up i386_stack_locals for all functions. */
1357 init_machine_status = ix86_init_machine_status;
1358
1359 /* Validate -mregparm= value. */
1360 if (ix86_regparm_string)
1361 {
1362 i = atoi (ix86_regparm_string);
1363 if (i < 0 || i > REGPARM_MAX)
1364 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1365 else
1366 ix86_regparm = i;
1367 }
1368 else
1369 if (TARGET_64BIT)
1370 ix86_regparm = REGPARM_MAX;
1371
1372 /* If the user has provided any of the -malign-* options,
1373 warn and use that value only if -falign-* is not set.
1374 Remove this code in GCC 3.2 or later. */
1375 if (ix86_align_loops_string)
1376 {
1377 warning ("-malign-loops is obsolete, use -falign-loops");
1378 if (align_loops == 0)
1379 {
1380 i = atoi (ix86_align_loops_string);
1381 if (i < 0 || i > MAX_CODE_ALIGN)
1382 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1383 else
1384 align_loops = 1 << i;
1385 }
1386 }
1387
1388 if (ix86_align_jumps_string)
1389 {
1390 warning ("-malign-jumps is obsolete, use -falign-jumps");
1391 if (align_jumps == 0)
1392 {
1393 i = atoi (ix86_align_jumps_string);
1394 if (i < 0 || i > MAX_CODE_ALIGN)
1395 error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1396 else
1397 align_jumps = 1 << i;
1398 }
1399 }
1400
1401 if (ix86_align_funcs_string)
1402 {
1403 warning ("-malign-functions is obsolete, use -falign-functions");
1404 if (align_functions == 0)
1405 {
1406 i = atoi (ix86_align_funcs_string);
1407 if (i < 0 || i > MAX_CODE_ALIGN)
1408 error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1409 else
1410 align_functions = 1 << i;
1411 }
1412 }
1413
1414 /* Default align_* from the processor table. */
1415 if (align_loops == 0)
1416 {
1417 align_loops = processor_target_table[ix86_tune].align_loop;
1418 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
1419 }
1420 if (align_jumps == 0)
1421 {
1422 align_jumps = processor_target_table[ix86_tune].align_jump;
1423 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
1424 }
1425 if (align_functions == 0)
1426 {
1427 align_functions = processor_target_table[ix86_tune].align_func;
1428 }
1429
1430 /* Validate -mpreferred-stack-boundary= value, or provide default.
1431 The default of 128 bits is for Pentium III's SSE __m128, but we
1432 don't want additional code to keep the stack aligned when
1433 optimizing for code size. */
1434 ix86_preferred_stack_boundary = (optimize_size
1435 ? TARGET_64BIT ? 128 : 32
1436 : 128);
1437 if (ix86_preferred_stack_boundary_string)
1438 {
1439 i = atoi (ix86_preferred_stack_boundary_string);
1440 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1441 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1442 TARGET_64BIT ? 4 : 2);
1443 else
1444 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1445 }
1446
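  /* A worked instance of the conversion above, assuming BITS_PER_UNIT == 8:
     the option value is a power-of-two exponent in bytes, so
     -mpreferred-stack-boundary=4 gives (1 << 4) * 8 == 128 bits, i.e. a
     16-byte aligned stack, matching the 128-bit default chosen above for
     SSE __m128.  */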
1447 /* Validate -mbranch-cost= value, or provide default. */
1448 ix86_branch_cost = processor_target_table[ix86_tune].cost->branch_cost;
1449 if (ix86_branch_cost_string)
1450 {
1451 i = atoi (ix86_branch_cost_string);
1452 if (i < 0 || i > 5)
1453 error ("-mbranch-cost=%d is not between 0 and 5", i);
1454 else
1455 ix86_branch_cost = i;
1456 }
1457
1458 if (ix86_tls_dialect_string)
1459 {
1460 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1461 ix86_tls_dialect = TLS_DIALECT_GNU;
1462 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1463 ix86_tls_dialect = TLS_DIALECT_SUN;
1464 else
1465 error ("bad value (%s) for -mtls-dialect= switch",
1466 ix86_tls_dialect_string);
1467 }
1468
1469 /* Keep nonleaf frame pointers. */
1470 if (flag_omit_frame_pointer)
1471 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
1472 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
1473 flag_omit_frame_pointer = 1;
1474
1475 /* If we're doing fast math, we don't care about comparison order
1476 wrt NaNs. This lets us use a shorter comparison sequence. */
1477 if (flag_unsafe_math_optimizations)
1478 target_flags &= ~MASK_IEEE_FP;
1479
1480 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1481 since the insns won't need emulation. */
1482 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1483 target_flags &= ~MASK_NO_FANCY_MATH_387;
1484
1485 /* Likewise, if the target doesn't have a 387, or we've specified
1486 software floating point, don't use 387 inline intrinsics. */
1487 if (!TARGET_80387)
1488 target_flags |= MASK_NO_FANCY_MATH_387;
1489
1490 /* Turn on SSE2 builtins for -msse3. */
1491 if (TARGET_SSE3)
1492 target_flags |= MASK_SSE2;
1493
1494 /* Turn on SSE builtins for -msse2. */
1495 if (TARGET_SSE2)
1496 target_flags |= MASK_SSE;
1497
1498 if (TARGET_64BIT)
1499 {
1500 if (TARGET_ALIGN_DOUBLE)
1501 error ("-malign-double makes no sense in the 64bit mode");
1502 if (TARGET_RTD)
1503 error ("-mrtd calling convention not supported in the 64bit mode");
1504 /* Enable by default the SSE and MMX builtins. */
1505 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1506 ix86_fpmath = FPMATH_SSE;
1507 }
1508 else
1509 {
1510 ix86_fpmath = FPMATH_387;
1511 /* The i386 ABI does not specify a red zone. It still makes sense to use one
1512 when the programmer takes care to keep the stack from being destroyed. */
1513 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
1514 target_flags |= MASK_NO_RED_ZONE;
1515 }
1516
1517 if (ix86_fpmath_string != 0)
1518 {
1519 if (! strcmp (ix86_fpmath_string, "387"))
1520 ix86_fpmath = FPMATH_387;
1521 else if (! strcmp (ix86_fpmath_string, "sse"))
1522 {
1523 if (!TARGET_SSE)
1524 {
1525 warning ("SSE instruction set disabled, using 387 arithmetics");
1526 ix86_fpmath = FPMATH_387;
1527 }
1528 else
1529 ix86_fpmath = FPMATH_SSE;
1530 }
1531 else if (! strcmp (ix86_fpmath_string, "387,sse")
1532 || ! strcmp (ix86_fpmath_string, "sse,387"))
1533 {
1534 if (!TARGET_SSE)
1535 {
1536 warning ("SSE instruction set disabled, using 387 arithmetics");
1537 ix86_fpmath = FPMATH_387;
1538 }
1539 else if (!TARGET_80387)
1540 {
1541 warning ("387 instruction set disabled, using SSE arithmetics");
1542 ix86_fpmath = FPMATH_SSE;
1543 }
1544 else
1545 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1546 }
1547 else
1548 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1549 }
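/* Editorial usage sketch, not part of the original source: the parsing above
   accepts exactly the forms "387", "sse", "387,sse" and "sse,387"; anything
   else reaches the "bad value" error.  Typical invocations:

     gcc -msse2 -mfpmath=sse file.c        scalar FP in SSE registers
     gcc -mfpmath=387 file.c               scalar FP on the x87 stack
     gcc -msse2 -mfpmath=sse,387 file.c    allow both units

   If the requested unit is unavailable the code above warns and falls back
   rather than failing.  */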
1550
1551 /* If fpmath doesn't include 387, disable use of x87 intrinsics. */
1552 if (! (ix86_fpmath & FPMATH_387))
1553 target_flags |= MASK_NO_FANCY_MATH_387;
1554
1555 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1556 on by -msse. */
1557 if (TARGET_SSE)
1558 {
1559 target_flags |= MASK_MMX;
1560 x86_prefetch_sse = true;
1561 }
1562
1563 /* If it has 3DNow!, it also has MMX, so MMX is also turned on by -m3dnow. */
1564 if (TARGET_3DNOW)
1565 {
1566 target_flags |= MASK_MMX;
1567 /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
1568 extensions it adds. */
1569 if (x86_3dnow_a & (1 << ix86_arch))
1570 target_flags |= MASK_3DNOW_A;
1571 }
1572 if ((x86_accumulate_outgoing_args & TUNEMASK)
1573 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1574 && !optimize_size)
1575 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1576
1577 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1578 {
1579 char *p;
1580 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1581 p = strchr (internal_label_prefix, 'X');
1582 internal_label_prefix_len = p - internal_label_prefix;
1583 *p = '\0';
1584 }
1585 /* When the scheduling description is not available, disable the scheduler pass
1586 so it won't slow down the compilation and make x87 code slower. */
1587 if (!TARGET_SCHEDULE)
1588 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
1589 }
1590 \f
1591 void
1592 optimization_options (int level, int size ATTRIBUTE_UNUSED)
1593 {
1594 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1595 make the problem with not enough registers even worse. */
1596 #ifdef INSN_SCHEDULING
1597 if (level > 1)
1598 flag_schedule_insns = 0;
1599 #endif
1600
1601 /* The default values of these switches depend on TARGET_64BIT, which is
1602 not known at this moment. Mark these values with 2 and let the user
1603 override them. In case there is no command line option specifying them,
1604 we will set the defaults in override_options. */
1605 if (optimize >= 1)
1606 flag_omit_frame_pointer = 2;
1607 flag_pcc_struct_return = 2;
1608 flag_asynchronous_unwind_tables = 2;
1609 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
1610 SUBTARGET_OPTIMIZATION_OPTIONS;
1611 #endif
1612 }
1613 \f
1614 /* Table of valid machine attributes. */
1615 const struct attribute_spec ix86_attribute_table[] =
1616 {
1617 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1618 /* Stdcall attribute says callee is responsible for popping arguments
1619 if they are not variable. */
1620 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1621 /* Fastcall attribute says callee is responsible for popping arguments
1622 if they are not variable. */
1623 { "fastcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1624 /* Cdecl attribute says the callee is a normal C declaration */
1625 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1626 /* Regparm attribute specifies how many integer arguments are to be
1627 passed in registers. */
1628 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1629 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1630 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
1631 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
1632 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
1633 #endif
1634 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1635 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1636 #ifdef SUBTARGET_ATTRIBUTE_TABLE
1637 SUBTARGET_ATTRIBUTE_TABLE,
1638 #endif
1639 { NULL, 0, 0, false, false, false, NULL }
1640 };
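/* Editorial usage sketch, not part of the original source: the
   calling-convention attributes registered in the table above appear on
   function declarations in user code, e.g.

     int __attribute__((stdcall))    f (int a, int b);         callee pops its args
     int __attribute__((fastcall))   g (int a, int b);         a in %ecx, b in %edx
     int __attribute__((regparm(3))) h (int a, int b, int c);  args in %eax, %edx, %ecx

   The handlers below reject meaningless combinations such as fastcall with
   stdcall or with regparm.  */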
1641
1642 /* Decide whether we can make a sibling call to a function. DECL is the
1643 declaration of the function being targeted by the call and EXP is the
1644 CALL_EXPR representing the call. */
1645
1646 static bool
1647 ix86_function_ok_for_sibcall (tree decl, tree exp)
1648 {
1649 /* If we are generating position-independent code, we cannot sibcall
1650 optimize any indirect call, or a direct call to a global function,
1651 as the PLT requires %ebx be live. */
1652 if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
1653 return false;
1654
1655 /* If we are returning floats on the 80387 register stack, we cannot
1656 make a sibcall from a function that doesn't return a float to a
1657 function that does or, conversely, from a function that does return
1658 a float to a function that doesn't; the necessary stack adjustment
1659 would not be executed. */
1660 if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp)))
1661 != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)))))
1662 return false;
1663
1664 /* If this call is indirect, we'll need to be able to use a call-clobbered
1665 register for the address of the target function. Make sure that all
1666 such registers are not used for passing parameters. */
1667 if (!decl && !TARGET_64BIT)
1668 {
1669 tree type;
1670
1671 /* We're looking at the CALL_EXPR; we need the type of the function. */
1672 type = TREE_OPERAND (exp, 0); /* pointer expression */
1673 type = TREE_TYPE (type); /* pointer type */
1674 type = TREE_TYPE (type); /* function type */
1675
1676 if (ix86_function_regparm (type, NULL) >= 3)
1677 {
1678 /* ??? Need to count the actual number of registers to be used,
1679 not the possible number of registers. Fix later. */
1680 return false;
1681 }
1682 }
1683
1684 /* Otherwise okay. That also includes certain types of indirect calls. */
1685 return true;
1686 }
1687
1688 /* Handle a "cdecl", "stdcall", or "fastcall" attribute;
1689 arguments as in struct attribute_spec.handler. */
1690 static tree
1691 ix86_handle_cdecl_attribute (tree *node, tree name,
1692 tree args ATTRIBUTE_UNUSED,
1693 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1694 {
1695 if (TREE_CODE (*node) != FUNCTION_TYPE
1696 && TREE_CODE (*node) != METHOD_TYPE
1697 && TREE_CODE (*node) != FIELD_DECL
1698 && TREE_CODE (*node) != TYPE_DECL)
1699 {
1700 warning ("%qs attribute only applies to functions",
1701 IDENTIFIER_POINTER (name));
1702 *no_add_attrs = true;
1703 }
1704 else
1705 {
1706 if (is_attribute_p ("fastcall", name))
1707 {
1708 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
1709 {
1710 error ("fastcall and stdcall attributes are not compatible");
1711 }
1712 else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
1713 {
1714 error ("fastcall and regparm attributes are not compatible");
1715 }
1716 }
1717 else if (is_attribute_p ("stdcall", name))
1718 {
1719 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1720 {
1721 error ("fastcall and stdcall attributes are not compatible");
1722 }
1723 }
1724 }
1725
1726 if (TARGET_64BIT)
1727 {
1728 warning ("%qs attribute ignored", IDENTIFIER_POINTER (name));
1729 *no_add_attrs = true;
1730 }
1731
1732 return NULL_TREE;
1733 }
1734
1735 /* Handle a "regparm" attribute;
1736 arguments as in struct attribute_spec.handler. */
1737 static tree
1738 ix86_handle_regparm_attribute (tree *node, tree name, tree args,
1739 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1740 {
1741 if (TREE_CODE (*node) != FUNCTION_TYPE
1742 && TREE_CODE (*node) != METHOD_TYPE
1743 && TREE_CODE (*node) != FIELD_DECL
1744 && TREE_CODE (*node) != TYPE_DECL)
1745 {
1746 warning ("%qs attribute only applies to functions",
1747 IDENTIFIER_POINTER (name));
1748 *no_add_attrs = true;
1749 }
1750 else
1751 {
1752 tree cst;
1753
1754 cst = TREE_VALUE (args);
1755 if (TREE_CODE (cst) != INTEGER_CST)
1756 {
1757 warning ("%qs attribute requires an integer constant argument",
1758 IDENTIFIER_POINTER (name));
1759 *no_add_attrs = true;
1760 }
1761 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1762 {
1763 warning ("argument to %qs attribute larger than %d",
1764 IDENTIFIER_POINTER (name), REGPARM_MAX);
1765 *no_add_attrs = true;
1766 }
1767
1768 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1769 {
1770 error ("fastcall and regparm attributes are not compatible");
1771 }
1772 }
1773
1774 return NULL_TREE;
1775 }
1776
1777 /* Return 0 if the attributes for two types are incompatible, 1 if they
1778 are compatible, and 2 if they are nearly compatible (which causes a
1779 warning to be generated). */
1780
1781 static int
1782 ix86_comp_type_attributes (tree type1, tree type2)
1783 {
1784 /* Check for mismatch of non-default calling convention. */
1785 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1786
1787 if (TREE_CODE (type1) != FUNCTION_TYPE)
1788 return 1;
1789
1790 /* Check for mismatched fastcall types */
1791 if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
1792 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
1793 return 0;
1794
1795 /* Check for mismatched return types (cdecl vs stdcall). */
1796 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1797 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1798 return 0;
1799 if (ix86_function_regparm (type1, NULL)
1800 != ix86_function_regparm (type2, NULL))
1801 return 0;
1802 return 1;
1803 }
1804 \f
1805 /* Return the regparm value for a function with the indicated TYPE and DECL.
1806 DECL may be NULL when calling a function indirectly
1807 or considering a libcall. */
1808
1809 static int
1810 ix86_function_regparm (tree type, tree decl)
1811 {
1812 tree attr;
1813 int regparm = ix86_regparm;
1814 bool user_convention = false;
1815
1816 if (!TARGET_64BIT)
1817 {
1818 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1819 if (attr)
1820 {
1821 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1822 user_convention = true;
1823 }
1824
1825 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
1826 {
1827 regparm = 2;
1828 user_convention = true;
1829 }
1830
1831 /* Use register calling convention for local functions when possible. */
1832 if (!TARGET_64BIT && !user_convention && decl
1833 && flag_unit_at_a_time && !profile_flag)
1834 {
1835 struct cgraph_local_info *i = cgraph_local_info (decl);
1836 if (i && i->local)
1837 {
1838 /* We can't use regparm(3) for nested functions as these use
1839 the static chain pointer in the third argument. */
1840 if (DECL_CONTEXT (decl) && !DECL_NO_STATIC_CHAIN (decl))
1841 regparm = 2;
1842 else
1843 regparm = 3;
1844 }
1845 }
1846 }
1847 return regparm;
1848 }
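/* Editorial note (an illustration, not part of the original source): the
   "local function" case above means that, with -funit-at-a-time and no
   profiling, a function that cgraph considers local -- for example

     static int add3 (int a, int b, int c) { return a + b + c; }

   whose address never escapes the unit -- may be given up to three register
   arguments even though no regparm attribute was written.  Callers in the
   same unit consult the same cgraph_local_info, so caller and callee agree.  */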
1849
1850 /* Return true if EAX is live at the start of the function. Used by
1851 ix86_expand_prologue to determine if we need special help before
1852 calling allocate_stack_worker. */
1853
1854 static bool
1855 ix86_eax_live_at_start_p (void)
1856 {
1857 /* Cheat. Don't bother working forward from ix86_function_regparm
1858 to the function type to whether an actual argument is located in
1859 eax. Instead just look at cfg info, which is still close enough
1860 to correct at this point. This gives false positives for broken
1861 functions that might use uninitialized data that happens to be
1862 allocated in eax, but who cares? */
1863 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->global_live_at_end, 0);
1864 }
1865
1866 /* Value is the number of bytes of arguments automatically
1867 popped when returning from a subroutine call.
1868 FUNDECL is the declaration node of the function (as a tree),
1869 FUNTYPE is the data type of the function (as a tree),
1870 or for a library call it is an identifier node for the subroutine name.
1871 SIZE is the number of bytes of arguments passed on the stack.
1872
1873 On the 80386, the RTD insn may be used to pop them if the number
1874 of args is fixed, but if the number is variable then the caller
1875 must pop them all. RTD can't be used for library calls now
1876 because the library is compiled with the Unix compiler.
1877 Use of RTD is a selectable option, since it is incompatible with
1878 standard Unix calling sequences. If the option is not selected,
1879 the caller must always pop the args.
1880
1881 The attribute stdcall is equivalent to RTD on a per module basis. */
1882
1883 int
1884 ix86_return_pops_args (tree fundecl, tree funtype, int size)
1885 {
1886 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1887
1888 /* Cdecl functions override -mrtd, and never pop the stack. */
1889 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1890
1891 /* Stdcall and fastcall functions will pop the stack if not
1892 variable args. */
1893 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
1894 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
1895 rtd = 1;
1896
1897 if (rtd
1898 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1899 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1900 == void_type_node)))
1901 return size;
1902 }
1903
1904 /* Lose any fake structure return argument if it is passed on the stack. */
1905 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
1906 && !TARGET_64BIT
1907 && !KEEP_AGGREGATE_RETURN_POINTER)
1908 {
1909 int nregs = ix86_function_regparm (funtype, fundecl);
1910
1911 if (!nregs)
1912 return GET_MODE_SIZE (Pmode);
1913 }
1914
1915 return 0;
1916 }
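/* Editorial note (an illustration, not part of the original source): a
   nonzero value returned by ix86_return_pops_args becomes the immediate of
   the return instruction, so a stdcall function taking two ints returns with

     ret $8

   while a cdecl function uses a plain "ret" and the caller adjusts %esp.
   Varargs functions always take the caller-pops path, because the callee
   cannot know how many bytes were pushed.  */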
1917 \f
1918 /* Argument support functions. */
1919
1920 /* Return true when register may be used to pass function parameters. */
1921 bool
1922 ix86_function_arg_regno_p (int regno)
1923 {
1924 int i;
1925 if (!TARGET_64BIT)
1926 return (regno < REGPARM_MAX
1927 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1928 if (SSE_REGNO_P (regno) && TARGET_SSE)
1929 return true;
1930 /* RAX is used as hidden argument to va_arg functions. */
1931 if (!regno)
1932 return true;
1933 for (i = 0; i < REGPARM_MAX; i++)
1934 if (regno == x86_64_int_parameter_registers[i])
1935 return true;
1936 return false;
1937 }
1938
1939 /* Return true if we do not know how to pass TYPE solely in registers. */
1940
1941 static bool
1942 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
1943 {
1944 if (must_pass_in_stack_var_size_or_pad (mode, type))
1945 return true;
1946 return (!TARGET_64BIT && type && mode == TImode);
1947 }
1948
1949 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1950 for a call to a function whose data type is FNTYPE.
1951 For a library call, FNTYPE is 0. */
1952
1953 void
1954 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
1955 tree fntype, /* tree ptr for function decl */
1956 rtx libname, /* SYMBOL_REF of library name or 0 */
1957 tree fndecl)
1958 {
1959 static CUMULATIVE_ARGS zero_cum;
1960 tree param, next_param;
1961
1962 if (TARGET_DEBUG_ARG)
1963 {
1964 fprintf (stderr, "\ninit_cumulative_args (");
1965 if (fntype)
1966 fprintf (stderr, "fntype code = %s, ret code = %s",
1967 tree_code_name[(int) TREE_CODE (fntype)],
1968 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1969 else
1970 fprintf (stderr, "no fntype");
1971
1972 if (libname)
1973 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1974 }
1975
1976 *cum = zero_cum;
1977
1978 /* Set up the number of registers to use for passing arguments. */
1979 if (fntype)
1980 cum->nregs = ix86_function_regparm (fntype, fndecl);
1981 else
1982 cum->nregs = ix86_regparm;
1983 if (TARGET_SSE)
1984 cum->sse_nregs = SSE_REGPARM_MAX;
1985 if (TARGET_MMX)
1986 cum->mmx_nregs = MMX_REGPARM_MAX;
1987 cum->warn_sse = true;
1988 cum->warn_mmx = true;
1989 cum->maybe_vaarg = false;
1990
1991 /* Use ecx and edx registers if function has fastcall attribute */
1992 if (fntype && !TARGET_64BIT)
1993 {
1994 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
1995 {
1996 cum->nregs = 2;
1997 cum->fastcall = 1;
1998 }
1999 }
2000
2001 /* Determine if this function has variable arguments. This is
2002 indicated by the last argument being 'void_type_node' if there
2003 are no variable arguments. If there are variable arguments, then
2004 we won't pass anything in registers in 32-bit mode. */
2005
2006 if (cum->nregs || cum->mmx_nregs || cum->sse_nregs)
2007 {
2008 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
2009 param != 0; param = next_param)
2010 {
2011 next_param = TREE_CHAIN (param);
2012 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
2013 {
2014 if (!TARGET_64BIT)
2015 {
2016 cum->nregs = 0;
2017 cum->sse_nregs = 0;
2018 cum->mmx_nregs = 0;
2019 cum->warn_sse = 0;
2020 cum->warn_mmx = 0;
2021 cum->fastcall = 0;
2022 }
2023 cum->maybe_vaarg = true;
2024 }
2025 }
2026 }
2027 if ((!fntype && !libname)
2028 || (fntype && !TYPE_ARG_TYPES (fntype)))
2029 cum->maybe_vaarg = 1;
2030
2031 if (TARGET_DEBUG_ARG)
2032 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
2033
2034 return;
2035 }
2036
2037 /* x86-64 register passing implementation. See the x86-64 ABI for details. The
2038 goal of this code is to classify each 8 bytes of an incoming argument by
2039 register class and assign registers accordingly. */
2040
2041 /* Return the union class of CLASS1 and CLASS2.
2042 See the x86-64 PS ABI for details. */
2043
2044 static enum x86_64_reg_class
2045 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
2046 {
2047 /* Rule #1: If both classes are equal, this is the resulting class. */
2048 if (class1 == class2)
2049 return class1;
2050
2051 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
2052 the other class. */
2053 if (class1 == X86_64_NO_CLASS)
2054 return class2;
2055 if (class2 == X86_64_NO_CLASS)
2056 return class1;
2057
2058 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
2059 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
2060 return X86_64_MEMORY_CLASS;
2061
2062 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
2063 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
2064 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
2065 return X86_64_INTEGERSI_CLASS;
2066 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
2067 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
2068 return X86_64_INTEGER_CLASS;
2069
2070 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
2071 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
2072 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
2073 return X86_64_MEMORY_CLASS;
2074
2075 /* Rule #6: Otherwise class SSE is used. */
2076 return X86_64_SSE_CLASS;
2077 }
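/* Editorial worked example, not part of the original source: for a structure
   such as

     struct s { double d; int i; char c; };      16 bytes = two eightbytes

   classify_argument below gives the first eightbyte class SSEDF (only the
   double lives in it) and the second eightbyte class INTEGER (the int and
   the char both classify as INTEGER and merge), so the structure travels in
   one SSE register and one general-purpose register.  A long double member
   would instead be classified X87/X87UP and the aggregate would be passed
   in memory.  */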
2078
2079 /* Classify the argument of type TYPE and mode MODE.
2080 CLASSES will be filled by the register class used to pass each word
2081 of the operand. The number of words is returned. In case the parameter
2082 should be passed in memory, 0 is returned. As a special case for zero
2083 sized containers, classes[0] will be NO_CLASS and 1 is returned.
2084
2085 BIT_OFFSET is used internally for handling records; it specifies the
2086 offset in bits, modulo 256, to avoid overflow cases.
2087
2088 See the x86-64 PS ABI for details.
2089 */
2090
2091 static int
2092 classify_argument (enum machine_mode mode, tree type,
2093 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
2094 {
2095 HOST_WIDE_INT bytes =
2096 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2097 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2098
2099 /* Variable sized entities are always passed/returned in memory. */
2100 if (bytes < 0)
2101 return 0;
2102
2103 if (mode != VOIDmode
2104 && targetm.calls.must_pass_in_stack (mode, type))
2105 return 0;
2106
2107 if (type && AGGREGATE_TYPE_P (type))
2108 {
2109 int i;
2110 tree field;
2111 enum x86_64_reg_class subclasses[MAX_CLASSES];
2112
2113 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
2114 if (bytes > 16)
2115 return 0;
2116
2117 for (i = 0; i < words; i++)
2118 classes[i] = X86_64_NO_CLASS;
2119
2120 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
2121 signal the memory class, so handle it as a special case. */
2122 if (!words)
2123 {
2124 classes[0] = X86_64_NO_CLASS;
2125 return 1;
2126 }
2127
2128 /* Classify each field of record and merge classes. */
2129 if (TREE_CODE (type) == RECORD_TYPE)
2130 {
2131 /* For classes, first merge in the fields of the base classes. */
2132 if (TYPE_BINFO (type))
2133 {
2134 tree binfo, base_binfo;
2135 int basenum;
2136
2137 for (binfo = TYPE_BINFO (type), basenum = 0;
2138 BINFO_BASE_ITERATE (binfo, basenum, base_binfo); basenum++)
2139 {
2140 int num;
2141 int offset = tree_low_cst (BINFO_OFFSET (base_binfo), 0) * 8;
2142 tree type = BINFO_TYPE (base_binfo);
2143
2144 num = classify_argument (TYPE_MODE (type),
2145 type, subclasses,
2146 (offset + bit_offset) % 256);
2147 if (!num)
2148 return 0;
2149 for (i = 0; i < num; i++)
2150 {
2151 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2152 classes[i + pos] =
2153 merge_classes (subclasses[i], classes[i + pos]);
2154 }
2155 }
2156 }
2157 /* And now merge the fields of the structure. */
2158 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2159 {
2160 if (TREE_CODE (field) == FIELD_DECL)
2161 {
2162 int num;
2163
2164 /* Bitfields are always classified as integer. Handle them
2165 early, since later code would consider them to be
2166 misaligned integers. */
2167 if (DECL_BIT_FIELD (field))
2168 {
2169 for (i = int_bit_position (field) / 8 / 8;
2170 i < (int_bit_position (field)
2171 + tree_low_cst (DECL_SIZE (field), 0)
2172 + 63) / 8 / 8; i++)
2173 classes[i] =
2174 merge_classes (X86_64_INTEGER_CLASS,
2175 classes[i]);
2176 }
2177 else
2178 {
2179 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2180 TREE_TYPE (field), subclasses,
2181 (int_bit_position (field)
2182 + bit_offset) % 256);
2183 if (!num)
2184 return 0;
2185 for (i = 0; i < num; i++)
2186 {
2187 int pos =
2188 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
2189 classes[i + pos] =
2190 merge_classes (subclasses[i], classes[i + pos]);
2191 }
2192 }
2193 }
2194 }
2195 }
2196 /* Arrays are handled as small records. */
2197 else if (TREE_CODE (type) == ARRAY_TYPE)
2198 {
2199 int num;
2200 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2201 TREE_TYPE (type), subclasses, bit_offset);
2202 if (!num)
2203 return 0;
2204
2205 /* The partial classes are now full classes. */
2206 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2207 subclasses[0] = X86_64_SSE_CLASS;
2208 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
2209 subclasses[0] = X86_64_INTEGER_CLASS;
2210
2211 for (i = 0; i < words; i++)
2212 classes[i] = subclasses[i % num];
2213 }
2214 /* Unions are similar to RECORD_TYPE but offset is always 0. */
2215 else if (TREE_CODE (type) == UNION_TYPE
2216 || TREE_CODE (type) == QUAL_UNION_TYPE)
2217 {
2218 /* For classes, first merge in the fields of the base classes. */
2219 if (TYPE_BINFO (type))
2220 {
2221 tree binfo, base_binfo;
2222 int basenum;
2223
2224 for (binfo = TYPE_BINFO (type), basenum = 0;
2225 BINFO_BASE_ITERATE (binfo, basenum, base_binfo); basenum++)
2226 {
2227 int num;
2228 int offset = tree_low_cst (BINFO_OFFSET (base_binfo), 0) * 8;
2229 tree type = BINFO_TYPE (base_binfo);
2230
2231 num = classify_argument (TYPE_MODE (type),
2232 type, subclasses,
2233 (offset + (bit_offset % 64)) % 256);
2234 if (!num)
2235 return 0;
2236 for (i = 0; i < num; i++)
2237 {
2238 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2239 classes[i + pos] =
2240 merge_classes (subclasses[i], classes[i + pos]);
2241 }
2242 }
2243 }
2244 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2245 {
2246 if (TREE_CODE (field) == FIELD_DECL)
2247 {
2248 int num;
2249 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2250 TREE_TYPE (field), subclasses,
2251 bit_offset);
2252 if (!num)
2253 return 0;
2254 for (i = 0; i < num; i++)
2255 classes[i] = merge_classes (subclasses[i], classes[i]);
2256 }
2257 }
2258 }
2259 else
2260 abort ();
2261
2262 /* Final merger cleanup. */
2263 for (i = 0; i < words; i++)
2264 {
2265 /* If one class is MEMORY, everything should be passed in
2266 memory. */
2267 if (classes[i] == X86_64_MEMORY_CLASS)
2268 return 0;
2269
2270 /* The X86_64_SSEUP_CLASS should be always preceded by
2271 X86_64_SSE_CLASS. */
2272 if (classes[i] == X86_64_SSEUP_CLASS
2273 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
2274 classes[i] = X86_64_SSE_CLASS;
2275
2276 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
2277 if (classes[i] == X86_64_X87UP_CLASS
2278 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
2279 classes[i] = X86_64_SSE_CLASS;
2280 }
2281 return words;
2282 }
2283
2284 /* Compute the alignment needed. We align all types to natural boundaries with
2285 the exception of XFmode, which is aligned to 64 bits. */
2286 if (mode != VOIDmode && mode != BLKmode)
2287 {
2288 int mode_alignment = GET_MODE_BITSIZE (mode);
2289
2290 if (mode == XFmode)
2291 mode_alignment = 128;
2292 else if (mode == XCmode)
2293 mode_alignment = 256;
2294 if (COMPLEX_MODE_P (mode))
2295 mode_alignment /= 2;
2296 /* Misaligned fields are always returned in memory. */
2297 if (bit_offset % mode_alignment)
2298 return 0;
2299 }
2300
2301 /* for V1xx modes, just use the base mode */
2302 if (VECTOR_MODE_P (mode)
2303 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
2304 mode = GET_MODE_INNER (mode);
2305
2306 /* Classification of atomic types. */
2307 switch (mode)
2308 {
2309 case DImode:
2310 case SImode:
2311 case HImode:
2312 case QImode:
2313 case CSImode:
2314 case CHImode:
2315 case CQImode:
2316 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2317 classes[0] = X86_64_INTEGERSI_CLASS;
2318 else
2319 classes[0] = X86_64_INTEGER_CLASS;
2320 return 1;
2321 case CDImode:
2322 case TImode:
2323 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2324 return 2;
2325 case CTImode:
2326 return 0;
2327 case SFmode:
2328 if (!(bit_offset % 64))
2329 classes[0] = X86_64_SSESF_CLASS;
2330 else
2331 classes[0] = X86_64_SSE_CLASS;
2332 return 1;
2333 case DFmode:
2334 classes[0] = X86_64_SSEDF_CLASS;
2335 return 1;
2336 case XFmode:
2337 classes[0] = X86_64_X87_CLASS;
2338 classes[1] = X86_64_X87UP_CLASS;
2339 return 2;
2340 case TFmode:
2341 classes[0] = X86_64_SSE_CLASS;
2342 classes[1] = X86_64_SSEUP_CLASS;
2343 return 2;
2344 case SCmode:
2345 classes[0] = X86_64_SSE_CLASS;
2346 return 1;
2347 case DCmode:
2348 classes[0] = X86_64_SSEDF_CLASS;
2349 classes[1] = X86_64_SSEDF_CLASS;
2350 return 2;
2351 case XCmode:
2352 case TCmode:
2353 /* These modes are larger than 16 bytes. */
2354 return 0;
2355 case V4SFmode:
2356 case V4SImode:
2357 case V16QImode:
2358 case V8HImode:
2359 case V2DFmode:
2360 case V2DImode:
2361 classes[0] = X86_64_SSE_CLASS;
2362 classes[1] = X86_64_SSEUP_CLASS;
2363 return 2;
2364 case V2SFmode:
2365 case V2SImode:
2366 case V4HImode:
2367 case V8QImode:
2368 classes[0] = X86_64_SSE_CLASS;
2369 return 1;
2370 case BLKmode:
2371 case VOIDmode:
2372 return 0;
2373 default:
2374 if (VECTOR_MODE_P (mode))
2375 {
2376 if (bytes > 16)
2377 return 0;
2378 if (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT)
2379 {
2380 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2381 classes[0] = X86_64_INTEGERSI_CLASS;
2382 else
2383 classes[0] = X86_64_INTEGER_CLASS;
2384 classes[1] = X86_64_INTEGER_CLASS;
2385 return 1 + (bytes > 8);
2386 }
2387 }
2388 abort ();
2389 }
2390 }
2391
2392 /* Examine the argument and return the number of registers required in each
2393 class. Return 0 iff the parameter should be passed in memory. */
2394 static int
2395 examine_argument (enum machine_mode mode, tree type, int in_return,
2396 int *int_nregs, int *sse_nregs)
2397 {
2398 enum x86_64_reg_class class[MAX_CLASSES];
2399 int n = classify_argument (mode, type, class, 0);
2400
2401 *int_nregs = 0;
2402 *sse_nregs = 0;
2403 if (!n)
2404 return 0;
2405 for (n--; n >= 0; n--)
2406 switch (class[n])
2407 {
2408 case X86_64_INTEGER_CLASS:
2409 case X86_64_INTEGERSI_CLASS:
2410 (*int_nregs)++;
2411 break;
2412 case X86_64_SSE_CLASS:
2413 case X86_64_SSESF_CLASS:
2414 case X86_64_SSEDF_CLASS:
2415 (*sse_nregs)++;
2416 break;
2417 case X86_64_NO_CLASS:
2418 case X86_64_SSEUP_CLASS:
2419 break;
2420 case X86_64_X87_CLASS:
2421 case X86_64_X87UP_CLASS:
2422 if (!in_return)
2423 return 0;
2424 break;
2425 case X86_64_MEMORY_CLASS:
2426 abort ();
2427 }
2428 return 1;
2429 }
2430 /* Construct container for the argument used by GCC interface. See
2431 FUNCTION_ARG for the detailed description. */
2432 static rtx
2433 construct_container (enum machine_mode mode, tree type, int in_return,
2434 int nintregs, int nsseregs, const int * intreg,
2435 int sse_regno)
2436 {
2437 enum machine_mode tmpmode;
2438 int bytes =
2439 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2440 enum x86_64_reg_class class[MAX_CLASSES];
2441 int n;
2442 int i;
2443 int nexps = 0;
2444 int needed_sseregs, needed_intregs;
2445 rtx exp[MAX_CLASSES];
2446 rtx ret;
2447
2448 n = classify_argument (mode, type, class, 0);
2449 if (TARGET_DEBUG_ARG)
2450 {
2451 if (!n)
2452 fprintf (stderr, "Memory class\n");
2453 else
2454 {
2455 fprintf (stderr, "Classes:");
2456 for (i = 0; i < n; i++)
2457 {
2458 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2459 }
2460 fprintf (stderr, "\n");
2461 }
2462 }
2463 if (!n)
2464 return NULL;
2465 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
2466 return NULL;
2467 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2468 return NULL;
2469
2470 /* First construct simple cases. Avoid SCmode, since we want to use
2471 a single register to pass this type. */
2472 if (n == 1 && mode != SCmode)
2473 switch (class[0])
2474 {
2475 case X86_64_INTEGER_CLASS:
2476 case X86_64_INTEGERSI_CLASS:
2477 return gen_rtx_REG (mode, intreg[0]);
2478 case X86_64_SSE_CLASS:
2479 case X86_64_SSESF_CLASS:
2480 case X86_64_SSEDF_CLASS:
2481 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2482 case X86_64_X87_CLASS:
2483 return gen_rtx_REG (mode, FIRST_STACK_REG);
2484 case X86_64_NO_CLASS:
2485 /* Zero sized array, struct or class. */
2486 return NULL;
2487 default:
2488 abort ();
2489 }
2490 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
2491 && mode != BLKmode)
2492 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2493 if (n == 2
2494 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2495 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
2496 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2497 && class[1] == X86_64_INTEGER_CLASS
2498 && (mode == CDImode || mode == TImode || mode == TFmode)
2499 && intreg[0] + 1 == intreg[1])
2500 return gen_rtx_REG (mode, intreg[0]);
2501 if (n == 4
2502 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
2503 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS
2504 && mode != BLKmode)
2505 return gen_rtx_REG (XCmode, FIRST_STACK_REG);
2506
2507 /* Otherwise figure out the entries of the PARALLEL. */
2508 for (i = 0; i < n; i++)
2509 {
2510 switch (class[i])
2511 {
2512 case X86_64_NO_CLASS:
2513 break;
2514 case X86_64_INTEGER_CLASS:
2515 case X86_64_INTEGERSI_CLASS:
2516 /* Merge TImodes on aligned occasions here too. */
2517 if (i * 8 + 8 > bytes)
2518 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2519 else if (class[i] == X86_64_INTEGERSI_CLASS)
2520 tmpmode = SImode;
2521 else
2522 tmpmode = DImode;
2523 /* We've requested 24 bytes we don't have a mode for. Use DImode. */
2524 if (tmpmode == BLKmode)
2525 tmpmode = DImode;
2526 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2527 gen_rtx_REG (tmpmode, *intreg),
2528 GEN_INT (i*8));
2529 intreg++;
2530 break;
2531 case X86_64_SSESF_CLASS:
2532 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2533 gen_rtx_REG (SFmode,
2534 SSE_REGNO (sse_regno)),
2535 GEN_INT (i*8));
2536 sse_regno++;
2537 break;
2538 case X86_64_SSEDF_CLASS:
2539 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2540 gen_rtx_REG (DFmode,
2541 SSE_REGNO (sse_regno)),
2542 GEN_INT (i*8));
2543 sse_regno++;
2544 break;
2545 case X86_64_SSE_CLASS:
2546 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2547 tmpmode = TImode;
2548 else
2549 tmpmode = DImode;
2550 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2551 gen_rtx_REG (tmpmode,
2552 SSE_REGNO (sse_regno)),
2553 GEN_INT (i*8));
2554 if (tmpmode == TImode)
2555 i++;
2556 sse_regno++;
2557 break;
2558 default:
2559 abort ();
2560 }
2561 }
2562 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2563 for (i = 0; i < nexps; i++)
2564 XVECEXP (ret, 0, i) = exp [i];
2565 return ret;
2566 }
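/* Editorial note (an illustration, not part of the original source): for the
   two-eightbyte structure used in the example after merge_classes above,
   construct_container builds a PARALLEL roughly of the shape

     (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                (expr_list (reg:DI di)   (const_int 8))])

   i.e. each element pairs the register carrying one eightbyte with that
   eightbyte's byte offset inside the argument.  */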
2567
2568 /* Update the data in CUM to advance over an argument
2569 of mode MODE and data type TYPE.
2570 (TYPE is null for libcalls where that information may not be available.) */
2571
2572 void
2573 function_arg_advance (CUMULATIVE_ARGS *cum, /* current arg information */
2574 enum machine_mode mode, /* current arg mode */
2575 tree type, /* type of the argument or 0 if lib support */
2576 int named) /* whether or not the argument was named */
2577 {
2578 int bytes =
2579 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2580 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2581
2582 if (TARGET_DEBUG_ARG)
2583 fprintf (stderr,
2584 "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, mode=%s, named=%d)\n\n",
2585 words, cum->words, cum->nregs, cum->sse_nregs, GET_MODE_NAME (mode), named);
2586 if (TARGET_64BIT)
2587 {
2588 int int_nregs, sse_nregs;
2589 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2590 cum->words += words;
2591 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2592 {
2593 cum->nregs -= int_nregs;
2594 cum->sse_nregs -= sse_nregs;
2595 cum->regno += int_nregs;
2596 cum->sse_regno += sse_nregs;
2597 }
2598 else
2599 cum->words += words;
2600 }
2601 else
2602 {
2603 if (TARGET_SSE && SSE_REG_MODE_P (mode)
2604 && (!type || !AGGREGATE_TYPE_P (type)))
2605 {
2606 cum->sse_words += words;
2607 cum->sse_nregs -= 1;
2608 cum->sse_regno += 1;
2609 if (cum->sse_nregs <= 0)
2610 {
2611 cum->sse_nregs = 0;
2612 cum->sse_regno = 0;
2613 }
2614 }
2615 else if (TARGET_MMX && MMX_REG_MODE_P (mode)
2616 && (!type || !AGGREGATE_TYPE_P (type)))
2617 {
2618 cum->mmx_words += words;
2619 cum->mmx_nregs -= 1;
2620 cum->mmx_regno += 1;
2621 if (cum->mmx_nregs <= 0)
2622 {
2623 cum->mmx_nregs = 0;
2624 cum->mmx_regno = 0;
2625 }
2626 }
2627 else
2628 {
2629 cum->words += words;
2630 cum->nregs -= words;
2631 cum->regno += words;
2632
2633 if (cum->nregs <= 0)
2634 {
2635 cum->nregs = 0;
2636 cum->regno = 0;
2637 }
2638 }
2639 }
2640 return;
2641 }
2642
2643 /* Define where to put the arguments to a function.
2644 Value is zero to push the argument on the stack,
2645 or a hard register in which to store the argument.
2646
2647 MODE is the argument's machine mode.
2648 TYPE is the data type of the argument (as a tree).
2649 This is null for libcalls where that information may
2650 not be available.
2651 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2652 the preceding args and about the function being called.
2653 NAMED is nonzero if this argument is a named parameter
2654 (otherwise it is an extra parameter matching an ellipsis). */
2655
2656 rtx
2657 function_arg (CUMULATIVE_ARGS *cum, /* current arg information */
2658 enum machine_mode mode, /* current arg mode */
2659 tree type, /* type of the argument or 0 if lib support */
2660 int named) /* != 0 for normal args, == 0 for ... args */
2661 {
2662 rtx ret = NULL_RTX;
2663 int bytes =
2664 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2665 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2666 static bool warnedsse, warnedmmx;
2667
2668 /* To simplify the code below, represent vector types with a vector mode
2669 even if MMX/SSE are not active. */
2670 if (type
2671 && TREE_CODE (type) == VECTOR_TYPE
2672 && (bytes == 8 || bytes == 16)
2673 && GET_MODE_CLASS (TYPE_MODE (type)) != MODE_VECTOR_INT
2674 && GET_MODE_CLASS (TYPE_MODE (type)) != MODE_VECTOR_FLOAT)
2675 {
2676 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
2677 enum machine_mode newmode
2678 = TREE_CODE (TREE_TYPE (type)) == REAL_TYPE
2679 ? MIN_MODE_VECTOR_FLOAT : MIN_MODE_VECTOR_INT;
2680
2681 /* Get the mode which has this inner mode and number of units. */
2682 for (; newmode != VOIDmode; newmode = GET_MODE_WIDER_MODE (newmode))
2683 if (GET_MODE_NUNITS (newmode) == TYPE_VECTOR_SUBPARTS (type)
2684 && GET_MODE_INNER (newmode) == innermode)
2685 {
2686 mode = newmode;
2687 break;
2688 }
2689 }
2690
2691 /* Handle a hidden AL argument containing the number of registers for varargs
2692 x86-64 functions. For the i386 ABI just return constm1_rtx to avoid
2693 any AL settings. */
2694 if (mode == VOIDmode)
2695 {
2696 if (TARGET_64BIT)
2697 return GEN_INT (cum->maybe_vaarg
2698 ? (cum->sse_nregs < 0
2699 ? SSE_REGPARM_MAX
2700 : cum->sse_regno)
2701 : -1);
2702 else
2703 return constm1_rtx;
2704 }
2705 if (TARGET_64BIT)
2706 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2707 &x86_64_int_parameter_registers [cum->regno],
2708 cum->sse_regno);
2709 else
2710 switch (mode)
2711 {
2712 /* For now, pass fp/complex values on the stack. */
2713 default:
2714 break;
2715
2716 case BLKmode:
2717 if (bytes < 0)
2718 break;
2719 /* FALLTHRU */
2720 case DImode:
2721 case SImode:
2722 case HImode:
2723 case QImode:
2724 if (words <= cum->nregs)
2725 {
2726 int regno = cum->regno;
2727
2728 /* Fastcall allocates the first two DWORD (SImode) or
2729 smaller arguments to ECX and EDX. */
2730 if (cum->fastcall)
2731 {
2732 if (mode == BLKmode || mode == DImode)
2733 break;
2734
2735 /* ECX, not EAX, is the first allocated register. */
2736 if (regno == 0)
2737 regno = 2;
2738 }
2739 ret = gen_rtx_REG (mode, regno);
2740 }
2741 break;
2742 case TImode:
2743 case V16QImode:
2744 case V8HImode:
2745 case V4SImode:
2746 case V2DImode:
2747 case V4SFmode:
2748 case V2DFmode:
2749 if (!type || !AGGREGATE_TYPE_P (type))
2750 {
2751 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
2752 {
2753 warnedsse = true;
2754 warning ("SSE vector argument without SSE enabled "
2755 "changes the ABI");
2756 }
2757 if (cum->sse_nregs)
2758 ret = gen_rtx_REG (mode, cum->sse_regno + FIRST_SSE_REG);
2759 }
2760 break;
2761 case V8QImode:
2762 case V4HImode:
2763 case V2SImode:
2764 case V2SFmode:
2765 if (!type || !AGGREGATE_TYPE_P (type))
2766 {
2767 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
2768 {
2769 warnedmmx = true;
2770 warning ("MMX vector argument without MMX enabled "
2771 "changes the ABI");
2772 }
2773 if (cum->mmx_nregs)
2774 ret = gen_rtx_REG (mode, cum->mmx_regno + FIRST_MMX_REG);
2775 }
2776 break;
2777 }
2778
2779 if (TARGET_DEBUG_ARG)
2780 {
2781 fprintf (stderr,
2782 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2783 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2784
2785 if (ret)
2786 print_simple_rtl (stderr, ret);
2787 else
2788 fprintf (stderr, ", stack");
2789
2790 fprintf (stderr, " )\n");
2791 }
2792
2793 return ret;
2794 }
2795
2796 /* A C expression that indicates when an argument must be passed by
2797 reference. If nonzero for an argument, a copy of that argument is
2798 made in memory and a pointer to the argument is passed instead of
2799 the argument itself. The pointer is passed in whatever way is
2800 appropriate for passing a pointer to that type. */
2801
2802 static bool
2803 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
2804 enum machine_mode mode ATTRIBUTE_UNUSED,
2805 tree type, bool named ATTRIBUTE_UNUSED)
2806 {
2807 if (!TARGET_64BIT)
2808 return 0;
2809
2810 if (type && int_size_in_bytes (type) == -1)
2811 {
2812 if (TARGET_DEBUG_ARG)
2813 fprintf (stderr, "function_arg_pass_by_reference\n");
2814 return 1;
2815 }
2816
2817 return 0;
2818 }
2819
2820 /* Return true when TYPE should be 128-bit aligned for the 32-bit argument
2821 passing ABI. Only called if TARGET_SSE. */
2822 static bool
2823 contains_128bit_aligned_vector_p (tree type)
2824 {
2825 enum machine_mode mode = TYPE_MODE (type);
2826 if (SSE_REG_MODE_P (mode)
2827 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
2828 return true;
2829 if (TYPE_ALIGN (type) < 128)
2830 return false;
2831
2832 if (AGGREGATE_TYPE_P (type))
2833 {
2834 /* Walk the aggregates recursively. */
2835 if (TREE_CODE (type) == RECORD_TYPE
2836 || TREE_CODE (type) == UNION_TYPE
2837 || TREE_CODE (type) == QUAL_UNION_TYPE)
2838 {
2839 tree field;
2840
2841 if (TYPE_BINFO (type))
2842 {
2843 tree binfo, base_binfo;
2844 int i;
2845
2846 for (binfo = TYPE_BINFO (type), i = 0;
2847 BINFO_BASE_ITERATE (binfo, i, base_binfo); i++)
2848 if (contains_128bit_aligned_vector_p (BINFO_TYPE (base_binfo)))
2849 return true;
2850 }
2851 /* And now merge the fields of the structure. */
2852 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2853 {
2854 if (TREE_CODE (field) == FIELD_DECL
2855 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
2856 return true;
2857 }
2858 }
2859 /* Just for use if some languages pass arrays by value. */
2860 else if (TREE_CODE (type) == ARRAY_TYPE)
2861 {
2862 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
2863 return true;
2864 }
2865 else
2866 abort ();
2867 }
2868 return false;
2869 }
2870
2871 /* Gives the alignment boundary, in bits, of an argument with the
2872 specified mode and type. */
2873
2874 int
2875 ix86_function_arg_boundary (enum machine_mode mode, tree type)
2876 {
2877 int align;
2878 if (type)
2879 align = TYPE_ALIGN (type);
2880 else
2881 align = GET_MODE_ALIGNMENT (mode);
2882 if (align < PARM_BOUNDARY)
2883 align = PARM_BOUNDARY;
2884 if (!TARGET_64BIT)
2885 {
2886 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
2887 make an exception for SSE modes since these require 128bit
2888 alignment.
2889
2890 The handling here differs from field_alignment. ICC aligns MMX
2891 arguments to 4 byte boundaries, while structure fields are aligned
2892 to 8 byte boundaries. */
2893 if (!TARGET_SSE)
2894 align = PARM_BOUNDARY;
2895 else if (!type)
2896 {
2897 if (!SSE_REG_MODE_P (mode))
2898 align = PARM_BOUNDARY;
2899 }
2900 else
2901 {
2902 if (!contains_128bit_aligned_vector_p (type))
2903 align = PARM_BOUNDARY;
2904 }
2905 }
2906 if (align > 128)
2907 align = 128;
2908 return align;
2909 }
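/* Editorial note (an illustration, not part of the original source): on a
   32-bit target with SSE enabled, an __m128 argument -- or any aggregate
   containing a 128-bit aligned vector -- is reported as needing 128-bit
   stack alignment, while scalar arguments stay at PARM_BOUNDARY; with
   -mno-sse everything falls back to PARM_BOUNDARY, matching the plain
   i386 ABI.  */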
2910
2911 /* Return true if N is a possible register number of function value. */
2912 bool
2913 ix86_function_value_regno_p (int regno)
2914 {
2915 if (!TARGET_64BIT)
2916 {
2917 return ((regno) == 0
2918 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2919 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2920 }
2921 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2922 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2923 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2924 }
2925
2926 /* Define how to find the value returned by a function.
2927 VALTYPE is the data type of the value (as a tree).
2928 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2929 otherwise, FUNC is 0. */
2930 rtx
2931 ix86_function_value (tree valtype)
2932 {
2933 if (TARGET_64BIT)
2934 {
2935 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2936 REGPARM_MAX, SSE_REGPARM_MAX,
2937 x86_64_int_return_registers, 0);
2938 /* For zero sized structures, construct_container returns NULL, but we need
2939 to keep the rest of the compiler happy by returning a meaningful value. */
2940 if (!ret)
2941 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2942 return ret;
2943 }
2944 else
2945 return gen_rtx_REG (TYPE_MODE (valtype),
2946 ix86_value_regno (TYPE_MODE (valtype)));
2947 }
2948
2949 /* Return true iff TYPE is returned in memory. */
2950 int
2951 ix86_return_in_memory (tree type)
2952 {
2953 int needed_intregs, needed_sseregs, size;
2954 enum machine_mode mode = TYPE_MODE (type);
2955
2956 if (TARGET_64BIT)
2957 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
2958
2959 if (mode == BLKmode)
2960 return 1;
2961
2962 size = int_size_in_bytes (type);
2963
2964 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
2965 return 0;
2966
2967 if (VECTOR_MODE_P (mode) || mode == TImode)
2968 {
2969 /* User-created vectors small enough to fit in EAX. */
2970 if (size < 8)
2971 return 0;
2972
2973 /* MMX/3dNow values are returned on the stack, since we've
2974 got to EMMS/FEMMS before returning. */
2975 if (size == 8)
2976 return 1;
2977
2978 /* SSE values are returned in XMM0, except when it doesn't exist. */
2979 if (size == 16)
2980 return (TARGET_SSE ? 0 : 1);
2981 }
2982
2983 if (mode == XFmode)
2984 return 0;
2985
2986 if (size > 12)
2987 return 1;
2988 return 0;
2989 }
2990
2991 /* When returning SSE vector types, we have a choice of either
2992 (1) being ABI incompatible with a -march switch, or
2993 (2) generating an error.
2994 Given no good solution, I think the safest thing is one warning.
2995 The user won't be able to use -Werror, but....
2996
2997 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
2998 called in response to actually generating a caller or callee that
2999 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
3000 via aggregate_value_p for general type probing from tree-ssa. */
3001
3002 static rtx
3003 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
3004 {
3005 static bool warned;
3006
3007 if (!TARGET_SSE && type && !warned)
3008 {
3009 /* Look at the return type of the function, not the function type. */
3010 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
3011
3012 if (mode == TImode
3013 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3014 {
3015 warned = true;
3016 warning ("SSE vector return without SSE enabled changes the ABI");
3017 }
3018 }
3019
3020 return NULL;
3021 }
3022
3023 /* Define how to find the value returned by a library function
3024 assuming the value has mode MODE. */
3025 rtx
3026 ix86_libcall_value (enum machine_mode mode)
3027 {
3028 if (TARGET_64BIT)
3029 {
3030 switch (mode)
3031 {
3032 case SFmode:
3033 case SCmode:
3034 case DFmode:
3035 case DCmode:
3036 case TFmode:
3037 return gen_rtx_REG (mode, FIRST_SSE_REG);
3038 case XFmode:
3039 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
3040 case XCmode:
3041 case TCmode:
3042 return NULL;
3043 default:
3044 return gen_rtx_REG (mode, 0);
3045 }
3046 }
3047 else
3048 return gen_rtx_REG (mode, ix86_value_regno (mode));
3049 }
3050
3051 /* Given a mode, return the register to use for a return value. */
3052
3053 static int
3054 ix86_value_regno (enum machine_mode mode)
3055 {
3056 /* Floating point return values in %st(0). */
3057 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
3058 return FIRST_FLOAT_REG;
3059 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
3060 we prevent this case when sse is not available. */
3061 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3062 return FIRST_SSE_REG;
3063 /* Everything else in %eax. */
3064 return 0;
3065 }
3066 \f
3067 /* Create the va_list data type. */
3068
3069 static tree
3070 ix86_build_builtin_va_list (void)
3071 {
3072 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
3073
3074 /* For i386 we use plain pointer to argument area. */
3075 if (!TARGET_64BIT)
3076 return build_pointer_type (char_type_node);
3077
3078 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
3079 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
3080
3081 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
3082 unsigned_type_node);
3083 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
3084 unsigned_type_node);
3085 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
3086 ptr_type_node);
3087 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
3088 ptr_type_node);
3089
3090 DECL_FIELD_CONTEXT (f_gpr) = record;
3091 DECL_FIELD_CONTEXT (f_fpr) = record;
3092 DECL_FIELD_CONTEXT (f_ovf) = record;
3093 DECL_FIELD_CONTEXT (f_sav) = record;
3094
3095 TREE_CHAIN (record) = type_decl;
3096 TYPE_NAME (record) = type_decl;
3097 TYPE_FIELDS (record) = f_gpr;
3098 TREE_CHAIN (f_gpr) = f_fpr;
3099 TREE_CHAIN (f_fpr) = f_ovf;
3100 TREE_CHAIN (f_ovf) = f_sav;
3101
3102 layout_type (record);
3103
3104 /* The correct type is an array type of one element. */
3105 return build_array_type (record, build_index_type (size_zero_node));
3106 }
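/* Editorial note (an approximation, not part of the original source): the
   record built above corresponds to the C-level va_list of the x86-64 psABI,
   roughly

     typedef struct __va_list_tag {
       unsigned int gp_offset;        offset of the next GP register arg in reg_save_area
       unsigned int fp_offset;        offset of the next SSE register arg in reg_save_area
       void *overflow_arg_area;       next stack-passed argument
       void *reg_save_area;           start of the register save block
     } __builtin_va_list[1];

   The one-element array type is what makes va_list decay to a pointer when
   passed between functions.  */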
3107
3108 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
3109
3110 static void
3111 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3112 tree type, int *pretend_size ATTRIBUTE_UNUSED,
3113 int no_rtl)
3114 {
3115 CUMULATIVE_ARGS next_cum;
3116 rtx save_area = NULL_RTX, mem;
3117 rtx label;
3118 rtx label_ref;
3119 rtx tmp_reg;
3120 rtx nsse_reg;
3121 int set;
3122 tree fntype;
3123 int stdarg_p;
3124 int i;
3125
3126 if (!TARGET_64BIT)
3127 return;
3128
3129 /* Indicate to allocate space on the stack for varargs save area. */
3130 ix86_save_varrargs_registers = 1;
3131
3132 cfun->stack_alignment_needed = 128;
3133
3134 fntype = TREE_TYPE (current_function_decl);
3135 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
3136 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
3137 != void_type_node));
3138
3139 /* For varargs, we do not want to skip the dummy va_dcl argument.
3140 For stdargs, we do want to skip the last named argument. */
3141 next_cum = *cum;
3142 if (stdarg_p)
3143 function_arg_advance (&next_cum, mode, type, 1);
3144
3145 if (!no_rtl)
3146 save_area = frame_pointer_rtx;
3147
3148 set = get_varargs_alias_set ();
3149
3150 for (i = next_cum.regno; i < ix86_regparm; i++)
3151 {
3152 mem = gen_rtx_MEM (Pmode,
3153 plus_constant (save_area, i * UNITS_PER_WORD));
3154 set_mem_alias_set (mem, set);
3155 emit_move_insn (mem, gen_rtx_REG (Pmode,
3156 x86_64_int_parameter_registers[i]));
3157 }
3158
3159 if (next_cum.sse_nregs)
3160 {
3161 /* Now emit code to save SSE registers. The AX parameter contains the number
3162 of SSE parameter registers used to call this function. We use the
3163 sse_prologue_save insn template that produces a computed jump across
3164 the SSE saves. We need some preparation work to get this working. */
3165
3166 label = gen_label_rtx ();
3167 label_ref = gen_rtx_LABEL_REF (Pmode, label);
3168
3169 /* Compute the address to jump to:
3170 label - eax*4 + nnamed_sse_arguments*4; the factor matches the GEN_INT (4) used below. */
3171 tmp_reg = gen_reg_rtx (Pmode);
3172 nsse_reg = gen_reg_rtx (Pmode);
3173 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
3174 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3175 gen_rtx_MULT (Pmode, nsse_reg,
3176 GEN_INT (4))));
3177 if (next_cum.sse_regno)
3178 emit_move_insn
3179 (nsse_reg,
3180 gen_rtx_CONST (DImode,
3181 gen_rtx_PLUS (DImode,
3182 label_ref,
3183 GEN_INT (next_cum.sse_regno * 4))));
3184 else
3185 emit_move_insn (nsse_reg, label_ref);
3186 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
3187
3188 /* Compute the address of the memory block we save into. We always use a
3189 pointer pointing 127 bytes after the first byte to store - this is needed
3190 to keep the instruction size limited to 4 bytes. */
3191 tmp_reg = gen_reg_rtx (Pmode);
3192 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3193 plus_constant (save_area,
3194 8 * REGPARM_MAX + 127)));
3195 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
3196 set_mem_alias_set (mem, set);
3197 set_mem_align (mem, BITS_PER_WORD);
3198
3199 /* And finally do the dirty job! */
3200 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
3201 GEN_INT (next_cum.sse_regno), label));
3202 }
3203
3204 }
3205
3206 /* Implement va_start. */
3207
3208 void
3209 ix86_va_start (tree valist, rtx nextarg)
3210 {
3211 HOST_WIDE_INT words, n_gpr, n_fpr;
3212 tree f_gpr, f_fpr, f_ovf, f_sav;
3213 tree gpr, fpr, ovf, sav, t;
3214
3215 /* Only 64bit target needs something special. */
3216 if (!TARGET_64BIT)
3217 {
3218 std_expand_builtin_va_start (valist, nextarg);
3219 return;
3220 }
3221
3222 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3223 f_fpr = TREE_CHAIN (f_gpr);
3224 f_ovf = TREE_CHAIN (f_fpr);
3225 f_sav = TREE_CHAIN (f_ovf);
3226
3227 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3228 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
3229 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
3230 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
3231 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
3232
3233 /* Count number of gp and fp argument registers used. */
3234 words = current_function_args_info.words;
3235 n_gpr = current_function_args_info.regno;
3236 n_fpr = current_function_args_info.sse_regno;
3237
3238 if (TARGET_DEBUG_ARG)
3239 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
3240 (int) words, (int) n_gpr, (int) n_fpr);
3241
3242 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
3243 build_int_cst (NULL_TREE, n_gpr * 8));
3244 TREE_SIDE_EFFECTS (t) = 1;
3245 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3246
3247 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
3248 build_int_cst (NULL_TREE, n_fpr * 16 + 8*REGPARM_MAX));
3249 TREE_SIDE_EFFECTS (t) = 1;
3250 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3251
3252 /* Find the overflow area. */
3253 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
3254 if (words != 0)
3255 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
3256 build_int_cst (NULL_TREE, words * UNITS_PER_WORD));
3257 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3258 TREE_SIDE_EFFECTS (t) = 1;
3259 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3260
3261 /* Find the register save area.
3262 The prologue of the function saves it right above the stack frame. */
3263 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
3264 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
3265 TREE_SIDE_EFFECTS (t) = 1;
3266 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3267 }
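/* For reference, the va_list object initialized above follows the layout
   required by the x86-64 psABI:

       typedef struct {
         unsigned int gp_offset;       -- set to n_gpr * 8 above
         unsigned int fp_offset;       -- set to n_fpr * 16 + 8 * REGPARM_MAX
         void *overflow_arg_area;      -- the stack overflow area (ovf)
         void *reg_save_area;          -- the register save area (sav)
       } va_list[1];
*/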
3268
3269 /* Implement va_arg. */
3270
3271 tree
3272 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
3273 {
3274 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
3275 tree f_gpr, f_fpr, f_ovf, f_sav;
3276 tree gpr, fpr, ovf, sav, t;
3277 int size, rsize;
3278 tree lab_false, lab_over = NULL_TREE;
3279 tree addr, t2;
3280 rtx container;
3281 int indirect_p = 0;
3282 tree ptrtype;
3283
3284 /* Only 64bit target needs something special. */
3285 if (!TARGET_64BIT)
3286 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
3287
3288 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3289 f_fpr = TREE_CHAIN (f_gpr);
3290 f_ovf = TREE_CHAIN (f_fpr);
3291 f_sav = TREE_CHAIN (f_ovf);
3292
3293 valist = build_va_arg_indirect_ref (valist);
3294 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
3295 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
3296 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
3297 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
3298
3299 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
3300 if (indirect_p)
3301 type = build_pointer_type (type);
3302 size = int_size_in_bytes (type);
3303 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3304
3305 container = construct_container (TYPE_MODE (type), type, 0,
3306 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
3307 /* Pull the value out of the saved registers ...  */
3310
3311 addr = create_tmp_var (ptr_type_node, "addr");
3312 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
3313
3314 if (container)
3315 {
3316 int needed_intregs, needed_sseregs;
3317 bool need_temp;
3318 tree int_addr, sse_addr;
3319
3320 lab_false = create_artificial_label ();
3321 lab_over = create_artificial_label ();
3322
3323 examine_argument (TYPE_MODE (type), type, 0,
3324 &needed_intregs, &needed_sseregs);
3325
3326 need_temp = (!REG_P (container)
3327 && ((needed_intregs && TYPE_ALIGN (type) > 64)
3328 || TYPE_ALIGN (type) > 128));
3329
3330 /* If we are passing a structure, verify that it is a consecutive block
3331 in the register save area.  If not, we need to do moves.  */
3332 if (!need_temp && !REG_P (container))
3333 {
3334 /* Verify that all registers are strictly consecutive */
3335 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
3336 {
3337 int i;
3338
3339 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3340 {
3341 rtx slot = XVECEXP (container, 0, i);
3342 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
3343 || INTVAL (XEXP (slot, 1)) != i * 16)
3344 need_temp = 1;
3345 }
3346 }
3347 else
3348 {
3349 int i;
3350
3351 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3352 {
3353 rtx slot = XVECEXP (container, 0, i);
3354 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
3355 || INTVAL (XEXP (slot, 1)) != i * 8)
3356 need_temp = 1;
3357 }
3358 }
3359 }
3360 if (!need_temp)
3361 {
3362 int_addr = addr;
3363 sse_addr = addr;
3364 }
3365 else
3366 {
3367 int_addr = create_tmp_var (ptr_type_node, "int_addr");
3368 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
3369 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
3370 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
3371 }
3372
3373 /* First ensure that we fit completely in registers. */
3374 if (needed_intregs)
3375 {
3376 t = build_int_cst (TREE_TYPE (gpr),
3377 (REGPARM_MAX - needed_intregs + 1) * 8);
3378 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
3379 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
3380 t = build (COND_EXPR, void_type_node, t, t2, NULL_TREE);
3381 gimplify_and_add (t, pre_p);
3382 }
3383 if (needed_sseregs)
3384 {
3385 t = build_int_cst (TREE_TYPE (fpr),
3386 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
3387 + REGPARM_MAX * 8);
3388 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
3389 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
3390 t = build (COND_EXPR, void_type_node, t, t2, NULL_TREE);
3391 gimplify_and_add (t, pre_p);
3392 }
3393
3394 /* Compute index to start of area used for integer regs. */
3395 if (needed_intregs)
3396 {
3397 /* int_addr = gpr + sav; */
3398 t = fold_convert (ptr_type_node, gpr);
3399 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
3400 t = build2 (MODIFY_EXPR, void_type_node, int_addr, t);
3401 gimplify_and_add (t, pre_p);
3402 }
3403 if (needed_sseregs)
3404 {
3405 /* sse_addr = fpr + sav; */
3406 t = fold_convert (ptr_type_node, fpr);
3407 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
3408 t = build2 (MODIFY_EXPR, void_type_node, sse_addr, t);
3409 gimplify_and_add (t, pre_p);
3410 }
3411 if (need_temp)
3412 {
3413 int i;
3414 tree temp = create_tmp_var (type, "va_arg_tmp");
3415
3416 /* addr = &temp; */
3417 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
3418 t = build2 (MODIFY_EXPR, void_type_node, addr, t);
3419 gimplify_and_add (t, pre_p);
3420
3421 for (i = 0; i < XVECLEN (container, 0); i++)
3422 {
3423 rtx slot = XVECEXP (container, 0, i);
3424 rtx reg = XEXP (slot, 0);
3425 enum machine_mode mode = GET_MODE (reg);
3426 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
3427 tree addr_type = build_pointer_type (piece_type);
3428 tree src_addr, src;
3429 int src_offset;
3430 tree dest_addr, dest;
3431
3432 if (SSE_REGNO_P (REGNO (reg)))
3433 {
3434 src_addr = sse_addr;
3435 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
3436 }
3437 else
3438 {
3439 src_addr = int_addr;
3440 src_offset = REGNO (reg) * 8;
3441 }
3442 src_addr = fold_convert (addr_type, src_addr);
3443 src_addr = fold (build2 (PLUS_EXPR, addr_type, src_addr,
3444 size_int (src_offset)));
3445 src = build_va_arg_indirect_ref (src_addr);
3446
3447 dest_addr = fold_convert (addr_type, addr);
3448 dest_addr = fold (build2 (PLUS_EXPR, addr_type, dest_addr,
3449 size_int (INTVAL (XEXP (slot, 1)))));
3450 dest = build_va_arg_indirect_ref (dest_addr);
3451
3452 t = build2 (MODIFY_EXPR, void_type_node, dest, src);
3453 gimplify_and_add (t, pre_p);
3454 }
3455 }
3456
3457 if (needed_intregs)
3458 {
3459 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
3460 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
3461 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
3462 gimplify_and_add (t, pre_p);
3463 }
3464 if (needed_sseregs)
3465 {
3466 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
3467 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
3468 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
3469 gimplify_and_add (t, pre_p);
3470 }
3471
3472 t = build1 (GOTO_EXPR, void_type_node, lab_over);
3473 gimplify_and_add (t, pre_p);
3474
3475 t = build1 (LABEL_EXPR, void_type_node, lab_false);
3476 append_to_statement_list (t, pre_p);
3477 }
3478
3479 /* ... otherwise out of the overflow area. */
3480
3481 /* Care for on-stack alignment if needed. */
3482 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
3483 t = ovf;
3484 else
3485 {
3486 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
3487 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf,
3488 build_int_cst (TREE_TYPE (ovf), align - 1));
3489 t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
3490 build_int_cst (TREE_TYPE (t), -align));
3491 }
3492 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
3493
3494 t2 = build2 (MODIFY_EXPR, void_type_node, addr, t);
3495 gimplify_and_add (t2, pre_p);
3496
3497 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
3498 build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD));
3499 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3500 gimplify_and_add (t, pre_p);
3501
3502 if (container)
3503 {
3504 t = build1 (LABEL_EXPR, void_type_node, lab_over);
3505 append_to_statement_list (t, pre_p);
3506 }
3507
3508 ptrtype = build_pointer_type (type);
3509 addr = fold_convert (ptrtype, addr);
3510
3511 if (indirect_p)
3512 addr = build_va_arg_indirect_ref (addr);
3513 return build_va_arg_indirect_ref (addr);
3514 }
3515 \f
3516 /* Return nonzero if OPNUM's MEM should be matched
3517 in movabs* patterns. */
3518
3519 int
3520 ix86_check_movabs (rtx insn, int opnum)
3521 {
3522 rtx set, mem;
3523
3524 set = PATTERN (insn);
3525 if (GET_CODE (set) == PARALLEL)
3526 set = XVECEXP (set, 0, 0);
3527 if (GET_CODE (set) != SET)
3528 abort ();
3529 mem = XEXP (set, opnum);
3530 while (GET_CODE (mem) == SUBREG)
3531 mem = SUBREG_REG (mem);
3532 if (GET_CODE (mem) != MEM)
3533 abort ();
3534 return (volatile_ok || !MEM_VOLATILE_P (mem));
3535 }
3536 \f
3537 /* Initialize the table of extra 80387 mathematical constants. */
3538
3539 static void
3540 init_ext_80387_constants (void)
3541 {
3542 static const char * cst[5] =
3543 {
3544 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
3545 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
3546 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
3547 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
3548 "3.1415926535897932385128089594061862044", /* 4: fldpi */
3549 };
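   /* In order, these are log10(2), ln(2), log2(e), log2(10) and pi - the
      values loaded by fldlg2, fldln2, fldl2e, fldl2t and fldpi.  */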
3550 int i;
3551
3552 for (i = 0; i < 5; i++)
3553 {
3554 real_from_string (&ext_80387_constants_table[i], cst[i]);
3555 /* Ensure each constant is rounded to XFmode precision. */
3556 real_convert (&ext_80387_constants_table[i],
3557 XFmode, &ext_80387_constants_table[i]);
3558 }
3559
3560 ext_80387_constants_init = 1;
3561 }
3562
3563 /* Return true if the constant is something that can be loaded with
3564 a special instruction. */
3565
3566 int
3567 standard_80387_constant_p (rtx x)
3568 {
3569 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
3570 return -1;
3571
3572 if (x == CONST0_RTX (GET_MODE (x)))
3573 return 1;
3574 if (x == CONST1_RTX (GET_MODE (x)))
3575 return 2;
3576
3577 /* For XFmode constants, try to find a special 80387 instruction when
3578 optimizing for size or on those CPUs that benefit from them. */
3579 if (GET_MODE (x) == XFmode
3580 && (optimize_size || x86_ext_80387_constants & TUNEMASK))
3581 {
3582 REAL_VALUE_TYPE r;
3583 int i;
3584
3585 if (! ext_80387_constants_init)
3586 init_ext_80387_constants ();
3587
3588 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3589 for (i = 0; i < 5; i++)
3590 if (real_identical (&r, &ext_80387_constants_table[i]))
3591 return i + 3;
3592 }
3593
3594 return 0;
3595 }
3596
3597 /* Return the opcode of the special instruction to be used to load
3598 the constant X. */
3599
3600 const char *
3601 standard_80387_constant_opcode (rtx x)
3602 {
3603 switch (standard_80387_constant_p (x))
3604 {
3605 case 1:
3606 return "fldz";
3607 case 2:
3608 return "fld1";
3609 case 3:
3610 return "fldlg2";
3611 case 4:
3612 return "fldln2";
3613 case 5:
3614 return "fldl2e";
3615 case 6:
3616 return "fldl2t";
3617 case 7:
3618 return "fldpi";
3619 }
3620 abort ();
3621 }
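/* For example, when the extended 80387 constants are enabled for the current
   tuning, a CONST_DOUBLE holding pi in XFmode makes standard_80387_constant_p
   return 7 (index 4 in the table above plus 3), and
   standard_80387_constant_opcode then yields "fldpi".  */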
3622
3623 /* Return the CONST_DOUBLE representing the 80387 constant that is
3624 loaded by the specified special instruction. The argument IDX
3625 matches the return value from standard_80387_constant_p. */
3626
3627 rtx
3628 standard_80387_constant_rtx (int idx)
3629 {
3630 int i;
3631
3632 if (! ext_80387_constants_init)
3633 init_ext_80387_constants ();
3634
3635 switch (idx)
3636 {
3637 case 3:
3638 case 4:
3639 case 5:
3640 case 6:
3641 case 7:
3642 i = idx - 3;
3643 break;
3644
3645 default:
3646 abort ();
3647 }
3648
3649 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
3650 XFmode);
3651 }
3652
3653 /* Return 1 if X is an FP constant we can load into an SSE register
3654 without using memory.  */
3655 int
3656 standard_sse_constant_p (rtx x)
3657 {
3658 if (x == const0_rtx)
3659 return 1;
3660 return (x == CONST0_RTX (GET_MODE (x)));
3661 }
3662
3663 /* Return 1 if OP contains a symbol reference.  */
3664
3665 int
3666 symbolic_reference_mentioned_p (rtx op)
3667 {
3668 const char *fmt;
3669 int i;
3670
3671 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
3672 return 1;
3673
3674 fmt = GET_RTX_FORMAT (GET_CODE (op));
3675 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3676 {
3677 if (fmt[i] == 'E')
3678 {
3679 int j;
3680
3681 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3682 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3683 return 1;
3684 }
3685
3686 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3687 return 1;
3688 }
3689
3690 return 0;
3691 }
3692
3693 /* Return 1 if it is appropriate to emit `ret' instructions in the
3694 body of a function. Do this only if the epilogue is simple, needing a
3695 couple of insns. Prior to reloading, we can't tell how many registers
3696 must be saved, so return 0 then. Return 0 if there is no frame
3697 marker to de-allocate. */
3698
3699 int
3700 ix86_can_use_return_insn_p (void)
3701 {
3702 struct ix86_frame frame;
3703
3704 if (! reload_completed || frame_pointer_needed)
3705 return 0;
3706
3707 /* Don't allow more than 32768 bytes of arguments to be popped, since
3708 that's all we handle with one instruction here.  */
3709 if (current_function_pops_args
3710 && current_function_args_size >= 32768)
3711 return 0;
3712
3713 ix86_compute_frame_layout (&frame);
3714 return frame.to_allocate == 0 && frame.nregs == 0;
3715 }
3716 \f
3717 /* Value should be nonzero if functions must have frame pointers.
3718 Zero means the frame pointer need not be set up (and parms may
3719 be accessed via the stack pointer) in functions that seem suitable. */
3720
3721 int
3722 ix86_frame_pointer_required (void)
3723 {
3724 /* If we accessed previous frames, then the generated code expects
3725 to be able to access the saved ebp value in our frame. */
3726 if (cfun->machine->accesses_prev_frame)
3727 return 1;
3728
3729 /* Several x86 OSes need a frame pointer for other reasons,
3730 usually pertaining to setjmp.  */
3731 if (SUBTARGET_FRAME_POINTER_REQUIRED)
3732 return 1;
3733
3734 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
3735 the frame pointer by default. Turn it back on now if we've not
3736 got a leaf function. */
3737 if (TARGET_OMIT_LEAF_FRAME_POINTER
3738 && (!current_function_is_leaf))
3739 return 1;
3740
3741 if (current_function_profile)
3742 return 1;
3743
3744 return 0;
3745 }
3746
3747 /* Record that the current function accesses previous call frames. */
3748
3749 void
3750 ix86_setup_frame_addresses (void)
3751 {
3752 cfun->machine->accesses_prev_frame = 1;
3753 }
3754 \f
3755 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
3756 # define USE_HIDDEN_LINKONCE 1
3757 #else
3758 # define USE_HIDDEN_LINKONCE 0
3759 #endif
3760
3761 static int pic_labels_used;
3762
3763 /* Fills in the label name that should be used for a pc thunk for
3764 the given register. */
3765
3766 static void
3767 get_pc_thunk_name (char name[32], unsigned int regno)
3768 {
3769 if (USE_HIDDEN_LINKONCE)
3770 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
3771 else
3772 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
3773 }
3774
3775
3776 /* Emit the pc thunks needed by -fpic code: each thunk loads its
3777 register with the return address of the caller and then returns.  */
3778
3779 void
3780 ix86_file_end (void)
3781 {
3782 rtx xops[2];
3783 int regno;
3784
3785 for (regno = 0; regno < 8; ++regno)
3786 {
3787 char name[32];
3788
3789 if (! ((pic_labels_used >> regno) & 1))
3790 continue;
3791
3792 get_pc_thunk_name (name, regno);
3793
3794 if (USE_HIDDEN_LINKONCE)
3795 {
3796 tree decl;
3797
3798 decl = build_decl (FUNCTION_DECL, get_identifier (name),
3799 error_mark_node);
3800 TREE_PUBLIC (decl) = 1;
3801 TREE_STATIC (decl) = 1;
3802 DECL_ONE_ONLY (decl) = 1;
3803
3804 (*targetm.asm_out.unique_section) (decl, 0);
3805 named_section (decl, NULL, 0);
3806
3807 (*targetm.asm_out.globalize_label) (asm_out_file, name);
3808 fputs ("\t.hidden\t", asm_out_file);
3809 assemble_name (asm_out_file, name);
3810 fputc ('\n', asm_out_file);
3811 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
3812 }
3813 else
3814 {
3815 text_section ();
3816 ASM_OUTPUT_LABEL (asm_out_file, name);
3817 }
3818
3819 xops[0] = gen_rtx_REG (SImode, regno);
3820 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
3821 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
3822 output_asm_insn ("ret", xops);
3823 }
3824
3825 if (NEED_INDICATE_EXEC_STACK)
3826 file_end_indicate_exec_stack ();
3827 }
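/* For example, with USE_HIDDEN_LINKONCE the thunk emitted above for %ebx
   looks like:

       __i686.get_pc_thunk.bx:
               movl    (%esp), %ebx
               ret

   so a "call __i686.get_pc_thunk.bx" leaves the address of the instruction
   following the call in %ebx.  */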
3828
3829 /* Emit code for the SET_GOT patterns. */
3830
3831 const char *
3832 output_set_got (rtx dest)
3833 {
3834 rtx xops[3];
3835
3836 xops[0] = dest;
3837 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
3838
3839 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
3840 {
3841 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
3842
3843 if (!flag_pic)
3844 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
3845 else
3846 output_asm_insn ("call\t%a2", xops);
3847
3848 #if TARGET_MACHO
3849 /* Output the "canonical" label name ("Lxx$pb") here too. This
3850 is what will be referred to by the Mach-O PIC subsystem. */
3851 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
3852 #endif
3853 (*targetm.asm_out.internal_label) (asm_out_file, "L",
3854 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
3855
3856 if (flag_pic)
3857 output_asm_insn ("pop{l}\t%0", xops);
3858 }
3859 else
3860 {
3861 char name[32];
3862 get_pc_thunk_name (name, REGNO (dest));
3863 pic_labels_used |= 1 << REGNO (dest);
3864
3865 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
3866 xops[2] = gen_rtx_MEM (QImode, xops[2]);
3867 output_asm_insn ("call\t%X2", xops);
3868 }
3869
3870 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
3871 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
3872 else if (!TARGET_MACHO)
3873 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
3874
3875 return "";
3876 }
3877
3878 /* Generate a "push" pattern for input ARG.  */
3879
3880 static rtx
3881 gen_push (rtx arg)
3882 {
3883 return gen_rtx_SET (VOIDmode,
3884 gen_rtx_MEM (Pmode,
3885 gen_rtx_PRE_DEC (Pmode,
3886 stack_pointer_rtx)),
3887 arg);
3888 }
3889
3890 /* Return the number of an unused call-clobbered register that is available
3891 for the entire function, or INVALID_REGNUM if there is none.  */
3892
3893 static unsigned int
3894 ix86_select_alt_pic_regnum (void)
3895 {
3896 if (current_function_is_leaf && !current_function_profile)
3897 {
3898 int i;
3899 for (i = 2; i >= 0; --i)
3900 if (!regs_ever_live[i])
3901 return i;
3902 }
3903
3904 return INVALID_REGNUM;
3905 }
3906
3907 /* Return 1 if we need to save REGNO. */
3908 static int
3909 ix86_save_reg (unsigned int regno, int maybe_eh_return)
3910 {
3911 if (pic_offset_table_rtx
3912 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
3913 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
3914 || current_function_profile
3915 || current_function_calls_eh_return
3916 || current_function_uses_const_pool))
3917 {
3918 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
3919 return 0;
3920 return 1;
3921 }
3922
3923 if (current_function_calls_eh_return && maybe_eh_return)
3924 {
3925 unsigned i;
3926 for (i = 0; ; i++)
3927 {
3928 unsigned test = EH_RETURN_DATA_REGNO (i);
3929 if (test == INVALID_REGNUM)
3930 break;
3931 if (test == regno)
3932 return 1;
3933 }
3934 }
3935
3936 return (regs_ever_live[regno]
3937 && !call_used_regs[regno]
3938 && !fixed_regs[regno]
3939 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
3940 }
3941
3942 /* Return number of registers to be saved on the stack. */
3943
3944 static int
3945 ix86_nsaved_regs (void)
3946 {
3947 int nregs = 0;
3948 int regno;
3949
3950 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
3951 if (ix86_save_reg (regno, true))
3952 nregs++;
3953 return nregs;
3954 }
3955
3956 /* Return the offset between two registers, one to be eliminated, and the other
3957 its replacement, at the start of a routine. */
3958
3959 HOST_WIDE_INT
3960 ix86_initial_elimination_offset (int from, int to)
3961 {
3962 struct ix86_frame frame;
3963 ix86_compute_frame_layout (&frame);
3964
3965 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3966 return frame.hard_frame_pointer_offset;
3967 else if (from == FRAME_POINTER_REGNUM
3968 && to == HARD_FRAME_POINTER_REGNUM)
3969 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
3970 else
3971 {
3972 if (to != STACK_POINTER_REGNUM)
3973 abort ();
3974 else if (from == ARG_POINTER_REGNUM)
3975 return frame.stack_pointer_offset;
3976 else if (from != FRAME_POINTER_REGNUM)
3977 abort ();
3978 else
3979 return frame.stack_pointer_offset - frame.frame_pointer_offset;
3980 }
3981 }
3982
3983 /* Fill in the ix86_frame structure describing the frame of the current function.  */
3984
3985 static void
3986 ix86_compute_frame_layout (struct ix86_frame *frame)
3987 {
3988 HOST_WIDE_INT total_size;
3989 unsigned int stack_alignment_needed;
3990 HOST_WIDE_INT offset;
3991 unsigned int preferred_alignment;
3992 HOST_WIDE_INT size = get_frame_size ();
3993
3994 frame->nregs = ix86_nsaved_regs ();
3995 total_size = size;
3996
3997 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
3998 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
3999
4000 /* During reload iteration the number of registers saved can change.
4001 Recompute the value as needed.  Do not recompute when the number of
4002 registers didn't change, as reload makes multiple calls to this function
4003 and does not expect the decision to change within a single iteration.  */
4004 if (!optimize_size
4005 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
4006 {
4007 int count = frame->nregs;
4008
4009 cfun->machine->use_fast_prologue_epilogue_nregs = count;
4010 /* The fast prologue uses move instead of push to save registers.  This
4011 is significantly longer, but also executes faster, as modern hardware
4012 can execute the moves in parallel but cannot do so for push/pop.
4013
4014 Be careful about choosing which prologue to emit: when the function
4015 takes many instructions to execute, we may as well use the slow version,
4016 and likewise when the function is known to be outside a hot spot (this
4017 is only known with profile feedback).  Weight the size of the function
4018 by the number of registers to save, as it is cheap to use one or two
4019 push instructions but very slow to use many of them.  */
4020 if (count)
4021 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
4022 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
4023 || (flag_branch_probabilities
4024 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
4025 cfun->machine->use_fast_prologue_epilogue = false;
4026 else
4027 cfun->machine->use_fast_prologue_epilogue
4028 = !expensive_function_p (count);
4029 }
4030 if (TARGET_PROLOGUE_USING_MOVE
4031 && cfun->machine->use_fast_prologue_epilogue)
4032 frame->save_regs_using_mov = true;
4033 else
4034 frame->save_regs_using_mov = false;
4035
4036
4037 /* Skip return address and saved base pointer. */
4038 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4039
4040 frame->hard_frame_pointer_offset = offset;
4041
4042 /* Do some sanity checking of stack_alignment_needed and
4043 preferred_alignment, since the i386 port is the only one using these
4044 features, and they may break easily.  */
4045
4046 if (size && !stack_alignment_needed)
4047 abort ();
4048 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
4049 abort ();
4050 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4051 abort ();
4052 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4053 abort ();
4054
4055 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
4056 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
4057
4058 /* Register save area */
4059 offset += frame->nregs * UNITS_PER_WORD;
4060
4061 /* Va-arg area */
4062 if (ix86_save_varrargs_registers)
4063 {
4064 offset += X86_64_VARARGS_SIZE;
4065 frame->va_arg_size = X86_64_VARARGS_SIZE;
4066 }
4067 else
4068 frame->va_arg_size = 0;
4069
4070 /* Align start of frame for local function. */
4071 frame->padding1 = ((offset + stack_alignment_needed - 1)
4072 & -stack_alignment_needed) - offset;
4073
4074 offset += frame->padding1;
4075
4076 /* Frame pointer points here. */
4077 frame->frame_pointer_offset = offset;
4078
4079 offset += size;
4080
4081 /* Add the outgoing arguments area.  It can be skipped if we eliminated
4082 all the function calls as dead code.
4083 Skipping is however impossible when the function calls alloca, since
4084 the alloca expander assumes that the last current_function_outgoing_args_size
4085 bytes of the stack frame are unused.  */
4086 if (ACCUMULATE_OUTGOING_ARGS
4087 && (!current_function_is_leaf || current_function_calls_alloca))
4088 {
4089 offset += current_function_outgoing_args_size;
4090 frame->outgoing_arguments_size = current_function_outgoing_args_size;
4091 }
4092 else
4093 frame->outgoing_arguments_size = 0;
4094
4095 /* Align stack boundary. Only needed if we're calling another function
4096 or using alloca. */
4097 if (!current_function_is_leaf || current_function_calls_alloca)
4098 frame->padding2 = ((offset + preferred_alignment - 1)
4099 & -preferred_alignment) - offset;
4100 else
4101 frame->padding2 = 0;
4102
4103 offset += frame->padding2;
4104
4105 /* We've reached end of stack frame. */
4106 frame->stack_pointer_offset = offset;
4107
4108 /* Size prologue needs to allocate. */
4109 frame->to_allocate =
4110 (size + frame->padding1 + frame->padding2
4111 + frame->outgoing_arguments_size + frame->va_arg_size);
4112
4113 if ((!frame->to_allocate && frame->nregs <= 1)
4114 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
4115 frame->save_regs_using_mov = false;
4116
4117 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
4118 && current_function_is_leaf)
4119 {
4120 frame->red_zone_size = frame->to_allocate;
4121 if (frame->save_regs_using_mov)
4122 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
4123 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
4124 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
4125 }
4126 else
4127 frame->red_zone_size = 0;
4128 frame->to_allocate -= frame->red_zone_size;
4129 frame->stack_pointer_offset -= frame->red_zone_size;
4130 #if 0
4131 fprintf (stderr, "nregs: %i\n", frame->nregs);
4132 fprintf (stderr, "size: %i\n", size);
4133 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
4134 fprintf (stderr, "padding1: %i\n", frame->padding1);
4135 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
4136 fprintf (stderr, "padding2: %i\n", frame->padding2);
4137 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
4138 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
4139 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
4140 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
4141 frame->hard_frame_pointer_offset);
4142 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
4143 #endif
4144 }
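/* A rough sketch of the layout computed above, from higher to lower
   addresses (offsets accumulate downward starting at the return address):

       return address
       saved %ebp (if frame_pointer_needed)   <- hard_frame_pointer_offset
       saved registers (frame->nregs words)
       va-arg register save area (if any)
       padding1
       local variables (get_frame_size ())    <- frame_pointer_offset
       outgoing arguments
       padding2                               <- stack_pointer_offset

   with the red zone, if used, subtracted from to_allocate and
   stack_pointer_offset at the end.  */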
4145
4146 /* Emit code to save registers in the prologue. */
4147
4148 static void
4149 ix86_emit_save_regs (void)
4150 {
4151 int regno;
4152 rtx insn;
4153
4154 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4155 if (ix86_save_reg (regno, true))
4156 {
4157 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
4158 RTX_FRAME_RELATED_P (insn) = 1;
4159 }
4160 }
4161
4162 /* Emit code to save registers using MOV insns.  First register
4163 is saved at POINTER + OFFSET.  */
4164 static void
4165 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
4166 {
4167 int regno;
4168 rtx insn;
4169
4170 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4171 if (ix86_save_reg (regno, true))
4172 {
4173 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
4174 Pmode, offset),
4175 gen_rtx_REG (Pmode, regno));
4176 RTX_FRAME_RELATED_P (insn) = 1;
4177 offset += UNITS_PER_WORD;
4178 }
4179 }
4180
4181 /* Expand prologue or epilogue stack adjustment.
4182 The pattern exists to put a dependency on all ebp-based memory accesses.
4183 STYLE should be negative if the instructions should be marked as frame
4184 related, zero if the %r11 register is live and cannot be freely used,
4185 and positive otherwise.  */
4186
4187 static void
4188 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
4189 {
4190 rtx insn;
4191
4192 if (! TARGET_64BIT)
4193 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
4194 else if (x86_64_immediate_operand (offset, DImode))
4195 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
4196 else
4197 {
4198 rtx r11;
4199 /* r11 is used by indirect sibcall return as well, set before the
4200 epilogue and used after the epilogue. ATM indirect sibcall
4201 shouldn't be used together with huge frame sizes in one
4202 function because of the frame_size check in sibcall.c. */
4203 if (style == 0)
4204 abort ();
4205 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
4206 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
4207 if (style < 0)
4208 RTX_FRAME_RELATED_P (insn) = 1;
4209 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
4210 offset));
4211 }
4212 if (style < 0)
4213 RTX_FRAME_RELATED_P (insn) = 1;
4214 }
4215
4216 /* Expand the prologue into a bunch of separate insns. */
4217
4218 void
4219 ix86_expand_prologue (void)
4220 {
4221 rtx insn;
4222 bool pic_reg_used;
4223 struct ix86_frame frame;
4224 HOST_WIDE_INT allocate;
4225
4226 ix86_compute_frame_layout (&frame);
4227
4228 /* Note: AT&T enter does NOT have reversed args. Enter is probably
4229 slower on all targets. Also sdb doesn't like it. */
4230
4231 if (frame_pointer_needed)
4232 {
4233 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
4234 RTX_FRAME_RELATED_P (insn) = 1;
4235
4236 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
4237 RTX_FRAME_RELATED_P (insn) = 1;
4238 }
4239
4240 allocate = frame.to_allocate;
4241
4242 if (!frame.save_regs_using_mov)
4243 ix86_emit_save_regs ();
4244 else
4245 allocate += frame.nregs * UNITS_PER_WORD;
4246
4247 /* When using the red zone we may start register saving before allocating
4248 the stack frame, saving one cycle of the prologue.  */
4249 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
4250 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
4251 : stack_pointer_rtx,
4252 -frame.nregs * UNITS_PER_WORD);
4253
4254 if (allocate == 0)
4255 ;
4256 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
4257 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
4258 GEN_INT (-allocate), -1);
4259 else
4260 {
4261 /* Only valid for Win32. */
4262 rtx eax = gen_rtx_REG (SImode, 0);
4263 bool eax_live = ix86_eax_live_at_start_p ();
4264
4265 if (TARGET_64BIT)
4266 abort ();
4267
4268 if (eax_live)
4269 {
4270 emit_insn (gen_push (eax));
4271 allocate -= 4;
4272 }
4273
4274 insn = emit_move_insn (eax, GEN_INT (allocate));
4275 RTX_FRAME_RELATED_P (insn) = 1;
4276
4277 insn = emit_insn (gen_allocate_stack_worker (eax));
4278 RTX_FRAME_RELATED_P (insn) = 1;
4279
4280 if (eax_live)
4281 {
4282 rtx t;
4283 if (frame_pointer_needed)
4284 t = plus_constant (hard_frame_pointer_rtx,
4285 allocate
4286 - frame.to_allocate
4287 - frame.nregs * UNITS_PER_WORD);
4288 else
4289 t = plus_constant (stack_pointer_rtx, allocate);
4290 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
4291 }
4292 }
4293
4294 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
4295 {
4296 if (!frame_pointer_needed || !frame.to_allocate)
4297 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
4298 else
4299 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
4300 -frame.nregs * UNITS_PER_WORD);
4301 }
4302
4303 pic_reg_used = false;
4304 if (pic_offset_table_rtx
4305 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4306 || current_function_profile))
4307 {
4308 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
4309
4310 if (alt_pic_reg_used != INVALID_REGNUM)
4311 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
4312
4313 pic_reg_used = true;
4314 }
4315
4316 if (pic_reg_used)
4317 {
4318 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
4319
4320 /* Even with accurate pre-reload life analysis, we can wind up
4321 deleting all references to the pic register after reload.
4322 Consider if cross-jumping unifies two sides of a branch
4323 controlled by a comparison vs the only read from a global.
4324 In which case, allow the set_got to be deleted, though we're
4325 too late to do anything about the ebx save in the prologue. */
4326 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
4327 }
4328
4329 /* Prevent function calls from being scheduled before the call to mcount.
4330 In the pic_reg_used case, make sure that the got load isn't deleted.  */
4331 if (current_function_profile)
4332 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
4333 }
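/* For illustration, a typical 32-bit prologue produced by the code above,
   with a frame pointer and registers saved by pushes, looks like:

       pushl   %ebp
       movl    %esp, %ebp
       pushl   %ebx            ; one push per register that must be saved
       subl    $N, %esp        ; N == frame.to_allocate
*/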
4334
4335 /* Emit code to restore saved registers using MOV insns. First register
4336 is restored from POINTER + OFFSET. */
4337 static void
4338 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
4339 int maybe_eh_return)
4340 {
4341 int regno;
4342 rtx base_address = gen_rtx_MEM (Pmode, pointer);
4343
4344 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4345 if (ix86_save_reg (regno, maybe_eh_return))
4346 {
4347 /* Ensure that adjust_address won't be forced to produce a pointer
4348 out of the range allowed by the x86-64 instruction set.  */
4349 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
4350 {
4351 rtx r11;
4352
4353 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
4354 emit_move_insn (r11, GEN_INT (offset));
4355 emit_insn (gen_adddi3 (r11, r11, pointer));
4356 base_address = gen_rtx_MEM (Pmode, r11);
4357 offset = 0;
4358 }
4359 emit_move_insn (gen_rtx_REG (Pmode, regno),
4360 adjust_address (base_address, Pmode, offset));
4361 offset += UNITS_PER_WORD;
4362 }
4363 }
4364
4365 /* Restore function stack, frame, and registers. */
4366
4367 void
4368 ix86_expand_epilogue (int style)
4369 {
4370 int regno;
4371 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
4372 struct ix86_frame frame;
4373 HOST_WIDE_INT offset;
4374
4375 ix86_compute_frame_layout (&frame);
4376
4377 /* Calculate start of saved registers relative to ebp. Special care
4378 must be taken for the normal return case of a function using
4379 eh_return: the eax and edx registers are marked as saved, but not
4380 restored along this path. */
4381 offset = frame.nregs;
4382 if (current_function_calls_eh_return && style != 2)
4383 offset -= 2;
4384 offset *= -UNITS_PER_WORD;
4385
4386 /* If we're only restoring one register and sp is not valid, then
4387 use a move instruction to restore the register, since it's
4388 less work than reloading sp and popping the register.
4389
4390 The default code results in a stack adjustment using an add/lea
4391 instruction, while this code results in a LEAVE instruction (or discrete
4392 equivalent), so it is profitable in some other cases as well, especially
4393 when there are no registers to restore.  We also use this code when
4394 TARGET_USE_LEAVE is set and there is exactly one register to pop.  This
4395 heuristic may need some tuning in the future.  */
4396 if ((!sp_valid && frame.nregs <= 1)
4397 || (TARGET_EPILOGUE_USING_MOVE
4398 && cfun->machine->use_fast_prologue_epilogue
4399 && (frame.nregs > 1 || frame.to_allocate))
4400 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
4401 || (frame_pointer_needed && TARGET_USE_LEAVE
4402 && cfun->machine->use_fast_prologue_epilogue
4403 && frame.nregs == 1)
4404 || current_function_calls_eh_return)
4405 {
4406 /* Restore registers.  We can use ebp or esp to address the memory
4407 locations.  If both are available, default to ebp, since offsets
4408 are known to be small.  The only exception is esp pointing directly
4409 to the end of the block of saved registers, where we may simplify
4410 the addressing mode.  */
4411
4412 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
4413 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
4414 frame.to_allocate, style == 2);
4415 else
4416 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
4417 offset, style == 2);
4418
4419 /* eh_return epilogues need %ecx added to the stack pointer. */
4420 if (style == 2)
4421 {
4422 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
4423
4424 if (frame_pointer_needed)
4425 {
4426 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
4427 tmp = plus_constant (tmp, UNITS_PER_WORD);
4428 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
4429
4430 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
4431 emit_move_insn (hard_frame_pointer_rtx, tmp);
4432
4433 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
4434 const0_rtx, style);
4435 }
4436 else
4437 {
4438 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
4439 tmp = plus_constant (tmp, (frame.to_allocate
4440 + frame.nregs * UNITS_PER_WORD));
4441 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
4442 }
4443 }
4444 else if (!frame_pointer_needed)
4445 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
4446 GEN_INT (frame.to_allocate
4447 + frame.nregs * UNITS_PER_WORD),
4448 style);
4449 /* If not an i386, mov & pop is faster than "leave". */
4450 else if (TARGET_USE_LEAVE || optimize_size
4451 || !cfun->machine->use_fast_prologue_epilogue)
4452 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4453 else
4454 {
4455 pro_epilogue_adjust_stack (stack_pointer_rtx,
4456 hard_frame_pointer_rtx,
4457 const0_rtx, style);
4458 if (TARGET_64BIT)
4459 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4460 else
4461 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4462 }
4463 }
4464 else
4465 {
4466 /* First step is to deallocate the stack frame so that we can
4467 pop the registers. */
4468 if (!sp_valid)
4469 {
4470 if (!frame_pointer_needed)
4471 abort ();
4472 pro_epilogue_adjust_stack (stack_pointer_rtx,
4473 hard_frame_pointer_rtx,
4474 GEN_INT (offset), style);
4475 }
4476 else if (frame.to_allocate)
4477 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
4478 GEN_INT (frame.to_allocate), style);
4479
4480 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4481 if (ix86_save_reg (regno, false))
4482 {
4483 if (TARGET_64BIT)
4484 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
4485 else
4486 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
4487 }
4488 if (frame_pointer_needed)
4489 {
4490 /* Leave results in shorter dependency chains on CPUs that are
4491 able to grok it fast. */
4492 if (TARGET_USE_LEAVE)
4493 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4494 else if (TARGET_64BIT)
4495 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4496 else
4497 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4498 }
4499 }
4500
4501 /* Sibcall epilogues don't want a return instruction. */
4502 if (style == 0)
4503 return;
4504
4505 if (current_function_pops_args && current_function_args_size)
4506 {
4507 rtx popc = GEN_INT (current_function_pops_args);
4508
4509 /* i386 can only pop 64K bytes. If asked to pop more, pop
4510 return address, do explicit add, and jump indirectly to the
4511 caller. */
4512
4513 if (current_function_pops_args >= 65536)
4514 {
4515 rtx ecx = gen_rtx_REG (SImode, 2);
4516
4517 /* There is no "pascal" calling convention in 64bit ABI. */
4518 if (TARGET_64BIT)
4519 abort ();
4520
4521 emit_insn (gen_popsi1 (ecx));
4522 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
4523 emit_jump_insn (gen_return_indirect_internal (ecx));
4524 }
4525 else
4526 emit_jump_insn (gen_return_pop_internal (popc));
4527 }
4528 else
4529 emit_jump_insn (gen_return_internal ());
4530 }
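/* For illustration, the matching 32-bit epilogue in the frame-pointer case
   is either

       leave                   ; or: movl %ebp, %esp / popl %ebp
       ret

   or, when the prologue saved registers with pushes,

       addl    $N, %esp        ; deallocate the local frame
       popl    %ebx            ; one pop per saved register
       popl    %ebp
       ret
*/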
4531
4532 /* Reset from the function's potential modifications. */
4533
4534 static void
4535 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
4536 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
4537 {
4538 if (pic_offset_table_rtx)
4539 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
4540 }
4541 \f
4542 /* Extract the parts of an RTL expression that is a valid memory address
4543 for an instruction.  Return 0 if the structure of the address is
4544 grossly off.  Return -1 if the address contains ASHIFT, so it is not
4545 strictly valid but is still used to compute the length of an lea insn.  */
4546
4547 int
4548 ix86_decompose_address (rtx addr, struct ix86_address *out)
4549 {
4550 rtx base = NULL_RTX;
4551 rtx index = NULL_RTX;
4552 rtx disp = NULL_RTX;
4553 HOST_WIDE_INT scale = 1;
4554 rtx scale_rtx = NULL_RTX;
4555 int retval = 1;
4556 enum ix86_address_seg seg = SEG_DEFAULT;
4557
4558 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
4559 base = addr;
4560 else if (GET_CODE (addr) == PLUS)
4561 {
4562 rtx addends[4], op;
4563 int n = 0, i;
4564
4565 op = addr;
4566 do
4567 {
4568 if (n >= 4)
4569 return 0;
4570 addends[n++] = XEXP (op, 1);
4571 op = XEXP (op, 0);
4572 }
4573 while (GET_CODE (op) == PLUS);
4574 if (n >= 4)
4575 return 0;
4576 addends[n] = op;
4577
4578 for (i = n; i >= 0; --i)
4579 {
4580 op = addends[i];
4581 switch (GET_CODE (op))
4582 {
4583 case MULT:
4584 if (index)
4585 return 0;
4586 index = XEXP (op, 0);
4587 scale_rtx = XEXP (op, 1);
4588 break;
4589
4590 case UNSPEC:
4591 if (XINT (op, 1) == UNSPEC_TP
4592 && TARGET_TLS_DIRECT_SEG_REFS
4593 && seg == SEG_DEFAULT)
4594 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
4595 else
4596 return 0;
4597 break;
4598
4599 case REG:
4600 case SUBREG:
4601 if (!base)
4602 base = op;
4603 else if (!index)
4604 index = op;
4605 else
4606 return 0;
4607 break;
4608
4609 case CONST:
4610 case CONST_INT:
4611 case SYMBOL_REF:
4612 case LABEL_REF:
4613 if (disp)
4614 return 0;
4615 disp = op;
4616 break;
4617
4618 default:
4619 return 0;
4620 }
4621 }
4622 }
4623 else if (GET_CODE (addr) == MULT)
4624 {
4625 index = XEXP (addr, 0); /* index*scale */
4626 scale_rtx = XEXP (addr, 1);
4627 }
4628 else if (GET_CODE (addr) == ASHIFT)
4629 {
4630 rtx tmp;
4631
4632 /* We're called for lea too, which implements ashift on occasion. */
4633 index = XEXP (addr, 0);
4634 tmp = XEXP (addr, 1);
4635 if (GET_CODE (tmp) != CONST_INT)
4636 return 0;
4637 scale = INTVAL (tmp);
4638 if ((unsigned HOST_WIDE_INT) scale > 3)
4639 return 0;
4640 scale = 1 << scale;
4641 retval = -1;
4642 }
4643 else
4644 disp = addr; /* displacement */
4645
4646 /* Extract the integral value of scale. */
4647 if (scale_rtx)
4648 {
4649 if (GET_CODE (scale_rtx) != CONST_INT)
4650 return 0;
4651 scale = INTVAL (scale_rtx);
4652 }
4653
4654 /* Allow the arg pointer and stack pointer as index if there is no scaling.  */
4655 if (base && index && scale == 1
4656 && (index == arg_pointer_rtx
4657 || index == frame_pointer_rtx
4658 || (REG_P (index) && REGNO (index) == STACK_POINTER_REGNUM)))
4659 {
4660 rtx tmp = base;
4661 base = index;
4662 index = tmp;
4663 }
4664
4665 /* Special case: %ebp cannot be encoded as a base without a displacement. */
4666 if ((base == hard_frame_pointer_rtx
4667 || base == frame_pointer_rtx
4668 || base == arg_pointer_rtx) && !disp)
4669 disp = const0_rtx;
4670
4671 /* Special case: on K6, [%esi] makes the instruction vector decoded.
4672 Avoid this by transforming to [%esi+0]. */
4673 if (ix86_tune == PROCESSOR_K6 && !optimize_size
4674 && base && !index && !disp
4675 && REG_P (base)
4676 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
4677 disp = const0_rtx;
4678
4679 /* Special case: encode reg+reg instead of reg*2. */
4680 if (!base && index && scale && scale == 2)
4681 base = index, scale = 1;
4682
4683 /* Special case: scaling cannot be encoded without base or displacement. */
4684 if (!base && !disp && index && scale != 1)
4685 disp = const0_rtx;
4686
4687 out->base = base;
4688 out->index = index;
4689 out->disp = disp;
4690 out->scale = scale;
4691 out->seg = seg;
4692
4693 return retval;
4694 }
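/* As an illustrative example (idx and base stand for arbitrary general
   registers), the address

       (plus:SI (plus:SI (mult:SI (reg:SI idx) (const_int 4))
                         (reg:SI base))
                (const_int 16))

   decomposes into out->base = base, out->index = idx, out->scale = 4
   and out->disp = (const_int 16), i.e. the operand of 16(base,idx,4).  */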
4695 \f
4696 /* Return the cost of the memory address x.
4697 For i386, it is better to use a complex address than let gcc copy
4698 the address into a reg and make a new pseudo.  But not if the address
4699 requires two regs - that would mean more pseudos with longer
4700 lifetimes.  */
4701 static int
4702 ix86_address_cost (rtx x)
4703 {
4704 struct ix86_address parts;
4705 int cost = 1;
4706
4707 if (!ix86_decompose_address (x, &parts))
4708 abort ();
4709
4710 /* More complex memory references are better. */
4711 if (parts.disp && parts.disp != const0_rtx)
4712 cost--;
4713 if (parts.seg != SEG_DEFAULT)
4714 cost--;
4715
4716 /* Attempt to minimize number of registers in the address. */
4717 if ((parts.base
4718 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
4719 || (parts.index
4720 && (!REG_P (parts.index)
4721 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
4722 cost++;
4723
4724 if (parts.base
4725 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
4726 && parts.index
4727 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
4728 && parts.base != parts.index)
4729 cost++;
4730
4731 /* The AMD-K6 doesn't like addresses with the ModR/M byte set to 00_xxx_100b,
4732 since its predecode logic can't detect the length of such instructions
4733 and decoding degenerates to vector decoded.  Increase the cost of such
4734 addresses here.  The penalty is minimally 2 cycles.  It may be worthwhile
4735 to split such addresses or even refuse them entirely.
4736
4737 The following addressing modes are affected:
4738 [base+scale*index]
4739 [scale*index+disp]
4740 [base+index]
4741
4742 The first and last cases may be avoidable by explicitly coding a zero
4743 displacement into the memory address, but I don't have an AMD-K6 machine
4744 handy to check this theory.  */
4745
4746 if (TARGET_K6
4747 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
4748 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
4749 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
4750 cost += 10;
4751
4752 return cost;
4753 }
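/* As a simple illustration of the cost function above: assuming hard
   registers, a plain (reg) address costs 1, while (plus (reg) (const_int 8))
   costs 0, because an address with a nonzero displacement counts as "more
   complex" and is therefore preferred on i386.  */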
4754 \f
4755 /* If X is a machine specific address (i.e. a symbol or label being
4756 referenced as a displacement from the GOT implemented using an
4757 UNSPEC), then return the base term. Otherwise return X. */
4758
4759 rtx
4760 ix86_find_base_term (rtx x)
4761 {
4762 rtx term;
4763
4764 if (TARGET_64BIT)
4765 {
4766 if (GET_CODE (x) != CONST)
4767 return x;
4768 term = XEXP (x, 0);
4769 if (GET_CODE (term) == PLUS
4770 && (GET_CODE (XEXP (term, 1)) == CONST_INT
4771 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
4772 term = XEXP (term, 0);
4773 if (GET_CODE (term) != UNSPEC
4774 || XINT (term, 1) != UNSPEC_GOTPCREL)
4775 return x;
4776
4777 term = XVECEXP (term, 0, 0);
4778
4779 if (GET_CODE (term) != SYMBOL_REF
4780 && GET_CODE (term) != LABEL_REF)
4781 return x;
4782
4783 return term;
4784 }
4785
4786 term = ix86_delegitimize_address (x);
4787
4788 if (GET_CODE (term) != SYMBOL_REF
4789 && GET_CODE (term) != LABEL_REF)
4790 return x;
4791
4792 return term;
4793 }
4794
4795 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O, as
4796 this is used to form addresses of local data when -fPIC is in
4797 use.  */
4798
4799 static bool
4800 darwin_local_data_pic (rtx disp)
4801 {
4802 if (GET_CODE (disp) == MINUS)
4803 {
4804 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
4805 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
4806 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
4807 {
4808 const char *sym_name = XSTR (XEXP (disp, 1), 0);
4809 if (! strcmp (sym_name, "<pic base>"))
4810 return true;
4811 }
4812 }
4813
4814 return false;
4815 }
4816 \f
4817 /* Determine if a given RTX is a valid constant. We already know this
4818 satisfies CONSTANT_P. */
4819
4820 bool
4821 legitimate_constant_p (rtx x)
4822 {
4823 switch (GET_CODE (x))
4824 {
4825 case CONST:
4826 x = XEXP (x, 0);
4827
4828 if (GET_CODE (x) == PLUS)
4829 {
4830 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
4831 return false;
4832 x = XEXP (x, 0);
4833 }
4834
4835 if (TARGET_MACHO && darwin_local_data_pic (x))
4836 return true;
4837
4838 /* Only some unspecs are valid as "constants". */
4839 if (GET_CODE (x) == UNSPEC)
4840 switch (XINT (x, 1))
4841 {
4842 case UNSPEC_TPOFF:
4843 case UNSPEC_NTPOFF:
4844 return local_exec_symbolic_operand (XVECEXP (x, 0, 0), Pmode);
4845 case UNSPEC_DTPOFF:
4846 return local_dynamic_symbolic_operand (XVECEXP (x, 0, 0), Pmode);
4847 default:
4848 return false;
4849 }
4850
4851 /* We must have drilled down to a symbol. */
4852 if (!symbolic_operand (x, Pmode))
4853 return false;
4854 /* FALLTHRU */
4855
4856 case SYMBOL_REF:
4857 /* TLS symbols are never valid. */
4858 if (tls_symbolic_operand (x, Pmode))
4859 return false;
4860 break;
4861
4862 default:
4863 break;
4864 }
4865
4866 /* Otherwise we handle everything else in the move patterns. */
4867 return true;
4868 }
4869
4870 /* Determine if it's legal to put X into the constant pool. This
4871 is not possible for the address of thread-local symbols, which
4872 is checked above. */
4873
4874 static bool
4875 ix86_cannot_force_const_mem (rtx x)
4876 {
4877 return !legitimate_constant_p (x);
4878 }
4879
4880 /* Determine if a given RTX is a valid constant address. */
4881
4882 bool
4883 constant_address_p (rtx x)
4884 {
4885 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
4886 }
4887
4888 /* Nonzero if the constant value X is a legitimate general operand
4889 when generating PIC code. It is given that flag_pic is on and
4890 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
4891
4892 bool
4893 legitimate_pic_operand_p (rtx x)
4894 {
4895 rtx inner;
4896
4897 switch (GET_CODE (x))
4898 {
4899 case CONST:
4900 inner = XEXP (x, 0);
4901
4902 /* Only some unspecs are valid as "constants". */
4903 if (GET_CODE (inner) == UNSPEC)
4904 switch (XINT (inner, 1))
4905 {
4906 case UNSPEC_TPOFF:
4907 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
4908 default:
4909 return false;
4910 }
4911 /* FALLTHRU */
4912
4913 case SYMBOL_REF:
4914 case LABEL_REF:
4915 return legitimate_pic_address_disp_p (x);
4916
4917 default:
4918 return true;
4919 }
4920 }
4921
4922 /* Determine if a given CONST RTX is a valid memory displacement
4923 in PIC mode. */
4924
4925 int
4926 legitimate_pic_address_disp_p (rtx disp)
4927 {
4928 bool saw_plus;
4929
4930 /* In 64bit mode we can allow direct addresses of symbols and labels
4931 when they are not dynamic symbols. */
4932 if (TARGET_64BIT)
4933 {
4934 /* TLS references should always be enclosed in UNSPEC. */
4935 if (tls_symbolic_operand (disp, GET_MODE (disp)))
4936 return 0;
4937 if (GET_CODE (disp) == SYMBOL_REF
4938 && ix86_cmodel == CM_SMALL_PIC
4939 && SYMBOL_REF_LOCAL_P (disp))
4940 return 1;
4941 if (GET_CODE (disp) == LABEL_REF)
4942 return 1;
4943 if (GET_CODE (disp) == CONST
4944 && GET_CODE (XEXP (disp, 0)) == PLUS)
4945 {
4946 rtx op0 = XEXP (XEXP (disp, 0), 0);
4947 rtx op1 = XEXP (XEXP (disp, 0), 1);
4948
4949 /* TLS references should always be enclosed in UNSPEC. */
4950 if (tls_symbolic_operand (op0, GET_MODE (op0)))
4951 return 0;
4952 if (((GET_CODE (op0) == SYMBOL_REF
4953 && ix86_cmodel == CM_SMALL_PIC
4954 && SYMBOL_REF_LOCAL_P (op0))
4955 || GET_CODE (op0) == LABEL_REF)
4956 && GET_CODE (op1) == CONST_INT
4957 && INTVAL (op1) < 16*1024*1024
4958 && INTVAL (op1) >= -16*1024*1024)
4959 return 1;
4960 }
4961 }
4962 if (GET_CODE (disp) != CONST)
4963 return 0;
4964 disp = XEXP (disp, 0);
4965
4966 if (TARGET_64BIT)
4967 {
4968 /* It is unsafe to allow PLUS expressions here; this limits the allowed
4969 distance of GOT tables.  We should not need these anyway.  */
4970 if (GET_CODE (disp) != UNSPEC
4971 || XINT (disp, 1) != UNSPEC_GOTPCREL)
4972 return 0;
4973
4974 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
4975 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
4976 return 0;
4977 return 1;
4978 }
4979
4980 saw_plus = false;
4981 if (GET_CODE (disp) == PLUS)
4982 {
4983 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
4984 return 0;
4985 disp = XEXP (disp, 0);
4986 saw_plus = true;
4987 }
4988
4989 if (TARGET_MACHO && darwin_local_data_pic (disp))
4990 return 1;
4991
4992 if (GET_CODE (disp) != UNSPEC)
4993 return 0;
4994
4995 switch (XINT (disp, 1))
4996 {
4997 case UNSPEC_GOT:
4998 if (saw_plus)
4999 return false;
5000 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
5001 case UNSPEC_GOTOFF:
5002 if (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
5003 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
5004 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5005 return false;
5006 case UNSPEC_GOTTPOFF:
5007 case UNSPEC_GOTNTPOFF:
5008 case UNSPEC_INDNTPOFF:
5009 if (saw_plus)
5010 return false;
5011 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5012 case UNSPEC_NTPOFF:
5013 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5014 case UNSPEC_DTPOFF:
5015 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5016 }
5017
5018 return 0;
5019 }
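/* For instance, the displacement generated for a local symbol in 32-bit
   PIC code (the @GOTOFF case handled above) has the form

       (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF))

   where "foo" stands for an arbitrary local symbol, possibly with a
   (plus ... (const_int N)) wrapped around the unspec.  */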
5020
5021 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5022 memory address for an instruction. The MODE argument is the machine mode
5023 for the MEM expression that wants to use this address.
5024
5025 It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS should
5026 convert common non-canonical forms to canonical form so that they will
5027 be recognized.  */
5028
5029 int
5030 legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
5031 {
5032 struct ix86_address parts;
5033 rtx base, index, disp;
5034 HOST_WIDE_INT scale;
5035 const char *reason = NULL;
5036 rtx reason_rtx = NULL_RTX;
5037
5038 if (TARGET_DEBUG_ADDR)
5039 {
5040 fprintf (stderr,
5041 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
5042 GET_MODE_NAME (mode), strict);
5043 debug_rtx (addr);
5044 }
5045
5046 if (ix86_decompose_address (addr, &parts) <= 0)
5047 {
5048 reason = "decomposition failed";
5049 goto report_error;
5050 }
5051
5052 base = parts.base;
5053 index = parts.index;
5054 disp = parts.disp;
5055 scale = parts.scale;
5056
5057 /* Validate base register.
5058
5059 Don't allow SUBREGs here; they can lead to spill failures when the base
5060 is one word out of a two word structure, which is represented internally
5061 as a DImode int.  */
5062
5063 if (base)
5064 {
5065 reason_rtx = base;
5066
5067 if (GET_CODE (base) != REG)
5068 {
5069 reason = "base is not a register";
5070 goto report_error;
5071 }
5072
5073 if (GET_MODE (base) != Pmode)
5074 {
5075 reason = "base is not in Pmode";
5076 goto report_error;
5077 }
5078
5079 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
5080 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
5081 {
5082 reason = "base is not valid";
5083 goto report_error;
5084 }
5085 }
5086
5087 /* Validate index register.
5088
5089 Don't allow SUBREGs here; they can lead to spill failures when the index
5090 is one word out of a two word structure, which is represented internally
5091 as a DImode int.  */
5092
5093 if (index)
5094 {
5095 reason_rtx = index;
5096
5097 if (GET_CODE (index) != REG)
5098 {
5099 reason = "index is not a register";
5100 goto report_error;
5101 }
5102
5103 if (GET_MODE (index) != Pmode)
5104 {
5105 reason = "index is not in Pmode";
5106 goto report_error;
5107 }
5108
5109 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
5110 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
5111 {
5112 reason = "index is not valid";
5113 goto report_error;
5114 }
5115 }
5116
5117 /* Validate scale factor. */
5118 if (scale != 1)
5119 {
5120 reason_rtx = GEN_INT (scale);
5121 if (!index)
5122 {
5123 reason = "scale without index";
5124 goto report_error;
5125 }
5126
5127 if (scale != 2 && scale != 4 && scale != 8)
5128 {
5129 reason = "scale is not a valid multiplier";
5130 goto report_error;
5131 }
5132 }
5133
5134 /* Validate displacement. */
5135 if (disp)
5136 {
5137 reason_rtx = disp;
5138
5139 if (GET_CODE (disp) == CONST
5140 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
5141 switch (XINT (XEXP (disp, 0), 1))
5142 {
5143 case UNSPEC_GOT:
5144 case UNSPEC_GOTOFF:
5145 case UNSPEC_GOTPCREL:
5146 if (!flag_pic)
5147 abort ();
5148 goto is_legitimate_pic;
5149
5150 case UNSPEC_GOTTPOFF:
5151 case UNSPEC_GOTNTPOFF:
5152 case UNSPEC_INDNTPOFF:
5153 case UNSPEC_NTPOFF:
5154 case UNSPEC_DTPOFF:
5155 break;
5156
5157 default:
5158 reason = "invalid address unspec";
5159 goto report_error;
5160 }
5161
5162 else if (flag_pic && (SYMBOLIC_CONST (disp)
5163 #if TARGET_MACHO
5164 && !machopic_operand_p (disp)
5165 #endif
5166 ))
5167 {
5168 is_legitimate_pic:
5169 if (TARGET_64BIT && (index || base))
5170 {
5171 /* foo@dtpoff(%rX) is ok. */
5172 if (GET_CODE (disp) != CONST
5173 || GET_CODE (XEXP (disp, 0)) != PLUS
5174 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
5175 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
5176 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
5177 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
5178 {
5179 reason = "non-constant pic memory reference";
5180 goto report_error;
5181 }
5182 }
5183 else if (! legitimate_pic_address_disp_p (disp))
5184 {
5185 reason = "displacement is an invalid pic construct";
5186 goto report_error;
5187 }
5188
5189 /* This code used to verify that a symbolic pic displacement
5190 includes the pic_offset_table_rtx register.
5191
5192 While this is a good idea, unfortunately these constructs may
5193 be created by the "adds using lea" optimization for incorrect
5194 code like:
5195
5196 int a;
5197 int foo(int i)
5198 {
5199 return *(&a+i);
5200 }
5201
5202 This code is nonsensical, but results in addressing the
5203 GOT table with the pic_offset_table_rtx base. We can't
5204 just reject it easily, since it gets matched by the
5205 "addsi3" pattern, which is later split into an lea when the
5206 output register differs from the input. While this could
5207 be handled by a separate addsi pattern for this case that
5208 never results in an lea, disabling this check seems to be
5209 the simpler and correct fix for the crash. */
5210 }
5211 else if (GET_CODE (disp) != LABEL_REF
5212 && GET_CODE (disp) != CONST_INT
5213 && (GET_CODE (disp) != CONST
5214 || !legitimate_constant_p (disp))
5215 && (GET_CODE (disp) != SYMBOL_REF
5216 || !legitimate_constant_p (disp)))
5217 {
5218 reason = "displacement is not constant";
5219 goto report_error;
5220 }
5221 else if (TARGET_64BIT
5222 && !x86_64_immediate_operand (disp, VOIDmode))
5223 {
5224 reason = "displacement is out of range";
5225 goto report_error;
5226 }
5227 }
5228
5229 /* Everything looks valid. */
5230 if (TARGET_DEBUG_ADDR)
5231 fprintf (stderr, "Success.\n");
5232 return TRUE;
5233
5234 report_error:
5235 if (TARGET_DEBUG_ADDR)
5236 {
5237 fprintf (stderr, "Error: %s\n", reason);
5238 debug_rtx (reason_rtx);
5239 }
5240 return FALSE;
5241 }
5242 \f
5243 /* Return a unique alias set for the GOT. */
5244
5245 static HOST_WIDE_INT
5246 ix86_GOT_alias_set (void)
5247 {
5248 static HOST_WIDE_INT set = -1;
5249 if (set == -1)
5250 set = new_alias_set ();
5251 return set;
5252 }
5253
5254 /* Return a legitimate reference for ORIG (an address) using the
5255 register REG. If REG is 0, a new pseudo is generated.
5256
5257 There are two types of references that must be handled:
5258
5259 1. Global data references must load the address from the GOT, via
5260 the PIC reg. An insn is emitted to do this load, and the reg is
5261 returned.
5262
5263 2. Static data references, constant pool addresses, and code labels
5264 compute the address as an offset from the GOT, whose base is in
5265 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
5266 differentiate them from global data objects. The returned
5267 address is the PIC reg + an unspec constant.
5268
5269 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
5270 reg also appears in the address. */
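/* For illustration (a rough sketch of the RTL built below, not an
   exhaustive list):

     local symbol, 32-bit:   (plus pic_offset_table_rtx
                                   (const (unspec [sym] UNSPEC_GOTOFF)))
     global symbol, 32-bit:  (mem (plus pic_offset_table_rtx
                                        (const (unspec [sym] UNSPEC_GOT))))
     symbol, 64-bit:         (mem (const (unspec [sym] UNSPEC_GOTPCREL)))

   where the MEM forms load the symbol's address from the GOT. */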
5271
5272 static rtx
5273 legitimize_pic_address (rtx orig, rtx reg)
5274 {
5275 rtx addr = orig;
5276 rtx new = orig;
5277 rtx base;
5278
5279 #if TARGET_MACHO
5280 if (reg == 0)
5281 reg = gen_reg_rtx (Pmode);
5282 /* Use the generic Mach-O PIC machinery. */
5283 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
5284 #endif
5285
5286 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
5287 new = addr;
5288 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
5289 {
5290 /* This symbol may be referenced via a displacement from the PIC
5291 base address (@GOTOFF). */
5292
5293 if (reload_in_progress)
5294 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5295 if (GET_CODE (addr) == CONST)
5296 addr = XEXP (addr, 0);
5297 if (GET_CODE (addr) == PLUS)
5298 {
5299 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
5300 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
5301 }
5302 else
5303 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
5304 new = gen_rtx_CONST (Pmode, new);
5305 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5306
5307 if (reg != 0)
5308 {
5309 emit_move_insn (reg, new);
5310 new = reg;
5311 }
5312 }
5313 else if (GET_CODE (addr) == SYMBOL_REF)
5314 {
5315 if (TARGET_64BIT)
5316 {
5317 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
5318 new = gen_rtx_CONST (Pmode, new);
5319 new = gen_const_mem (Pmode, new);
5320 set_mem_alias_set (new, ix86_GOT_alias_set ());
5321
5322 if (reg == 0)
5323 reg = gen_reg_rtx (Pmode);
5324 /* Use gen_movsi directly; otherwise the address is loaded
5325 into a register for CSE. We don't want to CSE these addresses;
5326 instead we CSE the addresses loaded from the GOT table, so skip this. */
5327 emit_insn (gen_movsi (reg, new));
5328 new = reg;
5329 }
5330 else
5331 {
5332 /* This symbol must be referenced via a load from the
5333 Global Offset Table (@GOT). */
5334
5335 if (reload_in_progress)
5336 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5337 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
5338 new = gen_rtx_CONST (Pmode, new);
5339 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5340 new = gen_const_mem (Pmode, new);
5341 set_mem_alias_set (new, ix86_GOT_alias_set ());
5342
5343 if (reg == 0)
5344 reg = gen_reg_rtx (Pmode);
5345 emit_move_insn (reg, new);
5346 new = reg;
5347 }
5348 }
5349 else
5350 {
5351 if (GET_CODE (addr) == CONST)
5352 {
5353 addr = XEXP (addr, 0);
5354
5355 /* We must match what we generated before. Assume the only
5356 unspecs that can get here are ours. Not that we could do
5357 anything with them anyway.... */
5358 if (GET_CODE (addr) == UNSPEC
5359 || (GET_CODE (addr) == PLUS
5360 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
5361 return orig;
5362 if (GET_CODE (addr) != PLUS)
5363 abort ();
5364 }
5365 if (GET_CODE (addr) == PLUS)
5366 {
5367 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
5368
5369 /* Check first to see if this is a constant offset from a @GOTOFF
5370 symbol reference. */
5371 if (local_symbolic_operand (op0, Pmode)
5372 && GET_CODE (op1) == CONST_INT)
5373 {
5374 if (!TARGET_64BIT)
5375 {
5376 if (reload_in_progress)
5377 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5378 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
5379 UNSPEC_GOTOFF);
5380 new = gen_rtx_PLUS (Pmode, new, op1);
5381 new = gen_rtx_CONST (Pmode, new);
5382 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5383
5384 if (reg != 0)
5385 {
5386 emit_move_insn (reg, new);
5387 new = reg;
5388 }
5389 }
5390 else
5391 {
5392 if (INTVAL (op1) < -16*1024*1024
5393 || INTVAL (op1) >= 16*1024*1024)
5394 new = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
5395 }
5396 }
5397 else
5398 {
5399 base = legitimize_pic_address (XEXP (addr, 0), reg);
5400 new = legitimize_pic_address (XEXP (addr, 1),
5401 base == reg ? NULL_RTX : reg);
5402
5403 if (GET_CODE (new) == CONST_INT)
5404 new = plus_constant (base, INTVAL (new));
5405 else
5406 {
5407 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
5408 {
5409 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
5410 new = XEXP (new, 1);
5411 }
5412 new = gen_rtx_PLUS (Pmode, base, new);
5413 }
5414 }
5415 }
5416 }
5417 return new;
5418 }
5419 \f
5420 /* Load the thread pointer. If TO_REG is true, force it into a register. */
5421
5422 static rtx
5423 get_thread_pointer (int to_reg)
5424 {
5425 rtx tp, reg, insn;
5426
5427 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
5428 if (!to_reg)
5429 return tp;
5430
5431 reg = gen_reg_rtx (Pmode);
5432 insn = gen_rtx_SET (VOIDmode, reg, tp);
5433 insn = emit_insn (insn);
5434
5435 return reg;
5436 }
5437
5438 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
5439 false if we expect this to be used for a memory address and true if
5440 we expect to load the address into a register. */
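/* As a rough guide to the shapes produced below (a sketch, not a
   specification): the global- and local-dynamic models emit a call through
   the tls_*_dynamic patterns (ultimately __tls_get_addr); the initial-exec
   model loads the thread-pointer offset from the GOT via an
   UNSPEC_GOTTPOFF/GOTNTPOFF/INDNTPOFF reference and adds or subtracts it
   from the thread pointer; the local-exec model folds the offset in
   directly as an UNSPEC_NTPOFF or UNSPEC_TPOFF constant. */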
5441
5442 static rtx
5443 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
5444 {
5445 rtx dest, base, off, pic;
5446 int type;
5447
5448 switch (model)
5449 {
5450 case TLS_MODEL_GLOBAL_DYNAMIC:
5451 dest = gen_reg_rtx (Pmode);
5452 if (TARGET_64BIT)
5453 {
5454 rtx rax = gen_rtx_REG (Pmode, 0), insns;
5455
5456 start_sequence ();
5457 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
5458 insns = get_insns ();
5459 end_sequence ();
5460
5461 emit_libcall_block (insns, dest, rax, x);
5462 }
5463 else
5464 emit_insn (gen_tls_global_dynamic_32 (dest, x));
5465 break;
5466
5467 case TLS_MODEL_LOCAL_DYNAMIC:
5468 base = gen_reg_rtx (Pmode);
5469 if (TARGET_64BIT)
5470 {
5471 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
5472
5473 start_sequence ();
5474 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
5475 insns = get_insns ();
5476 end_sequence ();
5477
5478 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
5479 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
5480 emit_libcall_block (insns, base, rax, note);
5481 }
5482 else
5483 emit_insn (gen_tls_local_dynamic_base_32 (base));
5484
5485 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
5486 off = gen_rtx_CONST (Pmode, off);
5487
5488 return gen_rtx_PLUS (Pmode, base, off);
5489
5490 case TLS_MODEL_INITIAL_EXEC:
5491 if (TARGET_64BIT)
5492 {
5493 pic = NULL;
5494 type = UNSPEC_GOTNTPOFF;
5495 }
5496 else if (flag_pic)
5497 {
5498 if (reload_in_progress)
5499 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5500 pic = pic_offset_table_rtx;
5501 type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
5502 }
5503 else if (!TARGET_GNU_TLS)
5504 {
5505 pic = gen_reg_rtx (Pmode);
5506 emit_insn (gen_set_got (pic));
5507 type = UNSPEC_GOTTPOFF;
5508 }
5509 else
5510 {
5511 pic = NULL;
5512 type = UNSPEC_INDNTPOFF;
5513 }
5514
5515 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
5516 off = gen_rtx_CONST (Pmode, off);
5517 if (pic)
5518 off = gen_rtx_PLUS (Pmode, pic, off);
5519 off = gen_const_mem (Pmode, off);
5520 set_mem_alias_set (off, ix86_GOT_alias_set ());
5521
5522 if (TARGET_64BIT || TARGET_GNU_TLS)
5523 {
5524 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
5525 off = force_reg (Pmode, off);
5526 return gen_rtx_PLUS (Pmode, base, off);
5527 }
5528 else
5529 {
5530 base = get_thread_pointer (true);
5531 dest = gen_reg_rtx (Pmode);
5532 emit_insn (gen_subsi3 (dest, base, off));
5533 }
5534 break;
5535
5536 case TLS_MODEL_LOCAL_EXEC:
5537 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
5538 (TARGET_64BIT || TARGET_GNU_TLS)
5539 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
5540 off = gen_rtx_CONST (Pmode, off);
5541
5542 if (TARGET_64BIT || TARGET_GNU_TLS)
5543 {
5544 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
5545 return gen_rtx_PLUS (Pmode, base, off);
5546 }
5547 else
5548 {
5549 base = get_thread_pointer (true);
5550 dest = gen_reg_rtx (Pmode);
5551 emit_insn (gen_subsi3 (dest, base, off));
5552 }
5553 break;
5554
5555 default:
5556 abort ();
5557 }
5558
5559 return dest;
5560 }
5561
5562 /* Try machine-dependent ways of modifying an illegitimate address
5563 to be legitimate. If we find one, return the new, valid address.
5564 This macro is used in only one place: `memory_address' in explow.c.
5565
5566 OLDX is the address as it was before break_out_memory_refs was called.
5567 In some cases it is useful to look at this to decide what needs to be done.
5568
5569 MODE and WIN are passed so that this macro can use
5570 GO_IF_LEGITIMATE_ADDRESS.
5571
5572 It is always safe for this macro to do nothing. It exists to recognize
5573 opportunities to optimize the output.
5574
5575 For the 80386, we handle X+REG by loading X into a register R and
5576 using R+REG. R will go in a general reg and indexing will be used.
5577 However, if REG is a broken-out memory address or multiplication,
5578 nothing needs to be done because REG can certainly go in a general reg.
5579
5580 When -fpic is used, special handling is needed for symbolic references.
5581 See comments by legitimize_pic_address in i386.c for details. */
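/* For example (a sketch of the canonicalizations performed below):

     (plus (ashift reg 2) reg2)         -> (plus (mult reg 4) reg2)
     (plus (mult reg c) (plus reg2 d))  -> (plus (plus (mult reg c) reg2) d)

   after which the result is re-checked with legitimate_address_p. */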
5582
5583 rtx
5584 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
5585 {
5586 int changed = 0;
5587 unsigned log;
5588
5589 if (TARGET_DEBUG_ADDR)
5590 {
5591 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
5592 GET_MODE_NAME (mode));
5593 debug_rtx (x);
5594 }
5595
5596 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
5597 if (log)
5598 return legitimize_tls_address (x, log, false);
5599 if (GET_CODE (x) == CONST
5600 && GET_CODE (XEXP (x, 0)) == PLUS
5601 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
5602 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
5603 {
5604 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
5605 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
5606 }
5607
5608 if (flag_pic && SYMBOLIC_CONST (x))
5609 return legitimize_pic_address (x, 0);
5610
5611 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
5612 if (GET_CODE (x) == ASHIFT
5613 && GET_CODE (XEXP (x, 1)) == CONST_INT
5614 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
5615 {
5616 changed = 1;
5617 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
5618 GEN_INT (1 << log));
5619 }
5620
5621 if (GET_CODE (x) == PLUS)
5622 {
5623 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
5624
5625 if (GET_CODE (XEXP (x, 0)) == ASHIFT
5626 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
5627 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
5628 {
5629 changed = 1;
5630 XEXP (x, 0) = gen_rtx_MULT (Pmode,
5631 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
5632 GEN_INT (1 << log));
5633 }
5634
5635 if (GET_CODE (XEXP (x, 1)) == ASHIFT
5636 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
5637 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
5638 {
5639 changed = 1;
5640 XEXP (x, 1) = gen_rtx_MULT (Pmode,
5641 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
5642 GEN_INT (1 << log));
5643 }
5644
5645 /* Put multiply first if it isn't already. */
5646 if (GET_CODE (XEXP (x, 1)) == MULT)
5647 {
5648 rtx tmp = XEXP (x, 0);
5649 XEXP (x, 0) = XEXP (x, 1);
5650 XEXP (x, 1) = tmp;
5651 changed = 1;
5652 }
5653
5654 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
5655 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
5656 created by virtual register instantiation, register elimination, and
5657 similar optimizations. */
5658 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
5659 {
5660 changed = 1;
5661 x = gen_rtx_PLUS (Pmode,
5662 gen_rtx_PLUS (Pmode, XEXP (x, 0),
5663 XEXP (XEXP (x, 1), 0)),
5664 XEXP (XEXP (x, 1), 1));
5665 }
5666
5667 /* Canonicalize
5668 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
5669 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
5670 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
5671 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5672 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
5673 && CONSTANT_P (XEXP (x, 1)))
5674 {
5675 rtx constant;
5676 rtx other = NULL_RTX;
5677
5678 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5679 {
5680 constant = XEXP (x, 1);
5681 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
5682 }
5683 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
5684 {
5685 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
5686 other = XEXP (x, 1);
5687 }
5688 else
5689 constant = 0;
5690
5691 if (constant)
5692 {
5693 changed = 1;
5694 x = gen_rtx_PLUS (Pmode,
5695 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
5696 XEXP (XEXP (XEXP (x, 0), 1), 0)),
5697 plus_constant (other, INTVAL (constant)));
5698 }
5699 }
5700
5701 if (changed && legitimate_address_p (mode, x, FALSE))
5702 return x;
5703
5704 if (GET_CODE (XEXP (x, 0)) == MULT)
5705 {
5706 changed = 1;
5707 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
5708 }
5709
5710 if (GET_CODE (XEXP (x, 1)) == MULT)
5711 {
5712 changed = 1;
5713 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
5714 }
5715
5716 if (changed
5717 && GET_CODE (XEXP (x, 1)) == REG
5718 && GET_CODE (XEXP (x, 0)) == REG)
5719 return x;
5720
5721 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
5722 {
5723 changed = 1;
5724 x = legitimize_pic_address (x, 0);
5725 }
5726
5727 if (changed && legitimate_address_p (mode, x, FALSE))
5728 return x;
5729
5730 if (GET_CODE (XEXP (x, 0)) == REG)
5731 {
5732 rtx temp = gen_reg_rtx (Pmode);
5733 rtx val = force_operand (XEXP (x, 1), temp);
5734 if (val != temp)
5735 emit_move_insn (temp, val);
5736
5737 XEXP (x, 1) = temp;
5738 return x;
5739 }
5740
5741 else if (GET_CODE (XEXP (x, 1)) == REG)
5742 {
5743 rtx temp = gen_reg_rtx (Pmode);
5744 rtx val = force_operand (XEXP (x, 0), temp);
5745 if (val != temp)
5746 emit_move_insn (temp, val);
5747
5748 XEXP (x, 0) = temp;
5749 return x;
5750 }
5751 }
5752
5753 return x;
5754 }
5755 \f
5756 /* Print an integer constant expression in assembler syntax. Addition
5757 and subtraction are the only arithmetic that may appear in these
5758 expressions. FILE is the stdio stream to write to, X is the rtx, and
5759 CODE is the operand print code from the output string. */
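/* For example (assuming the usual ELF relocation syntax handled below),
   (unspec [foo] UNSPEC_GOTOFF) prints as "foo@GOTOFF",
   (unspec [foo] UNSPEC_GOT) as "foo@GOT", and
   (unspec [foo] UNSPEC_GOTPCREL) as "foo@GOTPCREL(%rip)". */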
5760
5761 static void
5762 output_pic_addr_const (FILE *file, rtx x, int code)
5763 {
5764 char buf[256];
5765
5766 switch (GET_CODE (x))
5767 {
5768 case PC:
5769 if (flag_pic)
5770 putc ('.', file);
5771 else
5772 abort ();
5773 break;
5774
5775 case SYMBOL_REF:
5776 /* Mark the decl as referenced so that cgraph will output the function. */
5777 if (SYMBOL_REF_DECL (x))
5778 mark_decl_referenced (SYMBOL_REF_DECL (x));
5779
5780 assemble_name (file, XSTR (x, 0));
5781 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
5782 fputs ("@PLT", file);
5783 break;
5784
5785 case LABEL_REF:
5786 x = XEXP (x, 0);
5787 /* FALLTHRU */
5788 case CODE_LABEL:
5789 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
5790 assemble_name (asm_out_file, buf);
5791 break;
5792
5793 case CONST_INT:
5794 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
5795 break;
5796
5797 case CONST:
5798 /* This used to output parentheses around the expression,
5799 but that does not work on the 386 (either ATT or BSD assembler). */
5800 output_pic_addr_const (file, XEXP (x, 0), code);
5801 break;
5802
5803 case CONST_DOUBLE:
5804 if (GET_MODE (x) == VOIDmode)
5805 {
5806 /* We can use %d if the number is <32 bits and positive. */
5807 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
5808 fprintf (file, "0x%lx%08lx",
5809 (unsigned long) CONST_DOUBLE_HIGH (x),
5810 (unsigned long) CONST_DOUBLE_LOW (x));
5811 else
5812 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
5813 }
5814 else
5815 /* We can't handle floating point constants;
5816 PRINT_OPERAND must handle them. */
5817 output_operand_lossage ("floating constant misused");
5818 break;
5819
5820 case PLUS:
5821 /* Some assemblers need integer constants to appear first. */
5822 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
5823 {
5824 output_pic_addr_const (file, XEXP (x, 0), code);
5825 putc ('+', file);
5826 output_pic_addr_const (file, XEXP (x, 1), code);
5827 }
5828 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5829 {
5830 output_pic_addr_const (file, XEXP (x, 1), code);
5831 putc ('+', file);
5832 output_pic_addr_const (file, XEXP (x, 0), code);
5833 }
5834 else
5835 abort ();
5836 break;
5837
5838 case MINUS:
5839 if (!TARGET_MACHO)
5840 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
5841 output_pic_addr_const (file, XEXP (x, 0), code);
5842 putc ('-', file);
5843 output_pic_addr_const (file, XEXP (x, 1), code);
5844 if (!TARGET_MACHO)
5845 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
5846 break;
5847
5848 case UNSPEC:
5849 if (XVECLEN (x, 0) != 1)
5850 abort ();
5851 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
5852 switch (XINT (x, 1))
5853 {
5854 case UNSPEC_GOT:
5855 fputs ("@GOT", file);
5856 break;
5857 case UNSPEC_GOTOFF:
5858 fputs ("@GOTOFF", file);
5859 break;
5860 case UNSPEC_GOTPCREL:
5861 fputs ("@GOTPCREL(%rip)", file);
5862 break;
5863 case UNSPEC_GOTTPOFF:
5864 /* FIXME: This might be @TPOFF in Sun ld too. */
5865 fputs ("@GOTTPOFF", file);
5866 break;
5867 case UNSPEC_TPOFF:
5868 fputs ("@TPOFF", file);
5869 break;
5870 case UNSPEC_NTPOFF:
5871 if (TARGET_64BIT)
5872 fputs ("@TPOFF", file);
5873 else
5874 fputs ("@NTPOFF", file);
5875 break;
5876 case UNSPEC_DTPOFF:
5877 fputs ("@DTPOFF", file);
5878 break;
5879 case UNSPEC_GOTNTPOFF:
5880 if (TARGET_64BIT)
5881 fputs ("@GOTTPOFF(%rip)", file);
5882 else
5883 fputs ("@GOTNTPOFF", file);
5884 break;
5885 case UNSPEC_INDNTPOFF:
5886 fputs ("@INDNTPOFF", file);
5887 break;
5888 default:
5889 output_operand_lossage ("invalid UNSPEC as operand");
5890 break;
5891 }
5892 break;
5893
5894 default:
5895 output_operand_lossage ("invalid expression as operand");
5896 }
5897 }
5898
5899 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
5900 We need to emit DTP-relative relocations. */
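/* For instance, assuming ASM_LONG expands to the usual ".long" directive,
   a 4-byte request prints roughly ".long foo@DTPOFF" while an 8-byte
   request prints ".long foo@DTPOFF, 0" (the high half is zero). */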
5901
5902 void
5903 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
5904 {
5905 fputs (ASM_LONG, file);
5906 output_addr_const (file, x);
5907 fputs ("@DTPOFF", file);
5908 switch (size)
5909 {
5910 case 4:
5911 break;
5912 case 8:
5913 fputs (", 0", file);
5914 break;
5915 default:
5916 abort ();
5917 }
5918 }
5919
5920 /* In the name of slightly smaller debug output, and to cater to
5921 general assembler lossage, recognize PIC+GOTOFF and turn it back
5922 into a direct symbol reference. */
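/* For example (a sketch of the patterns recognized below), a GOT load
   such as
     (mem (plus pic_reg (const (unspec [foo] UNSPEC_GOT))))
   is turned back into plain foo, and a @GOTOFF address such as
     (plus pic_reg (const (unspec [foo] UNSPEC_GOTOFF)))
   becomes foo, with any index register and constant offset preserved. */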
5923
5924 static rtx
5925 ix86_delegitimize_address (rtx orig_x)
5926 {
5927 rtx x = orig_x, y;
5928
5929 if (GET_CODE (x) == MEM)
5930 x = XEXP (x, 0);
5931
5932 if (TARGET_64BIT)
5933 {
5934 if (GET_CODE (x) != CONST
5935 || GET_CODE (XEXP (x, 0)) != UNSPEC
5936 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
5937 || GET_CODE (orig_x) != MEM)
5938 return orig_x;
5939 return XVECEXP (XEXP (x, 0), 0, 0);
5940 }
5941
5942 if (GET_CODE (x) != PLUS
5943 || GET_CODE (XEXP (x, 1)) != CONST)
5944 return orig_x;
5945
5946 if (GET_CODE (XEXP (x, 0)) == REG
5947 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
5948 /* %ebx + GOT/GOTOFF */
5949 y = NULL;
5950 else if (GET_CODE (XEXP (x, 0)) == PLUS)
5951 {
5952 /* %ebx + %reg * scale + GOT/GOTOFF */
5953 y = XEXP (x, 0);
5954 if (GET_CODE (XEXP (y, 0)) == REG
5955 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
5956 y = XEXP (y, 1);
5957 else if (GET_CODE (XEXP (y, 1)) == REG
5958 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
5959 y = XEXP (y, 0);
5960 else
5961 return orig_x;
5962 if (GET_CODE (y) != REG
5963 && GET_CODE (y) != MULT
5964 && GET_CODE (y) != ASHIFT)
5965 return orig_x;
5966 }
5967 else
5968 return orig_x;
5969
5970 x = XEXP (XEXP (x, 1), 0);
5971 if (GET_CODE (x) == UNSPEC
5972 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
5973 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
5974 {
5975 if (y)
5976 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
5977 return XVECEXP (x, 0, 0);
5978 }
5979
5980 if (GET_CODE (x) == PLUS
5981 && GET_CODE (XEXP (x, 0)) == UNSPEC
5982 && GET_CODE (XEXP (x, 1)) == CONST_INT
5983 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
5984 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
5985 && GET_CODE (orig_x) != MEM)))
5986 {
5987 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
5988 if (y)
5989 return gen_rtx_PLUS (Pmode, y, x);
5990 return x;
5991 }
5992
5993 return orig_x;
5994 }
5995 \f
5996 static void
5997 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
5998 int fp, FILE *file)
5999 {
6000 const char *suffix;
6001
6002 if (mode == CCFPmode || mode == CCFPUmode)
6003 {
6004 enum rtx_code second_code, bypass_code;
6005 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
6006 if (bypass_code != UNKNOWN || second_code != UNKNOWN)
6007 abort ();
6008 code = ix86_fp_compare_code_to_integer (code);
6009 mode = CCmode;
6010 }
6011 if (reverse)
6012 code = reverse_condition (code);
6013
6014 switch (code)
6015 {
6016 case EQ:
6017 suffix = "e";
6018 break;
6019 case NE:
6020 suffix = "ne";
6021 break;
6022 case GT:
6023 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
6024 abort ();
6025 suffix = "g";
6026 break;
6027 case GTU:
6028 /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
6029 Those same assemblers have the same but opposite losage on cmov. */
6030 if (mode != CCmode)
6031 abort ();
6032 suffix = fp ? "nbe" : "a";
6033 break;
6034 case LT:
6035 if (mode == CCNOmode || mode == CCGOCmode)
6036 suffix = "s";
6037 else if (mode == CCmode || mode == CCGCmode)
6038 suffix = "l";
6039 else
6040 abort ();
6041 break;
6042 case LTU:
6043 if (mode != CCmode)
6044 abort ();
6045 suffix = "b";
6046 break;
6047 case GE:
6048 if (mode == CCNOmode || mode == CCGOCmode)
6049 suffix = "ns";
6050 else if (mode == CCmode || mode == CCGCmode)
6051 suffix = "ge";
6052 else
6053 abort ();
6054 break;
6055 case GEU:
6056 /* ??? As above. */
6057 if (mode != CCmode)
6058 abort ();
6059 suffix = fp ? "nb" : "ae";
6060 break;
6061 case LE:
6062 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
6063 abort ();
6064 suffix = "le";
6065 break;
6066 case LEU:
6067 if (mode != CCmode)
6068 abort ();
6069 suffix = "be";
6070 break;
6071 case UNORDERED:
6072 suffix = fp ? "u" : "p";
6073 break;
6074 case ORDERED:
6075 suffix = fp ? "nu" : "np";
6076 break;
6077 default:
6078 abort ();
6079 }
6080 fputs (suffix, file);
6081 }
6082
6083 /* Print the name of register X to FILE based on its machine mode and number.
6084 If CODE is 'w', pretend the mode is HImode.
6085 If CODE is 'b', pretend the mode is QImode.
6086 If CODE is 'k', pretend the mode is SImode.
6087 If CODE is 'q', pretend the mode is DImode.
6088 If CODE is 'h', pretend the reg is the `high' byte register.
6089 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
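/* For example (ATT syntax, where the '%' prefix is emitted here): for the
   ax register, code 'b' prints "%al", 'h' prints "%ah", 'w' prints "%ax",
   'k' prints "%eax" and 'q' prints "%rax"; for the first REX register,
   code 'k' prints "%r8d". This only illustrates the cases handled below. */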
6090
6091 void
6092 print_reg (rtx x, int code, FILE *file)
6093 {
6094 if (REGNO (x) == ARG_POINTER_REGNUM
6095 || REGNO (x) == FRAME_POINTER_REGNUM
6096 || REGNO (x) == FLAGS_REG
6097 || REGNO (x) == FPSR_REG)
6098 abort ();
6099
6100 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
6101 putc ('%', file);
6102
6103 if (code == 'w' || MMX_REG_P (x))
6104 code = 2;
6105 else if (code == 'b')
6106 code = 1;
6107 else if (code == 'k')
6108 code = 4;
6109 else if (code == 'q')
6110 code = 8;
6111 else if (code == 'y')
6112 code = 3;
6113 else if (code == 'h')
6114 code = 0;
6115 else
6116 code = GET_MODE_SIZE (GET_MODE (x));
6117
6118 /* Irritatingly, AMD extended registers use a different naming convention
6119 from the normal registers. */
6120 if (REX_INT_REG_P (x))
6121 {
6122 if (!TARGET_64BIT)
6123 abort ();
6124 switch (code)
6125 {
6126 case 0:
6127 error ("extended registers have no high halves");
6128 break;
6129 case 1:
6130 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
6131 break;
6132 case 2:
6133 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
6134 break;
6135 case 4:
6136 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
6137 break;
6138 case 8:
6139 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
6140 break;
6141 default:
6142 error ("unsupported operand size for extended register");
6143 break;
6144 }
6145 return;
6146 }
6147 switch (code)
6148 {
6149 case 3:
6150 if (STACK_TOP_P (x))
6151 {
6152 fputs ("st(0)", file);
6153 break;
6154 }
6155 /* FALLTHRU */
6156 case 8:
6157 case 4:
6158 case 12:
6159 if (! ANY_FP_REG_P (x))
6160 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
6161 /* FALLTHRU */
6162 case 16:
6163 case 2:
6164 normal:
6165 fputs (hi_reg_name[REGNO (x)], file);
6166 break;
6167 case 1:
6168 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
6169 goto normal;
6170 fputs (qi_reg_name[REGNO (x)], file);
6171 break;
6172 case 0:
6173 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
6174 goto normal;
6175 fputs (qi_high_reg_name[REGNO (x)], file);
6176 break;
6177 default:
6178 abort ();
6179 }
6180 }
6181
6182 /* Locate some local-dynamic symbol still in use by this function
6183 so that we can print its name in some tls_local_dynamic_base
6184 pattern. */
6185
6186 static const char *
6187 get_some_local_dynamic_name (void)
6188 {
6189 rtx insn;
6190
6191 if (cfun->machine->some_ld_name)
6192 return cfun->machine->some_ld_name;
6193
6194 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
6195 if (INSN_P (insn)
6196 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
6197 return cfun->machine->some_ld_name;
6198
6199 abort ();
6200 }
6201
6202 static int
6203 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
6204 {
6205 rtx x = *px;
6206
6207 if (GET_CODE (x) == SYMBOL_REF
6208 && local_dynamic_symbolic_operand (x, Pmode))
6209 {
6210 cfun->machine->some_ld_name = XSTR (x, 0);
6211 return 1;
6212 }
6213
6214 return 0;
6215 }
6216
6217 /* Meaning of CODE:
6218 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
6219 C -- print opcode suffix for set/cmov insn.
6220 c -- like C, but print reversed condition
6221 F,f -- likewise, but for floating-point.
6222 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
6223 otherwise nothing
6224 R -- print the prefix for register names.
6225 z -- print the opcode suffix for the size of the current operand.
6226 * -- print a star (in certain assembler syntax)
6227 A -- print an absolute memory reference.
6228 w -- print the operand as if it's a "word" (HImode) even if it isn't.
6229 s -- print a shift double count, followed by the assembler's argument
6230 delimiter.
6231 b -- print the QImode name of the register for the indicated operand.
6232 %b0 would print %al if operands[0] is reg 0.
6233 w -- likewise, print the HImode name of the register.
6234 k -- likewise, print the SImode name of the register.
6235 q -- likewise, print the DImode name of the register.
6236 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
6237 y -- print "st(0)" instead of "st" as a register.
6238 D -- print condition for SSE cmp instruction.
6239 P -- if PIC, print an @PLT suffix.
6240 X -- don't print any sort of PIC '@' suffix for a symbol.
6241 & -- print some in-use local-dynamic symbol name.
6242 */
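/* A few concrete illustrations, assuming operands[0] is the SImode eax
   register and the ATT dialect: "%k0" prints "%eax", "%w0" prints "%ax",
   "%b0" prints "%al", and "%z0" prints the size suffix "l". */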
6243
6244 void
6245 print_operand (FILE *file, rtx x, int code)
6246 {
6247 if (code)
6248 {
6249 switch (code)
6250 {
6251 case '*':
6252 if (ASSEMBLER_DIALECT == ASM_ATT)
6253 putc ('*', file);
6254 return;
6255
6256 case '&':
6257 assemble_name (file, get_some_local_dynamic_name ());
6258 return;
6259
6260 case 'A':
6261 if (ASSEMBLER_DIALECT == ASM_ATT)
6262 putc ('*', file);
6263 else if (ASSEMBLER_DIALECT == ASM_INTEL)
6264 {
6265 /* Intel syntax. For absolute addresses, registers should not
6266 be surrounded by brackets. */
6267 if (GET_CODE (x) != REG)
6268 {
6269 putc ('[', file);
6270 PRINT_OPERAND (file, x, 0);
6271 putc (']', file);
6272 return;
6273 }
6274 }
6275 else
6276 abort ();
6277
6278 PRINT_OPERAND (file, x, 0);
6279 return;
6280
6281
6282 case 'L':
6283 if (ASSEMBLER_DIALECT == ASM_ATT)
6284 putc ('l', file);
6285 return;
6286
6287 case 'W':
6288 if (ASSEMBLER_DIALECT == ASM_ATT)
6289 putc ('w', file);
6290 return;
6291
6292 case 'B':
6293 if (ASSEMBLER_DIALECT == ASM_ATT)
6294 putc ('b', file);
6295 return;
6296
6297 case 'Q':
6298 if (ASSEMBLER_DIALECT == ASM_ATT)
6299 putc ('l', file);
6300 return;
6301
6302 case 'S':
6303 if (ASSEMBLER_DIALECT == ASM_ATT)
6304 putc ('s', file);
6305 return;
6306
6307 case 'T':
6308 if (ASSEMBLER_DIALECT == ASM_ATT)
6309 putc ('t', file);
6310 return;
6311
6312 case 'z':
6313 /* 387 opcodes don't get size suffixes if the operands are
6314 registers. */
6315 if (STACK_REG_P (x))
6316 return;
6317
6318 /* Likewise if using Intel opcodes. */
6319 if (ASSEMBLER_DIALECT == ASM_INTEL)
6320 return;
6321
6322 /* Derive the opcode suffix from the size of the operand. */
6323 switch (GET_MODE_SIZE (GET_MODE (x)))
6324 {
6325 case 2:
6326 #ifdef HAVE_GAS_FILDS_FISTS
6327 putc ('s', file);
6328 #endif
6329 return;
6330
6331 case 4:
6332 if (GET_MODE (x) == SFmode)
6333 {
6334 putc ('s', file);
6335 return;
6336 }
6337 else
6338 putc ('l', file);
6339 return;
6340
6341 case 12:
6342 case 16:
6343 putc ('t', file);
6344 return;
6345
6346 case 8:
6347 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
6348 {
6349 #ifdef GAS_MNEMONICS
6350 putc ('q', file);
6351 #else
6352 putc ('l', file);
6353 putc ('l', file);
6354 #endif
6355 }
6356 else
6357 putc ('l', file);
6358 return;
6359
6360 default:
6361 abort ();
6362 }
6363
6364 case 'b':
6365 case 'w':
6366 case 'k':
6367 case 'q':
6368 case 'h':
6369 case 'y':
6370 case 'X':
6371 case 'P':
6372 break;
6373
6374 case 's':
6375 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
6376 {
6377 PRINT_OPERAND (file, x, 0);
6378 putc (',', file);
6379 }
6380 return;
6381
6382 case 'D':
6383 /* Little bit of braindamage here. The SSE compare instructions
6384 use completely different names for the comparisons than the
6385 fp conditional moves do. */
6386 switch (GET_CODE (x))
6387 {
6388 case EQ:
6389 case UNEQ:
6390 fputs ("eq", file);
6391 break;
6392 case LT:
6393 case UNLT:
6394 fputs ("lt", file);
6395 break;
6396 case LE:
6397 case UNLE:
6398 fputs ("le", file);
6399 break;
6400 case UNORDERED:
6401 fputs ("unord", file);
6402 break;
6403 case NE:
6404 case LTGT:
6405 fputs ("neq", file);
6406 break;
6407 case UNGE:
6408 case GE:
6409 fputs ("nlt", file);
6410 break;
6411 case UNGT:
6412 case GT:
6413 fputs ("nle", file);
6414 break;
6415 case ORDERED:
6416 fputs ("ord", file);
6417 break;
6418 default:
6419 abort ();
6420 break;
6421 }
6422 return;
6423 case 'O':
6424 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
6425 if (ASSEMBLER_DIALECT == ASM_ATT)
6426 {
6427 switch (GET_MODE (x))
6428 {
6429 case HImode: putc ('w', file); break;
6430 case SImode:
6431 case SFmode: putc ('l', file); break;
6432 case DImode:
6433 case DFmode: putc ('q', file); break;
6434 default: abort ();
6435 }
6436 putc ('.', file);
6437 }
6438 #endif
6439 return;
6440 case 'C':
6441 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
6442 return;
6443 case 'F':
6444 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
6445 if (ASSEMBLER_DIALECT == ASM_ATT)
6446 putc ('.', file);
6447 #endif
6448 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
6449 return;
6450
6451 /* Like above, but reverse condition */
6452 case 'c':
6453 /* Check to see if argument to %c is really a constant
6454 and not a condition code which needs to be reversed. */
6455 if (!COMPARISON_P (x))
6456 {
6457 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
6458 return;
6459 }
6460 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
6461 return;
6462 case 'f':
6463 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
6464 if (ASSEMBLER_DIALECT == ASM_ATT)
6465 putc ('.', file);
6466 #endif
6467 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
6468 return;
6469 case '+':
6470 {
6471 rtx x;
6472
6473 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
6474 return;
6475
6476 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
6477 if (x)
6478 {
6479 int pred_val = INTVAL (XEXP (x, 0));
6480
6481 if (pred_val < REG_BR_PROB_BASE * 45 / 100
6482 || pred_val > REG_BR_PROB_BASE * 55 / 100)
6483 {
6484 int taken = pred_val > REG_BR_PROB_BASE / 2;
6485 int cputaken = final_forward_branch_p (current_output_insn) == 0;
6486
6487 /* Emit hints only when the default branch prediction
6488 heuristics would fail. */
6489 if (taken != cputaken)
6490 {
6491 /* We use the 0x3e (DS) prefix for taken branches and
6492 the 0x2e (CS) prefix for not-taken branches. */
6493 if (taken)
6494 fputs ("ds ; ", file);
6495 else
6496 fputs ("cs ; ", file);
6497 }
6498 }
6499 }
6500 return;
6501 }
6502 default:
6503 output_operand_lossage ("invalid operand code '%c'", code);
6504 }
6505 }
6506
6507 if (GET_CODE (x) == REG)
6508 print_reg (x, code, file);
6509
6510 else if (GET_CODE (x) == MEM)
6511 {
6512 /* No `byte ptr' prefix for call instructions. */
6513 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
6514 {
6515 const char * size;
6516 switch (GET_MODE_SIZE (GET_MODE (x)))
6517 {
6518 case 1: size = "BYTE"; break;
6519 case 2: size = "WORD"; break;
6520 case 4: size = "DWORD"; break;
6521 case 8: size = "QWORD"; break;
6522 case 12: size = "XWORD"; break;
6523 case 16: size = "XMMWORD"; break;
6524 default:
6525 abort ();
6526 }
6527
6528 /* Check for explicit size override (codes 'b', 'w' and 'k') */
6529 if (code == 'b')
6530 size = "BYTE";
6531 else if (code == 'w')
6532 size = "WORD";
6533 else if (code == 'k')
6534 size = "DWORD";
6535
6536 fputs (size, file);
6537 fputs (" PTR ", file);
6538 }
6539
6540 x = XEXP (x, 0);
6541 /* Avoid (%rip) for call operands. */
6542 if (CONSTANT_ADDRESS_P (x) && code == 'P'
6543 && GET_CODE (x) != CONST_INT)
6544 output_addr_const (file, x);
6545 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
6546 output_operand_lossage ("invalid constraints for operand");
6547 else
6548 output_address (x);
6549 }
6550
6551 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
6552 {
6553 REAL_VALUE_TYPE r;
6554 long l;
6555
6556 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6557 REAL_VALUE_TO_TARGET_SINGLE (r, l);
6558
6559 if (ASSEMBLER_DIALECT == ASM_ATT)
6560 putc ('$', file);
6561 fprintf (file, "0x%08lx", l);
6562 }
6563
6564 /* These float cases don't actually occur as immediate operands. */
6565 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
6566 {
6567 char dstr[30];
6568
6569 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
6570 fprintf (file, "%s", dstr);
6571 }
6572
6573 else if (GET_CODE (x) == CONST_DOUBLE
6574 && GET_MODE (x) == XFmode)
6575 {
6576 char dstr[30];
6577
6578 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
6579 fprintf (file, "%s", dstr);
6580 }
6581
6582 else
6583 {
6584 if (code != 'P')
6585 {
6586 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
6587 {
6588 if (ASSEMBLER_DIALECT == ASM_ATT)
6589 putc ('$', file);
6590 }
6591 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
6592 || GET_CODE (x) == LABEL_REF)
6593 {
6594 if (ASSEMBLER_DIALECT == ASM_ATT)
6595 putc ('$', file);
6596 else
6597 fputs ("OFFSET FLAT:", file);
6598 }
6599 }
6600 if (GET_CODE (x) == CONST_INT)
6601 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6602 else if (flag_pic)
6603 output_pic_addr_const (file, x, code);
6604 else
6605 output_addr_const (file, x);
6606 }
6607 }
6608 \f
6609 /* Print a memory operand whose address is ADDR. */
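/* For example, an address with base %eax, index %ebx, scale 4 and
   displacement 12 prints roughly as "12(%eax,%ebx,4)" in ATT syntax and as
   "[eax+12+ebx*4]" in Intel syntax (modulo the register prefix). */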
6610
6611 void
6612 print_operand_address (FILE *file, rtx addr)
6613 {
6614 struct ix86_address parts;
6615 rtx base, index, disp;
6616 int scale;
6617
6618 if (! ix86_decompose_address (addr, &parts))
6619 abort ();
6620
6621 base = parts.base;
6622 index = parts.index;
6623 disp = parts.disp;
6624 scale = parts.scale;
6625
6626 switch (parts.seg)
6627 {
6628 case SEG_DEFAULT:
6629 break;
6630 case SEG_FS:
6631 case SEG_GS:
6632 if (USER_LABEL_PREFIX[0] == 0)
6633 putc ('%', file);
6634 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
6635 break;
6636 default:
6637 abort ();
6638 }
6639
6640 if (!base && !index)
6641 {
6642 /* A displacement-only address requires special attention. */
6643
6644 if (GET_CODE (disp) == CONST_INT)
6645 {
6646 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
6647 {
6648 if (USER_LABEL_PREFIX[0] == 0)
6649 putc ('%', file);
6650 fputs ("ds:", file);
6651 }
6652 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
6653 }
6654 else if (flag_pic)
6655 output_pic_addr_const (file, disp, 0);
6656 else
6657 output_addr_const (file, disp);
6658
6659 /* Use the one-byte-shorter RIP-relative addressing in 64-bit mode. */
6660 if (TARGET_64BIT
6661 && ((GET_CODE (disp) == SYMBOL_REF
6662 && ! tls_symbolic_operand (disp, GET_MODE (disp)))
6663 || GET_CODE (disp) == LABEL_REF
6664 || (GET_CODE (disp) == CONST
6665 && GET_CODE (XEXP (disp, 0)) == PLUS
6666 && (GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
6667 || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
6668 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)))
6669 fputs ("(%rip)", file);
6670 }
6671 else
6672 {
6673 if (ASSEMBLER_DIALECT == ASM_ATT)
6674 {
6675 if (disp)
6676 {
6677 if (flag_pic)
6678 output_pic_addr_const (file, disp, 0);
6679 else if (GET_CODE (disp) == LABEL_REF)
6680 output_asm_label (disp);
6681 else
6682 output_addr_const (file, disp);
6683 }
6684
6685 putc ('(', file);
6686 if (base)
6687 print_reg (base, 0, file);
6688 if (index)
6689 {
6690 putc (',', file);
6691 print_reg (index, 0, file);
6692 if (scale != 1)
6693 fprintf (file, ",%d", scale);
6694 }
6695 putc (')', file);
6696 }
6697 else
6698 {
6699 rtx offset = NULL_RTX;
6700
6701 if (disp)
6702 {
6703 /* Pull out the offset of a symbol; print any symbol itself. */
6704 if (GET_CODE (disp) == CONST
6705 && GET_CODE (XEXP (disp, 0)) == PLUS
6706 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
6707 {
6708 offset = XEXP (XEXP (disp, 0), 1);
6709 disp = gen_rtx_CONST (VOIDmode,
6710 XEXP (XEXP (disp, 0), 0));
6711 }
6712
6713 if (flag_pic)
6714 output_pic_addr_const (file, disp, 0);
6715 else if (GET_CODE (disp) == LABEL_REF)
6716 output_asm_label (disp);
6717 else if (GET_CODE (disp) == CONST_INT)
6718 offset = disp;
6719 else
6720 output_addr_const (file, disp);
6721 }
6722
6723 putc ('[', file);
6724 if (base)
6725 {
6726 print_reg (base, 0, file);
6727 if (offset)
6728 {
6729 if (INTVAL (offset) >= 0)
6730 putc ('+', file);
6731 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6732 }
6733 }
6734 else if (offset)
6735 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6736 else
6737 putc ('0', file);
6738
6739 if (index)
6740 {
6741 putc ('+', file);
6742 print_reg (index, 0, file);
6743 if (scale != 1)
6744 fprintf (file, "*%d", scale);
6745 }
6746 putc (']', file);
6747 }
6748 }
6749 }
6750
6751 bool
6752 output_addr_const_extra (FILE *file, rtx x)
6753 {
6754 rtx op;
6755
6756 if (GET_CODE (x) != UNSPEC)
6757 return false;
6758
6759 op = XVECEXP (x, 0, 0);
6760 switch (XINT (x, 1))
6761 {
6762 case UNSPEC_GOTTPOFF:
6763 output_addr_const (file, op);
6764 /* FIXME: This might be @TPOFF in Sun ld. */
6765 fputs ("@GOTTPOFF", file);
6766 break;
6767 case UNSPEC_TPOFF:
6768 output_addr_const (file, op);
6769 fputs ("@TPOFF", file);
6770 break;
6771 case UNSPEC_NTPOFF:
6772 output_addr_const (file, op);
6773 if (TARGET_64BIT)
6774 fputs ("@TPOFF", file);
6775 else
6776 fputs ("@NTPOFF", file);
6777 break;
6778 case UNSPEC_DTPOFF:
6779 output_addr_const (file, op);
6780 fputs ("@DTPOFF", file);
6781 break;
6782 case UNSPEC_GOTNTPOFF:
6783 output_addr_const (file, op);
6784 if (TARGET_64BIT)
6785 fputs ("@GOTTPOFF(%rip)", file);
6786 else
6787 fputs ("@GOTNTPOFF", file);
6788 break;
6789 case UNSPEC_INDNTPOFF:
6790 output_addr_const (file, op);
6791 fputs ("@INDNTPOFF", file);
6792 break;
6793
6794 default:
6795 return false;
6796 }
6797
6798 return true;
6799 }
6800 \f
6801 /* Split one or more DImode RTL references into pairs of SImode
6802 references. The RTL can be REG, offsettable MEM, integer constant, or
6803 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
6804 split and "num" is its length. lo_half and hi_half are output arrays
6805 that parallel "operands". */
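/* For instance (a sketch of what the code below produces): a DImode MEM is
   split with adjust_address into its SImode words at offsets 0 and 4,
   while a DImode register or constant is split with simplify_gen_subreg
   into its low and high SImode halves. */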
6806
6807 void
6808 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
6809 {
6810 while (num--)
6811 {
6812 rtx op = operands[num];
6813
6814 /* simplify_subreg refuses to split volatile memory references,
6815 but we still have to handle them. */
6816 if (GET_CODE (op) == MEM)
6817 {
6818 lo_half[num] = adjust_address (op, SImode, 0);
6819 hi_half[num] = adjust_address (op, SImode, 4);
6820 }
6821 else
6822 {
6823 lo_half[num] = simplify_gen_subreg (SImode, op,
6824 GET_MODE (op) == VOIDmode
6825 ? DImode : GET_MODE (op), 0);
6826 hi_half[num] = simplify_gen_subreg (SImode, op,
6827 GET_MODE (op) == VOIDmode
6828 ? DImode : GET_MODE (op), 4);
6829 }
6830 }
6831 }
6832 /* Split one or more TImode RTL references into pairs of SImode
6833 references. The RTL can be REG, offsettable MEM, integer constant, or
6834 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
6835 split and "num" is its length. lo_half and hi_half are output arrays
6836 that parallel "operands". */
6837
6838 void
6839 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
6840 {
6841 while (num--)
6842 {
6843 rtx op = operands[num];
6844
6845 /* simplify_subreg refuses to split volatile memory references, but we
6846 still have to handle them. */
6847 if (GET_CODE (op) == MEM)
6848 {
6849 lo_half[num] = adjust_address (op, DImode, 0);
6850 hi_half[num] = adjust_address (op, DImode, 8);
6851 }
6852 else
6853 {
6854 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
6855 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
6856 }
6857 }
6858 }
6859 \f
6860 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
6861 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
6862 is the expression of the binary operation. The output may either be
6863 emitted here, or returned to the caller, like all output_* functions.
6864
6865 There is no guarantee that the operands are the same mode, as they
6866 might be within FLOAT or FLOAT_EXTEND expressions. */
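/* A couple of examples of the templates assembled below: an SSE SFmode add
   returns "addss\t{%2, %0|%0, %2}", while a 387 add whose second source is
   a memory operand returns "fadd%z2\t%2". These are illustrative only; the
   stack-register cases below are considerably more involved. */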
6867
6868 #ifndef SYSV386_COMPAT
6869 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
6870 wants to fix the assemblers because that causes incompatibility
6871 with gcc. No-one wants to fix gcc because that causes
6872 incompatibility with assemblers... You can use the option of
6873 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
6874 #define SYSV386_COMPAT 1
6875 #endif
6876
6877 const char *
6878 output_387_binary_op (rtx insn, rtx *operands)
6879 {
6880 static char buf[30];
6881 const char *p;
6882 const char *ssep;
6883 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
6884
6885 #ifdef ENABLE_CHECKING
6886 /* Even if we do not want to check the inputs, this documents the input
6887 constraints, which helps in understanding the following code. */
6888 if (STACK_REG_P (operands[0])
6889 && ((REG_P (operands[1])
6890 && REGNO (operands[0]) == REGNO (operands[1])
6891 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
6892 || (REG_P (operands[2])
6893 && REGNO (operands[0]) == REGNO (operands[2])
6894 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
6895 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
6896 ; /* ok */
6897 else if (!is_sse)
6898 abort ();
6899 #endif
6900
6901 switch (GET_CODE (operands[3]))
6902 {
6903 case PLUS:
6904 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6905 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6906 p = "fiadd";
6907 else
6908 p = "fadd";
6909 ssep = "add";
6910 break;
6911
6912 case MINUS:
6913 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6914 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6915 p = "fisub";
6916 else
6917 p = "fsub";
6918 ssep = "sub";
6919 break;
6920
6921 case MULT:
6922 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6923 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6924 p = "fimul";
6925 else
6926 p = "fmul";
6927 ssep = "mul";
6928 break;
6929
6930 case DIV:
6931 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6932 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6933 p = "fidiv";
6934 else
6935 p = "fdiv";
6936 ssep = "div";
6937 break;
6938
6939 default:
6940 abort ();
6941 }
6942
6943 if (is_sse)
6944 {
6945 strcpy (buf, ssep);
6946 if (GET_MODE (operands[0]) == SFmode)
6947 strcat (buf, "ss\t{%2, %0|%0, %2}");
6948 else
6949 strcat (buf, "sd\t{%2, %0|%0, %2}");
6950 return buf;
6951 }
6952 strcpy (buf, p);
6953
6954 switch (GET_CODE (operands[3]))
6955 {
6956 case MULT:
6957 case PLUS:
6958 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
6959 {
6960 rtx temp = operands[2];
6961 operands[2] = operands[1];
6962 operands[1] = temp;
6963 }
6964
6965 /* We now know operands[0] == operands[1]. */
6966
6967 if (GET_CODE (operands[2]) == MEM)
6968 {
6969 p = "%z2\t%2";
6970 break;
6971 }
6972
6973 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6974 {
6975 if (STACK_TOP_P (operands[0]))
6976 /* How is it that we are storing to a dead operand[2]?
6977 Well, presumably operands[1] is dead too. We can't
6978 store the result to st(0) as st(0) gets popped on this
6979 instruction. Instead store to operands[2] (which I
6980 think has to be st(1)). st(1) will be popped later.
6981 gcc <= 2.8.1 didn't have this check and generated
6982 assembly code that the Unixware assembler rejected. */
6983 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6984 else
6985 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
6986 break;
6987 }
6988
6989 if (STACK_TOP_P (operands[0]))
6990 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
6991 else
6992 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
6993 break;
6994
6995 case MINUS:
6996 case DIV:
6997 if (GET_CODE (operands[1]) == MEM)
6998 {
6999 p = "r%z1\t%1";
7000 break;
7001 }
7002
7003 if (GET_CODE (operands[2]) == MEM)
7004 {
7005 p = "%z2\t%2";
7006 break;
7007 }
7008
7009 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7010 {
7011 #if SYSV386_COMPAT
7012 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7013 derived assemblers, confusingly reverse the direction of
7014 the operation for fsub{r} and fdiv{r} when the
7015 destination register is not st(0). The Intel assembler
7016 doesn't have this brain damage. Read !SYSV386_COMPAT to
7017 figure out what the hardware really does. */
7018 if (STACK_TOP_P (operands[0]))
7019 p = "{p\t%0, %2|rp\t%2, %0}";
7020 else
7021 p = "{rp\t%2, %0|p\t%0, %2}";
7022 #else
7023 if (STACK_TOP_P (operands[0]))
7024 /* As above for fmul/fadd, we can't store to st(0). */
7025 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7026 else
7027 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7028 #endif
7029 break;
7030 }
7031
7032 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
7033 {
7034 #if SYSV386_COMPAT
7035 if (STACK_TOP_P (operands[0]))
7036 p = "{rp\t%0, %1|p\t%1, %0}";
7037 else
7038 p = "{p\t%1, %0|rp\t%0, %1}";
7039 #else
7040 if (STACK_TOP_P (operands[0]))
7041 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7042 else
7043 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7044 #endif
7045 break;
7046 }
7047
7048 if (STACK_TOP_P (operands[0]))
7049 {
7050 if (STACK_TOP_P (operands[1]))
7051 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7052 else
7053 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
7054 break;
7055 }
7056 else if (STACK_TOP_P (operands[1]))
7057 {
7058 #if SYSV386_COMPAT
7059 p = "{\t%1, %0|r\t%0, %1}";
7060 #else
7061 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
7062 #endif
7063 }
7064 else
7065 {
7066 #if SYSV386_COMPAT
7067 p = "{r\t%2, %0|\t%0, %2}";
7068 #else
7069 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7070 #endif
7071 }
7072 break;
7073
7074 default:
7075 abort ();
7076 }
7077
7078 strcat (buf, p);
7079 return buf;
7080 }
7081
7082 /* Output code to initialize control word copies used by trunc?f?i and
7083 rounding patterns. CURRENT_MODE is set to the current control word,
7084 while NEW_MODE is set to the new control word. */
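/* Background for the bit twiddling below: the rounding-control field of
   the x87 control word is bits 10-11 (mask 0x0c00); 01 rounds down, 10
   rounds up and 11 truncates, hence the OR with 0x0400, 0x0800 or 0x0c00.
   Bit 5 (0x0020) is the precision-exception mask used for nearbyint(). */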
7085
7086 void
7087 emit_i387_cw_initialization (rtx current_mode, rtx new_mode, int mode)
7088 {
7089 rtx reg = gen_reg_rtx (HImode);
7090
7091 emit_insn (gen_x86_fnstcw_1 (current_mode));
7092 emit_move_insn (reg, current_mode);
7093
7094 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
7095 && !TARGET_64BIT)
7096 {
7097 switch (mode)
7098 {
7099 case I387_CW_FLOOR:
7100 /* round down toward -oo */
7101 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
7102 break;
7103
7104 case I387_CW_CEIL:
7105 /* round up toward +oo */
7106 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
7107 break;
7108
7109 case I387_CW_TRUNC:
7110 /* round toward zero (truncate) */
7111 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
7112 break;
7113
7114 case I387_CW_MASK_PM:
7115 /* mask precision exception for nearbyint() */
7116 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
7117 break;
7118
7119 default:
7120 abort();
7121 }
7122 }
7123 else
7124 {
7125 switch (mode)
7126 {
7127 case I387_CW_FLOOR:
7128 /* round down toward -oo */
7129 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
7130 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
7131 break;
7132
7133 case I387_CW_CEIL:
7134 /* round up toward +oo */
7135 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
7136 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
7137 break;
7138
7139 case I387_CW_TRUNC:
7140 /* round toward zero (truncate) */
7141 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
7142 break;
7143
7144 case I387_CW_MASK_PM:
7145 /* mask precision exception for nearbyint() */
7146 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
7147 break;
7148
7149 default:
7150 abort();
7151 }
7152 }
7153
7154 emit_move_insn (new_mode, reg);
7155 }
7156
7157 /* Output code for INSN to convert a float to a signed int. OPERANDS
7158 are the insn operands. The output may be [HSD]Imode and the input
7159 operand may be [SDX]Fmode. */
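/* The emitted sequence is roughly: "fldcw %3" to switch to the truncating
   control word, then "fistp%z0 %0" (or the non-popping "fist%z0 %0" when
   the value is still needed), then "fldcw %2" to restore the original
   control word; DImode first loads a copy of the value, since only the
   popping 64-bit store exists. */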
7160
7161 const char *
7162 output_fix_trunc (rtx insn, rtx *operands)
7163 {
7164 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7165 int dimode_p = GET_MODE (operands[0]) == DImode;
7166
7167 /* Jump through a hoop or two for DImode, since the hardware has no
7168 non-popping instruction. We used to do this a different way, but
7169 that was somewhat fragile and broke with post-reload splitters. */
7170 if (dimode_p && !stack_top_dies)
7171 output_asm_insn ("fld\t%y1", operands);
7172
7173 if (!STACK_TOP_P (operands[1]))
7174 abort ();
7175
7176 if (GET_CODE (operands[0]) != MEM)
7177 abort ();
7178
7179 output_asm_insn ("fldcw\t%3", operands);
7180 if (stack_top_dies || dimode_p)
7181 output_asm_insn ("fistp%z0\t%0", operands);
7182 else
7183 output_asm_insn ("fist%z0\t%0", operands);
7184 output_asm_insn ("fldcw\t%2", operands);
7185
7186 return "";
7187 }
7188
7189 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
7190 should be used. UNORDERED_P is true when fucom should be used. */
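/* Sketch of the selection below: SSE operands get "[u]comiss"/"[u]comisd"
   templates directly; 387 operands index a small table with the 4-bit mask
   (eflags_p << 3 | integer-operand << 2 | unordered_p << 1 |
   stack_top_dies), so e.g. mask 8 yields "fcomi\t{%y1, %0|%0, %y1}". */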
7191
7192 const char *
7193 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
7194 {
7195 int stack_top_dies;
7196 rtx cmp_op0, cmp_op1;
7197 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
7198
7199 if (eflags_p)
7200 {
7201 cmp_op0 = operands[0];
7202 cmp_op1 = operands[1];
7203 }
7204 else
7205 {
7206 cmp_op0 = operands[1];
7207 cmp_op1 = operands[2];
7208 }
7209
7210 if (is_sse)
7211 {
7212 if (GET_MODE (operands[0]) == SFmode)
7213 if (unordered_p)
7214 return "ucomiss\t{%1, %0|%0, %1}";
7215 else
7216 return "comiss\t{%1, %0|%0, %1}";
7217 else
7218 if (unordered_p)
7219 return "ucomisd\t{%1, %0|%0, %1}";
7220 else
7221 return "comisd\t{%1, %0|%0, %1}";
7222 }
7223
7224 if (! STACK_TOP_P (cmp_op0))
7225 abort ();
7226
7227 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7228
7229 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
7230 {
7231 if (stack_top_dies)
7232 {
7233 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
7234 return TARGET_USE_FFREEP ? "ffreep\t%y1" : "fstp\t%y1";
7235 }
7236 else
7237 return "ftst\n\tfnstsw\t%0";
7238 }
7239
7240 if (STACK_REG_P (cmp_op1)
7241 && stack_top_dies
7242 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
7243 && REGNO (cmp_op1) != FIRST_STACK_REG)
7244 {
7245 /* If the top of the 387 stack dies, and the other operand
7246 is also a stack register that dies, then this must be an
7247 `fcompp' float compare. */
7248
7249 if (eflags_p)
7250 {
7251 /* There is no double popping fcomi variant. Fortunately,
7252 eflags is immune from the fstp's cc clobbering. */
7253 if (unordered_p)
7254 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
7255 else
7256 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
7257 return TARGET_USE_FFREEP ? "ffreep\t%y0" : "fstp\t%y0";
7258 }
7259 else
7260 {
7261 if (unordered_p)
7262 return "fucompp\n\tfnstsw\t%0";
7263 else
7264 return "fcompp\n\tfnstsw\t%0";
7265 }
7266 }
7267 else
7268 {
7269 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
7270
7271 static const char * const alt[16] =
7272 {
7273 "fcom%z2\t%y2\n\tfnstsw\t%0",
7274 "fcomp%z2\t%y2\n\tfnstsw\t%0",
7275 "fucom%z2\t%y2\n\tfnstsw\t%0",
7276 "fucomp%z2\t%y2\n\tfnstsw\t%0",
7277
7278 "ficom%z2\t%y2\n\tfnstsw\t%0",
7279 "ficomp%z2\t%y2\n\tfnstsw\t%0",
7280 NULL,
7281 NULL,
7282
7283 "fcomi\t{%y1, %0|%0, %y1}",
7284 "fcomip\t{%y1, %0|%0, %y1}",
7285 "fucomi\t{%y1, %0|%0, %y1}",
7286 "fucomip\t{%y1, %0|%0, %y1}",
7287
7288 NULL,
7289 NULL,
7290 NULL,
7291 NULL
7292 };
7293
7294 int mask;
7295 const char *ret;
7296
7297 mask = eflags_p << 3;
7298 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
7299 mask |= unordered_p << 1;
7300 mask |= stack_top_dies;
7301
7302 if (mask >= 16)
7303 abort ();
7304 ret = alt[mask];
7305 if (ret == NULL)
7306 abort ();
7307
7308 return ret;
7309 }
7310 }
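
/* A worked example of the ALT table index above (a sketch): the index is
   eflags_p << 3 | is-integer << 2 | unordered_p << 1 | stack_top_dies,
   so an unordered compare against a memory float operand with %st(0)
   dying and no direct eflags use gives 0<<3 | 0<<2 | 1<<1 | 1 == 3 and
   selects "fucomp%z2\t%y2\n\tfnstsw\t%0".  The NULL slots are the
   combinations that cannot occur, e.g. integer operands together with
   fcomi.  */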
7311
7312 void
7313 ix86_output_addr_vec_elt (FILE *file, int value)
7314 {
7315 const char *directive = ASM_LONG;
7316
7317 if (TARGET_64BIT)
7318 {
7319 #ifdef ASM_QUAD
7320 directive = ASM_QUAD;
7321 #else
7322 abort ();
7323 #endif
7324 }
7325
7326 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
7327 }
7328
7329 void
7330 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
7331 {
7332 if (TARGET_64BIT)
7333 fprintf (file, "%s%s%d-%s%d\n",
7334 ASM_LONG, LPREFIX, value, LPREFIX, rel);
7335 else if (HAVE_AS_GOTOFF_IN_DATA)
7336 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
7337 #if TARGET_MACHO
7338 else if (TARGET_MACHO)
7339 {
7340 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
7341 machopic_output_function_base_name (file);
7342 fprintf(file, "\n");
7343 }
7344 #endif
7345 else
7346 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
7347 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
7348 }
7349 \f
7350 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
7351 for the target. */
7352
7353 void
7354 ix86_expand_clear (rtx dest)
7355 {
7356 rtx tmp;
7357
7358 /* We play register width games, which are only valid after reload. */
7359 if (!reload_completed)
7360 abort ();
7361
7362 /* Avoid HImode and its attendant prefix byte. */
7363 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
7364 dest = gen_rtx_REG (SImode, REGNO (dest));
7365
7366 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
7367
7368 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
7369 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
7370 {
7371 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
7372 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
7373 }
7374
7375 emit_insn (tmp);
7376 }
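
/* For illustration (register name hypothetical): clearing %eax this way
   normally becomes "xorl %eax, %eax" wrapped in a PARALLEL with a flags
   clobber, matching the movsi_xor pattern; only with TARGET_USE_MOV0 and
   not optimizing for size is the flags-preserving "movl $0, %eax" kept.  */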
7377
7378 /* X is an unchanging MEM. If it is a constant pool reference, return
7379 the constant pool rtx, else NULL. */
7380
7381 rtx
7382 maybe_get_pool_constant (rtx x)
7383 {
7384 x = ix86_delegitimize_address (XEXP (x, 0));
7385
7386 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7387 return get_pool_constant (x);
7388
7389 return NULL_RTX;
7390 }
7391
7392 void
7393 ix86_expand_move (enum machine_mode mode, rtx operands[])
7394 {
7395 int strict = (reload_in_progress || reload_completed);
7396 rtx op0, op1;
7397 enum tls_model model;
7398
7399 op0 = operands[0];
7400 op1 = operands[1];
7401
7402 model = GET_CODE (op1) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (op1) : 0;
7403 if (model)
7404 {
7405 op1 = legitimize_tls_address (op1, model, true);
7406 op1 = force_operand (op1, op0);
7407 if (op1 == op0)
7408 return;
7409 }
7410
7411 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
7412 {
7413 #if TARGET_MACHO
7414 if (MACHOPIC_PURE)
7415 {
7416 rtx temp = ((reload_in_progress
7417 || ((op0 && GET_CODE (op0) == REG)
7418 && mode == Pmode))
7419 ? op0 : gen_reg_rtx (Pmode));
7420 op1 = machopic_indirect_data_reference (op1, temp);
7421 op1 = machopic_legitimize_pic_address (op1, mode,
7422 temp == op1 ? 0 : temp);
7423 }
7424 else if (MACHOPIC_INDIRECT)
7425 op1 = machopic_indirect_data_reference (op1, 0);
7426 if (op0 == op1)
7427 return;
7428 #else
7429 if (GET_CODE (op0) == MEM)
7430 op1 = force_reg (Pmode, op1);
7431 else
7432 op1 = legitimize_address (op1, op1, Pmode);
7433 #endif /* TARGET_MACHO */
7434 }
7435 else
7436 {
7437 if (GET_CODE (op0) == MEM
7438 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
7439 || !push_operand (op0, mode))
7440 && GET_CODE (op1) == MEM)
7441 op1 = force_reg (mode, op1);
7442
7443 if (push_operand (op0, mode)
7444 && ! general_no_elim_operand (op1, mode))
7445 op1 = copy_to_mode_reg (mode, op1);
7446
7447 /* Force large constants in 64bit compilation into a register
7448 to get them CSEed. */
7449 if (TARGET_64BIT && mode == DImode
7450 && immediate_operand (op1, mode)
7451 && !x86_64_zext_immediate_operand (op1, VOIDmode)
7452 && !register_operand (op0, mode)
7453 && optimize && !reload_completed && !reload_in_progress)
7454 op1 = copy_to_mode_reg (mode, op1);
7455
7456 if (FLOAT_MODE_P (mode))
7457 {
7458 /* If we are loading a floating point constant to a register,
7459 force the value to memory now, since we'll get better code
7460 out of the back end. */
7461
7462 if (strict)
7463 ;
7464 else if (GET_CODE (op1) == CONST_DOUBLE)
7465 {
7466 op1 = validize_mem (force_const_mem (mode, op1));
7467 if (!register_operand (op0, mode))
7468 {
7469 rtx temp = gen_reg_rtx (mode);
7470 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
7471 emit_move_insn (op0, temp);
7472 return;
7473 }
7474 }
7475 }
7476 }
7477
7478 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
7479 }
7480
7481 void
7482 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
7483 {
7484 /* Force constants other than zero into memory. We do not know how
7485 the instructions used to build constants modify the upper 64 bits
7486 of the register; once we have that information we may be able
7487 to handle some of them more efficiently. */
7488 if ((reload_in_progress | reload_completed) == 0
7489 && register_operand (operands[0], mode)
7490 && CONSTANT_P (operands[1]) && operands[1] != CONST0_RTX (mode))
7491 operands[1] = validize_mem (force_const_mem (mode, operands[1]));
7492
7493 /* Make operand1 a register if it isn't already. */
7494 if (!no_new_pseudos
7495 && !register_operand (operands[0], mode)
7496 && !register_operand (operands[1], mode))
7497 {
7498 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
7499 emit_move_insn (operands[0], temp);
7500 return;
7501 }
7502
7503 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
7504 }
7505
7506 /* Attempt to expand a binary operator. Make the expansion closer to the
7507 actual machine than just general_operand, which would allow 3 separate
7508 memory references (one output, two input) in a single insn. */
7509
7510 void
7511 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
7512 rtx operands[])
7513 {
7514 int matching_memory;
7515 rtx src1, src2, dst, op, clob;
7516
7517 dst = operands[0];
7518 src1 = operands[1];
7519 src2 = operands[2];
7520
7521 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
7522 if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
7523 && (rtx_equal_p (dst, src2)
7524 || immediate_operand (src1, mode)))
7525 {
7526 rtx temp = src1;
7527 src1 = src2;
7528 src2 = temp;
7529 }
7530
7531 /* If the destination is memory, and we do not have matching source
7532 operands, do things in registers. */
7533 matching_memory = 0;
7534 if (GET_CODE (dst) == MEM)
7535 {
7536 if (rtx_equal_p (dst, src1))
7537 matching_memory = 1;
7538 else if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
7539 && rtx_equal_p (dst, src2))
7540 matching_memory = 2;
7541 else
7542 dst = gen_reg_rtx (mode);
7543 }
7544
7545 /* Both source operands cannot be in memory. */
7546 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
7547 {
7548 if (matching_memory != 2)
7549 src2 = force_reg (mode, src2);
7550 else
7551 src1 = force_reg (mode, src1);
7552 }
7553
7554 /* If the operation is not commutative, source 1 cannot be a constant
7555 or non-matching memory. */
7556 if ((CONSTANT_P (src1)
7557 || (!matching_memory && GET_CODE (src1) == MEM))
7558 && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
7559 src1 = force_reg (mode, src1);
7560
7561 /* If optimizing, copy to regs to improve CSE */
7562 if (optimize && ! no_new_pseudos)
7563 {
7564 if (GET_CODE (dst) == MEM)
7565 dst = gen_reg_rtx (mode);
7566 if (GET_CODE (src1) == MEM)
7567 src1 = force_reg (mode, src1);
7568 if (GET_CODE (src2) == MEM)
7569 src2 = force_reg (mode, src2);
7570 }
7571
7572 /* Emit the instruction. */
7573
7574 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
7575 if (reload_in_progress)
7576 {
7577 /* Reload doesn't know about the flags register, and doesn't know that
7578 it doesn't want to clobber it. We can only do this with PLUS. */
7579 if (code != PLUS)
7580 abort ();
7581 emit_insn (op);
7582 }
7583 else
7584 {
7585 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
7586 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
7587 }
7588
7589 /* Fix up the destination if needed. */
7590 if (dst != operands[0])
7591 emit_move_insn (operands[0], dst);
7592 }
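
/* A rough illustration (variable names hypothetical): expanding
   "c = a + b" in SImode with A, B and C all in memory first forces B into
   a register (both sources cannot be MEM), and since the memory
   destination matches neither source the sum is computed into a fresh
   pseudo with a flags clobber and only then stored back to C by the final
   emit_move_insn above.  */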
7593
7594 /* Return TRUE or FALSE depending on whether the binary operator meets the
7595 appropriate constraints. */
7596
7597 int
7598 ix86_binary_operator_ok (enum rtx_code code,
7599 enum machine_mode mode ATTRIBUTE_UNUSED,
7600 rtx operands[3])
7601 {
7602 /* Both source operands cannot be in memory. */
7603 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
7604 return 0;
7605 /* If the operation is not commutative, source 1 cannot be a constant. */
7606 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
7607 return 0;
7608 /* If the destination is memory, we must have a matching source operand. */
7609 if (GET_CODE (operands[0]) == MEM
7610 && ! (rtx_equal_p (operands[0], operands[1])
7611 || (GET_RTX_CLASS (code) == RTX_COMM_ARITH
7612 && rtx_equal_p (operands[0], operands[2]))))
7613 return 0;
7614 /* If the operation is not commutative and source 1 is memory, we must
7615 have a matching destination. */
7616 if (GET_CODE (operands[1]) == MEM
7617 && GET_RTX_CLASS (code) != RTX_COMM_ARITH
7618 && ! rtx_equal_p (operands[0], operands[1]))
7619 return 0;
7620 return 1;
7621 }
7622
7623 /* Attempt to expand a unary operator. Make the expansion closer to the
7624 actual machine than just general_operand, which would allow 2 separate
7625 memory references (one output, one input) in a single insn. */
7626
7627 void
7628 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
7629 rtx operands[])
7630 {
7631 int matching_memory;
7632 rtx src, dst, op, clob;
7633
7634 dst = operands[0];
7635 src = operands[1];
7636
7637 /* If the destination is memory, and we do not have matching source
7638 operands, do things in registers. */
7639 matching_memory = 0;
7640 if (MEM_P (dst))
7641 {
7642 if (rtx_equal_p (dst, src))
7643 matching_memory = 1;
7644 else
7645 dst = gen_reg_rtx (mode);
7646 }
7647
7648 /* When source operand is memory, destination must match. */
7649 if (MEM_P (src) && !matching_memory)
7650 src = force_reg (mode, src);
7651
7652 /* If optimizing, copy to regs to improve CSE. */
7653 if (optimize && ! no_new_pseudos)
7654 {
7655 if (GET_CODE (dst) == MEM)
7656 dst = gen_reg_rtx (mode);
7657 if (GET_CODE (src) == MEM)
7658 src = force_reg (mode, src);
7659 }
7660
7661 /* Emit the instruction. */
7662
7663 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
7664 if (reload_in_progress || code == NOT)
7665 {
7666 /* Reload doesn't know about the flags register, and doesn't know that
7667 it doesn't want to clobber it. */
7668 if (code != NOT)
7669 abort ();
7670 emit_insn (op);
7671 }
7672 else
7673 {
7674 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
7675 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
7676 }
7677
7678 /* Fix up the destination if needed. */
7679 if (dst != operands[0])
7680 emit_move_insn (operands[0], dst);
7681 }
7682
7683 /* Return TRUE or FALSE depending on whether the unary operator meets the
7684 appropriate constraints. */
7685
7686 int
7687 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
7688 enum machine_mode mode ATTRIBUTE_UNUSED,
7689 rtx operands[2] ATTRIBUTE_UNUSED)
7690 {
7691 /* If one of operands is memory, source and destination must match. */
7692 if ((GET_CODE (operands[0]) == MEM
7693 || GET_CODE (operands[1]) == MEM)
7694 && ! rtx_equal_p (operands[0], operands[1]))
7695 return FALSE;
7696 return TRUE;
7697 }
7698
7699 /* Generate code for floating point ABS or NEG. */
7700
7701 void
7702 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
7703 rtx operands[])
7704 {
7705 rtx mask, set, use, clob, dst, src;
7706 bool matching_memory;
7707 bool use_sse = false;
7708
7709 if (TARGET_SSE_MATH)
7710 {
7711 if (mode == SFmode)
7712 use_sse = true;
7713 else if (mode == DFmode && TARGET_SSE2)
7714 use_sse = true;
7715 }
7716
7717 /* NEG and ABS performed with SSE use bitwise mask operations.
7718 Create the appropriate mask now. */
7719 if (use_sse)
7720 {
7721 HOST_WIDE_INT hi, lo;
7722 int shift = 63;
7723
7724 /* Find the sign bit, sign extended to 2*HWI. */
7725 if (mode == SFmode)
7726 lo = 0x80000000, hi = lo < 0;
7727 else if (HOST_BITS_PER_WIDE_INT >= 64)
7728 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
7729 else
7730 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
7731
7732 /* If we're looking for the absolute value, then we want
7733 the complement. */
7734 if (code == ABS)
7735 lo = ~lo, hi = ~hi;
7736
7737 /* Force this value into the low part of a fp vector constant. */
7738 mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
7739 mask = gen_lowpart (mode, mask);
7740 if (mode == SFmode)
7741 mask = gen_rtx_CONST_VECTOR (V4SFmode,
7742 gen_rtvec (4, mask, CONST0_RTX (SFmode),
7743 CONST0_RTX (SFmode),
7744 CONST0_RTX (SFmode)));
7745 else
7746 mask = gen_rtx_CONST_VECTOR (V2DFmode,
7747 gen_rtvec (2, mask, CONST0_RTX (DFmode)));
7748 mask = force_reg (GET_MODE (mask), mask);
7749 }
7750 else
7751 {
7752 /* When not using SSE, we don't use the mask, but prefer to keep the
7753 same general form of the insn pattern to reduce duplication when
7754 it comes time to split. */
7755 mask = const0_rtx;
7756 }
7757
7758 dst = operands[0];
7759 src = operands[1];
7760
7761 /* If the destination is memory, and we don't have matching source
7762 operands, do things in registers. */
7763 matching_memory = false;
7764 if (MEM_P (dst))
7765 {
7766 if (rtx_equal_p (dst, src) && (!optimize || no_new_pseudos))
7767 matching_memory = true;
7768 else
7769 dst = gen_reg_rtx (mode);
7770 }
7771 if (MEM_P (src) && !matching_memory)
7772 src = force_reg (mode, src);
7773
7774 set = gen_rtx_fmt_e (code, mode, src);
7775 set = gen_rtx_SET (VOIDmode, dst, set);
7776 use = gen_rtx_USE (VOIDmode, mask);
7777 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
7778 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (3, set, use, clob)));
7779
7780 if (dst != operands[0])
7781 emit_move_insn (operands[0], dst);
7782 }
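
/* A worked example of the mask above (a sketch): for DFmode the sign bit
   is bit 63, so NEG uses the vector constant { 0x8000000000000000, 0 }
   and ABS its complement { 0x7fffffffffffffff, 0 }.  The splitters can
   then implement NEG as an xorpd with that constant (flip the sign bit)
   and ABS as an andpd (clear it); SFmode does the same with 0x80000000
   in the low element of a V4SF constant.  */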
7783
7784 /* Return TRUE or FALSE depending on whether the first SET in INSN
7785 has source and destination with matching CC modes, and whether the
7786 CC mode is at least as constrained as REQ_MODE. */
7787
7788 int
7789 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
7790 {
7791 rtx set;
7792 enum machine_mode set_mode;
7793
7794 set = PATTERN (insn);
7795 if (GET_CODE (set) == PARALLEL)
7796 set = XVECEXP (set, 0, 0);
7797 if (GET_CODE (set) != SET)
7798 abort ();
7799 if (GET_CODE (SET_SRC (set)) != COMPARE)
7800 abort ();
7801
7802 set_mode = GET_MODE (SET_DEST (set));
7803 switch (set_mode)
7804 {
7805 case CCNOmode:
7806 if (req_mode != CCNOmode
7807 && (req_mode != CCmode
7808 || XEXP (SET_SRC (set), 1) != const0_rtx))
7809 return 0;
7810 break;
7811 case CCmode:
7812 if (req_mode == CCGCmode)
7813 return 0;
7814 /* FALLTHRU */
7815 case CCGCmode:
7816 if (req_mode == CCGOCmode || req_mode == CCNOmode)
7817 return 0;
7818 /* FALLTHRU */
7819 case CCGOCmode:
7820 if (req_mode == CCZmode)
7821 return 0;
7822 /* FALLTHRU */
7823 case CCZmode:
7824 break;
7825
7826 default:
7827 abort ();
7828 }
7829
7830 return (GET_MODE (SET_SRC (set)) == set_mode);
7831 }
7832
7833 /* Generate insn patterns to do an integer compare of OPERANDS. */
7834
7835 static rtx
7836 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
7837 {
7838 enum machine_mode cmpmode;
7839 rtx tmp, flags;
7840
7841 cmpmode = SELECT_CC_MODE (code, op0, op1);
7842 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
7843
7844 /* This is very simple, but making the interface the same as in the
7845 FP case makes the rest of the code easier. */
7846 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
7847 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
7848
7849 /* Return the test that should be put into the flags user, i.e.
7850 the bcc, scc, or cmov instruction. */
7851 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
7852 }
7853
7854 /* Figure out whether to use ordered or unordered fp comparisons.
7855 Return the appropriate mode to use. */
7856
7857 enum machine_mode
7858 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
7859 {
7860 /* ??? In order to make all comparisons reversible, we do all comparisons
7861 non-trapping when compiling for IEEE. Once gcc is able to distinguish
7862 between all forms of trapping and nontrapping comparisons, we can make
7863 inequality comparisons trapping again, since that results in better code
7864 when using FCOM based compares. */
7865 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
7866 }
7867
7868 enum machine_mode
7869 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
7870 {
7871 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
7872 return ix86_fp_compare_mode (code);
7873 switch (code)
7874 {
7875 /* Only zero flag is needed. */
7876 case EQ: /* ZF=0 */
7877 case NE: /* ZF!=0 */
7878 return CCZmode;
7879 /* Codes needing carry flag. */
7880 case GEU: /* CF=0 */
7881 case GTU: /* CF=0 & ZF=0 */
7882 case LTU: /* CF=1 */
7883 case LEU: /* CF=1 | ZF=1 */
7884 return CCmode;
7885 /* Codes possibly doable only with sign flag when
7886 comparing against zero. */
7887 case GE: /* SF=OF or SF=0 */
7888 case LT: /* SF<>OF or SF=1 */
7889 if (op1 == const0_rtx)
7890 return CCGOCmode;
7891 else
7892 /* For other cases Carry flag is not required. */
7893 return CCGCmode;
7894 /* Codes doable only with sign flag when comparing
7895 against zero, but we miss jump instruction for it
7896 so we need to use relational tests against overflow
7897 that thus needs to be zero. */
7898 case GT: /* ZF=0 & SF=OF */
7899 case LE: /* ZF=1 | SF<>OF */
7900 if (op1 == const0_rtx)
7901 return CCNOmode;
7902 else
7903 return CCGCmode;
7904 /* The strcmp pattern does (use flags), and combine may ask us for the
7905 proper mode. */
7906 case USE:
7907 return CCmode;
7908 default:
7909 abort ();
7910 }
7911 }
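
/* Examples of the mode selection above (a sketch): an unsigned "a < b"
   needs the carry flag and gets CCmode; a signed "a < 0" can be decided
   from the sign flag alone and gets CCGOCmode; "a > 0" needs ZF and SF=OF
   but no carry and gets CCNOmode; plain equality tests only need ZF and
   get CCZmode.  */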
7912
7913 /* Return the fixed registers used for condition codes. */
7914
7915 static bool
7916 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
7917 {
7918 *p1 = FLAGS_REG;
7919 *p2 = FPSR_REG;
7920 return true;
7921 }
7922
7923 /* If two condition code modes are compatible, return a condition code
7924 mode which is compatible with both. Otherwise, return
7925 VOIDmode. */
7926
7927 static enum machine_mode
7928 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
7929 {
7930 if (m1 == m2)
7931 return m1;
7932
7933 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
7934 return VOIDmode;
7935
7936 if ((m1 == CCGCmode && m2 == CCGOCmode)
7937 || (m1 == CCGOCmode && m2 == CCGCmode))
7938 return CCGCmode;
7939
7940 switch (m1)
7941 {
7942 default:
7943 abort ();
7944
7945 case CCmode:
7946 case CCGCmode:
7947 case CCGOCmode:
7948 case CCNOmode:
7949 case CCZmode:
7950 switch (m2)
7951 {
7952 default:
7953 return VOIDmode;
7954
7955 case CCmode:
7956 case CCGCmode:
7957 case CCGOCmode:
7958 case CCNOmode:
7959 case CCZmode:
7960 return CCmode;
7961 }
7962
7963 case CCFPmode:
7964 case CCFPUmode:
7965 /* These are only compatible with themselves, which we already
7966 checked above. */
7967 return VOIDmode;
7968 }
7969 }
7970
7971 /* Return true if we should use an FCOMI instruction for this fp comparison. */
7972
7973 int
7974 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
7975 {
7976 enum rtx_code swapped_code = swap_condition (code);
7977 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
7978 || (ix86_fp_comparison_cost (swapped_code)
7979 == ix86_fp_comparison_fcomi_cost (swapped_code)));
7980 }
7981
7982 /* Swap, force into registers, or otherwise massage the two operands
7983 to a fp comparison. The operands are updated in place; the new
7984 comparison code is returned. */
7985
7986 static enum rtx_code
7987 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
7988 {
7989 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
7990 rtx op0 = *pop0, op1 = *pop1;
7991 enum machine_mode op_mode = GET_MODE (op0);
7992 int is_sse = SSE_REG_P (op0) || SSE_REG_P (op1);
7993
7994 /* All of the unordered compare instructions only work on registers.
7995 The same is true of the fcomi compare instructions. The same is
7996 true of the XFmode compare instructions if not comparing with
7997 zero (ftst insn is used in this case). */
7998
7999 if (!is_sse
8000 && (fpcmp_mode == CCFPUmode
8001 || (op_mode == XFmode
8002 && ! (standard_80387_constant_p (op0) == 1
8003 || standard_80387_constant_p (op1) == 1))
8004 || ix86_use_fcomi_compare (code)))
8005 {
8006 op0 = force_reg (op_mode, op0);
8007 op1 = force_reg (op_mode, op1);
8008 }
8009 else
8010 {
8011 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
8012 things around if they appear profitable, otherwise force op0
8013 into a register. */
8014
8015 if (standard_80387_constant_p (op0) == 0
8016 || (GET_CODE (op0) == MEM
8017 && ! (standard_80387_constant_p (op1) == 0
8018 || GET_CODE (op1) == MEM)))
8019 {
8020 rtx tmp;
8021 tmp = op0, op0 = op1, op1 = tmp;
8022 code = swap_condition (code);
8023 }
8024
8025 if (GET_CODE (op0) != REG)
8026 op0 = force_reg (op_mode, op0);
8027
8028 if (CONSTANT_P (op1))
8029 {
8030 int tmp = standard_80387_constant_p (op1);
8031 if (tmp == 0)
8032 op1 = validize_mem (force_const_mem (op_mode, op1));
8033 else if (tmp == 1)
8034 {
8035 if (TARGET_CMOVE)
8036 op1 = force_reg (op_mode, op1);
8037 }
8038 else
8039 op1 = force_reg (op_mode, op1);
8040 }
8041 }
8042
8043 /* Try to rearrange the comparison to make it cheaper. */
8044 if (ix86_fp_comparison_cost (code)
8045 > ix86_fp_comparison_cost (swap_condition (code))
8046 && (GET_CODE (op1) == REG || !no_new_pseudos))
8047 {
8048 rtx tmp;
8049 tmp = op0, op0 = op1, op1 = tmp;
8050 code = swap_condition (code);
8051 if (GET_CODE (op0) != REG)
8052 op0 = force_reg (op_mode, op0);
8053 }
8054
8055 *pop0 = op0;
8056 *pop1 = op1;
8057 return code;
8058 }
8059
8060 /* Convert comparison codes we use to represent FP comparison to integer
8061 code that will result in a proper branch. Return UNKNOWN if no such code
8062 is available. */
8063
8064 enum rtx_code
8065 ix86_fp_compare_code_to_integer (enum rtx_code code)
8066 {
8067 switch (code)
8068 {
8069 case GT:
8070 return GTU;
8071 case GE:
8072 return GEU;
8073 case ORDERED:
8074 case UNORDERED:
8075 return code;
8076 break;
8077 case UNEQ:
8078 return EQ;
8079 break;
8080 case UNLT:
8081 return LTU;
8082 break;
8083 case UNLE:
8084 return LEU;
8085 break;
8086 case LTGT:
8087 return NE;
8088 break;
8089 default:
8090 return UNKNOWN;
8091 }
8092 }
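
/* The mapping above works because fcomi and fnstsw/sahf report the FP
   result in ZF/PF/CF as if it were an unsigned compare (see the flag
   table in ix86_fp_comparison_codes below), so e.g. an FP "a > b" is
   tested with the unsigned "ja" (GTU) rather than the signed "jg".  */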
8093
8094 /* Split comparison code CODE into comparisons we can do using branch
8095 instructions. BYPASS_CODE is comparison code for branch that will
8096 branch around FIRST_CODE and SECOND_CODE. If one of the branches
8097 is not required, its code is set to UNKNOWN.
8098 We never require more than two branches. */
8099
8100 void
8101 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
8102 enum rtx_code *first_code,
8103 enum rtx_code *second_code)
8104 {
8105 *first_code = code;
8106 *bypass_code = UNKNOWN;
8107 *second_code = UNKNOWN;
8108
8109 /* The fcomi comparison sets flags as follows:
8110
8111 cmp ZF PF CF
8112 > 0 0 0
8113 < 0 0 1
8114 = 1 0 0
8115 un 1 1 1 */
8116
8117 switch (code)
8118 {
8119 case GT: /* GTU - CF=0 & ZF=0 */
8120 case GE: /* GEU - CF=0 */
8121 case ORDERED: /* PF=0 */
8122 case UNORDERED: /* PF=1 */
8123 case UNEQ: /* EQ - ZF=1 */
8124 case UNLT: /* LTU - CF=1 */
8125 case UNLE: /* LEU - CF=1 | ZF=1 */
8126 case LTGT: /* EQ - ZF=0 */
8127 break;
8128 case LT: /* LTU - CF=1 - fails on unordered */
8129 *first_code = UNLT;
8130 *bypass_code = UNORDERED;
8131 break;
8132 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
8133 *first_code = UNLE;
8134 *bypass_code = UNORDERED;
8135 break;
8136 case EQ: /* EQ - ZF=1 - fails on unordered */
8137 *first_code = UNEQ;
8138 *bypass_code = UNORDERED;
8139 break;
8140 case NE: /* NE - ZF=0 - fails on unordered */
8141 *first_code = LTGT;
8142 *second_code = UNORDERED;
8143 break;
8144 case UNGE: /* GEU - CF=0 - fails on unordered */
8145 *first_code = GE;
8146 *second_code = UNORDERED;
8147 break;
8148 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
8149 *first_code = GT;
8150 *second_code = UNORDERED;
8151 break;
8152 default:
8153 abort ();
8154 }
8155 if (!TARGET_IEEE_FP)
8156 {
8157 *second_code = UNKNOWN;
8158 *bypass_code = UNKNOWN;
8159 }
8160 }
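
/* Worked examples (a sketch): for LT the natural test "jb" would also be
   taken on unordered operands, so the unordered case is branched around
   first (*bypass_code == UNORDERED) and UNLT is tested after it.  For NE
   the single test "jne" would miss unordered operands, so a second branch
   on UNORDERED to the same target is emitted (*second_code == UNORDERED).
   With !TARGET_IEEE_FP both extra branches are dropped.  */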
8161
8162 /* Return cost of a comparison done using fcom plus arithmetic operations on AX.
8163 All of the following functions use the number of instructions as the cost metric.
8164 In the future this should be tweaked to compute bytes for optimize_size and
8165 take into account the performance of various instructions on various CPUs. */
8166 static int
8167 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
8168 {
8169 if (!TARGET_IEEE_FP)
8170 return 4;
8171 /* The cost of code output by ix86_expand_fp_compare. */
8172 switch (code)
8173 {
8174 case UNLE:
8175 case UNLT:
8176 case LTGT:
8177 case GT:
8178 case GE:
8179 case UNORDERED:
8180 case ORDERED:
8181 case UNEQ:
8182 return 4;
8183 break;
8184 case LT:
8185 case NE:
8186 case EQ:
8187 case UNGE:
8188 return 5;
8189 break;
8190 case LE:
8191 case UNGT:
8192 return 6;
8193 break;
8194 default:
8195 abort ();
8196 }
8197 }
8198
8199 /* Return cost of comparison done using fcomi operation.
8200 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8201 static int
8202 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
8203 {
8204 enum rtx_code bypass_code, first_code, second_code;
8205 /* Return arbitrarily high cost when instruction is not supported - this
8206 prevents gcc from using it. */
8207 if (!TARGET_CMOVE)
8208 return 1024;
8209 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8210 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
8211 }
8212
8213 /* Return cost of comparison done using sahf operation.
8214 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8215 static int
8216 ix86_fp_comparison_sahf_cost (enum rtx_code code)
8217 {
8218 enum rtx_code bypass_code, first_code, second_code;
8219 /* Return arbitrarily high cost when instruction is not preferred - this
8220 keeps gcc from using it. */
8221 if (!TARGET_USE_SAHF && !optimize_size)
8222 return 1024;
8223 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8224 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
8225 }
8226
8227 /* Compute cost of the comparison done using any method.
8228 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8229 static int
8230 ix86_fp_comparison_cost (enum rtx_code code)
8231 {
8232 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
8233 int min;
8234
8235 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
8236 sahf_cost = ix86_fp_comparison_sahf_cost (code);
8237
8238 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
8239 if (min > sahf_cost)
8240 min = sahf_cost;
8241 if (min > fcomi_cost)
8242 min = fcomi_cost;
8243 return min;
8244 }
8245
8246 /* Generate insn patterns to do a floating point compare of OPERANDS. */
8247
8248 static rtx
8249 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
8250 rtx *second_test, rtx *bypass_test)
8251 {
8252 enum machine_mode fpcmp_mode, intcmp_mode;
8253 rtx tmp, tmp2;
8254 int cost = ix86_fp_comparison_cost (code);
8255 enum rtx_code bypass_code, first_code, second_code;
8256
8257 fpcmp_mode = ix86_fp_compare_mode (code);
8258 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
8259
8260 if (second_test)
8261 *second_test = NULL_RTX;
8262 if (bypass_test)
8263 *bypass_test = NULL_RTX;
8264
8265 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8266
8267 /* Do fcomi/sahf based test when profitable. */
8268 if ((bypass_code == UNKNOWN || bypass_test)
8269 && (second_code == UNKNOWN || second_test)
8270 && ix86_fp_comparison_arithmetics_cost (code) > cost)
8271 {
8272 if (TARGET_CMOVE)
8273 {
8274 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8275 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
8276 tmp);
8277 emit_insn (tmp);
8278 }
8279 else
8280 {
8281 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8282 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8283 if (!scratch)
8284 scratch = gen_reg_rtx (HImode);
8285 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8286 emit_insn (gen_x86_sahf_1 (scratch));
8287 }
8288
8289 /* The FP codes work out to act like unsigned. */
8290 intcmp_mode = fpcmp_mode;
8291 code = first_code;
8292 if (bypass_code != UNKNOWN)
8293 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
8294 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8295 const0_rtx);
8296 if (second_code != UNKNOWN)
8297 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
8298 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8299 const0_rtx);
8300 }
8301 else
8302 {
8303 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
8304 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8305 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8306 if (!scratch)
8307 scratch = gen_reg_rtx (HImode);
8308 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8309
8310 /* In the unordered case, we have to check C2 for NaN's, which
8311 doesn't happen to work out to anything nice combination-wise.
8312 So do some bit twiddling on the value we've got in AH to come
8313 up with an appropriate set of condition codes. */
8314
8315 intcmp_mode = CCNOmode;
8316 switch (code)
8317 {
8318 case GT:
8319 case UNGT:
8320 if (code == GT || !TARGET_IEEE_FP)
8321 {
8322 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8323 code = EQ;
8324 }
8325 else
8326 {
8327 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8328 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8329 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
8330 intcmp_mode = CCmode;
8331 code = GEU;
8332 }
8333 break;
8334 case LT:
8335 case UNLT:
8336 if (code == LT && TARGET_IEEE_FP)
8337 {
8338 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8339 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
8340 intcmp_mode = CCmode;
8341 code = EQ;
8342 }
8343 else
8344 {
8345 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
8346 code = NE;
8347 }
8348 break;
8349 case GE:
8350 case UNGE:
8351 if (code == GE || !TARGET_IEEE_FP)
8352 {
8353 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
8354 code = EQ;
8355 }
8356 else
8357 {
8358 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8359 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8360 GEN_INT (0x01)));
8361 code = NE;
8362 }
8363 break;
8364 case LE:
8365 case UNLE:
8366 if (code == LE && TARGET_IEEE_FP)
8367 {
8368 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8369 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8370 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8371 intcmp_mode = CCmode;
8372 code = LTU;
8373 }
8374 else
8375 {
8376 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8377 code = NE;
8378 }
8379 break;
8380 case EQ:
8381 case UNEQ:
8382 if (code == EQ && TARGET_IEEE_FP)
8383 {
8384 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8385 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8386 intcmp_mode = CCmode;
8387 code = EQ;
8388 }
8389 else
8390 {
8391 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8392 code = NE;
8393 break;
8394 }
8395 break;
8396 case NE:
8397 case LTGT:
8398 if (code == NE && TARGET_IEEE_FP)
8399 {
8400 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8401 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8402 GEN_INT (0x40)));
8403 code = NE;
8404 }
8405 else
8406 {
8407 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8408 code = EQ;
8409 }
8410 break;
8411
8412 case UNORDERED:
8413 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8414 code = NE;
8415 break;
8416 case ORDERED:
8417 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8418 code = EQ;
8419 break;
8420
8421 default:
8422 abort ();
8423 }
8424 }
8425
8426 /* Return the test that should be put into the flags user, i.e.
8427 the bcc, scc, or cmov instruction. */
8428 return gen_rtx_fmt_ee (code, VOIDmode,
8429 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8430 const0_rtx);
8431 }
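
/* Note on the magic constants above (a sketch): fnstsw places C0, C2 and
   C3 in bits 0, 2 and 6 of %ah, so 0x01, 0x04 and 0x40 test "below",
   "unordered" and "equal" respectively, and 0x45 covers all three
   condition bits at once.  For instance the GE case is just
   "testb $0x05, %ah" followed by a test for EQ -- true when neither C0
   (below) nor C2 (unordered) is set.  */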
8432
8433 rtx
8434 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
8435 {
8436 rtx op0, op1, ret;
8437 op0 = ix86_compare_op0;
8438 op1 = ix86_compare_op1;
8439
8440 if (second_test)
8441 *second_test = NULL_RTX;
8442 if (bypass_test)
8443 *bypass_test = NULL_RTX;
8444
8445 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8446 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
8447 second_test, bypass_test);
8448 else
8449 ret = ix86_expand_int_compare (code, op0, op1);
8450
8451 return ret;
8452 }
8453
8454 /* Return true if the CODE will result in a nontrivial jump sequence. */
8455 bool
8456 ix86_fp_jump_nontrivial_p (enum rtx_code code)
8457 {
8458 enum rtx_code bypass_code, first_code, second_code;
8459 if (!TARGET_CMOVE)
8460 return true;
8461 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8462 return bypass_code != UNKNOWN || second_code != UNKNOWN;
8463 }
8464
8465 void
8466 ix86_expand_branch (enum rtx_code code, rtx label)
8467 {
8468 rtx tmp;
8469
8470 switch (GET_MODE (ix86_compare_op0))
8471 {
8472 case QImode:
8473 case HImode:
8474 case SImode:
8475 simple:
8476 tmp = ix86_expand_compare (code, NULL, NULL);
8477 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8478 gen_rtx_LABEL_REF (VOIDmode, label),
8479 pc_rtx);
8480 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
8481 return;
8482
8483 case SFmode:
8484 case DFmode:
8485 case XFmode:
8486 {
8487 rtvec vec;
8488 int use_fcomi;
8489 enum rtx_code bypass_code, first_code, second_code;
8490
8491 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
8492 &ix86_compare_op1);
8493
8494 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8495
8496 /* Check whether we will use the natural sequence with one jump. If
8497 so, we can expand the jump early. Otherwise delay expansion by
8498 creating a compound insn so as not to confuse the optimizers. */
8499 if (bypass_code == UNKNOWN && second_code == UNKNOWN
8500 && TARGET_CMOVE)
8501 {
8502 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
8503 gen_rtx_LABEL_REF (VOIDmode, label),
8504 pc_rtx, NULL_RTX, NULL_RTX);
8505 }
8506 else
8507 {
8508 tmp = gen_rtx_fmt_ee (code, VOIDmode,
8509 ix86_compare_op0, ix86_compare_op1);
8510 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8511 gen_rtx_LABEL_REF (VOIDmode, label),
8512 pc_rtx);
8513 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
8514
8515 use_fcomi = ix86_use_fcomi_compare (code);
8516 vec = rtvec_alloc (3 + !use_fcomi);
8517 RTVEC_ELT (vec, 0) = tmp;
8518 RTVEC_ELT (vec, 1)
8519 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
8520 RTVEC_ELT (vec, 2)
8521 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
8522 if (! use_fcomi)
8523 RTVEC_ELT (vec, 3)
8524 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
8525
8526 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
8527 }
8528 return;
8529 }
8530
8531 case DImode:
8532 if (TARGET_64BIT)
8533 goto simple;
8534 /* Expand DImode branch into multiple compare+branch. */
8535 {
8536 rtx lo[2], hi[2], label2;
8537 enum rtx_code code1, code2, code3;
8538
8539 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
8540 {
8541 tmp = ix86_compare_op0;
8542 ix86_compare_op0 = ix86_compare_op1;
8543 ix86_compare_op1 = tmp;
8544 code = swap_condition (code);
8545 }
8546 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
8547 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
8548
8549 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
8550 avoid two branches. This costs one extra insn, so disable when
8551 optimizing for size. */
8552
8553 if ((code == EQ || code == NE)
8554 && (!optimize_size
8555 || hi[1] == const0_rtx || lo[1] == const0_rtx))
8556 {
8557 rtx xor0, xor1;
8558
8559 xor1 = hi[0];
8560 if (hi[1] != const0_rtx)
8561 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
8562 NULL_RTX, 0, OPTAB_WIDEN);
8563
8564 xor0 = lo[0];
8565 if (lo[1] != const0_rtx)
8566 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
8567 NULL_RTX, 0, OPTAB_WIDEN);
8568
8569 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
8570 NULL_RTX, 0, OPTAB_WIDEN);
8571
8572 ix86_compare_op0 = tmp;
8573 ix86_compare_op1 = const0_rtx;
8574 ix86_expand_branch (code, label);
8575 return;
8576 }
8577
8578 /* Otherwise, if we are doing a less-than or greater-than-or-equal
8579 comparison, op1 is a constant and its low word is zero, then we can
8580 just examine the high word. */
8581
8582 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
8583 switch (code)
8584 {
8585 case LT: case LTU: case GE: case GEU:
8586 ix86_compare_op0 = hi[0];
8587 ix86_compare_op1 = hi[1];
8588 ix86_expand_branch (code, label);
8589 return;
8590 default:
8591 break;
8592 }
8593
8594 /* Otherwise, we need two or three jumps. */
8595
8596 label2 = gen_label_rtx ();
8597
8598 code1 = code;
8599 code2 = swap_condition (code);
8600 code3 = unsigned_condition (code);
8601
8602 switch (code)
8603 {
8604 case LT: case GT: case LTU: case GTU:
8605 break;
8606
8607 case LE: code1 = LT; code2 = GT; break;
8608 case GE: code1 = GT; code2 = LT; break;
8609 case LEU: code1 = LTU; code2 = GTU; break;
8610 case GEU: code1 = GTU; code2 = LTU; break;
8611
8612 case EQ: code1 = UNKNOWN; code2 = NE; break;
8613 case NE: code2 = UNKNOWN; break;
8614
8615 default:
8616 abort ();
8617 }
8618
8619 /*
8620 * a < b =>
8621 * if (hi(a) < hi(b)) goto true;
8622 * if (hi(a) > hi(b)) goto false;
8623 * if (lo(a) < lo(b)) goto true;
8624 * false:
8625 */
8626
8627 ix86_compare_op0 = hi[0];
8628 ix86_compare_op1 = hi[1];
8629
8630 if (code1 != UNKNOWN)
8631 ix86_expand_branch (code1, label);
8632 if (code2 != UNKNOWN)
8633 ix86_expand_branch (code2, label2);
8634
8635 ix86_compare_op0 = lo[0];
8636 ix86_compare_op1 = lo[1];
8637 ix86_expand_branch (code3, label);
8638
8639 if (code2 != UNKNOWN)
8640 emit_label (label2);
8641 return;
8642 }
8643
8644 default:
8645 abort ();
8646 }
8647 }
8648
8649 /* Split branch based on floating point condition. */
8650 void
8651 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
8652 rtx target1, rtx target2, rtx tmp, rtx pushed)
8653 {
8654 rtx second, bypass;
8655 rtx label = NULL_RTX;
8656 rtx condition;
8657 int bypass_probability = -1, second_probability = -1, probability = -1;
8658 rtx i;
8659
8660 if (target2 != pc_rtx)
8661 {
8662 rtx tmp = target2;
8663 code = reverse_condition_maybe_unordered (code);
8664 target2 = target1;
8665 target1 = tmp;
8666 }
8667
8668 condition = ix86_expand_fp_compare (code, op1, op2,
8669 tmp, &second, &bypass);
8670
8671 /* Remove pushed operand from stack. */
8672 if (pushed)
8673 ix86_free_from_memory (GET_MODE (pushed));
8674
8675 if (split_branch_probability >= 0)
8676 {
8677 /* Distribute the probabilities across the jumps.
8678 Assume the BYPASS and SECOND branches always test
8679 for UNORDERED. */
8680 probability = split_branch_probability;
8681
8682 /* A value of 1 is low enough that there is no need for the probability
8683 to be updated. Later we may run some experiments and see
8684 if unordered values are more frequent in practice. */
8685 if (bypass)
8686 bypass_probability = 1;
8687 if (second)
8688 second_probability = 1;
8689 }
8690 if (bypass != NULL_RTX)
8691 {
8692 label = gen_label_rtx ();
8693 i = emit_jump_insn (gen_rtx_SET
8694 (VOIDmode, pc_rtx,
8695 gen_rtx_IF_THEN_ELSE (VOIDmode,
8696 bypass,
8697 gen_rtx_LABEL_REF (VOIDmode,
8698 label),
8699 pc_rtx)));
8700 if (bypass_probability >= 0)
8701 REG_NOTES (i)
8702 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8703 GEN_INT (bypass_probability),
8704 REG_NOTES (i));
8705 }
8706 i = emit_jump_insn (gen_rtx_SET
8707 (VOIDmode, pc_rtx,
8708 gen_rtx_IF_THEN_ELSE (VOIDmode,
8709 condition, target1, target2)));
8710 if (probability >= 0)
8711 REG_NOTES (i)
8712 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8713 GEN_INT (probability),
8714 REG_NOTES (i));
8715 if (second != NULL_RTX)
8716 {
8717 i = emit_jump_insn (gen_rtx_SET
8718 (VOIDmode, pc_rtx,
8719 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
8720 target2)));
8721 if (second_probability >= 0)
8722 REG_NOTES (i)
8723 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8724 GEN_INT (second_probability),
8725 REG_NOTES (i));
8726 }
8727 if (label != NULL_RTX)
8728 emit_label (label);
8729 }
8730
8731 int
8732 ix86_expand_setcc (enum rtx_code code, rtx dest)
8733 {
8734 rtx ret, tmp, tmpreg, equiv;
8735 rtx second_test, bypass_test;
8736
8737 if (GET_MODE (ix86_compare_op0) == DImode
8738 && !TARGET_64BIT)
8739 return 0; /* FAIL */
8740
8741 if (GET_MODE (dest) != QImode)
8742 abort ();
8743
8744 ret = ix86_expand_compare (code, &second_test, &bypass_test);
8745 PUT_MODE (ret, QImode);
8746
8747 tmp = dest;
8748 tmpreg = dest;
8749
8750 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
8751 if (bypass_test || second_test)
8752 {
8753 rtx test = second_test;
8754 int bypass = 0;
8755 rtx tmp2 = gen_reg_rtx (QImode);
8756 if (bypass_test)
8757 {
8758 if (second_test)
8759 abort ();
8760 test = bypass_test;
8761 bypass = 1;
8762 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
8763 }
8764 PUT_MODE (test, QImode);
8765 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
8766
8767 if (bypass)
8768 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
8769 else
8770 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
8771 }
8772
8773 /* Attach a REG_EQUAL note describing the comparison result. */
8774 equiv = simplify_gen_relational (code, QImode,
8775 GET_MODE (ix86_compare_op0),
8776 ix86_compare_op0, ix86_compare_op1);
8777 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
8778
8779 return 1; /* DONE */
8780 }
8781
8782 /* Expand a comparison setting or clearing the carry flag. Return true when
8783 successful, and set *POP to the comparison for the operation. */
8784 static bool
8785 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
8786 {
8787 enum machine_mode mode =
8788 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
8789
8790 /* Do not handle DImode compares that go through the special path. Also we
8791 can't deal with FP compares yet; it would be possible to add that. */
8792 if ((mode == DImode && !TARGET_64BIT))
8793 return false;
8794 if (FLOAT_MODE_P (mode))
8795 {
8796 rtx second_test = NULL, bypass_test = NULL;
8797 rtx compare_op, compare_seq;
8798
8799 /* Shortcut: the following common codes never translate into carry flag compares. */
8800 if (code == EQ || code == NE || code == UNEQ || code == LTGT
8801 || code == ORDERED || code == UNORDERED)
8802 return false;
8803
8804 /* These comparisons require zero flag; swap operands so they won't. */
8805 if ((code == GT || code == UNLE || code == LE || code == UNGT)
8806 && !TARGET_IEEE_FP)
8807 {
8808 rtx tmp = op0;
8809 op0 = op1;
8810 op1 = tmp;
8811 code = swap_condition (code);
8812 }
8813
8814 /* Try to expand the comparison and verify that we end up with a carry flag
8815 based comparison. This fails to be true only when we decide to expand the
8816 comparison using arithmetic, which is not a common scenario. */
8817 start_sequence ();
8818 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
8819 &second_test, &bypass_test);
8820 compare_seq = get_insns ();
8821 end_sequence ();
8822
8823 if (second_test || bypass_test)
8824 return false;
8825 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
8826 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
8827 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
8828 else
8829 code = GET_CODE (compare_op);
8830 if (code != LTU && code != GEU)
8831 return false;
8832 emit_insn (compare_seq);
8833 *pop = compare_op;
8834 return true;
8835 }
8836 if (!INTEGRAL_MODE_P (mode))
8837 return false;
8838 switch (code)
8839 {
8840 case LTU:
8841 case GEU:
8842 break;
8843
8844 /* Convert a==0 into (unsigned)a<1. */
8845 case EQ:
8846 case NE:
8847 if (op1 != const0_rtx)
8848 return false;
8849 op1 = const1_rtx;
8850 code = (code == EQ ? LTU : GEU);
8851 break;
8852
8853 /* Convert a>b into b<a or a>=b+1. */
8854 case GTU:
8855 case LEU:
8856 if (GET_CODE (op1) == CONST_INT)
8857 {
8858 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
8859 /* Bail out on overflow. We could still swap the operands, but that
8860 would force loading the constant into a register. */
8861 if (op1 == const0_rtx
8862 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
8863 return false;
8864 code = (code == GTU ? GEU : LTU);
8865 }
8866 else
8867 {
8868 rtx tmp = op1;
8869 op1 = op0;
8870 op0 = tmp;
8871 code = (code == GTU ? LTU : GEU);
8872 }
8873 break;
8874
8875 /* Convert a>=0 into (unsigned)a<0x80000000. */
8876 case LT:
8877 case GE:
8878 if (mode == DImode || op1 != const0_rtx)
8879 return false;
8880 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
8881 code = (code == LT ? GEU : LTU);
8882 break;
8883 case LE:
8884 case GT:
8885 if (mode == DImode || op1 != constm1_rtx)
8886 return false;
8887 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
8888 code = (code == LE ? GEU : LTU);
8889 break;
8890
8891 default:
8892 return false;
8893 }
8894 /* Swapping operands may cause the constant to appear as the first operand. */
8895 if (!nonimmediate_operand (op0, VOIDmode))
8896 {
8897 if (no_new_pseudos)
8898 return false;
8899 op0 = force_reg (mode, op0);
8900 }
8901 ix86_compare_op0 = op0;
8902 ix86_compare_op1 = op1;
8903 *pop = ix86_expand_compare (code, NULL, NULL);
8904 if (GET_CODE (*pop) != LTU && GET_CODE (*pop) != GEU)
8905 abort ();
8906 return true;
8907 }
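
/* Example of the conversions above (register names hypothetical):
   "a == 0" is rewritten as the unsigned "a < 1", which sets the carry
   flag exactly when A is zero, so the caller can materialize the result
   with

       cmpl    $1, %eax
       sbbl    %edx, %edx      now %edx = (a == 0) ? -1 : 0

   instead of a setcc or branch sequence.  */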
8908
8909 int
8910 ix86_expand_int_movcc (rtx operands[])
8911 {
8912 enum rtx_code code = GET_CODE (operands[1]), compare_code;
8913 rtx compare_seq, compare_op;
8914 rtx second_test, bypass_test;
8915 enum machine_mode mode = GET_MODE (operands[0]);
8916 bool sign_bit_compare_p = false;
8917
8918 start_sequence ();
8919 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
8920 compare_seq = get_insns ();
8921 end_sequence ();
8922
8923 compare_code = GET_CODE (compare_op);
8924
8925 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
8926 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
8927 sign_bit_compare_p = true;
8928
8929 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
8930 HImode insns, we'd be swallowed in word prefix ops. */
8931
8932 if ((mode != HImode || TARGET_FAST_PREFIX)
8933 && (mode != DImode || TARGET_64BIT)
8934 && GET_CODE (operands[2]) == CONST_INT
8935 && GET_CODE (operands[3]) == CONST_INT)
8936 {
8937 rtx out = operands[0];
8938 HOST_WIDE_INT ct = INTVAL (operands[2]);
8939 HOST_WIDE_INT cf = INTVAL (operands[3]);
8940 HOST_WIDE_INT diff;
8941
8942 diff = ct - cf;
8943 /* Sign bit compares are better done using shifts than by using
8944 sbb. */
8945 if (sign_bit_compare_p
8946 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
8947 ix86_compare_op1, &compare_op))
8948 {
8949 /* Detect overlap between destination and compare sources. */
8950 rtx tmp = out;
8951
8952 if (!sign_bit_compare_p)
8953 {
8954 bool fpcmp = false;
8955
8956 compare_code = GET_CODE (compare_op);
8957
8958 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
8959 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
8960 {
8961 fpcmp = true;
8962 compare_code = ix86_fp_compare_code_to_integer (compare_code);
8963 }
8964
8965 /* To simplify the rest of the code, restrict to the GEU case. */
8966 if (compare_code == LTU)
8967 {
8968 HOST_WIDE_INT tmp = ct;
8969 ct = cf;
8970 cf = tmp;
8971 compare_code = reverse_condition (compare_code);
8972 code = reverse_condition (code);
8973 }
8974 else
8975 {
8976 if (fpcmp)
8977 PUT_CODE (compare_op,
8978 reverse_condition_maybe_unordered
8979 (GET_CODE (compare_op)));
8980 else
8981 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
8982 }
8983 diff = ct - cf;
8984
8985 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
8986 || reg_overlap_mentioned_p (out, ix86_compare_op1))
8987 tmp = gen_reg_rtx (mode);
8988
8989 if (mode == DImode)
8990 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
8991 else
8992 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
8993 }
8994 else
8995 {
8996 if (code == GT || code == GE)
8997 code = reverse_condition (code);
8998 else
8999 {
9000 HOST_WIDE_INT tmp = ct;
9001 ct = cf;
9002 cf = tmp;
9003 diff = ct - cf;
9004 }
9005 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
9006 ix86_compare_op1, VOIDmode, 0, -1);
9007 }
9008
9009 if (diff == 1)
9010 {
9011 /*
9012 * cmpl op0,op1
9013 * sbbl dest,dest
9014 * [addl dest, ct]
9015 *
9016 * Size 5 - 8.
9017 */
9018 if (ct)
9019 tmp = expand_simple_binop (mode, PLUS,
9020 tmp, GEN_INT (ct),
9021 copy_rtx (tmp), 1, OPTAB_DIRECT);
9022 }
9023 else if (cf == -1)
9024 {
9025 /*
9026 * cmpl op0,op1
9027 * sbbl dest,dest
9028 * orl $ct, dest
9029 *
9030 * Size 8.
9031 */
9032 tmp = expand_simple_binop (mode, IOR,
9033 tmp, GEN_INT (ct),
9034 copy_rtx (tmp), 1, OPTAB_DIRECT);
9035 }
9036 else if (diff == -1 && ct)
9037 {
9038 /*
9039 * cmpl op0,op1
9040 * sbbl dest,dest
9041 * notl dest
9042 * [addl dest, cf]
9043 *
9044 * Size 8 - 11.
9045 */
9046 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9047 if (cf)
9048 tmp = expand_simple_binop (mode, PLUS,
9049 copy_rtx (tmp), GEN_INT (cf),
9050 copy_rtx (tmp), 1, OPTAB_DIRECT);
9051 }
9052 else
9053 {
9054 /*
9055 * cmpl op0,op1
9056 * sbbl dest,dest
9057 * [notl dest]
9058 * andl cf - ct, dest
9059 * [addl dest, ct]
9060 *
9061 * Size 8 - 11.
9062 */
9063
9064 if (cf == 0)
9065 {
9066 cf = ct;
9067 ct = 0;
9068 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9069 }
9070
9071 tmp = expand_simple_binop (mode, AND,
9072 copy_rtx (tmp),
9073 gen_int_mode (cf - ct, mode),
9074 copy_rtx (tmp), 1, OPTAB_DIRECT);
9075 if (ct)
9076 tmp = expand_simple_binop (mode, PLUS,
9077 copy_rtx (tmp), GEN_INT (ct),
9078 copy_rtx (tmp), 1, OPTAB_DIRECT);
9079 }
9080
9081 if (!rtx_equal_p (tmp, out))
9082 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
9083
9084 return 1; /* DONE */
9085 }
9086
9087 if (diff < 0)
9088 {
9089 HOST_WIDE_INT tmp;
9090 tmp = ct, ct = cf, cf = tmp;
9091 diff = -diff;
9092 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9093 {
9094 /* We may be reversing an unordered compare to a normal compare, which
9095 is not valid in general (we may convert a non-trapping condition
9096 to a trapping one); however, on i386 we currently emit all
9097 comparisons unordered. */
9098 compare_code = reverse_condition_maybe_unordered (compare_code);
9099 code = reverse_condition_maybe_unordered (code);
9100 }
9101 else
9102 {
9103 compare_code = reverse_condition (compare_code);
9104 code = reverse_condition (code);
9105 }
9106 }
9107
9108 compare_code = UNKNOWN;
9109 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
9110 && GET_CODE (ix86_compare_op1) == CONST_INT)
9111 {
9112 if (ix86_compare_op1 == const0_rtx
9113 && (code == LT || code == GE))
9114 compare_code = code;
9115 else if (ix86_compare_op1 == constm1_rtx)
9116 {
9117 if (code == LE)
9118 compare_code = LT;
9119 else if (code == GT)
9120 compare_code = GE;
9121 }
9122 }
9123
9124 /* Optimize dest = (op0 < 0) ? -1 : cf. */
9125 if (compare_code != UNKNOWN
9126 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
9127 && (cf == -1 || ct == -1))
9128 {
9129 /* If the lea code below could be used, only optimize
9130 if it results in a 2-insn sequence. */
9131
9132 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
9133 || diff == 3 || diff == 5 || diff == 9)
9134 || (compare_code == LT && ct == -1)
9135 || (compare_code == GE && cf == -1))
9136 {
9137 /*
9138 * notl op1 (if necessary)
9139 * sarl $31, op1
9140 * orl cf, op1
9141 */
9142 if (ct != -1)
9143 {
9144 cf = ct;
9145 ct = -1;
9146 code = reverse_condition (code);
9147 }
9148
9149 out = emit_store_flag (out, code, ix86_compare_op0,
9150 ix86_compare_op1, VOIDmode, 0, -1);
9151
9152 out = expand_simple_binop (mode, IOR,
9153 out, GEN_INT (cf),
9154 out, 1, OPTAB_DIRECT);
9155 if (out != operands[0])
9156 emit_move_insn (operands[0], out);
9157
9158 return 1; /* DONE */
9159 }
9160 }
9161
9162
9163 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
9164 || diff == 3 || diff == 5 || diff == 9)
9165 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
9166 && (mode != DImode
9167 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
9168 {
9169 /*
9170 * xorl dest,dest
9171 * cmpl op1,op2
9172 * setcc dest
9173 * lea cf(dest*(ct-cf)),dest
9174 *
9175 * Size 14.
9176 *
9177 * This also catches the degenerate setcc-only case.
9178 */
9179
9180 rtx tmp;
9181 int nops;
9182
9183 out = emit_store_flag (out, code, ix86_compare_op0,
9184 ix86_compare_op1, VOIDmode, 0, 1);
9185
9186 nops = 0;
9187 /* On x86_64 the lea instruction operates on Pmode, so we need
9188 to get the arithmetic done in the proper mode to match. */
9189 if (diff == 1)
9190 tmp = copy_rtx (out);
9191 else
9192 {
9193 rtx out1;
9194 out1 = copy_rtx (out);
9195 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
9196 nops++;
9197 if (diff & 1)
9198 {
9199 tmp = gen_rtx_PLUS (mode, tmp, out1);
9200 nops++;
9201 }
9202 }
9203 if (cf != 0)
9204 {
9205 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
9206 nops++;
9207 }
9208 if (!rtx_equal_p (tmp, out))
9209 {
9210 if (nops == 1)
9211 out = force_operand (tmp, copy_rtx (out));
9212 else
9213 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
9214 }
9215 if (!rtx_equal_p (out, operands[0]))
9216 emit_move_insn (operands[0], copy_rtx (out));
9217
9218 return 1; /* DONE */
9219 }
9220
9221 /*
9222 * General case: Jumpful:
9223 * xorl dest,dest cmpl op1, op2
9224 * cmpl op1, op2 movl ct, dest
9225 * setcc dest jcc 1f
9226 * decl dest movl cf, dest
9227 * andl (cf-ct),dest 1:
9228 * addl ct,dest
9229 *
9230 * Size 20. Size 14.
9231 *
9232 * This is reasonably steep, but branch mispredict costs are
9233 * high on modern cpus, so consider failing only if optimizing
9234 * for space.
9235 */
9236
9237 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
9238 && BRANCH_COST >= 2)
9239 {
9240 if (cf == 0)
9241 {
9242 cf = ct;
9243 ct = 0;
9244 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9245 /* We may be reversing an unordered compare to a normal compare,
9246 which is not valid in general (we may convert a non-trapping
9247 condition to a trapping one); however, on i386 we currently
9248 emit all comparisons unordered. */
9249 code = reverse_condition_maybe_unordered (code);
9250 else
9251 {
9252 code = reverse_condition (code);
9253 if (compare_code != UNKNOWN)
9254 compare_code = reverse_condition (compare_code);
9255 }
9256 }
9257
9258 if (compare_code != UNKNOWN)
9259 {
9260 /* notl op1 (if needed)
9261 sarl $31, op1
9262 andl (cf-ct), op1
9263 addl ct, op1
9264
9265 For x < 0 (resp. x <= -1) there will be no notl,
9266 so if possible swap the constants to get rid of the
9267 complement.
9268 True/false will be -1/0 while code below (store flag
9269 followed by decrement) is 0/-1, so the constants need
9270 to be exchanged once more. */
9271
9272 if (compare_code == GE || !cf)
9273 {
9274 code = reverse_condition (code);
9275 compare_code = LT;
9276 }
9277 else
9278 {
9279 HOST_WIDE_INT tmp = cf;
9280 cf = ct;
9281 ct = tmp;
9282 }
9283
9284 out = emit_store_flag (out, code, ix86_compare_op0,
9285 ix86_compare_op1, VOIDmode, 0, -1);
9286 }
9287 else
9288 {
9289 out = emit_store_flag (out, code, ix86_compare_op0,
9290 ix86_compare_op1, VOIDmode, 0, 1);
9291
9292 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
9293 copy_rtx (out), 1, OPTAB_DIRECT);
9294 }
9295
9296 out = expand_simple_binop (mode, AND, copy_rtx (out),
9297 gen_int_mode (cf - ct, mode),
9298 copy_rtx (out), 1, OPTAB_DIRECT);
9299 if (ct)
9300 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
9301 copy_rtx (out), 1, OPTAB_DIRECT);
9302 if (!rtx_equal_p (out, operands[0]))
9303 emit_move_insn (operands[0], copy_rtx (out));
9304
9305 return 1; /* DONE */
9306 }
9307 }
9308
9309 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
9310 {
9311 /* Try a few things more with specific constants and a variable. */
9312
9313 optab op;
9314 rtx var, orig_out, out, tmp;
9315
9316 if (BRANCH_COST <= 2)
9317 return 0; /* FAIL */
9318
9319 /* If one of the two operands is an interesting constant, load a
9320 constant with the above and mask it in with a logical operation. */
9321
9322 if (GET_CODE (operands[2]) == CONST_INT)
9323 {
9324 var = operands[3];
9325 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
9326 operands[3] = constm1_rtx, op = and_optab;
9327 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
9328 operands[3] = const0_rtx, op = ior_optab;
9329 else
9330 return 0; /* FAIL */
9331 }
9332 else if (GET_CODE (operands[3]) == CONST_INT)
9333 {
9334 var = operands[2];
9335 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
9336 operands[2] = constm1_rtx, op = and_optab;
9337 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
9338 operands[2] = const0_rtx, op = ior_optab;
9339 else
9340 return 0; /* FAIL */
9341 }
9342 else
9343 return 0; /* FAIL */
9344
9345 orig_out = operands[0];
9346 tmp = gen_reg_rtx (mode);
9347 operands[0] = tmp;
9348
9349 /* Recurse to get the constant loaded. */
9350 if (ix86_expand_int_movcc (operands) == 0)
9351 return 0; /* FAIL */
9352
9353 /* Mask in the interesting variable. */
9354 out = expand_binop (mode, op, var, tmp, orig_out, 0,
9355 OPTAB_WIDEN);
9356 if (!rtx_equal_p (out, orig_out))
9357 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
9358
9359 return 1; /* DONE */
9360 }
9361
9362 /*
9363 * For comparison with above,
9364 *
9365 * movl cf,dest
9366 * movl ct,tmp
9367 * cmpl op1,op2
9368 * cmovcc tmp,dest
9369 *
9370 * Size 15.
9371 */
9372
9373 if (! nonimmediate_operand (operands[2], mode))
9374 operands[2] = force_reg (mode, operands[2]);
9375 if (! nonimmediate_operand (operands[3], mode))
9376 operands[3] = force_reg (mode, operands[3]);
9377
9378 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9379 {
9380 rtx tmp = gen_reg_rtx (mode);
9381 emit_move_insn (tmp, operands[3]);
9382 operands[3] = tmp;
9383 }
9384 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9385 {
9386 rtx tmp = gen_reg_rtx (mode);
9387 emit_move_insn (tmp, operands[2]);
9388 operands[2] = tmp;
9389 }
9390
9391 if (! register_operand (operands[2], VOIDmode)
9392 && (mode == QImode
9393 || ! register_operand (operands[3], VOIDmode)))
9394 operands[2] = force_reg (mode, operands[2]);
9395
9396 if (mode == QImode
9397 && ! register_operand (operands[3], VOIDmode))
9398 operands[3] = force_reg (mode, operands[3]);
9399
9400 emit_insn (compare_seq);
9401 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9402 gen_rtx_IF_THEN_ELSE (mode,
9403 compare_op, operands[2],
9404 operands[3])));
9405 if (bypass_test)
9406 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
9407 gen_rtx_IF_THEN_ELSE (mode,
9408 bypass_test,
9409 copy_rtx (operands[3]),
9410 copy_rtx (operands[0]))));
9411 if (second_test)
9412 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
9413 gen_rtx_IF_THEN_ELSE (mode,
9414 second_test,
9415 copy_rtx (operands[2]),
9416 copy_rtx (operands[0]))));
9417
9418 return 1; /* DONE */
9419 }
9420
9421 int
9422 ix86_expand_fp_movcc (rtx operands[])
9423 {
9424 enum rtx_code code;
9425 rtx tmp;
9426 rtx compare_op, second_test, bypass_test;
9427
9428 /* For SF/DFmode conditional moves based on comparisons
9429 in the same mode, we may want to use SSE min/max instructions. */
9430 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
9431 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
9432 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
9433 /* The SSE comparisons do not support the LTGT/UNEQ pair. */
9434 && (!TARGET_IEEE_FP
9435 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
9436 /* We may be called from the post-reload splitter. */
9437 && (!REG_P (operands[0])
9438 || SSE_REG_P (operands[0])
9439 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
9440 {
9441 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
9442 code = GET_CODE (operands[1]);
9443
9444 /* See if we have (cross) match between comparison operands and
9445 conditional move operands. */
9446 if (rtx_equal_p (operands[2], op1))
9447 {
9448 rtx tmp = op0;
9449 op0 = op1;
9450 op1 = tmp;
9451 code = reverse_condition_maybe_unordered (code);
9452 }
9453 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
9454 {
9455 /* Check for min operation. */
9456 if (code == LT || code == UNLE)
9457 {
9458 if (code == UNLE)
9459 {
9460 rtx tmp = op0;
9461 op0 = op1;
9462 op1 = tmp;
9463 }
9464 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9465 if (memory_operand (op0, VOIDmode))
9466 op0 = force_reg (GET_MODE (operands[0]), op0);
9467 if (GET_MODE (operands[0]) == SFmode)
9468 emit_insn (gen_minsf3 (operands[0], op0, op1));
9469 else
9470 emit_insn (gen_mindf3 (operands[0], op0, op1));
9471 return 1;
9472 }
9473 /* Check for max operation. */
9474 if (code == GT || code == UNGE)
9475 {
9476 if (code == UNGE)
9477 {
9478 rtx tmp = op0;
9479 op0 = op1;
9480 op1 = tmp;
9481 }
9482 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9483 if (memory_operand (op0, VOIDmode))
9484 op0 = force_reg (GET_MODE (operands[0]), op0);
9485 if (GET_MODE (operands[0]) == SFmode)
9486 emit_insn (gen_maxsf3 (operands[0], op0, op1));
9487 else
9488 emit_insn (gen_maxdf3 (operands[0], op0, op1));
9489 return 1;
9490 }
9491 }
9492 /* Arrange for the condition to be an sse_comparison_operator. In case
9493 we are in non-IEEE mode, try to canonicalize the destination operand
9494 to be first in the comparison - this helps reload to avoid extra
9495 moves. */
9496 if (!sse_comparison_operator (operands[1], VOIDmode)
9497 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
9498 {
9499 rtx tmp = ix86_compare_op0;
9500 ix86_compare_op0 = ix86_compare_op1;
9501 ix86_compare_op1 = tmp;
9502 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
9503 VOIDmode, ix86_compare_op0,
9504 ix86_compare_op1);
9505 }
9506 /* Similarly try to arrange for the result to be the first operand of the
9507 conditional move. We also don't support the NE comparison on SSE, so
9508 try to avoid it. */
9509 if ((rtx_equal_p (operands[0], operands[3])
9510 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
9511 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
9512 {
9513 rtx tmp = operands[2];
9514 operands[2] = operands[3];
9515 operands[3] = tmp;
9516 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
9517 (GET_CODE (operands[1])),
9518 VOIDmode, ix86_compare_op0,
9519 ix86_compare_op1);
9520 }
9521 if (GET_MODE (operands[0]) == SFmode)
9522 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
9523 operands[2], operands[3],
9524 ix86_compare_op0, ix86_compare_op1));
9525 else
9526 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
9527 operands[2], operands[3],
9528 ix86_compare_op0, ix86_compare_op1));
9529 return 1;
9530 }
9531
9532 /* The floating point conditional move instructions don't directly
9533 support conditions resulting from a signed integer comparison. */
9534
9535 code = GET_CODE (operands[1]);
9536 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9537
9541 if (!fcmov_comparison_operator (compare_op, VOIDmode))
9542 {
9543 if (second_test != NULL || bypass_test != NULL)
9544 abort ();
9545 tmp = gen_reg_rtx (QImode);
9546 ix86_expand_setcc (code, tmp);
9547 code = NE;
9548 ix86_compare_op0 = tmp;
9549 ix86_compare_op1 = const0_rtx;
9550 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9551 }
9552 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9553 {
9554 tmp = gen_reg_rtx (GET_MODE (operands[0]));
9555 emit_move_insn (tmp, operands[3]);
9556 operands[3] = tmp;
9557 }
9558 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9559 {
9560 tmp = gen_reg_rtx (GET_MODE (operands[0]));
9561 emit_move_insn (tmp, operands[2]);
9562 operands[2] = tmp;
9563 }
9564
9565 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9566 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9567 compare_op,
9568 operands[2],
9569 operands[3])));
9570 if (bypass_test)
9571 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9572 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9573 bypass_test,
9574 operands[3],
9575 operands[0])));
9576 if (second_test)
9577 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9578 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9579 second_test,
9580 operands[2],
9581 operands[0])));
9582
9583 return 1;
9584 }
9585
9586 /* Expand conditional increment or decrement using adc/sbb instructions.
9587 The default case using setcc followed by the conditional move can be
9588 done by generic code. */
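/* For instance, a conditional increment such as
     dest = src + (a < b ? 1 : 0)   (unsigned a < b)
   can be emitted as a compare that leaves the condition in the carry flag
   followed by a single adc (or sbb for the decrement / reversed cases),
   which is roughly what the code below constructs. */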
9589 int
9590 ix86_expand_int_addcc (rtx operands[])
9591 {
9592 enum rtx_code code = GET_CODE (operands[1]);
9593 rtx compare_op;
9594 rtx val = const0_rtx;
9595 bool fpcmp = false;
9596 enum machine_mode mode = GET_MODE (operands[0]);
9597
9598 if (operands[3] != const1_rtx
9599 && operands[3] != constm1_rtx)
9600 return 0;
9601 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
9602 ix86_compare_op1, &compare_op))
9603 return 0;
9604 code = GET_CODE (compare_op);
9605
9606 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9607 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9608 {
9609 fpcmp = true;
9610 code = ix86_fp_compare_code_to_integer (code);
9611 }
9612
9613 if (code != LTU)
9614 {
9615 val = constm1_rtx;
9616 if (fpcmp)
9617 PUT_CODE (compare_op,
9618 reverse_condition_maybe_unordered
9619 (GET_CODE (compare_op)));
9620 else
9621 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
9622 }
9623 PUT_MODE (compare_op, mode);
9624
9625 /* Construct either adc or sbb insn. */
9626 if ((code == LTU) == (operands[3] == constm1_rtx))
9627 {
9628 switch (GET_MODE (operands[0]))
9629 {
9630 case QImode:
9631 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
9632 break;
9633 case HImode:
9634 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
9635 break;
9636 case SImode:
9637 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
9638 break;
9639 case DImode:
9640 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
9641 break;
9642 default:
9643 abort ();
9644 }
9645 }
9646 else
9647 {
9648 switch (GET_MODE (operands[0]))
9649 {
9650 case QImode:
9651 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
9652 break;
9653 case HImode:
9654 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
9655 break;
9656 case SImode:
9657 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
9658 break;
9659 case DImode:
9660 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
9661 break;
9662 default:
9663 abort ();
9664 }
9665 }
9666 return 1; /* DONE */
9667 }
9668
9669
9670 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
9671 works for floating point parameters and non-offsettable memories.
9672 For pushes, it returns just stack offsets; the values will be saved
9673 in the right order. At most three parts are generated. */
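/* For example, on a 32-bit target a DFmode operand is split into two
   SImode parts and an XFmode operand into three, while on a 64-bit
   target an XFmode or TFmode operand becomes a DImode part plus one
   SImode or DImode upper part. */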
9674
9675 static int
9676 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
9677 {
9678 int size;
9679
9680 if (!TARGET_64BIT)
9681 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
9682 else
9683 size = (GET_MODE_SIZE (mode) + 4) / 8;
9684
9685 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
9686 abort ();
9687 if (size < 2 || size > 3)
9688 abort ();
9689
9690 /* Optimize constant pool references into immediates. This is used by fp
9691 moves, which force all constants to memory to allow combining. */
9692 if (GET_CODE (operand) == MEM && MEM_READONLY_P (operand))
9693 {
9694 rtx tmp = maybe_get_pool_constant (operand);
9695 if (tmp)
9696 operand = tmp;
9697 }
9698
9699 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
9700 {
9701 /* The only non-offsettable memories we handle are pushes. */
9702 if (! push_operand (operand, VOIDmode))
9703 abort ();
9704
9705 operand = copy_rtx (operand);
9706 PUT_MODE (operand, Pmode);
9707 parts[0] = parts[1] = parts[2] = operand;
9708 }
9709 else if (!TARGET_64BIT)
9710 {
9711 if (mode == DImode)
9712 split_di (&operand, 1, &parts[0], &parts[1]);
9713 else
9714 {
9715 if (REG_P (operand))
9716 {
9717 if (!reload_completed)
9718 abort ();
9719 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
9720 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
9721 if (size == 3)
9722 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
9723 }
9724 else if (offsettable_memref_p (operand))
9725 {
9726 operand = adjust_address (operand, SImode, 0);
9727 parts[0] = operand;
9728 parts[1] = adjust_address (operand, SImode, 4);
9729 if (size == 3)
9730 parts[2] = adjust_address (operand, SImode, 8);
9731 }
9732 else if (GET_CODE (operand) == CONST_DOUBLE)
9733 {
9734 REAL_VALUE_TYPE r;
9735 long l[4];
9736
9737 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
9738 switch (mode)
9739 {
9740 case XFmode:
9741 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
9742 parts[2] = gen_int_mode (l[2], SImode);
9743 break;
9744 case DFmode:
9745 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
9746 break;
9747 default:
9748 abort ();
9749 }
9750 parts[1] = gen_int_mode (l[1], SImode);
9751 parts[0] = gen_int_mode (l[0], SImode);
9752 }
9753 else
9754 abort ();
9755 }
9756 }
9757 else
9758 {
9759 if (mode == TImode)
9760 split_ti (&operand, 1, &parts[0], &parts[1]);
9761 if (mode == XFmode || mode == TFmode)
9762 {
9763 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
9764 if (REG_P (operand))
9765 {
9766 if (!reload_completed)
9767 abort ();
9768 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
9769 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
9770 }
9771 else if (offsettable_memref_p (operand))
9772 {
9773 operand = adjust_address (operand, DImode, 0);
9774 parts[0] = operand;
9775 parts[1] = adjust_address (operand, upper_mode, 8);
9776 }
9777 else if (GET_CODE (operand) == CONST_DOUBLE)
9778 {
9779 REAL_VALUE_TYPE r;
9780 long l[3];
9781
9782 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
9783 real_to_target (l, &r, mode);
9784 /* Do not use shift by 32 to avoid warning on 32bit systems. */
9785 if (HOST_BITS_PER_WIDE_INT >= 64)
9786 parts[0]
9787 = gen_int_mode
9788 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
9789 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
9790 DImode);
9791 else
9792 parts[0] = immed_double_const (l[0], l[1], DImode);
9793 if (upper_mode == SImode)
9794 parts[1] = gen_int_mode (l[2], SImode);
9795 else if (HOST_BITS_PER_WIDE_INT >= 64)
9796 parts[1]
9797 = gen_int_mode
9798 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
9799 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
9800 DImode);
9801 else
9802 parts[1] = immed_double_const (l[2], l[3], DImode);
9803 }
9804 else
9805 abort ();
9806 }
9807 }
9808
9809 return size;
9810 }
9811
9812 /* Emit insns to perform a move or push of DI, DF, and XF values.
9813 All required insns are emitted here. Operands 2-4 are used to hold
9814 the destination parts in the correct order; operands 5-7 hold the
9815 corresponding source parts. */
9816
9817 void
9818 ix86_split_long_move (rtx operands[])
9819 {
9820 rtx part[2][3];
9821 int nparts;
9822 int push = 0;
9823 int collisions = 0;
9824 enum machine_mode mode = GET_MODE (operands[0]);
9825
9826 /* The DFmode expanders may ask us to move a double.
9827 For a 64bit target this is a single move. By hiding that fact
9828 here we simplify the i386.md splitters. */
9829 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
9830 {
9831 /* Optimize constant pool references into immediates. This is used by
9832 fp moves, which force all constants to memory to allow combining. */
9833
9834 if (GET_CODE (operands[1]) == MEM
9835 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
9836 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
9837 operands[1] = get_pool_constant (XEXP (operands[1], 0));
9838 if (push_operand (operands[0], VOIDmode))
9839 {
9840 operands[0] = copy_rtx (operands[0]);
9841 PUT_MODE (operands[0], Pmode);
9842 }
9843 else
9844 operands[0] = gen_lowpart (DImode, operands[0]);
9845 operands[1] = gen_lowpart (DImode, operands[1]);
9846 emit_move_insn (operands[0], operands[1]);
9847 return;
9848 }
9849
9850 /* The only non-offsettable memory we handle is a push. */
9851 if (push_operand (operands[0], VOIDmode))
9852 push = 1;
9853 else if (GET_CODE (operands[0]) == MEM
9854 && ! offsettable_memref_p (operands[0]))
9855 abort ();
9856
9857 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
9858 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
9859
9860 /* When emitting a push, take care of source operands on the stack. */
9861 if (push && GET_CODE (operands[1]) == MEM
9862 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
9863 {
9864 if (nparts == 3)
9865 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
9866 XEXP (part[1][2], 0));
9867 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
9868 XEXP (part[1][1], 0));
9869 }
9870
9871 /* We need to do the copy in the right order in case an address register
9872 of the source overlaps the destination. */
9873 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
9874 {
9875 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
9876 collisions++;
9877 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
9878 collisions++;
9879 if (nparts == 3
9880 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
9881 collisions++;
9882
9883 /* Collision in the middle part can be handled by reordering. */
9884 if (collisions == 1 && nparts == 3
9885 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
9886 {
9887 rtx tmp;
9888 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
9889 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
9890 }
9891
9892 /* If there are more collisions, we can't handle it by reordering.
9893 Do an lea to the last part and use only one colliding move. */
9894 else if (collisions > 1)
9895 {
9896 rtx base;
9897
9898 collisions = 1;
9899
9900 base = part[0][nparts - 1];
9901
9902 /* Handle the case when the last part isn't valid for lea.
9903 Happens in 64-bit mode storing the 12-byte XFmode. */
9904 if (GET_MODE (base) != Pmode)
9905 base = gen_rtx_REG (Pmode, REGNO (base));
9906
9907 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
9908 part[1][0] = replace_equiv_address (part[1][0], base);
9909 part[1][1] = replace_equiv_address (part[1][1],
9910 plus_constant (base, UNITS_PER_WORD));
9911 if (nparts == 3)
9912 part[1][2] = replace_equiv_address (part[1][2],
9913 plus_constant (base, 8));
9914 }
9915 }
9916
9917 if (push)
9918 {
9919 if (!TARGET_64BIT)
9920 {
9921 if (nparts == 3)
9922 {
9923 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
9924 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
9925 emit_move_insn (part[0][2], part[1][2]);
9926 }
9927 }
9928 else
9929 {
9930 /* In 64bit mode we don't have a 32bit push available. In case this is
9931 a register, that is OK - we will just use the larger counterpart. We
9932 also retype the memory - this comes from an attempt to avoid the REX
9933 prefix on moving the second half of a TFmode value. */
9934 if (GET_MODE (part[1][1]) == SImode)
9935 {
9936 if (GET_CODE (part[1][1]) == MEM)
9937 part[1][1] = adjust_address (part[1][1], DImode, 0);
9938 else if (REG_P (part[1][1]))
9939 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
9940 else
9941 abort ();
9942 if (GET_MODE (part[1][0]) == SImode)
9943 part[1][0] = part[1][1];
9944 }
9945 }
9946 emit_move_insn (part[0][1], part[1][1]);
9947 emit_move_insn (part[0][0], part[1][0]);
9948 return;
9949 }
9950
9951 /* Choose the correct order so as not to overwrite the source before it is copied. */
9952 if ((REG_P (part[0][0])
9953 && REG_P (part[1][1])
9954 && (REGNO (part[0][0]) == REGNO (part[1][1])
9955 || (nparts == 3
9956 && REGNO (part[0][0]) == REGNO (part[1][2]))))
9957 || (collisions > 0
9958 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
9959 {
9960 if (nparts == 3)
9961 {
9962 operands[2] = part[0][2];
9963 operands[3] = part[0][1];
9964 operands[4] = part[0][0];
9965 operands[5] = part[1][2];
9966 operands[6] = part[1][1];
9967 operands[7] = part[1][0];
9968 }
9969 else
9970 {
9971 operands[2] = part[0][1];
9972 operands[3] = part[0][0];
9973 operands[5] = part[1][1];
9974 operands[6] = part[1][0];
9975 }
9976 }
9977 else
9978 {
9979 if (nparts == 3)
9980 {
9981 operands[2] = part[0][0];
9982 operands[3] = part[0][1];
9983 operands[4] = part[0][2];
9984 operands[5] = part[1][0];
9985 operands[6] = part[1][1];
9986 operands[7] = part[1][2];
9987 }
9988 else
9989 {
9990 operands[2] = part[0][0];
9991 operands[3] = part[0][1];
9992 operands[5] = part[1][0];
9993 operands[6] = part[1][1];
9994 }
9995 }
9996
9997 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
9998 if (optimize_size)
9999 {
10000 if (GET_CODE (operands[5]) == CONST_INT
10001 && operands[5] != const0_rtx
10002 && REG_P (operands[2]))
10003 {
10004 if (GET_CODE (operands[6]) == CONST_INT
10005 && INTVAL (operands[6]) == INTVAL (operands[5]))
10006 operands[6] = operands[2];
10007
10008 if (nparts == 3
10009 && GET_CODE (operands[7]) == CONST_INT
10010 && INTVAL (operands[7]) == INTVAL (operands[5]))
10011 operands[7] = operands[2];
10012 }
10013
10014 if (nparts == 3
10015 && GET_CODE (operands[6]) == CONST_INT
10016 && operands[6] != const0_rtx
10017 && REG_P (operands[3])
10018 && GET_CODE (operands[7]) == CONST_INT
10019 && INTVAL (operands[7]) == INTVAL (operands[6]))
10020 operands[7] = operands[3];
10021 }
10022
10023 emit_move_insn (operands[2], operands[5]);
10024 emit_move_insn (operands[3], operands[6]);
10025 if (nparts == 3)
10026 emit_move_insn (operands[4], operands[7]);
10027
10028 return;
10029 }
10030
10031 /* Helper function of ix86_split_ashldi used to generate an SImode
10032 left shift by a constant, either using a single shift or
10033 a sequence of add instructions. */
10034
10035 static void
10036 ix86_expand_ashlsi3_const (rtx operand, int count)
10037 {
10038 if (count == 1)
10039 emit_insn (gen_addsi3 (operand, operand, operand));
10040 else if (!optimize_size
10041 && count * ix86_cost->add <= ix86_cost->shift_const)
10042 {
10043 int i;
10044 for (i=0; i<count; i++)
10045 emit_insn (gen_addsi3 (operand, operand, operand));
10046 }
10047 else
10048 emit_insn (gen_ashlsi3 (operand, operand, GEN_INT (count)));
10049 }
10050
10051 void
10052 ix86_split_ashldi (rtx *operands, rtx scratch)
10053 {
10054 rtx low[2], high[2];
10055 int count;
10056
10057 if (GET_CODE (operands[2]) == CONST_INT)
10058 {
10059 split_di (operands, 2, low, high);
10060 count = INTVAL (operands[2]) & 63;
10061
10062 if (count >= 32)
10063 {
10064 emit_move_insn (high[0], low[1]);
10065 emit_move_insn (low[0], const0_rtx);
10066
10067 if (count > 32)
10068 ix86_expand_ashlsi3_const (high[0], count - 32);
10069 }
10070 else
10071 {
10072 if (!rtx_equal_p (operands[0], operands[1]))
10073 emit_move_insn (operands[0], operands[1]);
10074 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
10075 ix86_expand_ashlsi3_const (low[0], count);
10076 }
10077 return;
10078 }
10079
10080 split_di (operands, 1, low, high);
10081
10082 if (operands[1] == const1_rtx)
10083 {
10084 /* Assuming we've chosen QImode-capable registers, 1LL << N
10085 can be done with two 32-bit shifts, no branches, no cmoves. */
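/* Sketch of the idea: test bit 5 of the count, set the low byte of
   low (resp. high) to the "bit clear" (resp. "bit set") flag via setcc,
   and then shift both halves left by count mod 32; exactly one half
   ends up holding 1 << (count & 31). */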
10086 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
10087 {
10088 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
10089
10090 ix86_expand_clear (low[0]);
10091 ix86_expand_clear (high[0]);
10092 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (32)));
10093
10094 d = gen_lowpart (QImode, low[0]);
10095 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
10096 s = gen_rtx_EQ (QImode, flags, const0_rtx);
10097 emit_insn (gen_rtx_SET (VOIDmode, d, s));
10098
10099 d = gen_lowpart (QImode, high[0]);
10100 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
10101 s = gen_rtx_NE (QImode, flags, const0_rtx);
10102 emit_insn (gen_rtx_SET (VOIDmode, d, s));
10103 }
10104
10105 /* Otherwise, we can get the same results by manually performing
10106 a bit extract operation on bit 5, and then performing the two
10107 shifts. The two methods of getting 0/1 into low/high are exactly
10108 the same size. Avoiding the shift in the bit extract case helps
10109 pentium4 a bit; no one else seems to care much either way. */
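/* That is, roughly:  high = (count >> 5) & 1;  low = high ^ 1;
   followed by the same pair of 32-bit shifts by count mod 32. */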
10110 else
10111 {
10112 rtx x;
10113
10114 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
10115 x = gen_rtx_ZERO_EXTEND (SImode, operands[2]);
10116 else
10117 x = gen_lowpart (SImode, operands[2]);
10118 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
10119
10120 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (5)));
10121 emit_insn (gen_andsi3 (high[0], high[0], GEN_INT (1)));
10122 emit_move_insn (low[0], high[0]);
10123 emit_insn (gen_xorsi3 (low[0], low[0], GEN_INT (1)));
10124 }
10125
10126 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
10127 emit_insn (gen_ashlsi3 (high[0], high[0], operands[2]));
10128 return;
10129 }
10130
10131 if (operands[1] == constm1_rtx)
10132 {
10133 /* For -1LL << N, we can avoid the shld instruction, because we
10134 know that we're shifting 0...31 ones into a -1. */
10135 emit_move_insn (low[0], constm1_rtx);
10136 if (optimize_size)
10137 emit_move_insn (high[0], low[0]);
10138 else
10139 emit_move_insn (high[0], constm1_rtx);
10140 }
10141 else
10142 {
10143 if (!rtx_equal_p (operands[0], operands[1]))
10144 emit_move_insn (operands[0], operands[1]);
10145
10146 split_di (operands, 1, low, high);
10147 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
10148 }
10149
10150 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
10151
10152 if (TARGET_CMOVE && scratch)
10153 {
10154 ix86_expand_clear (scratch);
10155 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
10156 }
10157 else
10158 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
10159 }
10160
10161 void
10162 ix86_split_ashrdi (rtx *operands, rtx scratch)
10163 {
10164 rtx low[2], high[2];
10165 int count;
10166
10167 if (GET_CODE (operands[2]) == CONST_INT)
10168 {
10169 split_di (operands, 2, low, high);
10170 count = INTVAL (operands[2]) & 63;
10171
10172 if (count == 63)
10173 {
10174 emit_move_insn (high[0], high[1]);
10175 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10176 emit_move_insn (low[0], high[0]);
10177
10178 }
10179 else if (count >= 32)
10180 {
10181 emit_move_insn (low[0], high[1]);
10182 emit_move_insn (high[0], low[0]);
10183 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10184 if (count > 32)
10185 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
10186 }
10187 else
10188 {
10189 if (!rtx_equal_p (operands[0], operands[1]))
10190 emit_move_insn (operands[0], operands[1]);
10191 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10192 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
10193 }
10194 }
10195 else
10196 {
10197 if (!rtx_equal_p (operands[0], operands[1]))
10198 emit_move_insn (operands[0], operands[1]);
10199
10200 split_di (operands, 1, low, high);
10201
10202 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10203 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
10204
10205 if (TARGET_CMOVE && scratch)
10206 {
10207 emit_move_insn (scratch, high[0]);
10208 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
10209 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10210 scratch));
10211 }
10212 else
10213 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
10214 }
10215 }
10216
10217 void
10218 ix86_split_lshrdi (rtx *operands, rtx scratch)
10219 {
10220 rtx low[2], high[2];
10221 int count;
10222
10223 if (GET_CODE (operands[2]) == CONST_INT)
10224 {
10225 split_di (operands, 2, low, high);
10226 count = INTVAL (operands[2]) & 63;
10227
10228 if (count >= 32)
10229 {
10230 emit_move_insn (low[0], high[1]);
10231 ix86_expand_clear (high[0]);
10232
10233 if (count > 32)
10234 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
10235 }
10236 else
10237 {
10238 if (!rtx_equal_p (operands[0], operands[1]))
10239 emit_move_insn (operands[0], operands[1]);
10240 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10241 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
10242 }
10243 }
10244 else
10245 {
10246 if (!rtx_equal_p (operands[0], operands[1]))
10247 emit_move_insn (operands[0], operands[1]);
10248
10249 split_di (operands, 1, low, high);
10250
10251 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10252 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
10253
10254 /* Heh. By reversing the arguments, we can reuse this pattern. */
10255 if (TARGET_CMOVE && scratch)
10256 {
10257 ix86_expand_clear (scratch);
10258 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10259 scratch));
10260 }
10261 else
10262 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
10263 }
10264 }
10265
10266 /* Helper function for the string operations below. Test whether the VALUE
10267 bits of VARIABLE are zero; if so, jump to the returned label. */
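/* Callers emit the code that handles the unaligned case right after this
   call and then emit_label () the returned label, so the aligned path
   simply skips the fix-up code; see the alignment prologues in
   ix86_expand_movmem and ix86_expand_clrmem below. */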
10268 static rtx
10269 ix86_expand_aligntest (rtx variable, int value)
10270 {
10271 rtx label = gen_label_rtx ();
10272 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
10273 if (GET_MODE (variable) == DImode)
10274 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
10275 else
10276 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
10277 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
10278 1, label);
10279 return label;
10280 }
10281
10282 /* Decrease COUNTREG by VALUE. */
10283 static void
10284 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
10285 {
10286 if (GET_MODE (countreg) == DImode)
10287 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
10288 else
10289 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
10290 }
10291
10292 /* Zero extend the possibly SImode EXP to a Pmode register. */
10293 rtx
10294 ix86_zero_extend_to_Pmode (rtx exp)
10295 {
10296 rtx r;
10297 if (GET_MODE (exp) == VOIDmode)
10298 return force_reg (Pmode, exp);
10299 if (GET_MODE (exp) == Pmode)
10300 return copy_to_mode_reg (Pmode, exp);
10301 r = gen_reg_rtx (Pmode);
10302 emit_insn (gen_zero_extendsidi2 (r, exp));
10303 return r;
10304 }
10305
10306 /* Expand string move (memcpy) operation. Use i386 string operations when
10307 profitable. ix86_expand_clrmem contains similar code. */
10308 int
10309 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp)
10310 {
10311 rtx srcreg, destreg, countreg, srcexp, destexp;
10312 enum machine_mode counter_mode;
10313 HOST_WIDE_INT align = 0;
10314 unsigned HOST_WIDE_INT count = 0;
10315
10316 if (GET_CODE (align_exp) == CONST_INT)
10317 align = INTVAL (align_exp);
10318
10319 /* Can't use any of this if the user has appropriated esi or edi. */
10320 if (global_regs[4] || global_regs[5])
10321 return 0;
10322
10323 /* This simple hack avoids all inlining code and simplifies code below. */
10324 if (!TARGET_ALIGN_STRINGOPS)
10325 align = 64;
10326
10327 if (GET_CODE (count_exp) == CONST_INT)
10328 {
10329 count = INTVAL (count_exp);
10330 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
10331 return 0;
10332 }
10333
10334 /* Figure out the proper mode for the counter. For 32 bits it is always SImode,
10335 for 64 bits use SImode when possible, otherwise DImode.
10336 Set count to the number of bytes copied when known at compile time. */
10337 if (!TARGET_64BIT
10338 || GET_MODE (count_exp) == SImode
10339 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
10340 counter_mode = SImode;
10341 else
10342 counter_mode = DImode;
10343
10344 if (counter_mode != SImode && counter_mode != DImode)
10345 abort ();
10346
10347 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
10348 if (destreg != XEXP (dst, 0))
10349 dst = replace_equiv_address_nv (dst, destreg);
10350 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
10351 if (srcreg != XEXP (src, 0))
10352 src = replace_equiv_address_nv (src, srcreg);
10353
10354 /* When optimizing for size emit a simple rep ; movsb instruction for
10355 counts not divisible by 4. */
10356
10357 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
10358 {
10359 emit_insn (gen_cld ());
10360 countreg = ix86_zero_extend_to_Pmode (count_exp);
10361 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
10362 srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
10363 emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
10364 destexp, srcexp));
10365 }
10366
10367 /* For constant aligned (or small unaligned) copies use rep movsl
10368 followed by code copying the rest. For PentiumPro ensure 8 byte
10369 alignment to allow rep movsl acceleration. */
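/* E.g. for a known count of 13 bytes on a 32-bit target this emits a
   rep movsl moving 3 dwords (12 bytes) and then a single byte move for
   the remainder, selected by the count & 2 / count & 1 tests below. */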
10370
10371 else if (count != 0
10372 && (align >= 8
10373 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
10374 || optimize_size || count < (unsigned int) 64))
10375 {
10376 unsigned HOST_WIDE_INT offset = 0;
10377 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10378 rtx srcmem, dstmem;
10379
10380 emit_insn (gen_cld ());
10381 if (count & ~(size - 1))
10382 {
10383 countreg = copy_to_mode_reg (counter_mode,
10384 GEN_INT ((count >> (size == 4 ? 2 : 3))
10385 & (TARGET_64BIT ? -1 : 0x3fffffff)));
10386 countreg = ix86_zero_extend_to_Pmode (countreg);
10387
10388 destexp = gen_rtx_ASHIFT (Pmode, countreg,
10389 GEN_INT (size == 4 ? 2 : 3));
10390 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
10391 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
10392
10393 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
10394 countreg, destexp, srcexp));
10395 offset = count & ~(size - 1);
10396 }
10397 if (size == 8 && (count & 0x04))
10398 {
10399 srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
10400 offset);
10401 dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
10402 offset);
10403 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10404 offset += 4;
10405 }
10406 if (count & 0x02)
10407 {
10408 srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
10409 offset);
10410 dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
10411 offset);
10412 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10413 offset += 2;
10414 }
10415 if (count & 0x01)
10416 {
10417 srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
10418 offset);
10419 dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
10420 offset);
10421 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10422 }
10423 }
10424 /* The generic code based on the glibc implementation:
10425 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
10426 allowing accelerated copying there)
10427 - copy the data using rep movsl
10428 - copy the rest. */
10429 else
10430 {
10431 rtx countreg2;
10432 rtx label = NULL;
10433 rtx srcmem, dstmem;
10434 int desired_alignment = (TARGET_PENTIUMPRO
10435 && (count == 0 || count >= (unsigned int) 260)
10436 ? 8 : UNITS_PER_WORD);
10437 /* Get rid of MEM_OFFSETs, they won't be accurate. */
10438 dst = change_address (dst, BLKmode, destreg);
10439 src = change_address (src, BLKmode, srcreg);
10440
10441 /* In case we don't know anything about the alignment, default to
10442 the library version, since it is usually equally fast and results in
10443 shorter code.
10444
10445 Also emit the library call when we know that the count is large and
10446 call overhead will not be important. */
10447 if (!TARGET_INLINE_ALL_STRINGOPS
10448 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
10449 return 0;
10450
10451 if (TARGET_SINGLE_STRINGOP)
10452 emit_insn (gen_cld ());
10453
10454 countreg2 = gen_reg_rtx (Pmode);
10455 countreg = copy_to_mode_reg (counter_mode, count_exp);
10456
10457 /* We don't use loops to align destination and to copy parts smaller
10458 than 4 bytes, because gcc is able to optimize such code better (in
10459 the case the destination or the count really is aligned, gcc is often
10460 able to predict the branches) and also it is friendlier to the
10461 hardware branch prediction.
10462
10463 Using loops is beneficial for the generic case, because we can
10464 handle small counts using the loops. Many CPUs (such as Athlon)
10465 have large REP prefix setup costs.
10466
10467 This is quite costly. Maybe we can revisit this decision later or
10468 add some customizability to this code. */
10469
10470 if (count == 0 && align < desired_alignment)
10471 {
10472 label = gen_label_rtx ();
10473 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
10474 LEU, 0, counter_mode, 1, label);
10475 }
10476 if (align <= 1)
10477 {
10478 rtx label = ix86_expand_aligntest (destreg, 1);
10479 srcmem = change_address (src, QImode, srcreg);
10480 dstmem = change_address (dst, QImode, destreg);
10481 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10482 ix86_adjust_counter (countreg, 1);
10483 emit_label (label);
10484 LABEL_NUSES (label) = 1;
10485 }
10486 if (align <= 2)
10487 {
10488 rtx label = ix86_expand_aligntest (destreg, 2);
10489 srcmem = change_address (src, HImode, srcreg);
10490 dstmem = change_address (dst, HImode, destreg);
10491 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10492 ix86_adjust_counter (countreg, 2);
10493 emit_label (label);
10494 LABEL_NUSES (label) = 1;
10495 }
10496 if (align <= 4 && desired_alignment > 4)
10497 {
10498 rtx label = ix86_expand_aligntest (destreg, 4);
10499 srcmem = change_address (src, SImode, srcreg);
10500 dstmem = change_address (dst, SImode, destreg);
10501 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10502 ix86_adjust_counter (countreg, 4);
10503 emit_label (label);
10504 LABEL_NUSES (label) = 1;
10505 }
10506
10507 if (label && desired_alignment > 4 && !TARGET_64BIT)
10508 {
10509 emit_label (label);
10510 LABEL_NUSES (label) = 1;
10511 label = NULL_RTX;
10512 }
10513 if (!TARGET_SINGLE_STRINGOP)
10514 emit_insn (gen_cld ());
10515 if (TARGET_64BIT)
10516 {
10517 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10518 GEN_INT (3)));
10519 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
10520 }
10521 else
10522 {
10523 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
10524 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
10525 }
10526 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
10527 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
10528 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
10529 countreg2, destexp, srcexp));
10530
10531 if (label)
10532 {
10533 emit_label (label);
10534 LABEL_NUSES (label) = 1;
10535 }
10536 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10537 {
10538 srcmem = change_address (src, SImode, srcreg);
10539 dstmem = change_address (dst, SImode, destreg);
10540 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10541 }
10542 if ((align <= 4 || count == 0) && TARGET_64BIT)
10543 {
10544 rtx label = ix86_expand_aligntest (countreg, 4);
10545 srcmem = change_address (src, SImode, srcreg);
10546 dstmem = change_address (dst, SImode, destreg);
10547 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10548 emit_label (label);
10549 LABEL_NUSES (label) = 1;
10550 }
10551 if (align > 2 && count != 0 && (count & 2))
10552 {
10553 srcmem = change_address (src, HImode, srcreg);
10554 dstmem = change_address (dst, HImode, destreg);
10555 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10556 }
10557 if (align <= 2 || count == 0)
10558 {
10559 rtx label = ix86_expand_aligntest (countreg, 2);
10560 srcmem = change_address (src, HImode, srcreg);
10561 dstmem = change_address (dst, HImode, destreg);
10562 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10563 emit_label (label);
10564 LABEL_NUSES (label) = 1;
10565 }
10566 if (align > 1 && count != 0 && (count & 1))
10567 {
10568 srcmem = change_address (src, QImode, srcreg);
10569 dstmem = change_address (dst, QImode, destreg);
10570 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10571 }
10572 if (align <= 1 || count == 0)
10573 {
10574 rtx label = ix86_expand_aligntest (countreg, 1);
10575 srcmem = change_address (src, QImode, srcreg);
10576 dstmem = change_address (dst, QImode, destreg);
10577 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10578 emit_label (label);
10579 LABEL_NUSES (label) = 1;
10580 }
10581 }
10582
10583 return 1;
10584 }
10585
10586 /* Expand string clear operation (bzero). Use i386 string operations when
10587 profitable. ix86_expand_movmem contains similar code. */
10588 int
10589 ix86_expand_clrmem (rtx dst, rtx count_exp, rtx align_exp)
10590 {
10591 rtx destreg, zeroreg, countreg, destexp;
10592 enum machine_mode counter_mode;
10593 HOST_WIDE_INT align = 0;
10594 unsigned HOST_WIDE_INT count = 0;
10595
10596 if (GET_CODE (align_exp) == CONST_INT)
10597 align = INTVAL (align_exp);
10598
10599 /* Can't use any of this if the user has appropriated esi. */
10600 if (global_regs[4])
10601 return 0;
10602
10603 /* This simple hack avoids all inlining code and simplifies code below. */
10604 if (!TARGET_ALIGN_STRINGOPS)
10605 align = 32;
10606
10607 if (GET_CODE (count_exp) == CONST_INT)
10608 {
10609 count = INTVAL (count_exp);
10610 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
10611 return 0;
10612 }
10613 /* Figure out the proper mode for the counter. For 32 bits it is always SImode,
10614 for 64 bits use SImode when possible, otherwise DImode.
10615 Set count to the number of bytes cleared when known at compile time. */
10616 if (!TARGET_64BIT
10617 || GET_MODE (count_exp) == SImode
10618 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
10619 counter_mode = SImode;
10620 else
10621 counter_mode = DImode;
10622
10623 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
10624 if (destreg != XEXP (dst, 0))
10625 dst = replace_equiv_address_nv (dst, destreg);
10626
10627
10628 /* When optimizing for size emit a simple rep ; stosb instruction for
10629 counts not divisible by 4. The movl $N, %ecx; rep; stosb
10630 sequence is 7 bytes long, so if optimizing for size and the count is
10631 small enough that some stosl, stosw and stosb instructions without
10632 rep are shorter, fall back into the next if. */
10633
10634 if ((!optimize || optimize_size)
10635 && (count == 0
10636 || ((count & 0x03)
10637 && (!optimize_size || (count & 0x03) + (count >> 2) > 7))))
10638 {
10639 emit_insn (gen_cld ());
10640
10641 countreg = ix86_zero_extend_to_Pmode (count_exp);
10642 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
10643 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
10644 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
10645 }
10646 else if (count != 0
10647 && (align >= 8
10648 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
10649 || optimize_size || count < (unsigned int) 64))
10650 {
10651 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10652 unsigned HOST_WIDE_INT offset = 0;
10653
10654 emit_insn (gen_cld ());
10655
10656 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
10657 if (count & ~(size - 1))
10658 {
10659 unsigned HOST_WIDE_INT repcount;
10660 unsigned int max_nonrep;
10661
10662 repcount = count >> (size == 4 ? 2 : 3);
10663 if (!TARGET_64BIT)
10664 repcount &= 0x3fffffff;
10665
10666 /* movl $N, %ecx; rep; stosl is 7 bytes, while N x stosl is N bytes.
10667 movl $N, %ecx; rep; stosq is 8 bytes, while N x stosq is 2xN
10668 bytes. In both cases the latter seems to be faster for small
10669 values of N. */
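/* For example, clearing 16 aligned bytes with 32-bit stosl gives
   repcount == 4, which does not exceed the default max_nonrep of 7, so
   four plain stosl instructions are emitted rather than the longer
   movl/rep/stosl setup (Pentium 4 and Nocona lower the threshold to 3). */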
10670 max_nonrep = size == 4 ? 7 : 4;
10671 if (!optimize_size)
10672 switch (ix86_tune)
10673 {
10674 case PROCESSOR_PENTIUM4:
10675 case PROCESSOR_NOCONA:
10676 max_nonrep = 3;
10677 break;
10678 default:
10679 break;
10680 }
10681
10682 if (repcount <= max_nonrep)
10683 while (repcount-- > 0)
10684 {
10685 rtx mem = adjust_automodify_address_nv (dst,
10686 GET_MODE (zeroreg),
10687 destreg, offset);
10688 emit_insn (gen_strset (destreg, mem, zeroreg));
10689 offset += size;
10690 }
10691 else
10692 {
10693 countreg = copy_to_mode_reg (counter_mode, GEN_INT (repcount));
10694 countreg = ix86_zero_extend_to_Pmode (countreg);
10695 destexp = gen_rtx_ASHIFT (Pmode, countreg,
10696 GEN_INT (size == 4 ? 2 : 3));
10697 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
10698 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg,
10699 destexp));
10700 offset = count & ~(size - 1);
10701 }
10702 }
10703 if (size == 8 && (count & 0x04))
10704 {
10705 rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
10706 offset);
10707 emit_insn (gen_strset (destreg, mem,
10708 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10709 offset += 4;
10710 }
10711 if (count & 0x02)
10712 {
10713 rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
10714 offset);
10715 emit_insn (gen_strset (destreg, mem,
10716 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10717 offset += 2;
10718 }
10719 if (count & 0x01)
10720 {
10721 rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
10722 offset);
10723 emit_insn (gen_strset (destreg, mem,
10724 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10725 }
10726 }
10727 else
10728 {
10729 rtx countreg2;
10730 rtx label = NULL;
10731 /* Compute desired alignment of the string operation. */
10732 int desired_alignment = (TARGET_PENTIUMPRO
10733 && (count == 0 || count >= (unsigned int) 260)
10734 ? 8 : UNITS_PER_WORD);
10735
10736 /* In case we don't know anything about the alignment, default to
10737 the library version, since it is usually equally fast and results in
10738 shorter code.
10739
10740 Also emit the library call when we know that the count is large and
10741 call overhead will not be important. */
10742 if (!TARGET_INLINE_ALL_STRINGOPS
10743 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
10744 return 0;
10745
10746 if (TARGET_SINGLE_STRINGOP)
10747 emit_insn (gen_cld ());
10748
10749 countreg2 = gen_reg_rtx (Pmode);
10750 countreg = copy_to_mode_reg (counter_mode, count_exp);
10751 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
10752 /* Get rid of MEM_OFFSET, it won't be accurate. */
10753 dst = change_address (dst, BLKmode, destreg);
10754
10755 if (count == 0 && align < desired_alignment)
10756 {
10757 label = gen_label_rtx ();
10758 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
10759 LEU, 0, counter_mode, 1, label);
10760 }
10761 if (align <= 1)
10762 {
10763 rtx label = ix86_expand_aligntest (destreg, 1);
10764 emit_insn (gen_strset (destreg, dst,
10765 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10766 ix86_adjust_counter (countreg, 1);
10767 emit_label (label);
10768 LABEL_NUSES (label) = 1;
10769 }
10770 if (align <= 2)
10771 {
10772 rtx label = ix86_expand_aligntest (destreg, 2);
10773 emit_insn (gen_strset (destreg, dst,
10774 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10775 ix86_adjust_counter (countreg, 2);
10776 emit_label (label);
10777 LABEL_NUSES (label) = 1;
10778 }
10779 if (align <= 4 && desired_alignment > 4)
10780 {
10781 rtx label = ix86_expand_aligntest (destreg, 4);
10782 emit_insn (gen_strset (destreg, dst,
10783 (TARGET_64BIT
10784 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
10785 : zeroreg)));
10786 ix86_adjust_counter (countreg, 4);
10787 emit_label (label);
10788 LABEL_NUSES (label) = 1;
10789 }
10790
10791 if (label && desired_alignment > 4 && !TARGET_64BIT)
10792 {
10793 emit_label (label);
10794 LABEL_NUSES (label) = 1;
10795 label = NULL_RTX;
10796 }
10797
10798 if (!TARGET_SINGLE_STRINGOP)
10799 emit_insn (gen_cld ());
10800 if (TARGET_64BIT)
10801 {
10802 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10803 GEN_INT (3)));
10804 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
10805 }
10806 else
10807 {
10808 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
10809 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
10810 }
10811 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
10812 emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));
10813
10814 if (label)
10815 {
10816 emit_label (label);
10817 LABEL_NUSES (label) = 1;
10818 }
10819
10820 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10821 emit_insn (gen_strset (destreg, dst,
10822 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10823 if (TARGET_64BIT && (align <= 4 || count == 0))
10824 {
10825 rtx label = ix86_expand_aligntest (countreg, 4);
10826 emit_insn (gen_strset (destreg, dst,
10827 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10828 emit_label (label);
10829 LABEL_NUSES (label) = 1;
10830 }
10831 if (align > 2 && count != 0 && (count & 2))
10832 emit_insn (gen_strset (destreg, dst,
10833 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10834 if (align <= 2 || count == 0)
10835 {
10836 rtx label = ix86_expand_aligntest (countreg, 2);
10837 emit_insn (gen_strset (destreg, dst,
10838 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10839 emit_label (label);
10840 LABEL_NUSES (label) = 1;
10841 }
10842 if (align > 1 && count != 0 && (count & 1))
10843 emit_insn (gen_strset (destreg, dst,
10844 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10845 if (align <= 1 || count == 0)
10846 {
10847 rtx label = ix86_expand_aligntest (countreg, 1);
10848 emit_insn (gen_strset (destreg, dst,
10849 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10850 emit_label (label);
10851 LABEL_NUSES (label) = 1;
10852 }
10853 }
10854 return 1;
10855 }
10856
10857 /* Expand strlen. */
10858 int
10859 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
10860 {
10861 rtx addr, scratch1, scratch2, scratch3, scratch4;
10862
10863 /* The generic case of the strlen expander is long. Avoid expanding
10864 it unless TARGET_INLINE_ALL_STRINGOPS. */
10865
10866 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
10867 && !TARGET_INLINE_ALL_STRINGOPS
10868 && !optimize_size
10869 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
10870 return 0;
10871
10872 addr = force_reg (Pmode, XEXP (src, 0));
10873 scratch1 = gen_reg_rtx (Pmode);
10874
10875 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
10876 && !optimize_size)
10877 {
10878 /* Well it seems that some optimizer does not combine a call like
10879 foo(strlen(bar), strlen(bar));
10880 when the move and the subtraction are done here. It does calculate
10881 the length just once when these instructions are done inside of
10882 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
10883 often used and I use one fewer register for the lifetime of
10884 output_strlen_unroll() this is better. */
10885
10886 emit_move_insn (out, addr);
10887
10888 ix86_expand_strlensi_unroll_1 (out, src, align);
10889
10890 /* strlensi_unroll_1 returns the address of the zero at the end of
10891 the string, like memchr(), so compute the length by subtracting
10892 the start address. */
10893 if (TARGET_64BIT)
10894 emit_insn (gen_subdi3 (out, out, addr));
10895 else
10896 emit_insn (gen_subsi3 (out, out, addr));
10897 }
10898 else
10899 {
10900 rtx unspec;
10901 scratch2 = gen_reg_rtx (Pmode);
10902 scratch3 = gen_reg_rtx (Pmode);
10903 scratch4 = force_reg (Pmode, constm1_rtx);
10904
10905 emit_move_insn (scratch3, addr);
10906 eoschar = force_reg (QImode, eoschar);
10907
10908 emit_insn (gen_cld ());
10909 src = replace_equiv_address_nv (src, scratch3);
10910
10911 /* If .md starts supporting :P, this can be done in .md. */
10912 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
10913 scratch4), UNSPEC_SCAS);
10914 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
10915 if (TARGET_64BIT)
10916 {
10917 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
10918 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
10919 }
10920 else
10921 {
10922 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
10923 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
10924 }
10925 }
10926 return 1;
10927 }
10928
10929 /* Expand the appropriate insns for doing strlen if not just doing
10930 repnz; scasb
10931
10932 out = result, initialized with the start address
10933 align_rtx = alignment of the address.
10934 scratch = scratch register, initialized with the start address when
10935 not aligned, otherwise undefined
10936
10937 This is just the body. It needs the initializations mentioned above and
10938 some address computing at the end. These things are done in i386.md. */
10939
10940 static void
10941 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
10942 {
10943 int align;
10944 rtx tmp;
10945 rtx align_2_label = NULL_RTX;
10946 rtx align_3_label = NULL_RTX;
10947 rtx align_4_label = gen_label_rtx ();
10948 rtx end_0_label = gen_label_rtx ();
10949 rtx mem;
10950 rtx tmpreg = gen_reg_rtx (SImode);
10951 rtx scratch = gen_reg_rtx (SImode);
10952 rtx cmp;
10953
10954 align = 0;
10955 if (GET_CODE (align_rtx) == CONST_INT)
10956 align = INTVAL (align_rtx);
10957
10958 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
10959
10960 /* Is there a known alignment and is it less than 4? */
10961 if (align < 4)
10962 {
10963 rtx scratch1 = gen_reg_rtx (Pmode);
10964 emit_move_insn (scratch1, out);
10965 /* Is there a known alignment and is it not 2? */
10966 if (align != 2)
10967 {
10968 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
10969 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
10970
10971 /* Leave just the 3 lower bits. */
10972 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
10973 NULL_RTX, 0, OPTAB_WIDEN);
10974
10975 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
10976 Pmode, 1, align_4_label);
10977 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
10978 Pmode, 1, align_2_label);
10979 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
10980 Pmode, 1, align_3_label);
10981 }
10982 else
10983 {
10984 /* Since the alignment is 2, we have to check 2 or 0 bytes;
10985 check whether it is aligned to 4 bytes. */
10986
10987 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
10988 NULL_RTX, 0, OPTAB_WIDEN);
10989
10990 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
10991 Pmode, 1, align_4_label);
10992 }
10993
10994 mem = change_address (src, QImode, out);
10995
10996 /* Now compare the bytes. */
10997
10998 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
10999 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
11000 QImode, 1, end_0_label);
11001
11002 /* Increment the address. */
11003 if (TARGET_64BIT)
11004 emit_insn (gen_adddi3 (out, out, const1_rtx));
11005 else
11006 emit_insn (gen_addsi3 (out, out, const1_rtx));
11007
11008 /* Not needed with an alignment of 2. */
11009 if (align != 2)
11010 {
11011 emit_label (align_2_label);
11012
11013 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11014 end_0_label);
11015
11016 if (TARGET_64BIT)
11017 emit_insn (gen_adddi3 (out, out, const1_rtx));
11018 else
11019 emit_insn (gen_addsi3 (out, out, const1_rtx));
11020
11021 emit_label (align_3_label);
11022 }
11023
11024 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11025 end_0_label);
11026
11027 if (TARGET_64BIT)
11028 emit_insn (gen_adddi3 (out, out, const1_rtx));
11029 else
11030 emit_insn (gen_addsi3 (out, out, const1_rtx));
11031 }
11032
11033 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
11034 align this loop; it only makes programs larger and does not help to
11035 speed them up. */
11036 emit_label (align_4_label);
11037
11038 mem = change_address (src, SImode, out);
11039 emit_move_insn (scratch, mem);
11040 if (TARGET_64BIT)
11041 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
11042 else
11043 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
11044
11045 /* This formula yields a nonzero result iff one of the bytes is zero.
11046 This saves three branches inside the loop and many cycles. */
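/* Concretely, the insns below compute
     tmpreg = (word - 0x01010101) & ~word & 0x80808080,
   which is nonzero exactly when some byte of the loaded word is zero;
   the lowest 0x80 bit that is set marks the first zero byte, which is
   what the fix-up code after the loop relies on. */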
11047
11048 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
11049 emit_insn (gen_one_cmplsi2 (scratch, scratch));
11050 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
11051 emit_insn (gen_andsi3 (tmpreg, tmpreg,
11052 gen_int_mode (0x80808080, SImode)));
11053 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
11054 align_4_label);
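/* The add/not/and sequence just emitted computes
   ((x - 0x01010101) & ~x) & 0x80808080, the classic branch-free test for a
   zero byte in a 32-bit word: in x - 0x01010101 a byte gets bit 7 set when
   it was zero (or >= 0x81), and ANDing with ~x throws away the >= 0x81
   cases, so the result is nonzero exactly when some byte of x is zero.
   A minimal standalone sketch of the same trick (the helper name is
   hypothetical, not part of GCC):

     static inline unsigned int
     has_zero_byte (unsigned int x)
     {
       return (x - 0x01010101U) & ~x & 0x80808080U;
     }

   For example has_zero_byte (0x64636200) is nonzero, while
   has_zero_byte (0x64636261) is zero.  */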
11055
11056 if (TARGET_CMOVE)
11057 {
11058 rtx reg = gen_reg_rtx (SImode);
11059 rtx reg2 = gen_reg_rtx (Pmode);
11060 emit_move_insn (reg, tmpreg);
11061 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
11062
11063 /* If zero is not in the first two bytes, move two bytes forward. */
11064 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11065 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11066 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11067 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
11068 gen_rtx_IF_THEN_ELSE (SImode, tmp,
11069 reg,
11070 tmpreg)));
11071 /* Emit lea manually to avoid clobbering of flags. */
11072 emit_insn (gen_rtx_SET (SImode, reg2,
11073 gen_rtx_PLUS (Pmode, out, const2_rtx)));
11074
11075 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11076 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11077 emit_insn (gen_rtx_SET (VOIDmode, out,
11078 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
11079 reg2,
11080 out)));
11081
11082 }
11083 else
11084 {
11085 rtx end_2_label = gen_label_rtx ();
11086 /* Is zero in the first two bytes? */
11087
11088 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11089 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11090 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
11091 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11092 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
11093 pc_rtx);
11094 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11095 JUMP_LABEL (tmp) = end_2_label;
11096
11097 /* Not in the first two. Move two bytes forward. */
11098 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
11099 if (TARGET_64BIT)
11100 emit_insn (gen_adddi3 (out, out, const2_rtx));
11101 else
11102 emit_insn (gen_addsi3 (out, out, const2_rtx));
11103
11104 emit_label (end_2_label);
11105
11106 }
11107
11108 /* Avoid branch in fixing the byte. */
11109 tmpreg = gen_lowpart (QImode, tmpreg);
11110 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
11111 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
11112 if (TARGET_64BIT)
11113 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
11114 else
11115 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
11116
11117 emit_label (end_0_label);
11118 }
11119
11120 void
11121 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
11122 rtx callarg2 ATTRIBUTE_UNUSED,
11123 rtx pop, int sibcall)
11124 {
11125 rtx use = NULL, call;
11126
11127 if (pop == const0_rtx)
11128 pop = NULL;
11129 if (TARGET_64BIT && pop)
11130 abort ();
11131
11132 #if TARGET_MACHO
11133 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
11134 fnaddr = machopic_indirect_call_target (fnaddr);
11135 #else
11136 /* Static functions and indirect calls don't need the pic register. */
11137 if (! TARGET_64BIT && flag_pic
11138 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
11139 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
11140 use_reg (&use, pic_offset_table_rtx);
11141
11142 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
11143 {
11144 rtx al = gen_rtx_REG (QImode, 0);
11145 emit_move_insn (al, callarg2);
11146 use_reg (&use, al);
11147 }
11148 #endif /* TARGET_MACHO */
11149
11150 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
11151 {
11152 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11153 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11154 }
11155 if (sibcall && TARGET_64BIT
11156 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
11157 {
11158 rtx addr;
11159 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11160 fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
11161 emit_move_insn (fnaddr, addr);
11162 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11163 }
11164
11165 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
11166 if (retval)
11167 call = gen_rtx_SET (VOIDmode, retval, call);
11168 if (pop)
11169 {
11170 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
11171 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
11172 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
11173 }
11174
11175 call = emit_call_insn (call);
11176 if (use)
11177 CALL_INSN_FUNCTION_USAGE (call) = use;
11178 }
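/* For a 32-bit call to a function that returns its result in %eax and pops
   its own arguments (e.g. stdcall), the code above wraps the CALL in a SET
   and adds the stack adjustment as a parallel member, so the emitted
   call_insn has roughly this shape (the symbol name and byte counts are
   illustrative only):

     (parallel [
        (set (reg:SI 0 eax)
             (call (mem:QI (symbol_ref:SI ("foo"))) (const_int 8)))
        (set (reg:SI 7 esp)
             (plus:SI (reg:SI 7 esp) (const_int 8)))])  */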
11179
11180 \f
11181 /* Clear stack slot assignments remembered from previous functions.
11182 This is called from INIT_EXPANDERS once before RTL is emitted for each
11183 function. */
11184
11185 static struct machine_function *
11186 ix86_init_machine_status (void)
11187 {
11188 struct machine_function *f;
11189
11190 f = ggc_alloc_cleared (sizeof (struct machine_function));
11191 f->use_fast_prologue_epilogue_nregs = -1;
11192
11193 return f;
11194 }
11195
11196 /* Return a MEM corresponding to a stack slot with mode MODE.
11197 Allocate a new slot if necessary.
11198
11199 The RTL for a function can have several slots available: N is
11200 which slot to use. */
11201
11202 rtx
11203 assign_386_stack_local (enum machine_mode mode, int n)
11204 {
11205 struct stack_local_entry *s;
11206
11207 if (n < 0 || n >= MAX_386_STACK_LOCALS)
11208 abort ();
11209
11210 for (s = ix86_stack_locals; s; s = s->next)
11211 if (s->mode == mode && s->n == n)
11212 return s->rtl;
11213
11214 s = (struct stack_local_entry *)
11215 ggc_alloc (sizeof (struct stack_local_entry));
11216 s->n = n;
11217 s->mode = mode;
11218 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
11219
11220 s->next = ix86_stack_locals;
11221 ix86_stack_locals = s;
11222 return s->rtl;
11223 }
11224
11225 /* Construct the SYMBOL_REF for the tls_get_addr function. */
11226
11227 static GTY(()) rtx ix86_tls_symbol;
11228 rtx
11229 ix86_tls_get_addr (void)
11230 {
11231
11232 if (!ix86_tls_symbol)
11233 {
11234 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
11235 (TARGET_GNU_TLS && !TARGET_64BIT)
11236 ? "___tls_get_addr"
11237 : "__tls_get_addr");
11238 }
11239
11240 return ix86_tls_symbol;
11241 }
11242 \f
11243 /* Calculate the length of the memory address in the instruction
11244 encoding. Does not include the one-byte modrm, opcode, or prefix. */
11245
11246 int
11247 memory_address_length (rtx addr)
11248 {
11249 struct ix86_address parts;
11250 rtx base, index, disp;
11251 int len;
11252
11253 if (GET_CODE (addr) == PRE_DEC
11254 || GET_CODE (addr) == POST_INC
11255 || GET_CODE (addr) == PRE_MODIFY
11256 || GET_CODE (addr) == POST_MODIFY)
11257 return 0;
11258
11259 if (! ix86_decompose_address (addr, &parts))
11260 abort ();
11261
11262 base = parts.base;
11263 index = parts.index;
11264 disp = parts.disp;
11265 len = 0;
11266
11267 /* Rule of thumb:
11268 - esp as the base always wants an index,
11269 - ebp as the base always wants a displacement. */
11270
11271 /* Register Indirect. */
11272 if (base && !index && !disp)
11273 {
11274 /* esp (for its index) and ebp (for its displacement) need
11275 the two-byte modrm form. */
11276 if (addr == stack_pointer_rtx
11277 || addr == arg_pointer_rtx
11278 || addr == frame_pointer_rtx
11279 || addr == hard_frame_pointer_rtx)
11280 len = 1;
11281 }
11282
11283 /* Direct Addressing. */
11284 else if (disp && !base && !index)
11285 len = 4;
11286
11287 else
11288 {
11289 /* Find the length of the displacement constant. */
11290 if (disp)
11291 {
11292 if (GET_CODE (disp) == CONST_INT
11293 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K')
11294 && base)
11295 len = 1;
11296 else
11297 len = 4;
11298 }
11299 /* ebp always wants a displacement. */
11300 else if (base == hard_frame_pointer_rtx)
11301 len = 1;
11302
11303 /* An index requires the two-byte modrm form.... */
11304 if (index
11305 /* ...like esp, which always wants an index. */
11306 || base == stack_pointer_rtx
11307 || base == arg_pointer_rtx
11308 || base == frame_pointer_rtx)
11309 len += 1;
11310 }
11311
11312 return len;
11313 }
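/* A few worked examples of the value computed above (only the address bytes
   are counted; the opcode and the first modrm byte are not):

     (%eax)          -> 0    plain register indirect
     (%esp)          -> 1    needs a SIB byte
     4(%ebp)         -> 1    disp8
     12345           -> 4    absolute disp32, no base or index
     foo(,%eax,4)    -> 5    SIB byte + disp32
     8(%ebx,%ecx,2)  -> 2    SIB byte + disp8  */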
11314
11315 /* Compute default value for "length_immediate" attribute. When SHORTFORM
11316 is set, expect that the insn has an 8-bit immediate alternative. */
11317 int
11318 ix86_attr_length_immediate_default (rtx insn, int shortform)
11319 {
11320 int len = 0;
11321 int i;
11322 extract_insn_cached (insn);
11323 for (i = recog_data.n_operands - 1; i >= 0; --i)
11324 if (CONSTANT_P (recog_data.operand[i]))
11325 {
11326 if (len)
11327 abort ();
11328 if (shortform
11329 && GET_CODE (recog_data.operand[i]) == CONST_INT
11330 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
11331 len = 1;
11332 else
11333 {
11334 switch (get_attr_mode (insn))
11335 {
11336 case MODE_QI:
11337 len+=1;
11338 break;
11339 case MODE_HI:
11340 len+=2;
11341 break;
11342 case MODE_SI:
11343 len+=4;
11344 break;
11345 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
11346 case MODE_DI:
11347 len+=4;
11348 break;
11349 default:
11350 fatal_insn ("unknown insn mode", insn);
11351 }
11352 }
11353 }
11354 return len;
11355 }
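/* For example, "addl $3, %ebx" has an 8-bit immediate alternative and so
   contributes 1 byte here when SHORTFORM is set, while "addl $300, %ebx"
   needs a full 32-bit immediate and contributes 4; "addq $300, %rbx" also
   contributes 4, since DImode immediates are 32-bit sign-extended.  */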
11356 /* Compute default value for "length_address" attribute. */
11357 int
11358 ix86_attr_length_address_default (rtx insn)
11359 {
11360 int i;
11361
11362 if (get_attr_type (insn) == TYPE_LEA)
11363 {
11364 rtx set = PATTERN (insn);
11365 if (GET_CODE (set) == SET)
11366 ;
11367 else if (GET_CODE (set) == PARALLEL
11368 && GET_CODE (XVECEXP (set, 0, 0)) == SET)
11369 set = XVECEXP (set, 0, 0);
11370 else
11371 {
11372 #ifdef ENABLE_CHECKING
11373 abort ();
11374 #endif
11375 return 0;
11376 }
11377
11378 return memory_address_length (SET_SRC (set));
11379 }
11380
11381 extract_insn_cached (insn);
11382 for (i = recog_data.n_operands - 1; i >= 0; --i)
11383 if (GET_CODE (recog_data.operand[i]) == MEM)
11384 {
11385 return memory_address_length (XEXP (recog_data.operand[i], 0));
11387 }
11388 return 0;
11389 }
11390 \f
11391 /* Return the maximum number of instructions a cpu can issue. */
11392
11393 static int
11394 ix86_issue_rate (void)
11395 {
11396 switch (ix86_tune)
11397 {
11398 case PROCESSOR_PENTIUM:
11399 case PROCESSOR_K6:
11400 return 2;
11401
11402 case PROCESSOR_PENTIUMPRO:
11403 case PROCESSOR_PENTIUM4:
11404 case PROCESSOR_ATHLON:
11405 case PROCESSOR_K8:
11406 case PROCESSOR_NOCONA:
11407 return 3;
11408
11409 default:
11410 return 1;
11411 }
11412 }
11413
11414 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads the flags
11415 set by DEP_INSN and reads nothing else that DEP_INSN sets. */
11416
11417 static int
11418 ix86_flags_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
11419 {
11420 rtx set, set2;
11421
11422 /* Simplify the test for uninteresting insns. */
11423 if (insn_type != TYPE_SETCC
11424 && insn_type != TYPE_ICMOV
11425 && insn_type != TYPE_FCMOV
11426 && insn_type != TYPE_IBR)
11427 return 0;
11428
11429 if ((set = single_set (dep_insn)) != 0)
11430 {
11431 set = SET_DEST (set);
11432 set2 = NULL_RTX;
11433 }
11434 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
11435 && XVECLEN (PATTERN (dep_insn), 0) == 2
11436 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
11437 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
11438 {
11439 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
11440 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
11441 }
11442 else
11443 return 0;
11444
11445 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
11446 return 0;
11447
11448 /* This test is true if the dependent insn reads the flags but
11449 not any other potentially set register. */
11450 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
11451 return 0;
11452
11453 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
11454 return 0;
11455
11456 return 1;
11457 }
11458
11459 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
11460 address with operands set by DEP_INSN. */
11461
11462 static int
11463 ix86_agi_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
11464 {
11465 rtx addr;
11466
11467 if (insn_type == TYPE_LEA
11468 && TARGET_PENTIUM)
11469 {
11470 addr = PATTERN (insn);
11471 if (GET_CODE (addr) == SET)
11472 ;
11473 else if (GET_CODE (addr) == PARALLEL
11474 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
11475 addr = XVECEXP (addr, 0, 0);
11476 else
11477 abort ();
11478 addr = SET_SRC (addr);
11479 }
11480 else
11481 {
11482 int i;
11483 extract_insn_cached (insn);
11484 for (i = recog_data.n_operands - 1; i >= 0; --i)
11485 if (GET_CODE (recog_data.operand[i]) == MEM)
11486 {
11487 addr = XEXP (recog_data.operand[i], 0);
11488 goto found;
11489 }
11490 return 0;
11491 found:;
11492 }
11493
11494 return modified_in_p (addr, dep_insn);
11495 }
11496
11497 static int
11498 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
11499 {
11500 enum attr_type insn_type, dep_insn_type;
11501 enum attr_memory memory;
11502 rtx set, set2;
11503 int dep_insn_code_number;
11504
11505 /* Anti and output dependencies have zero cost on all CPUs. */
11506 if (REG_NOTE_KIND (link) != 0)
11507 return 0;
11508
11509 dep_insn_code_number = recog_memoized (dep_insn);
11510
11511 /* If we can't recognize the insns, we can't really do anything. */
11512 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
11513 return cost;
11514
11515 insn_type = get_attr_type (insn);
11516 dep_insn_type = get_attr_type (dep_insn);
11517
11518 switch (ix86_tune)
11519 {
11520 case PROCESSOR_PENTIUM:
11521 /* Address Generation Interlock adds a cycle of latency. */
11522 if (ix86_agi_dependant (insn, dep_insn, insn_type))
11523 cost += 1;
11524
11525 /* ??? Compares pair with jump/setcc. */
11526 if (ix86_flags_dependant (insn, dep_insn, insn_type))
11527 cost = 0;
11528
11529 /* Floating point stores require the value to be ready one cycle earlier. */
11530 if (insn_type == TYPE_FMOV
11531 && get_attr_memory (insn) == MEMORY_STORE
11532 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11533 cost += 1;
11534 break;
11535
11536 case PROCESSOR_PENTIUMPRO:
11537 memory = get_attr_memory (insn);
11538
11539 /* INT->FP conversion is expensive. */
11540 if (get_attr_fp_int_src (dep_insn))
11541 cost += 5;
11542
11543 /* There is one extra cycle of latency between an FP op and a store. */
11544 if (insn_type == TYPE_FMOV
11545 && (set = single_set (dep_insn)) != NULL_RTX
11546 && (set2 = single_set (insn)) != NULL_RTX
11547 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
11548 && GET_CODE (SET_DEST (set2)) == MEM)
11549 cost += 1;
11550
11551 /* Model the ability of the reorder buffer to hide the latency of a load
11552 by executing it in parallel with the previous instruction, provided the
11553 previous instruction is not needed to compute the address. */
11554 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11555 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11556 {
11557 /* Claim that moves take one cycle, as the core can issue one load
11558 at a time and the next load can start a cycle later. */
11559 if (dep_insn_type == TYPE_IMOV
11560 || dep_insn_type == TYPE_FMOV)
11561 cost = 1;
11562 else if (cost > 1)
11563 cost--;
11564 }
11565 break;
11566
11567 case PROCESSOR_K6:
11568 memory = get_attr_memory (insn);
11569
11570 /* The esp dependency is resolved before the instruction is really
11571 finished. */
11572 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
11573 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
11574 return 1;
11575
11576 /* INT->FP conversion is expensive. */
11577 if (get_attr_fp_int_src (dep_insn))
11578 cost += 5;
11579
11580 /* Model the ability of the reorder buffer to hide the latency of a load
11581 by executing it in parallel with the previous instruction, provided the
11582 previous instruction is not needed to compute the address. */
11583 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11584 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11585 {
11586 /* Claim that moves take one cycle, as the core can issue one load
11587 at a time and the next load can start a cycle later. */
11588 if (dep_insn_type == TYPE_IMOV
11589 || dep_insn_type == TYPE_FMOV)
11590 cost = 1;
11591 else if (cost > 2)
11592 cost -= 2;
11593 else
11594 cost = 1;
11595 }
11596 break;
11597
11598 case PROCESSOR_ATHLON:
11599 case PROCESSOR_K8:
11600 memory = get_attr_memory (insn);
11601
11602 /* Model the ability of the reorder buffer to hide the latency of a load
11603 by executing it in parallel with the previous instruction, provided the
11604 previous instruction is not needed to compute the address. */
11605 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11606 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11607 {
11608 enum attr_unit unit = get_attr_unit (insn);
11609 int loadcost = 3;
11610
11611 /* Because of the difference in length between the integer and
11612 floating-point unit pipeline preparation stages, memory operands
11613 for floating point are cheaper.
11614 
11615 ??? For Athlon the difference is most probably 2. */
11616 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
11617 loadcost = 3;
11618 else
11619 loadcost = TARGET_ATHLON ? 2 : 0;
11620
11621 if (cost >= loadcost)
11622 cost -= loadcost;
11623 else
11624 cost = 0;
11625 }
11626
11627 default:
11628 break;
11629 }
11630
11631 return cost;
11632 }
11633
11634 /* How many alternative schedules to try. This should be as wide as the
11635 scheduling freedom in the DFA, but no wider. Making this value too
11636 large results in extra work for the scheduler. */
11637
11638 static int
11639 ia32_multipass_dfa_lookahead (void)
11640 {
11641 if (ix86_tune == PROCESSOR_PENTIUM)
11642 return 2;
11643
11644 if (ix86_tune == PROCESSOR_PENTIUMPRO
11645 || ix86_tune == PROCESSOR_K6)
11646 return 1;
11647
11648 else
11649 return 0;
11650 }
11651
11652 \f
11653 /* Implement the target hook targetm.vectorize.misaligned_mem_ok. */
11654
11655 static bool
11656 ix86_misaligned_mem_ok (enum machine_mode mode)
11657 {
11658 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
11659 return true;
11660 else
11661 return false;
11662 }
11663
11664 /* Compute the alignment given to a constant that is being placed in memory.
11665 EXP is the constant and ALIGN is the alignment that the object would
11666 ordinarily have.
11667 The value of this function is used instead of that alignment to align
11668 the object. */
11669
11670 int
11671 ix86_constant_alignment (tree exp, int align)
11672 {
11673 if (TREE_CODE (exp) == REAL_CST)
11674 {
11675 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
11676 return 64;
11677 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
11678 return 128;
11679 }
11680 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
11681 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
11682 return BITS_PER_WORD;
11683
11684 return align;
11685 }
11686
11687 /* Compute the alignment for a static variable.
11688 TYPE is the data type, and ALIGN is the alignment that
11689 the object would ordinarily have. The value of this function is used
11690 instead of that alignment to align the object. */
11691
11692 int
11693 ix86_data_alignment (tree type, int align)
11694 {
11695 if (AGGREGATE_TYPE_P (type)
11696 && TYPE_SIZE (type)
11697 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11698 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
11699 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
11700 return 256;
11701
11702 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
11703 to a 16-byte boundary. */
11704 if (TARGET_64BIT)
11705 {
11706 if (AGGREGATE_TYPE_P (type)
11707 && TYPE_SIZE (type)
11708 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11709 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
11710 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
11711 return 128;
11712 }
11713
11714 if (TREE_CODE (type) == ARRAY_TYPE)
11715 {
11716 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
11717 return 64;
11718 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
11719 return 128;
11720 }
11721 else if (TREE_CODE (type) == COMPLEX_TYPE)
11722 {
11723
11724 if (TYPE_MODE (type) == DCmode && align < 64)
11725 return 64;
11726 if (TYPE_MODE (type) == XCmode && align < 128)
11727 return 128;
11728 }
11729 else if ((TREE_CODE (type) == RECORD_TYPE
11730 || TREE_CODE (type) == UNION_TYPE
11731 || TREE_CODE (type) == QUAL_UNION_TYPE)
11732 && TYPE_FIELDS (type))
11733 {
11734 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
11735 return 64;
11736 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
11737 return 128;
11738 }
11739 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
11740 || TREE_CODE (type) == INTEGER_TYPE)
11741 {
11742 if (TYPE_MODE (type) == DFmode && align < 64)
11743 return 64;
11744 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
11745 return 128;
11746 }
11747
11748 return align;
11749 }
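/* For example, under the rules above "static char buf[64]" (512 bits) is
   raised to 256-bit alignment, "static double d[2]" is raised to 64-bit
   alignment on IA-32 (and to 128-bit on x86-64, where the 16-byte aggregate
   rule applies), and a complex object in XCmode gets 128-bit alignment.  */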
11750
11751 /* Compute the alignment for a local variable.
11752 TYPE is the data type, and ALIGN is the alignment that
11753 the object would ordinarily have. The value of this macro is used
11754 instead of that alignment to align the object. */
11755
11756 int
11757 ix86_local_alignment (tree type, int align)
11758 {
11759 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
11760 to a 16-byte boundary. */
11761 if (TARGET_64BIT)
11762 {
11763 if (AGGREGATE_TYPE_P (type)
11764 && TYPE_SIZE (type)
11765 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11766 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
11767 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
11768 return 128;
11769 }
11770 if (TREE_CODE (type) == ARRAY_TYPE)
11771 {
11772 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
11773 return 64;
11774 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
11775 return 128;
11776 }
11777 else if (TREE_CODE (type) == COMPLEX_TYPE)
11778 {
11779 if (TYPE_MODE (type) == DCmode && align < 64)
11780 return 64;
11781 if (TYPE_MODE (type) == XCmode && align < 128)
11782 return 128;
11783 }
11784 else if ((TREE_CODE (type) == RECORD_TYPE
11785 || TREE_CODE (type) == UNION_TYPE
11786 || TREE_CODE (type) == QUAL_UNION_TYPE)
11787 && TYPE_FIELDS (type))
11788 {
11789 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
11790 return 64;
11791 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
11792 return 128;
11793 }
11794 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
11795 || TREE_CODE (type) == INTEGER_TYPE)
11796 {
11797
11798 if (TYPE_MODE (type) == DFmode && align < 64)
11799 return 64;
11800 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
11801 return 128;
11802 }
11803 return align;
11804 }
11805 \f
11806 /* Emit RTL insns to initialize the variable parts of a trampoline.
11807 FNADDR is an RTX for the address of the function's pure code.
11808 CXT is an RTX for the static chain value for the function. */
11809 void
11810 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
11811 {
11812 if (!TARGET_64BIT)
11813 {
11814 /* Compute offset from the end of the jmp to the target function. */
11815 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
11816 plus_constant (tramp, 10),
11817 NULL_RTX, 1, OPTAB_DIRECT);
11818 emit_move_insn (gen_rtx_MEM (QImode, tramp),
11819 gen_int_mode (0xb9, QImode));
11820 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
11821 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
11822 gen_int_mode (0xe9, QImode));
11823 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
11824 }
11825 else
11826 {
11827 int offset = 0;
11828 /* Try to load the address using the shorter movl instead of movabs.
11829 We may want to support movq for kernel mode, but the kernel does not use
11830 trampolines at the moment. */
11831 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
11832 {
11833 fnaddr = copy_to_mode_reg (DImode, fnaddr);
11834 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11835 gen_int_mode (0xbb41, HImode));
11836 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
11837 gen_lowpart (SImode, fnaddr));
11838 offset += 6;
11839 }
11840 else
11841 {
11842 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11843 gen_int_mode (0xbb49, HImode));
11844 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
11845 fnaddr);
11846 offset += 10;
11847 }
11848 /* Load static chain using movabs to r10. */
11849 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11850 gen_int_mode (0xba49, HImode));
11851 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
11852 cxt);
11853 offset += 10;
11854 /* Jump through r11. */
11855 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11856 gen_int_mode (0xff49, HImode));
11857 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
11858 gen_int_mode (0xe3, QImode));
11859 offset += 3;
11860 if (offset > TRAMPOLINE_SIZE)
11861 abort ();
11862 }
11863
11864 #ifdef ENABLE_EXECUTE_STACK
11865 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
11866 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
11867 #endif
11868 }
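/* For reference, the 32-bit trampoline built above is 10 bytes:

     offset 0:  b9 <imm32>    movl  $<static chain>, %ecx
     offset 5:  e9 <rel32>    jmp   <fnaddr>    (rel32 = fnaddr - (tramp + 10))

   and the 64-bit variant is 23 bytes with the movabs form (19 with the
   shorter movl); the HImode stores are little-endian, so e.g. 0xbb49 lands
   in memory as the byte sequence 49 bb:

     49 bb <imm64>   movabsq $<fnaddr>, %r11    (or 41 bb <imm32>, movl to %r11d)
     49 ba <imm64>   movabsq $<static chain>, %r10
     49 ff e3        jmp     *%r11  */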
11869 \f
11870 #define def_builtin(MASK, NAME, TYPE, CODE) \
11871 do { \
11872 if ((MASK) & target_flags \
11873 && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
11874 lang_hooks.builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
11875 NULL, NULL_TREE); \
11876 } while (0)
11877
11878 struct builtin_description
11879 {
11880 const unsigned int mask;
11881 const enum insn_code icode;
11882 const char *const name;
11883 const enum ix86_builtins code;
11884 const enum rtx_code comparison;
11885 const unsigned int flag;
11886 };
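/* Each entry in the descriptor tables below ties one builtin to an insn
   pattern; ix86_init_mmx_sse_builtins registers the named entries, and the
   builtin expanders use the icode and comparison fields to emit the insn.
   A minimal user-level sketch of one such builtin, __builtin_ia32_addps,
   which maps onto CODE_FOR_addv4sf3 (compile with -msse; the wrapper name
   is hypothetical):

     typedef float v4sf __attribute__ ((vector_size (16)));

     v4sf
     add4 (v4sf a, v4sf b)
     {
       return __builtin_ia32_addps (a, b);
     }
 */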
11887
11888 static const struct builtin_description bdesc_comi[] =
11889 {
11890 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
11891 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
11892 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
11893 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
11894 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
11895 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
11896 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
11897 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
11898 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
11899 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
11900 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
11901 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
11902 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
11903 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
11904 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
11905 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
11906 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
11907 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
11908 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
11909 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
11910 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
11911 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
11912 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
11913 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
11914 };
11915
11916 static const struct builtin_description bdesc_2arg[] =
11917 {
11918 /* SSE */
11919 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
11920 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
11921 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
11922 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
11923 { MASK_SSE, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
11924 { MASK_SSE, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
11925 { MASK_SSE, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
11926 { MASK_SSE, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
11927
11928 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
11929 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
11930 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
11931 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
11932 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
11933 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
11934 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
11935 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
11936 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
11937 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
11938 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
11939 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
11940 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
11941 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
11942 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
11943 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
11944 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
11945 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
11946 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
11947 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
11948
11949 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
11950 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
11951 { MASK_SSE, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
11952 { MASK_SSE, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
11953
11954 { MASK_SSE, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
11955 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
11956 { MASK_SSE, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
11957 { MASK_SSE, CODE_FOR_sse_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
11958
11959 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
11960 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
11961 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
11962 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
11963 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
11964
11965 /* MMX */
11966 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
11967 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
11968 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
11969 { MASK_MMX, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
11970 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
11971 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
11972 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
11973 { MASK_MMX, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
11974
11975 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
11976 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
11977 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
11978 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
11979 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
11980 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
11981 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
11982 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
11983
11984 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
11985 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
11986 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
11987
11988 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
11989 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
11990 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
11991 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
11992
11993 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
11994 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
11995
11996 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
11997 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
11998 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
11999 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
12000 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
12001 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
12002
12003 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
12004 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
12005 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
12006 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
12007
12008 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
12009 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
12010 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
12011 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
12012 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
12013 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
12014
12015 /* Special. */
12016 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
12017 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
12018 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
12019
12020 { MASK_SSE, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
12021 { MASK_SSE, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
12022 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
12023
12024 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
12025 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
12026 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
12027 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
12028 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
12029 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
12030
12031 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
12032 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
12033 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
12034 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
12035 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
12036 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
12037
12038 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
12039 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
12040 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
12041 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
12042
12043 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
12044 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
12045
12046 /* SSE2 */
12047 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
12048 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
12049 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
12050 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
12051 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
12052 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
12053 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
12054 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
12055
12056 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
12057 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
12058 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
12059 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
12060 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
12061 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
12062 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
12063 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
12064 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
12065 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
12066 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
12067 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
12068 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
12069 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
12070 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
12071 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
12072 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
12073 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
12074 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
12075 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
12076
12077 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
12078 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
12079 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
12080 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
12081
12082 { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
12083 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
12084 { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
12085 { MASK_SSE2, CODE_FOR_sse2_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
12086
12087 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
12088 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
12089 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
12090
12091 /* SSE2 MMX */
12092 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
12093 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
12094 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
12095 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
12096 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
12097 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
12098 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
12099 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
12100
12101 { MASK_SSE2, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
12102 { MASK_SSE2, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
12103 { MASK_SSE2, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
12104 { MASK_SSE2, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
12105 { MASK_SSE2, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
12106 { MASK_SSE2, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
12107 { MASK_SSE2, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
12108 { MASK_SSE2, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
12109
12110 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
12111 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
12112
12113 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
12114 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
12115 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
12116 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
12117
12118 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
12119 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
12120
12121 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
12122 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
12123 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
12124 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
12125 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
12126 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
12127
12128 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
12129 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
12130 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
12131 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
12132
12133 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
12134 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
12135 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
12136 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
12137 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
12138 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
12139 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
12140 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
12141
12142 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
12143 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
12144 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
12145
12146 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
12147 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
12148
12149 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
12150 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },
12151
12152 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
12153 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
12154 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
12155 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
12156 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
12157 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
12158
12159 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
12160 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
12161 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
12162 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
12163 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
12164 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
12165
12166 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
12167 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
12168 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
12169 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
12170
12171 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
12172
12173 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
12174 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
12175 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
12176 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
12177
12178 /* SSE3 MMX */
12179 { MASK_SSE3, CODE_FOR_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
12180 { MASK_SSE3, CODE_FOR_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
12181 { MASK_SSE3, CODE_FOR_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
12182 { MASK_SSE3, CODE_FOR_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
12183 { MASK_SSE3, CODE_FOR_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
12184 { MASK_SSE3, CODE_FOR_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
12185 };
12186
12187 static const struct builtin_description bdesc_1arg[] =
12188 {
12189 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
12190 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
12191
12192 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
12193 { MASK_SSE, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
12194 { MASK_SSE, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
12195
12196 { MASK_SSE, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
12197 { MASK_SSE, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
12198 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
12199 { MASK_SSE, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
12200 { MASK_SSE, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
12201 { MASK_SSE | MASK_64BIT, CODE_FOR_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
12202
12203 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
12204 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
12205 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
12206 { MASK_SSE2, CODE_FOR_sse2_movdq2q, 0, IX86_BUILTIN_MOVDQ2Q, 0, 0 },
12207
12208 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
12209
12210 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
12211 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
12212
12213 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
12214 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
12215 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
12216 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
12217 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
12218
12219 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
12220
12221 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
12222 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
12223 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
12224 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
12225
12226 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
12227 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
12228 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
12229
12230 { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 },
12231
12232 /* SSE3 */
12233 { MASK_SSE3, CODE_FOR_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
12234 { MASK_SSE3, CODE_FOR_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
12235 { MASK_SSE3, CODE_FOR_movddup, 0, IX86_BUILTIN_MOVDDUP, 0, 0 }
12236 };
12237
12238 void
12239 ix86_init_builtins (void)
12240 {
12241 if (TARGET_MMX)
12242 ix86_init_mmx_sse_builtins ();
12243 }
12244
12245 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
12246 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
12247 builtins. */
12248 static void
12249 ix86_init_mmx_sse_builtins (void)
12250 {
12251 const struct builtin_description * d;
12252 size_t i;
12253
12254 tree V16QI_type_node = build_vector_type_for_mode (intQI_type_node, V16QImode);
12255 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
12256 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
12257 tree V2DI_type_node = build_vector_type_for_mode (intDI_type_node, V2DImode);
12258 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
12259 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
12260 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
12261 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
12262 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
12263 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
12264
12265 tree pchar_type_node = build_pointer_type (char_type_node);
12266 tree pcchar_type_node = build_pointer_type (
12267 build_type_variant (char_type_node, 1, 0));
12268 tree pfloat_type_node = build_pointer_type (float_type_node);
12269 tree pcfloat_type_node = build_pointer_type (
12270 build_type_variant (float_type_node, 1, 0));
12271 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
12272 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
12273 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
12274
12275 /* Comparisons. */
12276 tree int_ftype_v4sf_v4sf
12277 = build_function_type_list (integer_type_node,
12278 V4SF_type_node, V4SF_type_node, NULL_TREE);
12279 tree v4si_ftype_v4sf_v4sf
12280 = build_function_type_list (V4SI_type_node,
12281 V4SF_type_node, V4SF_type_node, NULL_TREE);
12282 /* MMX/SSE/integer conversions. */
12283 tree int_ftype_v4sf
12284 = build_function_type_list (integer_type_node,
12285 V4SF_type_node, NULL_TREE);
12286 tree int64_ftype_v4sf
12287 = build_function_type_list (long_long_integer_type_node,
12288 V4SF_type_node, NULL_TREE);
12289 tree int_ftype_v8qi
12290 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
12291 tree v4sf_ftype_v4sf_int
12292 = build_function_type_list (V4SF_type_node,
12293 V4SF_type_node, integer_type_node, NULL_TREE);
12294 tree v4sf_ftype_v4sf_int64
12295 = build_function_type_list (V4SF_type_node,
12296 V4SF_type_node, long_long_integer_type_node,
12297 NULL_TREE);
12298 tree v4sf_ftype_v4sf_v2si
12299 = build_function_type_list (V4SF_type_node,
12300 V4SF_type_node, V2SI_type_node, NULL_TREE);
12301 tree int_ftype_v4hi_int
12302 = build_function_type_list (integer_type_node,
12303 V4HI_type_node, integer_type_node, NULL_TREE);
12304 tree v4hi_ftype_v4hi_int_int
12305 = build_function_type_list (V4HI_type_node, V4HI_type_node,
12306 integer_type_node, integer_type_node,
12307 NULL_TREE);
12308 /* Miscellaneous. */
12309 tree v8qi_ftype_v4hi_v4hi
12310 = build_function_type_list (V8QI_type_node,
12311 V4HI_type_node, V4HI_type_node, NULL_TREE);
12312 tree v4hi_ftype_v2si_v2si
12313 = build_function_type_list (V4HI_type_node,
12314 V2SI_type_node, V2SI_type_node, NULL_TREE);
12315 tree v4sf_ftype_v4sf_v4sf_int
12316 = build_function_type_list (V4SF_type_node,
12317 V4SF_type_node, V4SF_type_node,
12318 integer_type_node, NULL_TREE);
12319 tree v2si_ftype_v4hi_v4hi
12320 = build_function_type_list (V2SI_type_node,
12321 V4HI_type_node, V4HI_type_node, NULL_TREE);
12322 tree v4hi_ftype_v4hi_int
12323 = build_function_type_list (V4HI_type_node,
12324 V4HI_type_node, integer_type_node, NULL_TREE);
12325 tree v4hi_ftype_v4hi_di
12326 = build_function_type_list (V4HI_type_node,
12327 V4HI_type_node, long_long_unsigned_type_node,
12328 NULL_TREE);
12329 tree v2si_ftype_v2si_di
12330 = build_function_type_list (V2SI_type_node,
12331 V2SI_type_node, long_long_unsigned_type_node,
12332 NULL_TREE);
12333 tree void_ftype_void
12334 = build_function_type (void_type_node, void_list_node);
12335 tree void_ftype_unsigned
12336 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
12337 tree void_ftype_unsigned_unsigned
12338 = build_function_type_list (void_type_node, unsigned_type_node,
12339 unsigned_type_node, NULL_TREE);
12340 tree void_ftype_pcvoid_unsigned_unsigned
12341 = build_function_type_list (void_type_node, const_ptr_type_node,
12342 unsigned_type_node, unsigned_type_node,
12343 NULL_TREE);
12344 tree unsigned_ftype_void
12345 = build_function_type (unsigned_type_node, void_list_node);
12346 tree di_ftype_void
12347 = build_function_type (long_long_unsigned_type_node, void_list_node);
12348 tree v4sf_ftype_void
12349 = build_function_type (V4SF_type_node, void_list_node);
12350 tree v2si_ftype_v4sf
12351 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
12352 /* Loads/stores. */
12353 tree void_ftype_v8qi_v8qi_pchar
12354 = build_function_type_list (void_type_node,
12355 V8QI_type_node, V8QI_type_node,
12356 pchar_type_node, NULL_TREE);
12357 tree v4sf_ftype_pcfloat
12358 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
12359 /* @@@ the type is bogus */
12360 tree v4sf_ftype_v4sf_pv2si
12361 = build_function_type_list (V4SF_type_node,
12362 V4SF_type_node, pv2si_type_node, NULL_TREE);
12363 tree void_ftype_pv2si_v4sf
12364 = build_function_type_list (void_type_node,
12365 pv2si_type_node, V4SF_type_node, NULL_TREE);
12366 tree void_ftype_pfloat_v4sf
12367 = build_function_type_list (void_type_node,
12368 pfloat_type_node, V4SF_type_node, NULL_TREE);
12369 tree void_ftype_pdi_di
12370 = build_function_type_list (void_type_node,
12371 pdi_type_node, long_long_unsigned_type_node,
12372 NULL_TREE);
12373 tree void_ftype_pv2di_v2di
12374 = build_function_type_list (void_type_node,
12375 pv2di_type_node, V2DI_type_node, NULL_TREE);
12376 /* Normal vector unops. */
12377 tree v4sf_ftype_v4sf
12378 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
12379
12380 /* Normal vector binops. */
12381 tree v4sf_ftype_v4sf_v4sf
12382 = build_function_type_list (V4SF_type_node,
12383 V4SF_type_node, V4SF_type_node, NULL_TREE);
12384 tree v8qi_ftype_v8qi_v8qi
12385 = build_function_type_list (V8QI_type_node,
12386 V8QI_type_node, V8QI_type_node, NULL_TREE);
12387 tree v4hi_ftype_v4hi_v4hi
12388 = build_function_type_list (V4HI_type_node,
12389 V4HI_type_node, V4HI_type_node, NULL_TREE);
12390 tree v2si_ftype_v2si_v2si
12391 = build_function_type_list (V2SI_type_node,
12392 V2SI_type_node, V2SI_type_node, NULL_TREE);
12393 tree di_ftype_di_di
12394 = build_function_type_list (long_long_unsigned_type_node,
12395 long_long_unsigned_type_node,
12396 long_long_unsigned_type_node, NULL_TREE);
12397
12398 tree v2si_ftype_v2sf
12399 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
12400 tree v2sf_ftype_v2si
12401 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
12402 tree v2si_ftype_v2si
12403 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
12404 tree v2sf_ftype_v2sf
12405 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
12406 tree v2sf_ftype_v2sf_v2sf
12407 = build_function_type_list (V2SF_type_node,
12408 V2SF_type_node, V2SF_type_node, NULL_TREE);
12409 tree v2si_ftype_v2sf_v2sf
12410 = build_function_type_list (V2SI_type_node,
12411 V2SF_type_node, V2SF_type_node, NULL_TREE);
12412 tree pint_type_node = build_pointer_type (integer_type_node);
12413 tree pcint_type_node = build_pointer_type (
12414 build_type_variant (integer_type_node, 1, 0));
12415 tree pdouble_type_node = build_pointer_type (double_type_node);
12416 tree pcdouble_type_node = build_pointer_type (
12417 build_type_variant (double_type_node, 1, 0));
12418 tree int_ftype_v2df_v2df
12419 = build_function_type_list (integer_type_node,
12420 V2DF_type_node, V2DF_type_node, NULL_TREE);
12421
12422 tree ti_ftype_void
12423 = build_function_type (intTI_type_node, void_list_node);
12424 tree v2di_ftype_void
12425 = build_function_type (V2DI_type_node, void_list_node);
12426 tree ti_ftype_ti_ti
12427 = build_function_type_list (intTI_type_node,
12428 intTI_type_node, intTI_type_node, NULL_TREE);
12429 tree void_ftype_pcvoid
12430 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
12431 tree v2di_ftype_di
12432 = build_function_type_list (V2DI_type_node,
12433 long_long_unsigned_type_node, NULL_TREE);
12434 tree di_ftype_v2di
12435 = build_function_type_list (long_long_unsigned_type_node,
12436 V2DI_type_node, NULL_TREE);
12437 tree v4sf_ftype_v4si
12438 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
12439 tree v4si_ftype_v4sf
12440 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
12441 tree v2df_ftype_v4si
12442 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
12443 tree v4si_ftype_v2df
12444 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
12445 tree v2si_ftype_v2df
12446 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
12447 tree v4sf_ftype_v2df
12448 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
12449 tree v2df_ftype_v2si
12450 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
12451 tree v2df_ftype_v4sf
12452 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
12453 tree int_ftype_v2df
12454 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
12455 tree int64_ftype_v2df
12456 = build_function_type_list (long_long_integer_type_node,
12457 V2DF_type_node, NULL_TREE);
12458 tree v2df_ftype_v2df_int
12459 = build_function_type_list (V2DF_type_node,
12460 V2DF_type_node, integer_type_node, NULL_TREE);
12461 tree v2df_ftype_v2df_int64
12462 = build_function_type_list (V2DF_type_node,
12463 V2DF_type_node, long_long_integer_type_node,
12464 NULL_TREE);
12465 tree v4sf_ftype_v4sf_v2df
12466 = build_function_type_list (V4SF_type_node,
12467 V4SF_type_node, V2DF_type_node, NULL_TREE);
12468 tree v2df_ftype_v2df_v4sf
12469 = build_function_type_list (V2DF_type_node,
12470 V2DF_type_node, V4SF_type_node, NULL_TREE);
12471 tree v2df_ftype_v2df_v2df_int
12472 = build_function_type_list (V2DF_type_node,
12473 V2DF_type_node, V2DF_type_node,
12474 integer_type_node,
12475 NULL_TREE);
12476 tree v2df_ftype_v2df_pv2si
12477 = build_function_type_list (V2DF_type_node,
12478 V2DF_type_node, pv2si_type_node, NULL_TREE);
12479 tree void_ftype_pv2si_v2df
12480 = build_function_type_list (void_type_node,
12481 pv2si_type_node, V2DF_type_node, NULL_TREE);
12482 tree void_ftype_pdouble_v2df
12483 = build_function_type_list (void_type_node,
12484 pdouble_type_node, V2DF_type_node, NULL_TREE);
12485 tree void_ftype_pint_int
12486 = build_function_type_list (void_type_node,
12487 pint_type_node, integer_type_node, NULL_TREE);
12488 tree void_ftype_v16qi_v16qi_pchar
12489 = build_function_type_list (void_type_node,
12490 V16QI_type_node, V16QI_type_node,
12491 pchar_type_node, NULL_TREE);
12492 tree v2df_ftype_pcdouble
12493 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
12494 tree v2df_ftype_v2df_v2df
12495 = build_function_type_list (V2DF_type_node,
12496 V2DF_type_node, V2DF_type_node, NULL_TREE);
12497 tree v16qi_ftype_v16qi_v16qi
12498 = build_function_type_list (V16QI_type_node,
12499 V16QI_type_node, V16QI_type_node, NULL_TREE);
12500 tree v8hi_ftype_v8hi_v8hi
12501 = build_function_type_list (V8HI_type_node,
12502 V8HI_type_node, V8HI_type_node, NULL_TREE);
12503 tree v4si_ftype_v4si_v4si
12504 = build_function_type_list (V4SI_type_node,
12505 V4SI_type_node, V4SI_type_node, NULL_TREE);
12506 tree v2di_ftype_v2di_v2di
12507 = build_function_type_list (V2DI_type_node,
12508 V2DI_type_node, V2DI_type_node, NULL_TREE);
12509 tree v2di_ftype_v2df_v2df
12510 = build_function_type_list (V2DI_type_node,
12511 V2DF_type_node, V2DF_type_node, NULL_TREE);
12512 tree v2df_ftype_v2df
12513 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
12514 tree v2df_ftype_double
12515 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
12516 tree v2df_ftype_double_double
12517 = build_function_type_list (V2DF_type_node,
12518 double_type_node, double_type_node, NULL_TREE);
12519 tree int_ftype_v8hi_int
12520 = build_function_type_list (integer_type_node,
12521 V8HI_type_node, integer_type_node, NULL_TREE);
12522 tree v8hi_ftype_v8hi_int_int
12523 = build_function_type_list (V8HI_type_node,
12524 V8HI_type_node, integer_type_node,
12525 integer_type_node, NULL_TREE);
12526 tree v2di_ftype_v2di_int
12527 = build_function_type_list (V2DI_type_node,
12528 V2DI_type_node, integer_type_node, NULL_TREE);
12529 tree v4si_ftype_v4si_int
12530 = build_function_type_list (V4SI_type_node,
12531 V4SI_type_node, integer_type_node, NULL_TREE);
12532 tree v8hi_ftype_v8hi_int
12533 = build_function_type_list (V8HI_type_node,
12534 V8HI_type_node, integer_type_node, NULL_TREE);
12535 tree v8hi_ftype_v8hi_v2di
12536 = build_function_type_list (V8HI_type_node,
12537 V8HI_type_node, V2DI_type_node, NULL_TREE);
12538 tree v4si_ftype_v4si_v2di
12539 = build_function_type_list (V4SI_type_node,
12540 V4SI_type_node, V2DI_type_node, NULL_TREE);
12541 tree v4si_ftype_v8hi_v8hi
12542 = build_function_type_list (V4SI_type_node,
12543 V8HI_type_node, V8HI_type_node, NULL_TREE);
12544 tree di_ftype_v8qi_v8qi
12545 = build_function_type_list (long_long_unsigned_type_node,
12546 V8QI_type_node, V8QI_type_node, NULL_TREE);
12547 tree di_ftype_v2si_v2si
12548 = build_function_type_list (long_long_unsigned_type_node,
12549 V2SI_type_node, V2SI_type_node, NULL_TREE);
12550 tree v2di_ftype_v16qi_v16qi
12551 = build_function_type_list (V2DI_type_node,
12552 V16QI_type_node, V16QI_type_node, NULL_TREE);
12553 tree v2di_ftype_v4si_v4si
12554 = build_function_type_list (V2DI_type_node,
12555 V4SI_type_node, V4SI_type_node, NULL_TREE);
12556 tree int_ftype_v16qi
12557 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
12558 tree v16qi_ftype_pcchar
12559 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
12560 tree void_ftype_pchar_v16qi
12561 = build_function_type_list (void_type_node,
12562 pchar_type_node, V16QI_type_node, NULL_TREE);
12563 tree v4si_ftype_pcint
12564 = build_function_type_list (V4SI_type_node, pcint_type_node, NULL_TREE);
12565 tree void_ftype_pcint_v4si
12566 = build_function_type_list (void_type_node,
12567 pcint_type_node, V4SI_type_node, NULL_TREE);
12568 tree v2di_ftype_v2di
12569 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
12570
12571 tree float80_type;
12572 tree float128_type;
12573
12574 /* The __float80 type. */
12575 if (TYPE_MODE (long_double_type_node) == XFmode)
12576 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
12577 "__float80");
12578 else
12579 {
12580 /* The __float80 type. */
12581 float80_type = make_node (REAL_TYPE);
12582 TYPE_PRECISION (float80_type) = 80;
12583 layout_type (float80_type);
12584 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
12585 }
12586
12587 float128_type = make_node (REAL_TYPE);
12588 TYPE_PRECISION (float128_type) = 128;
12589 layout_type (float128_type);
12590 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
12591
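/* Illustrative note (not part of the original sources): registering these
   types only makes the names visible to the front ends, so user code can
   declare objects of them directly, e.g.

       __float80  ext;
       __float128 wide;

   On targets whose long double is already XFmode, __float80 is just
   another name for long double; otherwise a separate 80-bit REAL_TYPE is
   laid out for it above.  */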
12592 /* Add all builtins that are more or less simple operations on two
12593 operands. */
12594 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
12595 {
12596 /* Use one of the operands; the target can have a different mode for
12597 mask-generating compares. */
12598 enum machine_mode mode;
12599 tree type;
12600
12601 if (d->name == 0)
12602 continue;
12603 mode = insn_data[d->icode].operand[1].mode;
12604
12605 switch (mode)
12606 {
12607 case V16QImode:
12608 type = v16qi_ftype_v16qi_v16qi;
12609 break;
12610 case V8HImode:
12611 type = v8hi_ftype_v8hi_v8hi;
12612 break;
12613 case V4SImode:
12614 type = v4si_ftype_v4si_v4si;
12615 break;
12616 case V2DImode:
12617 type = v2di_ftype_v2di_v2di;
12618 break;
12619 case V2DFmode:
12620 type = v2df_ftype_v2df_v2df;
12621 break;
12622 case TImode:
12623 type = ti_ftype_ti_ti;
12624 break;
12625 case V4SFmode:
12626 type = v4sf_ftype_v4sf_v4sf;
12627 break;
12628 case V8QImode:
12629 type = v8qi_ftype_v8qi_v8qi;
12630 break;
12631 case V4HImode:
12632 type = v4hi_ftype_v4hi_v4hi;
12633 break;
12634 case V2SImode:
12635 type = v2si_ftype_v2si_v2si;
12636 break;
12637 case DImode:
12638 type = di_ftype_di_di;
12639 break;
12640
12641 default:
12642 abort ();
12643 }
12644
12645 /* Override for comparisons. */
12646 if (d->icode == CODE_FOR_maskcmpv4sf3
12647 || d->icode == CODE_FOR_maskncmpv4sf3
12648 || d->icode == CODE_FOR_vmmaskcmpv4sf3
12649 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
12650 type = v4si_ftype_v4sf_v4sf;
12651
12652 if (d->icode == CODE_FOR_maskcmpv2df3
12653 || d->icode == CODE_FOR_maskncmpv2df3
12654 || d->icode == CODE_FOR_vmmaskcmpv2df3
12655 || d->icode == CODE_FOR_vmmaskncmpv2df3)
12656 type = v2di_ftype_v2df_v2df;
12657
12658 def_builtin (d->mask, d->name, type, d->code);
12659 }
12660
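/* Illustrative sketch (not part of the original sources): the loop above
   gives every simple two-operand builtin the function type implied by its
   operand mode, so an MMX entry whose operands are V8QImode ends up with
   v8qi_ftype_v8qi_v8qi and can be called from user code built with -mmmx,
   for example:

       typedef char v8qi __attribute__ ((vector_size (8)));

       v8qi
       add_bytes (v8qi a, v8qi b)
       {
         return __builtin_ia32_paddb (a, b);
       }

   Such calls are later expanded by ix86_expand_binop_builtin.  */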
12661 /* Add the remaining MMX insns with somewhat more complicated types. */
12662 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
12663 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
12664 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
12665 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
12666 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
12667
12668 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
12669 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
12670 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
12671
12672 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
12673 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
12674
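/* Illustrative note (not part of the original sources): the shift-count
   operand of the MMX shift builtins above is declared as a 64-bit value
   (the ..._di types) because the hardware psllw/pslld/psllq family reads
   the count from a full MMX register, so a call looks like

       r = __builtin_ia32_psllw (a, (long long) n);

   with A and R of the 4x16-bit vector type.  */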
12675 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
12676 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
12677
12678 /* comi/ucomi insns. */
12679 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
12680 if (d->mask == MASK_SSE2)
12681 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
12682 else
12683 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
12684
12685 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
12686 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
12687 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
12688
12689 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
12690 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
12691 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
12692 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
12693 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
12694 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
12695 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
12696 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
12697 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
12698 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
12699 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
12700
12701 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
12702 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
12703
12704 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
12705
12706 def_builtin (MASK_SSE, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADAPS);
12707 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
12708 def_builtin (MASK_SSE, "__builtin_ia32_loadss", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADSS);
12709 def_builtin (MASK_SSE, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
12710 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
12711 def_builtin (MASK_SSE, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
12712
12713 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
12714 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
12715 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
12716 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
12717
12718 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
12719 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
12720 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
12721 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
12722
12723 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
12724
12725 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
12726
12727 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
12728 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
12729 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
12730 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
12731 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
12732 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
12733
12734 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
12735
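/* Illustrative note (not part of the original sources): the last argument
   of __builtin_ia32_shufps is the shuffle selector, and the
   IX86_BUILTIN_SHUFPS case of ix86_expand_builtin below rejects it unless
   it is an immediate, so callers must pass a compile-time constant:

       lo = __builtin_ia32_shufps (a, b, 0x44);
 */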
12736 /* Original 3DNow! */
12737 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
12738 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
12739 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
12740 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
12741 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
12742 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
12743 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
12744 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
12745 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
12746 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
12747 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
12748 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
12749 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
12750 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
12751 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
12752 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
12753 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
12754 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
12755 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
12756 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
12757
12758 /* 3DNow! extension as used in the Athlon CPU. */
12759 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
12760 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
12761 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
12762 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
12763 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
12764 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
12765
12766 def_builtin (MASK_SSE, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
12767
12768 /* SSE2 */
12769 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
12770 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
12771
12772 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
12773 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
12774 def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q);
12775
12776 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADAPD);
12777 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
12778 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADSD);
12779 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
12780 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
12781 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
12782
12783 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
12784 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
12785 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
12786 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
12787
12788 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
12789 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
12790 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
12791 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
12792 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
12793
12794 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
12795 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
12796 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
12797 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
12798
12799 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
12800 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
12801
12802 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
12803
12804 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
12805 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
12806
12807 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
12808 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
12809 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
12810 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
12811 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
12812
12813 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
12814
12815 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
12816 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
12817 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
12818 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
12819
12820 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
12821 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
12822 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
12823
12824 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
12825 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
12826 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
12827 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
12828
12829 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
12830 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
12831 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
12832 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pcdouble, IX86_BUILTIN_LOADPD1);
12833 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADRPD);
12834 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
12835 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
12836
12837 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
12838 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
12839 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
12840
12841 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQA);
12842 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
12843 def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pcint, IX86_BUILTIN_LOADD);
12844 def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA);
12845 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
12846 def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pcint_v4si, IX86_BUILTIN_STORED);
12847 def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ);
12848
12849 def_builtin (MASK_SSE, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI);
12850
12851 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
12852 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
12853
12854 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
12855 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
12856 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
12857
12858 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
12859 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
12860 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
12861
12862 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
12863 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
12864
12865 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
12866 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
12867 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
12868 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
12869
12870 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
12871 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
12872 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
12873 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
12874
12875 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
12876 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
12877
12878 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
12879
12880 /* Prescott New Instructions. */
12881 def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
12882 void_ftype_pcvoid_unsigned_unsigned,
12883 IX86_BUILTIN_MONITOR);
12884 def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
12885 void_ftype_unsigned_unsigned,
12886 IX86_BUILTIN_MWAIT);
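/* Illustrative sketch (not part of the original sources): the monitor and
   mwait builtins defined above take the address to arm plus two
   extension/hint words, and two extension/hint words, respectively, so a
   minimal wait sequence is

       __builtin_ia32_monitor (addr, 0, 0);
       __builtin_ia32_mwait (0, 0);

   Both are expanded by hand in the IX86_BUILTIN_MONITOR and
   IX86_BUILTIN_MWAIT cases of ix86_expand_builtin below.  */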
12887 def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
12888 v4sf_ftype_v4sf,
12889 IX86_BUILTIN_MOVSHDUP);
12890 def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
12891 v4sf_ftype_v4sf,
12892 IX86_BUILTIN_MOVSLDUP);
12893 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
12894 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
12895 def_builtin (MASK_SSE3, "__builtin_ia32_loadddup",
12896 v2df_ftype_pcdouble, IX86_BUILTIN_LOADDDUP);
12897 def_builtin (MASK_SSE3, "__builtin_ia32_movddup",
12898 v2df_ftype_v2df, IX86_BUILTIN_MOVDDUP);
12899 }
12900
12901 /* Errors in the source file can cause expand_expr to return const0_rtx
12902 where we expect a vector. To avoid crashing, use one of the vector
12903 clear instructions. */
12904 static rtx
12905 safe_vector_operand (rtx x, enum machine_mode mode)
12906 {
12907 if (x != const0_rtx)
12908 return x;
12909 x = gen_reg_rtx (mode);
12910
12911 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
12912 emit_insn (gen_mmx_clrdi (mode == DImode ? x
12913 : gen_rtx_SUBREG (DImode, x, 0)));
12914 else
12915 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
12916 : gen_rtx_SUBREG (V4SFmode, x, 0),
12917 CONST0_RTX (V4SFmode)));
12918 return x;
12919 }
12920
12921 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
12922
12923 static rtx
12924 ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
12925 {
12926 rtx pat;
12927 tree arg0 = TREE_VALUE (arglist);
12928 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12929 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12930 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12931 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12932 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12933 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
12934
12935 if (VECTOR_MODE_P (mode0))
12936 op0 = safe_vector_operand (op0, mode0);
12937 if (VECTOR_MODE_P (mode1))
12938 op1 = safe_vector_operand (op1, mode1);
12939
12940 if (! target
12941 || GET_MODE (target) != tmode
12942 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12943 target = gen_reg_rtx (tmode);
12944
12945 if (GET_MODE (op1) == SImode && mode1 == TImode)
12946 {
12947 rtx x = gen_reg_rtx (V4SImode);
12948 emit_insn (gen_sse2_loadd (x, op1));
12949 op1 = gen_lowpart (TImode, x);
12950 }
12951
12952 /* In case the insn wants input operands in modes different from
12953 the result, abort. */
12954 if ((GET_MODE (op0) != mode0 && GET_MODE (op0) != VOIDmode)
12955 || (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode))
12956 abort ();
12957
12958 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12959 op0 = copy_to_mode_reg (mode0, op0);
12960 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12961 op1 = copy_to_mode_reg (mode1, op1);
12962
12963 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
12964 yet at most one of the two may be a memory operand. This is normally
12965 enforced by expanders, but we didn't bother to create one here. */
12966 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
12967 op0 = copy_to_mode_reg (mode0, op0);
12968
12969 pat = GEN_FCN (icode) (target, op0, op1);
12970 if (! pat)
12971 return 0;
12972 emit_insn (pat);
12973 return target;
12974 }
12975
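/* Illustrative note (not part of the original sources): for a two-operand
   builtin the table-driven fallback of ix86_expand_builtin calls

       ix86_expand_binop_builtin (d->icode, arglist, target);

   and the helper above expands both arguments, forces any operand that
   fails the insn's predicate into a register of the mode the pattern
   expects (copy_to_mode_reg), and finally emits
   GEN_FCN (icode) (target, op0, op1).  The returned TARGET is the value
   of the builtin call.  */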
12976 /* Subroutine of ix86_expand_builtin to take care of stores. */
12977
12978 static rtx
12979 ix86_expand_store_builtin (enum insn_code icode, tree arglist)
12980 {
12981 rtx pat;
12982 tree arg0 = TREE_VALUE (arglist);
12983 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12984 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12985 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12986 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
12987 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
12988
12989 if (VECTOR_MODE_P (mode1))
12990 op1 = safe_vector_operand (op1, mode1);
12991
12992 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
12993 op1 = copy_to_mode_reg (mode1, op1);
12994
12995 pat = GEN_FCN (icode) (op0, op1);
12996 if (pat)
12997 emit_insn (pat);
12998 return 0;
12999 }
13000
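/* Illustrative note (not part of the original sources): store builtins
   such as __builtin_ia32_storeups (p, v) reach the helper above with the
   pointer as the first argument and the vector as the second; the pointer
   is wrapped in a MEM of the insn's destination mode and the pattern is
   emitted only for its side effect, which is why the helper always
   returns 0.  */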
13001 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
13002
13003 static rtx
13004 ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
13005 rtx target, int do_load)
13006 {
13007 rtx pat;
13008 tree arg0 = TREE_VALUE (arglist);
13009 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13010 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13011 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13012
13013 if (! target
13014 || GET_MODE (target) != tmode
13015 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13016 target = gen_reg_rtx (tmode);
13017 if (do_load)
13018 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13019 else
13020 {
13021 if (VECTOR_MODE_P (mode0))
13022 op0 = safe_vector_operand (op0, mode0);
13023
13024 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13025 op0 = copy_to_mode_reg (mode0, op0);
13026 }
13027
13028 pat = GEN_FCN (icode) (target, op0);
13029 if (! pat)
13030 return 0;
13031 emit_insn (pat);
13032 return target;
13033 }
13034
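/* Illustrative note (not part of the original sources): DO_LOAD selects
   between the two users of the helper above.  With do_load == 0 the
   single argument is already a value, as in

       return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);

   whereas with do_load == 1 the argument is a pointer that is first
   wrapped in a MEM, which is how the load builtins are expanded:

       return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
 */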
13035 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
13036 sqrtss, rsqrtss, rcpss. */
13037
13038 static rtx
13039 ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
13040 {
13041 rtx pat;
13042 tree arg0 = TREE_VALUE (arglist);
13043 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13044 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13045 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13046
13047 if (! target
13048 || GET_MODE (target) != tmode
13049 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13050 target = gen_reg_rtx (tmode);
13051
13052 if (VECTOR_MODE_P (mode0))
13053 op0 = safe_vector_operand (op0, mode0);
13054
13055 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13056 op0 = copy_to_mode_reg (mode0, op0);
13057
13058 op1 = op0;
13059 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
13060 op1 = copy_to_mode_reg (mode0, op1);
13061
13062 pat = GEN_FCN (icode) (target, op0, op1);
13063 if (! pat)
13064 return 0;
13065 emit_insn (pat);
13066 return target;
13067 }
13068
13069 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
13070
13071 static rtx
13072 ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
13073 rtx target)
13074 {
13075 rtx pat;
13076 tree arg0 = TREE_VALUE (arglist);
13077 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13078 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13079 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13080 rtx op2;
13081 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
13082 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
13083 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
13084 enum rtx_code comparison = d->comparison;
13085
13086 if (VECTOR_MODE_P (mode0))
13087 op0 = safe_vector_operand (op0, mode0);
13088 if (VECTOR_MODE_P (mode1))
13089 op1 = safe_vector_operand (op1, mode1);
13090
13091 /* Swap operands if we have a comparison that isn't available in
13092 hardware. */
13093 if (d->flag)
13094 {
13095 rtx tmp = gen_reg_rtx (mode1);
13096 emit_move_insn (tmp, op1);
13097 op1 = op0;
13098 op0 = tmp;
13099 }
13100
13101 if (! target
13102 || GET_MODE (target) != tmode
13103 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
13104 target = gen_reg_rtx (tmode);
13105
13106 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
13107 op0 = copy_to_mode_reg (mode0, op0);
13108 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
13109 op1 = copy_to_mode_reg (mode1, op1);
13110
13111 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13112 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
13113 if (! pat)
13114 return 0;
13115 emit_insn (pat);
13116 return target;
13117 }
13118
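/* Illustrative note (not part of the original sources): D->FLAG marks
   comparisons that exist in only one direction in hardware.  SSE provides
   cmpltps but no cmpgtps, so a greater-than compare builtin is expanded
   by the routine above with its two operands swapped and the less-than
   pattern emitted instead; the resulting mask is identical.  */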
13119 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
13120
13121 static rtx
13122 ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
13123 rtx target)
13124 {
13125 rtx pat;
13126 tree arg0 = TREE_VALUE (arglist);
13127 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13128 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13129 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13130 rtx op2;
13131 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
13132 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
13133 enum rtx_code comparison = d->comparison;
13134
13135 if (VECTOR_MODE_P (mode0))
13136 op0 = safe_vector_operand (op0, mode0);
13137 if (VECTOR_MODE_P (mode1))
13138 op1 = safe_vector_operand (op1, mode1);
13139
13140 /* Swap operands if we have a comparison that isn't available in
13141 hardware. */
13142 if (d->flag)
13143 {
13144 rtx tmp = op1;
13145 op1 = op0;
13146 op0 = tmp;
13147 }
13148
13149 target = gen_reg_rtx (SImode);
13150 emit_move_insn (target, const0_rtx);
13151 target = gen_rtx_SUBREG (QImode, target, 0);
13152
13153 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
13154 op0 = copy_to_mode_reg (mode0, op0);
13155 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
13156 op1 = copy_to_mode_reg (mode1, op1);
13157
13158 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13159 pat = GEN_FCN (d->icode) (op0, op1);
13160 if (! pat)
13161 return 0;
13162 emit_insn (pat);
13163 emit_insn (gen_rtx_SET (VOIDmode,
13164 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
13165 gen_rtx_fmt_ee (comparison, QImode,
13166 SET_DEST (pat),
13167 const0_rtx)));
13168
13169 return SUBREG_REG (target);
13170 }
13171
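/* Illustrative note (not part of the original sources): the comi helper
   above does not use a value produced by the comi pattern directly; it
   clears an SImode pseudo, emits the (u)comi instruction to set the
   flags, and then stores the comparison outcome into the low QImode part
   of that pseudo through a STRICT_LOW_PART set, so builtins such as
   __builtin_ia32_comieq yield a plain 0/1 int.  */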
13172 /* Expand an expression EXP that calls a built-in function,
13173 with result going to TARGET if that's convenient
13174 (and in mode MODE if that's convenient).
13175 SUBTARGET may be used as the target for computing one of EXP's operands.
13176 IGNORE is nonzero if the value is to be ignored. */
13177
13178 rtx
13179 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
13180 enum machine_mode mode ATTRIBUTE_UNUSED,
13181 int ignore ATTRIBUTE_UNUSED)
13182 {
13183 const struct builtin_description *d;
13184 size_t i;
13185 enum insn_code icode;
13186 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
13187 tree arglist = TREE_OPERAND (exp, 1);
13188 tree arg0, arg1, arg2;
13189 rtx op0, op1, op2, pat;
13190 enum machine_mode tmode, mode0, mode1, mode2;
13191 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
13192
13193 switch (fcode)
13194 {
13195 case IX86_BUILTIN_EMMS:
13196 emit_insn (gen_emms ());
13197 return 0;
13198
13199 case IX86_BUILTIN_SFENCE:
13200 emit_insn (gen_sfence ());
13201 return 0;
13202
13203 case IX86_BUILTIN_PEXTRW:
13204 case IX86_BUILTIN_PEXTRW128:
13205 icode = (fcode == IX86_BUILTIN_PEXTRW
13206 ? CODE_FOR_mmx_pextrw
13207 : CODE_FOR_sse2_pextrw);
13208 arg0 = TREE_VALUE (arglist);
13209 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13210 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13211 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13212 tmode = insn_data[icode].operand[0].mode;
13213 mode0 = insn_data[icode].operand[1].mode;
13214 mode1 = insn_data[icode].operand[2].mode;
13215
13216 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13217 op0 = copy_to_mode_reg (mode0, op0);
13218 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13219 {
13220 error ("selector must be an integer constant in the range 0..%i",
13221 fcode == IX86_BUILTIN_PEXTRW ? 3:7);
13222 return gen_reg_rtx (tmode);
13223 }
13224 if (target == 0
13225 || GET_MODE (target) != tmode
13226 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13227 target = gen_reg_rtx (tmode);
13228 pat = GEN_FCN (icode) (target, op0, op1);
13229 if (! pat)
13230 return 0;
13231 emit_insn (pat);
13232 return target;
13233
13234 case IX86_BUILTIN_PINSRW:
13235 case IX86_BUILTIN_PINSRW128:
13236 icode = (fcode == IX86_BUILTIN_PINSRW
13237 ? CODE_FOR_mmx_pinsrw
13238 : CODE_FOR_sse2_pinsrw);
13239 arg0 = TREE_VALUE (arglist);
13240 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13241 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13242 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13243 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13244 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13245 tmode = insn_data[icode].operand[0].mode;
13246 mode0 = insn_data[icode].operand[1].mode;
13247 mode1 = insn_data[icode].operand[2].mode;
13248 mode2 = insn_data[icode].operand[3].mode;
13249
13250 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13251 op0 = copy_to_mode_reg (mode0, op0);
13252 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13253 op1 = copy_to_mode_reg (mode1, op1);
13254 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13255 {
13256 error ("selector must be an integer constant in the range 0..%i",
13257 fcode == IX86_BUILTIN_PINSRW ? 15:255);
13258 return const0_rtx;
13259 }
13260 if (target == 0
13261 || GET_MODE (target) != tmode
13262 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13263 target = gen_reg_rtx (tmode);
13264 pat = GEN_FCN (icode) (target, op0, op1, op2);
13265 if (! pat)
13266 return 0;
13267 emit_insn (pat);
13268 return target;
13269
13270 case IX86_BUILTIN_MASKMOVQ:
13271 case IX86_BUILTIN_MASKMOVDQU:
13272 icode = (fcode == IX86_BUILTIN_MASKMOVQ
13273 ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
13274 : (TARGET_64BIT ? CODE_FOR_sse2_maskmovdqu_rex64
13275 : CODE_FOR_sse2_maskmovdqu));
13276 /* Note the arg order is different from the operand order. */
13277 arg1 = TREE_VALUE (arglist);
13278 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
13279 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13280 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13281 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13282 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13283 mode0 = insn_data[icode].operand[0].mode;
13284 mode1 = insn_data[icode].operand[1].mode;
13285 mode2 = insn_data[icode].operand[2].mode;
13286
13287 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13288 op0 = copy_to_mode_reg (mode0, op0);
13289 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
13290 op1 = copy_to_mode_reg (mode1, op1);
13291 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
13292 op2 = copy_to_mode_reg (mode2, op2);
13293 pat = GEN_FCN (icode) (op0, op1, op2);
13294 if (! pat)
13295 return 0;
13296 emit_insn (pat);
13297 return 0;
13298
13299 case IX86_BUILTIN_SQRTSS:
13300 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
13301 case IX86_BUILTIN_RSQRTSS:
13302 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
13303 case IX86_BUILTIN_RCPSS:
13304 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
13305
13306 case IX86_BUILTIN_LOADAPS:
13307 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
13308
13309 case IX86_BUILTIN_LOADUPS:
13310 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
13311
13312 case IX86_BUILTIN_STOREAPS:
13313 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
13314
13315 case IX86_BUILTIN_STOREUPS:
13316 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
13317
13318 case IX86_BUILTIN_LOADSS:
13319 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
13320
13321 case IX86_BUILTIN_STORESS:
13322 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
13323
13324 case IX86_BUILTIN_LOADHPS:
13325 case IX86_BUILTIN_LOADLPS:
13326 case IX86_BUILTIN_LOADHPD:
13327 case IX86_BUILTIN_LOADLPD:
13328 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
13329 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
13330 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
13331 : CODE_FOR_sse2_movsd);
13332 arg0 = TREE_VALUE (arglist);
13333 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13334 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13335 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13336 tmode = insn_data[icode].operand[0].mode;
13337 mode0 = insn_data[icode].operand[1].mode;
13338 mode1 = insn_data[icode].operand[2].mode;
13339
13340 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13341 op0 = copy_to_mode_reg (mode0, op0);
13342 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
13343 if (target == 0
13344 || GET_MODE (target) != tmode
13345 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13346 target = gen_reg_rtx (tmode);
13347 pat = GEN_FCN (icode) (target, op0, op1);
13348 if (! pat)
13349 return 0;
13350 emit_insn (pat);
13351 return target;
13352
13353 case IX86_BUILTIN_STOREHPS:
13354 case IX86_BUILTIN_STORELPS:
13355 case IX86_BUILTIN_STOREHPD:
13356 case IX86_BUILTIN_STORELPD:
13357 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
13358 : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
13359 : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
13360 : CODE_FOR_sse2_movsd);
13361 arg0 = TREE_VALUE (arglist);
13362 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13363 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13364 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13365 mode0 = insn_data[icode].operand[1].mode;
13366 mode1 = insn_data[icode].operand[2].mode;
13367
13368 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13369 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13370 op1 = copy_to_mode_reg (mode1, op1);
13371
13372 pat = GEN_FCN (icode) (op0, op0, op1);
13373 if (! pat)
13374 return 0;
13375 emit_insn (pat);
13376 return 0;
13377
13378 case IX86_BUILTIN_MOVNTPS:
13379 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
13380 case IX86_BUILTIN_MOVNTQ:
13381 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
13382
13383 case IX86_BUILTIN_LDMXCSR:
13384 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
13385 target = assign_386_stack_local (SImode, 0);
13386 emit_move_insn (target, op0);
13387 emit_insn (gen_ldmxcsr (target));
13388 return 0;
13389
13390 case IX86_BUILTIN_STMXCSR:
13391 target = assign_386_stack_local (SImode, 0);
13392 emit_insn (gen_stmxcsr (target));
13393 return copy_to_mode_reg (SImode, target);
13394
13395 case IX86_BUILTIN_SHUFPS:
13396 case IX86_BUILTIN_SHUFPD:
13397 icode = (fcode == IX86_BUILTIN_SHUFPS
13398 ? CODE_FOR_sse_shufps
13399 : CODE_FOR_sse2_shufpd);
13400 arg0 = TREE_VALUE (arglist);
13401 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13402 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13403 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13404 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13405 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13406 tmode = insn_data[icode].operand[0].mode;
13407 mode0 = insn_data[icode].operand[1].mode;
13408 mode1 = insn_data[icode].operand[2].mode;
13409 mode2 = insn_data[icode].operand[3].mode;
13410
13411 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13412 op0 = copy_to_mode_reg (mode0, op0);
13413 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13414 op1 = copy_to_mode_reg (mode1, op1);
13415 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13416 {
13417 /* @@@ better error message */
13418 error ("mask must be an immediate");
13419 return gen_reg_rtx (tmode);
13420 }
13421 if (target == 0
13422 || GET_MODE (target) != tmode
13423 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13424 target = gen_reg_rtx (tmode);
13425 pat = GEN_FCN (icode) (target, op0, op1, op2);
13426 if (! pat)
13427 return 0;
13428 emit_insn (pat);
13429 return target;
13430
13431 case IX86_BUILTIN_PSHUFW:
13432 case IX86_BUILTIN_PSHUFD:
13433 case IX86_BUILTIN_PSHUFHW:
13434 case IX86_BUILTIN_PSHUFLW:
13435 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
13436 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
13437 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
13438 : CODE_FOR_mmx_pshufw);
13439 arg0 = TREE_VALUE (arglist);
13440 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13441 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13442 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13443 tmode = insn_data[icode].operand[0].mode;
13444 mode1 = insn_data[icode].operand[1].mode;
13445 mode2 = insn_data[icode].operand[2].mode;
13446
13447 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
13448 op0 = copy_to_mode_reg (mode1, op0);
13449 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
13450 {
13451 /* @@@ better error message */
13452 error ("mask must be an immediate");
13453 return const0_rtx;
13454 }
13455 if (target == 0
13456 || GET_MODE (target) != tmode
13457 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13458 target = gen_reg_rtx (tmode);
13459 pat = GEN_FCN (icode) (target, op0, op1);
13460 if (! pat)
13461 return 0;
13462 emit_insn (pat);
13463 return target;
13464
13465 case IX86_BUILTIN_PSLLDQI128:
13466 case IX86_BUILTIN_PSRLDQI128:
13467 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
13468 : CODE_FOR_sse2_lshrti3);
13469 arg0 = TREE_VALUE (arglist);
13470 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13471 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13472 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13473 tmode = insn_data[icode].operand[0].mode;
13474 mode1 = insn_data[icode].operand[1].mode;
13475 mode2 = insn_data[icode].operand[2].mode;
13476
13477 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
13478 {
13479 op0 = copy_to_reg (op0);
13480 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
13481 }
13482 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
13483 {
13484 error ("shift must be an immediate");
13485 return const0_rtx;
13486 }
13487 target = gen_reg_rtx (V2DImode);
13488 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
13489 if (! pat)
13490 return 0;
13491 emit_insn (pat);
13492 return target;
13493
13494 case IX86_BUILTIN_FEMMS:
13495 emit_insn (gen_femms ());
13496 return NULL_RTX;
13497
13498 case IX86_BUILTIN_PAVGUSB:
13499 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
13500
13501 case IX86_BUILTIN_PF2ID:
13502 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
13503
13504 case IX86_BUILTIN_PFACC:
13505 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
13506
13507 case IX86_BUILTIN_PFADD:
13508 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
13509
13510 case IX86_BUILTIN_PFCMPEQ:
13511 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
13512
13513 case IX86_BUILTIN_PFCMPGE:
13514 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
13515
13516 case IX86_BUILTIN_PFCMPGT:
13517 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
13518
13519 case IX86_BUILTIN_PFMAX:
13520 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
13521
13522 case IX86_BUILTIN_PFMIN:
13523 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
13524
13525 case IX86_BUILTIN_PFMUL:
13526 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
13527
13528 case IX86_BUILTIN_PFRCP:
13529 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
13530
13531 case IX86_BUILTIN_PFRCPIT1:
13532 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
13533
13534 case IX86_BUILTIN_PFRCPIT2:
13535 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
13536
13537 case IX86_BUILTIN_PFRSQIT1:
13538 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
13539
13540 case IX86_BUILTIN_PFRSQRT:
13541 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
13542
13543 case IX86_BUILTIN_PFSUB:
13544 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
13545
13546 case IX86_BUILTIN_PFSUBR:
13547 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
13548
13549 case IX86_BUILTIN_PI2FD:
13550 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
13551
13552 case IX86_BUILTIN_PMULHRW:
13553 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
13554
13555 case IX86_BUILTIN_PF2IW:
13556 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
13557
13558 case IX86_BUILTIN_PFNACC:
13559 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
13560
13561 case IX86_BUILTIN_PFPNACC:
13562 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
13563
13564 case IX86_BUILTIN_PI2FW:
13565 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
13566
13567 case IX86_BUILTIN_PSWAPDSI:
13568 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
13569
13570 case IX86_BUILTIN_PSWAPDSF:
13571 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
13572
13573 case IX86_BUILTIN_SSE_ZERO:
13574 target = gen_reg_rtx (V4SFmode);
13575 emit_insn (gen_sse_clrv4sf (target, CONST0_RTX (V4SFmode)));
13576 return target;
13577
13578 case IX86_BUILTIN_MMX_ZERO:
13579 target = gen_reg_rtx (DImode);
13580 emit_insn (gen_mmx_clrdi (target));
13581 return target;
13582
13583 case IX86_BUILTIN_CLRTI:
13584 target = gen_reg_rtx (V2DImode);
13585 emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode, target, V2DImode, 0)));
13586 return target;
13587
13588
13589 case IX86_BUILTIN_SQRTSD:
13590 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
13591 case IX86_BUILTIN_LOADAPD:
13592 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
13593 case IX86_BUILTIN_LOADUPD:
13594 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
13595
13596 case IX86_BUILTIN_STOREAPD:
13597 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13598 case IX86_BUILTIN_STOREUPD:
13599 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
13600
13601 case IX86_BUILTIN_LOADSD:
13602 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
13603
13604 case IX86_BUILTIN_STORESD:
13605 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
13606
13607 case IX86_BUILTIN_SETPD1:
13608 target = assign_386_stack_local (DFmode, 0);
13609 arg0 = TREE_VALUE (arglist);
13610 emit_move_insn (adjust_address (target, DFmode, 0),
13611 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
13612 op0 = gen_reg_rtx (V2DFmode);
13613 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
13614 emit_insn (gen_sse2_shufpd (op0, op0, op0, const0_rtx));
13615 return op0;
13616
13617 case IX86_BUILTIN_SETPD:
13618 target = assign_386_stack_local (V2DFmode, 0);
13619 arg0 = TREE_VALUE (arglist);
13620 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13621 emit_move_insn (adjust_address (target, DFmode, 0),
13622 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
13623 emit_move_insn (adjust_address (target, DFmode, 8),
13624 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
13625 op0 = gen_reg_rtx (V2DFmode);
13626 emit_insn (gen_sse2_movapd (op0, target));
13627 return op0;
13628
13629 case IX86_BUILTIN_LOADRPD:
13630 target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
13631 gen_reg_rtx (V2DFmode), 1);
13632 emit_insn (gen_sse2_shufpd (target, target, target, const1_rtx));
13633 return target;
13634
13635 case IX86_BUILTIN_LOADPD1:
13636 target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
13637 gen_reg_rtx (V2DFmode), 1);
13638 emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
13639 return target;
13640
13641 case IX86_BUILTIN_STOREPD1:
13642 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13643 case IX86_BUILTIN_STORERPD:
13644 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13645
13646 case IX86_BUILTIN_CLRPD:
13647 target = gen_reg_rtx (V2DFmode);
13648 emit_insn (gen_sse_clrv2df (target));
13649 return target;
13650
13651 case IX86_BUILTIN_MFENCE:
13652 emit_insn (gen_sse2_mfence ());
13653 return 0;
13654 case IX86_BUILTIN_LFENCE:
13655 emit_insn (gen_sse2_lfence ());
13656 return 0;
13657
13658 case IX86_BUILTIN_CLFLUSH:
13659 arg0 = TREE_VALUE (arglist);
13660 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13661 icode = CODE_FOR_sse2_clflush;
13662 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
13663 op0 = copy_to_mode_reg (Pmode, op0);
13664
13665 emit_insn (gen_sse2_clflush (op0));
13666 return 0;
13667
13668 case IX86_BUILTIN_MOVNTPD:
13669 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
13670 case IX86_BUILTIN_MOVNTDQ:
13671 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
13672 case IX86_BUILTIN_MOVNTI:
13673 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
13674
13675 case IX86_BUILTIN_LOADDQA:
13676 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa, arglist, target, 1);
13677 case IX86_BUILTIN_LOADDQU:
13678 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
13679 case IX86_BUILTIN_LOADD:
13680 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd, arglist, target, 1);
13681
13682 case IX86_BUILTIN_STOREDQA:
13683 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa, arglist);
13684 case IX86_BUILTIN_STOREDQU:
13685 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
13686 case IX86_BUILTIN_STORED:
13687 return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist);
13688
13689 case IX86_BUILTIN_MONITOR:
13690 arg0 = TREE_VALUE (arglist);
13691 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13692 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13693 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13694 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13695 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13696 if (!REG_P (op0))
13697 op0 = copy_to_mode_reg (SImode, op0);
13698 if (!REG_P (op1))
13699 op1 = copy_to_mode_reg (SImode, op1);
13700 if (!REG_P (op2))
13701 op2 = copy_to_mode_reg (SImode, op2);
13702 emit_insn (gen_monitor (op0, op1, op2));
13703 return 0;
13704
13705 case IX86_BUILTIN_MWAIT:
13706 arg0 = TREE_VALUE (arglist);
13707 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13708 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13709 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13710 if (!REG_P (op0))
13711 op0 = copy_to_mode_reg (SImode, op0);
13712 if (!REG_P (op1))
13713 op1 = copy_to_mode_reg (SImode, op1);
13714 emit_insn (gen_mwait (op0, op1));
13715 return 0;
13716
13717 case IX86_BUILTIN_LOADDDUP:
13718 return ix86_expand_unop_builtin (CODE_FOR_loadddup, arglist, target, 1);
13719
13720 case IX86_BUILTIN_LDDQU:
13721 return ix86_expand_unop_builtin (CODE_FOR_lddqu, arglist, target,
13722 1);
13723
13724 default:
13725 break;
13726 }
13727
13728 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
13729 if (d->code == fcode)
13730 {
13731 /* Compares are treated specially. */
13732 if (d->icode == CODE_FOR_maskcmpv4sf3
13733 || d->icode == CODE_FOR_vmmaskcmpv4sf3
13734 || d->icode == CODE_FOR_maskncmpv4sf3
13735 || d->icode == CODE_FOR_vmmaskncmpv4sf3
13736 || d->icode == CODE_FOR_maskcmpv2df3
13737 || d->icode == CODE_FOR_vmmaskcmpv2df3
13738 || d->icode == CODE_FOR_maskncmpv2df3
13739 || d->icode == CODE_FOR_vmmaskncmpv2df3)
13740 return ix86_expand_sse_compare (d, arglist, target);
13741
13742 return ix86_expand_binop_builtin (d->icode, arglist, target);
13743 }
13744
13745 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
13746 if (d->code == fcode)
13747 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
13748
13749 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
13750 if (d->code == fcode)
13751 return ix86_expand_sse_comi (d, arglist, target);
13752
13753 /* @@@ Should really do something sensible here. */
13754 return 0;
13755 }
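/* Illustration (a sketch, not part of the compiler): a source-level use of
   one of the 3DNow! builtins dispatched above, for example

       typedef float __v2sf __attribute__ ((vector_size (8)));
       __v2sf approx_recip (__v2sf x)
       {
         return __builtin_ia32_pfrcp (x);
       }

   reaches ix86_expand_builtin with fcode == IX86_BUILTIN_PFRCP and is
   expanded through ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, ...),
   i.e. a single pfrcp instruction on a V2SF value.  The builtin name and
   the vector typedef follow the usual mm3dnow.h conventions and are shown
   here only as an example.  */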
13756
13757 /* Store OPERAND to memory after reload is completed. This means
13758 that we can't easily use assign_stack_local. */
13759 rtx
13760 ix86_force_to_memory (enum machine_mode mode, rtx operand)
13761 {
13762 rtx result;
13763 if (!reload_completed)
13764 abort ();
13765 if (TARGET_RED_ZONE)
13766 {
13767 result = gen_rtx_MEM (mode,
13768 gen_rtx_PLUS (Pmode,
13769 stack_pointer_rtx,
13770 GEN_INT (-RED_ZONE_SIZE)));
13771 emit_move_insn (result, operand);
13772 }
13773 else if (!TARGET_RED_ZONE && TARGET_64BIT)
13774 {
13775 switch (mode)
13776 {
13777 case HImode:
13778 case SImode:
13779 operand = gen_lowpart (DImode, operand);
13780 /* FALLTHRU */
13781 case DImode:
13782 emit_insn (
13783 gen_rtx_SET (VOIDmode,
13784 gen_rtx_MEM (DImode,
13785 gen_rtx_PRE_DEC (DImode,
13786 stack_pointer_rtx)),
13787 operand));
13788 break;
13789 default:
13790 abort ();
13791 }
13792 result = gen_rtx_MEM (mode, stack_pointer_rtx);
13793 }
13794 else
13795 {
13796 switch (mode)
13797 {
13798 case DImode:
13799 {
13800 rtx operands[2];
13801 split_di (&operand, 1, operands, operands + 1);
13802 emit_insn (
13803 gen_rtx_SET (VOIDmode,
13804 gen_rtx_MEM (SImode,
13805 gen_rtx_PRE_DEC (Pmode,
13806 stack_pointer_rtx)),
13807 operands[1]));
13808 emit_insn (
13809 gen_rtx_SET (VOIDmode,
13810 gen_rtx_MEM (SImode,
13811 gen_rtx_PRE_DEC (Pmode,
13812 stack_pointer_rtx)),
13813 operands[0]));
13814 }
13815 break;
13816 case HImode:
13817 /* It is better to store HImodes as SImodes. */
13818 if (!TARGET_PARTIAL_REG_STALL)
13819 operand = gen_lowpart (SImode, operand);
13820 /* FALLTHRU */
13821 case SImode:
13822 emit_insn (
13823 gen_rtx_SET (VOIDmode,
13824 gen_rtx_MEM (GET_MODE (operand),
13825 gen_rtx_PRE_DEC (SImode,
13826 stack_pointer_rtx)),
13827 operand));
13828 break;
13829 default:
13830 abort ();
13831 }
13832 result = gen_rtx_MEM (mode, stack_pointer_rtx);
13833 }
13834 return result;
13835 }
13836
13837 /* Free the operand from memory. */
13838 void
13839 ix86_free_from_memory (enum machine_mode mode)
13840 {
13841 if (!TARGET_RED_ZONE)
13842 {
13843 int size;
13844
13845 if (mode == DImode || TARGET_64BIT)
13846 size = 8;
13847 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
13848 size = 2;
13849 else
13850 size = 4;
13851 /* Use LEA to deallocate stack space. In peephole2 it will be converted
13852 to a pop or add instruction if registers are available. */
13853 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
13854 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
13855 GEN_INT (size))));
13856 }
13857 }
13858
13859 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
13860 QImode must go into class Q_REGS.
13861 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
13862 movdf to do mem-to-mem moves through integer regs. */
13863 enum reg_class
13864 ix86_preferred_reload_class (rtx x, enum reg_class class)
13865 {
13866 if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x)))
13867 return NO_REGS;
13868 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
13869 {
13870 /* SSE can't load any constant directly yet. */
13871 if (SSE_CLASS_P (class))
13872 return NO_REGS;
13873 /* Floats can load 0 and 1. */
13874 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
13875 {
13876 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
13877 if (MAYBE_SSE_CLASS_P (class))
13878 return (reg_class_subset_p (class, GENERAL_REGS)
13879 ? GENERAL_REGS : FLOAT_REGS);
13880 else
13881 return class;
13882 }
13883 /* General regs can load everything. */
13884 if (reg_class_subset_p (class, GENERAL_REGS))
13885 return GENERAL_REGS;
13886 /* In case we haven't resolved FLOAT or SSE yet, give up. */
13887 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
13888 return NO_REGS;
13889 }
13890 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
13891 return NO_REGS;
13892 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
13893 return Q_REGS;
13894 return class;
13895 }
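/* Worked example (informal, for illustration only): reloading the constant
   (const_double:DF 1.0) into SSE_REGS returns NO_REGS, forcing the constant
   into memory, while reloading it into FLOAT_REGS keeps FLOAT_REGS because
   1.0 is a standard 80387 constant (fld1).  For ALL_REGS, which may contain
   both SSE and x87 registers, the class is narrowed to FLOAT_REGS, since
   ALL_REGS is not a subset of GENERAL_REGS.  */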
13896
13897 /* If we are copying between general and FP registers, we need a memory
13898 location. The same is true for SSE and MMX registers.
13899
13900 The macro can't work reliably when one of the CLASSES is a class containing
13901 registers from multiple units (SSE, MMX, integer). We avoid this by never
13902 combining those units in a single alternative in the machine description.
13903 Ensure that this constraint holds to avoid unexpected surprises.
13904
13905 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
13906 enforce these sanity checks. */
13907 int
13908 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
13909 enum machine_mode mode, int strict)
13910 {
13911 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
13912 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
13913 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
13914 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
13915 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
13916 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
13917 {
13918 if (strict)
13919 abort ();
13920 else
13921 return 1;
13922 }
13923 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
13924 || ((SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
13925 || MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
13926 && ((mode != SImode && (mode != DImode || !TARGET_64BIT))
13927 || (!TARGET_INTER_UNIT_MOVES && !optimize_size))));
13928 }
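/* Worked example (informal): on !TARGET_64BIT, copying a DImode value
   between GENERAL_REGS and SSE_REGS satisfies SSE_CLASS_P (class1) !=
   SSE_CLASS_P (class2) and (mode != DImode || !TARGET_64BIT), so the
   function returns 1 and reload routes the copy through a stack slot.
   An SImode copy between the same classes needs memory only when
   !TARGET_INTER_UNIT_MOVES && !optimize_size, i.e. on CPUs where a direct
   movd between the units is considered too slow.  */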
13929 /* Return the cost of moving data from a register in class CLASS1 to
13930 one in class CLASS2.
13931
13932 It is not required that the cost always equal 2 when FROM is the same as TO;
13933 on some machines it is expensive to move between registers if they are not
13934 general registers. */
13935 int
13936 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
13937 enum reg_class class2)
13938 {
13939 /* In case we require secondary memory, compute cost of the store followed
13940 by load. In order to avoid bad register allocation choices, we need
13941 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
13942
13943 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
13944 {
13945 int cost = 1;
13946
13947 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
13948 MEMORY_MOVE_COST (mode, class1, 1));
13949 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
13950 MEMORY_MOVE_COST (mode, class2, 1));
13951
13952 /* When copying from a general purpose register we may emit multiple
13953 stores followed by a single load, causing a memory size mismatch stall.
13954 Count this as an arbitrarily high cost of 20. */
13955 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
13956 cost += 20;
13957
13958 /* In the case of FP/MMX moves, the registers actually overlap, and we
13959 have to switch modes in order to treat them differently. */
13960 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
13961 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
13962 cost += 20;
13963
13964 return cost;
13965 }
13966
13967 /* Moves between SSE/MMX and integer unit are expensive. */
13968 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
13969 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
13970 return ix86_cost->mmxsse_to_integer;
13971 if (MAYBE_FLOAT_CLASS_P (class1))
13972 return ix86_cost->fp_move;
13973 if (MAYBE_SSE_CLASS_P (class1))
13974 return ix86_cost->sse_move;
13975 if (MAYBE_MMX_CLASS_P (class1))
13976 return ix86_cost->mmx_move;
13977 return 2;
13978 }
13979
13980 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
13981 int
13982 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
13983 {
13984 /* Flags and only flags can only hold CCmode values. */
13985 if (CC_REGNO_P (regno))
13986 return GET_MODE_CLASS (mode) == MODE_CC;
13987 if (GET_MODE_CLASS (mode) == MODE_CC
13988 || GET_MODE_CLASS (mode) == MODE_RANDOM
13989 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
13990 return 0;
13991 if (FP_REGNO_P (regno))
13992 return VALID_FP_MODE_P (mode);
13993 if (SSE_REGNO_P (regno))
13994 return (TARGET_SSE ? VALID_SSE_REG_MODE (mode) : 0);
13995 if (MMX_REGNO_P (regno))
13996 return (TARGET_MMX
13997 ? VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode) : 0);
13998 /* We handle both integers and floats in the general purpose registers.
13999 In the future we should be able to handle vector modes as well. */
14000 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
14001 return 0;
14002 /* Take care with QImode values - they can be in non-QI regs, but then
14003 they do cause partial register stalls. */
14004 if (regno < 4 || mode != QImode || TARGET_64BIT)
14005 return 1;
14006 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
14007 }
14008
14009 /* Return the cost of moving data of mode M between a
14010 register and memory. A value of 2 is the default; this cost is
14011 relative to those in `REGISTER_MOVE_COST'.
14012
14013 If moving between registers and memory is more expensive than
14014 between two registers, you should define this macro to express the
14015 relative cost.
14016
14017 Also model the increased cost of moving QImode registers in
14018 non-Q_REGS classes.
14019 */
14020 int
14021 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
14022 {
14023 if (FLOAT_CLASS_P (class))
14024 {
14025 int index;
14026 switch (mode)
14027 {
14028 case SFmode:
14029 index = 0;
14030 break;
14031 case DFmode:
14032 index = 1;
14033 break;
14034 case XFmode:
14035 index = 2;
14036 break;
14037 default:
14038 return 100;
14039 }
14040 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
14041 }
14042 if (SSE_CLASS_P (class))
14043 {
14044 int index;
14045 switch (GET_MODE_SIZE (mode))
14046 {
14047 case 4:
14048 index = 0;
14049 break;
14050 case 8:
14051 index = 1;
14052 break;
14053 case 16:
14054 index = 2;
14055 break;
14056 default:
14057 return 100;
14058 }
14059 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
14060 }
14061 if (MMX_CLASS_P (class))
14062 {
14063 int index;
14064 switch (GET_MODE_SIZE (mode))
14065 {
14066 case 4:
14067 index = 0;
14068 break;
14069 case 8:
14070 index = 1;
14071 break;
14072 default:
14073 return 100;
14074 }
14075 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
14076 }
14077 switch (GET_MODE_SIZE (mode))
14078 {
14079 case 1:
14080 if (in)
14081 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
14082 : ix86_cost->movzbl_load);
14083 else
14084 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
14085 : ix86_cost->int_store[0] + 4);
14086 break;
14087 case 2:
14088 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
14089 default:
14090 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
14091 if (mode == TFmode)
14092 mode = XFmode;
14093 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
14094 * (((int) GET_MODE_SIZE (mode)
14095 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
14096 }
14097 }
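/* Worked example (informal): spilling a DImode value held in GENERAL_REGS
   on a 32-bit target falls through to the default case above, so the cost
   is int_load[2] (or int_store[2]) scaled by the number of word-sized
   moves, here (8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD == 2.  A DFmode
   value in FLOAT_REGS instead uses fp_load[1] / fp_store[1] directly.  */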
14098
14099 /* Compute a (partial) cost for rtx X. Return true if the complete
14100 cost has been computed, and false if subexpressions should be
14101 scanned. In either case, *TOTAL contains the cost result. */
14102
14103 static bool
14104 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
14105 {
14106 enum machine_mode mode = GET_MODE (x);
14107
14108 switch (code)
14109 {
14110 case CONST_INT:
14111 case CONST:
14112 case LABEL_REF:
14113 case SYMBOL_REF:
14114 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
14115 *total = 3;
14116 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
14117 *total = 2;
14118 else if (flag_pic && SYMBOLIC_CONST (x)
14119 && (!TARGET_64BIT
14120 || (GET_CODE (x) != LABEL_REF
14121 && (GET_CODE (x) != SYMBOL_REF
14122 || !SYMBOL_REF_LOCAL_P (x)))))
14123 *total = 1;
14124 else
14125 *total = 0;
14126 return true;
14127
14128 case CONST_DOUBLE:
14129 if (mode == VOIDmode)
14130 *total = 0;
14131 else
14132 switch (standard_80387_constant_p (x))
14133 {
14134 case 1: /* 0.0 */
14135 *total = 1;
14136 break;
14137 default: /* Other constants */
14138 *total = 2;
14139 break;
14140 case 0:
14141 case -1:
14142 /* Start with (MEM (SYMBOL_REF)), since that's where
14143 it'll probably end up. Add a penalty for size. */
14144 *total = (COSTS_N_INSNS (1)
14145 + (flag_pic != 0 && !TARGET_64BIT)
14146 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
14147 break;
14148 }
14149 return true;
14150
14151 case ZERO_EXTEND:
14152 /* The zero extension is often completely free on x86_64, so make
14153 it as cheap as possible. */
14154 if (TARGET_64BIT && mode == DImode
14155 && GET_MODE (XEXP (x, 0)) == SImode)
14156 *total = 1;
14157 else if (TARGET_ZERO_EXTEND_WITH_AND)
14158 *total = COSTS_N_INSNS (ix86_cost->add);
14159 else
14160 *total = COSTS_N_INSNS (ix86_cost->movzx);
14161 return false;
14162
14163 case SIGN_EXTEND:
14164 *total = COSTS_N_INSNS (ix86_cost->movsx);
14165 return false;
14166
14167 case ASHIFT:
14168 if (GET_CODE (XEXP (x, 1)) == CONST_INT
14169 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
14170 {
14171 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
14172 if (value == 1)
14173 {
14174 *total = COSTS_N_INSNS (ix86_cost->add);
14175 return false;
14176 }
14177 if ((value == 2 || value == 3)
14178 && ix86_cost->lea <= ix86_cost->shift_const)
14179 {
14180 *total = COSTS_N_INSNS (ix86_cost->lea);
14181 return false;
14182 }
14183 }
14184 /* FALLTHRU */
14185
14186 case ROTATE:
14187 case ASHIFTRT:
14188 case LSHIFTRT:
14189 case ROTATERT:
14190 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
14191 {
14192 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14193 {
14194 if (INTVAL (XEXP (x, 1)) > 32)
14195 *total = COSTS_N_INSNS(ix86_cost->shift_const + 2);
14196 else
14197 *total = COSTS_N_INSNS(ix86_cost->shift_const * 2);
14198 }
14199 else
14200 {
14201 if (GET_CODE (XEXP (x, 1)) == AND)
14202 *total = COSTS_N_INSNS(ix86_cost->shift_var * 2);
14203 else
14204 *total = COSTS_N_INSNS(ix86_cost->shift_var * 6 + 2);
14205 }
14206 }
14207 else
14208 {
14209 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14210 *total = COSTS_N_INSNS (ix86_cost->shift_const);
14211 else
14212 *total = COSTS_N_INSNS (ix86_cost->shift_var);
14213 }
14214 return false;
14215
14216 case MULT:
14217 if (FLOAT_MODE_P (mode))
14218 {
14219 *total = COSTS_N_INSNS (ix86_cost->fmul);
14220 return false;
14221 }
14222 else
14223 {
14224 rtx op0 = XEXP (x, 0);
14225 rtx op1 = XEXP (x, 1);
14226 int nbits;
14227 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14228 {
14229 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
14230 for (nbits = 0; value != 0; value &= value - 1)
14231 nbits++;
14232 }
14233 else
14234 /* This is arbitrary. */
14235 nbits = 7;
14236
14237 /* Compute costs correctly for widening multiplication. */
14238 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
14239 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
14240 == GET_MODE_SIZE (mode))
14241 {
14242 int is_mulwiden = 0;
14243 enum machine_mode inner_mode = GET_MODE (op0);
14244
14245 if (GET_CODE (op0) == GET_CODE (op1))
14246 is_mulwiden = 1, op1 = XEXP (op1, 0);
14247 else if (GET_CODE (op1) == CONST_INT)
14248 {
14249 if (GET_CODE (op0) == SIGN_EXTEND)
14250 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
14251 == INTVAL (op1);
14252 else
14253 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
14254 }
14255
14256 if (is_mulwiden)
14257 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
14258 }
14259
14260 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
14261 + nbits * ix86_cost->mult_bit)
14262 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code);
14263
14264 return true;
14265 }
14266
14267 case DIV:
14268 case UDIV:
14269 case MOD:
14270 case UMOD:
14271 if (FLOAT_MODE_P (mode))
14272 *total = COSTS_N_INSNS (ix86_cost->fdiv);
14273 else
14274 *total = COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)]);
14275 return false;
14276
14277 case PLUS:
14278 if (FLOAT_MODE_P (mode))
14279 *total = COSTS_N_INSNS (ix86_cost->fadd);
14280 else if (GET_MODE_CLASS (mode) == MODE_INT
14281 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
14282 {
14283 if (GET_CODE (XEXP (x, 0)) == PLUS
14284 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
14285 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
14286 && CONSTANT_P (XEXP (x, 1)))
14287 {
14288 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
14289 if (val == 2 || val == 4 || val == 8)
14290 {
14291 *total = COSTS_N_INSNS (ix86_cost->lea);
14292 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
14293 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
14294 outer_code);
14295 *total += rtx_cost (XEXP (x, 1), outer_code);
14296 return true;
14297 }
14298 }
14299 else if (GET_CODE (XEXP (x, 0)) == MULT
14300 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
14301 {
14302 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
14303 if (val == 2 || val == 4 || val == 8)
14304 {
14305 *total = COSTS_N_INSNS (ix86_cost->lea);
14306 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
14307 *total += rtx_cost (XEXP (x, 1), outer_code);
14308 return true;
14309 }
14310 }
14311 else if (GET_CODE (XEXP (x, 0)) == PLUS)
14312 {
14313 *total = COSTS_N_INSNS (ix86_cost->lea);
14314 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
14315 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
14316 *total += rtx_cost (XEXP (x, 1), outer_code);
14317 return true;
14318 }
14319 }
14320 /* FALLTHRU */
14321
14322 case MINUS:
14323 if (FLOAT_MODE_P (mode))
14324 {
14325 *total = COSTS_N_INSNS (ix86_cost->fadd);
14326 return false;
14327 }
14328 /* FALLTHRU */
14329
14330 case AND:
14331 case IOR:
14332 case XOR:
14333 if (!TARGET_64BIT && mode == DImode)
14334 {
14335 *total = (COSTS_N_INSNS (ix86_cost->add) * 2
14336 + (rtx_cost (XEXP (x, 0), outer_code)
14337 << (GET_MODE (XEXP (x, 0)) != DImode))
14338 + (rtx_cost (XEXP (x, 1), outer_code)
14339 << (GET_MODE (XEXP (x, 1)) != DImode)));
14340 return true;
14341 }
14342 /* FALLTHRU */
14343
14344 case NEG:
14345 if (FLOAT_MODE_P (mode))
14346 {
14347 *total = COSTS_N_INSNS (ix86_cost->fchs);
14348 return false;
14349 }
14350 /* FALLTHRU */
14351
14352 case NOT:
14353 if (!TARGET_64BIT && mode == DImode)
14354 *total = COSTS_N_INSNS (ix86_cost->add * 2);
14355 else
14356 *total = COSTS_N_INSNS (ix86_cost->add);
14357 return false;
14358
14359 case COMPARE:
14360 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
14361 && XEXP (XEXP (x, 0), 1) == const1_rtx
14362 && GET_CODE (XEXP (XEXP (x, 0), 2)) == CONST_INT
14363 && XEXP (x, 1) == const0_rtx)
14364 {
14365 /* This kind of construct is implemented using test[bwl].
14366 Treat it as if we had an AND. */
14367 *total = (COSTS_N_INSNS (ix86_cost->add)
14368 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
14369 + rtx_cost (const1_rtx, outer_code));
14370 return true;
14371 }
14372 return false;
14373
14374 case FLOAT_EXTEND:
14375 if (!TARGET_SSE_MATH || !VALID_SSE_REG_MODE (mode))
14376 *total = 0;
14377 return false;
14378
14379 case ABS:
14380 if (FLOAT_MODE_P (mode))
14381 *total = COSTS_N_INSNS (ix86_cost->fabs);
14382 return false;
14383
14384 case SQRT:
14385 if (FLOAT_MODE_P (mode))
14386 *total = COSTS_N_INSNS (ix86_cost->fsqrt);
14387 return false;
14388
14389 case UNSPEC:
14390 if (XINT (x, 1) == UNSPEC_TP)
14391 *total = 0;
14392 return false;
14393
14394 default:
14395 return false;
14396 }
14397 }
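/* Worked example (informal): for (mult:SI (reg:SI r) (const_int 10)) the
   loop "for (nbits = 0; value != 0; value &= value - 1) nbits++;" above
   clears the lowest set bit on each iteration (Kernighan's population
   count), so nbits == 2 for the bit pattern 1010, and the multiply is
   costed as COSTS_N_INSNS (mult_init[2] + 2 * mult_bit) plus the costs of
   the two operands, mult_init being indexed by MODE_INDEX (SImode) == 2.  */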
14398
14399 #if TARGET_MACHO
14400
14401 static int current_machopic_label_num;
14402
14403 /* Given a symbol name and its associated stub, write out the
14404 definition of the stub. */
14405
14406 void
14407 machopic_output_stub (FILE *file, const char *symb, const char *stub)
14408 {
14409 unsigned int length;
14410 char *binder_name, *symbol_name, lazy_ptr_name[32];
14411 int label = ++current_machopic_label_num;
14412
14413 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
14414 symb = (*targetm.strip_name_encoding) (symb);
14415
14416 length = strlen (stub);
14417 binder_name = alloca (length + 32);
14418 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
14419
14420 length = strlen (symb);
14421 symbol_name = alloca (length + 32);
14422 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
14423
14424 sprintf (lazy_ptr_name, "L%d$lz", label);
14425
14426 if (MACHOPIC_PURE)
14427 machopic_picsymbol_stub_section ();
14428 else
14429 machopic_symbol_stub_section ();
14430
14431 fprintf (file, "%s:\n", stub);
14432 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
14433
14434 if (MACHOPIC_PURE)
14435 {
14436 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
14437 fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
14438 fprintf (file, "\tjmp %%edx\n");
14439 }
14440 else
14441 fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
14442
14443 fprintf (file, "%s:\n", binder_name);
14444
14445 if (MACHOPIC_PURE)
14446 {
14447 fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
14448 fprintf (file, "\tpushl %%eax\n");
14449 }
14450 else
14451 fprintf (file, "\t pushl $%s\n", lazy_ptr_name);
14452
14453 fprintf (file, "\tjmp dyld_stub_binding_helper\n");
14454
14455 machopic_lazy_symbol_ptr_section ();
14456 fprintf (file, "%s:\n", lazy_ptr_name);
14457 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
14458 fprintf (file, "\t.long %s\n", binder_name);
14459 }
14460 #endif /* TARGET_MACHO */
14461
14462 /* Order the registers for register allocator. */
14463
14464 void
14465 x86_order_regs_for_local_alloc (void)
14466 {
14467 int pos = 0;
14468 int i;
14469
14470 /* First allocate the local general purpose registers. */
14471 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
14472 if (GENERAL_REGNO_P (i) && call_used_regs[i])
14473 reg_alloc_order [pos++] = i;
14474
14475 /* Global general purpose registers. */
14476 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
14477 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
14478 reg_alloc_order [pos++] = i;
14479
14480 /* x87 registers come first in case we are doing FP math
14481 using them. */
14482 if (!TARGET_SSE_MATH)
14483 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
14484 reg_alloc_order [pos++] = i;
14485
14486 /* SSE registers. */
14487 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
14488 reg_alloc_order [pos++] = i;
14489 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
14490 reg_alloc_order [pos++] = i;
14491
14492 /* x87 registers. */
14493 if (TARGET_SSE_MATH)
14494 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
14495 reg_alloc_order [pos++] = i;
14496
14497 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
14498 reg_alloc_order [pos++] = i;
14499
14500 /* Initialize the rest of the array, as some registers are not
14501 allocated at all. */
14502 while (pos < FIRST_PSEUDO_REGISTER)
14503 reg_alloc_order [pos++] = 0;
14504 }
14505
14506 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
14507 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
14508 #endif
14509
14510 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
14511 struct attribute_spec.handler. */
14512 static tree
14513 ix86_handle_struct_attribute (tree *node, tree name,
14514 tree args ATTRIBUTE_UNUSED,
14515 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
14516 {
14517 tree *type = NULL;
14518 if (DECL_P (*node))
14519 {
14520 if (TREE_CODE (*node) == TYPE_DECL)
14521 type = &TREE_TYPE (*node);
14522 }
14523 else
14524 type = node;
14525
14526 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
14527 || TREE_CODE (*type) == UNION_TYPE)))
14528 {
14529 warning ("%qs attribute ignored", IDENTIFIER_POINTER (name));
14530 *no_add_attrs = true;
14531 }
14532
14533 else if ((is_attribute_p ("ms_struct", name)
14534 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
14535 || ((is_attribute_p ("gcc_struct", name)
14536 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
14537 {
14538 warning ("%qs incompatible attribute ignored",
14539 IDENTIFIER_POINTER (name));
14540 *no_add_attrs = true;
14541 }
14542
14543 return NULL_TREE;
14544 }
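/* Usage example (informal): the attribute handled above is written as

       struct __attribute__ ((ms_struct)) S { int a : 3; int b : 29; };

   Requesting both ms_struct and gcc_struct on the same type triggers the
   "incompatible attribute ignored" warning emitted above.  */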
14545
14546 static bool
14547 ix86_ms_bitfield_layout_p (tree record_type)
14548 {
14549 return (TARGET_USE_MS_BITFIELD_LAYOUT &&
14550 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
14551 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
14552 }
14553
14554 /* Returns an expression indicating where the this parameter is
14555 located on entry to the FUNCTION. */
14556
14557 static rtx
14558 x86_this_parameter (tree function)
14559 {
14560 tree type = TREE_TYPE (function);
14561
14562 if (TARGET_64BIT)
14563 {
14564 int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
14565 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
14566 }
14567
14568 if (ix86_function_regparm (type, function) > 0)
14569 {
14570 tree parm;
14571
14572 parm = TYPE_ARG_TYPES (type);
14573 /* Figure out whether or not the function has a variable number of
14574 arguments. */
14575 for (; parm; parm = TREE_CHAIN (parm))
14576 if (TREE_VALUE (parm) == void_type_node)
14577 break;
14578 /* If not, the this parameter is in the first argument. */
14579 if (parm)
14580 {
14581 int regno = 0;
14582 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
14583 regno = 2;
14584 return gen_rtx_REG (SImode, regno);
14585 }
14586 }
14587
14588 if (aggregate_value_p (TREE_TYPE (type), type))
14589 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
14590 else
14591 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
14592 }
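/* Worked example (informal): for a 32-bit method compiled with register
   parameter passing (and a fixed argument list), the this pointer arrives
   in %eax (register 0), or in %ecx when the fastcall attribute is present.
   Without register parameters it is read from 4(%esp), or from 8(%esp)
   when the return value is an aggregate returned via a hidden pointer.  */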
14593
14594 /* Determine whether x86_output_mi_thunk can succeed. */
14595
14596 static bool
14597 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
14598 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
14599 HOST_WIDE_INT vcall_offset, tree function)
14600 {
14601 /* 64-bit can handle anything. */
14602 if (TARGET_64BIT)
14603 return true;
14604
14605 /* For 32-bit, everything's fine if we have one free register. */
14606 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
14607 return true;
14608
14609 /* Need a free register for vcall_offset. */
14610 if (vcall_offset)
14611 return false;
14612
14613 /* Need a free register for GOT references. */
14614 if (flag_pic && !(*targetm.binds_local_p) (function))
14615 return false;
14616
14617 /* Otherwise ok. */
14618 return true;
14619 }
14620
14621 /* Output the assembler code for a thunk function. THUNK_DECL is the
14622 declaration for the thunk function itself, FUNCTION is the decl for
14623 the target function. DELTA is an immediate constant offset to be
14624 added to THIS. If VCALL_OFFSET is nonzero, the word at
14625 *(*this + vcall_offset) should be added to THIS. */
14626
14627 static void
14628 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
14629 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
14630 HOST_WIDE_INT vcall_offset, tree function)
14631 {
14632 rtx xops[3];
14633 rtx this = x86_this_parameter (function);
14634 rtx this_reg, tmp;
14635
14636 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
14637 pull it in now and let DELTA benefit. */
14638 if (REG_P (this))
14639 this_reg = this;
14640 else if (vcall_offset)
14641 {
14642 /* Put the this parameter into %eax. */
14643 xops[0] = this;
14644 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
14645 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
14646 }
14647 else
14648 this_reg = NULL_RTX;
14649
14650 /* Adjust the this parameter by a fixed constant. */
14651 if (delta)
14652 {
14653 xops[0] = GEN_INT (delta);
14654 xops[1] = this_reg ? this_reg : this;
14655 if (TARGET_64BIT)
14656 {
14657 if (!x86_64_general_operand (xops[0], DImode))
14658 {
14659 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
14660 xops[1] = tmp;
14661 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
14662 xops[0] = tmp;
14663 xops[1] = this;
14664 }
14665 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
14666 }
14667 else
14668 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
14669 }
14670
14671 /* Adjust the this parameter by a value stored in the vtable. */
14672 if (vcall_offset)
14673 {
14674 if (TARGET_64BIT)
14675 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
14676 else
14677 {
14678 int tmp_regno = 2 /* ECX */;
14679 if (lookup_attribute ("fastcall",
14680 TYPE_ATTRIBUTES (TREE_TYPE (function))))
14681 tmp_regno = 0 /* EAX */;
14682 tmp = gen_rtx_REG (SImode, tmp_regno);
14683 }
14684
14685 xops[0] = gen_rtx_MEM (Pmode, this_reg);
14686 xops[1] = tmp;
14687 if (TARGET_64BIT)
14688 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
14689 else
14690 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
14691
14692 /* Adjust the this parameter. */
14693 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
14694 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
14695 {
14696 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
14697 xops[0] = GEN_INT (vcall_offset);
14698 xops[1] = tmp2;
14699 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
14700 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
14701 }
14702 xops[1] = this_reg;
14703 if (TARGET_64BIT)
14704 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
14705 else
14706 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
14707 }
14708
14709 /* If necessary, drop THIS back to its stack slot. */
14710 if (this_reg && this_reg != this)
14711 {
14712 xops[0] = this_reg;
14713 xops[1] = this;
14714 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
14715 }
14716
14717 xops[0] = XEXP (DECL_RTL (function), 0);
14718 if (TARGET_64BIT)
14719 {
14720 if (!flag_pic || (*targetm.binds_local_p) (function))
14721 output_asm_insn ("jmp\t%P0", xops);
14722 else
14723 {
14724 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
14725 tmp = gen_rtx_CONST (Pmode, tmp);
14726 tmp = gen_rtx_MEM (QImode, tmp);
14727 xops[0] = tmp;
14728 output_asm_insn ("jmp\t%A0", xops);
14729 }
14730 }
14731 else
14732 {
14733 if (!flag_pic || (*targetm.binds_local_p) (function))
14734 output_asm_insn ("jmp\t%P0", xops);
14735 else
14736 #if TARGET_MACHO
14737 if (TARGET_MACHO)
14738 {
14739 rtx sym_ref = XEXP (DECL_RTL (function), 0);
14740 tmp = (gen_rtx_SYMBOL_REF
14741 (Pmode,
14742 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
14743 tmp = gen_rtx_MEM (QImode, tmp);
14744 xops[0] = tmp;
14745 output_asm_insn ("jmp\t%0", xops);
14746 }
14747 else
14748 #endif /* TARGET_MACHO */
14749 {
14750 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
14751 output_set_got (tmp);
14752
14753 xops[1] = tmp;
14754 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
14755 output_asm_insn ("jmp\t{*}%1", xops);
14756 }
14757 }
14758 }
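/* Worked example (informal): for a 32-bit, non-PIC thunk with DELTA == -4,
   VCALL_OFFSET == 0 and the this pointer on the stack, the code above
   emits roughly

       addl    $-4, 4(%esp)
       jmp     target_function

   where target_function stands for the real method; the this pointer is
   adjusted in place and control falls through with the original call's
   return address still on top of the stack.  */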
14759
14760 static void
14761 x86_file_start (void)
14762 {
14763 default_file_start ();
14764 if (X86_FILE_START_VERSION_DIRECTIVE)
14765 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
14766 if (X86_FILE_START_FLTUSED)
14767 fputs ("\t.global\t__fltused\n", asm_out_file);
14768 if (ix86_asm_dialect == ASM_INTEL)
14769 fputs ("\t.intel_syntax\n", asm_out_file);
14770 }
14771
14772 int
14773 x86_field_alignment (tree field, int computed)
14774 {
14775 enum machine_mode mode;
14776 tree type = TREE_TYPE (field);
14777
14778 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
14779 return computed;
14780 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
14781 ? get_inner_array_type (type) : type);
14782 if (mode == DFmode || mode == DCmode
14783 || GET_MODE_CLASS (mode) == MODE_INT
14784 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
14785 return MIN (32, computed);
14786 return computed;
14787 }
14788
14789 /* Output assembler code to FILE to increment profiler label # LABELNO
14790 for profiling a function entry. */
14791 void
14792 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
14793 {
14794 if (TARGET_64BIT)
14795 if (flag_pic)
14796 {
14797 #ifndef NO_PROFILE_COUNTERS
14798 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
14799 #endif
14800 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
14801 }
14802 else
14803 {
14804 #ifndef NO_PROFILE_COUNTERS
14805 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
14806 #endif
14807 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
14808 }
14809 else if (flag_pic)
14810 {
14811 #ifndef NO_PROFILE_COUNTERS
14812 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
14813 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
14814 #endif
14815 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
14816 }
14817 else
14818 {
14819 #ifndef NO_PROFILE_COUNTERS
14820 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
14821 PROFILE_COUNT_REGISTER);
14822 #endif
14823 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
14824 }
14825 }
14826
14827 /* We don't have exact information about the insn sizes, but we may assume
14828 quite safely that we are informed about all 1 byte insns and memory
14829 address sizes. This is enough to eliminate unnecessary padding in
14830 99% of cases. */
14831
14832 static int
14833 min_insn_size (rtx insn)
14834 {
14835 int l = 0;
14836
14837 if (!INSN_P (insn) || !active_insn_p (insn))
14838 return 0;
14839
14840 /* Discard alignments we've emitted, and jump instructions. */
14841 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
14842 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
14843 return 0;
14844 if (GET_CODE (insn) == JUMP_INSN
14845 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
14846 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
14847 return 0;
14848
14849 /* Important case - calls are always 5 bytes.
14850 It is common to have many calls in a row. */
14851 if (GET_CODE (insn) == CALL_INSN
14852 && symbolic_reference_mentioned_p (PATTERN (insn))
14853 && !SIBLING_CALL_P (insn))
14854 return 5;
14855 if (get_attr_length (insn) <= 1)
14856 return 1;
14857
14858 /* For normal instructions we may rely on the sizes of addresses
14859 and the presence of a symbol to require 4 bytes of encoding.
14860 This is not the case for jumps where references are PC relative. */
14861 if (GET_CODE (insn) != JUMP_INSN)
14862 {
14863 l = get_attr_length_address (insn);
14864 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
14865 l = 4;
14866 }
14867 if (l)
14868 return 1+l;
14869 else
14870 return 2;
14871 }
14872
14873 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
14874 window. */
14875
14876 static void
14877 ix86_avoid_jump_misspredicts (void)
14878 {
14879 rtx insn, start = get_insns ();
14880 int nbytes = 0, njumps = 0;
14881 int isjump = 0;
14882
14883 /* Look for all minimal intervals of instructions containing 4 jumps.
14884 The intervals are bounded by START and INSN. NBYTES is the total
14885 size of instructions in the interval including INSN and not including
14886 START. When NBYTES is smaller than 16 bytes, it is possible
14887 that the end of START and INSN ends up in the same 16-byte window.
14888
14889 The smallest offset in the page INSN can start is the case where START
14890 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
14891 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
14892 */
14893 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
14894 {
14895
14896 nbytes += min_insn_size (insn);
14897 if (dump_file)
14898 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
14899 INSN_UID (insn), min_insn_size (insn));
14900 if ((GET_CODE (insn) == JUMP_INSN
14901 && GET_CODE (PATTERN (insn)) != ADDR_VEC
14902 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
14903 || GET_CODE (insn) == CALL_INSN)
14904 njumps++;
14905 else
14906 continue;
14907
14908 while (njumps > 3)
14909 {
14910 start = NEXT_INSN (start);
14911 if ((GET_CODE (start) == JUMP_INSN
14912 && GET_CODE (PATTERN (start)) != ADDR_VEC
14913 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
14914 || GET_CODE (start) == CALL_INSN)
14915 njumps--, isjump = 1;
14916 else
14917 isjump = 0;
14918 nbytes -= min_insn_size (start);
14919 }
14920 if (njumps < 0)
14921 abort ();
14922 if (dump_file)
14923 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
14924 INSN_UID (start), INSN_UID (insn), nbytes);
14925
14926 if (njumps == 3 && isjump && nbytes < 16)
14927 {
14928 int padsize = 15 - nbytes + min_insn_size (insn);
14929
14930 if (dump_file)
14931 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
14932 INSN_UID (insn), padsize);
14933 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
14934 }
14935 }
14936 }
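/* Worked example (informal): suppose the window has just been shrunk past a
   fourth jump, leaving njumps == 3, and the instructions from the new START
   up to and including the current 2-byte jump total nbytes == 12.  Since
   nbytes < 16, all four jumps could still land in one 16-byte block, so
   gen_align (GEN_INT (15 - 12 + 2)) inserts 5 bytes of padding before the
   current jump.  */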
14937
14938 /* AMD Athlon works faster
14939 when RET is not the destination of a conditional jump and is not directly
14940 preceded by another jump instruction. We avoid the penalty by inserting a
14941 NOP just before the RET instruction in such cases. */
14942 static void
14943 ix86_pad_returns (void)
14944 {
14945 edge e;
14946 edge_iterator ei;
14947
14948 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
14949 {
14950 basic_block bb = e->src;
14951 rtx ret = BB_END (bb);
14952 rtx prev;
14953 bool replace = false;
14954
14955 if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
14956 || !maybe_hot_bb_p (bb))
14957 continue;
14958 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
14959 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
14960 break;
14961 if (prev && GET_CODE (prev) == CODE_LABEL)
14962 {
14963 edge e;
14964 edge_iterator ei;
14965
14966 FOR_EACH_EDGE (e, ei, bb->preds)
14967 if (EDGE_FREQUENCY (e) && e->src->index >= 0
14968 && !(e->flags & EDGE_FALLTHRU))
14969 replace = true;
14970 }
14971 if (!replace)
14972 {
14973 prev = prev_active_insn (ret);
14974 if (prev
14975 && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
14976 || GET_CODE (prev) == CALL_INSN))
14977 replace = true;
14978 /* Empty functions get a branch mispredict even when the jump destination
14979 is not visible to us. */
14980 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
14981 replace = true;
14982 }
14983 if (replace)
14984 {
14985 emit_insn_before (gen_return_internal_long (), ret);
14986 delete_insn (ret);
14987 }
14988 }
14989 }
14990
14991 /* Implement machine specific optimizations. We implement padding of returns
14992 for K8 CPUs and a pass to avoid 4 jumps in a single 16 byte window. */
14993 static void
14994 ix86_reorg (void)
14995 {
14996 if (TARGET_ATHLON_K8 && optimize && !optimize_size)
14997 ix86_pad_returns ();
14998 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
14999 ix86_avoid_jump_misspredicts ();
15000 }
15001
15002 /* Return nonzero when a QImode register that must be represented via a
15003 REX prefix is used. */
15004 bool
15005 x86_extended_QIreg_mentioned_p (rtx insn)
15006 {
15007 int i;
15008 extract_insn_cached (insn);
15009 for (i = 0; i < recog_data.n_operands; i++)
15010 if (REG_P (recog_data.operand[i])
15011 && REGNO (recog_data.operand[i]) >= 4)
15012 return true;
15013 return false;
15014 }
15015
15016 /* Return nonzero when P points to a register encoded via a REX prefix.
15017 Called via for_each_rtx. */
15018 static int
15019 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
15020 {
15021 unsigned int regno;
15022 if (!REG_P (*p))
15023 return 0;
15024 regno = REGNO (*p);
15025 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
15026 }
15027
15028 /* Return true when INSN mentions a register that must be encoded using a
15029 REX prefix. */
15030 bool
15031 x86_extended_reg_mentioned_p (rtx insn)
15032 {
15033 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
15034 }
15035
15036 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
15037 optabs would emit if we didn't have TFmode patterns. */
15038
15039 void
15040 x86_emit_floatuns (rtx operands[2])
15041 {
15042 rtx neglab, donelab, i0, i1, f0, in, out;
15043 enum machine_mode mode, inmode;
15044
15045 inmode = GET_MODE (operands[1]);
15046 if (inmode != SImode
15047 && inmode != DImode)
15048 abort ();
15049
15050 out = operands[0];
15051 in = force_reg (inmode, operands[1]);
15052 mode = GET_MODE (out);
15053 neglab = gen_label_rtx ();
15054 donelab = gen_label_rtx ();
15055 i1 = gen_reg_rtx (Pmode);
15056 f0 = gen_reg_rtx (mode);
15057
15058 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
15059
15060 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
15061 emit_jump_insn (gen_jump (donelab));
15062 emit_barrier ();
15063
15064 emit_label (neglab);
15065
15066 i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15067 i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15068 i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
15069 expand_float (f0, i0, 0);
15070 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
15071
15072 emit_label (donelab);
15073 }
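/* Worked example (informal): for a 32-bit unsigned input u with the sign
   bit set, say u == 0xFFFFFFFE (4294967294), the slow path computes
   i0 = (u >> 1) | (u & 1) == 0x7FFFFFFF, converts that as a signed value
   and doubles the result: 2147483647.0 * 2 == 4294967294.0, recovering
   the unsigned value.  ORing in the low bit keeps rounding correct when
   the result does not fit the significand exactly.  */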
15074
15075 /* Initialize vector TARGET via VALS. */
15076 void
15077 ix86_expand_vector_init (rtx target, rtx vals)
15078 {
15079 enum machine_mode mode = GET_MODE (target);
15080 int elt_size = GET_MODE_SIZE (GET_MODE_INNER (mode));
15081 int n_elts = (GET_MODE_SIZE (mode) / elt_size);
15082 int i;
15083
15084 for (i = n_elts - 1; i >= 0; i--)
15085 if (GET_CODE (XVECEXP (vals, 0, i)) != CONST_INT
15086 && GET_CODE (XVECEXP (vals, 0, i)) != CONST_DOUBLE)
15087 break;
15088
15089 /* A few special cases first...
15090 ... constants are best loaded from the constant pool. */
15091 if (i < 0)
15092 {
15093 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
15094 return;
15095 }
15096
15097 /* ... values where only the first field is non-constant are best loaded
15098 from the pool and overwritten via a move later. */
15099 if (!i)
15100 {
15101 rtx op = simplify_gen_subreg (mode, XVECEXP (vals, 0, 0),
15102 GET_MODE_INNER (mode), 0);
15103
15104 op = force_reg (mode, op);
15105 XVECEXP (vals, 0, 0) = CONST0_RTX (GET_MODE_INNER (mode));
15106 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
15107 switch (GET_MODE (target))
15108 {
15109 case V2DFmode:
15110 emit_insn (gen_sse2_movsd (target, target, op));
15111 break;
15112 case V4SFmode:
15113 emit_insn (gen_sse_movss (target, target, op));
15114 break;
15115 default:
15116 break;
15117 }
15118 return;
15119 }
15120
15121 /* Otherwise, build the vector with the longer unpack (interleave) sequence. */
15122 switch (GET_MODE (target))
15123 {
15124 case V2DFmode:
15125 {
15126 rtx vecop0 =
15127 simplify_gen_subreg (V2DFmode, XVECEXP (vals, 0, 0), DFmode, 0);
15128 rtx vecop1 =
15129 simplify_gen_subreg (V2DFmode, XVECEXP (vals, 0, 1), DFmode, 0);
15130
15131 vecop0 = force_reg (V2DFmode, vecop0);
15132 vecop1 = force_reg (V2DFmode, vecop1);
15133 emit_insn (gen_sse2_unpcklpd (target, vecop0, vecop1));
15134 }
15135 break;
15136 case V4SFmode:
15137 {
15138 rtx vecop0 =
15139 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 0), SFmode, 0);
15140 rtx vecop1 =
15141 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 1), SFmode, 0);
15142 rtx vecop2 =
15143 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 2), SFmode, 0);
15144 rtx vecop3 =
15145 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 3), SFmode, 0);
15146 rtx tmp1 = gen_reg_rtx (V4SFmode);
15147 rtx tmp2 = gen_reg_rtx (V4SFmode);
15148
15149 vecop0 = force_reg (V4SFmode, vecop0);
15150 vecop1 = force_reg (V4SFmode, vecop1);
15151 vecop2 = force_reg (V4SFmode, vecop2);
15152 vecop3 = force_reg (V4SFmode, vecop3);
15153 emit_insn (gen_sse_unpcklps (tmp1, vecop1, vecop3));
15154 emit_insn (gen_sse_unpcklps (tmp2, vecop0, vecop2));
15155 emit_insn (gen_sse_unpcklps (target, tmp2, tmp1));
15156 }
15157 break;
15158 default:
15159 abort ();
15160 }
15161 }
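/* Worked example (informal): building a V4SF vector {x0, x1, x2, x3} from
   four non-constant floats uses the unpack sequence above:

       tmp1   = unpcklps (x1, x3)     ->  { x1, x3, _, _ }
       tmp2   = unpcklps (x0, x2)     ->  { x0, x2, _, _ }
       target = unpcklps (tmp2, tmp1) ->  { x0, x1, x2, x3 }

   since unpcklps interleaves the low elements of its two operands.  */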
15162
15163 /* Implements target hook vector_mode_supported_p. */
15164 static bool
15165 ix86_vector_mode_supported_p (enum machine_mode mode)
15166 {
15167 if (TARGET_SSE
15168 && VALID_SSE_REG_MODE (mode))
15169 return true;
15170
15171 else if (TARGET_MMX
15172 && VALID_MMX_REG_MODE (mode))
15173 return true;
15174
15175 else if (TARGET_3DNOW
15176 && VALID_MMX_REG_MODE_3DNOW (mode))
15177 return true;
15178
15179 else
15180 return false;
15181 }
15182
15183 /* Worker function for TARGET_MD_ASM_CLOBBERS.
15184
15185 We do this in the new i386 backend to maintain source compatibility
15186 with the old cc0-based compiler. */
15187
15188 static tree
15189 ix86_md_asm_clobbers (tree clobbers)
15190 {
15191 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
15192 clobbers);
15193 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
15194 clobbers);
15195 clobbers = tree_cons (NULL_TREE, build_string (7, "dirflag"),
15196 clobbers);
15197 return clobbers;
15198 }
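/* Effect (informal): an inline asm such as

       asm ("cld; rep movsb" : : "S" (src), "D" (dst), "c" (n) : "memory");

   is expanded as if "flags", "fpsr" and "dirflag" had also been listed as
   clobbers, preserving the assumptions the old cc0-based backend made
   implicitly.  The operands and constraints in the example are purely
   illustrative.  */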
15199
15200 /* Worker function for REVERSE_CONDITION. */
15201
15202 enum rtx_code
15203 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
15204 {
15205 return (mode != CCFPmode && mode != CCFPUmode
15206 ? reverse_condition (code)
15207 : reverse_condition_maybe_unordered (code));
15208 }
15209
15210 /* Output code to perform an x87 FP register move, from OPERANDS[1]
15211 to OPERANDS[0]. */
15212
15213 const char *
15214 output_387_reg_move (rtx insn, rtx *operands)
15215 {
15216 if (REG_P (operands[1])
15217 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
15218 {
15219 if (REGNO (operands[0]) == FIRST_STACK_REG
15220 && TARGET_USE_FFREEP)
15221 return "ffreep\t%y0";
15222 return "fstp\t%y0";
15223 }
15224 if (STACK_TOP_P (operands[0]))
15225 return "fld%z1\t%y1";
15226 return "fst\t%y0";
15227 }
15228
15229 /* Output code to perform a conditional jump to LABEL, if C2 flag in
15230 FP status register is set. */
15231
15232 void
15233 ix86_emit_fp_unordered_jump (rtx label)
15234 {
15235 rtx reg = gen_reg_rtx (HImode);
15236 rtx temp;
15237
15238 emit_insn (gen_x86_fnstsw_1 (reg));
15239
15240 if (TARGET_USE_SAHF)
15241 {
15242 emit_insn (gen_x86_sahf_1 (reg));
15243
15244 temp = gen_rtx_REG (CCmode, FLAGS_REG);
15245 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
15246 }
15247 else
15248 {
15249 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
15250
15251 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
15252 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
15253 }
15254
15255 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
15256 gen_rtx_LABEL_REF (VOIDmode, label),
15257 pc_rtx);
15258 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
15259 emit_jump_insn (temp);
15260 }
15261
15262 /* Output code to perform a log1p XFmode calculation. */
15263
15264 void ix86_emit_i387_log1p (rtx op0, rtx op1)
15265 {
15266 rtx label1 = gen_label_rtx ();
15267 rtx label2 = gen_label_rtx ();
15268
15269 rtx tmp = gen_reg_rtx (XFmode);
15270 rtx tmp2 = gen_reg_rtx (XFmode);
15271
15272 emit_insn (gen_absxf2 (tmp, op1));
15273 emit_insn (gen_cmpxf (tmp,
15274 CONST_DOUBLE_FROM_REAL_VALUE (
15275 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
15276 XFmode)));
15277 emit_jump_insn (gen_bge (label1));
15278
15279 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
15280 emit_insn (gen_fyl2xp1_xf3 (op0, tmp2, op1));
15281 emit_jump (label2);
15282
15283 emit_label (label1);
15284 emit_move_insn (tmp, CONST1_RTX (XFmode));
15285 emit_insn (gen_addxf3 (tmp, op1, tmp));
15286 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
15287 emit_insn (gen_fyl2x_xf3 (op0, tmp2, tmp));
15288
15289 emit_label (label2);
15290 }
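/* The math behind the sequence above (informal): log1p(x) == ln(2) * log2(1 + x).
   The x87 fyl2xp1 instruction computes y * log2(x + 1) and is specified only
   for |x| < 1 - sqrt(2)/2 ~= 0.29289321881..., which is the constant compared
   against above; inside that range we use fyl2xp1 with y = ln(2), and outside
   it we form 1 + x explicitly and use fyl2x instead.
   standard_80387_constant_rtx (4) loads the fldln2 constant ln(2).  */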
15291
15292 /* Solaris named-section hook. Parameters are as for
15293 named_section_real. */
15294
15295 static void
15296 i386_solaris_elf_named_section (const char *name, unsigned int flags,
15297 tree decl)
15298 {
15299 /* With Binutils 2.15, the "@unwind" marker must be specified on
15300 every occurrence of the ".eh_frame" section, not just the first
15301 one. */
15302 if (TARGET_64BIT
15303 && strcmp (name, ".eh_frame") == 0)
15304 {
15305 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
15306 flags & SECTION_WRITE ? "aw" : "a");
15307 return;
15308 }
15309 default_elf_asm_named_section (name, flags, decl);
15310 }
15311
15312 #include "gt-i386.h"