1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "tm_p.h"
29 #include "regs.h"
30 #include "hard-reg-set.h"
31 #include "real.h"
32 #include "insn-config.h"
33 #include "conditions.h"
34 #include "output.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
37 #include "flags.h"
38 #include "except.h"
39 #include "function.h"
40 #include "recog.h"
41 #include "expr.h"
42 #include "optabs.h"
43 #include "toplev.h"
44 #include "basic-block.h"
45 #include "ggc.h"
46 #include "target.h"
47 #include "target-def.h"
48 #include "langhooks.h"
49 #include "cgraph.h"
50 #include "tree-gimple.h"
51
52 #ifndef CHECK_STACK_LIMIT
53 #define CHECK_STACK_LIMIT (-1)
54 #endif
55
56 /* Return the index of the given mode in the mult and division cost tables. */
57 #define MODE_INDEX(mode) \
58 ((mode) == QImode ? 0 \
59 : (mode) == HImode ? 1 \
60 : (mode) == SImode ? 2 \
61 : (mode) == DImode ? 3 \
62 : 4)
63
64 /* Processor costs (relative to an add) */
65 static const
66 struct processor_costs size_cost = { /* costs for tuning for size */
67 2, /* cost of an add instruction */
68 3, /* cost of a lea instruction */
69 2, /* variable shift costs */
70 3, /* constant shift costs */
71 {3, 3, 3, 3, 5}, /* cost of starting a multiply */
72 0, /* cost of multiply per each bit set */
73 {3, 3, 3, 3, 5}, /* cost of a divide/mod */
74 3, /* cost of movsx */
75 3, /* cost of movzx */
76 0, /* "large" insn */
77 2, /* MOVE_RATIO */
78 2, /* cost for loading QImode using movzbl */
79 {2, 2, 2}, /* cost of loading integer registers
80 in QImode, HImode and SImode.
81 Relative to reg-reg move (2). */
82 {2, 2, 2}, /* cost of storing integer registers */
83 2, /* cost of reg,reg fld/fst */
84 {2, 2, 2}, /* cost of loading fp registers
85 in SFmode, DFmode and XFmode */
86 {2, 2, 2}, /* cost of storing fp registers */
87 3, /* cost of moving MMX register */
88 {3, 3}, /* cost of loading MMX registers
89 in SImode and DImode */
90 {3, 3}, /* cost of storing MMX registers
91 in SImode and DImode */
92 3, /* cost of moving SSE register */
93 {3, 3, 3}, /* cost of loading SSE registers
94 in SImode, DImode and TImode */
95 {3, 3, 3}, /* cost of storing SSE registers
96 in SImode, DImode and TImode */
97 3, /* MMX or SSE register to integer */
98 0, /* size of prefetch block */
99 0, /* number of parallel prefetches */
100 1, /* Branch cost */
101 2, /* cost of FADD and FSUB insns. */
102 2, /* cost of FMUL instruction. */
103 2, /* cost of FDIV instruction. */
104 2, /* cost of FABS instruction. */
105 2, /* cost of FCHS instruction. */
106 2, /* cost of FSQRT instruction. */
107 };
108
109 /* Processor costs (relative to an add) */
110 static const
111 struct processor_costs i386_cost = { /* 386 specific costs */
112 1, /* cost of an add instruction */
113 1, /* cost of a lea instruction */
114 3, /* variable shift costs */
115 2, /* constant shift costs */
116 {6, 6, 6, 6, 6}, /* cost of starting a multiply */
117 1, /* cost of multiply per each bit set */
118 {23, 23, 23, 23, 23}, /* cost of a divide/mod */
119 3, /* cost of movsx */
120 2, /* cost of movzx */
121 15, /* "large" insn */
122 3, /* MOVE_RATIO */
123 4, /* cost for loading QImode using movzbl */
124 {2, 4, 2}, /* cost of loading integer registers
125 in QImode, HImode and SImode.
126 Relative to reg-reg move (2). */
127 {2, 4, 2}, /* cost of storing integer registers */
128 2, /* cost of reg,reg fld/fst */
129 {8, 8, 8}, /* cost of loading fp registers
130 in SFmode, DFmode and XFmode */
131 {8, 8, 8}, /* cost of storing fp registers */
132 2, /* cost of moving MMX register */
133 {4, 8}, /* cost of loading MMX registers
134 in SImode and DImode */
135 {4, 8}, /* cost of storing MMX registers
136 in SImode and DImode */
137 2, /* cost of moving SSE register */
138 {4, 8, 16}, /* cost of loading SSE registers
139 in SImode, DImode and TImode */
140 {4, 8, 16}, /* cost of storing SSE registers
141 in SImode, DImode and TImode */
142 3, /* MMX or SSE register to integer */
143 0, /* size of prefetch block */
144 0, /* number of parallel prefetches */
145 1, /* Branch cost */
146 23, /* cost of FADD and FSUB insns. */
147 27, /* cost of FMUL instruction. */
148 88, /* cost of FDIV instruction. */
149 22, /* cost of FABS instruction. */
150 24, /* cost of FCHS instruction. */
151 122, /* cost of FSQRT instruction. */
152 };
153
154 static const
155 struct processor_costs i486_cost = { /* 486 specific costs */
156 1, /* cost of an add instruction */
157 1, /* cost of a lea instruction */
158 3, /* variable shift costs */
159 2, /* constant shift costs */
160 {12, 12, 12, 12, 12}, /* cost of starting a multiply */
161 1, /* cost of multiply per each bit set */
162 {40, 40, 40, 40, 40}, /* cost of a divide/mod */
163 3, /* cost of movsx */
164 2, /* cost of movzx */
165 15, /* "large" insn */
166 3, /* MOVE_RATIO */
167 4, /* cost for loading QImode using movzbl */
168 {2, 4, 2}, /* cost of loading integer registers
169 in QImode, HImode and SImode.
170 Relative to reg-reg move (2). */
171 {2, 4, 2}, /* cost of storing integer registers */
172 2, /* cost of reg,reg fld/fst */
173 {8, 8, 8}, /* cost of loading fp registers
174 in SFmode, DFmode and XFmode */
175 {8, 8, 8}, /* cost of storing fp registers */
176 2, /* cost of moving MMX register */
177 {4, 8}, /* cost of loading MMX registers
178 in SImode and DImode */
179 {4, 8}, /* cost of storing MMX registers
180 in SImode and DImode */
181 2, /* cost of moving SSE register */
182 {4, 8, 16}, /* cost of loading SSE registers
183 in SImode, DImode and TImode */
184 {4, 8, 16}, /* cost of storing SSE registers
185 in SImode, DImode and TImode */
186 3, /* MMX or SSE register to integer */
187 0, /* size of prefetch block */
188 0, /* number of parallel prefetches */
189 1, /* Branch cost */
190 8, /* cost of FADD and FSUB insns. */
191 16, /* cost of FMUL instruction. */
192 73, /* cost of FDIV instruction. */
193 3, /* cost of FABS instruction. */
194 3, /* cost of FCHS instruction. */
195 83, /* cost of FSQRT instruction. */
196 };
197
198 static const
199 struct processor_costs pentium_cost = {
200 1, /* cost of an add instruction */
201 1, /* cost of a lea instruction */
202 4, /* variable shift costs */
203 1, /* constant shift costs */
204 {11, 11, 11, 11, 11}, /* cost of starting a multiply */
205 0, /* cost of multiply per each bit set */
206 {25, 25, 25, 25, 25}, /* cost of a divide/mod */
207 3, /* cost of movsx */
208 2, /* cost of movzx */
209 8, /* "large" insn */
210 6, /* MOVE_RATIO */
211 6, /* cost for loading QImode using movzbl */
212 {2, 4, 2}, /* cost of loading integer registers
213 in QImode, HImode and SImode.
214 Relative to reg-reg move (2). */
215 {2, 4, 2}, /* cost of storing integer registers */
216 2, /* cost of reg,reg fld/fst */
217 {2, 2, 6}, /* cost of loading fp registers
218 in SFmode, DFmode and XFmode */
219 {4, 4, 6}, /* cost of storing fp registers */
220 8, /* cost of moving MMX register */
221 {8, 8}, /* cost of loading MMX registers
222 in SImode and DImode */
223 {8, 8}, /* cost of storing MMX registers
224 in SImode and DImode */
225 2, /* cost of moving SSE register */
226 {4, 8, 16}, /* cost of loading SSE registers
227 in SImode, DImode and TImode */
228 {4, 8, 16}, /* cost of storing SSE registers
229 in SImode, DImode and TImode */
230 3, /* MMX or SSE register to integer */
231 0, /* size of prefetch block */
232 0, /* number of parallel prefetches */
233 2, /* Branch cost */
234 3, /* cost of FADD and FSUB insns. */
235 3, /* cost of FMUL instruction. */
236 39, /* cost of FDIV instruction. */
237 1, /* cost of FABS instruction. */
238 1, /* cost of FCHS instruction. */
239 70, /* cost of FSQRT instruction. */
240 };
241
242 static const
243 struct processor_costs pentiumpro_cost = {
244 1, /* cost of an add instruction */
245 1, /* cost of a lea instruction */
246 1, /* variable shift costs */
247 1, /* constant shift costs */
248 {4, 4, 4, 4, 4}, /* cost of starting a multiply */
249 0, /* cost of multiply per each bit set */
250 {17, 17, 17, 17, 17}, /* cost of a divide/mod */
251 1, /* cost of movsx */
252 1, /* cost of movzx */
253 8, /* "large" insn */
254 6, /* MOVE_RATIO */
255 2, /* cost for loading QImode using movzbl */
256 {4, 4, 4}, /* cost of loading integer registers
257 in QImode, HImode and SImode.
258 Relative to reg-reg move (2). */
259 {2, 2, 2}, /* cost of storing integer registers */
260 2, /* cost of reg,reg fld/fst */
261 {2, 2, 6}, /* cost of loading fp registers
262 in SFmode, DFmode and XFmode */
263 {4, 4, 6}, /* cost of storing fp registers */
264 2, /* cost of moving MMX register */
265 {2, 2}, /* cost of loading MMX registers
266 in SImode and DImode */
267 {2, 2}, /* cost of storing MMX registers
268 in SImode and DImode */
269 2, /* cost of moving SSE register */
270 {2, 2, 8}, /* cost of loading SSE registers
271 in SImode, DImode and TImode */
272 {2, 2, 8}, /* cost of storing SSE registers
273 in SImode, DImode and TImode */
274 3, /* MMX or SSE register to integer */
275 32, /* size of prefetch block */
276 6, /* number of parallel prefetches */
277 2, /* Branch cost */
278 3, /* cost of FADD and FSUB insns. */
279 5, /* cost of FMUL instruction. */
280 56, /* cost of FDIV instruction. */
281 2, /* cost of FABS instruction. */
282 2, /* cost of FCHS instruction. */
283 56, /* cost of FSQRT instruction. */
284 };
285
286 static const
287 struct processor_costs k6_cost = {
288 1, /* cost of an add instruction */
289 2, /* cost of a lea instruction */
290 1, /* variable shift costs */
291 1, /* constant shift costs */
292 {3, 3, 3, 3, 3}, /* cost of starting a multiply */
293 0, /* cost of multiply per each bit set */
294 {18, 18, 18, 18, 18}, /* cost of a divide/mod */
295 2, /* cost of movsx */
296 2, /* cost of movzx */
297 8, /* "large" insn */
298 4, /* MOVE_RATIO */
299 3, /* cost for loading QImode using movzbl */
300 {4, 5, 4}, /* cost of loading integer registers
301 in QImode, HImode and SImode.
302 Relative to reg-reg move (2). */
303 {2, 3, 2}, /* cost of storing integer registers */
304 4, /* cost of reg,reg fld/fst */
305 {6, 6, 6}, /* cost of loading fp registers
306 in SFmode, DFmode and XFmode */
307 {4, 4, 4}, /* cost of storing fp registers */
308 2, /* cost of moving MMX register */
309 {2, 2}, /* cost of loading MMX registers
310 in SImode and DImode */
311 {2, 2}, /* cost of storing MMX registers
312 in SImode and DImode */
313 2, /* cost of moving SSE register */
314 {2, 2, 8}, /* cost of loading SSE registers
315 in SImode, DImode and TImode */
316 {2, 2, 8}, /* cost of storing SSE registers
317 in SImode, DImode and TImode */
318 6, /* MMX or SSE register to integer */
319 32, /* size of prefetch block */
320 1, /* number of parallel prefetches */
321 1, /* Branch cost */
322 2, /* cost of FADD and FSUB insns. */
323 2, /* cost of FMUL instruction. */
324 56, /* cost of FDIV instruction. */
325 2, /* cost of FABS instruction. */
326 2, /* cost of FCHS instruction. */
327 56, /* cost of FSQRT instruction. */
328 };
329
330 static const
331 struct processor_costs athlon_cost = {
332 1, /* cost of an add instruction */
333 2, /* cost of a lea instruction */
334 1, /* variable shift costs */
335 1, /* constant shift costs */
336 {5, 5, 5, 5, 5}, /* cost of starting a multiply */
337 0, /* cost of multiply per each bit set */
338 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
339 1, /* cost of movsx */
340 1, /* cost of movzx */
341 8, /* "large" insn */
342 9, /* MOVE_RATIO */
343 4, /* cost for loading QImode using movzbl */
344 {3, 4, 3}, /* cost of loading integer registers
345 in QImode, HImode and SImode.
346 Relative to reg-reg move (2). */
347 {3, 4, 3}, /* cost of storing integer registers */
348 4, /* cost of reg,reg fld/fst */
349 {4, 4, 12}, /* cost of loading fp registers
350 in SFmode, DFmode and XFmode */
351 {6, 6, 8}, /* cost of storing fp registers */
352 2, /* cost of moving MMX register */
353 {4, 4}, /* cost of loading MMX registers
354 in SImode and DImode */
355 {4, 4}, /* cost of storing MMX registers
356 in SImode and DImode */
357 2, /* cost of moving SSE register */
358 {4, 4, 6}, /* cost of loading SSE registers
359 in SImode, DImode and TImode */
360 {4, 4, 5}, /* cost of storing SSE registers
361 in SImode, DImode and TImode */
362 5, /* MMX or SSE register to integer */
363 64, /* size of prefetch block */
364 6, /* number of parallel prefetches */
365 5, /* Branch cost */
366 4, /* cost of FADD and FSUB insns. */
367 4, /* cost of FMUL instruction. */
368 24, /* cost of FDIV instruction. */
369 2, /* cost of FABS instruction. */
370 2, /* cost of FCHS instruction. */
371 35, /* cost of FSQRT instruction. */
372 };
373
374 static const
375 struct processor_costs k8_cost = {
376 1, /* cost of an add instruction */
377 2, /* cost of a lea instruction */
378 1, /* variable shift costs */
379 1, /* constant shift costs */
380 {3, 4, 3, 4, 5}, /* cost of starting a multiply */
381 0, /* cost of multiply per each bit set */
382 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
383 1, /* cost of movsx */
384 1, /* cost of movzx */
385 8, /* "large" insn */
386 9, /* MOVE_RATIO */
387 4, /* cost for loading QImode using movzbl */
388 {3, 4, 3}, /* cost of loading integer registers
389 in QImode, HImode and SImode.
390 Relative to reg-reg move (2). */
391 {3, 4, 3}, /* cost of storing integer registers */
392 4, /* cost of reg,reg fld/fst */
393 {4, 4, 12}, /* cost of loading fp registers
394 in SFmode, DFmode and XFmode */
395 {6, 6, 8}, /* cost of storing fp registers */
396 2, /* cost of moving MMX register */
397 {3, 3}, /* cost of loading MMX registers
398 in SImode and DImode */
399 {4, 4}, /* cost of storing MMX registers
400 in SImode and DImode */
401 2, /* cost of moving SSE register */
402 {4, 3, 6}, /* cost of loading SSE registers
403 in SImode, DImode and TImode */
404 {4, 4, 5}, /* cost of storing SSE registers
405 in SImode, DImode and TImode */
406 5, /* MMX or SSE register to integer */
407 64, /* size of prefetch block */
408 6, /* number of parallel prefetches */
409 5, /* Branch cost */
410 4, /* cost of FADD and FSUB insns. */
411 4, /* cost of FMUL instruction. */
412 19, /* cost of FDIV instruction. */
413 2, /* cost of FABS instruction. */
414 2, /* cost of FCHS instruction. */
415 35, /* cost of FSQRT instruction. */
416 };
417
418 static const
419 struct processor_costs pentium4_cost = {
420 1, /* cost of an add instruction */
421 3, /* cost of a lea instruction */
422 4, /* variable shift costs */
423 4, /* constant shift costs */
424 {15, 15, 15, 15, 15}, /* cost of starting a multiply */
425 0, /* cost of multiply per each bit set */
426 {56, 56, 56, 56, 56}, /* cost of a divide/mod */
427 1, /* cost of movsx */
428 1, /* cost of movzx */
429 16, /* "large" insn */
430 6, /* MOVE_RATIO */
431 2, /* cost for loading QImode using movzbl */
432 {4, 5, 4}, /* cost of loading integer registers
433 in QImode, HImode and SImode.
434 Relative to reg-reg move (2). */
435 {2, 3, 2}, /* cost of storing integer registers */
436 2, /* cost of reg,reg fld/fst */
437 {2, 2, 6}, /* cost of loading fp registers
438 in SFmode, DFmode and XFmode */
439 {4, 4, 6}, /* cost of storing fp registers */
440 2, /* cost of moving MMX register */
441 {2, 2}, /* cost of loading MMX registers
442 in SImode and DImode */
443 {2, 2}, /* cost of storing MMX registers
444 in SImode and DImode */
445 12, /* cost of moving SSE register */
446 {12, 12, 12}, /* cost of loading SSE registers
447 in SImode, DImode and TImode */
448 {2, 2, 8}, /* cost of storing SSE registers
449 in SImode, DImode and TImode */
450 10, /* MMX or SSE register to integer */
451 64, /* size of prefetch block */
452 6, /* number of parallel prefetches */
453 2, /* Branch cost */
454 5, /* cost of FADD and FSUB insns. */
455 7, /* cost of FMUL instruction. */
456 43, /* cost of FDIV instruction. */
457 2, /* cost of FABS instruction. */
458 2, /* cost of FCHS instruction. */
459 43, /* cost of FSQRT instruction. */
460 };
461
462 static const
463 struct processor_costs nocona_cost = {
464 1, /* cost of an add instruction */
465 1, /* cost of a lea instruction */
466 1, /* variable shift costs */
467 1, /* constant shift costs */
468 {10, 10, 10, 10, 10}, /* cost of starting a multiply */
469 0, /* cost of multiply per each bit set */
470 {66, 66, 66, 66, 66}, /* cost of a divide/mod */
471 1, /* cost of movsx */
472 1, /* cost of movzx */
473 16, /* "large" insn */
474 9, /* MOVE_RATIO */
475 4, /* cost for loading QImode using movzbl */
476 {4, 4, 4}, /* cost of loading integer registers
477 in QImode, HImode and SImode.
478 Relative to reg-reg move (2). */
479 {4, 4, 4}, /* cost of storing integer registers */
480 3, /* cost of reg,reg fld/fst */
481 {12, 12, 12}, /* cost of loading fp registers
482 in SFmode, DFmode and XFmode */
483 {4, 4, 4}, /* cost of storing fp registers */
484 6, /* cost of moving MMX register */
485 {12, 12}, /* cost of loading MMX registers
486 in SImode and DImode */
487 {12, 12}, /* cost of storing MMX registers
488 in SImode and DImode */
489 6, /* cost of moving SSE register */
490 {12, 12, 12}, /* cost of loading SSE registers
491 in SImode, DImode and TImode */
492 {12, 12, 12}, /* cost of storing SSE registers
493 in SImode, DImode and TImode */
494 8, /* MMX or SSE register to integer */
495 128, /* size of prefetch block */
496 8, /* number of parallel prefetches */
497 1, /* Branch cost */
498 6, /* cost of FADD and FSUB insns. */
499 8, /* cost of FMUL instruction. */
500 40, /* cost of FDIV instruction. */
501 3, /* cost of FABS instruction. */
502 3, /* cost of FCHS instruction. */
503 44, /* cost of FSQRT instruction. */
504 };
505
506 const struct processor_costs *ix86_cost = &pentium_cost;
507
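/* Editor's illustrative sketch -- not part of the original file.  MODE_INDEX
   above maps QImode..DImode to columns 0..3 of the per-mode cost arrays in
   the tables just defined, and any other mode to the last column (4).  A
   typical use, assuming the mult_init field name from struct processor_costs,
   would look like this: */
static inline int
example_mult_init_cost (enum machine_mode mode)
{
  /* Cost of starting a multiply in the given mode on the selected CPU.  */
  return ix86_cost->mult_init[MODE_INDEX (mode)];
}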
508 /* Processor feature/optimization bitmasks. */
509 #define m_386 (1<<PROCESSOR_I386)
510 #define m_486 (1<<PROCESSOR_I486)
511 #define m_PENT (1<<PROCESSOR_PENTIUM)
512 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
513 #define m_K6 (1<<PROCESSOR_K6)
514 #define m_ATHLON (1<<PROCESSOR_ATHLON)
515 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
516 #define m_K8 (1<<PROCESSOR_K8)
517 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
518 #define m_NOCONA (1<<PROCESSOR_NOCONA)
519
520 const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
521 const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
522 const int x86_zero_extend_with_and = m_486 | m_PENT;
523 const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA /* m_386 | m_K6 */;
524 const int x86_double_with_add = ~m_386;
525 const int x86_use_bit_test = m_386;
526 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
527 const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
528 const int x86_3dnow_a = m_ATHLON_K8;
529 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
530 /* Branch hints were added to the P4 based on simulation results, but
531 after the P4 shipped, no performance benefit was observed from
532 branch hints, and they also increase code size. As a result,
533 icc never generates branch hints. */
534 const int x86_branch_hints = 0;
535 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4 | m_NOCONA;
536 const int x86_partial_reg_stall = m_PPRO;
537 const int x86_use_loop = m_K6;
538 const int x86_use_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
539 const int x86_use_mov0 = m_K6;
540 const int x86_use_cltd = ~(m_PENT | m_K6);
541 const int x86_read_modify_write = ~m_PENT;
542 const int x86_read_modify = ~(m_PENT | m_PPRO);
543 const int x86_split_long_moves = m_PPRO;
544 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
545 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
546 const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
547 const int x86_qimode_math = ~(0);
548 const int x86_promote_qi_regs = 0;
549 const int x86_himode_math = ~(m_PPRO);
550 const int x86_promote_hi_regs = m_PPRO;
551 const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA;
552 const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA;
553 const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4 | m_NOCONA;
554 const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4 | m_NOCONA;
555 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO);
556 const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA;
557 const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA;
558 const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO;
559 const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO;
560 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO;
561 const int x86_decompose_lea = m_PENT4 | m_NOCONA;
562 const int x86_shift1 = ~m_486;
563 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
564 const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO;
565 /* Set for machines where the type and dependencies are resolved on SSE
566 register parts instead of whole registers, so we may maintain just the
567 lower part of scalar values in the proper format, leaving the upper part
568 undefined. */
569 const int x86_sse_split_regs = m_ATHLON_K8;
570 const int x86_sse_typeless_stores = m_ATHLON_K8;
571 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
572 const int x86_use_ffreep = m_ATHLON_K8;
573 const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
574 const int x86_inter_unit_moves = ~(m_ATHLON_K8);
575 const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_NOCONA | m_PPRO;
576 /* Some CPU cores are not able to predict more than 4 branch instructions in
577 a 16-byte window. */
578 const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
579 const int x86_schedule = m_PPRO | m_ATHLON_K8 | m_K6 | m_PENT;
580 const int x86_use_bt = m_ATHLON_K8;
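/* Editor's illustrative sketch -- not part of the original file.  The masks
   above are normally tested through TUNEMASK from i386.h, i.e.
   (1 << ix86_tune), as the x86_accumulate_outgoing_args check in
   override_options below does.  A hand-written equivalent would be: */
static inline int
example_tune_uses_leave (void)
{
  /* Nonzero when the CPU selected by -mtune prefers the "leave" insn.  */
  return (x86_use_leave & (1 << ix86_tune)) != 0;
}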
581
582 /* If the average insn count for a single function invocation is
583 lower than this constant, emit fast (but longer) prologue and
584 epilogue code. */
585 #define FAST_PROLOGUE_INSN_COUNT 20
586
587 /* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
588 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
589 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
590 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
591
592 /* Array of the smallest class containing reg number REGNO, indexed by
593 REGNO. Used by REGNO_REG_CLASS in i386.h. */
594
595 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
596 {
597 /* ax, dx, cx, bx */
598 AREG, DREG, CREG, BREG,
599 /* si, di, bp, sp */
600 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
601 /* FP registers */
602 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
603 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
604 /* arg pointer */
605 NON_Q_REGS,
606 /* flags, fpsr, dirflag, frame */
607 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
608 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
609 SSE_REGS, SSE_REGS,
610 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
611 MMX_REGS, MMX_REGS,
612 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
613 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
614 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
615 SSE_REGS, SSE_REGS,
616 };
617
618 /* The "default" register map used in 32bit mode. */
619
620 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
621 {
622 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
623 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
624 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
625 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
626 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
627 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
628 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
629 };
630
631 static int const x86_64_int_parameter_registers[6] =
632 {
633 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
634 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
635 };
636
637 static int const x86_64_int_return_registers[4] =
638 {
639 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
640 };
641
642 /* The "default" register map used in 64bit mode. */
643 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
644 {
645 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
646 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
647 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
648 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
649 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
650 8,9,10,11,12,13,14,15, /* extended integer registers */
651 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
652 };
653
654 /* Define the register numbers to be used in Dwarf debugging information.
655 The SVR4 reference port C compiler uses the following register numbers
656 in its Dwarf output code:
657 0 for %eax (gcc regno = 0)
658 1 for %ecx (gcc regno = 2)
659 2 for %edx (gcc regno = 1)
660 3 for %ebx (gcc regno = 3)
661 4 for %esp (gcc regno = 7)
662 5 for %ebp (gcc regno = 6)
663 6 for %esi (gcc regno = 4)
664 7 for %edi (gcc regno = 5)
665 The following three DWARF register numbers are never generated by
666 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
667 believes these numbers have these meanings.
668 8 for %eip (no gcc equivalent)
669 9 for %eflags (gcc regno = 17)
670 10 for %trapno (no gcc equivalent)
671 It is not at all clear how we should number the FP stack registers
672 for the x86 architecture. If the version of SDB on x86/svr4 were
673 a bit less brain dead with respect to floating-point then we would
674 have a precedent to follow with respect to DWARF register numbers
675 for x86 FP registers, but the SDB on x86/svr4 is so completely
676 broken with respect to FP registers that it is hardly worth thinking
677 of it as something to strive for compatibility with.
678 The version of x86/svr4 SDB I have at the moment does (partially)
679 seem to believe that DWARF register number 11 is associated with
680 the x86 register %st(0), but that's about all. Higher DWARF
681 register numbers don't seem to be associated with anything in
682 particular, and even for DWARF regno 11, SDB only seems to under-
683 stand that it should say that a variable lives in %st(0) (when
684 asked via an `=' command) if we said it was in DWARF regno 11,
685 but SDB still prints garbage when asked for the value of the
686 variable in question (via a `/' command).
687 (Also note that the labels SDB prints for various FP stack regs
688 when doing an `x' command are all wrong.)
689 Note that these problems generally don't affect the native SVR4
690 C compiler because it doesn't allow the use of -O with -g and
691 because when it is *not* optimizing, it allocates a memory
692 location for each floating-point variable, and the memory
693 location is what gets described in the DWARF AT_location
694 attribute for the variable in question.
695 Regardless of the severe mental illness of the x86/svr4 SDB, we
696 do something sensible here and we use the following DWARF
697 register numbers. Note that these are all stack-top-relative
698 numbers.
699 11 for %st(0) (gcc regno = 8)
700 12 for %st(1) (gcc regno = 9)
701 13 for %st(2) (gcc regno = 10)
702 14 for %st(3) (gcc regno = 11)
703 15 for %st(4) (gcc regno = 12)
704 16 for %st(5) (gcc regno = 13)
705 17 for %st(6) (gcc regno = 14)
706 18 for %st(7) (gcc regno = 15)
707 */
708 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
709 {
710 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
711 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
712 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
713 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
714 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
715 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
716 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
717 };
718
719 /* Test and compare insns in i386.md store the information needed to
720 generate branch and scc insns here. */
721
722 rtx ix86_compare_op0 = NULL_RTX;
723 rtx ix86_compare_op1 = NULL_RTX;
724
725 #define MAX_386_STACK_LOCALS 3
726 /* Size of the register save area. */
727 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
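/* Editor's note, a worked example assuming the usual 64-bit values
   REGPARM_MAX == 6, UNITS_PER_WORD == 8 and SSE_REGPARM_MAX == 8:
   6*8 + 8*16 = 48 + 128 = 176 bytes -- six 8-byte integer registers
   followed by eight 16-byte SSE registers, the register save area
   described by the x86-64 ABI. */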
728
729 /* Define the structure for the machine field in struct function. */
730
731 struct stack_local_entry GTY(())
732 {
733 unsigned short mode;
734 unsigned short n;
735 rtx rtl;
736 struct stack_local_entry *next;
737 };
738
739 /* Structure describing stack frame layout.
740 Stack grows downward:
741
742 [arguments]
743 <- ARG_POINTER
744 saved pc
745
746 saved frame pointer if frame_pointer_needed
747 <- HARD_FRAME_POINTER
748 [saved regs]
749
750 [padding1] \
751 )
752 [va_arg registers] (
753 > to_allocate <- FRAME_POINTER
754 [frame] (
755 )
756 [padding2] /
757 */
758 struct ix86_frame
759 {
760 int nregs;
761 int padding1;
762 int va_arg_size;
763 HOST_WIDE_INT frame;
764 int padding2;
765 int outgoing_arguments_size;
766 int red_zone_size;
767
768 HOST_WIDE_INT to_allocate;
769 /* The offsets relative to ARG_POINTER. */
770 HOST_WIDE_INT frame_pointer_offset;
771 HOST_WIDE_INT hard_frame_pointer_offset;
772 HOST_WIDE_INT stack_pointer_offset;
773
774 /* When save_regs_using_mov is set, emit prologue using
775 move instead of push instructions. */
776 bool save_regs_using_mov;
777 };
778
779 /* Used to enable/disable debugging features. */
780 const char *ix86_debug_arg_string, *ix86_debug_addr_string;
781 /* Code model option as passed by user. */
782 const char *ix86_cmodel_string;
783 /* Parsed value. */
784 enum cmodel ix86_cmodel;
785 /* Asm dialect. */
786 const char *ix86_asm_string;
787 enum asm_dialect ix86_asm_dialect = ASM_ATT;
788 /* TLS dialect. */
789 const char *ix86_tls_dialect_string;
790 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
791
792 /* Which unit we are generating floating point math for. */
793 enum fpmath_unit ix86_fpmath;
794
795 /* Which cpu are we scheduling for. */
796 enum processor_type ix86_tune;
797 /* Which instruction set architecture to use. */
798 enum processor_type ix86_arch;
799
800 /* Strings to hold which cpu and instruction set architecture to use. */
801 const char *ix86_tune_string; /* for -mtune=<xxx> */
802 const char *ix86_arch_string; /* for -march=<xxx> */
803 const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
804
805 /* # of registers to use to pass arguments. */
806 const char *ix86_regparm_string;
807
808 /* True if the SSE prefetch instruction is not a NOP. */
809 int x86_prefetch_sse;
810
811 /* ix86_regparm_string as a number */
812 int ix86_regparm;
813
814 /* Alignment to use for loops and jumps: */
815
816 /* Power of two alignment for loops. */
817 const char *ix86_align_loops_string;
818
819 /* Power of two alignment for non-loop jumps. */
820 const char *ix86_align_jumps_string;
821
822 /* Power of two alignment for stack boundary in bytes. */
823 const char *ix86_preferred_stack_boundary_string;
824
825 /* Preferred alignment for stack boundary in bits. */
826 unsigned int ix86_preferred_stack_boundary;
827
828 /* Values 1-5: see jump.c */
829 int ix86_branch_cost;
830 const char *ix86_branch_cost_string;
831
832 /* Power of two alignment for functions. */
833 const char *ix86_align_funcs_string;
834
835 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
836 char internal_label_prefix[16];
837 int internal_label_prefix_len;
838 \f
839 static void output_pic_addr_const (FILE *, rtx, int);
840 static void put_condition_code (enum rtx_code, enum machine_mode,
841 int, int, FILE *);
842 static const char *get_some_local_dynamic_name (void);
843 static int get_some_local_dynamic_name_1 (rtx *, void *);
844 static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
845 static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
846 rtx *);
847 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
848 static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
849 enum machine_mode);
850 static rtx get_thread_pointer (int);
851 static rtx legitimize_tls_address (rtx, enum tls_model, int);
852 static void get_pc_thunk_name (char [32], unsigned int);
853 static rtx gen_push (rtx);
854 static int ix86_flags_dependant (rtx, rtx, enum attr_type);
855 static int ix86_agi_dependant (rtx, rtx, enum attr_type);
856 static struct machine_function * ix86_init_machine_status (void);
857 static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
858 static int ix86_nsaved_regs (void);
859 static void ix86_emit_save_regs (void);
860 static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
861 static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
862 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
863 static HOST_WIDE_INT ix86_GOT_alias_set (void);
864 static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
865 static rtx ix86_expand_aligntest (rtx, int);
866 static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
867 static int ix86_issue_rate (void);
868 static int ix86_adjust_cost (rtx, rtx, rtx, int);
869 static int ia32_multipass_dfa_lookahead (void);
870 static void ix86_init_mmx_sse_builtins (void);
871 static rtx x86_this_parameter (tree);
872 static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
873 HOST_WIDE_INT, tree);
874 static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
875 static void x86_file_start (void);
876 static void ix86_reorg (void);
877 static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
878 static tree ix86_build_builtin_va_list (void);
879 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
880 tree, int *, int);
881 static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
882 static bool ix86_vector_mode_supported_p (enum machine_mode);
883
884 static int ix86_address_cost (rtx);
885 static bool ix86_cannot_force_const_mem (rtx);
886 static rtx ix86_delegitimize_address (rtx);
887
888 struct builtin_description;
889 static rtx ix86_expand_sse_comi (const struct builtin_description *,
890 tree, rtx);
891 static rtx ix86_expand_sse_compare (const struct builtin_description *,
892 tree, rtx);
893 static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
894 static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
895 static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
896 static rtx ix86_expand_store_builtin (enum insn_code, tree);
897 static rtx safe_vector_operand (rtx, enum machine_mode);
898 static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
899 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
900 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
901 static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
902 static int ix86_fp_comparison_cost (enum rtx_code code);
903 static unsigned int ix86_select_alt_pic_regnum (void);
904 static int ix86_save_reg (unsigned int, int);
905 static void ix86_compute_frame_layout (struct ix86_frame *);
906 static int ix86_comp_type_attributes (tree, tree);
907 static int ix86_function_regparm (tree, tree);
908 const struct attribute_spec ix86_attribute_table[];
909 static bool ix86_function_ok_for_sibcall (tree, tree);
910 static tree ix86_handle_cdecl_attribute (tree *, tree, tree, int, bool *);
911 static tree ix86_handle_regparm_attribute (tree *, tree, tree, int, bool *);
912 static int ix86_value_regno (enum machine_mode);
913 static bool contains_128bit_aligned_vector_p (tree);
914 static rtx ix86_struct_value_rtx (tree, int);
915 static bool ix86_ms_bitfield_layout_p (tree);
916 static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
917 static int extended_reg_mentioned_1 (rtx *, void *);
918 static bool ix86_rtx_costs (rtx, int, int, int *);
919 static int min_insn_size (rtx);
920 static tree ix86_md_asm_clobbers (tree clobbers);
921 static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type);
922 static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
923 tree, bool);
924 static void ix86_init_builtins (void);
925 static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
926
927 /* This function is only used on Solaris. */
928 static void i386_solaris_elf_named_section (const char *, unsigned int, tree)
929 ATTRIBUTE_UNUSED;
930
931 /* Register class used for passing a given 64bit part of the argument.
932 These represent classes as documented by the psABI, with the exception
933 of the SSESF and SSEDF classes, which are basically the SSE class, except
934 that gcc uses SF or DFmode moves instead of DImode to avoid reformatting penalties.
935 
936 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
937 whenever possible (the upper half then contains only padding).
938 */
939 enum x86_64_reg_class
940 {
941 X86_64_NO_CLASS,
942 X86_64_INTEGER_CLASS,
943 X86_64_INTEGERSI_CLASS,
944 X86_64_SSE_CLASS,
945 X86_64_SSESF_CLASS,
946 X86_64_SSEDF_CLASS,
947 X86_64_SSEUP_CLASS,
948 X86_64_X87_CLASS,
949 X86_64_X87UP_CLASS,
950 X86_64_COMPLEX_X87_CLASS,
951 X86_64_MEMORY_CLASS
952 };
953 static const char * const x86_64_reg_class_name[] = {
954 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
955 "sseup", "x87", "x87up", "cplx87", "no"
956 };
957
958 #define MAX_CLASSES 4
959
960 /* Table of constants used by fldpi, fldln2, etc.... */
961 static REAL_VALUE_TYPE ext_80387_constants_table [5];
962 static bool ext_80387_constants_init = 0;
963 static void init_ext_80387_constants (void);
964 \f
965 /* Initialize the GCC target structure. */
966 #undef TARGET_ATTRIBUTE_TABLE
967 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
968 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
969 # undef TARGET_MERGE_DECL_ATTRIBUTES
970 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
971 #endif
972
973 #undef TARGET_COMP_TYPE_ATTRIBUTES
974 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
975
976 #undef TARGET_INIT_BUILTINS
977 #define TARGET_INIT_BUILTINS ix86_init_builtins
978 #undef TARGET_EXPAND_BUILTIN
979 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
980
981 #undef TARGET_ASM_FUNCTION_EPILOGUE
982 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
983
984 #undef TARGET_ASM_OPEN_PAREN
985 #define TARGET_ASM_OPEN_PAREN ""
986 #undef TARGET_ASM_CLOSE_PAREN
987 #define TARGET_ASM_CLOSE_PAREN ""
988
989 #undef TARGET_ASM_ALIGNED_HI_OP
990 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
991 #undef TARGET_ASM_ALIGNED_SI_OP
992 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
993 #ifdef ASM_QUAD
994 #undef TARGET_ASM_ALIGNED_DI_OP
995 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
996 #endif
997
998 #undef TARGET_ASM_UNALIGNED_HI_OP
999 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
1000 #undef TARGET_ASM_UNALIGNED_SI_OP
1001 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1002 #undef TARGET_ASM_UNALIGNED_DI_OP
1003 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1004
1005 #undef TARGET_SCHED_ADJUST_COST
1006 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1007 #undef TARGET_SCHED_ISSUE_RATE
1008 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1009 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1010 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1011 ia32_multipass_dfa_lookahead
1012
1013 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1014 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1015
1016 #ifdef HAVE_AS_TLS
1017 #undef TARGET_HAVE_TLS
1018 #define TARGET_HAVE_TLS true
1019 #endif
1020 #undef TARGET_CANNOT_FORCE_CONST_MEM
1021 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1022
1023 #undef TARGET_DELEGITIMIZE_ADDRESS
1024 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1025
1026 #undef TARGET_MS_BITFIELD_LAYOUT_P
1027 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1028
1029 #undef TARGET_ASM_OUTPUT_MI_THUNK
1030 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1031 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1032 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1033
1034 #undef TARGET_ASM_FILE_START
1035 #define TARGET_ASM_FILE_START x86_file_start
1036
1037 #undef TARGET_RTX_COSTS
1038 #define TARGET_RTX_COSTS ix86_rtx_costs
1039 #undef TARGET_ADDRESS_COST
1040 #define TARGET_ADDRESS_COST ix86_address_cost
1041
1042 #undef TARGET_FIXED_CONDITION_CODE_REGS
1043 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1044 #undef TARGET_CC_MODES_COMPATIBLE
1045 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1046
1047 #undef TARGET_MACHINE_DEPENDENT_REORG
1048 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1049
1050 #undef TARGET_BUILD_BUILTIN_VA_LIST
1051 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1052
1053 #undef TARGET_MD_ASM_CLOBBERS
1054 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1055
1056 #undef TARGET_PROMOTE_PROTOTYPES
1057 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1058 #undef TARGET_STRUCT_VALUE_RTX
1059 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1060 #undef TARGET_SETUP_INCOMING_VARARGS
1061 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1062 #undef TARGET_MUST_PASS_IN_STACK
1063 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
1064 #undef TARGET_PASS_BY_REFERENCE
1065 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
1066
1067 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1068 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1069
1070 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1071 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
1072
1073 #ifdef SUBTARGET_INSERT_ATTRIBUTES
1074 #undef TARGET_INSERT_ATTRIBUTES
1075 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
1076 #endif
1077
1078 struct gcc_target targetm = TARGET_INITIALIZER;
1079
1080 \f
1081 /* The svr4 ABI for the i386 says that records and unions are returned
1082 in memory. */
1083 #ifndef DEFAULT_PCC_STRUCT_RETURN
1084 #define DEFAULT_PCC_STRUCT_RETURN 1
1085 #endif
1086
1087 /* Sometimes certain combinations of command options do not make
1088 sense on a particular target machine. You can define a macro
1089 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1090 defined, is executed once just after all the command options have
1091 been parsed.
1092
1093 Don't use this macro to turn on various extra optimizations for
1094 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1095
1096 void
1097 override_options (void)
1098 {
1099 int i;
1100 int ix86_tune_defaulted = 0;
1101
1102 /* Comes from final.c -- no real reason to change it. */
1103 #define MAX_CODE_ALIGN 16
1104
1105 static struct ptt
1106 {
1107 const struct processor_costs *cost; /* Processor costs */
1108 const int target_enable; /* Target flags to enable. */
1109 const int target_disable; /* Target flags to disable. */
1110 const int align_loop; /* Default alignments. */
1111 const int align_loop_max_skip;
1112 const int align_jump;
1113 const int align_jump_max_skip;
1114 const int align_func;
1115 }
1116 const processor_target_table[PROCESSOR_max] =
1117 {
1118 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1119 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1120 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1121 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1122 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1123 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1124 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1125 {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
1126 {&nocona_cost, 0, 0, 0, 0, 0, 0, 0}
1127 };
1128
1129 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1130 static struct pta
1131 {
1132 const char *const name; /* processor name or nickname. */
1133 const enum processor_type processor;
1134 const enum pta_flags
1135 {
1136 PTA_SSE = 1,
1137 PTA_SSE2 = 2,
1138 PTA_SSE3 = 4,
1139 PTA_MMX = 8,
1140 PTA_PREFETCH_SSE = 16,
1141 PTA_3DNOW = 32,
1142 PTA_3DNOW_A = 64,
1143 PTA_64BIT = 128
1144 } flags;
1145 }
1146 const processor_alias_table[] =
1147 {
1148 {"i386", PROCESSOR_I386, 0},
1149 {"i486", PROCESSOR_I486, 0},
1150 {"i586", PROCESSOR_PENTIUM, 0},
1151 {"pentium", PROCESSOR_PENTIUM, 0},
1152 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1153 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1154 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1155 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1156 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1157 {"i686", PROCESSOR_PENTIUMPRO, 0},
1158 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1159 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1160 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1161 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1162 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1163 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1164 | PTA_MMX | PTA_PREFETCH_SSE},
1165 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1166 | PTA_MMX | PTA_PREFETCH_SSE},
1167 {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
1168 | PTA_MMX | PTA_PREFETCH_SSE},
1169 {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1170 | PTA_MMX | PTA_PREFETCH_SSE},
1171 {"k6", PROCESSOR_K6, PTA_MMX},
1172 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1173 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1174 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1175 | PTA_3DNOW_A},
1176 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1177 | PTA_3DNOW | PTA_3DNOW_A},
1178 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1179 | PTA_3DNOW_A | PTA_SSE},
1180 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1181 | PTA_3DNOW_A | PTA_SSE},
1182 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1183 | PTA_3DNOW_A | PTA_SSE},
1184 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1185 | PTA_SSE | PTA_SSE2 },
1186 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1187 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1188 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1189 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1190 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1191 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1192 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1193 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1194 };
1195
1196 int const pta_size = ARRAY_SIZE (processor_alias_table);
1197
1198 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1199 SUBTARGET_OVERRIDE_OPTIONS;
1200 #endif
1201
1202 /* Set the default values for switches whose default depends on TARGET_64BIT
1203 in case they weren't overridden by command line options. */
1204 if (TARGET_64BIT)
1205 {
1206 if (flag_omit_frame_pointer == 2)
1207 flag_omit_frame_pointer = 1;
1208 if (flag_asynchronous_unwind_tables == 2)
1209 flag_asynchronous_unwind_tables = 1;
1210 if (flag_pcc_struct_return == 2)
1211 flag_pcc_struct_return = 0;
1212 }
1213 else
1214 {
1215 if (flag_omit_frame_pointer == 2)
1216 flag_omit_frame_pointer = 0;
1217 if (flag_asynchronous_unwind_tables == 2)
1218 flag_asynchronous_unwind_tables = 0;
1219 if (flag_pcc_struct_return == 2)
1220 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1221 }
1222
1223 if (!ix86_tune_string && ix86_arch_string)
1224 ix86_tune_string = ix86_arch_string;
1225 if (!ix86_tune_string)
1226 {
1227 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1228 ix86_tune_defaulted = 1;
1229 }
1230 if (!ix86_arch_string)
1231 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
1232
1233 if (ix86_cmodel_string != 0)
1234 {
1235 if (!strcmp (ix86_cmodel_string, "small"))
1236 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1237 else if (flag_pic)
1238 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1239 else if (!strcmp (ix86_cmodel_string, "32"))
1240 ix86_cmodel = CM_32;
1241 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1242 ix86_cmodel = CM_KERNEL;
1243 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
1244 ix86_cmodel = CM_MEDIUM;
1245 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1246 ix86_cmodel = CM_LARGE;
1247 else
1248 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1249 }
1250 else
1251 {
1252 ix86_cmodel = CM_32;
1253 if (TARGET_64BIT)
1254 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1255 }
1256 if (ix86_asm_string != 0)
1257 {
1258 if (!strcmp (ix86_asm_string, "intel"))
1259 ix86_asm_dialect = ASM_INTEL;
1260 else if (!strcmp (ix86_asm_string, "att"))
1261 ix86_asm_dialect = ASM_ATT;
1262 else
1263 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1264 }
1265 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1266 error ("code model %qs not supported in the %s bit mode",
1267 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1268 if (ix86_cmodel == CM_LARGE)
1269 sorry ("code model %<large%> not supported yet");
1270 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1271 sorry ("%i-bit mode not compiled in",
1272 (target_flags & MASK_64BIT) ? 64 : 32);
1273
1274 for (i = 0; i < pta_size; i++)
1275 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1276 {
1277 ix86_arch = processor_alias_table[i].processor;
1278 /* Default cpu tuning to the architecture. */
1279 ix86_tune = ix86_arch;
1280 if (processor_alias_table[i].flags & PTA_MMX
1281 && !(target_flags_explicit & MASK_MMX))
1282 target_flags |= MASK_MMX;
1283 if (processor_alias_table[i].flags & PTA_3DNOW
1284 && !(target_flags_explicit & MASK_3DNOW))
1285 target_flags |= MASK_3DNOW;
1286 if (processor_alias_table[i].flags & PTA_3DNOW_A
1287 && !(target_flags_explicit & MASK_3DNOW_A))
1288 target_flags |= MASK_3DNOW_A;
1289 if (processor_alias_table[i].flags & PTA_SSE
1290 && !(target_flags_explicit & MASK_SSE))
1291 target_flags |= MASK_SSE;
1292 if (processor_alias_table[i].flags & PTA_SSE2
1293 && !(target_flags_explicit & MASK_SSE2))
1294 target_flags |= MASK_SSE2;
1295 if (processor_alias_table[i].flags & PTA_SSE3
1296 && !(target_flags_explicit & MASK_SSE3))
1297 target_flags |= MASK_SSE3;
1298 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1299 x86_prefetch_sse = true;
1300 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1301 error ("CPU you selected does not support x86-64 "
1302 "instruction set");
1303 break;
1304 }
1305
1306 if (i == pta_size)
1307 error ("bad value (%s) for -march= switch", ix86_arch_string);
1308
1309 for (i = 0; i < pta_size; i++)
1310 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1311 {
1312 ix86_tune = processor_alias_table[i].processor;
1313 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1314 {
1315 if (ix86_tune_defaulted)
1316 {
1317 ix86_tune_string = "x86-64";
1318 for (i = 0; i < pta_size; i++)
1319 if (! strcmp (ix86_tune_string,
1320 processor_alias_table[i].name))
1321 break;
1322 ix86_tune = processor_alias_table[i].processor;
1323 }
1324 else
1325 error ("CPU you selected does not support x86-64 "
1326 "instruction set");
1327 }
1328 /* Intel CPUs have always interpreted SSE prefetch instructions as
1329 NOPs; so, we can enable SSE prefetch instructions even when
1330 -mtune (rather than -march) points us to a processor that has them.
1331 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
1332 higher processors. */
1333 if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
1334 x86_prefetch_sse = true;
1335 break;
1336 }
1337 if (i == pta_size)
1338 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1339
1340 if (optimize_size)
1341 ix86_cost = &size_cost;
1342 else
1343 ix86_cost = processor_target_table[ix86_tune].cost;
1344 target_flags |= processor_target_table[ix86_tune].target_enable;
1345 target_flags &= ~processor_target_table[ix86_tune].target_disable;
1346
1347 /* Arrange to set up i386_stack_locals for all functions. */
1348 init_machine_status = ix86_init_machine_status;
1349
1350 /* Validate -mregparm= value. */
1351 if (ix86_regparm_string)
1352 {
1353 i = atoi (ix86_regparm_string);
1354 if (i < 0 || i > REGPARM_MAX)
1355 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1356 else
1357 ix86_regparm = i;
1358 }
1359 else
1360 if (TARGET_64BIT)
1361 ix86_regparm = REGPARM_MAX;
1362
1363 /* If the user has provided any of the -malign-* options,
1364 warn and use that value only if -falign-* is not set.
1365 Remove this code in GCC 3.2 or later. */
1366 if (ix86_align_loops_string)
1367 {
1368 warning ("-malign-loops is obsolete, use -falign-loops");
1369 if (align_loops == 0)
1370 {
1371 i = atoi (ix86_align_loops_string);
1372 if (i < 0 || i > MAX_CODE_ALIGN)
1373 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1374 else
1375 align_loops = 1 << i;
1376 }
1377 }
1378
1379 if (ix86_align_jumps_string)
1380 {
1381 warning ("-malign-jumps is obsolete, use -falign-jumps");
1382 if (align_jumps == 0)
1383 {
1384 i = atoi (ix86_align_jumps_string);
1385 if (i < 0 || i > MAX_CODE_ALIGN)
1386 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1387 else
1388 align_jumps = 1 << i;
1389 }
1390 }
1391
1392 if (ix86_align_funcs_string)
1393 {
1394 warning ("-malign-functions is obsolete, use -falign-functions");
1395 if (align_functions == 0)
1396 {
1397 i = atoi (ix86_align_funcs_string);
1398 if (i < 0 || i > MAX_CODE_ALIGN)
1399 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1400 else
1401 align_functions = 1 << i;
1402 }
1403 }
1404
1405 /* Default align_* from the processor table. */
1406 if (align_loops == 0)
1407 {
1408 align_loops = processor_target_table[ix86_tune].align_loop;
1409 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
1410 }
1411 if (align_jumps == 0)
1412 {
1413 align_jumps = processor_target_table[ix86_tune].align_jump;
1414 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
1415 }
1416 if (align_functions == 0)
1417 {
1418 align_functions = processor_target_table[ix86_tune].align_func;
1419 }
1420
1421 /* Validate -mpreferred-stack-boundary= value, or provide default.
1422 The default of 128 bits is for Pentium III's SSE __m128, but we
1423 don't want additional code to keep the stack aligned when
1424 optimizing for code size. */
1425 ix86_preferred_stack_boundary = (optimize_size
1426 ? TARGET_64BIT ? 128 : 32
1427 : 128);
1428 if (ix86_preferred_stack_boundary_string)
1429 {
1430 i = atoi (ix86_preferred_stack_boundary_string);
1431 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1432 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1433 TARGET_64BIT ? 4 : 2);
1434 else
1435 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1436 }
1437
1438 /* Validate -mbranch-cost= value, or provide default. */
1439 ix86_branch_cost = processor_target_table[ix86_tune].cost->branch_cost;
1440 if (ix86_branch_cost_string)
1441 {
1442 i = atoi (ix86_branch_cost_string);
1443 if (i < 0 || i > 5)
1444 error ("-mbranch-cost=%d is not between 0 and 5", i);
1445 else
1446 ix86_branch_cost = i;
1447 }
1448
1449 if (ix86_tls_dialect_string)
1450 {
1451 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1452 ix86_tls_dialect = TLS_DIALECT_GNU;
1453 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1454 ix86_tls_dialect = TLS_DIALECT_SUN;
1455 else
1456 error ("bad value (%s) for -mtls-dialect= switch",
1457 ix86_tls_dialect_string);
1458 }
1459
1460 /* Keep nonleaf frame pointers. */
1461 if (flag_omit_frame_pointer)
1462 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
1463 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
1464 flag_omit_frame_pointer = 1;
1465
1466 /* If we're doing fast math, we don't care about comparison order
1467 wrt NaNs. This lets us use a shorter comparison sequence. */
1468 if (flag_unsafe_math_optimizations)
1469 target_flags &= ~MASK_IEEE_FP;
1470
1471 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1472 since the insns won't need emulation. */
1473 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1474 target_flags &= ~MASK_NO_FANCY_MATH_387;
1475
1476 /* Likewise, if the target doesn't have a 387, or we've specified
1477 software floating point, don't use 387 inline intrinsics. */
1478 if (!TARGET_80387)
1479 target_flags |= MASK_NO_FANCY_MATH_387;
1480
1481 /* Turn on SSE2 builtins for -msse3. */
1482 if (TARGET_SSE3)
1483 target_flags |= MASK_SSE2;
1484
1485 /* Turn on SSE builtins for -msse2. */
1486 if (TARGET_SSE2)
1487 target_flags |= MASK_SSE;
1488
1489 /* Turn on MMX builtins for -msse. */
1490 if (TARGET_SSE)
1491 {
1492 target_flags |= MASK_MMX & ~target_flags_explicit;
1493 x86_prefetch_sse = true;
1494 }
1495
1496 /* Turn on MMX builtins for 3Dnow. */
1497 if (TARGET_3DNOW)
1498 target_flags |= MASK_MMX;
1499
1500 if (TARGET_64BIT)
1501 {
1502 if (TARGET_ALIGN_DOUBLE)
1503 error ("-malign-double makes no sense in the 64bit mode");
1504 if (TARGET_RTD)
1505 error ("-mrtd calling convention not supported in the 64bit mode");
1506
1507 /* Enable by default the SSE and MMX builtins. Do allow the user to
1508 explicitly disable any of these. In particular, disabling SSE and
1509 MMX for kernel code is extremely useful. */
1510 target_flags
1511 |= ((MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE)
1512 & ~target_flags_explicit);
1513
1514 if (TARGET_SSE)
1515 ix86_fpmath = FPMATH_SSE;
1516 }
1517 else
1518 {
1519 ix86_fpmath = FPMATH_387;
1520 /* The i386 ABI does not specify a red zone. It still makes sense to use it
1521 when the programmer takes care to keep the stack from being destroyed. */
1522 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
1523 target_flags |= MASK_NO_RED_ZONE;
1524 }
1525
1526 if (ix86_fpmath_string != 0)
1527 {
1528 if (! strcmp (ix86_fpmath_string, "387"))
1529 ix86_fpmath = FPMATH_387;
1530 else if (! strcmp (ix86_fpmath_string, "sse"))
1531 {
1532 if (!TARGET_SSE)
1533 {
1534 warning ("SSE instruction set disabled, using 387 arithmetics");
1535 ix86_fpmath = FPMATH_387;
1536 }
1537 else
1538 ix86_fpmath = FPMATH_SSE;
1539 }
1540 else if (! strcmp (ix86_fpmath_string, "387,sse")
1541 || ! strcmp (ix86_fpmath_string, "sse,387"))
1542 {
1543 if (!TARGET_SSE)
1544 {
1545 warning ("SSE instruction set disabled, using 387 arithmetics");
1546 ix86_fpmath = FPMATH_387;
1547 }
1548 else if (!TARGET_80387)
1549 {
1550 warning ("387 instruction set disabled, using SSE arithmetics");
1551 ix86_fpmath = FPMATH_SSE;
1552 }
1553 else
1554 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1555 }
1556 else
1557 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1558 }
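/* As a usage sketch: "-msse2 -mfpmath=sse,387" ends up with
   ix86_fpmath == (FPMATH_SSE | FPMATH_387), whereas "-mfpmath=sse"
   without any SSE-enabling option warns and falls back to FPMATH_387.  */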
1559
1560 if ((x86_accumulate_outgoing_args & TUNEMASK)
1561 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1562 && !optimize_size)
1563 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1564
1565 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1566 {
1567 char *p;
1568 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1569 p = strchr (internal_label_prefix, 'X');
1570 internal_label_prefix_len = p - internal_label_prefix;
1571 *p = '\0';
1572 }
1573
1574 /* When the scheduling description is not available, disable the scheduler pass
1575 so that it won't slow down compilation or make x87 code slower. */
1576 if (!TARGET_SCHEDULE)
1577 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
1578 }
1579 \f
1580 void
1581 optimization_options (int level, int size ATTRIBUTE_UNUSED)
1582 {
1583 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1584 make the problem with not enough registers even worse. */
1585 #ifdef INSN_SCHEDULING
1586 if (level > 1)
1587 flag_schedule_insns = 0;
1588 #endif
1589
1590 /* The default values of these switches depend on TARGET_64BIT,
1591 which is not known at this point. Mark these values with 2 and
1592 let the user override them. If there is no command line option
1593 specifying them, we will set the defaults in override_options. */
1594 if (optimize >= 1)
1595 flag_omit_frame_pointer = 2;
1596 flag_pcc_struct_return = 2;
1597 flag_asynchronous_unwind_tables = 2;
1598 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
1599 SUBTARGET_OPTIMIZATION_OPTIONS;
1600 #endif
1601 }
1602 \f
1603 /* Table of valid machine attributes. */
1604 const struct attribute_spec ix86_attribute_table[] =
1605 {
1606 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1607 /* Stdcall attribute says callee is responsible for popping arguments
1608 if they are not variable. */
1609 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1610 /* Fastcall attribute says callee is responsible for popping arguments
1611 if they are not variable. */
1612 { "fastcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1613 /* Cdecl attribute says the callee is a normal C declaration */
1614 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1615 /* Regparm attribute specifies how many integer arguments are to be
1616 passed in registers. */
1617 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1618 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1619 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
1620 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
1621 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
1622 #endif
1623 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1624 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1625 #ifdef SUBTARGET_ATTRIBUTE_TABLE
1626 SUBTARGET_ATTRIBUTE_TABLE,
1627 #endif
1628 { NULL, 0, 0, false, false, false, NULL }
1629 };
1630
1631 /* Decide whether we can make a sibling call to a function. DECL is the
1632 declaration of the function being targeted by the call and EXP is the
1633 CALL_EXPR representing the call. */
1634
1635 static bool
1636 ix86_function_ok_for_sibcall (tree decl, tree exp)
1637 {
1638 /* If we are generating position-independent code, we cannot sibcall
1639 optimize any indirect call, or a direct call to a global function,
1640 as the PLT requires %ebx be live. */
1641 if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
1642 return false;
1643
1644 /* If we are returning floats on the 80387 register stack, we cannot
1645 make a sibcall from a function that doesn't return a float to a
1646 function that does or, conversely, from a function that does return
1647 a float to a function that doesn't; the necessary stack adjustment
1648 would not be executed. */
1649 if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp)))
1650 != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)))))
1651 return false;
1652
1653 /* If this call is indirect, we'll need to be able to use a call-clobbered
1654 register for the address of the target function. Make sure that all
1655 such registers are not used for passing parameters. */
1656 if (!decl && !TARGET_64BIT)
1657 {
1658 tree type;
1659
1660 /* We're looking at the CALL_EXPR; we need the type of the function. */
1661 type = TREE_OPERAND (exp, 0); /* pointer expression */
1662 type = TREE_TYPE (type); /* pointer type */
1663 type = TREE_TYPE (type); /* function type */
1664
1665 if (ix86_function_regparm (type, NULL) >= 3)
1666 {
1667 /* ??? Need to count the actual number of registers to be used,
1668 not the possible number of registers. Fix later. */
1669 return false;
1670 }
1671 }
1672
1673 /* Otherwise okay. That also includes certain types of indirect calls. */
1674 return true;
1675 }
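/* An illustrative case of the PIC restriction above: with -fpic on a
   32-bit target,

     extern int callee (int);
     int caller (int x) { return callee (x); }

   is not sibcall optimized, since the call goes through the PLT and
   the PLT requires %ebx to be live.  */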
1676
1677 /* Handle a "cdecl", "stdcall", or "fastcall" attribute;
1678 arguments as in struct attribute_spec.handler. */
1679 static tree
1680 ix86_handle_cdecl_attribute (tree *node, tree name,
1681 tree args ATTRIBUTE_UNUSED,
1682 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1683 {
1684 if (TREE_CODE (*node) != FUNCTION_TYPE
1685 && TREE_CODE (*node) != METHOD_TYPE
1686 && TREE_CODE (*node) != FIELD_DECL
1687 && TREE_CODE (*node) != TYPE_DECL)
1688 {
1689 warning ("%qs attribute only applies to functions",
1690 IDENTIFIER_POINTER (name));
1691 *no_add_attrs = true;
1692 }
1693 else
1694 {
1695 if (is_attribute_p ("fastcall", name))
1696 {
1697 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
1698 {
1699 error ("fastcall and stdcall attributes are not compatible");
1700 }
1701 else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
1702 {
1703 error ("fastcall and regparm attributes are not compatible");
1704 }
1705 }
1706 else if (is_attribute_p ("stdcall", name))
1707 {
1708 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1709 {
1710 error ("fastcall and stdcall attributes are not compatible");
1711 }
1712 }
1713 }
1714
1715 if (TARGET_64BIT)
1716 {
1717 warning ("%qs attribute ignored", IDENTIFIER_POINTER (name));
1718 *no_add_attrs = true;
1719 }
1720
1721 return NULL_TREE;
1722 }
1723
1724 /* Handle a "regparm" attribute;
1725 arguments as in struct attribute_spec.handler. */
1726 static tree
1727 ix86_handle_regparm_attribute (tree *node, tree name, tree args,
1728 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1729 {
1730 if (TREE_CODE (*node) != FUNCTION_TYPE
1731 && TREE_CODE (*node) != METHOD_TYPE
1732 && TREE_CODE (*node) != FIELD_DECL
1733 && TREE_CODE (*node) != TYPE_DECL)
1734 {
1735 warning ("%qs attribute only applies to functions",
1736 IDENTIFIER_POINTER (name));
1737 *no_add_attrs = true;
1738 }
1739 else
1740 {
1741 tree cst;
1742
1743 cst = TREE_VALUE (args);
1744 if (TREE_CODE (cst) != INTEGER_CST)
1745 {
1746 warning ("%qs attribute requires an integer constant argument",
1747 IDENTIFIER_POINTER (name));
1748 *no_add_attrs = true;
1749 }
1750 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1751 {
1752 warning ("argument to %qs attribute larger than %d",
1753 IDENTIFIER_POINTER (name), REGPARM_MAX);
1754 *no_add_attrs = true;
1755 }
1756
1757 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1758 {
1759 error ("fastcall and regparm attributes are not compatible");
1760 }
1761 }
1762
1763 return NULL_TREE;
1764 }
1765
1766 /* Return 0 if the attributes for two types are incompatible, 1 if they
1767 are compatible, and 2 if they are nearly compatible (which causes a
1768 warning to be generated). */
1769
1770 static int
1771 ix86_comp_type_attributes (tree type1, tree type2)
1772 {
1773 /* Check for mismatch of non-default calling convention. */
1774 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1775
1776 if (TREE_CODE (type1) != FUNCTION_TYPE)
1777 return 1;
1778
1779 /* Check for mismatched fastcall types */
1780 if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
1781 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
1782 return 0;
1783
1784 /* Check for mismatched return types (cdecl vs stdcall). */
1785 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1786 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1787 return 0;
1788 if (ix86_function_regparm (type1, NULL)
1789 != ix86_function_regparm (type2, NULL))
1790 return 0;
1791 return 1;
1792 }
1793 \f
1794 /* Return the regparm value for a function with the indicated TYPE and DECL.
1795 DECL may be NULL when calling function indirectly
1796 or considering a libcall. */
1797
1798 static int
1799 ix86_function_regparm (tree type, tree decl)
1800 {
1801 tree attr;
1802 int regparm = ix86_regparm;
1803 bool user_convention = false;
1804
1805 if (!TARGET_64BIT)
1806 {
1807 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1808 if (attr)
1809 {
1810 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1811 user_convention = true;
1812 }
1813
1814 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
1815 {
1816 regparm = 2;
1817 user_convention = true;
1818 }
1819
1820 /* Use register calling convention for local functions when possible. */
1821 if (!TARGET_64BIT && !user_convention && decl
1822 && flag_unit_at_a_time && !profile_flag)
1823 {
1824 struct cgraph_local_info *i = cgraph_local_info (decl);
1825 if (i && i->local)
1826 {
1827 /* We can't use regparm(3) for nested functions as these use
1828 static chain pointer in third argument. */
1829 if (DECL_CONTEXT (decl) && !DECL_NO_STATIC_CHAIN (decl))
1830 regparm = 2;
1831 else
1832 regparm = 3;
1833 }
1834 }
1835 }
1836 return regparm;
1837 }
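/* A sketch of what this computes: for

     int __attribute__((regparm(3))) f (int a, int b, int c);

   the value is 3, so A, B and C are passed in EAX, EDX and ECX;
   a fastcall declaration forces the value 2 (ECX and EDX), and local
   functions compiled with -funit-at-a-time may be promoted to 2 or 3
   automatically as above.  */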
1838
1839 /* Return true if EAX is live at the start of the function. Used by
1840 ix86_expand_prologue to determine if we need special help before
1841 calling allocate_stack_worker. */
1842
1843 static bool
1844 ix86_eax_live_at_start_p (void)
1845 {
1846 /* Cheat. Don't bother working forward from ix86_function_regparm
1847 to the function type to whether an actual argument is located in
1848 eax. Instead just look at cfg info, which is still close enough
1849 to correct at this point. This gives false positives for broken
1850 functions that might use uninitialized data that happens to be
1851 allocated in eax, but who cares? */
1852 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->global_live_at_end, 0);
1853 }
1854
1855 /* Value is the number of bytes of arguments automatically
1856 popped when returning from a subroutine call.
1857 FUNDECL is the declaration node of the function (as a tree),
1858 FUNTYPE is the data type of the function (as a tree),
1859 or for a library call it is an identifier node for the subroutine name.
1860 SIZE is the number of bytes of arguments passed on the stack.
1861
1862 On the 80386, the RTD insn may be used to pop them if the number
1863 of args is fixed, but if the number is variable then the caller
1864 must pop them all. RTD can't be used for library calls now
1865 because the library is compiled with the Unix compiler.
1866 Use of RTD is a selectable option, since it is incompatible with
1867 standard Unix calling sequences. If the option is not selected,
1868 the caller must always pop the args.
1869
1870 The attribute stdcall is equivalent to RTD on a per module basis. */
1871
1872 int
1873 ix86_return_pops_args (tree fundecl, tree funtype, int size)
1874 {
1875 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1876
1877 /* Cdecl functions override -mrtd, and never pop the stack. */
1878 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1879
1880 /* Stdcall and fastcall functions will pop the stack if not
1881 variable args. */
1882 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
1883 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
1884 rtd = 1;
1885
1886 if (rtd
1887 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1888 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1889 == void_type_node)))
1890 return size;
1891 }
1892
1893 /* Lose any fake structure return argument if it is passed on the stack. */
1894 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
1895 && !TARGET_64BIT
1896 && !KEEP_AGGREGATE_RETURN_POINTER)
1897 {
1898 int nregs = ix86_function_regparm (funtype, fundecl);
1899
1900 if (!nregs)
1901 return GET_MODE_SIZE (Pmode);
1902 }
1903
1904 return 0;
1905 }
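/* For instance, a fixed-argument stdcall function

     void __attribute__((stdcall)) g (int a, int b);

   makes this return 8, so the callee pops its arguments (ret $8),
   while a cdecl or variadic function returns 0 and the caller pops.  */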
1906 \f
1907 /* Argument support functions. */
1908
1909 /* Return true when register may be used to pass function parameters. */
1910 bool
1911 ix86_function_arg_regno_p (int regno)
1912 {
1913 int i;
1914 if (!TARGET_64BIT)
1915 return (regno < REGPARM_MAX
1916 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1917 if (SSE_REGNO_P (regno) && TARGET_SSE)
1918 return true;
1919 /* RAX is used as hidden argument to va_arg functions. */
1920 if (!regno)
1921 return true;
1922 for (i = 0; i < REGPARM_MAX; i++)
1923 if (regno == x86_64_int_parameter_registers[i])
1924 return true;
1925 return false;
1926 }
1927
1928 /* Return true if we do not know how to pass TYPE solely in registers. */
1929
1930 static bool
1931 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
1932 {
1933 if (must_pass_in_stack_var_size_or_pad (mode, type))
1934 return true;
1935
1936 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
1937 The layout_type routine is crafty and tries to trick us into passing
1938 currently unsupported vector types on the stack by using TImode. */
1939 return (!TARGET_64BIT && mode == TImode
1940 && type && TREE_CODE (type) != VECTOR_TYPE);
1941 }
1942
1943 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1944 for a call to a function whose data type is FNTYPE.
1945 For a library call, FNTYPE is 0. */
1946
1947 void
1948 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
1949 tree fntype, /* tree ptr for function decl */
1950 rtx libname, /* SYMBOL_REF of library name or 0 */
1951 tree fndecl)
1952 {
1953 static CUMULATIVE_ARGS zero_cum;
1954 tree param, next_param;
1955
1956 if (TARGET_DEBUG_ARG)
1957 {
1958 fprintf (stderr, "\ninit_cumulative_args (");
1959 if (fntype)
1960 fprintf (stderr, "fntype code = %s, ret code = %s",
1961 tree_code_name[(int) TREE_CODE (fntype)],
1962 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1963 else
1964 fprintf (stderr, "no fntype");
1965
1966 if (libname)
1967 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1968 }
1969
1970 *cum = zero_cum;
1971
1972 /* Set up the number of registers to use for passing arguments. */
1973 if (fntype)
1974 cum->nregs = ix86_function_regparm (fntype, fndecl);
1975 else
1976 cum->nregs = ix86_regparm;
1977 if (TARGET_SSE)
1978 cum->sse_nregs = SSE_REGPARM_MAX;
1979 if (TARGET_MMX)
1980 cum->mmx_nregs = MMX_REGPARM_MAX;
1981 cum->warn_sse = true;
1982 cum->warn_mmx = true;
1983 cum->maybe_vaarg = false;
1984
1985 /* Use ecx and edx registers if the function has the fastcall attribute. */
1986 if (fntype && !TARGET_64BIT)
1987 {
1988 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
1989 {
1990 cum->nregs = 2;
1991 cum->fastcall = 1;
1992 }
1993 }
1994
1995 /* Determine if this function has variable arguments. This is
1996 indicated by the last argument being 'void_type_node' if there
1997 are no variable arguments. If there are variable arguments, then
1998 we won't pass anything in registers in 32-bit mode. */
1999
2000 if (cum->nregs || cum->mmx_nregs || cum->sse_nregs)
2001 {
2002 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
2003 param != 0; param = next_param)
2004 {
2005 next_param = TREE_CHAIN (param);
2006 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
2007 {
2008 if (!TARGET_64BIT)
2009 {
2010 cum->nregs = 0;
2011 cum->sse_nregs = 0;
2012 cum->mmx_nregs = 0;
2013 cum->warn_sse = 0;
2014 cum->warn_mmx = 0;
2015 cum->fastcall = 0;
2016 }
2017 cum->maybe_vaarg = true;
2018 }
2019 }
2020 }
2021 if ((!fntype && !libname)
2022 || (fntype && !TYPE_ARG_TYPES (fntype)))
2023 cum->maybe_vaarg = 1;
2024
2025 if (TARGET_DEBUG_ARG)
2026 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
2027
2028 return;
2029 }
2030
2031 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
2032 But in the case of vector types, it is some vector mode.
2033
2034 When we have only some of our vector isa extensions enabled, then there
2035 are some modes for which vector_mode_supported_p is false. For these
2036 modes, the generic vector support in gcc will choose some non-vector mode
2037 in order to implement the type. By computing the natural mode, we'll
2038 select the proper ABI location for the operand and not depend on whatever
2039 the middle-end decides to do with these vector types. */
2040
2041 static enum machine_mode
2042 type_natural_mode (tree type)
2043 {
2044 enum machine_mode mode = TYPE_MODE (type);
2045
2046 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
2047 {
2048 HOST_WIDE_INT size = int_size_in_bytes (type);
2049 if ((size == 8 || size == 16)
2050 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
2051 && TYPE_VECTOR_SUBPARTS (type) > 1)
2052 {
2053 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
2054
2055 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
2056 mode = MIN_MODE_VECTOR_FLOAT;
2057 else
2058 mode = MIN_MODE_VECTOR_INT;
2059
2060 /* Get the mode which has this inner mode and number of units. */
2061 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
2062 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
2063 && GET_MODE_INNER (mode) == innermode)
2064 return mode;
2065
2066 abort ();
2067 }
2068 }
2069
2070 return mode;
2071 }
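/* A sketch of the effect: for

     typedef int v2si __attribute__((vector_size (8)));

   with MMX disabled, the middle-end lays the type out in an integer
   mode (typically DImode), but the loop above still recovers V2SImode
   (two SImode units), so the ABI sees the natural vector mode.  */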
2072
2073 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
2074 this may not agree with the mode that the type system has chosen for the
2075 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
2076 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
2077
2078 static rtx
2079 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
2080 unsigned int regno)
2081 {
2082 rtx tmp;
2083
2084 if (orig_mode != BLKmode)
2085 tmp = gen_rtx_REG (orig_mode, regno);
2086 else
2087 {
2088 tmp = gen_rtx_REG (mode, regno);
2089 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
2090 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
2091 }
2092
2093 return tmp;
2094 }
2095
2096 /* x86-64 register passing implementation. See the x86-64 ABI for details. The goal
2097 of this code is to classify each 8-byte chunk of an incoming argument by register
2098 class and assign registers accordingly. */
2099
2100 /* Return the union class of CLASS1 and CLASS2.
2101 See the x86-64 PS ABI for details. */
2102
2103 static enum x86_64_reg_class
2104 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
2105 {
2106 /* Rule #1: If both classes are equal, this is the resulting class. */
2107 if (class1 == class2)
2108 return class1;
2109
2110 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
2111 the other class. */
2112 if (class1 == X86_64_NO_CLASS)
2113 return class2;
2114 if (class2 == X86_64_NO_CLASS)
2115 return class1;
2116
2117 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
2118 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
2119 return X86_64_MEMORY_CLASS;
2120
2121 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
2122 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
2123 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
2124 return X86_64_INTEGERSI_CLASS;
2125 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
2126 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
2127 return X86_64_INTEGER_CLASS;
2128
2129 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
2130 MEMORY is used. */
2131 if (class1 == X86_64_X87_CLASS
2132 || class1 == X86_64_X87UP_CLASS
2133 || class1 == X86_64_COMPLEX_X87_CLASS
2134 || class2 == X86_64_X87_CLASS
2135 || class2 == X86_64_X87UP_CLASS
2136 || class2 == X86_64_COMPLEX_X87_CLASS)
2137 return X86_64_MEMORY_CLASS;
2138
2139 /* Rule #6: Otherwise class SSE is used. */
2140 return X86_64_SSE_CLASS;
2141 }
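/* A worked example of the merge rules: classifying

     struct s { int i; float f; double d; };

   looks at two 8-byte words.  The first word merges INTEGERSI (from I)
   with SSESF (from F) and becomes INTEGERSI by rule #4, the second is
   SSEDF, so the struct travels in one integer and one SSE register.  */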
2142
2143 /* Classify the argument of type TYPE and mode MODE.
2144 CLASSES will be filled by the register class used to pass each word
2145 of the operand. The number of words is returned. In case the parameter
2146 should be passed in memory, 0 is returned. As a special case for zero
2147 sized containers, classes[0] will be NO_CLASS and 1 is returned.
2148
2149 BIT_OFFSET is used internally for handling records and specifies the
2150 offset in bits modulo 256 to avoid overflow cases.
2151
2152 See the x86-64 PS ABI for details.
2153 */
2154
2155 static int
2156 classify_argument (enum machine_mode mode, tree type,
2157 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
2158 {
2159 HOST_WIDE_INT bytes =
2160 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2161 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2162
2163 /* Variable sized entities are always passed/returned in memory. */
2164 if (bytes < 0)
2165 return 0;
2166
2167 if (mode != VOIDmode
2168 && targetm.calls.must_pass_in_stack (mode, type))
2169 return 0;
2170
2171 if (type && AGGREGATE_TYPE_P (type))
2172 {
2173 int i;
2174 tree field;
2175 enum x86_64_reg_class subclasses[MAX_CLASSES];
2176
2177 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
2178 if (bytes > 16)
2179 return 0;
2180
2181 for (i = 0; i < words; i++)
2182 classes[i] = X86_64_NO_CLASS;
2183
2184 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
2185 signal the memory class, so handle this as a special case. */
2186 if (!words)
2187 {
2188 classes[0] = X86_64_NO_CLASS;
2189 return 1;
2190 }
2191
2192 /* Classify each field of record and merge classes. */
2193 if (TREE_CODE (type) == RECORD_TYPE)
2194 {
2195 /* For C++ classes, first merge in the fields of the base classes. */
2196 if (TYPE_BINFO (type))
2197 {
2198 tree binfo, base_binfo;
2199 int basenum;
2200
2201 for (binfo = TYPE_BINFO (type), basenum = 0;
2202 BINFO_BASE_ITERATE (binfo, basenum, base_binfo); basenum++)
2203 {
2204 int num;
2205 int offset = tree_low_cst (BINFO_OFFSET (base_binfo), 0) * 8;
2206 tree type = BINFO_TYPE (base_binfo);
2207
2208 num = classify_argument (TYPE_MODE (type),
2209 type, subclasses,
2210 (offset + bit_offset) % 256);
2211 if (!num)
2212 return 0;
2213 for (i = 0; i < num; i++)
2214 {
2215 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2216 classes[i + pos] =
2217 merge_classes (subclasses[i], classes[i + pos]);
2218 }
2219 }
2220 }
2221 /* And now merge in the fields of the structure. */
2222 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2223 {
2224 if (TREE_CODE (field) == FIELD_DECL)
2225 {
2226 int num;
2227
2228 /* Bitfields are always classified as integer. Handle them
2229 early, since later code would consider them to be
2230 misaligned integers. */
2231 if (DECL_BIT_FIELD (field))
2232 {
2233 for (i = int_bit_position (field) / 8 / 8;
2234 i < (int_bit_position (field)
2235 + tree_low_cst (DECL_SIZE (field), 0)
2236 + 63) / 8 / 8; i++)
2237 classes[i] =
2238 merge_classes (X86_64_INTEGER_CLASS,
2239 classes[i]);
2240 }
2241 else
2242 {
2243 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2244 TREE_TYPE (field), subclasses,
2245 (int_bit_position (field)
2246 + bit_offset) % 256);
2247 if (!num)
2248 return 0;
2249 for (i = 0; i < num; i++)
2250 {
2251 int pos =
2252 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
2253 classes[i + pos] =
2254 merge_classes (subclasses[i], classes[i + pos]);
2255 }
2256 }
2257 }
2258 }
2259 }
2260 /* Arrays are handled as small records. */
2261 else if (TREE_CODE (type) == ARRAY_TYPE)
2262 {
2263 int num;
2264 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2265 TREE_TYPE (type), subclasses, bit_offset);
2266 if (!num)
2267 return 0;
2268
2269 /* The partial classes are now full classes. */
2270 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2271 subclasses[0] = X86_64_SSE_CLASS;
2272 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
2273 subclasses[0] = X86_64_INTEGER_CLASS;
2274
2275 for (i = 0; i < words; i++)
2276 classes[i] = subclasses[i % num];
2277 }
2278 /* Unions are similar to RECORD_TYPE but offset is always 0. */
2279 else if (TREE_CODE (type) == UNION_TYPE
2280 || TREE_CODE (type) == QUAL_UNION_TYPE)
2281 {
2282 /* For C++ classes, first merge in the fields of the base classes. */
2283 if (TYPE_BINFO (type))
2284 {
2285 tree binfo, base_binfo;
2286 int basenum;
2287
2288 for (binfo = TYPE_BINFO (type), basenum = 0;
2289 BINFO_BASE_ITERATE (binfo, basenum, base_binfo); basenum++)
2290 {
2291 int num;
2292 int offset = tree_low_cst (BINFO_OFFSET (base_binfo), 0) * 8;
2293 tree type = BINFO_TYPE (base_binfo);
2294
2295 num = classify_argument (TYPE_MODE (type),
2296 type, subclasses,
2297 (offset + (bit_offset % 64)) % 256);
2298 if (!num)
2299 return 0;
2300 for (i = 0; i < num; i++)
2301 {
2302 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2303 classes[i + pos] =
2304 merge_classes (subclasses[i], classes[i + pos]);
2305 }
2306 }
2307 }
2308 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2309 {
2310 if (TREE_CODE (field) == FIELD_DECL)
2311 {
2312 int num;
2313 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2314 TREE_TYPE (field), subclasses,
2315 bit_offset);
2316 if (!num)
2317 return 0;
2318 for (i = 0; i < num; i++)
2319 classes[i] = merge_classes (subclasses[i], classes[i]);
2320 }
2321 }
2322 }
2323 else
2324 abort ();
2325
2326 /* Final merger cleanup. */
2327 for (i = 0; i < words; i++)
2328 {
2329 /* If one class is MEMORY, everything should be passed in
2330 memory. */
2331 if (classes[i] == X86_64_MEMORY_CLASS)
2332 return 0;
2333
2334 /* The X86_64_SSEUP_CLASS should be always preceded by
2335 X86_64_SSE_CLASS. */
2336 if (classes[i] == X86_64_SSEUP_CLASS
2337 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
2338 classes[i] = X86_64_SSE_CLASS;
2339
2340 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
2341 if (classes[i] == X86_64_X87UP_CLASS
2342 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
2343 classes[i] = X86_64_SSE_CLASS;
2344 }
2345 return words;
2346 }
2347
2348 /* Compute the alignment needed. We align all types to their natural boundaries,
2349 with the exception of XFmode, which is aligned to 128 bits here. */
2350 if (mode != VOIDmode && mode != BLKmode)
2351 {
2352 int mode_alignment = GET_MODE_BITSIZE (mode);
2353
2354 if (mode == XFmode)
2355 mode_alignment = 128;
2356 else if (mode == XCmode)
2357 mode_alignment = 256;
2358 if (COMPLEX_MODE_P (mode))
2359 mode_alignment /= 2;
2360 /* Misaligned fields are always returned in memory. */
2361 if (bit_offset % mode_alignment)
2362 return 0;
2363 }
2364
2365 /* For V1xx modes, just use the base mode. */
2366 if (VECTOR_MODE_P (mode)
2367 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
2368 mode = GET_MODE_INNER (mode);
2369
2370 /* Classification of atomic types. */
2371 switch (mode)
2372 {
2373 case DImode:
2374 case SImode:
2375 case HImode:
2376 case QImode:
2377 case CSImode:
2378 case CHImode:
2379 case CQImode:
2380 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2381 classes[0] = X86_64_INTEGERSI_CLASS;
2382 else
2383 classes[0] = X86_64_INTEGER_CLASS;
2384 return 1;
2385 case CDImode:
2386 case TImode:
2387 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2388 return 2;
2389 case CTImode:
2390 return 0;
2391 case SFmode:
2392 if (!(bit_offset % 64))
2393 classes[0] = X86_64_SSESF_CLASS;
2394 else
2395 classes[0] = X86_64_SSE_CLASS;
2396 return 1;
2397 case DFmode:
2398 classes[0] = X86_64_SSEDF_CLASS;
2399 return 1;
2400 case XFmode:
2401 classes[0] = X86_64_X87_CLASS;
2402 classes[1] = X86_64_X87UP_CLASS;
2403 return 2;
2404 case TFmode:
2405 classes[0] = X86_64_SSE_CLASS;
2406 classes[1] = X86_64_SSEUP_CLASS;
2407 return 2;
2408 case SCmode:
2409 classes[0] = X86_64_SSE_CLASS;
2410 return 1;
2411 case DCmode:
2412 classes[0] = X86_64_SSEDF_CLASS;
2413 classes[1] = X86_64_SSEDF_CLASS;
2414 return 2;
2415 case XCmode:
2416 classes[0] = X86_64_COMPLEX_X87_CLASS;
2417 return 1;
2418 case TCmode:
2419 /* This mode is larger than 16 bytes. */
2420 return 0;
2421 case V4SFmode:
2422 case V4SImode:
2423 case V16QImode:
2424 case V8HImode:
2425 case V2DFmode:
2426 case V2DImode:
2427 classes[0] = X86_64_SSE_CLASS;
2428 classes[1] = X86_64_SSEUP_CLASS;
2429 return 2;
2430 case V2SFmode:
2431 case V2SImode:
2432 case V4HImode:
2433 case V8QImode:
2434 classes[0] = X86_64_SSE_CLASS;
2435 return 1;
2436 case BLKmode:
2437 case VOIDmode:
2438 return 0;
2439 default:
2440 if (VECTOR_MODE_P (mode))
2441 {
2442 if (bytes > 16)
2443 return 0;
2444 if (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT)
2445 {
2446 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2447 classes[0] = X86_64_INTEGERSI_CLASS;
2448 else
2449 classes[0] = X86_64_INTEGER_CLASS;
2450 classes[1] = X86_64_INTEGER_CLASS;
2451 return 1 + (bytes > 8);
2452 }
2453 }
2454 abort ();
2455 }
2456 }
2457
2458 /* Examine the argument and set the number of registers required in each
2459 class. Return 0 iff the parameter should be passed in memory. */
2460 static int
2461 examine_argument (enum machine_mode mode, tree type, int in_return,
2462 int *int_nregs, int *sse_nregs)
2463 {
2464 enum x86_64_reg_class class[MAX_CLASSES];
2465 int n = classify_argument (mode, type, class, 0);
2466
2467 *int_nregs = 0;
2468 *sse_nregs = 0;
2469 if (!n)
2470 return 0;
2471 for (n--; n >= 0; n--)
2472 switch (class[n])
2473 {
2474 case X86_64_INTEGER_CLASS:
2475 case X86_64_INTEGERSI_CLASS:
2476 (*int_nregs)++;
2477 break;
2478 case X86_64_SSE_CLASS:
2479 case X86_64_SSESF_CLASS:
2480 case X86_64_SSEDF_CLASS:
2481 (*sse_nregs)++;
2482 break;
2483 case X86_64_NO_CLASS:
2484 case X86_64_SSEUP_CLASS:
2485 break;
2486 case X86_64_X87_CLASS:
2487 case X86_64_X87UP_CLASS:
2488 if (!in_return)
2489 return 0;
2490 break;
2491 case X86_64_COMPLEX_X87_CLASS:
2492 return in_return ? 2 : 0;
2493 case X86_64_MEMORY_CLASS:
2494 abort ();
2495 }
2496 return 1;
2497 }
2498
2499 /* Construct container for the argument used by GCC interface. See
2500 FUNCTION_ARG for the detailed description. */
2501
2502 static rtx
2503 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
2504 tree type, int in_return, int nintregs, int nsseregs,
2505 const int *intreg, int sse_regno)
2506 {
2507 enum machine_mode tmpmode;
2508 int bytes =
2509 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2510 enum x86_64_reg_class class[MAX_CLASSES];
2511 int n;
2512 int i;
2513 int nexps = 0;
2514 int needed_sseregs, needed_intregs;
2515 rtx exp[MAX_CLASSES];
2516 rtx ret;
2517
2518 n = classify_argument (mode, type, class, 0);
2519 if (TARGET_DEBUG_ARG)
2520 {
2521 if (!n)
2522 fprintf (stderr, "Memory class\n");
2523 else
2524 {
2525 fprintf (stderr, "Classes:");
2526 for (i = 0; i < n; i++)
2527 {
2528 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2529 }
2530 fprintf (stderr, "\n");
2531 }
2532 }
2533 if (!n)
2534 return NULL;
2535 if (!examine_argument (mode, type, in_return, &needed_intregs,
2536 &needed_sseregs))
2537 return NULL;
2538 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2539 return NULL;
2540
2541 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
2542 some less clueful developer tries to use floating-point anyway. */
2543 if (needed_sseregs && !TARGET_SSE)
2544 {
2545 static bool issued_error;
2546 if (!issued_error)
2547 {
2548 issued_error = true;
2549 if (in_return)
2550 error ("SSE register return with SSE disabled");
2551 else
2552 error ("SSE register argument with SSE disabled");
2553 }
2554 return NULL;
2555 }
2556
2557 /* First construct the simple cases. Avoid SCmode, since we want to use
2558 a single register to pass this type. */
2559 if (n == 1 && mode != SCmode)
2560 switch (class[0])
2561 {
2562 case X86_64_INTEGER_CLASS:
2563 case X86_64_INTEGERSI_CLASS:
2564 return gen_rtx_REG (mode, intreg[0]);
2565 case X86_64_SSE_CLASS:
2566 case X86_64_SSESF_CLASS:
2567 case X86_64_SSEDF_CLASS:
2568 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
2569 case X86_64_X87_CLASS:
2570 case X86_64_COMPLEX_X87_CLASS:
2571 return gen_rtx_REG (mode, FIRST_STACK_REG);
2572 case X86_64_NO_CLASS:
2573 /* Zero sized array, struct or class. */
2574 return NULL;
2575 default:
2576 abort ();
2577 }
2578 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
2579 && mode != BLKmode)
2580 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2581 if (n == 2
2582 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2583 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
2584 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2585 && class[1] == X86_64_INTEGER_CLASS
2586 && (mode == CDImode || mode == TImode || mode == TFmode)
2587 && intreg[0] + 1 == intreg[1])
2588 return gen_rtx_REG (mode, intreg[0]);
2589
2590 /* Otherwise figure out the entries of the PARALLEL. */
2591 for (i = 0; i < n; i++)
2592 {
2593 switch (class[i])
2594 {
2595 case X86_64_NO_CLASS:
2596 break;
2597 case X86_64_INTEGER_CLASS:
2598 case X86_64_INTEGERSI_CLASS:
2599 /* Merge TImodes on aligned occasions here too. */
2600 if (i * 8 + 8 > bytes)
2601 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2602 else if (class[i] == X86_64_INTEGERSI_CLASS)
2603 tmpmode = SImode;
2604 else
2605 tmpmode = DImode;
2606 /* We've requested 24 bytes for which we don't have a mode. Use DImode. */
2607 if (tmpmode == BLKmode)
2608 tmpmode = DImode;
2609 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2610 gen_rtx_REG (tmpmode, *intreg),
2611 GEN_INT (i*8));
2612 intreg++;
2613 break;
2614 case X86_64_SSESF_CLASS:
2615 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2616 gen_rtx_REG (SFmode,
2617 SSE_REGNO (sse_regno)),
2618 GEN_INT (i*8));
2619 sse_regno++;
2620 break;
2621 case X86_64_SSEDF_CLASS:
2622 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2623 gen_rtx_REG (DFmode,
2624 SSE_REGNO (sse_regno)),
2625 GEN_INT (i*8));
2626 sse_regno++;
2627 break;
2628 case X86_64_SSE_CLASS:
2629 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2630 tmpmode = TImode;
2631 else
2632 tmpmode = DImode;
2633 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2634 gen_rtx_REG (tmpmode,
2635 SSE_REGNO (sse_regno)),
2636 GEN_INT (i*8));
2637 if (tmpmode == TImode)
2638 i++;
2639 sse_regno++;
2640 break;
2641 default:
2642 abort ();
2643 }
2644 }
2645 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2646 for (i = 0; i < nexps; i++)
2647 XVECEXP (ret, 0, i) = exp [i];
2648 return ret;
2649 }
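/* As a concrete sketch: for a first argument of type
   struct { double a; double b; } the classes are SSEDF, SSEDF, so the
   result is a PARALLEL of (reg:DF xmm0) at offset 0 and (reg:DF xmm1)
   at offset 8, matching the psABI.  */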
2650
2651 /* Update the data in CUM to advance over an argument
2652 of mode MODE and data type TYPE.
2653 (TYPE is null for libcalls where that information may not be available.) */
2654
2655 void
2656 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
2657 tree type, int named)
2658 {
2659 int bytes =
2660 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2661 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2662
2663 if (type)
2664 mode = type_natural_mode (type);
2665
2666 if (TARGET_DEBUG_ARG)
2667 fprintf (stderr, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
2668 "mode=%s, named=%d)\n\n",
2669 words, cum->words, cum->nregs, cum->sse_nregs,
2670 GET_MODE_NAME (mode), named);
2671
2672 if (TARGET_64BIT)
2673 {
2674 int int_nregs, sse_nregs;
2675 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2676 cum->words += words;
2677 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2678 {
2679 cum->nregs -= int_nregs;
2680 cum->sse_nregs -= sse_nregs;
2681 cum->regno += int_nregs;
2682 cum->sse_regno += sse_nregs;
2683 }
2684 else
2685 cum->words += words;
2686 }
2687 else
2688 {
2689 switch (mode)
2690 {
2691 default:
2692 break;
2693
2694 case BLKmode:
2695 if (bytes < 0)
2696 break;
2697 /* FALLTHRU */
2698
2699 case DImode:
2700 case SImode:
2701 case HImode:
2702 case QImode:
2703 cum->words += words;
2704 cum->nregs -= words;
2705 cum->regno += words;
2706
2707 if (cum->nregs <= 0)
2708 {
2709 cum->nregs = 0;
2710 cum->regno = 0;
2711 }
2712 break;
2713
2714 case TImode:
2715 case V16QImode:
2716 case V8HImode:
2717 case V4SImode:
2718 case V2DImode:
2719 case V4SFmode:
2720 case V2DFmode:
2721 if (!type || !AGGREGATE_TYPE_P (type))
2722 {
2723 cum->sse_words += words;
2724 cum->sse_nregs -= 1;
2725 cum->sse_regno += 1;
2726 if (cum->sse_nregs <= 0)
2727 {
2728 cum->sse_nregs = 0;
2729 cum->sse_regno = 0;
2730 }
2731 }
2732 break;
2733
2734 case V8QImode:
2735 case V4HImode:
2736 case V2SImode:
2737 case V2SFmode:
2738 if (!type || !AGGREGATE_TYPE_P (type))
2739 {
2740 cum->mmx_words += words;
2741 cum->mmx_nregs -= 1;
2742 cum->mmx_regno += 1;
2743 if (cum->mmx_nregs <= 0)
2744 {
2745 cum->mmx_nregs = 0;
2746 cum->mmx_regno = 0;
2747 }
2748 }
2749 break;
2750 }
2751 }
2752 }
2753
2754 /* Define where to put the arguments to a function.
2755 Value is zero to push the argument on the stack,
2756 or a hard register in which to store the argument.
2757
2758 MODE is the argument's machine mode.
2759 TYPE is the data type of the argument (as a tree).
2760 This is null for libcalls where that information may
2761 not be available.
2762 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2763 the preceding args and about the function being called.
2764 NAMED is nonzero if this argument is a named parameter
2765 (otherwise it is an extra parameter matching an ellipsis). */
2766
2767 rtx
2768 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode orig_mode,
2769 tree type, int named)
2770 {
2771 enum machine_mode mode = orig_mode;
2772 rtx ret = NULL_RTX;
2773 int bytes =
2774 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2775 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2776 static bool warnedsse, warnedmmx;
2777
2778 /* To simplify the code below, represent vector types with a vector mode
2779 even if MMX/SSE are not active. */
2780 if (type && TREE_CODE (type) == VECTOR_TYPE)
2781 mode = type_natural_mode (type);
2782
2783 /* Handle a hidden AL argument containing the number of registers for varargs
2784 x86-64 functions. For the i386 ABI just return constm1_rtx to avoid
2785 any AL settings. */
2786 if (mode == VOIDmode)
2787 {
2788 if (TARGET_64BIT)
2789 return GEN_INT (cum->maybe_vaarg
2790 ? (cum->sse_nregs < 0
2791 ? SSE_REGPARM_MAX
2792 : cum->sse_regno)
2793 : -1);
2794 else
2795 return constm1_rtx;
2796 }
2797 if (TARGET_64BIT)
2798 ret = construct_container (mode, orig_mode, type, 0, cum->nregs,
2799 cum->sse_nregs,
2800 &x86_64_int_parameter_registers [cum->regno],
2801 cum->sse_regno);
2802 else
2803 switch (mode)
2804 {
2805 /* For now, pass fp/complex values on the stack. */
2806 default:
2807 break;
2808
2809 case BLKmode:
2810 if (bytes < 0)
2811 break;
2812 /* FALLTHRU */
2813 case DImode:
2814 case SImode:
2815 case HImode:
2816 case QImode:
2817 if (words <= cum->nregs)
2818 {
2819 int regno = cum->regno;
2820
2821 /* Fastcall allocates the first two DWORD (SImode) or
2822 smaller arguments to ECX and EDX. */
2823 if (cum->fastcall)
2824 {
2825 if (mode == BLKmode || mode == DImode)
2826 break;
2827
2828 /* ECX not EAX is the first allocated register. */
2829 if (regno == 0)
2830 regno = 2;
2831 }
2832 ret = gen_rtx_REG (mode, regno);
2833 }
2834 break;
2835 case TImode:
2836 case V16QImode:
2837 case V8HImode:
2838 case V4SImode:
2839 case V2DImode:
2840 case V4SFmode:
2841 case V2DFmode:
2842 if (!type || !AGGREGATE_TYPE_P (type))
2843 {
2844 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
2845 {
2846 warnedsse = true;
2847 warning ("SSE vector argument without SSE enabled "
2848 "changes the ABI");
2849 }
2850 if (cum->sse_nregs)
2851 ret = gen_reg_or_parallel (mode, orig_mode,
2852 cum->sse_regno + FIRST_SSE_REG);
2853 }
2854 break;
2855 case V8QImode:
2856 case V4HImode:
2857 case V2SImode:
2858 case V2SFmode:
2859 if (!type || !AGGREGATE_TYPE_P (type))
2860 {
2861 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
2862 {
2863 warnedmmx = true;
2864 warning ("MMX vector argument without MMX enabled "
2865 "changes the ABI");
2866 }
2867 if (cum->mmx_nregs)
2868 ret = gen_reg_or_parallel (mode, orig_mode,
2869 cum->mmx_regno + FIRST_MMX_REG);
2870 }
2871 break;
2872 }
2873
2874 if (TARGET_DEBUG_ARG)
2875 {
2876 fprintf (stderr,
2877 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2878 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2879
2880 if (ret)
2881 print_simple_rtl (stderr, ret);
2882 else
2883 fprintf (stderr, ", stack");
2884
2885 fprintf (stderr, " )\n");
2886 }
2887
2888 return ret;
2889 }
2890
2891 /* A C expression that indicates when an argument must be passed by
2892 reference. If nonzero for an argument, a copy of that argument is
2893 made in memory and a pointer to the argument is passed instead of
2894 the argument itself. The pointer is passed in whatever way is
2895 appropriate for passing a pointer to that type. */
2896
2897 static bool
2898 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
2899 enum machine_mode mode ATTRIBUTE_UNUSED,
2900 tree type, bool named ATTRIBUTE_UNUSED)
2901 {
2902 if (!TARGET_64BIT)
2903 return 0;
2904
2905 if (type && int_size_in_bytes (type) == -1)
2906 {
2907 if (TARGET_DEBUG_ARG)
2908 fprintf (stderr, "function_arg_pass_by_reference\n");
2909 return 1;
2910 }
2911
2912 return 0;
2913 }
2914
2915 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
2916 ABI. Only called if TARGET_SSE. */
2917 static bool
2918 contains_128bit_aligned_vector_p (tree type)
2919 {
2920 enum machine_mode mode = TYPE_MODE (type);
2921 if (SSE_REG_MODE_P (mode)
2922 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
2923 return true;
2924 if (TYPE_ALIGN (type) < 128)
2925 return false;
2926
2927 if (AGGREGATE_TYPE_P (type))
2928 {
2929 /* Walk the aggregates recursively. */
2930 if (TREE_CODE (type) == RECORD_TYPE
2931 || TREE_CODE (type) == UNION_TYPE
2932 || TREE_CODE (type) == QUAL_UNION_TYPE)
2933 {
2934 tree field;
2935
2936 if (TYPE_BINFO (type))
2937 {
2938 tree binfo, base_binfo;
2939 int i;
2940
2941 for (binfo = TYPE_BINFO (type), i = 0;
2942 BINFO_BASE_ITERATE (binfo, i, base_binfo); i++)
2943 if (contains_128bit_aligned_vector_p (BINFO_TYPE (base_binfo)))
2944 return true;
2945 }
2946 /* And now check the fields of the structure. */
2947 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2948 {
2949 if (TREE_CODE (field) == FIELD_DECL
2950 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
2951 return true;
2952 }
2953 }
2954 /* Just in case some language passes arrays by value. */
2955 else if (TREE_CODE (type) == ARRAY_TYPE)
2956 {
2957 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
2958 return true;
2959 }
2960 else
2961 abort ();
2962 }
2963 return false;
2964 }
2965
2966 /* Gives the alignment boundary, in bits, of an argument with the
2967 specified mode and type. */
2968
2969 int
2970 ix86_function_arg_boundary (enum machine_mode mode, tree type)
2971 {
2972 int align;
2973 if (type)
2974 align = TYPE_ALIGN (type);
2975 else
2976 align = GET_MODE_ALIGNMENT (mode);
2977 if (align < PARM_BOUNDARY)
2978 align = PARM_BOUNDARY;
2979 if (!TARGET_64BIT)
2980 {
2981 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
2982 make an exception for SSE modes since these require 128bit
2983 alignment.
2984
2985 The handling here differs from field_alignment. ICC aligns MMX
2986 arguments to 4 byte boundaries, while structure fields are aligned
2987 to 8 byte boundaries. */
2988 if (!TARGET_SSE)
2989 align = PARM_BOUNDARY;
2990 else if (!type)
2991 {
2992 if (!SSE_REG_MODE_P (mode))
2993 align = PARM_BOUNDARY;
2994 }
2995 else
2996 {
2997 if (!contains_128bit_aligned_vector_p (type))
2998 align = PARM_BOUNDARY;
2999 }
3000 }
3001 if (align > 128)
3002 align = 128;
3003 return align;
3004 }
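/* For example, on a 32-bit target with SSE enabled an __m128 argument
   (or an aggregate containing one) is aligned here to 128 bits, while
   a plain double stays at the 32-bit PARM_BOUNDARY even though its
   natural alignment is 64 bits.  */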
3005
3006 /* Return true if REGNO is a possible register number of a function value. */
3007 bool
3008 ix86_function_value_regno_p (int regno)
3009 {
3010 if (!TARGET_64BIT)
3011 {
3012 return ((regno) == 0
3013 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
3014 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
3015 }
3016 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
3017 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
3018 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
3019 }
3020
3021 /* Define how to find the value returned by a function.
3022 VALTYPE is the data type of the value (as a tree).
3023 If the precise function being called is known, FUNC is its FUNCTION_DECL;
3024 otherwise, FUNC is 0. */
3025 rtx
3026 ix86_function_value (tree valtype)
3027 {
3028 enum machine_mode natmode = type_natural_mode (valtype);
3029
3030 if (TARGET_64BIT)
3031 {
3032 rtx ret = construct_container (natmode, TYPE_MODE (valtype), valtype,
3033 1, REGPARM_MAX, SSE_REGPARM_MAX,
3034 x86_64_int_return_registers, 0);
3035 /* For zero sized structures, construct_container returns NULL, but we
3036 need to keep the rest of the compiler happy by returning a meaningful value. */
3037 if (!ret)
3038 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
3039 return ret;
3040 }
3041 else
3042 return gen_rtx_REG (TYPE_MODE (valtype), ix86_value_regno (natmode));
3043 }
3044
3045 /* Return nonzero iff type is returned in memory. */
3046 int
3047 ix86_return_in_memory (tree type)
3048 {
3049 int needed_intregs, needed_sseregs, size;
3050 enum machine_mode mode = type_natural_mode (type);
3051
3052 if (TARGET_64BIT)
3053 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
3054
3055 if (mode == BLKmode)
3056 return 1;
3057
3058 size = int_size_in_bytes (type);
3059
3060 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
3061 return 0;
3062
3063 if (VECTOR_MODE_P (mode) || mode == TImode)
3064 {
3065 /* User-created vectors small enough to fit in EAX. */
3066 if (size < 8)
3067 return 0;
3068
3069 /* MMX/3dNow values are returned on the stack, since we've
3070 got to EMMS/FEMMS before returning. */
3071 if (size == 8)
3072 return 1;
3073
3074 /* SSE values are returned in XMM0, except when it doesn't exist. */
3075 if (size == 16)
3076 return (TARGET_SSE ? 0 : 1);
3077 }
3078
3079 if (mode == XFmode)
3080 return 0;
3081
3082 if (size > 12)
3083 return 1;
3084 return 0;
3085 }
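/* Concretely, for the 32-bit case: a 16 byte struct is returned in
   memory, an __m128 comes back in %xmm0 when SSE is enabled, an 8 byte
   MMX/3dNow vector always goes through memory, and long double
   (XFmode) is returned in %st(0) under the default return convention.  */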
3086
3087 /* When returning SSE vector types, we have a choice of either
3088 (1) being abi incompatible with a -march switch, or
3089 (2) generating an error.
3090 Given no good solution, I think the safest thing is one warning.
3091 The user won't be able to use -Werror, but....
3092
3093 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
3094 called in response to actually generating a caller or callee that
3095 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
3096 via aggregate_value_p for general type probing from tree-ssa. */
3097
3098 static rtx
3099 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
3100 {
3101 static bool warned;
3102
3103 if (!TARGET_SSE && type && !warned)
3104 {
3105 /* Look at the return type of the function, not the function type. */
3106 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
3107
3108 if (mode == TImode
3109 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3110 {
3111 warned = true;
3112 warning ("SSE vector return without SSE enabled changes the ABI");
3113 }
3114 }
3115
3116 return NULL;
3117 }
3118
3119 /* Define how to find the value returned by a library function
3120 assuming the value has mode MODE. */
3121 rtx
3122 ix86_libcall_value (enum machine_mode mode)
3123 {
3124 if (TARGET_64BIT)
3125 {
3126 switch (mode)
3127 {
3128 case SFmode:
3129 case SCmode:
3130 case DFmode:
3131 case DCmode:
3132 case TFmode:
3133 return gen_rtx_REG (mode, FIRST_SSE_REG);
3134 case XFmode:
3135 case XCmode:
3136 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
3137 case TCmode:
3138 return NULL;
3139 default:
3140 return gen_rtx_REG (mode, 0);
3141 }
3142 }
3143 else
3144 return gen_rtx_REG (mode, ix86_value_regno (mode));
3145 }
3146
3147 /* Given a mode, return the register to use for a return value. */
3148
3149 static int
3150 ix86_value_regno (enum machine_mode mode)
3151 {
3152 /* Floating point return values in %st(0). */
3153 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
3154 return FIRST_FLOAT_REG;
3155 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
3156 we prevent this case when sse is not available. */
3157 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3158 return FIRST_SSE_REG;
3159 /* Everything else in %eax. */
3160 return 0;
3161 }
3162 \f
3163 /* Create the va_list data type. */
3164
3165 static tree
3166 ix86_build_builtin_va_list (void)
3167 {
3168 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
3169
3170 /* For i386 we use a plain pointer to the argument area. */
3171 if (!TARGET_64BIT)
3172 return build_pointer_type (char_type_node);
3173
3174 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
3175 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
3176
3177 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
3178 unsigned_type_node);
3179 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
3180 unsigned_type_node);
3181 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
3182 ptr_type_node);
3183 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
3184 ptr_type_node);
3185
3186 DECL_FIELD_CONTEXT (f_gpr) = record;
3187 DECL_FIELD_CONTEXT (f_fpr) = record;
3188 DECL_FIELD_CONTEXT (f_ovf) = record;
3189 DECL_FIELD_CONTEXT (f_sav) = record;
3190
3191 TREE_CHAIN (record) = type_decl;
3192 TYPE_NAME (record) = type_decl;
3193 TYPE_FIELDS (record) = f_gpr;
3194 TREE_CHAIN (f_gpr) = f_fpr;
3195 TREE_CHAIN (f_fpr) = f_ovf;
3196 TREE_CHAIN (f_ovf) = f_sav;
3197
3198 layout_type (record);
3199
3200 /* The correct type is an array type of one element. */
3201 return build_array_type (record, build_index_type (size_zero_node));
3202 }
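/* The record built above corresponds to the familiar C-level view of
   the x86-64 va_list (a sketch, field names as created above):

     typedef struct {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } __va_list_tag, va_list[1];  */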
3203
3204 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
3205
3206 static void
3207 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3208 tree type, int *pretend_size ATTRIBUTE_UNUSED,
3209 int no_rtl)
3210 {
3211 CUMULATIVE_ARGS next_cum;
3212 rtx save_area = NULL_RTX, mem;
3213 rtx label;
3214 rtx label_ref;
3215 rtx tmp_reg;
3216 rtx nsse_reg;
3217 int set;
3218 tree fntype;
3219 int stdarg_p;
3220 int i;
3221
3222 if (!TARGET_64BIT)
3223 return;
3224
3225 /* Indicate that we need to allocate space on the stack for the varargs save area. */
3226 ix86_save_varrargs_registers = 1;
3227
3228 cfun->stack_alignment_needed = 128;
3229
3230 fntype = TREE_TYPE (current_function_decl);
3231 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
3232 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
3233 != void_type_node));
3234
3235 /* For varargs, we do not want to skip the dummy va_dcl argument.
3236 For stdargs, we do want to skip the last named argument. */
3237 next_cum = *cum;
3238 if (stdarg_p)
3239 function_arg_advance (&next_cum, mode, type, 1);
3240
3241 if (!no_rtl)
3242 save_area = frame_pointer_rtx;
3243
3244 set = get_varargs_alias_set ();
3245
3246 for (i = next_cum.regno; i < ix86_regparm; i++)
3247 {
3248 mem = gen_rtx_MEM (Pmode,
3249 plus_constant (save_area, i * UNITS_PER_WORD));
3250 set_mem_alias_set (mem, set);
3251 emit_move_insn (mem, gen_rtx_REG (Pmode,
3252 x86_64_int_parameter_registers[i]));
3253 }
3254
3255 if (next_cum.sse_nregs)
3256 {
3257 /* Now emit code to save SSE registers. The AX parameter contains the number
3258 of SSE parameter registers used to call this function. We use the
3259 sse_prologue_save insn template, which produces a computed jump across
3260 the SSE saves. We need some preparation work to get this working. */
3261
3262 label = gen_label_rtx ();
3263 label_ref = gen_rtx_LABEL_REF (Pmode, label);
3264
3265 /* Compute the address to jump to:
3266 label - 4*eax + nnamed_sse_arguments*4 */
3267 tmp_reg = gen_reg_rtx (Pmode);
3268 nsse_reg = gen_reg_rtx (Pmode);
3269 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
3270 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3271 gen_rtx_MULT (Pmode, nsse_reg,
3272 GEN_INT (4))));
3273 if (next_cum.sse_regno)
3274 emit_move_insn
3275 (nsse_reg,
3276 gen_rtx_CONST (DImode,
3277 gen_rtx_PLUS (DImode,
3278 label_ref,
3279 GEN_INT (next_cum.sse_regno * 4))));
3280 else
3281 emit_move_insn (nsse_reg, label_ref);
3282 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
3283
3284 /* Compute the address of the memory block we save into. We always use a
3285 pointer pointing 127 bytes after the first byte to store - this is needed
3286 to keep the instruction size limited to 4 bytes. */
3287 tmp_reg = gen_reg_rtx (Pmode);
3288 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3289 plus_constant (save_area,
3290 8 * REGPARM_MAX + 127)));
3291 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
3292 set_mem_alias_set (mem, set);
3293 set_mem_align (mem, BITS_PER_WORD);
3294
3295 /* And finally do the dirty job! */
3296 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
3297 GEN_INT (next_cum.sse_regno), label));
3298 }
3299
3300 }
3301
3302 /* Implement va_start. */
3303
3304 void
3305 ix86_va_start (tree valist, rtx nextarg)
3306 {
3307 HOST_WIDE_INT words, n_gpr, n_fpr;
3308 tree f_gpr, f_fpr, f_ovf, f_sav;
3309 tree gpr, fpr, ovf, sav, t;
3310
3311 /* Only the 64-bit target needs something special. */
3312 if (!TARGET_64BIT)
3313 {
3314 std_expand_builtin_va_start (valist, nextarg);
3315 return;
3316 }
3317
3318 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3319 f_fpr = TREE_CHAIN (f_gpr);
3320 f_ovf = TREE_CHAIN (f_fpr);
3321 f_sav = TREE_CHAIN (f_ovf);
3322
3323 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3324 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
3325 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
3326 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
3327 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
3328
3329 /* Count number of gp and fp argument registers used. */
3330 words = current_function_args_info.words;
3331 n_gpr = current_function_args_info.regno;
3332 n_fpr = current_function_args_info.sse_regno;
3333
3334 if (TARGET_DEBUG_ARG)
3335 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
3336 (int) words, (int) n_gpr, (int) n_fpr);
3337
3338 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
3339 build_int_cst (NULL_TREE, n_gpr * 8));
3340 TREE_SIDE_EFFECTS (t) = 1;
3341 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3342
3343 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
3344 build_int_cst (NULL_TREE, n_fpr * 16 + 8*REGPARM_MAX));
3345 TREE_SIDE_EFFECTS (t) = 1;
3346 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3347
3348 /* Find the overflow area. */
3349 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
3350 if (words != 0)
3351 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
3352 build_int_cst (NULL_TREE, words * UNITS_PER_WORD));
3353 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3354 TREE_SIDE_EFFECTS (t) = 1;
3355 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3356
3357 /* Find the register save area.
3358 The function prologue saves it right above the stack frame. */
3359 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
3360 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
3361 TREE_SIDE_EFFECTS (t) = 1;
3362 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3363 }
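
/* As an illustrative example of the assignments above (assuming the
   standard x86-64 va_list layout with fields gp_offset, fp_offset,
   overflow_arg_area and reg_save_area, and REGPARM_MAX == 6): for

       int f (int fmt, ...)

   where the only named argument occupies one integer register and no
   arguments are passed on the stack, va_start roughly produces

       gp_offset         = 1 * 8;                  // n_gpr * 8
       fp_offset         = 0 * 16 + 8 * 6;         // n_fpr * 16 + 8*REGPARM_MAX
       overflow_arg_area = incoming argument area; // words == 0, no offset
       reg_save_area     = save area set up by the prologue.  */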
3364
3365 /* Implement va_arg. */
3366
3367 tree
3368 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
3369 {
3370 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
3371 tree f_gpr, f_fpr, f_ovf, f_sav;
3372 tree gpr, fpr, ovf, sav, t;
3373 int size, rsize;
3374 tree lab_false, lab_over = NULL_TREE;
3375 tree addr, t2;
3376 rtx container;
3377 int indirect_p = 0;
3378 tree ptrtype;
3379 enum machine_mode nat_mode;
3380
3381 /* Only the 64-bit target needs something special. */
3382 if (!TARGET_64BIT)
3383 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
3384
3385 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3386 f_fpr = TREE_CHAIN (f_gpr);
3387 f_ovf = TREE_CHAIN (f_fpr);
3388 f_sav = TREE_CHAIN (f_ovf);
3389
3390 valist = build_va_arg_indirect_ref (valist);
3391 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
3392 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
3393 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
3394 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
3395
3396 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
3397 if (indirect_p)
3398 type = build_pointer_type (type);
3399 size = int_size_in_bytes (type);
3400 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3401
3402 nat_mode = type_natural_mode (type);
3403 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
3404 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
3405
3406 /* Pull the value out of the saved registers. */
3407
3408 addr = create_tmp_var (ptr_type_node, "addr");
3409 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
3410
3411 if (container)
3412 {
3413 int needed_intregs, needed_sseregs;
3414 bool need_temp;
3415 tree int_addr, sse_addr;
3416
3417 lab_false = create_artificial_label ();
3418 lab_over = create_artificial_label ();
3419
3420 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
3421
3422 need_temp = (!REG_P (container)
3423 && ((needed_intregs && TYPE_ALIGN (type) > 64)
3424 || TYPE_ALIGN (type) > 128));
3425
3426 /* If we are passing a structure, verify that it is a consecutive block
3427 in the register save area. If not, we need to do moves. */
3428 if (!need_temp && !REG_P (container))
3429 {
3430 /* Verify that all registers are strictly consecutive */
3431 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
3432 {
3433 int i;
3434
3435 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3436 {
3437 rtx slot = XVECEXP (container, 0, i);
3438 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
3439 || INTVAL (XEXP (slot, 1)) != i * 16)
3440 need_temp = 1;
3441 }
3442 }
3443 else
3444 {
3445 int i;
3446
3447 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3448 {
3449 rtx slot = XVECEXP (container, 0, i);
3450 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
3451 || INTVAL (XEXP (slot, 1)) != i * 8)
3452 need_temp = 1;
3453 }
3454 }
3455 }
3456 if (!need_temp)
3457 {
3458 int_addr = addr;
3459 sse_addr = addr;
3460 }
3461 else
3462 {
3463 int_addr = create_tmp_var (ptr_type_node, "int_addr");
3464 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
3465 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
3466 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
3467 }
3468
3469 /* First ensure that we fit completely in registers. */
3470 if (needed_intregs)
3471 {
3472 t = build_int_cst (TREE_TYPE (gpr),
3473 (REGPARM_MAX - needed_intregs + 1) * 8);
3474 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
3475 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
3476 t = build (COND_EXPR, void_type_node, t, t2, NULL_TREE);
3477 gimplify_and_add (t, pre_p);
3478 }
3479 if (needed_sseregs)
3480 {
3481 t = build_int_cst (TREE_TYPE (fpr),
3482 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
3483 + REGPARM_MAX * 8);
3484 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
3485 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
3486 t = build (COND_EXPR, void_type_node, t, t2, NULL_TREE);
3487 gimplify_and_add (t, pre_p);
3488 }
3489
3490 /* Compute index to start of area used for integer regs. */
3491 if (needed_intregs)
3492 {
3493 /* int_addr = gpr + sav; */
3494 t = fold_convert (ptr_type_node, gpr);
3495 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
3496 t = build2 (MODIFY_EXPR, void_type_node, int_addr, t);
3497 gimplify_and_add (t, pre_p);
3498 }
3499 if (needed_sseregs)
3500 {
3501 /* sse_addr = fpr + sav; */
3502 t = fold_convert (ptr_type_node, fpr);
3503 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
3504 t = build2 (MODIFY_EXPR, void_type_node, sse_addr, t);
3505 gimplify_and_add (t, pre_p);
3506 }
3507 if (need_temp)
3508 {
3509 int i;
3510 tree temp = create_tmp_var (type, "va_arg_tmp");
3511
3512 /* addr = &temp; */
3513 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
3514 t = build2 (MODIFY_EXPR, void_type_node, addr, t);
3515 gimplify_and_add (t, pre_p);
3516
3517 for (i = 0; i < XVECLEN (container, 0); i++)
3518 {
3519 rtx slot = XVECEXP (container, 0, i);
3520 rtx reg = XEXP (slot, 0);
3521 enum machine_mode mode = GET_MODE (reg);
3522 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
3523 tree addr_type = build_pointer_type (piece_type);
3524 tree src_addr, src;
3525 int src_offset;
3526 tree dest_addr, dest;
3527
3528 if (SSE_REGNO_P (REGNO (reg)))
3529 {
3530 src_addr = sse_addr;
3531 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
3532 }
3533 else
3534 {
3535 src_addr = int_addr;
3536 src_offset = REGNO (reg) * 8;
3537 }
3538 src_addr = fold_convert (addr_type, src_addr);
3539 src_addr = fold (build2 (PLUS_EXPR, addr_type, src_addr,
3540 size_int (src_offset)));
3541 src = build_va_arg_indirect_ref (src_addr);
3542
3543 dest_addr = fold_convert (addr_type, addr);
3544 dest_addr = fold (build2 (PLUS_EXPR, addr_type, dest_addr,
3545 size_int (INTVAL (XEXP (slot, 1)))));
3546 dest = build_va_arg_indirect_ref (dest_addr);
3547
3548 t = build2 (MODIFY_EXPR, void_type_node, dest, src);
3549 gimplify_and_add (t, pre_p);
3550 }
3551 }
3552
3553 if (needed_intregs)
3554 {
3555 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
3556 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
3557 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
3558 gimplify_and_add (t, pre_p);
3559 }
3560 if (needed_sseregs)
3561 {
3562 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
3563 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
3564 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
3565 gimplify_and_add (t, pre_p);
3566 }
3567
3568 t = build1 (GOTO_EXPR, void_type_node, lab_over);
3569 gimplify_and_add (t, pre_p);
3570
3571 t = build1 (LABEL_EXPR, void_type_node, lab_false);
3572 append_to_statement_list (t, pre_p);
3573 }
3574
3575 /* ... otherwise out of the overflow area. */
3576
3577 /* Care for on-stack alignment if needed. */
3578 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
3579 t = ovf;
3580 else
3581 {
3582 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
3583 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf,
3584 build_int_cst (TREE_TYPE (ovf), align - 1));
3585 t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
3586 build_int_cst (TREE_TYPE (t), -align));
3587 }
3588 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
3589
3590 t2 = build2 (MODIFY_EXPR, void_type_node, addr, t);
3591 gimplify_and_add (t2, pre_p);
3592
3593 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
3594 build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD));
3595 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3596 gimplify_and_add (t, pre_p);
3597
3598 if (container)
3599 {
3600 t = build1 (LABEL_EXPR, void_type_node, lab_over);
3601 append_to_statement_list (t, pre_p);
3602 }
3603
3604 ptrtype = build_pointer_type (type);
3605 addr = fold_convert (ptrtype, addr);
3606
3607 if (indirect_p)
3608 addr = build_va_arg_indirect_ref (addr);
3609 return build_va_arg_indirect_ref (addr);
3610 }
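
/* For a simple scalar fetched from the integer registers, e.g.
   va_arg (ap, int), the code above gimplifies to roughly the following
   (an illustrative sketch; 48 == (REGPARM_MAX - 1 + 1) * 8 under the
   usual x86-64 value REGPARM_MAX == 6):

       if (ap.gp_offset >= 48) goto lab_false;
       addr = ap.reg_save_area + ap.gp_offset;
       ap.gp_offset += 8;
       goto lab_over;
     lab_false:
       addr = ap.overflow_arg_area;        // aligned first if necessary
       ap.overflow_arg_area += 8;          // rsize * UNITS_PER_WORD
     lab_over:
       result = *(int *) addr;  */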
3611 \f
3612 /* Return nonzero if OPNUM's MEM should be matched
3613 in movabs* patterns. */
3614
3615 int
3616 ix86_check_movabs (rtx insn, int opnum)
3617 {
3618 rtx set, mem;
3619
3620 set = PATTERN (insn);
3621 if (GET_CODE (set) == PARALLEL)
3622 set = XVECEXP (set, 0, 0);
3623 if (GET_CODE (set) != SET)
3624 abort ();
3625 mem = XEXP (set, opnum);
3626 while (GET_CODE (mem) == SUBREG)
3627 mem = SUBREG_REG (mem);
3628 if (GET_CODE (mem) != MEM)
3629 abort ();
3630 return (volatile_ok || !MEM_VOLATILE_P (mem));
3631 }
3632 \f
3633 /* Initialize the table of extra 80387 mathematical constants. */
3634
3635 static void
3636 init_ext_80387_constants (void)
3637 {
3638 static const char * cst[5] =
3639 {
3640 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
3641 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
3642 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
3643 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
3644 "3.1415926535897932385128089594061862044", /* 4: fldpi */
3645 };
3646 int i;
3647
3648 for (i = 0; i < 5; i++)
3649 {
3650 real_from_string (&ext_80387_constants_table[i], cst[i]);
3651 /* Ensure each constant is rounded to XFmode precision. */
3652 real_convert (&ext_80387_constants_table[i],
3653 XFmode, &ext_80387_constants_table[i]);
3654 }
3655
3656 ext_80387_constants_init = 1;
3657 }
3658
3659 /* Return true if the constant is something that can be loaded with
3660 a special instruction. */
3661
3662 int
3663 standard_80387_constant_p (rtx x)
3664 {
3665 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
3666 return -1;
3667
3668 if (x == CONST0_RTX (GET_MODE (x)))
3669 return 1;
3670 if (x == CONST1_RTX (GET_MODE (x)))
3671 return 2;
3672
3673 /* For XFmode constants, try to find a special 80387 instruction when
3674 optimizing for size or on those CPUs that benefit from them. */
3675 if (GET_MODE (x) == XFmode
3676 && (optimize_size || x86_ext_80387_constants & TUNEMASK))
3677 {
3678 REAL_VALUE_TYPE r;
3679 int i;
3680
3681 if (! ext_80387_constants_init)
3682 init_ext_80387_constants ();
3683
3684 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3685 for (i = 0; i < 5; i++)
3686 if (real_identical (&r, &ext_80387_constants_table[i]))
3687 return i + 3;
3688 }
3689
3690 return 0;
3691 }
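
/* For example, CONST0_RTX (DFmode) yields 1 (fldz) and CONST1_RTX (DFmode)
   yields 2 (fld1), while an XFmode constant equal to one of the five table
   entries above yields 3 through 7 (see standard_80387_constant_opcode
   below) when optimizing for size or when the tuning flags allow it.
   Anything else yields 0, and anything that is not a floating-point
   CONST_DOUBLE yields -1.  */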
3692
3693 /* Return the opcode of the special instruction to be used to load
3694 the constant X. */
3695
3696 const char *
3697 standard_80387_constant_opcode (rtx x)
3698 {
3699 switch (standard_80387_constant_p (x))
3700 {
3701 case 1:
3702 return "fldz";
3703 case 2:
3704 return "fld1";
3705 case 3:
3706 return "fldlg2";
3707 case 4:
3708 return "fldln2";
3709 case 5:
3710 return "fldl2e";
3711 case 6:
3712 return "fldl2t";
3713 case 7:
3714 return "fldpi";
3715 }
3716 abort ();
3717 }
3718
3719 /* Return the CONST_DOUBLE representing the 80387 constant that is
3720 loaded by the specified special instruction. The argument IDX
3721 matches the return value from standard_80387_constant_p. */
3722
3723 rtx
3724 standard_80387_constant_rtx (int idx)
3725 {
3726 int i;
3727
3728 if (! ext_80387_constants_init)
3729 init_ext_80387_constants ();
3730
3731 switch (idx)
3732 {
3733 case 3:
3734 case 4:
3735 case 5:
3736 case 6:
3737 case 7:
3738 i = idx - 3;
3739 break;
3740
3741 default:
3742 abort ();
3743 }
3744
3745 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
3746 XFmode);
3747 }
3748
3749 /* Return 1 if X is an FP constant we can load into an SSE register without using memory.
3750 */
3751 int
3752 standard_sse_constant_p (rtx x)
3753 {
3754 if (x == const0_rtx)
3755 return 1;
3756 return (x == CONST0_RTX (GET_MODE (x)));
3757 }
3758
3759 /* Return 1 if OP contains a symbol reference. */
3760
3761 int
3762 symbolic_reference_mentioned_p (rtx op)
3763 {
3764 const char *fmt;
3765 int i;
3766
3767 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
3768 return 1;
3769
3770 fmt = GET_RTX_FORMAT (GET_CODE (op));
3771 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3772 {
3773 if (fmt[i] == 'E')
3774 {
3775 int j;
3776
3777 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3778 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3779 return 1;
3780 }
3781
3782 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3783 return 1;
3784 }
3785
3786 return 0;
3787 }
3788
3789 /* Return 1 if it is appropriate to emit `ret' instructions in the
3790 body of a function. Do this only if the epilogue is simple, needing a
3791 couple of insns. Prior to reloading, we can't tell how many registers
3792 must be saved, so return 0 then. Return 0 if there is no frame
3793 marker to de-allocate. */
3794
3795 int
3796 ix86_can_use_return_insn_p (void)
3797 {
3798 struct ix86_frame frame;
3799
3800 if (! reload_completed || frame_pointer_needed)
3801 return 0;
3802
3803 /* Don't allow popping more than 32768 bytes of arguments, since that's
3804 all we can do with one instruction. */
3805 if (current_function_pops_args
3806 && current_function_args_size >= 32768)
3807 return 0;
3808
3809 ix86_compute_frame_layout (&frame);
3810 return frame.to_allocate == 0 && frame.nregs == 0;
3811 }
3812 \f
3813 /* Value should be nonzero if functions must have frame pointers.
3814 Zero means the frame pointer need not be set up (and parms may
3815 be accessed via the stack pointer) in functions that seem suitable. */
3816
3817 int
3818 ix86_frame_pointer_required (void)
3819 {
3820 /* If we accessed previous frames, then the generated code expects
3821 to be able to access the saved ebp value in our frame. */
3822 if (cfun->machine->accesses_prev_frame)
3823 return 1;
3824
3825 /* Several x86 OSes need a frame pointer for other reasons,
3826 usually pertaining to setjmp. */
3827 if (SUBTARGET_FRAME_POINTER_REQUIRED)
3828 return 1;
3829
3830 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
3831 the frame pointer by default. Turn it back on now if we've not
3832 got a leaf function. */
3833 if (TARGET_OMIT_LEAF_FRAME_POINTER
3834 && (!current_function_is_leaf))
3835 return 1;
3836
3837 if (current_function_profile)
3838 return 1;
3839
3840 return 0;
3841 }
3842
3843 /* Record that the current function accesses previous call frames. */
3844
3845 void
3846 ix86_setup_frame_addresses (void)
3847 {
3848 cfun->machine->accesses_prev_frame = 1;
3849 }
3850 \f
3851 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
3852 # define USE_HIDDEN_LINKONCE 1
3853 #else
3854 # define USE_HIDDEN_LINKONCE 0
3855 #endif
3856
3857 static int pic_labels_used;
3858
3859 /* Fills in the label name that should be used for a pc thunk for
3860 the given register. */
3861
3862 static void
3863 get_pc_thunk_name (char name[32], unsigned int regno)
3864 {
3865 if (USE_HIDDEN_LINKONCE)
3866 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
3867 else
3868 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
3869 }
3870
3871
3872 /* Output the -fpic pc thunks that were used: each thunk loads its register
3873 with the return address of the caller and then returns. */
3874
3875 void
3876 ix86_file_end (void)
3877 {
3878 rtx xops[2];
3879 int regno;
3880
3881 for (regno = 0; regno < 8; ++regno)
3882 {
3883 char name[32];
3884
3885 if (! ((pic_labels_used >> regno) & 1))
3886 continue;
3887
3888 get_pc_thunk_name (name, regno);
3889
3890 if (USE_HIDDEN_LINKONCE)
3891 {
3892 tree decl;
3893
3894 decl = build_decl (FUNCTION_DECL, get_identifier (name),
3895 error_mark_node);
3896 TREE_PUBLIC (decl) = 1;
3897 TREE_STATIC (decl) = 1;
3898 DECL_ONE_ONLY (decl) = 1;
3899
3900 (*targetm.asm_out.unique_section) (decl, 0);
3901 named_section (decl, NULL, 0);
3902
3903 (*targetm.asm_out.globalize_label) (asm_out_file, name);
3904 fputs ("\t.hidden\t", asm_out_file);
3905 assemble_name (asm_out_file, name);
3906 fputc ('\n', asm_out_file);
3907 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
3908 }
3909 else
3910 {
3911 text_section ();
3912 ASM_OUTPUT_LABEL (asm_out_file, name);
3913 }
3914
3915 xops[0] = gen_rtx_REG (SImode, regno);
3916 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
3917 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
3918 output_asm_insn ("ret", xops);
3919 }
3920
3921 if (NEED_INDICATE_EXEC_STACK)
3922 file_end_indicate_exec_stack ();
3923 }
3924
3925 /* Emit code for the SET_GOT patterns. */
3926
3927 const char *
3928 output_set_got (rtx dest)
3929 {
3930 rtx xops[3];
3931
3932 xops[0] = dest;
3933 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
3934
3935 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
3936 {
3937 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
3938
3939 if (!flag_pic)
3940 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
3941 else
3942 output_asm_insn ("call\t%a2", xops);
3943
3944 #if TARGET_MACHO
3945 /* Output the "canonical" label name ("Lxx$pb") here too. This
3946 is what will be referred to by the Mach-O PIC subsystem. */
3947 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
3948 #endif
3949 (*targetm.asm_out.internal_label) (asm_out_file, "L",
3950 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
3951
3952 if (flag_pic)
3953 output_asm_insn ("pop{l}\t%0", xops);
3954 }
3955 else
3956 {
3957 char name[32];
3958 get_pc_thunk_name (name, REGNO (dest));
3959 pic_labels_used |= 1 << REGNO (dest);
3960
3961 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
3962 xops[2] = gen_rtx_MEM (QImode, xops[2]);
3963 output_asm_insn ("call\t%X2", xops);
3964 }
3965
3966 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
3967 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
3968 else if (!TARGET_MACHO)
3969 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
3970
3971 return "";
3972 }
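
/* An illustrative sketch of the assembly produced above (AT&T syntax, DEST
   being %ebx and assuming GOT_SYMBOL_NAME is _GLOBAL_OFFSET_TABLE_): without
   deep branch prediction the PIC sequence is roughly

       call  .L2
   .L2: popl  %ebx
       addl  $_GLOBAL_OFFSET_TABLE_+[.-.L2], %ebx

   while with the pc thunks emitted by ix86_file_end it becomes roughly

       call  __i686.get_pc_thunk.bx
       addl  $_GLOBAL_OFFSET_TABLE_, %ebx  */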
3973
3974 /* Generate a "push" pattern for input ARG. */
3975
3976 static rtx
3977 gen_push (rtx arg)
3978 {
3979 return gen_rtx_SET (VOIDmode,
3980 gen_rtx_MEM (Pmode,
3981 gen_rtx_PRE_DEC (Pmode,
3982 stack_pointer_rtx)),
3983 arg);
3984 }
3985
3986 /* Return the number of an unused call-clobbered register if one is
3987 available for the entire function, otherwise return INVALID_REGNUM. */
3988
3989 static unsigned int
3990 ix86_select_alt_pic_regnum (void)
3991 {
3992 if (current_function_is_leaf && !current_function_profile)
3993 {
3994 int i;
3995 for (i = 2; i >= 0; --i)
3996 if (!regs_ever_live[i])
3997 return i;
3998 }
3999
4000 return INVALID_REGNUM;
4001 }
4002
4003 /* Return 1 if we need to save REGNO. */
4004 static int
4005 ix86_save_reg (unsigned int regno, int maybe_eh_return)
4006 {
4007 if (pic_offset_table_rtx
4008 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4009 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4010 || current_function_profile
4011 || current_function_calls_eh_return
4012 || current_function_uses_const_pool))
4013 {
4014 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4015 return 0;
4016 return 1;
4017 }
4018
4019 if (current_function_calls_eh_return && maybe_eh_return)
4020 {
4021 unsigned i;
4022 for (i = 0; ; i++)
4023 {
4024 unsigned test = EH_RETURN_DATA_REGNO (i);
4025 if (test == INVALID_REGNUM)
4026 break;
4027 if (test == regno)
4028 return 1;
4029 }
4030 }
4031
4032 return (regs_ever_live[regno]
4033 && !call_used_regs[regno]
4034 && !fixed_regs[regno]
4035 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4036 }
4037
4038 /* Return number of registers to be saved on the stack. */
4039
4040 static int
4041 ix86_nsaved_regs (void)
4042 {
4043 int nregs = 0;
4044 int regno;
4045
4046 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4047 if (ix86_save_reg (regno, true))
4048 nregs++;
4049 return nregs;
4050 }
4051
4052 /* Return the offset between two registers, one to be eliminated, and the other
4053 its replacement, at the start of a routine. */
4054
4055 HOST_WIDE_INT
4056 ix86_initial_elimination_offset (int from, int to)
4057 {
4058 struct ix86_frame frame;
4059 ix86_compute_frame_layout (&frame);
4060
4061 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4062 return frame.hard_frame_pointer_offset;
4063 else if (from == FRAME_POINTER_REGNUM
4064 && to == HARD_FRAME_POINTER_REGNUM)
4065 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
4066 else
4067 {
4068 if (to != STACK_POINTER_REGNUM)
4069 abort ();
4070 else if (from == ARG_POINTER_REGNUM)
4071 return frame.stack_pointer_offset;
4072 else if (from != FRAME_POINTER_REGNUM)
4073 abort ();
4074 else
4075 return frame.stack_pointer_offset - frame.frame_pointer_offset;
4076 }
4077 }
4078
4079 /* Fill the ix86_frame structure describing the frame of the function being compiled. */
4080
4081 static void
4082 ix86_compute_frame_layout (struct ix86_frame *frame)
4083 {
4084 HOST_WIDE_INT total_size;
4085 unsigned int stack_alignment_needed;
4086 HOST_WIDE_INT offset;
4087 unsigned int preferred_alignment;
4088 HOST_WIDE_INT size = get_frame_size ();
4089
4090 frame->nregs = ix86_nsaved_regs ();
4091 total_size = size;
4092
4093 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
4094 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4095
4096 /* During reload iteration the number of registers saved can change.
4097 Recompute the value as needed. Do not recompute when the number of
4098 registers didn't change, as reload does multiple calls to the function
4099 and does not expect the decision to change within a single iteration. */
4100 if (!optimize_size
4101 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
4102 {
4103 int count = frame->nregs;
4104
4105 cfun->machine->use_fast_prologue_epilogue_nregs = count;
4106 /* The fast prologue uses move instead of push to save registers. This
4107 is significantly longer, but also executes faster as modern hardware
4108 can execute the moves in parallel, but can't do that for push/pop.
4109
4110 Be careful about choosing which prologue to emit: when the function takes
4111 many instructions to execute we may use the slow version, as well as when
4112 the function is known to be outside a hot spot (this is known with
4113 feedback only). Weight the size of the function by the number of registers
4114 to save, as it is cheap to use one or two push instructions but very
4115 slow to use many of them. */
4116 if (count)
4117 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
4118 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
4119 || (flag_branch_probabilities
4120 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
4121 cfun->machine->use_fast_prologue_epilogue = false;
4122 else
4123 cfun->machine->use_fast_prologue_epilogue
4124 = !expensive_function_p (count);
4125 }
4126 if (TARGET_PROLOGUE_USING_MOVE
4127 && cfun->machine->use_fast_prologue_epilogue)
4128 frame->save_regs_using_mov = true;
4129 else
4130 frame->save_regs_using_mov = false;
4131
4132
4133 /* Skip return address and saved base pointer. */
4134 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4135
4136 frame->hard_frame_pointer_offset = offset;
4137
4138 /* Do some sanity checking of stack_alignment_needed and
4139 preferred_alignment, since the i386 port is the only one using these
4140 features and they may break easily. */
4141
4142 if (size && !stack_alignment_needed)
4143 abort ();
4144 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
4145 abort ();
4146 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4147 abort ();
4148 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4149 abort ();
4150
4151 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
4152 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
4153
4154 /* Register save area */
4155 offset += frame->nregs * UNITS_PER_WORD;
4156
4157 /* Va-arg area */
4158 if (ix86_save_varrargs_registers)
4159 {
4160 offset += X86_64_VARARGS_SIZE;
4161 frame->va_arg_size = X86_64_VARARGS_SIZE;
4162 }
4163 else
4164 frame->va_arg_size = 0;
4165
4166 /* Align start of frame for local function. */
4167 frame->padding1 = ((offset + stack_alignment_needed - 1)
4168 & -stack_alignment_needed) - offset;
4169
4170 offset += frame->padding1;
4171
4172 /* Frame pointer points here. */
4173 frame->frame_pointer_offset = offset;
4174
4175 offset += size;
4176
4177 /* Add the outgoing arguments area. It can be skipped if we eliminated
4178 all the function calls as dead code.
4179 Skipping is however impossible when the function calls alloca: the
4180 alloca expander assumes that the last current_function_outgoing_args_size
4181 bytes of the stack frame are unused. */
4182 if (ACCUMULATE_OUTGOING_ARGS
4183 && (!current_function_is_leaf || current_function_calls_alloca))
4184 {
4185 offset += current_function_outgoing_args_size;
4186 frame->outgoing_arguments_size = current_function_outgoing_args_size;
4187 }
4188 else
4189 frame->outgoing_arguments_size = 0;
4190
4191 /* Align stack boundary. Only needed if we're calling another function
4192 or using alloca. */
4193 if (!current_function_is_leaf || current_function_calls_alloca)
4194 frame->padding2 = ((offset + preferred_alignment - 1)
4195 & -preferred_alignment) - offset;
4196 else
4197 frame->padding2 = 0;
4198
4199 offset += frame->padding2;
4200
4201 /* We've reached end of stack frame. */
4202 frame->stack_pointer_offset = offset;
4203
4204 /* Size prologue needs to allocate. */
4205 frame->to_allocate =
4206 (size + frame->padding1 + frame->padding2
4207 + frame->outgoing_arguments_size + frame->va_arg_size);
4208
4209 if ((!frame->to_allocate && frame->nregs <= 1)
4210 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
4211 frame->save_regs_using_mov = false;
4212
4213 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
4214 && current_function_is_leaf)
4215 {
4216 frame->red_zone_size = frame->to_allocate;
4217 if (frame->save_regs_using_mov)
4218 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
4219 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
4220 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
4221 }
4222 else
4223 frame->red_zone_size = 0;
4224 frame->to_allocate -= frame->red_zone_size;
4225 frame->stack_pointer_offset -= frame->red_zone_size;
4226 #if 0
4227 fprintf (stderr, "nregs: %i\n", frame->nregs);
4228 fprintf (stderr, "size: %i\n", size);
4229 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
4230 fprintf (stderr, "padding1: %i\n", frame->padding1);
4231 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
4232 fprintf (stderr, "padding2: %i\n", frame->padding2);
4233 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
4234 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
4235 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
4236 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
4237 frame->hard_frame_pointer_offset);
4238 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
4239 #endif
4240 }
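
/* A rough picture of the frame layout computed above, from higher to lower
   addresses (illustrative only; individual parts may be empty):

       return address
       saved frame pointer (when frame_pointer_needed)
                                     <- hard_frame_pointer_offset
       saved registers (nregs * UNITS_PER_WORD)
       va-arg register save area (X86_64_VARARGS_SIZE, varargs only)
       padding1
                                     <- frame_pointer_offset
       local variables (get_frame_size ())
       outgoing arguments area
       padding2
                                     <- stack_pointer_offset
       red zone (x86-64 leaf functions; subtracted from to_allocate and
                 stack_pointer_offset above)  */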
4241
4242 /* Emit code to save registers in the prologue. */
4243
4244 static void
4245 ix86_emit_save_regs (void)
4246 {
4247 int regno;
4248 rtx insn;
4249
4250 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4251 if (ix86_save_reg (regno, true))
4252 {
4253 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
4254 RTX_FRAME_RELATED_P (insn) = 1;
4255 }
4256 }
4257
4258 /* Emit code to save registers using MOV insns. The first register
4259 is saved at POINTER + OFFSET. */
4260 static void
4261 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
4262 {
4263 int regno;
4264 rtx insn;
4265
4266 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4267 if (ix86_save_reg (regno, true))
4268 {
4269 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
4270 Pmode, offset),
4271 gen_rtx_REG (Pmode, regno));
4272 RTX_FRAME_RELATED_P (insn) = 1;
4273 offset += UNITS_PER_WORD;
4274 }
4275 }
4276
4277 /* Expand prologue or epilogue stack adjustment.
4278 The pattern exists to put a dependency on all ebp-based memory accesses.
4279 STYLE should be negative if instructions should be marked as frame related,
4280 zero if the %r11 register is live and cannot be freely used, and positive
4281 otherwise. */
4282
4283 static void
4284 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
4285 {
4286 rtx insn;
4287
4288 if (! TARGET_64BIT)
4289 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
4290 else if (x86_64_immediate_operand (offset, DImode))
4291 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
4292 else
4293 {
4294 rtx r11;
4295 /* r11 is used by indirect sibcall return as well, set before the
4296 epilogue and used after the epilogue. ATM indirect sibcall
4297 shouldn't be used together with huge frame sizes in one
4298 function because of the frame_size check in sibcall.c. */
4299 if (style == 0)
4300 abort ();
4301 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
4302 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
4303 if (style < 0)
4304 RTX_FRAME_RELATED_P (insn) = 1;
4305 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
4306 offset));
4307 }
4308 if (style < 0)
4309 RTX_FRAME_RELATED_P (insn) = 1;
4310 }
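
/* For instance, the prologue call
     pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
                                GEN_INT (-16), -1);
   roughly emits a single frame-related adjustment of the stack pointer by
   -16 bytes (through the pro_epilogue_adjust_stack_1 pattern), while on
   x86-64 an offset that does not satisfy x86_64_immediate_operand is first
   loaded into %r11.  */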
4311
4312 /* Expand the prologue into a bunch of separate insns. */
4313
4314 void
4315 ix86_expand_prologue (void)
4316 {
4317 rtx insn;
4318 bool pic_reg_used;
4319 struct ix86_frame frame;
4320 HOST_WIDE_INT allocate;
4321
4322 ix86_compute_frame_layout (&frame);
4323
4324 /* Note: AT&T enter does NOT have reversed args. Enter is probably
4325 slower on all targets. Also sdb doesn't like it. */
4326
4327 if (frame_pointer_needed)
4328 {
4329 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
4330 RTX_FRAME_RELATED_P (insn) = 1;
4331
4332 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
4333 RTX_FRAME_RELATED_P (insn) = 1;
4334 }
4335
4336 allocate = frame.to_allocate;
4337
4338 if (!frame.save_regs_using_mov)
4339 ix86_emit_save_regs ();
4340 else
4341 allocate += frame.nregs * UNITS_PER_WORD;
4342
4343 /* When using the red zone we may start saving registers before allocating
4344 the stack frame, saving one cycle of the prologue. */
4345 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
4346 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
4347 : stack_pointer_rtx,
4348 -frame.nregs * UNITS_PER_WORD);
4349
4350 if (allocate == 0)
4351 ;
4352 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
4353 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
4354 GEN_INT (-allocate), -1);
4355 else
4356 {
4357 /* Only valid for Win32. */
4358 rtx eax = gen_rtx_REG (SImode, 0);
4359 bool eax_live = ix86_eax_live_at_start_p ();
4360 rtx t;
4361
4362 if (TARGET_64BIT)
4363 abort ();
4364
4365 if (eax_live)
4366 {
4367 emit_insn (gen_push (eax));
4368 allocate -= 4;
4369 }
4370
4371 emit_move_insn (eax, GEN_INT (allocate));
4372
4373 insn = emit_insn (gen_allocate_stack_worker (eax));
4374 RTX_FRAME_RELATED_P (insn) = 1;
4375 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
4376 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
4377 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
4378 t, REG_NOTES (insn));
4379
4380 if (eax_live)
4381 {
4382 if (frame_pointer_needed)
4383 t = plus_constant (hard_frame_pointer_rtx,
4384 allocate
4385 - frame.to_allocate
4386 - frame.nregs * UNITS_PER_WORD);
4387 else
4388 t = plus_constant (stack_pointer_rtx, allocate);
4389 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
4390 }
4391 }
4392
4393 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
4394 {
4395 if (!frame_pointer_needed || !frame.to_allocate)
4396 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
4397 else
4398 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
4399 -frame.nregs * UNITS_PER_WORD);
4400 }
4401
4402 pic_reg_used = false;
4403 if (pic_offset_table_rtx
4404 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4405 || current_function_profile))
4406 {
4407 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
4408
4409 if (alt_pic_reg_used != INVALID_REGNUM)
4410 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
4411
4412 pic_reg_used = true;
4413 }
4414
4415 if (pic_reg_used)
4416 {
4417 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
4418
4419 /* Even with accurate pre-reload life analysis, we can wind up
4420 deleting all references to the pic register after reload.
4421 Consider if cross-jumping unifies two sides of a branch
4422 controlled by a comparison vs the only read from a global.
4423 In which case, allow the set_got to be deleted, though we're
4424 too late to do anything about the ebx save in the prologue. */
4425 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
4426 }
4427
4428 /* Prevent function calls from being scheduled before the call to mcount.
4429 In the pic_reg_used case, make sure that the got load isn't deleted. */
4430 if (current_function_profile)
4431 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
4432 }
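
/* The typical code emitted by the expander above, sketched in assembly for a
   32-bit function with a frame pointer, one saved register and a small frame
   (illustrative only):

       pushl %ebp
       movl  %esp, %ebp
       pushl %ebx
       subl  $N, %esp

   With the fast prologue the register saves become movs into the freshly
   allocated frame, and on Win32 with TARGET_STACK_PROBE a large N is
   allocated by loading %eax and calling the stack probe worker instead of
   the plain sub.  */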
4433
4434 /* Emit code to restore saved registers using MOV insns. First register
4435 is restored from POINTER + OFFSET. */
4436 static void
4437 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
4438 int maybe_eh_return)
4439 {
4440 int regno;
4441 rtx base_address = gen_rtx_MEM (Pmode, pointer);
4442
4443 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4444 if (ix86_save_reg (regno, maybe_eh_return))
4445 {
4446 /* Ensure that adjust_address won't be forced to produce a pointer
4447 out of the range allowed by the x86-64 instruction set. */
4448 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
4449 {
4450 rtx r11;
4451
4452 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
4453 emit_move_insn (r11, GEN_INT (offset));
4454 emit_insn (gen_adddi3 (r11, r11, pointer));
4455 base_address = gen_rtx_MEM (Pmode, r11);
4456 offset = 0;
4457 }
4458 emit_move_insn (gen_rtx_REG (Pmode, regno),
4459 adjust_address (base_address, Pmode, offset));
4460 offset += UNITS_PER_WORD;
4461 }
4462 }
4463
4464 /* Restore function stack, frame, and registers. */
4465
4466 void
4467 ix86_expand_epilogue (int style)
4468 {
4469 int regno;
4470 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
4471 struct ix86_frame frame;
4472 HOST_WIDE_INT offset;
4473
4474 ix86_compute_frame_layout (&frame);
4475
4476 /* Calculate start of saved registers relative to ebp. Special care
4477 must be taken for the normal return case of a function using
4478 eh_return: the eax and edx registers are marked as saved, but not
4479 restored along this path. */
4480 offset = frame.nregs;
4481 if (current_function_calls_eh_return && style != 2)
4482 offset -= 2;
4483 offset *= -UNITS_PER_WORD;
4484
4485 /* If we're only restoring one register and sp is not valid, then
4486 use a move instruction to restore the register, since it's
4487 less work than reloading sp and popping the register.
4488 
4489 The default code results in a stack adjustment using an add/lea instruction,
4490 while this code results in a LEAVE instruction (or discrete equivalent),
4491 so it is profitable in some other cases as well, especially when there
4492 are no registers to restore. We also use this code when TARGET_USE_LEAVE
4493 is set and there is exactly one register to pop. This heuristic may need
4494 some tuning in the future. */
4495 if ((!sp_valid && frame.nregs <= 1)
4496 || (TARGET_EPILOGUE_USING_MOVE
4497 && cfun->machine->use_fast_prologue_epilogue
4498 && (frame.nregs > 1 || frame.to_allocate))
4499 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
4500 || (frame_pointer_needed && TARGET_USE_LEAVE
4501 && cfun->machine->use_fast_prologue_epilogue
4502 && frame.nregs == 1)
4503 || current_function_calls_eh_return)
4504 {
4505 /* Restore registers. We can use ebp or esp to address the memory
4506 locations. If both are available, default to ebp, since offsets
4507 are known to be small. The only exception is when esp points directly
4508 to the end of the block of saved registers, where we may simplify the
4509 addressing mode. */
4510
4511 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
4512 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
4513 frame.to_allocate, style == 2);
4514 else
4515 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
4516 offset, style == 2);
4517
4518 /* eh_return epilogues need %ecx added to the stack pointer. */
4519 if (style == 2)
4520 {
4521 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
4522
4523 if (frame_pointer_needed)
4524 {
4525 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
4526 tmp = plus_constant (tmp, UNITS_PER_WORD);
4527 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
4528
4529 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
4530 emit_move_insn (hard_frame_pointer_rtx, tmp);
4531
4532 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
4533 const0_rtx, style);
4534 }
4535 else
4536 {
4537 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
4538 tmp = plus_constant (tmp, (frame.to_allocate
4539 + frame.nregs * UNITS_PER_WORD));
4540 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
4541 }
4542 }
4543 else if (!frame_pointer_needed)
4544 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
4545 GEN_INT (frame.to_allocate
4546 + frame.nregs * UNITS_PER_WORD),
4547 style);
4548 /* If not an i386, mov & pop is faster than "leave". */
4549 else if (TARGET_USE_LEAVE || optimize_size
4550 || !cfun->machine->use_fast_prologue_epilogue)
4551 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4552 else
4553 {
4554 pro_epilogue_adjust_stack (stack_pointer_rtx,
4555 hard_frame_pointer_rtx,
4556 const0_rtx, style);
4557 if (TARGET_64BIT)
4558 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4559 else
4560 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4561 }
4562 }
4563 else
4564 {
4565 /* First step is to deallocate the stack frame so that we can
4566 pop the registers. */
4567 if (!sp_valid)
4568 {
4569 if (!frame_pointer_needed)
4570 abort ();
4571 pro_epilogue_adjust_stack (stack_pointer_rtx,
4572 hard_frame_pointer_rtx,
4573 GEN_INT (offset), style);
4574 }
4575 else if (frame.to_allocate)
4576 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
4577 GEN_INT (frame.to_allocate), style);
4578
4579 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4580 if (ix86_save_reg (regno, false))
4581 {
4582 if (TARGET_64BIT)
4583 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
4584 else
4585 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
4586 }
4587 if (frame_pointer_needed)
4588 {
4589 /* Leave results in shorter dependency chains on CPUs that are
4590 able to grok it fast. */
4591 if (TARGET_USE_LEAVE)
4592 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4593 else if (TARGET_64BIT)
4594 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4595 else
4596 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4597 }
4598 }
4599
4600 /* Sibcall epilogues don't want a return instruction. */
4601 if (style == 0)
4602 return;
4603
4604 if (current_function_pops_args && current_function_args_size)
4605 {
4606 rtx popc = GEN_INT (current_function_pops_args);
4607
4608 /* i386 can only pop 64K bytes. If asked to pop more, pop
4609 return address, do explicit add, and jump indirectly to the
4610 caller. */
4611
4612 if (current_function_pops_args >= 65536)
4613 {
4614 rtx ecx = gen_rtx_REG (SImode, 2);
4615
4616 /* There is no "pascal" calling convention in 64bit ABI. */
4617 if (TARGET_64BIT)
4618 abort ();
4619
4620 emit_insn (gen_popsi1 (ecx));
4621 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
4622 emit_jump_insn (gen_return_indirect_internal (ecx));
4623 }
4624 else
4625 emit_jump_insn (gen_return_pop_internal (popc));
4626 }
4627 else
4628 emit_jump_insn (gen_return_internal ());
4629 }
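
/* The matching epilogue shapes, again as an illustrative sketch for the
   32-bit case:

       addl  $N, %esp             (or mov-based register restores)
       popl  %ebx
       popl  %ebp                 (or a single "leave")
       ret                        (or "ret $imm" when the callee pops its
                                   arguments)  */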
4630
4631 /* Reset from the function's potential modifications. */
4632
4633 static void
4634 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
4635 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
4636 {
4637 if (pic_offset_table_rtx)
4638 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
4639 }
4640 \f
4641 /* Extract the parts of an RTL expression that is a valid memory address
4642 for an instruction. Return 0 if the structure of the address is
4643 grossly off. Return -1 if the address contains ASHIFT, so it is not
4644 strictly valid but is still used for computing the length of the lea instruction. */
4645
4646 int
4647 ix86_decompose_address (rtx addr, struct ix86_address *out)
4648 {
4649 rtx base = NULL_RTX;
4650 rtx index = NULL_RTX;
4651 rtx disp = NULL_RTX;
4652 HOST_WIDE_INT scale = 1;
4653 rtx scale_rtx = NULL_RTX;
4654 int retval = 1;
4655 enum ix86_address_seg seg = SEG_DEFAULT;
4656
4657 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
4658 base = addr;
4659 else if (GET_CODE (addr) == PLUS)
4660 {
4661 rtx addends[4], op;
4662 int n = 0, i;
4663
4664 op = addr;
4665 do
4666 {
4667 if (n >= 4)
4668 return 0;
4669 addends[n++] = XEXP (op, 1);
4670 op = XEXP (op, 0);
4671 }
4672 while (GET_CODE (op) == PLUS);
4673 if (n >= 4)
4674 return 0;
4675 addends[n] = op;
4676
4677 for (i = n; i >= 0; --i)
4678 {
4679 op = addends[i];
4680 switch (GET_CODE (op))
4681 {
4682 case MULT:
4683 if (index)
4684 return 0;
4685 index = XEXP (op, 0);
4686 scale_rtx = XEXP (op, 1);
4687 break;
4688
4689 case UNSPEC:
4690 if (XINT (op, 1) == UNSPEC_TP
4691 && TARGET_TLS_DIRECT_SEG_REFS
4692 && seg == SEG_DEFAULT)
4693 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
4694 else
4695 return 0;
4696 break;
4697
4698 case REG:
4699 case SUBREG:
4700 if (!base)
4701 base = op;
4702 else if (!index)
4703 index = op;
4704 else
4705 return 0;
4706 break;
4707
4708 case CONST:
4709 case CONST_INT:
4710 case SYMBOL_REF:
4711 case LABEL_REF:
4712 if (disp)
4713 return 0;
4714 disp = op;
4715 break;
4716
4717 default:
4718 return 0;
4719 }
4720 }
4721 }
4722 else if (GET_CODE (addr) == MULT)
4723 {
4724 index = XEXP (addr, 0); /* index*scale */
4725 scale_rtx = XEXP (addr, 1);
4726 }
4727 else if (GET_CODE (addr) == ASHIFT)
4728 {
4729 rtx tmp;
4730
4731 /* We're called for lea too, which implements ashift on occasion. */
4732 index = XEXP (addr, 0);
4733 tmp = XEXP (addr, 1);
4734 if (GET_CODE (tmp) != CONST_INT)
4735 return 0;
4736 scale = INTVAL (tmp);
4737 if ((unsigned HOST_WIDE_INT) scale > 3)
4738 return 0;
4739 scale = 1 << scale;
4740 retval = -1;
4741 }
4742 else
4743 disp = addr; /* displacement */
4744
4745 /* Extract the integral value of scale. */
4746 if (scale_rtx)
4747 {
4748 if (GET_CODE (scale_rtx) != CONST_INT)
4749 return 0;
4750 scale = INTVAL (scale_rtx);
4751 }
4752
4753 /* Allow arg pointer and stack pointer as index if there is no scaling. */
4754 if (base && index && scale == 1
4755 && (index == arg_pointer_rtx
4756 || index == frame_pointer_rtx
4757 || (REG_P (index) && REGNO (index) == STACK_POINTER_REGNUM)))
4758 {
4759 rtx tmp = base;
4760 base = index;
4761 index = tmp;
4762 }
4763
4764 /* Special case: %ebp cannot be encoded as a base without a displacement. */
4765 if ((base == hard_frame_pointer_rtx
4766 || base == frame_pointer_rtx
4767 || base == arg_pointer_rtx) && !disp)
4768 disp = const0_rtx;
4769
4770 /* Special case: on K6, [%esi] causes the instruction to be vector decoded.
4771 Avoid this by transforming it to [%esi+0]. */
4772 if (ix86_tune == PROCESSOR_K6 && !optimize_size
4773 && base && !index && !disp
4774 && REG_P (base)
4775 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
4776 disp = const0_rtx;
4777
4778 /* Special case: encode reg+reg instead of reg*2. */
4779 if (!base && index && scale && scale == 2)
4780 base = index, scale = 1;
4781
4782 /* Special case: scaling cannot be encoded without base or displacement. */
4783 if (!base && !disp && index && scale != 1)
4784 disp = const0_rtx;
4785
4786 out->base = base;
4787 out->index = index;
4788 out->disp = disp;
4789 out->scale = scale;
4790 out->seg = seg;
4791
4792 return retval;
4793 }
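
/* As an example of the decomposition above, the address

       (plus (plus (mult (reg B) (const_int 4)) (reg A)) (const_int 12))

   i.e. the operand 12(%A,%B,4), decomposes into base = A, index = B,
   scale = 4, disp = 12 and seg = SEG_DEFAULT (a rough sketch; the register
   names are placeholders).  */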
4794 \f
4795 /* Return the cost of the memory address X.
4796 For i386, it is better to use a complex address than to let gcc copy
4797 the address into a reg and make a new pseudo, but not if the address
4798 requires two regs - that would mean more pseudos with longer
4799 lifetimes. */
4800 static int
4801 ix86_address_cost (rtx x)
4802 {
4803 struct ix86_address parts;
4804 int cost = 1;
4805
4806 if (!ix86_decompose_address (x, &parts))
4807 abort ();
4808
4809 /* More complex memory references are better. */
4810 if (parts.disp && parts.disp != const0_rtx)
4811 cost--;
4812 if (parts.seg != SEG_DEFAULT)
4813 cost--;
4814
4815 /* Attempt to minimize number of registers in the address. */
4816 if ((parts.base
4817 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
4818 || (parts.index
4819 && (!REG_P (parts.index)
4820 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
4821 cost++;
4822
4823 if (parts.base
4824 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
4825 && parts.index
4826 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
4827 && parts.base != parts.index)
4828 cost++;
4829
4830 /* The AMD-K6 doesn't like addresses with the ModR/M byte set to 00_xxx_100b,
4831 since its predecode logic can't detect the length of instructions
4832 and decoding degenerates to the vector decoder. Increase the cost of such
4833 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
4834 to split such addresses or even refuse such addresses at all.
4835 
4836 The following addressing modes are affected:
4837 [base+scale*index]
4838 [scale*index+disp]
4839 [base+index]
4840 
4841 The first and last cases may be avoidable by explicitly coding the zero into
4842 the memory address, but I don't have an AMD-K6 machine handy to check this
4843 theory. */
4844
4845 if (TARGET_K6
4846 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
4847 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
4848 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
4849 cost += 10;
4850
4851 return cost;
4852 }
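
/* Some illustrative values of the heuristic above: a hard-register base
   with a nonzero displacement such as 8(%ebp) costs 0, a bare register
   base such as (%ebx) costs 1, and an address combining two distinct
   pseudo registers before reload costs 3 (plus the K6 penalty when it
   applies).  */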
4853 \f
4854 /* If X is a machine specific address (i.e. a symbol or label being
4855 referenced as a displacement from the GOT implemented using an
4856 UNSPEC), then return the base term. Otherwise return X. */
4857
4858 rtx
4859 ix86_find_base_term (rtx x)
4860 {
4861 rtx term;
4862
4863 if (TARGET_64BIT)
4864 {
4865 if (GET_CODE (x) != CONST)
4866 return x;
4867 term = XEXP (x, 0);
4868 if (GET_CODE (term) == PLUS
4869 && (GET_CODE (XEXP (term, 1)) == CONST_INT
4870 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
4871 term = XEXP (term, 0);
4872 if (GET_CODE (term) != UNSPEC
4873 || XINT (term, 1) != UNSPEC_GOTPCREL)
4874 return x;
4875
4876 term = XVECEXP (term, 0, 0);
4877
4878 if (GET_CODE (term) != SYMBOL_REF
4879 && GET_CODE (term) != LABEL_REF)
4880 return x;
4881
4882 return term;
4883 }
4884
4885 term = ix86_delegitimize_address (x);
4886
4887 if (GET_CODE (term) != SYMBOL_REF
4888 && GET_CODE (term) != LABEL_REF)
4889 return x;
4890
4891 return term;
4892 }
4893
4894 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O, as
4895 this is used to form addresses of local data when -fPIC is in
4896 use. */
4897
4898 static bool
4899 darwin_local_data_pic (rtx disp)
4900 {
4901 if (GET_CODE (disp) == MINUS)
4902 {
4903 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
4904 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
4905 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
4906 {
4907 const char *sym_name = XSTR (XEXP (disp, 1), 0);
4908 if (! strcmp (sym_name, "<pic base>"))
4909 return true;
4910 }
4911 }
4912
4913 return false;
4914 }
4915 \f
4916 /* Determine if a given RTX is a valid constant. We already know this
4917 satisfies CONSTANT_P. */
4918
4919 bool
4920 legitimate_constant_p (rtx x)
4921 {
4922 switch (GET_CODE (x))
4923 {
4924 case CONST:
4925 x = XEXP (x, 0);
4926
4927 if (GET_CODE (x) == PLUS)
4928 {
4929 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
4930 return false;
4931 x = XEXP (x, 0);
4932 }
4933
4934 if (TARGET_MACHO && darwin_local_data_pic (x))
4935 return true;
4936
4937 /* Only some unspecs are valid as "constants". */
4938 if (GET_CODE (x) == UNSPEC)
4939 switch (XINT (x, 1))
4940 {
4941 case UNSPEC_TPOFF:
4942 case UNSPEC_NTPOFF:
4943 return local_exec_symbolic_operand (XVECEXP (x, 0, 0), Pmode);
4944 case UNSPEC_DTPOFF:
4945 return local_dynamic_symbolic_operand (XVECEXP (x, 0, 0), Pmode);
4946 default:
4947 return false;
4948 }
4949
4950 /* We must have drilled down to a symbol. */
4951 if (!symbolic_operand (x, Pmode))
4952 return false;
4953 /* FALLTHRU */
4954
4955 case SYMBOL_REF:
4956 /* TLS symbols are never valid. */
4957 if (tls_symbolic_operand (x, Pmode))
4958 return false;
4959 break;
4960
4961 default:
4962 break;
4963 }
4964
4965 /* Otherwise we handle everything else in the move patterns. */
4966 return true;
4967 }
4968
4969 /* Determine if it's legal to put X into the constant pool. This
4970 is not possible for the address of thread-local symbols, which
4971 is checked above. */
4972
4973 static bool
4974 ix86_cannot_force_const_mem (rtx x)
4975 {
4976 return !legitimate_constant_p (x);
4977 }
4978
4979 /* Determine if a given RTX is a valid constant address. */
4980
4981 bool
4982 constant_address_p (rtx x)
4983 {
4984 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
4985 }
4986
4987 /* Nonzero if the constant value X is a legitimate general operand
4988 when generating PIC code. It is given that flag_pic is on and
4989 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
4990
4991 bool
4992 legitimate_pic_operand_p (rtx x)
4993 {
4994 rtx inner;
4995
4996 switch (GET_CODE (x))
4997 {
4998 case CONST:
4999 inner = XEXP (x, 0);
5000
5001 /* Only some unspecs are valid as "constants". */
5002 if (GET_CODE (inner) == UNSPEC)
5003 switch (XINT (inner, 1))
5004 {
5005 case UNSPEC_TPOFF:
5006 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5007 default:
5008 return false;
5009 }
5010 /* FALLTHRU */
5011
5012 case SYMBOL_REF:
5013 case LABEL_REF:
5014 return legitimate_pic_address_disp_p (x);
5015
5016 default:
5017 return true;
5018 }
5019 }
5020
5021 /* Determine if a given CONST RTX is a valid memory displacement
5022 in PIC mode. */
5023
5024 int
5025 legitimate_pic_address_disp_p (rtx disp)
5026 {
5027 bool saw_plus;
5028
5029 /* In 64bit mode we can allow direct addresses of symbols and labels
5030 when they are not dynamic symbols. */
5031 if (TARGET_64BIT)
5032 {
5033 /* TLS references should always be enclosed in UNSPEC. */
5034 if (tls_symbolic_operand (disp, GET_MODE (disp)))
5035 return 0;
5036 if (GET_CODE (disp) == SYMBOL_REF
5037 && ix86_cmodel == CM_SMALL_PIC
5038 && SYMBOL_REF_LOCAL_P (disp))
5039 return 1;
5040 if (GET_CODE (disp) == LABEL_REF)
5041 return 1;
5042 if (GET_CODE (disp) == CONST
5043 && GET_CODE (XEXP (disp, 0)) == PLUS)
5044 {
5045 rtx op0 = XEXP (XEXP (disp, 0), 0);
5046 rtx op1 = XEXP (XEXP (disp, 0), 1);
5047
5048 /* TLS references should always be enclosed in UNSPEC. */
5049 if (tls_symbolic_operand (op0, GET_MODE (op0)))
5050 return 0;
5051 if (((GET_CODE (op0) == SYMBOL_REF
5052 && ix86_cmodel == CM_SMALL_PIC
5053 && SYMBOL_REF_LOCAL_P (op0))
5054 || GET_CODE (op0) == LABEL_REF)
5055 && GET_CODE (op1) == CONST_INT
5056 && INTVAL (op1) < 16*1024*1024
5057 && INTVAL (op1) >= -16*1024*1024)
5058 return 1;
5059 }
5060 }
5061 if (GET_CODE (disp) != CONST)
5062 return 0;
5063 disp = XEXP (disp, 0);
5064
5065 if (TARGET_64BIT)
5066 {
5067 /* It is unsafe to allow PLUS expressions; this limits the allowed distance
5068 of GOT references. We should not need these anyway. */
5069 if (GET_CODE (disp) != UNSPEC
5070 || XINT (disp, 1) != UNSPEC_GOTPCREL)
5071 return 0;
5072
5073 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
5074 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
5075 return 0;
5076 return 1;
5077 }
5078
5079 saw_plus = false;
5080 if (GET_CODE (disp) == PLUS)
5081 {
5082 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
5083 return 0;
5084 disp = XEXP (disp, 0);
5085 saw_plus = true;
5086 }
5087
5088 if (TARGET_MACHO && darwin_local_data_pic (disp))
5089 return 1;
5090
5091 if (GET_CODE (disp) != UNSPEC)
5092 return 0;
5093
5094 switch (XINT (disp, 1))
5095 {
5096 case UNSPEC_GOT:
5097 if (saw_plus)
5098 return false;
5099 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
5100 case UNSPEC_GOTOFF:
5101 if (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
5102 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
5103 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5104 return false;
5105 case UNSPEC_GOTTPOFF:
5106 case UNSPEC_GOTNTPOFF:
5107 case UNSPEC_INDNTPOFF:
5108 if (saw_plus)
5109 return false;
5110 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5111 case UNSPEC_NTPOFF:
5112 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5113 case UNSPEC_DTPOFF:
5114 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5115 }
5116
5117 return 0;
5118 }
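
/* Typical displacements accepted above, as a rough illustration: in 32-bit
   PIC code (const (unspec [sym] UNSPEC_GOTOFF)) for a local symbol, i.e.
   sym@GOTOFF, or (const (unspec [sym] UNSPEC_GOT)), i.e. sym@GOT; in 64-bit
   code only (const (unspec [sym] UNSPEC_GOTPCREL)), i.e. sym@GOTPCREL, plus
   direct references to local symbols and labels in the small PIC model.  */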
5119
5120 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5121 memory address for an instruction. The MODE argument is the machine mode
5122 for the MEM expression that wants to use this address.
5123
5124 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
5125 convert common non-canonical forms to canonical form so that they will
5126 be recognized. */
5127
5128 int
5129 legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
5130 {
5131 struct ix86_address parts;
5132 rtx base, index, disp;
5133 HOST_WIDE_INT scale;
5134 const char *reason = NULL;
5135 rtx reason_rtx = NULL_RTX;
5136
5137 if (TARGET_DEBUG_ADDR)
5138 {
5139 fprintf (stderr,
5140 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
5141 GET_MODE_NAME (mode), strict);
5142 debug_rtx (addr);
5143 }
5144
5145 if (ix86_decompose_address (addr, &parts) <= 0)
5146 {
5147 reason = "decomposition failed";
5148 goto report_error;
5149 }
5150
5151 base = parts.base;
5152 index = parts.index;
5153 disp = parts.disp;
5154 scale = parts.scale;
5155
5156 /* Validate base register.
5157
5158 Don't allow SUBREG's here, it can lead to spill failures when the base
5159 is one word out of a two word structure, which is represented internally
5160 as a DImode int. */
5161
5162 if (base)
5163 {
5164 reason_rtx = base;
5165
5166 if (GET_CODE (base) != REG)
5167 {
5168 reason = "base is not a register";
5169 goto report_error;
5170 }
5171
5172 if (GET_MODE (base) != Pmode)
5173 {
5174 reason = "base is not in Pmode";
5175 goto report_error;
5176 }
5177
5178 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
5179 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
5180 {
5181 reason = "base is not valid";
5182 goto report_error;
5183 }
5184 }
5185
5186 /* Validate index register.
5187
5188 Don't allow SUBREGs here; they can lead to spill failures when the index
5189 is one word out of a two-word structure, which is represented internally
5190 as a DImode int. */
5191
5192 if (index)
5193 {
5194 reason_rtx = index;
5195
5196 if (GET_CODE (index) != REG)
5197 {
5198 reason = "index is not a register";
5199 goto report_error;
5200 }
5201
5202 if (GET_MODE (index) != Pmode)
5203 {
5204 reason = "index is not in Pmode";
5205 goto report_error;
5206 }
5207
5208 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
5209 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
5210 {
5211 reason = "index is not valid";
5212 goto report_error;
5213 }
5214 }
5215
5216 /* Validate scale factor. */
5217 if (scale != 1)
5218 {
5219 reason_rtx = GEN_INT (scale);
5220 if (!index)
5221 {
5222 reason = "scale without index";
5223 goto report_error;
5224 }
5225
5226 if (scale != 2 && scale != 4 && scale != 8)
5227 {
5228 reason = "scale is not a valid multiplier";
5229 goto report_error;
5230 }
5231 }
5232
5233 /* Validate displacement. */
5234 if (disp)
5235 {
5236 reason_rtx = disp;
5237
5238 if (GET_CODE (disp) == CONST
5239 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
5240 switch (XINT (XEXP (disp, 0), 1))
5241 {
5242 case UNSPEC_GOT:
5243 case UNSPEC_GOTOFF:
5244 case UNSPEC_GOTPCREL:
5245 if (!flag_pic)
5246 abort ();
5247 goto is_legitimate_pic;
5248
5249 case UNSPEC_GOTTPOFF:
5250 case UNSPEC_GOTNTPOFF:
5251 case UNSPEC_INDNTPOFF:
5252 case UNSPEC_NTPOFF:
5253 case UNSPEC_DTPOFF:
5254 break;
5255
5256 default:
5257 reason = "invalid address unspec";
5258 goto report_error;
5259 }
5260
5261 else if (flag_pic && (SYMBOLIC_CONST (disp)
5262 #if TARGET_MACHO
5263 && !machopic_operand_p (disp)
5264 #endif
5265 ))
5266 {
5267 is_legitimate_pic:
5268 if (TARGET_64BIT && (index || base))
5269 {
5270 /* foo@dtpoff(%rX) is ok. */
5271 if (GET_CODE (disp) != CONST
5272 || GET_CODE (XEXP (disp, 0)) != PLUS
5273 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
5274 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
5275 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
5276 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
5277 {
5278 reason = "non-constant pic memory reference";
5279 goto report_error;
5280 }
5281 }
5282 else if (! legitimate_pic_address_disp_p (disp))
5283 {
5284 reason = "displacement is an invalid pic construct";
5285 goto report_error;
5286 }
5287
5288 /* This code used to verify that a symbolic pic displacement
5289 includes the pic_offset_table_rtx register.
5290
5291 While this is a good idea, unfortunately these constructs may
5292 be created by the "adds using lea" optimization for incorrect
5293 code like:
5294
5295 int a;
5296 int foo(int i)
5297 {
5298 return *(&a+i);
5299 }
5300
5301 This code is nonsensical, but results in addressing the
5302 GOT table with a pic_offset_table_rtx base. We can't
5303 just reject it easily, since it gets matched by the
5304 "addsi3" pattern, which later gets split into a lea when
5305 the output register differs from the input. While this
5306 could be handled by a separate addsi pattern for this case
5307 that never results in a lea, disabling this test seems to
5308 be the easier and still correct fix for the crash. */
5309 }
5310 else if (GET_CODE (disp) != LABEL_REF
5311 && GET_CODE (disp) != CONST_INT
5312 && (GET_CODE (disp) != CONST
5313 || !legitimate_constant_p (disp))
5314 && (GET_CODE (disp) != SYMBOL_REF
5315 || !legitimate_constant_p (disp)))
5316 {
5317 reason = "displacement is not constant";
5318 goto report_error;
5319 }
5320 else if (TARGET_64BIT
5321 && !x86_64_immediate_operand (disp, VOIDmode))
5322 {
5323 reason = "displacement is out of range";
5324 goto report_error;
5325 }
5326 }
5327
5328 /* Everything looks valid. */
5329 if (TARGET_DEBUG_ADDR)
5330 fprintf (stderr, "Success.\n");
5331 return TRUE;
5332
5333 report_error:
5334 if (TARGET_DEBUG_ADDR)
5335 {
5336 fprintf (stderr, "Error: %s\n", reason);
5337 debug_rtx (reason_rtx);
5338 }
5339 return FALSE;
5340 }
5341 \f
5342 /* Return a unique alias set for the GOT. */
5343
5344 static HOST_WIDE_INT
5345 ix86_GOT_alias_set (void)
5346 {
5347 static HOST_WIDE_INT set = -1;
5348 if (set == -1)
5349 set = new_alias_set ();
5350 return set;
5351 }
5352
5353 /* Return a legitimate reference for ORIG (an address) using the
5354 register REG. If REG is 0, a new pseudo is generated.
5355
5356 There are two types of references that must be handled:
5357
5358 1. Global data references must load the address from the GOT, via
5359 the PIC reg. An insn is emitted to do this load, and the reg is
5360 returned.
5361
5362 2. Static data references, constant pool addresses, and code labels
5363 compute the address as an offset from the GOT, whose base is in
5364 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
5365 differentiate them from global data objects. The returned
5366 address is the PIC reg + an unspec constant.
5367
5368 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
5369 reg also appears in the address. */
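/* Illustrative sketch, not part of the original source (the symbol names
   `foo' and `bar' are made up).  For a local symbol `bar' on a 32-bit
   target, the address built below is roughly

     (plus:SI (reg:SI %ebx)
              (const:SI (unspec:SI [(symbol_ref:SI "bar")] UNSPEC_GOTOFF)))

   i.e. `bar@GOTOFF(%ebx)', while a global symbol `foo' is loaded from the
   GOT via

     (mem:SI (plus:SI (reg:SI %ebx)
                      (const:SI (unspec:SI [(symbol_ref:SI "foo")] UNSPEC_GOT))))

   and, on 64-bit targets, via a `foo@GOTPCREL(%rip)' load.  */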
5370
5371 static rtx
5372 legitimize_pic_address (rtx orig, rtx reg)
5373 {
5374 rtx addr = orig;
5375 rtx new = orig;
5376 rtx base;
5377
5378 #if TARGET_MACHO
5379 if (reg == 0)
5380 reg = gen_reg_rtx (Pmode);
5381 /* Use the generic Mach-O PIC machinery. */
5382 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
5383 #endif
5384
5385 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
5386 new = addr;
5387 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
5388 {
5389 /* This symbol may be referenced via a displacement from the PIC
5390 base address (@GOTOFF). */
5391
5392 if (reload_in_progress)
5393 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5394 if (GET_CODE (addr) == CONST)
5395 addr = XEXP (addr, 0);
5396 if (GET_CODE (addr) == PLUS)
5397 {
5398 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
5399 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
5400 }
5401 else
5402 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
5403 new = gen_rtx_CONST (Pmode, new);
5404 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5405
5406 if (reg != 0)
5407 {
5408 emit_move_insn (reg, new);
5409 new = reg;
5410 }
5411 }
5412 else if (GET_CODE (addr) == SYMBOL_REF)
5413 {
5414 if (TARGET_64BIT)
5415 {
5416 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
5417 new = gen_rtx_CONST (Pmode, new);
5418 new = gen_const_mem (Pmode, new);
5419 set_mem_alias_set (new, ix86_GOT_alias_set ());
5420
5421 if (reg == 0)
5422 reg = gen_reg_rtx (Pmode);
5423 /* Use gen_movsi directly; otherwise the address is loaded
5424 into a register for CSE. We don't want to CSE these addresses;
5425 instead we CSE addresses from the GOT table, so skip this. */
5426 emit_insn (gen_movsi (reg, new));
5427 new = reg;
5428 }
5429 else
5430 {
5431 /* This symbol must be referenced via a load from the
5432 Global Offset Table (@GOT). */
5433
5434 if (reload_in_progress)
5435 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5436 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
5437 new = gen_rtx_CONST (Pmode, new);
5438 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5439 new = gen_const_mem (Pmode, new);
5440 set_mem_alias_set (new, ix86_GOT_alias_set ());
5441
5442 if (reg == 0)
5443 reg = gen_reg_rtx (Pmode);
5444 emit_move_insn (reg, new);
5445 new = reg;
5446 }
5447 }
5448 else
5449 {
5450 if (GET_CODE (addr) == CONST)
5451 {
5452 addr = XEXP (addr, 0);
5453
5454 /* We must match what we generated before. Assume the only
5455 unspecs that can get here are ours; not that we could do
5456 anything with them anyway.... */
5457 if (GET_CODE (addr) == UNSPEC
5458 || (GET_CODE (addr) == PLUS
5459 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
5460 return orig;
5461 if (GET_CODE (addr) != PLUS)
5462 abort ();
5463 }
5464 if (GET_CODE (addr) == PLUS)
5465 {
5466 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
5467
5468 /* Check first to see if this is a constant offset from a @GOTOFF
5469 symbol reference. */
5470 if (local_symbolic_operand (op0, Pmode)
5471 && GET_CODE (op1) == CONST_INT)
5472 {
5473 if (!TARGET_64BIT)
5474 {
5475 if (reload_in_progress)
5476 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5477 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
5478 UNSPEC_GOTOFF);
5479 new = gen_rtx_PLUS (Pmode, new, op1);
5480 new = gen_rtx_CONST (Pmode, new);
5481 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5482
5483 if (reg != 0)
5484 {
5485 emit_move_insn (reg, new);
5486 new = reg;
5487 }
5488 }
5489 else
5490 {
5491 if (INTVAL (op1) < -16*1024*1024
5492 || INTVAL (op1) >= 16*1024*1024)
5493 new = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
5494 }
5495 }
5496 else
5497 {
5498 base = legitimize_pic_address (XEXP (addr, 0), reg);
5499 new = legitimize_pic_address (XEXP (addr, 1),
5500 base == reg ? NULL_RTX : reg);
5501
5502 if (GET_CODE (new) == CONST_INT)
5503 new = plus_constant (base, INTVAL (new));
5504 else
5505 {
5506 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
5507 {
5508 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
5509 new = XEXP (new, 1);
5510 }
5511 new = gen_rtx_PLUS (Pmode, base, new);
5512 }
5513 }
5514 }
5515 }
5516 return new;
5517 }
5518 \f
5519 /* Load the thread pointer. If TO_REG is true, force it into a register. */
5520
5521 static rtx
5522 get_thread_pointer (int to_reg)
5523 {
5524 rtx tp, reg, insn;
5525
5526 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
5527 if (!to_reg)
5528 return tp;
5529
5530 reg = gen_reg_rtx (Pmode);
5531 insn = gen_rtx_SET (VOIDmode, reg, tp);
5532 insn = emit_insn (insn);
5533
5534 return reg;
5535 }
5536
5537 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
5538 false if we expect this to be used for a memory address and true if
5539 we expect to load the address into a register. */
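/* Rough illustration, not part of the original source: with the GNU TLS
   ABI on 32-bit ELF the four models correspond to sequences along the
   lines of

     global dynamic:  leal x@TLSGD(,%ebx,1), %eax ; call ___tls_get_addr
     local dynamic:   leal x@TLSLDM(%ebx), %eax   ; call ___tls_get_addr
                      ... then x@DTPOFF(%eax) offsets
     initial exec:    movl x@GOTNTPOFF(%ebx), %reg ; access %gs:(%reg)
     local exec:      movl %gs:0, %reg             ; access x@NTPOFF(%reg)

   where `x' is a made-up thread-local variable and the exact form depends
   on -fpic and the TARGET_GNU_TLS / TARGET_TLS_DIRECT_SEG_REFS checks
   below.  */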
5540
5541 static rtx
5542 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
5543 {
5544 rtx dest, base, off, pic;
5545 int type;
5546
5547 switch (model)
5548 {
5549 case TLS_MODEL_GLOBAL_DYNAMIC:
5550 dest = gen_reg_rtx (Pmode);
5551 if (TARGET_64BIT)
5552 {
5553 rtx rax = gen_rtx_REG (Pmode, 0), insns;
5554
5555 start_sequence ();
5556 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
5557 insns = get_insns ();
5558 end_sequence ();
5559
5560 emit_libcall_block (insns, dest, rax, x);
5561 }
5562 else
5563 emit_insn (gen_tls_global_dynamic_32 (dest, x));
5564 break;
5565
5566 case TLS_MODEL_LOCAL_DYNAMIC:
5567 base = gen_reg_rtx (Pmode);
5568 if (TARGET_64BIT)
5569 {
5570 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
5571
5572 start_sequence ();
5573 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
5574 insns = get_insns ();
5575 end_sequence ();
5576
5577 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
5578 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
5579 emit_libcall_block (insns, base, rax, note);
5580 }
5581 else
5582 emit_insn (gen_tls_local_dynamic_base_32 (base));
5583
5584 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
5585 off = gen_rtx_CONST (Pmode, off);
5586
5587 return gen_rtx_PLUS (Pmode, base, off);
5588
5589 case TLS_MODEL_INITIAL_EXEC:
5590 if (TARGET_64BIT)
5591 {
5592 pic = NULL;
5593 type = UNSPEC_GOTNTPOFF;
5594 }
5595 else if (flag_pic)
5596 {
5597 if (reload_in_progress)
5598 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5599 pic = pic_offset_table_rtx;
5600 type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
5601 }
5602 else if (!TARGET_GNU_TLS)
5603 {
5604 pic = gen_reg_rtx (Pmode);
5605 emit_insn (gen_set_got (pic));
5606 type = UNSPEC_GOTTPOFF;
5607 }
5608 else
5609 {
5610 pic = NULL;
5611 type = UNSPEC_INDNTPOFF;
5612 }
5613
5614 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
5615 off = gen_rtx_CONST (Pmode, off);
5616 if (pic)
5617 off = gen_rtx_PLUS (Pmode, pic, off);
5618 off = gen_const_mem (Pmode, off);
5619 set_mem_alias_set (off, ix86_GOT_alias_set ());
5620
5621 if (TARGET_64BIT || TARGET_GNU_TLS)
5622 {
5623 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
5624 off = force_reg (Pmode, off);
5625 return gen_rtx_PLUS (Pmode, base, off);
5626 }
5627 else
5628 {
5629 base = get_thread_pointer (true);
5630 dest = gen_reg_rtx (Pmode);
5631 emit_insn (gen_subsi3 (dest, base, off));
5632 }
5633 break;
5634
5635 case TLS_MODEL_LOCAL_EXEC:
5636 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
5637 (TARGET_64BIT || TARGET_GNU_TLS)
5638 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
5639 off = gen_rtx_CONST (Pmode, off);
5640
5641 if (TARGET_64BIT || TARGET_GNU_TLS)
5642 {
5643 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
5644 return gen_rtx_PLUS (Pmode, base, off);
5645 }
5646 else
5647 {
5648 base = get_thread_pointer (true);
5649 dest = gen_reg_rtx (Pmode);
5650 emit_insn (gen_subsi3 (dest, base, off));
5651 }
5652 break;
5653
5654 default:
5655 abort ();
5656 }
5657
5658 return dest;
5659 }
5660
5661 /* Try machine-dependent ways of modifying an illegitimate address
5662 to be legitimate. If we find one, return the new, valid address.
5663 This macro is used in only one place: `memory_address' in explow.c.
5664
5665 OLDX is the address as it was before break_out_memory_refs was called.
5666 In some cases it is useful to look at this to decide what needs to be done.
5667
5668 MODE and WIN are passed so that this macro can use
5669 GO_IF_LEGITIMATE_ADDRESS.
5670
5671 It is always safe for this macro to do nothing. It exists to recognize
5672 opportunities to optimize the output.
5673
5674 For the 80386, we handle X+REG by loading X into a register R and
5675 using R+REG. R will go in a general reg and indexing will be used.
5676 However, if REG is a broken-out memory address or multiplication,
5677 nothing needs to be done because REG can certainly go in a general reg.
5678
5679 When -fpic is used, special handling is needed for symbolic references.
5680 See comments by legitimize_pic_address in i386.c for details. */
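/* Illustrative sketch, not part of the original source: one of the
   canonicalizations below turns

     (plus (mult (reg) (const_int 4)) (plus (reg) (const_int 8)))

   into

     (plus (plus (mult (reg) (const_int 4)) (reg)) (const_int 8))

   which has the canonical base + index*scale + disp shape checked by
   legitimate_address_p.  The scale and offset values are made up for the
   example.  */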
5681
5682 rtx
5683 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
5684 {
5685 int changed = 0;
5686 unsigned log;
5687
5688 if (TARGET_DEBUG_ADDR)
5689 {
5690 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
5691 GET_MODE_NAME (mode));
5692 debug_rtx (x);
5693 }
5694
5695 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
5696 if (log)
5697 return legitimize_tls_address (x, log, false);
5698 if (GET_CODE (x) == CONST
5699 && GET_CODE (XEXP (x, 0)) == PLUS
5700 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
5701 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
5702 {
5703 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
5704 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
5705 }
5706
5707 if (flag_pic && SYMBOLIC_CONST (x))
5708 return legitimize_pic_address (x, 0);
5709
5710 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
5711 if (GET_CODE (x) == ASHIFT
5712 && GET_CODE (XEXP (x, 1)) == CONST_INT
5713 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
5714 {
5715 changed = 1;
5716 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
5717 GEN_INT (1 << log));
5718 }
5719
5720 if (GET_CODE (x) == PLUS)
5721 {
5722 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
5723
5724 if (GET_CODE (XEXP (x, 0)) == ASHIFT
5725 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
5726 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
5727 {
5728 changed = 1;
5729 XEXP (x, 0) = gen_rtx_MULT (Pmode,
5730 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
5731 GEN_INT (1 << log));
5732 }
5733
5734 if (GET_CODE (XEXP (x, 1)) == ASHIFT
5735 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
5736 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
5737 {
5738 changed = 1;
5739 XEXP (x, 1) = gen_rtx_MULT (Pmode,
5740 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
5741 GEN_INT (1 << log));
5742 }
5743
5744 /* Put multiply first if it isn't already. */
5745 if (GET_CODE (XEXP (x, 1)) == MULT)
5746 {
5747 rtx tmp = XEXP (x, 0);
5748 XEXP (x, 0) = XEXP (x, 1);
5749 XEXP (x, 1) = tmp;
5750 changed = 1;
5751 }
5752
5753 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
5754 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
5755 created by virtual register instantiation, register elimination, and
5756 similar optimizations. */
5757 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
5758 {
5759 changed = 1;
5760 x = gen_rtx_PLUS (Pmode,
5761 gen_rtx_PLUS (Pmode, XEXP (x, 0),
5762 XEXP (XEXP (x, 1), 0)),
5763 XEXP (XEXP (x, 1), 1));
5764 }
5765
5766 /* Canonicalize
5767 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
5768 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
5769 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
5770 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5771 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
5772 && CONSTANT_P (XEXP (x, 1)))
5773 {
5774 rtx constant;
5775 rtx other = NULL_RTX;
5776
5777 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5778 {
5779 constant = XEXP (x, 1);
5780 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
5781 }
5782 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
5783 {
5784 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
5785 other = XEXP (x, 1);
5786 }
5787 else
5788 constant = 0;
5789
5790 if (constant)
5791 {
5792 changed = 1;
5793 x = gen_rtx_PLUS (Pmode,
5794 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
5795 XEXP (XEXP (XEXP (x, 0), 1), 0)),
5796 plus_constant (other, INTVAL (constant)));
5797 }
5798 }
5799
5800 if (changed && legitimate_address_p (mode, x, FALSE))
5801 return x;
5802
5803 if (GET_CODE (XEXP (x, 0)) == MULT)
5804 {
5805 changed = 1;
5806 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
5807 }
5808
5809 if (GET_CODE (XEXP (x, 1)) == MULT)
5810 {
5811 changed = 1;
5812 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
5813 }
5814
5815 if (changed
5816 && GET_CODE (XEXP (x, 1)) == REG
5817 && GET_CODE (XEXP (x, 0)) == REG)
5818 return x;
5819
5820 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
5821 {
5822 changed = 1;
5823 x = legitimize_pic_address (x, 0);
5824 }
5825
5826 if (changed && legitimate_address_p (mode, x, FALSE))
5827 return x;
5828
5829 if (GET_CODE (XEXP (x, 0)) == REG)
5830 {
5831 rtx temp = gen_reg_rtx (Pmode);
5832 rtx val = force_operand (XEXP (x, 1), temp);
5833 if (val != temp)
5834 emit_move_insn (temp, val);
5835
5836 XEXP (x, 1) = temp;
5837 return x;
5838 }
5839
5840 else if (GET_CODE (XEXP (x, 1)) == REG)
5841 {
5842 rtx temp = gen_reg_rtx (Pmode);
5843 rtx val = force_operand (XEXP (x, 0), temp);
5844 if (val != temp)
5845 emit_move_insn (temp, val);
5846
5847 XEXP (x, 0) = temp;
5848 return x;
5849 }
5850 }
5851
5852 return x;
5853 }
5854 \f
5855 /* Print an integer constant expression in assembler syntax. Addition
5856 and subtraction are the only arithmetic that may appear in these
5857 expressions. FILE is the stdio stream to write to, X is the rtx, and
5858 CODE is the operand print code from the output string. */
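/* Illustrative sketch, not part of the original source: for

     (const:SI (plus:SI (symbol_ref:SI "foo") (const_int 4)))

   this routine prints `4+foo' (the integer first, for assemblers that
   need constants to appear first), and for

     (const:SI (unspec:SI [(symbol_ref:SI "foo")] UNSPEC_GOTOFF))

   it prints `foo@GOTOFF'; `foo' is a made-up name.  */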
5859
5860 static void
5861 output_pic_addr_const (FILE *file, rtx x, int code)
5862 {
5863 char buf[256];
5864
5865 switch (GET_CODE (x))
5866 {
5867 case PC:
5868 if (flag_pic)
5869 putc ('.', file);
5870 else
5871 abort ();
5872 break;
5873
5874 case SYMBOL_REF:
5875 /* Mark the decl as referenced so that cgraph will output the function. */
5876 if (SYMBOL_REF_DECL (x))
5877 mark_decl_referenced (SYMBOL_REF_DECL (x));
5878
5879 assemble_name (file, XSTR (x, 0));
5880 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
5881 fputs ("@PLT", file);
5882 break;
5883
5884 case LABEL_REF:
5885 x = XEXP (x, 0);
5886 /* FALLTHRU */
5887 case CODE_LABEL:
5888 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
5889 assemble_name (asm_out_file, buf);
5890 break;
5891
5892 case CONST_INT:
5893 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
5894 break;
5895
5896 case CONST:
5897 /* This used to output parentheses around the expression,
5898 but that does not work on the 386 (either ATT or BSD assembler). */
5899 output_pic_addr_const (file, XEXP (x, 0), code);
5900 break;
5901
5902 case CONST_DOUBLE:
5903 if (GET_MODE (x) == VOIDmode)
5904 {
5905 /* We can use %d if the number is <32 bits and positive. */
5906 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
5907 fprintf (file, "0x%lx%08lx",
5908 (unsigned long) CONST_DOUBLE_HIGH (x),
5909 (unsigned long) CONST_DOUBLE_LOW (x));
5910 else
5911 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
5912 }
5913 else
5914 /* We can't handle floating point constants;
5915 PRINT_OPERAND must handle them. */
5916 output_operand_lossage ("floating constant misused");
5917 break;
5918
5919 case PLUS:
5920 /* Some assemblers need integer constants to appear first. */
5921 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
5922 {
5923 output_pic_addr_const (file, XEXP (x, 0), code);
5924 putc ('+', file);
5925 output_pic_addr_const (file, XEXP (x, 1), code);
5926 }
5927 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5928 {
5929 output_pic_addr_const (file, XEXP (x, 1), code);
5930 putc ('+', file);
5931 output_pic_addr_const (file, XEXP (x, 0), code);
5932 }
5933 else
5934 abort ();
5935 break;
5936
5937 case MINUS:
5938 if (!TARGET_MACHO)
5939 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
5940 output_pic_addr_const (file, XEXP (x, 0), code);
5941 putc ('-', file);
5942 output_pic_addr_const (file, XEXP (x, 1), code);
5943 if (!TARGET_MACHO)
5944 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
5945 break;
5946
5947 case UNSPEC:
5948 if (XVECLEN (x, 0) != 1)
5949 abort ();
5950 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
5951 switch (XINT (x, 1))
5952 {
5953 case UNSPEC_GOT:
5954 fputs ("@GOT", file);
5955 break;
5956 case UNSPEC_GOTOFF:
5957 fputs ("@GOTOFF", file);
5958 break;
5959 case UNSPEC_GOTPCREL:
5960 fputs ("@GOTPCREL(%rip)", file);
5961 break;
5962 case UNSPEC_GOTTPOFF:
5963 /* FIXME: This might be @TPOFF in Sun ld too. */
5964 fputs ("@GOTTPOFF", file);
5965 break;
5966 case UNSPEC_TPOFF:
5967 fputs ("@TPOFF", file);
5968 break;
5969 case UNSPEC_NTPOFF:
5970 if (TARGET_64BIT)
5971 fputs ("@TPOFF", file);
5972 else
5973 fputs ("@NTPOFF", file);
5974 break;
5975 case UNSPEC_DTPOFF:
5976 fputs ("@DTPOFF", file);
5977 break;
5978 case UNSPEC_GOTNTPOFF:
5979 if (TARGET_64BIT)
5980 fputs ("@GOTTPOFF(%rip)", file);
5981 else
5982 fputs ("@GOTNTPOFF", file);
5983 break;
5984 case UNSPEC_INDNTPOFF:
5985 fputs ("@INDNTPOFF", file);
5986 break;
5987 default:
5988 output_operand_lossage ("invalid UNSPEC as operand");
5989 break;
5990 }
5991 break;
5992
5993 default:
5994 output_operand_lossage ("invalid expression as operand");
5995 }
5996 }
5997
5998 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
5999 We need to emit DTP-relative relocations. */
6000
6001 void
6002 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
6003 {
6004 fputs (ASM_LONG, file);
6005 output_addr_const (file, x);
6006 fputs ("@DTPOFF", file);
6007 switch (size)
6008 {
6009 case 4:
6010 break;
6011 case 8:
6012 fputs (", 0", file);
6013 break;
6014 default:
6015 abort ();
6016 }
6017 }
6018
6019 /* In the name of slightly smaller debug output, and to cater to
6020 general assembler lossage, recognize PIC+GOTOFF and turn it back
6021 into a direct symbol reference. */
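/* Illustrative sketch, not part of the original source: an address like

     (plus:SI (reg:SI %ebx)
              (const:SI (unspec:SI [(symbol_ref:SI "foo")] UNSPEC_GOTOFF)))

   is turned back into plain (symbol_ref:SI "foo") so that debug output
   refers to the symbol directly; `foo' is a made-up name.  */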
6022
6023 static rtx
6024 ix86_delegitimize_address (rtx orig_x)
6025 {
6026 rtx x = orig_x, y;
6027
6028 if (GET_CODE (x) == MEM)
6029 x = XEXP (x, 0);
6030
6031 if (TARGET_64BIT)
6032 {
6033 if (GET_CODE (x) != CONST
6034 || GET_CODE (XEXP (x, 0)) != UNSPEC
6035 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
6036 || GET_CODE (orig_x) != MEM)
6037 return orig_x;
6038 return XVECEXP (XEXP (x, 0), 0, 0);
6039 }
6040
6041 if (GET_CODE (x) != PLUS
6042 || GET_CODE (XEXP (x, 1)) != CONST)
6043 return orig_x;
6044
6045 if (GET_CODE (XEXP (x, 0)) == REG
6046 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6047 /* %ebx + GOT/GOTOFF */
6048 y = NULL;
6049 else if (GET_CODE (XEXP (x, 0)) == PLUS)
6050 {
6051 /* %ebx + %reg * scale + GOT/GOTOFF */
6052 y = XEXP (x, 0);
6053 if (GET_CODE (XEXP (y, 0)) == REG
6054 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
6055 y = XEXP (y, 1);
6056 else if (GET_CODE (XEXP (y, 1)) == REG
6057 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
6058 y = XEXP (y, 0);
6059 else
6060 return orig_x;
6061 if (GET_CODE (y) != REG
6062 && GET_CODE (y) != MULT
6063 && GET_CODE (y) != ASHIFT)
6064 return orig_x;
6065 }
6066 else
6067 return orig_x;
6068
6069 x = XEXP (XEXP (x, 1), 0);
6070 if (GET_CODE (x) == UNSPEC
6071 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6072 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
6073 {
6074 if (y)
6075 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
6076 return XVECEXP (x, 0, 0);
6077 }
6078
6079 if (GET_CODE (x) == PLUS
6080 && GET_CODE (XEXP (x, 0)) == UNSPEC
6081 && GET_CODE (XEXP (x, 1)) == CONST_INT
6082 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6083 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
6084 && GET_CODE (orig_x) != MEM)))
6085 {
6086 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
6087 if (y)
6088 return gen_rtx_PLUS (Pmode, y, x);
6089 return x;
6090 }
6091
6092 return orig_x;
6093 }
6094 \f
6095 static void
6096 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
6097 int fp, FILE *file)
6098 {
6099 const char *suffix;
6100
6101 if (mode == CCFPmode || mode == CCFPUmode)
6102 {
6103 enum rtx_code second_code, bypass_code;
6104 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
6105 if (bypass_code != UNKNOWN || second_code != UNKNOWN)
6106 abort ();
6107 code = ix86_fp_compare_code_to_integer (code);
6108 mode = CCmode;
6109 }
6110 if (reverse)
6111 code = reverse_condition (code);
6112
6113 switch (code)
6114 {
6115 case EQ:
6116 suffix = "e";
6117 break;
6118 case NE:
6119 suffix = "ne";
6120 break;
6121 case GT:
6122 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
6123 abort ();
6124 suffix = "g";
6125 break;
6126 case GTU:
6127 /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
6128 Those same assemblers have the same but opposite losage on cmov. */
6129 if (mode != CCmode)
6130 abort ();
6131 suffix = fp ? "nbe" : "a";
6132 break;
6133 case LT:
6134 if (mode == CCNOmode || mode == CCGOCmode)
6135 suffix = "s";
6136 else if (mode == CCmode || mode == CCGCmode)
6137 suffix = "l";
6138 else
6139 abort ();
6140 break;
6141 case LTU:
6142 if (mode != CCmode)
6143 abort ();
6144 suffix = "b";
6145 break;
6146 case GE:
6147 if (mode == CCNOmode || mode == CCGOCmode)
6148 suffix = "ns";
6149 else if (mode == CCmode || mode == CCGCmode)
6150 suffix = "ge";
6151 else
6152 abort ();
6153 break;
6154 case GEU:
6155 /* ??? As above. */
6156 if (mode != CCmode)
6157 abort ();
6158 suffix = fp ? "nb" : "ae";
6159 break;
6160 case LE:
6161 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
6162 abort ();
6163 suffix = "le";
6164 break;
6165 case LEU:
6166 if (mode != CCmode)
6167 abort ();
6168 suffix = "be";
6169 break;
6170 case UNORDERED:
6171 suffix = fp ? "u" : "p";
6172 break;
6173 case ORDERED:
6174 suffix = fp ? "nu" : "np";
6175 break;
6176 default:
6177 abort ();
6178 }
6179 fputs (suffix, file);
6180 }
6181
6182 /* Print the name of register X to FILE based on its machine mode and number.
6183 If CODE is 'w', pretend the mode is HImode.
6184 If CODE is 'b', pretend the mode is QImode.
6185 If CODE is 'k', pretend the mode is SImode.
6186 If CODE is 'q', pretend the mode is DImode.
6187 If CODE is 'h', pretend the reg is the `high' byte register.
6188 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
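/* Illustrative sketch, not part of the original source: for the ax hard
   register this prints `%al' with code 'b', `%ax' with 'w', `%eax' with
   'k', `%rax' with 'q' (64-bit only) and `%ah' with 'h'.  */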
6189
6190 void
6191 print_reg (rtx x, int code, FILE *file)
6192 {
6193 if (REGNO (x) == ARG_POINTER_REGNUM
6194 || REGNO (x) == FRAME_POINTER_REGNUM
6195 || REGNO (x) == FLAGS_REG
6196 || REGNO (x) == FPSR_REG)
6197 abort ();
6198
6199 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
6200 putc ('%', file);
6201
6202 if (code == 'w' || MMX_REG_P (x))
6203 code = 2;
6204 else if (code == 'b')
6205 code = 1;
6206 else if (code == 'k')
6207 code = 4;
6208 else if (code == 'q')
6209 code = 8;
6210 else if (code == 'y')
6211 code = 3;
6212 else if (code == 'h')
6213 code = 0;
6214 else
6215 code = GET_MODE_SIZE (GET_MODE (x));
6216
6217 /* Irritatingly, AMD extended registers use a different naming convention
6218 from the normal registers. */
6219 if (REX_INT_REG_P (x))
6220 {
6221 if (!TARGET_64BIT)
6222 abort ();
6223 switch (code)
6224 {
6225 case 0:
6226 error ("extended registers have no high halves");
6227 break;
6228 case 1:
6229 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
6230 break;
6231 case 2:
6232 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
6233 break;
6234 case 4:
6235 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
6236 break;
6237 case 8:
6238 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
6239 break;
6240 default:
6241 error ("unsupported operand size for extended register");
6242 break;
6243 }
6244 return;
6245 }
6246 switch (code)
6247 {
6248 case 3:
6249 if (STACK_TOP_P (x))
6250 {
6251 fputs ("st(0)", file);
6252 break;
6253 }
6254 /* FALLTHRU */
6255 case 8:
6256 case 4:
6257 case 12:
6258 if (! ANY_FP_REG_P (x))
6259 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
6260 /* FALLTHRU */
6261 case 16:
6262 case 2:
6263 normal:
6264 fputs (hi_reg_name[REGNO (x)], file);
6265 break;
6266 case 1:
6267 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
6268 goto normal;
6269 fputs (qi_reg_name[REGNO (x)], file);
6270 break;
6271 case 0:
6272 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
6273 goto normal;
6274 fputs (qi_high_reg_name[REGNO (x)], file);
6275 break;
6276 default:
6277 abort ();
6278 }
6279 }
6280
6281 /* Locate some local-dynamic symbol still in use by this function
6282 so that we can print its name in some tls_local_dynamic_base
6283 pattern. */
6284
6285 static const char *
6286 get_some_local_dynamic_name (void)
6287 {
6288 rtx insn;
6289
6290 if (cfun->machine->some_ld_name)
6291 return cfun->machine->some_ld_name;
6292
6293 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
6294 if (INSN_P (insn)
6295 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
6296 return cfun->machine->some_ld_name;
6297
6298 abort ();
6299 }
6300
6301 static int
6302 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
6303 {
6304 rtx x = *px;
6305
6306 if (GET_CODE (x) == SYMBOL_REF
6307 && local_dynamic_symbolic_operand (x, Pmode))
6308 {
6309 cfun->machine->some_ld_name = XSTR (x, 0);
6310 return 1;
6311 }
6312
6313 return 0;
6314 }
6315
6316 /* Meaning of CODE:
6317 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
6318 C -- print opcode suffix for set/cmov insn.
6319 c -- like C, but print reversed condition
6320 F,f -- likewise, but for floating-point.
6321 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
6322 otherwise nothing
6323 R -- print the prefix for register names.
6324 z -- print the opcode suffix for the size of the current operand.
6325 * -- print a star (in certain assembler syntax)
6326 A -- print an absolute memory reference.
6327 w -- print the operand as if it's a "word" (HImode) even if it isn't.
6328 s -- print a shift double count, followed by the assembler's argument
6329 delimiter.
6330 b -- print the QImode name of the register for the indicated operand.
6331 %b0 would print %al if operands[0] is reg 0.
6332 w -- likewise, print the HImode name of the register.
6333 k -- likewise, print the SImode name of the register.
6334 q -- likewise, print the DImode name of the register.
6335 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
6336 y -- print "st(0)" instead of "st" as a register.
6337 D -- print condition for SSE cmp instruction.
6338 P -- if PIC, print an @PLT suffix.
6339 X -- don't print any sort of PIC '@' suffix for a symbol.
6340 & -- print some in-use local-dynamic symbol name.
6341 H -- print a memory address offset by 8; used for sse high-parts
6342 */
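/* Illustrative sketch, not part of the original source: these codes are
   used from insn templates elsewhere in the port.  For example the
   template "fist%z0\t%0" (see output_fix_trunc below) lets %z0 pick the
   size suffix from operand 0, and "%b0" prints the QImode name of the
   register in operand 0, such as `%al'.  */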
6343
6344 void
6345 print_operand (FILE *file, rtx x, int code)
6346 {
6347 if (code)
6348 {
6349 switch (code)
6350 {
6351 case '*':
6352 if (ASSEMBLER_DIALECT == ASM_ATT)
6353 putc ('*', file);
6354 return;
6355
6356 case '&':
6357 assemble_name (file, get_some_local_dynamic_name ());
6358 return;
6359
6360 case 'A':
6361 if (ASSEMBLER_DIALECT == ASM_ATT)
6362 putc ('*', file);
6363 else if (ASSEMBLER_DIALECT == ASM_INTEL)
6364 {
6365 /* Intel syntax. For absolute addresses, registers should not
6366 be surrounded by brackets. */
6367 if (GET_CODE (x) != REG)
6368 {
6369 putc ('[', file);
6370 PRINT_OPERAND (file, x, 0);
6371 putc (']', file);
6372 return;
6373 }
6374 }
6375 else
6376 abort ();
6377
6378 PRINT_OPERAND (file, x, 0);
6379 return;
6380
6381
6382 case 'L':
6383 if (ASSEMBLER_DIALECT == ASM_ATT)
6384 putc ('l', file);
6385 return;
6386
6387 case 'W':
6388 if (ASSEMBLER_DIALECT == ASM_ATT)
6389 putc ('w', file);
6390 return;
6391
6392 case 'B':
6393 if (ASSEMBLER_DIALECT == ASM_ATT)
6394 putc ('b', file);
6395 return;
6396
6397 case 'Q':
6398 if (ASSEMBLER_DIALECT == ASM_ATT)
6399 putc ('l', file);
6400 return;
6401
6402 case 'S':
6403 if (ASSEMBLER_DIALECT == ASM_ATT)
6404 putc ('s', file);
6405 return;
6406
6407 case 'T':
6408 if (ASSEMBLER_DIALECT == ASM_ATT)
6409 putc ('t', file);
6410 return;
6411
6412 case 'z':
6413 /* 387 opcodes don't get size suffixes if the operands are
6414 registers. */
6415 if (STACK_REG_P (x))
6416 return;
6417
6418 /* Likewise if using Intel opcodes. */
6419 if (ASSEMBLER_DIALECT == ASM_INTEL)
6420 return;
6421
6422 /* Derive the opcode suffix from the size of the operand. */
6423 switch (GET_MODE_SIZE (GET_MODE (x)))
6424 {
6425 case 2:
6426 #ifdef HAVE_GAS_FILDS_FISTS
6427 putc ('s', file);
6428 #endif
6429 return;
6430
6431 case 4:
6432 if (GET_MODE (x) == SFmode)
6433 {
6434 putc ('s', file);
6435 return;
6436 }
6437 else
6438 putc ('l', file);
6439 return;
6440
6441 case 12:
6442 case 16:
6443 putc ('t', file);
6444 return;
6445
6446 case 8:
6447 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
6448 {
6449 #ifdef GAS_MNEMONICS
6450 putc ('q', file);
6451 #else
6452 putc ('l', file);
6453 putc ('l', file);
6454 #endif
6455 }
6456 else
6457 putc ('l', file);
6458 return;
6459
6460 default:
6461 abort ();
6462 }
6463
6464 case 'b':
6465 case 'w':
6466 case 'k':
6467 case 'q':
6468 case 'h':
6469 case 'y':
6470 case 'X':
6471 case 'P':
6472 break;
6473
6474 case 's':
6475 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
6476 {
6477 PRINT_OPERAND (file, x, 0);
6478 putc (',', file);
6479 }
6480 return;
6481
6482 case 'D':
6483 /* A little bit of brain damage here. The SSE compare instructions
6484 use completely different names for the comparisons than the
6485 fp conditional moves do. */
6486 switch (GET_CODE (x))
6487 {
6488 case EQ:
6489 case UNEQ:
6490 fputs ("eq", file);
6491 break;
6492 case LT:
6493 case UNLT:
6494 fputs ("lt", file);
6495 break;
6496 case LE:
6497 case UNLE:
6498 fputs ("le", file);
6499 break;
6500 case UNORDERED:
6501 fputs ("unord", file);
6502 break;
6503 case NE:
6504 case LTGT:
6505 fputs ("neq", file);
6506 break;
6507 case UNGE:
6508 case GE:
6509 fputs ("nlt", file);
6510 break;
6511 case UNGT:
6512 case GT:
6513 fputs ("nle", file);
6514 break;
6515 case ORDERED:
6516 fputs ("ord", file);
6517 break;
6518 default:
6519 abort ();
6520 break;
6521 }
6522 return;
6523 case 'O':
6524 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
6525 if (ASSEMBLER_DIALECT == ASM_ATT)
6526 {
6527 switch (GET_MODE (x))
6528 {
6529 case HImode: putc ('w', file); break;
6530 case SImode:
6531 case SFmode: putc ('l', file); break;
6532 case DImode:
6533 case DFmode: putc ('q', file); break;
6534 default: abort ();
6535 }
6536 putc ('.', file);
6537 }
6538 #endif
6539 return;
6540 case 'C':
6541 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
6542 return;
6543 case 'F':
6544 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
6545 if (ASSEMBLER_DIALECT == ASM_ATT)
6546 putc ('.', file);
6547 #endif
6548 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
6549 return;
6550
6551 /* Like above, but reverse condition */
6552 case 'c':
6553 /* Check to see if argument to %c is really a constant
6554 and not a condition code which needs to be reversed. */
6555 if (!COMPARISON_P (x))
6556 {
6557 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
6558 return;
6559 }
6560 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
6561 return;
6562 case 'f':
6563 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
6564 if (ASSEMBLER_DIALECT == ASM_ATT)
6565 putc ('.', file);
6566 #endif
6567 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
6568 return;
6569
6570 case 'H':
6571 /* It doesn't actually matter what mode we use here, as we're
6572 only going to use this for printing. */
6573 x = adjust_address_nv (x, DImode, 8);
6574 break;
6575
6576 case '+':
6577 {
6578 rtx x;
6579
6580 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
6581 return;
6582
6583 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
6584 if (x)
6585 {
6586 int pred_val = INTVAL (XEXP (x, 0));
6587
6588 if (pred_val < REG_BR_PROB_BASE * 45 / 100
6589 || pred_val > REG_BR_PROB_BASE * 55 / 100)
6590 {
6591 int taken = pred_val > REG_BR_PROB_BASE / 2;
6592 int cputaken = final_forward_branch_p (current_output_insn) == 0;
6593
6594 /* Emit hints only in the case where the default branch prediction
6595 heuristics would fail. */
6596 if (taken != cputaken)
6597 {
6598 /* We use 3e (DS) prefix for taken branches and
6599 2e (CS) prefix for not taken branches. */
6600 if (taken)
6601 fputs ("ds ; ", file);
6602 else
6603 fputs ("cs ; ", file);
6604 }
6605 }
6606 }
6607 return;
6608 }
6609 default:
6610 output_operand_lossage ("invalid operand code '%c'", code);
6611 }
6612 }
6613
6614 if (GET_CODE (x) == REG)
6615 print_reg (x, code, file);
6616
6617 else if (GET_CODE (x) == MEM)
6618 {
6619 /* No `byte ptr' prefix for call instructions. */
6620 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
6621 {
6622 const char * size;
6623 switch (GET_MODE_SIZE (GET_MODE (x)))
6624 {
6625 case 1: size = "BYTE"; break;
6626 case 2: size = "WORD"; break;
6627 case 4: size = "DWORD"; break;
6628 case 8: size = "QWORD"; break;
6629 case 12: size = "XWORD"; break;
6630 case 16: size = "XMMWORD"; break;
6631 default:
6632 abort ();
6633 }
6634
6635 /* Check for explicit size override (codes 'b', 'w' and 'k') */
6636 if (code == 'b')
6637 size = "BYTE";
6638 else if (code == 'w')
6639 size = "WORD";
6640 else if (code == 'k')
6641 size = "DWORD";
6642
6643 fputs (size, file);
6644 fputs (" PTR ", file);
6645 }
6646
6647 x = XEXP (x, 0);
6648 /* Avoid (%rip) for call operands. */
6649 if (CONSTANT_ADDRESS_P (x) && code == 'P'
6650 && GET_CODE (x) != CONST_INT)
6651 output_addr_const (file, x);
6652 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
6653 output_operand_lossage ("invalid constraints for operand");
6654 else
6655 output_address (x);
6656 }
6657
6658 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
6659 {
6660 REAL_VALUE_TYPE r;
6661 long l;
6662
6663 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6664 REAL_VALUE_TO_TARGET_SINGLE (r, l);
6665
6666 if (ASSEMBLER_DIALECT == ASM_ATT)
6667 putc ('$', file);
6668 fprintf (file, "0x%08lx", l);
6669 }
6670
6671 /* These float cases don't actually occur as immediate operands. */
6672 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
6673 {
6674 char dstr[30];
6675
6676 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
6677 fprintf (file, "%s", dstr);
6678 }
6679
6680 else if (GET_CODE (x) == CONST_DOUBLE
6681 && GET_MODE (x) == XFmode)
6682 {
6683 char dstr[30];
6684
6685 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
6686 fprintf (file, "%s", dstr);
6687 }
6688
6689 else
6690 {
6691 if (code != 'P')
6692 {
6693 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
6694 {
6695 if (ASSEMBLER_DIALECT == ASM_ATT)
6696 putc ('$', file);
6697 }
6698 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
6699 || GET_CODE (x) == LABEL_REF)
6700 {
6701 if (ASSEMBLER_DIALECT == ASM_ATT)
6702 putc ('$', file);
6703 else
6704 fputs ("OFFSET FLAT:", file);
6705 }
6706 }
6707 if (GET_CODE (x) == CONST_INT)
6708 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6709 else if (flag_pic)
6710 output_pic_addr_const (file, x, code);
6711 else
6712 output_addr_const (file, x);
6713 }
6714 }
6715 \f
6716 /* Print a memory operand whose address is ADDR. */
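/* Illustrative sketch, not part of the original source: for base %ebp,
   index %ecx, scale 4 and displacement -4 this prints `-4(%ebp,%ecx,4)'
   in AT&T syntax and roughly `[ebp-4+ecx*4]' in Intel syntax (the
   `DWORD PTR' size prefix is added by print_operand, not here).  The
   registers and offset are made up for the example.  */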
6717
6718 void
6719 print_operand_address (FILE *file, rtx addr)
6720 {
6721 struct ix86_address parts;
6722 rtx base, index, disp;
6723 int scale;
6724
6725 if (! ix86_decompose_address (addr, &parts))
6726 abort ();
6727
6728 base = parts.base;
6729 index = parts.index;
6730 disp = parts.disp;
6731 scale = parts.scale;
6732
6733 switch (parts.seg)
6734 {
6735 case SEG_DEFAULT:
6736 break;
6737 case SEG_FS:
6738 case SEG_GS:
6739 if (USER_LABEL_PREFIX[0] == 0)
6740 putc ('%', file);
6741 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
6742 break;
6743 default:
6744 abort ();
6745 }
6746
6747 if (!base && !index)
6748 {
6749 /* A displacement-only address requires special attention. */
6750
6751 if (GET_CODE (disp) == CONST_INT)
6752 {
6753 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
6754 {
6755 if (USER_LABEL_PREFIX[0] == 0)
6756 putc ('%', file);
6757 fputs ("ds:", file);
6758 }
6759 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
6760 }
6761 else if (flag_pic)
6762 output_pic_addr_const (file, disp, 0);
6763 else
6764 output_addr_const (file, disp);
6765
6766 /* Use the one-byte shorter RIP-relative addressing for 64-bit mode. */
6767 if (TARGET_64BIT
6768 && ((GET_CODE (disp) == SYMBOL_REF
6769 && ! tls_symbolic_operand (disp, GET_MODE (disp)))
6770 || GET_CODE (disp) == LABEL_REF
6771 || (GET_CODE (disp) == CONST
6772 && GET_CODE (XEXP (disp, 0)) == PLUS
6773 && (GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
6774 || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
6775 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)))
6776 fputs ("(%rip)", file);
6777 }
6778 else
6779 {
6780 if (ASSEMBLER_DIALECT == ASM_ATT)
6781 {
6782 if (disp)
6783 {
6784 if (flag_pic)
6785 output_pic_addr_const (file, disp, 0);
6786 else if (GET_CODE (disp) == LABEL_REF)
6787 output_asm_label (disp);
6788 else
6789 output_addr_const (file, disp);
6790 }
6791
6792 putc ('(', file);
6793 if (base)
6794 print_reg (base, 0, file);
6795 if (index)
6796 {
6797 putc (',', file);
6798 print_reg (index, 0, file);
6799 if (scale != 1)
6800 fprintf (file, ",%d", scale);
6801 }
6802 putc (')', file);
6803 }
6804 else
6805 {
6806 rtx offset = NULL_RTX;
6807
6808 if (disp)
6809 {
6810 /* Pull out the offset of a symbol; print any symbol itself. */
6811 if (GET_CODE (disp) == CONST
6812 && GET_CODE (XEXP (disp, 0)) == PLUS
6813 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
6814 {
6815 offset = XEXP (XEXP (disp, 0), 1);
6816 disp = gen_rtx_CONST (VOIDmode,
6817 XEXP (XEXP (disp, 0), 0));
6818 }
6819
6820 if (flag_pic)
6821 output_pic_addr_const (file, disp, 0);
6822 else if (GET_CODE (disp) == LABEL_REF)
6823 output_asm_label (disp);
6824 else if (GET_CODE (disp) == CONST_INT)
6825 offset = disp;
6826 else
6827 output_addr_const (file, disp);
6828 }
6829
6830 putc ('[', file);
6831 if (base)
6832 {
6833 print_reg (base, 0, file);
6834 if (offset)
6835 {
6836 if (INTVAL (offset) >= 0)
6837 putc ('+', file);
6838 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6839 }
6840 }
6841 else if (offset)
6842 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6843 else
6844 putc ('0', file);
6845
6846 if (index)
6847 {
6848 putc ('+', file);
6849 print_reg (index, 0, file);
6850 if (scale != 1)
6851 fprintf (file, "*%d", scale);
6852 }
6853 putc (']', file);
6854 }
6855 }
6856 }
6857
6858 bool
6859 output_addr_const_extra (FILE *file, rtx x)
6860 {
6861 rtx op;
6862
6863 if (GET_CODE (x) != UNSPEC)
6864 return false;
6865
6866 op = XVECEXP (x, 0, 0);
6867 switch (XINT (x, 1))
6868 {
6869 case UNSPEC_GOTTPOFF:
6870 output_addr_const (file, op);
6871 /* FIXME: This might be @TPOFF in Sun ld. */
6872 fputs ("@GOTTPOFF", file);
6873 break;
6874 case UNSPEC_TPOFF:
6875 output_addr_const (file, op);
6876 fputs ("@TPOFF", file);
6877 break;
6878 case UNSPEC_NTPOFF:
6879 output_addr_const (file, op);
6880 if (TARGET_64BIT)
6881 fputs ("@TPOFF", file);
6882 else
6883 fputs ("@NTPOFF", file);
6884 break;
6885 case UNSPEC_DTPOFF:
6886 output_addr_const (file, op);
6887 fputs ("@DTPOFF", file);
6888 break;
6889 case UNSPEC_GOTNTPOFF:
6890 output_addr_const (file, op);
6891 if (TARGET_64BIT)
6892 fputs ("@GOTTPOFF(%rip)", file);
6893 else
6894 fputs ("@GOTNTPOFF", file);
6895 break;
6896 case UNSPEC_INDNTPOFF:
6897 output_addr_const (file, op);
6898 fputs ("@INDNTPOFF", file);
6899 break;
6900
6901 default:
6902 return false;
6903 }
6904
6905 return true;
6906 }
6907 \f
6908 /* Split one or more DImode RTL references into pairs of SImode
6909 references. The RTL can be REG, offsettable MEM, integer constant, or
6910 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
6911 split and "num" is its length. lo_half and hi_half are output arrays
6912 that parallel "operands". */
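/* Illustrative sketch, not part of the original source: a hypothetical
   post-reload splitter could use this as

     rtx lo[2], hi[2];
     split_di (operands, 2, lo, hi);
     emit_move_insn (lo[0], lo[1]);
     emit_move_insn (hi[0], hi[1]);

   turning one DImode move into two SImode moves.  */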
6913
6914 void
6915 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
6916 {
6917 while (num--)
6918 {
6919 rtx op = operands[num];
6920
6921 /* simplify_subreg refuses to split volatile memory addresses,
6922 but we still have to handle them. */
6923 if (GET_CODE (op) == MEM)
6924 {
6925 lo_half[num] = adjust_address (op, SImode, 0);
6926 hi_half[num] = adjust_address (op, SImode, 4);
6927 }
6928 else
6929 {
6930 lo_half[num] = simplify_gen_subreg (SImode, op,
6931 GET_MODE (op) == VOIDmode
6932 ? DImode : GET_MODE (op), 0);
6933 hi_half[num] = simplify_gen_subreg (SImode, op,
6934 GET_MODE (op) == VOIDmode
6935 ? DImode : GET_MODE (op), 4);
6936 }
6937 }
6938 }
6939 /* Split one or more TImode RTL references into pairs of DImode
6940 references. The RTL can be REG, offsettable MEM, integer constant, or
6941 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
6942 split and "num" is its length. lo_half and hi_half are output arrays
6943 that parallel "operands". */
6944
6945 void
6946 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
6947 {
6948 while (num--)
6949 {
6950 rtx op = operands[num];
6951
6952 /* simplify_subreg refuses to split volatile memory addresses, but we
6953 still have to handle them. */
6954 if (GET_CODE (op) == MEM)
6955 {
6956 lo_half[num] = adjust_address (op, DImode, 0);
6957 hi_half[num] = adjust_address (op, DImode, 8);
6958 }
6959 else
6960 {
6961 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
6962 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
6963 }
6964 }
6965 }
6966 \f
6967 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
6968 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
6969 is the expression of the binary operation. The output may either be
6970 emitted here, or returned to the caller, like all output_* functions.
6971
6972 There is no guarantee that the operands are the same mode, as they
6973 might be within FLOAT or FLOAT_EXTEND expressions. */
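/* Illustrative sketch, not part of the original source: for an insn like
   (set (reg:DF st0) (plus:DF (reg:DF st0) (mem:DF ...))) this returns
   "fadd%z2\t%2", which the %z code expands to e.g. `faddl 8(%ebp)';
   with SSE registers the same operation would come back as
   "addsd\t{%2, %0|%0, %2}".  The memory operand shown is made up.  */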
6974
6975 #ifndef SYSV386_COMPAT
6976 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
6977 wants to fix the assemblers because that causes incompatibility
6978 with gcc. No-one wants to fix gcc because that causes
6979 incompatibility with assemblers... You can use the option of
6980 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
6981 #define SYSV386_COMPAT 1
6982 #endif
6983
6984 const char *
6985 output_387_binary_op (rtx insn, rtx *operands)
6986 {
6987 static char buf[30];
6988 const char *p;
6989 const char *ssep;
6990 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
6991
6992 #ifdef ENABLE_CHECKING
6993 /* Even if we do not want to check the inputs, this documents input
6994 constraints. Which helps in understanding the following code. */
6995 if (STACK_REG_P (operands[0])
6996 && ((REG_P (operands[1])
6997 && REGNO (operands[0]) == REGNO (operands[1])
6998 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
6999 || (REG_P (operands[2])
7000 && REGNO (operands[0]) == REGNO (operands[2])
7001 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
7002 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
7003 ; /* ok */
7004 else if (!is_sse)
7005 abort ();
7006 #endif
7007
7008 switch (GET_CODE (operands[3]))
7009 {
7010 case PLUS:
7011 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7012 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7013 p = "fiadd";
7014 else
7015 p = "fadd";
7016 ssep = "add";
7017 break;
7018
7019 case MINUS:
7020 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7021 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7022 p = "fisub";
7023 else
7024 p = "fsub";
7025 ssep = "sub";
7026 break;
7027
7028 case MULT:
7029 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7030 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7031 p = "fimul";
7032 else
7033 p = "fmul";
7034 ssep = "mul";
7035 break;
7036
7037 case DIV:
7038 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7039 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7040 p = "fidiv";
7041 else
7042 p = "fdiv";
7043 ssep = "div";
7044 break;
7045
7046 default:
7047 abort ();
7048 }
7049
7050 if (is_sse)
7051 {
7052 strcpy (buf, ssep);
7053 if (GET_MODE (operands[0]) == SFmode)
7054 strcat (buf, "ss\t{%2, %0|%0, %2}");
7055 else
7056 strcat (buf, "sd\t{%2, %0|%0, %2}");
7057 return buf;
7058 }
7059 strcpy (buf, p);
7060
7061 switch (GET_CODE (operands[3]))
7062 {
7063 case MULT:
7064 case PLUS:
7065 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
7066 {
7067 rtx temp = operands[2];
7068 operands[2] = operands[1];
7069 operands[1] = temp;
7070 }
7071
7072 /* Now we know operands[0] == operands[1]. */
7073
7074 if (GET_CODE (operands[2]) == MEM)
7075 {
7076 p = "%z2\t%2";
7077 break;
7078 }
7079
7080 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7081 {
7082 if (STACK_TOP_P (operands[0]))
7083 /* How is it that we are storing to a dead operand[2]?
7084 Well, presumably operands[1] is dead too. We can't
7085 store the result to st(0) as st(0) gets popped on this
7086 instruction. Instead store to operands[2] (which I
7087 think has to be st(1)). st(1) will be popped later.
7088 gcc <= 2.8.1 didn't have this check and generated
7089 assembly code that the Unixware assembler rejected. */
7090 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7091 else
7092 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7093 break;
7094 }
7095
7096 if (STACK_TOP_P (operands[0]))
7097 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7098 else
7099 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7100 break;
7101
7102 case MINUS:
7103 case DIV:
7104 if (GET_CODE (operands[1]) == MEM)
7105 {
7106 p = "r%z1\t%1";
7107 break;
7108 }
7109
7110 if (GET_CODE (operands[2]) == MEM)
7111 {
7112 p = "%z2\t%2";
7113 break;
7114 }
7115
7116 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7117 {
7118 #if SYSV386_COMPAT
7119 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7120 derived assemblers, confusingly reverse the direction of
7121 the operation for fsub{r} and fdiv{r} when the
7122 destination register is not st(0). The Intel assembler
7123 doesn't have this brain damage. Read !SYSV386_COMPAT to
7124 figure out what the hardware really does. */
7125 if (STACK_TOP_P (operands[0]))
7126 p = "{p\t%0, %2|rp\t%2, %0}";
7127 else
7128 p = "{rp\t%2, %0|p\t%0, %2}";
7129 #else
7130 if (STACK_TOP_P (operands[0]))
7131 /* As above for fmul/fadd, we can't store to st(0). */
7132 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7133 else
7134 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7135 #endif
7136 break;
7137 }
7138
7139 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
7140 {
7141 #if SYSV386_COMPAT
7142 if (STACK_TOP_P (operands[0]))
7143 p = "{rp\t%0, %1|p\t%1, %0}";
7144 else
7145 p = "{p\t%1, %0|rp\t%0, %1}";
7146 #else
7147 if (STACK_TOP_P (operands[0]))
7148 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7149 else
7150 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7151 #endif
7152 break;
7153 }
7154
7155 if (STACK_TOP_P (operands[0]))
7156 {
7157 if (STACK_TOP_P (operands[1]))
7158 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7159 else
7160 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
7161 break;
7162 }
7163 else if (STACK_TOP_P (operands[1]))
7164 {
7165 #if SYSV386_COMPAT
7166 p = "{\t%1, %0|r\t%0, %1}";
7167 #else
7168 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
7169 #endif
7170 }
7171 else
7172 {
7173 #if SYSV386_COMPAT
7174 p = "{r\t%2, %0|\t%0, %2}";
7175 #else
7176 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7177 #endif
7178 }
7179 break;
7180
7181 default:
7182 abort ();
7183 }
7184
7185 strcat (buf, p);
7186 return buf;
7187 }
7188
7189 /* Output code to initialize control word copies used by trunc?f?i and
7190 rounding patterns. CURRENT_MODE is set to the current control word,
7191 while NEW_MODE is set to the new control word. */
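/* Background note, not part of the original source: bits 10-11 of the
   i387 control word select the rounding mode (00 nearest, 01 down,
   10 up, 11 truncate), which is why the code below ors in 0x0400,
   0x0800 or 0x0c00, and bit 5 (0x0020) masks the precision exception
   for nearbyint().  */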
7192
7193 void
7194 emit_i387_cw_initialization (rtx current_mode, rtx new_mode, int mode)
7195 {
7196 rtx reg = gen_reg_rtx (HImode);
7197
7198 emit_insn (gen_x86_fnstcw_1 (current_mode));
7199 emit_move_insn (reg, current_mode);
7200
7201 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
7202 && !TARGET_64BIT)
7203 {
7204 switch (mode)
7205 {
7206 case I387_CW_FLOOR:
7207 /* round down toward -oo */
7208 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
7209 break;
7210
7211 case I387_CW_CEIL:
7212 /* round up toward +oo */
7213 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
7214 break;
7215
7216 case I387_CW_TRUNC:
7217 /* round toward zero (truncate) */
7218 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
7219 break;
7220
7221 case I387_CW_MASK_PM:
7222 /* mask precision exception for nearbyint() */
7223 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
7224 break;
7225
7226 default:
7227 abort();
7228 }
7229 }
7230 else
7231 {
7232 switch (mode)
7233 {
7234 case I387_CW_FLOOR:
7235 /* round down toward -oo */
7236 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
7237 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
7238 break;
7239
7240 case I387_CW_CEIL:
7241 /* round up toward +oo */
7242 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
7243 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
7244 break;
7245
7246 case I387_CW_TRUNC:
7247 /* round toward zero (truncate) */
7248 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
7249 break;
7250
7251 case I387_CW_MASK_PM:
7252 /* mask precision exception for nearbyint() */
7253 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
7254 break;
7255
7256 default:
7257 abort();
7258 }
7259 }
7260
7261 emit_move_insn (new_mode, reg);
7262 }
7263
7264 /* Output code for INSN to convert a float to a signed int. OPERANDS
7265 are the insn operands. The output may be [HSD]Imode and the input
7266 operand may be [SDX]Fmode. */
7267
7268 const char *
7269 output_fix_trunc (rtx insn, rtx *operands)
7270 {
7271 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7272 int dimode_p = GET_MODE (operands[0]) == DImode;
7273
7274 /* Jump through a hoop or two for DImode, since the hardware has no
7275 non-popping instruction. We used to do this a different way, but
7276 that was somewhat fragile and broke with post-reload splitters. */
7277 if (dimode_p && !stack_top_dies)
7278 output_asm_insn ("fld\t%y1", operands);
7279
7280 if (!STACK_TOP_P (operands[1]))
7281 abort ();
7282
7283 if (GET_CODE (operands[0]) != MEM)
7284 abort ();
7285
7286 output_asm_insn ("fldcw\t%3", operands);
7287 if (stack_top_dies || dimode_p)
7288 output_asm_insn ("fistp%z0\t%0", operands);
7289 else
7290 output_asm_insn ("fist%z0\t%0", operands);
7291 output_asm_insn ("fldcw\t%2", operands);
7292
7293 return "";
7294 }
7295
7296 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
7297 should be used. UNORDERED_P is true when fucom should be used. */
7298
7299 const char *
7300 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
7301 {
7302 int stack_top_dies;
7303 rtx cmp_op0, cmp_op1;
7304 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
7305
7306 if (eflags_p)
7307 {
7308 cmp_op0 = operands[0];
7309 cmp_op1 = operands[1];
7310 }
7311 else
7312 {
7313 cmp_op0 = operands[1];
7314 cmp_op1 = operands[2];
7315 }
7316
7317 if (is_sse)
7318 {
7319 if (GET_MODE (operands[0]) == SFmode)
7320 if (unordered_p)
7321 return "ucomiss\t{%1, %0|%0, %1}";
7322 else
7323 return "comiss\t{%1, %0|%0, %1}";
7324 else
7325 if (unordered_p)
7326 return "ucomisd\t{%1, %0|%0, %1}";
7327 else
7328 return "comisd\t{%1, %0|%0, %1}";
7329 }
7330
7331 if (! STACK_TOP_P (cmp_op0))
7332 abort ();
7333
7334 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7335
7336 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
7337 {
7338 if (stack_top_dies)
7339 {
7340 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
7341 return TARGET_USE_FFREEP ? "ffreep\t%y1" : "fstp\t%y1";
7342 }
7343 else
7344 return "ftst\n\tfnstsw\t%0";
7345 }
7346
7347 if (STACK_REG_P (cmp_op1)
7348 && stack_top_dies
7349 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
7350 && REGNO (cmp_op1) != FIRST_STACK_REG)
7351 {
7352 /* If both the top of the 387 stack dies and the other operand
7353 is also a stack register that dies, then this must be an
7354 `fcompp' float compare.  */
7355
7356 if (eflags_p)
7357 {
7358 /* There is no double popping fcomi variant. Fortunately,
7359 eflags is immune from the fstp's cc clobbering. */
7360 if (unordered_p)
7361 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
7362 else
7363 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
7364 return TARGET_USE_FFREEP ? "ffreep\t%y0" : "fstp\t%y0";
7365 }
7366 else
7367 {
7368 if (unordered_p)
7369 return "fucompp\n\tfnstsw\t%0";
7370 else
7371 return "fcompp\n\tfnstsw\t%0";
7372 }
7373 }
7374 else
7375 {
7376 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
7377
7378 static const char * const alt[16] =
7379 {
7380 "fcom%z2\t%y2\n\tfnstsw\t%0",
7381 "fcomp%z2\t%y2\n\tfnstsw\t%0",
7382 "fucom%z2\t%y2\n\tfnstsw\t%0",
7383 "fucomp%z2\t%y2\n\tfnstsw\t%0",
7384
7385 "ficom%z2\t%y2\n\tfnstsw\t%0",
7386 "ficomp%z2\t%y2\n\tfnstsw\t%0",
7387 NULL,
7388 NULL,
7389
7390 "fcomi\t{%y1, %0|%0, %y1}",
7391 "fcomip\t{%y1, %0|%0, %y1}",
7392 "fucomi\t{%y1, %0|%0, %y1}",
7393 "fucomip\t{%y1, %0|%0, %y1}",
7394
7395 NULL,
7396 NULL,
7397 NULL,
7398 NULL
7399 };
7400
7401 int mask;
7402 const char *ret;
7403
7404 mask = eflags_p << 3;
7405 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
7406 mask |= unordered_p << 1;
7407 mask |= stack_top_dies;
7408
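      /* Worked example: an eflags-based compare (eflags_p = 1) of two FP
         operands (non-integer) with fucom semantics (unordered_p = 1) where
         the stack top dies gives mask = 8 + 2 + 1 = 11, which selects
         "fucomip" in the table above.  */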
7409 if (mask >= 16)
7410 abort ();
7411 ret = alt[mask];
7412 if (ret == NULL)
7413 abort ();
7414
7415 return ret;
7416 }
7417 }
7418
7419 void
7420 ix86_output_addr_vec_elt (FILE *file, int value)
7421 {
7422 const char *directive = ASM_LONG;
7423
7424 if (TARGET_64BIT)
7425 {
7426 #ifdef ASM_QUAD
7427 directive = ASM_QUAD;
7428 #else
7429 abort ();
7430 #endif
7431 }
7432
7433 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
7434 }
7435
7436 void
7437 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
7438 {
7439 if (TARGET_64BIT)
7440 fprintf (file, "%s%s%d-%s%d\n",
7441 ASM_LONG, LPREFIX, value, LPREFIX, rel);
7442 else if (HAVE_AS_GOTOFF_IN_DATA)
7443 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
7444 #if TARGET_MACHO
7445 else if (TARGET_MACHO)
7446 {
7447 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
7448 machopic_output_function_base_name (file);
7449 fprintf(file, "\n");
7450 }
7451 #endif
7452 else
7453 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
7454 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
7455 }
7456 \f
7457 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
7458 for the target. */
7459
7460 void
7461 ix86_expand_clear (rtx dest)
7462 {
7463 rtx tmp;
7464
7465 /* We play register width games, which are only valid after reload. */
7466 if (!reload_completed)
7467 abort ();
7468
7469 /* Avoid HImode and its attendant prefix byte. */
7470 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
7471 dest = gen_rtx_REG (SImode, REGNO (dest));
7472
7473 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
7474
7475 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
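  /* "xor reg,reg" needs no immediate and is therefore shorter than
     "mov $0,reg", but unlike mov it clobbers the flags, hence the explicit
     flags-register clobber added to the parallel below.  */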
7476 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
7477 {
7478 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
7479 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
7480 }
7481
7482 emit_insn (tmp);
7483 }
7484
7485 /* X is an unchanging MEM. If it is a constant pool reference, return
7486 the constant pool rtx, else NULL. */
7487
7488 rtx
7489 maybe_get_pool_constant (rtx x)
7490 {
7491 x = ix86_delegitimize_address (XEXP (x, 0));
7492
7493 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7494 return get_pool_constant (x);
7495
7496 return NULL_RTX;
7497 }
7498
7499 void
7500 ix86_expand_move (enum machine_mode mode, rtx operands[])
7501 {
7502 int strict = (reload_in_progress || reload_completed);
7503 rtx op0, op1;
7504 enum tls_model model;
7505
7506 op0 = operands[0];
7507 op1 = operands[1];
7508
7509 if (GET_CODE (op1) == SYMBOL_REF)
7510 {
7511 model = SYMBOL_REF_TLS_MODEL (op1);
7512 if (model)
7513 {
7514 op1 = legitimize_tls_address (op1, model, true);
7515 op1 = force_operand (op1, op0);
7516 if (op1 == op0)
7517 return;
7518 }
7519 }
7520 else if (GET_CODE (op1) == CONST
7521 && GET_CODE (XEXP (op1, 0)) == PLUS
7522 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
7523 {
7524 model = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1, 0), 0));
7525 if (model)
7526 {
7527 rtx addend = XEXP (XEXP (op1, 0), 1);
7528 op1 = legitimize_tls_address (XEXP (XEXP (op1, 0), 0), model, true);
7529 op1 = force_operand (op1, NULL);
7530 op1 = expand_simple_binop (Pmode, PLUS, op1, addend,
7531 op0, 1, OPTAB_DIRECT);
7532 if (op1 == op0)
7533 return;
7534 }
7535 }
7536
7537 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
7538 {
7539 #if TARGET_MACHO
7540 if (MACHOPIC_PURE)
7541 {
7542 rtx temp = ((reload_in_progress
7543 || ((op0 && GET_CODE (op0) == REG)
7544 && mode == Pmode))
7545 ? op0 : gen_reg_rtx (Pmode));
7546 op1 = machopic_indirect_data_reference (op1, temp);
7547 op1 = machopic_legitimize_pic_address (op1, mode,
7548 temp == op1 ? 0 : temp);
7549 }
7550 else if (MACHOPIC_INDIRECT)
7551 op1 = machopic_indirect_data_reference (op1, 0);
7552 if (op0 == op1)
7553 return;
7554 #else
7555 if (GET_CODE (op0) == MEM)
7556 op1 = force_reg (Pmode, op1);
7557 else
7558 op1 = legitimize_address (op1, op1, Pmode);
7559 #endif /* TARGET_MACHO */
7560 }
7561 else
7562 {
7563 if (GET_CODE (op0) == MEM
7564 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
7565 || !push_operand (op0, mode))
7566 && GET_CODE (op1) == MEM)
7567 op1 = force_reg (mode, op1);
7568
7569 if (push_operand (op0, mode)
7570 && ! general_no_elim_operand (op1, mode))
7571 op1 = copy_to_mode_reg (mode, op1);
7572
7573 /* Force large constants in 64-bit compilation into a register
7574 so that they get CSEd. */
7575 if (TARGET_64BIT && mode == DImode
7576 && immediate_operand (op1, mode)
7577 && !x86_64_zext_immediate_operand (op1, VOIDmode)
7578 && !register_operand (op0, mode)
7579 && optimize && !reload_completed && !reload_in_progress)
7580 op1 = copy_to_mode_reg (mode, op1);
7581
7582 if (FLOAT_MODE_P (mode))
7583 {
7584 /* If we are loading a floating point constant to a register,
7585 force the value to memory now, since we'll get better code
7586 out the back end. */
7587
7588 if (strict)
7589 ;
7590 else if (GET_CODE (op1) == CONST_DOUBLE)
7591 {
7592 op1 = validize_mem (force_const_mem (mode, op1));
7593 if (!register_operand (op0, mode))
7594 {
7595 rtx temp = gen_reg_rtx (mode);
7596 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
7597 emit_move_insn (op0, temp);
7598 return;
7599 }
7600 }
7601 }
7602 }
7603
7604 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
7605 }
7606
7607 void
7608 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
7609 {
7610 rtx op0 = operands[0], op1 = operands[1];
7611
7612 /* Force constants other than zero into memory. We do not know how
7613 the instructions used to build constants modify the upper 64 bits
7614 of the register; once we have that information, we may be able
7615 to handle some of them more efficiently. */
7616 if ((reload_in_progress | reload_completed) == 0
7617 && register_operand (op0, mode)
7618 && CONSTANT_P (op1) && op1 != CONST0_RTX (mode))
7619 op1 = validize_mem (force_const_mem (mode, op1));
7620
7621 /* Make operand1 a register if it isn't already. */
7622 if (!no_new_pseudos
7623 && !register_operand (op0, mode)
7624 && !register_operand (op1, mode))
7625 {
7626 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
7627 return;
7628 }
7629
7630 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
7631 }
7632
7633 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
7634 straight to ix86_expand_vector_move. */
7635
7636 void
7637 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
7638 {
7639 rtx op0, op1, m;
7640
7641 op0 = operands[0];
7642 op1 = operands[1];
7643
7644 if (MEM_P (op1))
7645 {
7646 /* If we're optimizing for size, movups is the smallest. */
7647 if (optimize_size)
7648 {
7649 op0 = gen_lowpart (V4SFmode, op0);
7650 op1 = gen_lowpart (V4SFmode, op1);
7651 emit_insn (gen_sse_movups (op0, op1));
7652 return;
7653 }
7654
7655 /* ??? If we have typed data, then it would appear that using
7656 movdqu is the only way to get unaligned data loaded with
7657 integer type. */
7658 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
7659 {
7660 op0 = gen_lowpart (V16QImode, op0);
7661 op1 = gen_lowpart (V16QImode, op1);
7662 emit_insn (gen_sse2_movdqu (op0, op1));
7663 return;
7664 }
7665
7666 if (TARGET_SSE2 && mode == V2DFmode)
7667 {
7668 rtx zero;
7669
7670 /* When SSE registers are split into halves, we can avoid
7671 writing to the top half twice. */
7672 if (TARGET_SSE_SPLIT_REGS)
7673 {
7674 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
7675 zero = op0;
7676 }
7677 else
7678 {
7679 /* ??? Not sure about the best option for the Intel chips.
7680 The following would seem to satisfy; the register is
7681 entirely cleared, breaking the dependency chain. We
7682 then store to the upper half, with a dependency depth
7683 of one. A rumor has it that Intel recommends two movsd
7684 followed by an unpacklpd, but this is unconfirmed. And
7685 given that the dependency depth of the unpacklpd would
7686 still be one, I'm not sure why this would be better. */
7687 zero = CONST0_RTX (V2DFmode);
7688 }
7689
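      /* In either case the two DFmode halves are then loaded separately
         below, so the net effect is roughly a clear of the destination
         (omitted on split-register targets) followed by low- and high-half
         loads; this is only a sketch, not literal compiler output.  */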
7690 m = adjust_address (op1, DFmode, 0);
7691 emit_insn (gen_sse2_loadlpd (op0, zero, m));
7692 m = adjust_address (op1, DFmode, 8);
7693 emit_insn (gen_sse2_loadhpd (op0, op0, m));
7694 }
7695 else
7696 {
7697 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
7698 emit_move_insn (op0, CONST0_RTX (mode));
7699 else
7700 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
7701
7702 m = adjust_address (op1, V2SFmode, 0);
7703 emit_insn (gen_sse_loadlps (op0, op0, m));
7704 m = adjust_address (op1, V2SFmode, 8);
7705 emit_insn (gen_sse_loadhps (op0, op0, m));
7706 }
7707 }
7708 else if (MEM_P (op0))
7709 {
7710 /* If we're optimizing for size, movups is the smallest. */
7711 if (optimize_size)
7712 {
7713 op0 = gen_lowpart (V4SFmode, op0);
7714 op1 = gen_lowpart (V4SFmode, op1);
7715 emit_insn (gen_sse_movups (op0, op1));
7716 return;
7717 }
7718
7719 /* ??? Similar to above, only less clear because of quote
7720 typeless stores unquote. */
7721 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
7722 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
7723 {
7724 op0 = gen_lowpart (V16QImode, op0);
7725 op1 = gen_lowpart (V16QImode, op1);
7726 emit_insn (gen_sse2_movdqu (op0, op1));
7727 return;
7728 }
7729
7730 if (TARGET_SSE2 && mode == V2DFmode)
7731 {
7732 m = adjust_address (op0, DFmode, 0);
7733 emit_insn (gen_sse2_storelpd (m, op1));
7734 m = adjust_address (op0, DFmode, 8);
7735 emit_insn (gen_sse2_storehpd (m, op1));
7736 }
7737 else
7738 {
7739 if (mode != V4SFmode)
7740 op1 = gen_lowpart (V4SFmode, op1);
7741 m = adjust_address (op0, V2SFmode, 0);
7742 emit_insn (gen_sse_storelps (m, op1));
7743 m = adjust_address (op0, V2SFmode, 8);
7744 emit_insn (gen_sse_storehps (m, op1));
7745 }
7746 }
7747 else
7748 gcc_unreachable ();
7749 }
7750
7751
7752 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
7753 destination to use for the operation. If different from the true
7754 destination in operands[0], a copy operation will be required. */
7755
7756 rtx
7757 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
7758 rtx operands[])
7759 {
7760 int matching_memory;
7761 rtx src1, src2, dst;
7762
7763 dst = operands[0];
7764 src1 = operands[1];
7765 src2 = operands[2];
7766
7767 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
7768 if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
7769 && (rtx_equal_p (dst, src2)
7770 || immediate_operand (src1, mode)))
7771 {
7772 rtx temp = src1;
7773 src1 = src2;
7774 src2 = temp;
7775 }
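  /* For example, with dst = 5 + dst the sources are swapped so that src1
     becomes the variable and the constant becomes src2, letting src1 match
     the memory destination below.  */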
7776
7777 /* If the destination is memory, and we do not have matching source
7778 operands, do things in registers. */
7779 matching_memory = 0;
7780 if (GET_CODE (dst) == MEM)
7781 {
7782 if (rtx_equal_p (dst, src1))
7783 matching_memory = 1;
7784 else if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
7785 && rtx_equal_p (dst, src2))
7786 matching_memory = 2;
7787 else
7788 dst = gen_reg_rtx (mode);
7789 }
7790
7791 /* Both source operands cannot be in memory. */
7792 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
7793 {
7794 if (matching_memory != 2)
7795 src2 = force_reg (mode, src2);
7796 else
7797 src1 = force_reg (mode, src1);
7798 }
7799
7800 /* If the operation is not commutative, source 1 cannot be a constant
7801 or non-matching memory. */
7802 if ((CONSTANT_P (src1)
7803 || (!matching_memory && GET_CODE (src1) == MEM))
7804 && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
7805 src1 = force_reg (mode, src1);
7806
7807 /* If optimizing, copy to regs to improve CSE */
7808 if (optimize && ! no_new_pseudos)
7809 {
7810 if (GET_CODE (dst) == MEM)
7811 dst = gen_reg_rtx (mode);
7812 if (GET_CODE (src1) == MEM)
7813 src1 = force_reg (mode, src1);
7814 if (GET_CODE (src2) == MEM)
7815 src2 = force_reg (mode, src2);
7816 }
7817
7818 src1 = operands[1] = src1;
7819 src2 = operands[2] = src2;
7820 return dst;
7821 }
7822
7823 /* Similarly, but assume that the destination has already been
7824 set up properly. */
7825
7826 void
7827 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
7828 enum machine_mode mode, rtx operands[])
7829 {
7830 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
7831 gcc_assert (dst == operands[0]);
7832 }
7833
7834 /* Attempt to expand a binary operator. Make the expansion closer to the
7835 actual machine, than just general_operand, which will allow 3 separate
7836 memory references (one output, two input) in a single insn. */
7837
7838 void
7839 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
7840 rtx operands[])
7841 {
7842 rtx src1, src2, dst, op, clob;
7843
7844 dst = ix86_fixup_binary_operands (code, mode, operands);
7845 src1 = operands[1];
7846 src2 = operands[2];
7847
7848 /* Emit the instruction. */
7849
7850 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
7851 if (reload_in_progress)
7852 {
7853 /* Reload doesn't know about the flags register, and doesn't know that
7854 it doesn't want to clobber it. We can only do this with PLUS. */
7855 if (code != PLUS)
7856 abort ();
7857 emit_insn (op);
7858 }
7859 else
7860 {
7861 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
7862 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
7863 }
7864
7865 /* Fix up the destination if needed. */
7866 if (dst != operands[0])
7867 emit_move_insn (operands[0], dst);
7868 }
7869
7870 /* Return TRUE or FALSE depending on whether the binary operator meets the
7871 appropriate constraints. */
7872
7873 int
7874 ix86_binary_operator_ok (enum rtx_code code,
7875 enum machine_mode mode ATTRIBUTE_UNUSED,
7876 rtx operands[3])
7877 {
7878 /* Both source operands cannot be in memory. */
7879 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
7880 return 0;
7881 /* If the operation is not commutative, source 1 cannot be a constant. */
7882 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
7883 return 0;
7884 /* If the destination is memory, we must have a matching source operand. */
7885 if (GET_CODE (operands[0]) == MEM
7886 && ! (rtx_equal_p (operands[0], operands[1])
7887 || (GET_RTX_CLASS (code) == RTX_COMM_ARITH
7888 && rtx_equal_p (operands[0], operands[2]))))
7889 return 0;
7890 /* If the operation is not commutative and source 1 is memory, we must
7891 have a matching destination. */
7892 if (GET_CODE (operands[1]) == MEM
7893 && GET_RTX_CLASS (code) != RTX_COMM_ARITH
7894 && ! rtx_equal_p (operands[0], operands[1]))
7895 return 0;
7896 return 1;
7897 }
7898
7899 /* Attempt to expand a unary operator. Make the expansion closer to the
7900 actual machine, than just general_operand, which will allow 2 separate
7901 memory references (one output, one input) in a single insn. */
7902
7903 void
7904 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
7905 rtx operands[])
7906 {
7907 int matching_memory;
7908 rtx src, dst, op, clob;
7909
7910 dst = operands[0];
7911 src = operands[1];
7912
7913 /* If the destination is memory, and we do not have matching source
7914 operands, do things in registers. */
7915 matching_memory = 0;
7916 if (MEM_P (dst))
7917 {
7918 if (rtx_equal_p (dst, src))
7919 matching_memory = 1;
7920 else
7921 dst = gen_reg_rtx (mode);
7922 }
7923
7924 /* When source operand is memory, destination must match. */
7925 if (MEM_P (src) && !matching_memory)
7926 src = force_reg (mode, src);
7927
7928 /* If optimizing, copy to regs to improve CSE. */
7929 if (optimize && ! no_new_pseudos)
7930 {
7931 if (GET_CODE (dst) == MEM)
7932 dst = gen_reg_rtx (mode);
7933 if (GET_CODE (src) == MEM)
7934 src = force_reg (mode, src);
7935 }
7936
7937 /* Emit the instruction. */
7938
7939 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
7940 if (reload_in_progress || code == NOT)
7941 {
7942 /* Reload doesn't know about the flags register, and doesn't know that
7943 it doesn't want to clobber it. */
7944 if (code != NOT)
7945 abort ();
7946 emit_insn (op);
7947 }
7948 else
7949 {
7950 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
7951 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
7952 }
7953
7954 /* Fix up the destination if needed. */
7955 if (dst != operands[0])
7956 emit_move_insn (operands[0], dst);
7957 }
7958
7959 /* Return TRUE or FALSE depending on whether the unary operator meets the
7960 appropriate constraints. */
7961
7962 int
7963 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
7964 enum machine_mode mode ATTRIBUTE_UNUSED,
7965 rtx operands[2] ATTRIBUTE_UNUSED)
7966 {
7967 /* If one of operands is memory, source and destination must match. */
7968 if ((GET_CODE (operands[0]) == MEM
7969 || GET_CODE (operands[1]) == MEM)
7970 && ! rtx_equal_p (operands[0], operands[1]))
7971 return FALSE;
7972 return TRUE;
7973 }
7974
7975 /* Generate code for floating point ABS or NEG. */
7976
7977 void
7978 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
7979 rtx operands[])
7980 {
7981 rtx mask, set, use, clob, dst, src;
7982 bool matching_memory;
7983 bool use_sse = false;
7984 bool vector_mode = VECTOR_MODE_P (mode);
7985 enum machine_mode elt_mode = mode;
7986 enum machine_mode vec_mode = VOIDmode;
7987
7988 if (vector_mode)
7989 {
7990 elt_mode = GET_MODE_INNER (mode);
7991 vec_mode = mode;
7992 use_sse = true;
7993 }
7994 if (TARGET_SSE_MATH)
7995 {
7996 if (mode == SFmode)
7997 {
7998 use_sse = true;
7999 vec_mode = V4SFmode;
8000 }
8001 else if (mode == DFmode && TARGET_SSE2)
8002 {
8003 use_sse = true;
8004 vec_mode = V2DFmode;
8005 }
8006 }
8007
8008 /* NEG and ABS performed with SSE use bitwise mask operations.
8009 Create the appropriate mask now. */
8010 if (use_sse)
8011 {
8012 HOST_WIDE_INT hi, lo;
8013 int shift = 63;
8014 rtvec v;
8015
8016 /* Find the sign bit, sign extended to 2*HWI. */
8017 if (elt_mode == SFmode)
8018 lo = 0x80000000, hi = lo < 0;
8019 else if (HOST_BITS_PER_WIDE_INT >= 64)
8020 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
8021 else
8022 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
8023
8024 /* If we're looking for the absolute value, then we want
8025 the complement. */
8026 if (code == ABS)
8027 lo = ~lo, hi = ~hi;
8028
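      /* Concretely: for SFmode the NEG mask is 0x80000000 and the ABS mask
         0x7fffffff; for DFmode the sign bit is bit 63, giving
         0x8000000000000000 and 0x7fffffffffffffff respectively.  */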
8029 /* Force this value into the low part of a fp vector constant. */
8030 mask = immed_double_const (lo, hi, elt_mode == SFmode ? SImode : DImode);
8031 mask = gen_lowpart (elt_mode, mask);
8032
8033 switch (mode)
8034 {
8035 case SFmode:
8036 v = gen_rtvec (4, mask, CONST0_RTX (SFmode),
8037 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
8038 break;
8039
8040 case DFmode:
8041 v = gen_rtvec (2, mask, CONST0_RTX (DFmode));
8042 break;
8043
8044 case V4SFmode:
8045 v = gen_rtvec (4, mask, mask, mask, mask);
8046 break;
8047
8048 case V2DFmode:
8049 v = gen_rtvec (2, mask, mask);
8050 break;
8051
8052 default:
8053 gcc_unreachable ();
8054 }
8055
8056 mask = gen_rtx_CONST_VECTOR (vec_mode, v);
8057 mask = force_reg (vec_mode, mask);
8058 }
8059 else
8060 {
8061 /* When not using SSE, we don't use the mask, but prefer to keep the
8062 same general form of the insn pattern to reduce duplication when
8063 it comes time to split. */
8064 mask = const0_rtx;
8065 }
8066
8067 dst = operands[0];
8068 src = operands[1];
8069
8070 /* If the destination is memory, and we don't have matching source
8071 operands, do things in registers. */
8072 matching_memory = false;
8073 if (MEM_P (dst))
8074 {
8075 if (rtx_equal_p (dst, src) && (!optimize || no_new_pseudos))
8076 matching_memory = true;
8077 else
8078 dst = gen_reg_rtx (mode);
8079 }
8080 if (MEM_P (src) && !matching_memory)
8081 src = force_reg (mode, src);
8082
8083 if (vector_mode)
8084 {
8085 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
8086 set = gen_rtx_SET (VOIDmode, dst, set);
8087 emit_insn (set);
8088 }
8089 else
8090 {
8091 set = gen_rtx_fmt_e (code, mode, src);
8092 set = gen_rtx_SET (VOIDmode, dst, set);
8093 use = gen_rtx_USE (VOIDmode, mask);
8094 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8095 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (3, set, use, clob)));
8096 }
8097
8098 if (dst != operands[0])
8099 emit_move_insn (operands[0], dst);
8100 }
8101
8102 /* Return TRUE or FALSE depending on whether the first SET in INSN
8103 has source and destination with matching CC modes, and that the
8104 CC mode is at least as constrained as REQ_MODE. */
8105
8106 int
8107 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
8108 {
8109 rtx set;
8110 enum machine_mode set_mode;
8111
8112 set = PATTERN (insn);
8113 if (GET_CODE (set) == PARALLEL)
8114 set = XVECEXP (set, 0, 0);
8115 if (GET_CODE (set) != SET)
8116 abort ();
8117 if (GET_CODE (SET_SRC (set)) != COMPARE)
8118 abort ();
8119
8120 set_mode = GET_MODE (SET_DEST (set));
8121 switch (set_mode)
8122 {
8123 case CCNOmode:
8124 if (req_mode != CCNOmode
8125 && (req_mode != CCmode
8126 || XEXP (SET_SRC (set), 1) != const0_rtx))
8127 return 0;
8128 break;
8129 case CCmode:
8130 if (req_mode == CCGCmode)
8131 return 0;
8132 /* FALLTHRU */
8133 case CCGCmode:
8134 if (req_mode == CCGOCmode || req_mode == CCNOmode)
8135 return 0;
8136 /* FALLTHRU */
8137 case CCGOCmode:
8138 if (req_mode == CCZmode)
8139 return 0;
8140 /* FALLTHRU */
8141 case CCZmode:
8142 break;
8143
8144 default:
8145 abort ();
8146 }
8147
8148 return (GET_MODE (SET_SRC (set)) == set_mode);
8149 }
8150
8151 /* Generate insn patterns to do an integer compare of OPERANDS. */
8152
8153 static rtx
8154 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
8155 {
8156 enum machine_mode cmpmode;
8157 rtx tmp, flags;
8158
8159 cmpmode = SELECT_CC_MODE (code, op0, op1);
8160 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
8161
8162 /* This is very simple, but making the interface the same as in the
8163 FP case makes the rest of the code easier. */
8164 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
8165 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
8166
8167 /* Return the test that should be put into the flags user, i.e.
8168 the bcc, scc, or cmov instruction. */
8169 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
8170 }
8171
8172 /* Figure out whether to use ordered or unordered fp comparisons.
8173 Return the appropriate mode to use. */
8174
8175 enum machine_mode
8176 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
8177 {
8178 /* ??? In order to make all comparisons reversible, we do all comparisons
8179 non-trapping when compiling for IEEE. Once gcc is able to distinguish
8180 all forms of trapping and nontrapping comparisons, we can make inequality
8181 comparisons trapping again, since it results in better code when using
8182 FCOM based compares. */
8183 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
8184 }
8185
8186 enum machine_mode
8187 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
8188 {
8189 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8190 return ix86_fp_compare_mode (code);
8191 switch (code)
8192 {
8193 /* Only zero flag is needed. */
8194 case EQ: /* ZF=0 */
8195 case NE: /* ZF!=0 */
8196 return CCZmode;
8197 /* Codes needing carry flag. */
8198 case GEU: /* CF=0 */
8199 case GTU: /* CF=0 & ZF=0 */
8200 case LTU: /* CF=1 */
8201 case LEU: /* CF=1 | ZF=1 */
8202 return CCmode;
8203 /* Codes possibly doable only with sign flag when
8204 comparing against zero. */
8205 case GE: /* SF=OF or SF=0 */
8206 case LT: /* SF<>OF or SF=1 */
8207 if (op1 == const0_rtx)
8208 return CCGOCmode;
8209 else
8210 /* For other cases Carry flag is not required. */
8211 return CCGCmode;
8212 /* Codes doable only with the sign flag when comparing
8213 against zero, but for which we lack a jump instruction,
8214 so we need to use relational tests against overflow,
8215 which thus needs to be zero. */
8216 case GT: /* ZF=0 & SF=OF */
8217 case LE: /* ZF=1 | SF<>OF */
8218 if (op1 == const0_rtx)
8219 return CCNOmode;
8220 else
8221 return CCGCmode;
8222 /* The strcmp pattern does (use flags), and combine may ask us for the
8223 proper mode. */
8224 case USE:
8225 return CCmode;
8226 default:
8227 abort ();
8228 }
8229 }
8230
8231 /* Return the fixed registers used for condition codes. */
8232
8233 static bool
8234 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
8235 {
8236 *p1 = FLAGS_REG;
8237 *p2 = FPSR_REG;
8238 return true;
8239 }
8240
8241 /* If two condition code modes are compatible, return a condition code
8242 mode which is compatible with both. Otherwise, return
8243 VOIDmode. */
8244
8245 static enum machine_mode
8246 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
8247 {
8248 if (m1 == m2)
8249 return m1;
8250
8251 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
8252 return VOIDmode;
8253
8254 if ((m1 == CCGCmode && m2 == CCGOCmode)
8255 || (m1 == CCGOCmode && m2 == CCGCmode))
8256 return CCGCmode;
8257
8258 switch (m1)
8259 {
8260 default:
8261 abort ();
8262
8263 case CCmode:
8264 case CCGCmode:
8265 case CCGOCmode:
8266 case CCNOmode:
8267 case CCZmode:
8268 switch (m2)
8269 {
8270 default:
8271 return VOIDmode;
8272
8273 case CCmode:
8274 case CCGCmode:
8275 case CCGOCmode:
8276 case CCNOmode:
8277 case CCZmode:
8278 return CCmode;
8279 }
8280
8281 case CCFPmode:
8282 case CCFPUmode:
8283 /* These are only compatible with themselves, which we already
8284 checked above. */
8285 return VOIDmode;
8286 }
8287 }
8288
8289 /* Return true if we should use an FCOMI instruction for this fp comparison. */
8290
8291 int
8292 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
8293 {
8294 enum rtx_code swapped_code = swap_condition (code);
8295 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
8296 || (ix86_fp_comparison_cost (swapped_code)
8297 == ix86_fp_comparison_fcomi_cost (swapped_code)));
8298 }
8299
8300 /* Swap, force into registers, or otherwise massage the two operands
8301 to a fp comparison. The operands are updated in place; the new
8302 comparison code is returned. */
8303
8304 static enum rtx_code
8305 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
8306 {
8307 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
8308 rtx op0 = *pop0, op1 = *pop1;
8309 enum machine_mode op_mode = GET_MODE (op0);
8310 int is_sse = SSE_REG_P (op0) || SSE_REG_P (op1);
8311
8312 /* All of the unordered compare instructions only work on registers.
8313 The same is true of the fcomi compare instructions. The same is
8314 true of the XFmode compare instructions if not comparing with
8315 zero (ftst insn is used in this case). */
8316
8317 if (!is_sse
8318 && (fpcmp_mode == CCFPUmode
8319 || (op_mode == XFmode
8320 && ! (standard_80387_constant_p (op0) == 1
8321 || standard_80387_constant_p (op1) == 1))
8322 || ix86_use_fcomi_compare (code)))
8323 {
8324 op0 = force_reg (op_mode, op0);
8325 op1 = force_reg (op_mode, op1);
8326 }
8327 else
8328 {
8329 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
8330 things around if they appear profitable, otherwise force op0
8331 into a register. */
8332
8333 if (standard_80387_constant_p (op0) == 0
8334 || (GET_CODE (op0) == MEM
8335 && ! (standard_80387_constant_p (op1) == 0
8336 || GET_CODE (op1) == MEM)))
8337 {
8338 rtx tmp;
8339 tmp = op0, op0 = op1, op1 = tmp;
8340 code = swap_condition (code);
8341 }
8342
8343 if (GET_CODE (op0) != REG)
8344 op0 = force_reg (op_mode, op0);
8345
8346 if (CONSTANT_P (op1))
8347 {
8348 int tmp = standard_80387_constant_p (op1);
8349 if (tmp == 0)
8350 op1 = validize_mem (force_const_mem (op_mode, op1));
8351 else if (tmp == 1)
8352 {
8353 if (TARGET_CMOVE)
8354 op1 = force_reg (op_mode, op1);
8355 }
8356 else
8357 op1 = force_reg (op_mode, op1);
8358 }
8359 }
8360
8361 /* Try to rearrange the comparison to make it cheaper. */
8362 if (ix86_fp_comparison_cost (code)
8363 > ix86_fp_comparison_cost (swap_condition (code))
8364 && (GET_CODE (op1) == REG || !no_new_pseudos))
8365 {
8366 rtx tmp;
8367 tmp = op0, op0 = op1, op1 = tmp;
8368 code = swap_condition (code);
8369 if (GET_CODE (op0) != REG)
8370 op0 = force_reg (op_mode, op0);
8371 }
8372
8373 *pop0 = op0;
8374 *pop1 = op1;
8375 return code;
8376 }
8377
8378 /* Convert comparison codes we use to represent FP comparison to integer
8379 code that will result in proper branch. Return UNKNOWN if no such code
8380 is available. */
8381
8382 enum rtx_code
8383 ix86_fp_compare_code_to_integer (enum rtx_code code)
8384 {
8385 switch (code)
8386 {
8387 case GT:
8388 return GTU;
8389 case GE:
8390 return GEU;
8391 case ORDERED:
8392 case UNORDERED:
8393 return code;
8394 break;
8395 case UNEQ:
8396 return EQ;
8397 break;
8398 case UNLT:
8399 return LTU;
8400 break;
8401 case UNLE:
8402 return LEU;
8403 break;
8404 case LTGT:
8405 return NE;
8406 break;
8407 default:
8408 return UNKNOWN;
8409 }
8410 }
8411
8412 /* Split comparison code CODE into comparisons we can do using branch
8413 instructions. BYPASS_CODE is comparison code for branch that will
8414 branch around FIRST_CODE and SECOND_CODE. If some of branches
8415 is not required, set value to UNKNOWN.
8416 We never require more than two branches. */
8417
8418 void
8419 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
8420 enum rtx_code *first_code,
8421 enum rtx_code *second_code)
8422 {
8423 *first_code = code;
8424 *bypass_code = UNKNOWN;
8425 *second_code = UNKNOWN;
8426
8427 /* The fcomi comparison sets flags as follows:
8428
8429 cmp ZF PF CF
8430 > 0 0 0
8431 < 0 0 1
8432 = 1 0 0
8433 un 1 1 1 */
8434
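  /* For example, LT maps onto the CF=1 test, but an unordered result also
     sets CF, so under IEEE we test UNLT and branch around it on UNORDERED
     (PF=1); NE instead needs a second branch taken on UNORDERED, since NE
     must also be true for unordered operands.  */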
8435 switch (code)
8436 {
8437 case GT: /* GTU - CF=0 & ZF=0 */
8438 case GE: /* GEU - CF=0 */
8439 case ORDERED: /* PF=0 */
8440 case UNORDERED: /* PF=1 */
8441 case UNEQ: /* EQ - ZF=1 */
8442 case UNLT: /* LTU - CF=1 */
8443 case UNLE: /* LEU - CF=1 | ZF=1 */
8444 case LTGT: /* EQ - ZF=0 */
8445 break;
8446 case LT: /* LTU - CF=1 - fails on unordered */
8447 *first_code = UNLT;
8448 *bypass_code = UNORDERED;
8449 break;
8450 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
8451 *first_code = UNLE;
8452 *bypass_code = UNORDERED;
8453 break;
8454 case EQ: /* EQ - ZF=1 - fails on unordered */
8455 *first_code = UNEQ;
8456 *bypass_code = UNORDERED;
8457 break;
8458 case NE: /* NE - ZF=0 - fails on unordered */
8459 *first_code = LTGT;
8460 *second_code = UNORDERED;
8461 break;
8462 case UNGE: /* GEU - CF=0 - fails on unordered */
8463 *first_code = GE;
8464 *second_code = UNORDERED;
8465 break;
8466 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
8467 *first_code = GT;
8468 *second_code = UNORDERED;
8469 break;
8470 default:
8471 abort ();
8472 }
8473 if (!TARGET_IEEE_FP)
8474 {
8475 *second_code = UNKNOWN;
8476 *bypass_code = UNKNOWN;
8477 }
8478 }
8479
8480 /* Return the cost of a comparison done with fcom + arithmetic operations on AX.
8481 All of the following functions use the number of instructions as the cost metric.
8482 In the future this should be tweaked to compute bytes for optimize_size and to
8483 take into account the performance of various instructions on various CPUs. */
8484 static int
8485 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
8486 {
8487 if (!TARGET_IEEE_FP)
8488 return 4;
8489 /* The cost of code output by ix86_expand_fp_compare. */
8490 switch (code)
8491 {
8492 case UNLE:
8493 case UNLT:
8494 case LTGT:
8495 case GT:
8496 case GE:
8497 case UNORDERED:
8498 case ORDERED:
8499 case UNEQ:
8500 return 4;
8501 break;
8502 case LT:
8503 case NE:
8504 case EQ:
8505 case UNGE:
8506 return 5;
8507 break;
8508 case LE:
8509 case UNGT:
8510 return 6;
8511 break;
8512 default:
8513 abort ();
8514 }
8515 }
8516
8517 /* Return cost of comparison done using fcomi operation.
8518 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8519 static int
8520 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
8521 {
8522 enum rtx_code bypass_code, first_code, second_code;
8523 /* Return arbitrarily high cost when instruction is not supported - this
8524 prevents gcc from using it. */
8525 if (!TARGET_CMOVE)
8526 return 1024;
8527 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8528 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
8529 }
8530
8531 /* Return cost of comparison done using sahf operation.
8532 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8533 static int
8534 ix86_fp_comparison_sahf_cost (enum rtx_code code)
8535 {
8536 enum rtx_code bypass_code, first_code, second_code;
8537 /* Return arbitrarily high cost when instruction is not preferred - this
8538 prevents gcc from using it. */
8539 if (!TARGET_USE_SAHF && !optimize_size)
8540 return 1024;
8541 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8542 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
8543 }
8544
8545 /* Compute cost of the comparison done using any method.
8546 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8547 static int
8548 ix86_fp_comparison_cost (enum rtx_code code)
8549 {
8550 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
8551 int min;
8552
8553 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
8554 sahf_cost = ix86_fp_comparison_sahf_cost (code);
8555
8556 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
8557 if (min > sahf_cost)
8558 min = sahf_cost;
8559 if (min > fcomi_cost)
8560 min = fcomi_cost;
8561 return min;
8562 }
8563
8564 /* Generate insn patterns to do a floating point compare of OPERANDS. */
8565
8566 static rtx
8567 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
8568 rtx *second_test, rtx *bypass_test)
8569 {
8570 enum machine_mode fpcmp_mode, intcmp_mode;
8571 rtx tmp, tmp2;
8572 int cost = ix86_fp_comparison_cost (code);
8573 enum rtx_code bypass_code, first_code, second_code;
8574
8575 fpcmp_mode = ix86_fp_compare_mode (code);
8576 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
8577
8578 if (second_test)
8579 *second_test = NULL_RTX;
8580 if (bypass_test)
8581 *bypass_test = NULL_RTX;
8582
8583 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8584
8585 /* Do fcomi/sahf based test when profitable. */
8586 if ((bypass_code == UNKNOWN || bypass_test)
8587 && (second_code == UNKNOWN || second_test)
8588 && ix86_fp_comparison_arithmetics_cost (code) > cost)
8589 {
8590 if (TARGET_CMOVE)
8591 {
8592 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8593 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
8594 tmp);
8595 emit_insn (tmp);
8596 }
8597 else
8598 {
8599 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8600 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8601 if (!scratch)
8602 scratch = gen_reg_rtx (HImode);
8603 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8604 emit_insn (gen_x86_sahf_1 (scratch));
8605 }
8606
8607 /* The FP codes work out to act like unsigned. */
8608 intcmp_mode = fpcmp_mode;
8609 code = first_code;
8610 if (bypass_code != UNKNOWN)
8611 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
8612 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8613 const0_rtx);
8614 if (second_code != UNKNOWN)
8615 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
8616 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8617 const0_rtx);
8618 }
8619 else
8620 {
8621 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
8622 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8623 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8624 if (!scratch)
8625 scratch = gen_reg_rtx (HImode);
8626 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8627
8628 /* In the unordered case, we have to check C2 for NaN's, which
8629 doesn't happen to work out to anything nice combination-wise.
8630 So do some bit twiddling on the value we've got in AH to come
8631 up with an appropriate set of condition codes. */
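      /* In the high byte delivered by fnstsw, C0 is 0x01, C2 is 0x04 and
         C3 is 0x40, so 0x45 selects C3|C2|C0; e.g. GT is true exactly when
         all three are clear, which is why the GT case tests 0x45 and then
         uses EQ on the result.  */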
8632
8633 intcmp_mode = CCNOmode;
8634 switch (code)
8635 {
8636 case GT:
8637 case UNGT:
8638 if (code == GT || !TARGET_IEEE_FP)
8639 {
8640 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8641 code = EQ;
8642 }
8643 else
8644 {
8645 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8646 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8647 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
8648 intcmp_mode = CCmode;
8649 code = GEU;
8650 }
8651 break;
8652 case LT:
8653 case UNLT:
8654 if (code == LT && TARGET_IEEE_FP)
8655 {
8656 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8657 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
8658 intcmp_mode = CCmode;
8659 code = EQ;
8660 }
8661 else
8662 {
8663 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
8664 code = NE;
8665 }
8666 break;
8667 case GE:
8668 case UNGE:
8669 if (code == GE || !TARGET_IEEE_FP)
8670 {
8671 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
8672 code = EQ;
8673 }
8674 else
8675 {
8676 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8677 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8678 GEN_INT (0x01)));
8679 code = NE;
8680 }
8681 break;
8682 case LE:
8683 case UNLE:
8684 if (code == LE && TARGET_IEEE_FP)
8685 {
8686 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8687 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8688 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8689 intcmp_mode = CCmode;
8690 code = LTU;
8691 }
8692 else
8693 {
8694 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8695 code = NE;
8696 }
8697 break;
8698 case EQ:
8699 case UNEQ:
8700 if (code == EQ && TARGET_IEEE_FP)
8701 {
8702 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8703 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8704 intcmp_mode = CCmode;
8705 code = EQ;
8706 }
8707 else
8708 {
8709 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8710 code = NE;
8711 break;
8712 }
8713 break;
8714 case NE:
8715 case LTGT:
8716 if (code == NE && TARGET_IEEE_FP)
8717 {
8718 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8719 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8720 GEN_INT (0x40)));
8721 code = NE;
8722 }
8723 else
8724 {
8725 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8726 code = EQ;
8727 }
8728 break;
8729
8730 case UNORDERED:
8731 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8732 code = NE;
8733 break;
8734 case ORDERED:
8735 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8736 code = EQ;
8737 break;
8738
8739 default:
8740 abort ();
8741 }
8742 }
8743
8744 /* Return the test that should be put into the flags user, i.e.
8745 the bcc, scc, or cmov instruction. */
8746 return gen_rtx_fmt_ee (code, VOIDmode,
8747 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8748 const0_rtx);
8749 }
8750
8751 rtx
8752 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
8753 {
8754 rtx op0, op1, ret;
8755 op0 = ix86_compare_op0;
8756 op1 = ix86_compare_op1;
8757
8758 if (second_test)
8759 *second_test = NULL_RTX;
8760 if (bypass_test)
8761 *bypass_test = NULL_RTX;
8762
8763 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8764 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
8765 second_test, bypass_test);
8766 else
8767 ret = ix86_expand_int_compare (code, op0, op1);
8768
8769 return ret;
8770 }
8771
8772 /* Return true if the CODE will result in nontrivial jump sequence. */
8773 bool
8774 ix86_fp_jump_nontrivial_p (enum rtx_code code)
8775 {
8776 enum rtx_code bypass_code, first_code, second_code;
8777 if (!TARGET_CMOVE)
8778 return true;
8779 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8780 return bypass_code != UNKNOWN || second_code != UNKNOWN;
8781 }
8782
8783 void
8784 ix86_expand_branch (enum rtx_code code, rtx label)
8785 {
8786 rtx tmp;
8787
8788 switch (GET_MODE (ix86_compare_op0))
8789 {
8790 case QImode:
8791 case HImode:
8792 case SImode:
8793 simple:
8794 tmp = ix86_expand_compare (code, NULL, NULL);
8795 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8796 gen_rtx_LABEL_REF (VOIDmode, label),
8797 pc_rtx);
8798 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
8799 return;
8800
8801 case SFmode:
8802 case DFmode:
8803 case XFmode:
8804 {
8805 rtvec vec;
8806 int use_fcomi;
8807 enum rtx_code bypass_code, first_code, second_code;
8808
8809 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
8810 &ix86_compare_op1);
8811
8812 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8813
8814 /* Check whether we will use the natural sequence with one jump. If
8815 so, we can expand the jump early. Otherwise delay expansion by
8816 creating a compound insn so as not to confuse the optimizers. */
8817 if (bypass_code == UNKNOWN && second_code == UNKNOWN
8818 && TARGET_CMOVE)
8819 {
8820 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
8821 gen_rtx_LABEL_REF (VOIDmode, label),
8822 pc_rtx, NULL_RTX, NULL_RTX);
8823 }
8824 else
8825 {
8826 tmp = gen_rtx_fmt_ee (code, VOIDmode,
8827 ix86_compare_op0, ix86_compare_op1);
8828 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8829 gen_rtx_LABEL_REF (VOIDmode, label),
8830 pc_rtx);
8831 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
8832
8833 use_fcomi = ix86_use_fcomi_compare (code);
8834 vec = rtvec_alloc (3 + !use_fcomi);
8835 RTVEC_ELT (vec, 0) = tmp;
8836 RTVEC_ELT (vec, 1)
8837 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
8838 RTVEC_ELT (vec, 2)
8839 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
8840 if (! use_fcomi)
8841 RTVEC_ELT (vec, 3)
8842 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
8843
8844 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
8845 }
8846 return;
8847 }
8848
8849 case DImode:
8850 if (TARGET_64BIT)
8851 goto simple;
8852 /* Expand DImode branch into multiple compare+branch. */
8853 {
8854 rtx lo[2], hi[2], label2;
8855 enum rtx_code code1, code2, code3;
8856
8857 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
8858 {
8859 tmp = ix86_compare_op0;
8860 ix86_compare_op0 = ix86_compare_op1;
8861 ix86_compare_op1 = tmp;
8862 code = swap_condition (code);
8863 }
8864 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
8865 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
8866
8867 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
8868 avoid two branches. This costs one extra insn, so disable when
8869 optimizing for size. */
8870
8871 if ((code == EQ || code == NE)
8872 && (!optimize_size
8873 || hi[1] == const0_rtx || lo[1] == const0_rtx))
8874 {
8875 rtx xor0, xor1;
8876
8877 xor1 = hi[0];
8878 if (hi[1] != const0_rtx)
8879 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
8880 NULL_RTX, 0, OPTAB_WIDEN);
8881
8882 xor0 = lo[0];
8883 if (lo[1] != const0_rtx)
8884 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
8885 NULL_RTX, 0, OPTAB_WIDEN);
8886
8887 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
8888 NULL_RTX, 0, OPTAB_WIDEN);
8889
8890 ix86_compare_op0 = tmp;
8891 ix86_compare_op1 = const0_rtx;
8892 ix86_expand_branch (code, label);
8893 return;
8894 }
8895
8896 /* Otherwise, if we are doing a less-than or greater-than-or-equal
8897 comparison, op1 is a constant, and the low word is zero, then we can
8898 just examine the high word. */
8899
8900 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
8901 switch (code)
8902 {
8903 case LT: case LTU: case GE: case GEU:
8904 ix86_compare_op0 = hi[0];
8905 ix86_compare_op1 = hi[1];
8906 ix86_expand_branch (code, label);
8907 return;
8908 default:
8909 break;
8910 }
8911
8912 /* Otherwise, we need two or three jumps. */
8913
8914 label2 = gen_label_rtx ();
8915
8916 code1 = code;
8917 code2 = swap_condition (code);
8918 code3 = unsigned_condition (code);
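      /* The low words always compare as unsigned, whatever the signedness
         of the full DImode comparison, which is why code3 is the unsigned
         variant.  */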
8919
8920 switch (code)
8921 {
8922 case LT: case GT: case LTU: case GTU:
8923 break;
8924
8925 case LE: code1 = LT; code2 = GT; break;
8926 case GE: code1 = GT; code2 = LT; break;
8927 case LEU: code1 = LTU; code2 = GTU; break;
8928 case GEU: code1 = GTU; code2 = LTU; break;
8929
8930 case EQ: code1 = UNKNOWN; code2 = NE; break;
8931 case NE: code2 = UNKNOWN; break;
8932
8933 default:
8934 abort ();
8935 }
8936
8937 /*
8938 * a < b =>
8939 * if (hi(a) < hi(b)) goto true;
8940 * if (hi(a) > hi(b)) goto false;
8941 * if (lo(a) < lo(b)) goto true;
8942 * false:
8943 */
8944
8945 ix86_compare_op0 = hi[0];
8946 ix86_compare_op1 = hi[1];
8947
8948 if (code1 != UNKNOWN)
8949 ix86_expand_branch (code1, label);
8950 if (code2 != UNKNOWN)
8951 ix86_expand_branch (code2, label2);
8952
8953 ix86_compare_op0 = lo[0];
8954 ix86_compare_op1 = lo[1];
8955 ix86_expand_branch (code3, label);
8956
8957 if (code2 != UNKNOWN)
8958 emit_label (label2);
8959 return;
8960 }
8961
8962 default:
8963 abort ();
8964 }
8965 }
8966
8967 /* Split branch based on floating point condition. */
8968 void
8969 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
8970 rtx target1, rtx target2, rtx tmp, rtx pushed)
8971 {
8972 rtx second, bypass;
8973 rtx label = NULL_RTX;
8974 rtx condition;
8975 int bypass_probability = -1, second_probability = -1, probability = -1;
8976 rtx i;
8977
8978 if (target2 != pc_rtx)
8979 {
8980 rtx tmp = target2;
8981 code = reverse_condition_maybe_unordered (code);
8982 target2 = target1;
8983 target1 = tmp;
8984 }
8985
8986 condition = ix86_expand_fp_compare (code, op1, op2,
8987 tmp, &second, &bypass);
8988
8989 /* Remove pushed operand from stack. */
8990 if (pushed)
8991 ix86_free_from_memory (GET_MODE (pushed));
8992
8993 if (split_branch_probability >= 0)
8994 {
8995 /* Distribute the probabilities across the jumps.
8996 Assume that BYPASS and SECOND always test
8997 for UNORDERED. */
8998 probability = split_branch_probability;
8999
9000 /* A value of 1 is low enough that the probability does not need
9001 to be updated. Later we may run some experiments and see
9002 whether unordered values are more frequent in practice. */
9003 if (bypass)
9004 bypass_probability = 1;
9005 if (second)
9006 second_probability = 1;
9007 }
9008 if (bypass != NULL_RTX)
9009 {
9010 label = gen_label_rtx ();
9011 i = emit_jump_insn (gen_rtx_SET
9012 (VOIDmode, pc_rtx,
9013 gen_rtx_IF_THEN_ELSE (VOIDmode,
9014 bypass,
9015 gen_rtx_LABEL_REF (VOIDmode,
9016 label),
9017 pc_rtx)));
9018 if (bypass_probability >= 0)
9019 REG_NOTES (i)
9020 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9021 GEN_INT (bypass_probability),
9022 REG_NOTES (i));
9023 }
9024 i = emit_jump_insn (gen_rtx_SET
9025 (VOIDmode, pc_rtx,
9026 gen_rtx_IF_THEN_ELSE (VOIDmode,
9027 condition, target1, target2)));
9028 if (probability >= 0)
9029 REG_NOTES (i)
9030 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9031 GEN_INT (probability),
9032 REG_NOTES (i));
9033 if (second != NULL_RTX)
9034 {
9035 i = emit_jump_insn (gen_rtx_SET
9036 (VOIDmode, pc_rtx,
9037 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
9038 target2)));
9039 if (second_probability >= 0)
9040 REG_NOTES (i)
9041 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9042 GEN_INT (second_probability),
9043 REG_NOTES (i));
9044 }
9045 if (label != NULL_RTX)
9046 emit_label (label);
9047 }
9048
9049 int
9050 ix86_expand_setcc (enum rtx_code code, rtx dest)
9051 {
9052 rtx ret, tmp, tmpreg, equiv;
9053 rtx second_test, bypass_test;
9054
9055 if (GET_MODE (ix86_compare_op0) == DImode
9056 && !TARGET_64BIT)
9057 return 0; /* FAIL */
9058
9059 if (GET_MODE (dest) != QImode)
9060 abort ();
9061
9062 ret = ix86_expand_compare (code, &second_test, &bypass_test);
9063 PUT_MODE (ret, QImode);
9064
9065 tmp = dest;
9066 tmpreg = dest;
9067
9068 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
9069 if (bypass_test || second_test)
9070 {
9071 rtx test = second_test;
9072 int bypass = 0;
9073 rtx tmp2 = gen_reg_rtx (QImode);
9074 if (bypass_test)
9075 {
9076 if (second_test)
9077 abort ();
9078 test = bypass_test;
9079 bypass = 1;
9080 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
9081 }
9082 PUT_MODE (test, QImode);
9083 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
9084
9085 if (bypass)
9086 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
9087 else
9088 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
9089 }
9090
9091 /* Attach a REG_EQUAL note describing the comparison result. */
9092 equiv = simplify_gen_relational (code, QImode,
9093 GET_MODE (ix86_compare_op0),
9094 ix86_compare_op0, ix86_compare_op1);
9095 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
9096
9097 return 1; /* DONE */
9098 }
9099
9100 /* Expand comparison setting or clearing carry flag. Return true when
9101 successful and set pop for the operation. */
9102 static bool
9103 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
9104 {
9105 enum machine_mode mode =
9106 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
9107
9108 /* Do not handle DImode compares, which go through a special path. Also we
9109 can't deal with FP compares yet; this is possible to add. */
9110 if ((mode == DImode && !TARGET_64BIT))
9111 return false;
9112 if (FLOAT_MODE_P (mode))
9113 {
9114 rtx second_test = NULL, bypass_test = NULL;
9115 rtx compare_op, compare_seq;
9116
9117 /* Shortcut: the following common codes never translate into carry flag compares. */
9118 if (code == EQ || code == NE || code == UNEQ || code == LTGT
9119 || code == ORDERED || code == UNORDERED)
9120 return false;
9121
9122 /* These comparisons require the zero flag; swap the operands so they won't. */
9123 if ((code == GT || code == UNLE || code == LE || code == UNGT)
9124 && !TARGET_IEEE_FP)
9125 {
9126 rtx tmp = op0;
9127 op0 = op1;
9128 op1 = tmp;
9129 code = swap_condition (code);
9130 }
9131
9132 /* Try to expand the comparison and verify that we end up with a carry flag
9133 based comparison. This fails to be true only when we decide to expand the
9134 comparison using arithmetic, which is not a common scenario. */
9135 start_sequence ();
9136 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9137 &second_test, &bypass_test);
9138 compare_seq = get_insns ();
9139 end_sequence ();
9140
9141 if (second_test || bypass_test)
9142 return false;
9143 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9144 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9145 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
9146 else
9147 code = GET_CODE (compare_op);
9148 if (code != LTU && code != GEU)
9149 return false;
9150 emit_insn (compare_seq);
9151 *pop = compare_op;
9152 return true;
9153 }
9154 if (!INTEGRAL_MODE_P (mode))
9155 return false;
9156 switch (code)
9157 {
9158 case LTU:
9159 case GEU:
9160 break;
9161
9162 /* Convert a==0 into (unsigned)a<1. */
9163 case EQ:
9164 case NE:
9165 if (op1 != const0_rtx)
9166 return false;
9167 op1 = const1_rtx;
9168 code = (code == EQ ? LTU : GEU);
9169 break;
9170
9171 /* Convert a>b into b<a or a>=b+1. */
9172 case GTU:
9173 case LEU:
9174 if (GET_CODE (op1) == CONST_INT)
9175 {
9176 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
9177 /* Bail out on overflow. We still can swap operands but that
9178 would force loading of the constant into register. */
9179 if (op1 == const0_rtx
9180 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
9181 return false;
9182 code = (code == GTU ? GEU : LTU);
9183 }
9184 else
9185 {
9186 rtx tmp = op1;
9187 op1 = op0;
9188 op0 = tmp;
9189 code = (code == GTU ? LTU : GEU);
9190 }
9191 break;
9192
9193 /* Convert a>=0 into (unsigned)a<0x80000000. */
9194 case LT:
9195 case GE:
9196 if (mode == DImode || op1 != const0_rtx)
9197 return false;
9198 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
9199 code = (code == LT ? GEU : LTU);
9200 break;
9201 case LE:
9202 case GT:
9203 if (mode == DImode || op1 != constm1_rtx)
9204 return false;
9205 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
9206 code = (code == LE ? GEU : LTU);
9207 break;
9208
9209 default:
9210 return false;
9211 }
9212 /* Swapping operands may cause constant to appear as first operand. */
9213 if (!nonimmediate_operand (op0, VOIDmode))
9214 {
9215 if (no_new_pseudos)
9216 return false;
9217 op0 = force_reg (mode, op0);
9218 }
9219 ix86_compare_op0 = op0;
9220 ix86_compare_op1 = op1;
9221 *pop = ix86_expand_compare (code, NULL, NULL);
9222 if (GET_CODE (*pop) != LTU && GET_CODE (*pop) != GEU)
9223 abort ();
9224 return true;
9225 }
9226
9227 int
9228 ix86_expand_int_movcc (rtx operands[])
9229 {
9230 enum rtx_code code = GET_CODE (operands[1]), compare_code;
9231 rtx compare_seq, compare_op;
9232 rtx second_test, bypass_test;
9233 enum machine_mode mode = GET_MODE (operands[0]);
9234 bool sign_bit_compare_p = false;
9235
9236 start_sequence ();
9237 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9238 compare_seq = get_insns ();
9239 end_sequence ();
9240
9241 compare_code = GET_CODE (compare_op);
9242
9243 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
9244 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
9245 sign_bit_compare_p = true;
9246
9247 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
9248 HImode insns, we'd be swallowed in word prefix ops. */
9249
9250 if ((mode != HImode || TARGET_FAST_PREFIX)
9251 && (mode != DImode || TARGET_64BIT)
9252 && GET_CODE (operands[2]) == CONST_INT
9253 && GET_CODE (operands[3]) == CONST_INT)
9254 {
9255 rtx out = operands[0];
9256 HOST_WIDE_INT ct = INTVAL (operands[2]);
9257 HOST_WIDE_INT cf = INTVAL (operands[3]);
9258 HOST_WIDE_INT diff;
9259
9260 diff = ct - cf;
9261 /* Sign bit compares are better done using shifts than by using
9262 sbb. */
9263 if (sign_bit_compare_p
9264 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
9265 ix86_compare_op1, &compare_op))
9266 {
9267 /* Detect overlap between destination and compare sources. */
9268 rtx tmp = out;
9269
9270 if (!sign_bit_compare_p)
9271 {
9272 bool fpcmp = false;
9273
9274 compare_code = GET_CODE (compare_op);
9275
9276 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9277 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9278 {
9279 fpcmp = true;
9280 compare_code = ix86_fp_compare_code_to_integer (compare_code);
9281 }
9282
9283 /* To simplify rest of code, restrict to the GEU case. */
9284 if (compare_code == LTU)
9285 {
9286 HOST_WIDE_INT tmp = ct;
9287 ct = cf;
9288 cf = tmp;
9289 compare_code = reverse_condition (compare_code);
9290 code = reverse_condition (code);
9291 }
9292 else
9293 {
9294 if (fpcmp)
9295 PUT_CODE (compare_op,
9296 reverse_condition_maybe_unordered
9297 (GET_CODE (compare_op)));
9298 else
9299 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
9300 }
9301 diff = ct - cf;
9302
9303 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
9304 || reg_overlap_mentioned_p (out, ix86_compare_op1))
9305 tmp = gen_reg_rtx (mode);
9306
9307 if (mode == DImode)
9308 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
9309 else
9310 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
9311 }
9312 else
9313 {
9314 if (code == GT || code == GE)
9315 code = reverse_condition (code);
9316 else
9317 {
9318 HOST_WIDE_INT tmp = ct;
9319 ct = cf;
9320 cf = tmp;
9321 diff = ct - cf;
9322 }
9323 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
9324 ix86_compare_op1, VOIDmode, 0, -1);
9325 }
9326
9327 if (diff == 1)
9328 {
9329 /*
9330 * cmpl op0,op1
9331 * sbbl dest,dest
9332 * [addl $ct, dest]
9333 *
9334 * Size 5 - 8.
9335 */
9336 if (ct)
9337 tmp = expand_simple_binop (mode, PLUS,
9338 tmp, GEN_INT (ct),
9339 copy_rtx (tmp), 1, OPTAB_DIRECT);
9340 }
9341 else if (cf == -1)
9342 {
9343 /*
9344 * cmpl op0,op1
9345 * sbbl dest,dest
9346 * orl $ct, dest
9347 *
9348 * Size 8.
9349 */
9350 tmp = expand_simple_binop (mode, IOR,
9351 tmp, GEN_INT (ct),
9352 copy_rtx (tmp), 1, OPTAB_DIRECT);
9353 }
9354 else if (diff == -1 && ct)
9355 {
9356 /*
9357 * cmpl op0,op1
9358 * sbbl dest,dest
9359 * notl dest
9360 * [addl $cf, dest]
9361 *
9362 * Size 8 - 11.
9363 */
9364 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9365 if (cf)
9366 tmp = expand_simple_binop (mode, PLUS,
9367 copy_rtx (tmp), GEN_INT (cf),
9368 copy_rtx (tmp), 1, OPTAB_DIRECT);
9369 }
9370 else
9371 {
9372 /*
9373 * cmpl op0,op1
9374 * sbbl dest,dest
9375 * [notl dest]
9376 * andl cf - ct, dest
9377 * [addl $ct, dest]
9378 *
9379 * Size 8 - 11.
9380 */
9381
9382 if (cf == 0)
9383 {
9384 cf = ct;
9385 ct = 0;
9386 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9387 }
9388
9389 tmp = expand_simple_binop (mode, AND,
9390 copy_rtx (tmp),
9391 gen_int_mode (cf - ct, mode),
9392 copy_rtx (tmp), 1, OPTAB_DIRECT);
9393 if (ct)
9394 tmp = expand_simple_binop (mode, PLUS,
9395 copy_rtx (tmp), GEN_INT (ct),
9396 copy_rtx (tmp), 1, OPTAB_DIRECT);
9397 }
9398
9399 if (!rtx_equal_p (tmp, out))
9400 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
9401
9402 return 1; /* DONE */
9403 }
9404
9405 if (diff < 0)
9406 {
9407 HOST_WIDE_INT tmp;
9408 tmp = ct, ct = cf, cf = tmp;
9409 diff = -diff;
9410 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9411 {
9412 /* We may be reversing an unordered compare to a normal compare, which
9413 is not valid in general (we may convert a non-trapping condition
9414 into a trapping one); however, on i386 we currently emit all
9415 comparisons unordered. */
9416 compare_code = reverse_condition_maybe_unordered (compare_code);
9417 code = reverse_condition_maybe_unordered (code);
9418 }
9419 else
9420 {
9421 compare_code = reverse_condition (compare_code);
9422 code = reverse_condition (code);
9423 }
9424 }
9425
9426 compare_code = UNKNOWN;
9427 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
9428 && GET_CODE (ix86_compare_op1) == CONST_INT)
9429 {
9430 if (ix86_compare_op1 == const0_rtx
9431 && (code == LT || code == GE))
9432 compare_code = code;
9433 else if (ix86_compare_op1 == constm1_rtx)
9434 {
9435 if (code == LE)
9436 compare_code = LT;
9437 else if (code == GT)
9438 compare_code = GE;
9439 }
9440 }
9441
9442 /* Optimize dest = (op0 < 0) ? -1 : cf. */
9443 if (compare_code != UNKNOWN
9444 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
9445 && (cf == -1 || ct == -1))
9446 {
9447 /* If lea code below could be used, only optimize
9448 if it results in a 2 insn sequence. */
9449
9450 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
9451 || diff == 3 || diff == 5 || diff == 9)
9452 || (compare_code == LT && ct == -1)
9453 || (compare_code == GE && cf == -1))
9454 {
9455 /*
9456 * notl op1 (if necessary)
9457 * sarl $31, op1
9458 * orl cf, op1
9459 */
9460 if (ct != -1)
9461 {
9462 cf = ct;
9463 ct = -1;
9464 code = reverse_condition (code);
9465 }
9466
9467 out = emit_store_flag (out, code, ix86_compare_op0,
9468 ix86_compare_op1, VOIDmode, 0, -1);
9469
9470 out = expand_simple_binop (mode, IOR,
9471 out, GEN_INT (cf),
9472 out, 1, OPTAB_DIRECT);
9473 if (out != operands[0])
9474 emit_move_insn (operands[0], out);
9475
9476 return 1; /* DONE */
9477 }
9478 }
9479
9480
9481 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
9482 || diff == 3 || diff == 5 || diff == 9)
9483 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
9484 && (mode != DImode
9485 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
9486 {
9487 /*
9488 * xorl dest,dest
9489 * cmpl op1,op2
9490 * setcc dest
9491 * lea cf(dest*(ct-cf)),dest
9492 *
9493 * Size 14.
9494 *
9495 * This also catches the degenerate setcc-only case.
9496 */
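/* For example, with ct = 5 and cf = 2 (diff = 3), the 0/1 setcc result is
   combined as out * 2 + out + 2, which fits a single lea such as
   leal 2(%eax,%eax,2), %eax. */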
9497
9498 rtx tmp;
9499 int nops;
9500
9501 out = emit_store_flag (out, code, ix86_compare_op0,
9502 ix86_compare_op1, VOIDmode, 0, 1);
9503
9504 nops = 0;
9505 /* On x86_64 the lea instruction operates on Pmode, so we need
9506 to get the arithmetic done in the proper mode to match. */
9507 if (diff == 1)
9508 tmp = copy_rtx (out);
9509 else
9510 {
9511 rtx out1;
9512 out1 = copy_rtx (out);
9513 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
9514 nops++;
9515 if (diff & 1)
9516 {
9517 tmp = gen_rtx_PLUS (mode, tmp, out1);
9518 nops++;
9519 }
9520 }
9521 if (cf != 0)
9522 {
9523 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
9524 nops++;
9525 }
9526 if (!rtx_equal_p (tmp, out))
9527 {
9528 if (nops == 1)
9529 out = force_operand (tmp, copy_rtx (out));
9530 else
9531 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
9532 }
9533 if (!rtx_equal_p (out, operands[0]))
9534 emit_move_insn (operands[0], copy_rtx (out));
9535
9536 return 1; /* DONE */
9537 }
9538
9539 /*
9540 * General case: Jumpful:
9541 * xorl dest,dest cmpl op1, op2
9542 * cmpl op1, op2 movl ct, dest
9543 * setcc dest jcc 1f
9544 * decl dest movl cf, dest
9545 * andl (cf-ct),dest 1:
9546 * addl ct,dest
9547 *
9548 * Size 20. Size 14.
9549 *
9550 * This is reasonably steep, but branch mispredict costs are
9551 * high on modern cpus, so consider failing only if optimizing
9552 * for space.
9553 */
9554
9555 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
9556 && BRANCH_COST >= 2)
9557 {
9558 if (cf == 0)
9559 {
9560 cf = ct;
9561 ct = 0;
9562 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9563 /* We may be reversing an unordered compare to a normal compare,
9564 which is not valid in general (we may convert a non-trapping
9565 condition into a trapping one); however, on i386 we currently
9566 emit all comparisons unordered. */
9567 code = reverse_condition_maybe_unordered (code);
9568 else
9569 {
9570 code = reverse_condition (code);
9571 if (compare_code != UNKNOWN)
9572 compare_code = reverse_condition (compare_code);
9573 }
9574 }
9575
9576 if (compare_code != UNKNOWN)
9577 {
9578 /* notl op1 (if needed)
9579 sarl $31, op1
9580 andl (cf-ct), op1
9581 addl ct, op1
9582
9583 For x < 0 (resp. x <= -1) there will be no notl,
9584 so if possible swap the constants to get rid of the
9585 complement.
9586 True/false will be -1/0 while code below (store flag
9587 followed by decrement) is 0/-1, so the constants need
9588 to be exchanged once more. */
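/* That is, the value produced below is an all-ones/all-zeros mask;
   the later AND with (cf - ct) and ADD of ct then select between
   cf and ct without a branch. */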
9589
9590 if (compare_code == GE || !cf)
9591 {
9592 code = reverse_condition (code);
9593 compare_code = LT;
9594 }
9595 else
9596 {
9597 HOST_WIDE_INT tmp = cf;
9598 cf = ct;
9599 ct = tmp;
9600 }
9601
9602 out = emit_store_flag (out, code, ix86_compare_op0,
9603 ix86_compare_op1, VOIDmode, 0, -1);
9604 }
9605 else
9606 {
9607 out = emit_store_flag (out, code, ix86_compare_op0,
9608 ix86_compare_op1, VOIDmode, 0, 1);
9609
9610 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
9611 copy_rtx (out), 1, OPTAB_DIRECT);
9612 }
9613
9614 out = expand_simple_binop (mode, AND, copy_rtx (out),
9615 gen_int_mode (cf - ct, mode),
9616 copy_rtx (out), 1, OPTAB_DIRECT);
9617 if (ct)
9618 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
9619 copy_rtx (out), 1, OPTAB_DIRECT);
9620 if (!rtx_equal_p (out, operands[0]))
9621 emit_move_insn (operands[0], copy_rtx (out));
9622
9623 return 1; /* DONE */
9624 }
9625 }
9626
9627 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
9628 {
9629 /* Try a few things more with specific constants and a variable. */
9630
9631 optab op;
9632 rtx var, orig_out, out, tmp;
9633
9634 if (BRANCH_COST <= 2)
9635 return 0; /* FAIL */
9636
9637 /* If one of the two operands is an interesting constant, load a
9638 constant with the above and mask it in with a logical operation. */
9639
9640 if (GET_CODE (operands[2]) == CONST_INT)
9641 {
9642 var = operands[3];
9643 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
9644 operands[3] = constm1_rtx, op = and_optab;
9645 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
9646 operands[3] = const0_rtx, op = ior_optab;
9647 else
9648 return 0; /* FAIL */
9649 }
9650 else if (GET_CODE (operands[3]) == CONST_INT)
9651 {
9652 var = operands[2];
9653 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
9654 operands[2] = constm1_rtx, op = and_optab;
9655 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
9656 operands[2] = const0_rtx, op = ior_optab;
9657 else
9658 return 0; /* FAIL */
9659 }
9660 else
9661 return 0; /* FAIL */
9662
9663 orig_out = operands[0];
9664 tmp = gen_reg_rtx (mode);
9665 operands[0] = tmp;
9666
9667 /* Recurse to get the constant loaded. */
9668 if (ix86_expand_int_movcc (operands) == 0)
9669 return 0; /* FAIL */
9670
9671 /* Mask in the interesting variable. */
9672 out = expand_binop (mode, op, var, tmp, orig_out, 0,
9673 OPTAB_WIDEN);
9674 if (!rtx_equal_p (out, orig_out))
9675 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
9676
9677 return 1; /* DONE */
9678 }
9679
9680 /*
9681 * For comparison with above,
9682 *
9683 * movl cf,dest
9684 * movl ct,tmp
9685 * cmpl op1,op2
9686 * cmovcc tmp,dest
9687 *
9688 * Size 15.
9689 */
9690
9691 if (! nonimmediate_operand (operands[2], mode))
9692 operands[2] = force_reg (mode, operands[2]);
9693 if (! nonimmediate_operand (operands[3], mode))
9694 operands[3] = force_reg (mode, operands[3]);
9695
9696 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9697 {
9698 rtx tmp = gen_reg_rtx (mode);
9699 emit_move_insn (tmp, operands[3]);
9700 operands[3] = tmp;
9701 }
9702 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9703 {
9704 rtx tmp = gen_reg_rtx (mode);
9705 emit_move_insn (tmp, operands[2]);
9706 operands[2] = tmp;
9707 }
9708
9709 if (! register_operand (operands[2], VOIDmode)
9710 && (mode == QImode
9711 || ! register_operand (operands[3], VOIDmode)))
9712 operands[2] = force_reg (mode, operands[2]);
9713
9714 if (mode == QImode
9715 && ! register_operand (operands[3], VOIDmode))
9716 operands[3] = force_reg (mode, operands[3]);
9717
9718 emit_insn (compare_seq);
9719 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9720 gen_rtx_IF_THEN_ELSE (mode,
9721 compare_op, operands[2],
9722 operands[3])));
9723 if (bypass_test)
9724 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
9725 gen_rtx_IF_THEN_ELSE (mode,
9726 bypass_test,
9727 copy_rtx (operands[3]),
9728 copy_rtx (operands[0]))));
9729 if (second_test)
9730 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
9731 gen_rtx_IF_THEN_ELSE (mode,
9732 second_test,
9733 copy_rtx (operands[2]),
9734 copy_rtx (operands[0]))));
9735
9736 return 1; /* DONE */
9737 }
9738
9739 int
9740 ix86_expand_fp_movcc (rtx operands[])
9741 {
9742 enum machine_mode mode = GET_MODE (operands[0]);
9743 enum rtx_code code = GET_CODE (operands[1]);
9744 rtx tmp, compare_op, second_test, bypass_test;
9745
9746 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
9747 {
9748 rtx cmp_op0, cmp_op1, if_true, if_false;
9749 rtx clob;
9750 enum machine_mode vmode, cmode;
9751 bool is_minmax = false;
9752
9753 cmp_op0 = ix86_compare_op0;
9754 cmp_op1 = ix86_compare_op1;
9755 if_true = operands[2];
9756 if_false = operands[3];
9757
9758 /* Since we have no cmove for SSE registers, don't force bad register
9759 allocation just to gain access to it. Deny movcc when the
9760 comparison mode doesn't match the move mode. */
9761 cmode = GET_MODE (cmp_op0);
9762 if (cmode == VOIDmode)
9763 cmode = GET_MODE (cmp_op1);
9764 if (cmode != mode)
9765 return 0;
9766
9767 /* Massage condition to satisfy sse_comparison_operator. Try
9768 to canonicalize the destination operand to be first in the
9769 comparison - this helps reload to avoid extra moves. */
9770 if (!sse_comparison_operator (operands[1], VOIDmode)
9771 || (COMMUTATIVE_P (operands[1])
9772 && rtx_equal_p (operands[0], cmp_op1)))
9773 {
9774 tmp = cmp_op0;
9775 cmp_op0 = cmp_op1;
9776 cmp_op1 = tmp;
9777 code = swap_condition (code);
9778 }
9779
9780 /* Detect conditional moves that exactly match min/max operational
9781 semantics. Note that this is IEEE safe, as long as we don't
9782 interchange the operands, which is why we keep this in the form
9783 of an IF_THEN_ELSE instead of reducing to SMIN/SMAX. */
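/* For example, the pattern (lt a b) ? a : b keeps min semantics as written;
   swapping if_true and if_false here would change which operand is returned
   when one of them is a NaN, so the operand order is preserved. */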
9784 if ((code == LT || code == UNGE) && REG_P (cmp_op0) && REG_P (cmp_op1))
9785 {
9786 if (((cmp_op0 == if_true && cmp_op1 == if_false)
9787 || (cmp_op0 == if_false && cmp_op1 == if_true)))
9788 {
9789 is_minmax = true;
9790 if (code == UNGE)
9791 {
9792 code = LT;
9793 tmp = if_true;
9794 if_true = if_false;
9795 if_false = tmp;
9796 }
9797 }
9798 }
9799
9800 if (mode == SFmode)
9801 vmode = V4SFmode;
9802 else if (mode == DFmode)
9803 vmode = V2DFmode;
9804 else
9805 gcc_unreachable ();
9806
9807 cmp_op0 = force_reg (mode, cmp_op0);
9808 if (!nonimmediate_operand (cmp_op1, mode))
9809 cmp_op1 = force_reg (mode, cmp_op1);
9810
9811 tmp = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
9812 gcc_assert (sse_comparison_operator (tmp, VOIDmode));
9813
9814 tmp = gen_rtx_IF_THEN_ELSE (mode, tmp, if_true, if_false);
9815 tmp = gen_rtx_SET (VOIDmode, operands[0], tmp);
9816
9817 if (!is_minmax)
9818 {
9819 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (vmode));
9820 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
9821 }
9822
9823 emit_insn (tmp);
9824 return 1;
9825 }
9826
9827 /* The floating point conditional move instructions don't directly
9828 support conditions resulting from a signed integer comparison. */
9829
9830 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9831
9835 if (!fcmov_comparison_operator (compare_op, VOIDmode))
9836 {
9837 if (second_test != NULL || bypass_test != NULL)
9838 abort ();
9839 tmp = gen_reg_rtx (QImode);
9840 ix86_expand_setcc (code, tmp);
9841 code = NE;
9842 ix86_compare_op0 = tmp;
9843 ix86_compare_op1 = const0_rtx;
9844 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9845 }
9846 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9847 {
9848 tmp = gen_reg_rtx (mode);
9849 emit_move_insn (tmp, operands[3]);
9850 operands[3] = tmp;
9851 }
9852 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9853 {
9854 tmp = gen_reg_rtx (mode);
9855 emit_move_insn (tmp, operands[2]);
9856 operands[2] = tmp;
9857 }
9858
9859 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9860 gen_rtx_IF_THEN_ELSE (mode, compare_op,
9861 operands[2], operands[3])));
9862 if (bypass_test)
9863 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9864 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
9865 operands[3], operands[0])));
9866 if (second_test)
9867 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9868 gen_rtx_IF_THEN_ELSE (mode, second_test,
9869 operands[2], operands[0])));
9870
9871 return 1;
9872 }
9873
9874 void
9875 ix86_split_sse_movcc (rtx operands[])
9876 {
9877 rtx dest, scratch, cmp, op_true, op_false, x;
9878 enum machine_mode mode, vmode;
9879
9880 /* Note that the operator CMP has been set up with matching constraints
9881 such that dest is valid for the comparison. Unless one of the true
9882 or false operands is zero, the true operand has already been placed
9883 in SCRATCH. */
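/* The expansion below is a mask-and-blend: after the comparison writes an
   all-ones or all-zeros mask into DEST, the result is formed roughly as
   (dest & op_true) | (~dest & op_false); when op_true or op_false is zero
   a single AND suffices. */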
9884 dest = operands[0];
9885 scratch = operands[1];
9886 op_true = operands[2];
9887 op_false = operands[3];
9888 cmp = operands[4];
9889
9890 mode = GET_MODE (dest);
9891 vmode = GET_MODE (scratch);
9892
9893 emit_insn (gen_rtx_SET (VOIDmode, dest, cmp));
9894
9895 dest = simplify_gen_subreg (vmode, dest, mode, 0);
9896
9897 if (op_false == CONST0_RTX (mode))
9898 {
9899 op_true = simplify_gen_subreg (vmode, op_true, mode, 0);
9900 x = gen_rtx_AND (vmode, dest, op_true);
9901 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9902 }
9903 else
9904 {
9905 op_false = simplify_gen_subreg (vmode, op_false, mode, 0);
9906
9907 if (op_true == CONST0_RTX (mode))
9908 {
9909 x = gen_rtx_NOT (vmode, dest);
9910 x = gen_rtx_AND (vmode, x, op_false);
9911 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9912 }
9913 else
9914 {
9915 x = gen_rtx_AND (vmode, scratch, dest);
9916 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
9917
9918 x = gen_rtx_NOT (vmode, dest);
9919 x = gen_rtx_AND (vmode, x, op_false);
9920 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9921
9922 x = gen_rtx_IOR (vmode, dest, scratch);
9923 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9924 }
9925 }
9926 }
9927
9928 /* Expand conditional increment or decrement using adc/sbb instructions.
9929 The default case using setcc followed by the conditional move can be
9930 done by generic code. */
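/* For instance, x = ((unsigned) a < b) ? x + 1 : x can be emitted roughly
   as cmpl b, a ; adcl $0, x: the carry flag produced by the compare
   supplies the conditional increment, and the sbb patterns cover the
   conditional decrement case. */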
9931 int
9932 ix86_expand_int_addcc (rtx operands[])
9933 {
9934 enum rtx_code code = GET_CODE (operands[1]);
9935 rtx compare_op;
9936 rtx val = const0_rtx;
9937 bool fpcmp = false;
9938 enum machine_mode mode = GET_MODE (operands[0]);
9939
9940 if (operands[3] != const1_rtx
9941 && operands[3] != constm1_rtx)
9942 return 0;
9943 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
9944 ix86_compare_op1, &compare_op))
9945 return 0;
9946 code = GET_CODE (compare_op);
9947
9948 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9949 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9950 {
9951 fpcmp = true;
9952 code = ix86_fp_compare_code_to_integer (code);
9953 }
9954
9955 if (code != LTU)
9956 {
9957 val = constm1_rtx;
9958 if (fpcmp)
9959 PUT_CODE (compare_op,
9960 reverse_condition_maybe_unordered
9961 (GET_CODE (compare_op)));
9962 else
9963 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
9964 }
9965 PUT_MODE (compare_op, mode);
9966
9967 /* Construct either adc or sbb insn. */
9968 if ((code == LTU) == (operands[3] == constm1_rtx))
9969 {
9970 switch (GET_MODE (operands[0]))
9971 {
9972 case QImode:
9973 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
9974 break;
9975 case HImode:
9976 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
9977 break;
9978 case SImode:
9979 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
9980 break;
9981 case DImode:
9982 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
9983 break;
9984 default:
9985 abort ();
9986 }
9987 }
9988 else
9989 {
9990 switch (GET_MODE (operands[0]))
9991 {
9992 case QImode:
9993 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
9994 break;
9995 case HImode:
9996 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
9997 break;
9998 case SImode:
9999 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
10000 break;
10001 case DImode:
10002 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10003 break;
10004 default:
10005 abort ();
10006 }
10007 }
10008 return 1; /* DONE */
10009 }
10010
10011
10012 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
10013 works for floating point operands and non-offsettable memories.
10014 For pushes, it returns just stack offsets; the values will be saved
10015 in the right order. At most three parts are generated. */
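/* For example, on a 32-bit target a DFmode operand is split into two SImode
   words and an XFmode operand into three, while on a 64-bit target an
   XFmode operand becomes a DImode part plus an upper SImode part. */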
10016
10017 static int
10018 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
10019 {
10020 int size;
10021
10022 if (!TARGET_64BIT)
10023 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
10024 else
10025 size = (GET_MODE_SIZE (mode) + 4) / 8;
10026
10027 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
10028 abort ();
10029 if (size < 2 || size > 3)
10030 abort ();
10031
10032 /* Optimize constant pool reference to immediates. This is used by fp
10033 moves, that force all constants to memory to allow combining. */
10034 if (GET_CODE (operand) == MEM && MEM_READONLY_P (operand))
10035 {
10036 rtx tmp = maybe_get_pool_constant (operand);
10037 if (tmp)
10038 operand = tmp;
10039 }
10040
10041 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
10042 {
10043 /* The only non-offsettable memories we handle are pushes. */
10044 if (! push_operand (operand, VOIDmode))
10045 abort ();
10046
10047 operand = copy_rtx (operand);
10048 PUT_MODE (operand, Pmode);
10049 parts[0] = parts[1] = parts[2] = operand;
10050 }
10051 else if (!TARGET_64BIT)
10052 {
10053 if (mode == DImode)
10054 split_di (&operand, 1, &parts[0], &parts[1]);
10055 else
10056 {
10057 if (REG_P (operand))
10058 {
10059 if (!reload_completed)
10060 abort ();
10061 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
10062 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
10063 if (size == 3)
10064 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
10065 }
10066 else if (offsettable_memref_p (operand))
10067 {
10068 operand = adjust_address (operand, SImode, 0);
10069 parts[0] = operand;
10070 parts[1] = adjust_address (operand, SImode, 4);
10071 if (size == 3)
10072 parts[2] = adjust_address (operand, SImode, 8);
10073 }
10074 else if (GET_CODE (operand) == CONST_DOUBLE)
10075 {
10076 REAL_VALUE_TYPE r;
10077 long l[4];
10078
10079 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10080 switch (mode)
10081 {
10082 case XFmode:
10083 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10084 parts[2] = gen_int_mode (l[2], SImode);
10085 break;
10086 case DFmode:
10087 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
10088 break;
10089 default:
10090 abort ();
10091 }
10092 parts[1] = gen_int_mode (l[1], SImode);
10093 parts[0] = gen_int_mode (l[0], SImode);
10094 }
10095 else
10096 abort ();
10097 }
10098 }
10099 else
10100 {
10101 if (mode == TImode)
10102 split_ti (&operand, 1, &parts[0], &parts[1]);
10103 if (mode == XFmode || mode == TFmode)
10104 {
10105 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
10106 if (REG_P (operand))
10107 {
10108 if (!reload_completed)
10109 abort ();
10110 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
10111 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
10112 }
10113 else if (offsettable_memref_p (operand))
10114 {
10115 operand = adjust_address (operand, DImode, 0);
10116 parts[0] = operand;
10117 parts[1] = adjust_address (operand, upper_mode, 8);
10118 }
10119 else if (GET_CODE (operand) == CONST_DOUBLE)
10120 {
10121 REAL_VALUE_TYPE r;
10122 long l[4];
10123
10124 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10125 real_to_target (l, &r, mode);
10126
10127 /* Do not use shift by 32 to avoid warning on 32bit systems. */
10128 if (HOST_BITS_PER_WIDE_INT >= 64)
10129 parts[0]
10130 = gen_int_mode
10131 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
10132 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
10133 DImode);
10134 else
10135 parts[0] = immed_double_const (l[0], l[1], DImode);
10136
10137 if (upper_mode == SImode)
10138 parts[1] = gen_int_mode (l[2], SImode);
10139 else if (HOST_BITS_PER_WIDE_INT >= 64)
10140 parts[1]
10141 = gen_int_mode
10142 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
10143 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
10144 DImode);
10145 else
10146 parts[1] = immed_double_const (l[2], l[3], DImode);
10147 }
10148 else
10149 abort ();
10150 }
10151 }
10152
10153 return size;
10154 }
10155
10156 /* Emit insns to perform a move or push of DI, DF, and XF values.
10157 All required insns are emitted here; there is no return value.
10158 Operands 2-4 are used for the destination parts and operands 5-7
10159 for the corresponding source parts. */
10160
10161 void
10162 ix86_split_long_move (rtx operands[])
10163 {
10164 rtx part[2][3];
10165 int nparts;
10166 int push = 0;
10167 int collisions = 0;
10168 enum machine_mode mode = GET_MODE (operands[0]);
10169
10170 /* The DFmode expanders may ask us to move a double.
10171 For a 64-bit target this is a single move. By hiding that fact
10172 here we simplify the i386.md splitters. */
10173 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
10174 {
10175 /* Optimize constant pool reference to immediates. This is used by
10176 fp moves, that force all constants to memory to allow combining. */
10177
10178 if (GET_CODE (operands[1]) == MEM
10179 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
10180 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
10181 operands[1] = get_pool_constant (XEXP (operands[1], 0));
10182 if (push_operand (operands[0], VOIDmode))
10183 {
10184 operands[0] = copy_rtx (operands[0]);
10185 PUT_MODE (operands[0], Pmode);
10186 }
10187 else
10188 operands[0] = gen_lowpart (DImode, operands[0]);
10189 operands[1] = gen_lowpart (DImode, operands[1]);
10190 emit_move_insn (operands[0], operands[1]);
10191 return;
10192 }
10193
10194 /* The only non-offsettable memory we handle is push. */
10195 if (push_operand (operands[0], VOIDmode))
10196 push = 1;
10197 else if (GET_CODE (operands[0]) == MEM
10198 && ! offsettable_memref_p (operands[0]))
10199 abort ();
10200
10201 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
10202 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
10203
10204 /* When emitting a push, be careful with source operands on the stack. */
10205 if (push && GET_CODE (operands[1]) == MEM
10206 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
10207 {
10208 if (nparts == 3)
10209 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
10210 XEXP (part[1][2], 0));
10211 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
10212 XEXP (part[1][1], 0));
10213 }
10214
10215 /* We need to do copy in the right order in case an address register
10216 of the source overlaps the destination. */
10217 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
10218 {
10219 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
10220 collisions++;
10221 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10222 collisions++;
10223 if (nparts == 3
10224 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
10225 collisions++;
10226
10227 /* Collision in the middle part can be handled by reordering. */
10228 if (collisions == 1 && nparts == 3
10229 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10230 {
10231 rtx tmp;
10232 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
10233 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
10234 }
10235
10236 /* If there are more collisions, we can't handle it by reordering.
10237 Do an lea to the last part and use only one colliding move. */
10238 else if (collisions > 1)
10239 {
10240 rtx base;
10241
10242 collisions = 1;
10243
10244 base = part[0][nparts - 1];
10245
10246 /* Handle the case when the last part isn't valid for lea.
10247 Happens in 64-bit mode storing the 12-byte XFmode. */
10248 if (GET_MODE (base) != Pmode)
10249 base = gen_rtx_REG (Pmode, REGNO (base));
10250
10251 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
10252 part[1][0] = replace_equiv_address (part[1][0], base);
10253 part[1][1] = replace_equiv_address (part[1][1],
10254 plus_constant (base, UNITS_PER_WORD));
10255 if (nparts == 3)
10256 part[1][2] = replace_equiv_address (part[1][2],
10257 plus_constant (base, 8));
10258 }
10259 }
10260
10261 if (push)
10262 {
10263 if (!TARGET_64BIT)
10264 {
10265 if (nparts == 3)
10266 {
10267 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
10268 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
10269 emit_move_insn (part[0][2], part[1][2]);
10270 }
10271 }
10272 else
10273 {
10274 /* In 64-bit mode we don't have a 32-bit push available. If this is a
10275 register, it is OK - we will just use the larger counterpart. We also
10276 retype memory - this comes from an attempt to avoid a REX prefix when
10277 moving the second half of a TFmode value. */
10278 if (GET_MODE (part[1][1]) == SImode)
10279 {
10280 if (GET_CODE (part[1][1]) == MEM)
10281 part[1][1] = adjust_address (part[1][1], DImode, 0);
10282 else if (REG_P (part[1][1]))
10283 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
10284 else
10285 abort ();
10286 if (GET_MODE (part[1][0]) == SImode)
10287 part[1][0] = part[1][1];
10288 }
10289 }
10290 emit_move_insn (part[0][1], part[1][1]);
10291 emit_move_insn (part[0][0], part[1][0]);
10292 return;
10293 }
10294
10295 /* Choose correct order to not overwrite the source before it is copied. */
10296 if ((REG_P (part[0][0])
10297 && REG_P (part[1][1])
10298 && (REGNO (part[0][0]) == REGNO (part[1][1])
10299 || (nparts == 3
10300 && REGNO (part[0][0]) == REGNO (part[1][2]))))
10301 || (collisions > 0
10302 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
10303 {
10304 if (nparts == 3)
10305 {
10306 operands[2] = part[0][2];
10307 operands[3] = part[0][1];
10308 operands[4] = part[0][0];
10309 operands[5] = part[1][2];
10310 operands[6] = part[1][1];
10311 operands[7] = part[1][0];
10312 }
10313 else
10314 {
10315 operands[2] = part[0][1];
10316 operands[3] = part[0][0];
10317 operands[5] = part[1][1];
10318 operands[6] = part[1][0];
10319 }
10320 }
10321 else
10322 {
10323 if (nparts == 3)
10324 {
10325 operands[2] = part[0][0];
10326 operands[3] = part[0][1];
10327 operands[4] = part[0][2];
10328 operands[5] = part[1][0];
10329 operands[6] = part[1][1];
10330 operands[7] = part[1][2];
10331 }
10332 else
10333 {
10334 operands[2] = part[0][0];
10335 operands[3] = part[0][1];
10336 operands[5] = part[1][0];
10337 operands[6] = part[1][1];
10338 }
10339 }
10340
10341 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
10342 if (optimize_size)
10343 {
10344 if (GET_CODE (operands[5]) == CONST_INT
10345 && operands[5] != const0_rtx
10346 && REG_P (operands[2]))
10347 {
10348 if (GET_CODE (operands[6]) == CONST_INT
10349 && INTVAL (operands[6]) == INTVAL (operands[5]))
10350 operands[6] = operands[2];
10351
10352 if (nparts == 3
10353 && GET_CODE (operands[7]) == CONST_INT
10354 && INTVAL (operands[7]) == INTVAL (operands[5]))
10355 operands[7] = operands[2];
10356 }
10357
10358 if (nparts == 3
10359 && GET_CODE (operands[6]) == CONST_INT
10360 && operands[6] != const0_rtx
10361 && REG_P (operands[3])
10362 && GET_CODE (operands[7]) == CONST_INT
10363 && INTVAL (operands[7]) == INTVAL (operands[6]))
10364 operands[7] = operands[3];
10365 }
10366
10367 emit_move_insn (operands[2], operands[5]);
10368 emit_move_insn (operands[3], operands[6]);
10369 if (nparts == 3)
10370 emit_move_insn (operands[4], operands[7]);
10371
10372 return;
10373 }
10374
10375 /* Helper function of ix86_split_ashldi used to generate an SImode
10376 left shift by a constant, either using a single shift or
10377 a sequence of add instructions. */
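/* For example, a shift left by 2 may be emitted as two addl instructions
   when 2 * ix86_cost->add <= ix86_cost->shift_const and we are not
   optimizing for size. */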
10378
10379 static void
10380 ix86_expand_ashlsi3_const (rtx operand, int count)
10381 {
10382 if (count == 1)
10383 emit_insn (gen_addsi3 (operand, operand, operand));
10384 else if (!optimize_size
10385 && count * ix86_cost->add <= ix86_cost->shift_const)
10386 {
10387 int i;
10388 for (i=0; i<count; i++)
10389 emit_insn (gen_addsi3 (operand, operand, operand));
10390 }
10391 else
10392 emit_insn (gen_ashlsi3 (operand, operand, GEN_INT (count)));
10393 }
10394
10395 void
10396 ix86_split_ashldi (rtx *operands, rtx scratch)
10397 {
10398 rtx low[2], high[2];
10399 int count;
10400
10401 if (GET_CODE (operands[2]) == CONST_INT)
10402 {
10403 split_di (operands, 2, low, high);
10404 count = INTVAL (operands[2]) & 63;
10405
10406 if (count >= 32)
10407 {
10408 emit_move_insn (high[0], low[1]);
10409 emit_move_insn (low[0], const0_rtx);
10410
10411 if (count > 32)
10412 ix86_expand_ashlsi3_const (high[0], count - 32);
10413 }
10414 else
10415 {
10416 if (!rtx_equal_p (operands[0], operands[1]))
10417 emit_move_insn (operands[0], operands[1]);
10418 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
10419 ix86_expand_ashlsi3_const (low[0], count);
10420 }
10421 return;
10422 }
10423
10424 split_di (operands, 1, low, high);
10425
10426 if (operands[1] == const1_rtx)
10427 {
10428 /* Assuming we've chosen QImode-capable registers, then 1LL << N
10429 can be done with two 32-bit shifts, no branches, no cmoves. */
10430 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
10431 {
10432 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
10433
10434 ix86_expand_clear (low[0]);
10435 ix86_expand_clear (high[0]);
10436 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (32)));
10437
10438 d = gen_lowpart (QImode, low[0]);
10439 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
10440 s = gen_rtx_EQ (QImode, flags, const0_rtx);
10441 emit_insn (gen_rtx_SET (VOIDmode, d, s));
10442
10443 d = gen_lowpart (QImode, high[0]);
10444 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
10445 s = gen_rtx_NE (QImode, flags, const0_rtx);
10446 emit_insn (gen_rtx_SET (VOIDmode, d, s));
10447 }
10448
10449 /* Otherwise, we can get the same results by manually performing
10450 a bit extract operation on bit 5, and then performing the two
10451 shifts. The two methods of getting 0/1 into low/high are exactly
10452 the same size. Avoiding the shift in the bit extract case helps
10453 pentium4 a bit; no one else seems to care much either way. */
10454 else
10455 {
10456 rtx x;
10457
10458 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
10459 x = gen_rtx_ZERO_EXTEND (SImode, operands[2]);
10460 else
10461 x = gen_lowpart (SImode, operands[2]);
10462 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
10463
10464 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (5)));
10465 emit_insn (gen_andsi3 (high[0], high[0], GEN_INT (1)));
10466 emit_move_insn (low[0], high[0]);
10467 emit_insn (gen_xorsi3 (low[0], low[0], GEN_INT (1)));
10468 }
10469
10470 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
10471 emit_insn (gen_ashlsi3 (high[0], high[0], operands[2]));
10472 return;
10473 }
10474
10475 if (operands[1] == constm1_rtx)
10476 {
10477 /* For -1LL << N, we can avoid the shld instruction, because we
10478 know that we're shifting 0...31 ones into a -1. */
10479 emit_move_insn (low[0], constm1_rtx);
10480 if (optimize_size)
10481 emit_move_insn (high[0], low[0]);
10482 else
10483 emit_move_insn (high[0], constm1_rtx);
10484 }
10485 else
10486 {
10487 if (!rtx_equal_p (operands[0], operands[1]))
10488 emit_move_insn (operands[0], operands[1]);
10489
10490 split_di (operands, 1, low, high);
10491 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
10492 }
10493
10494 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
10495
10496 if (TARGET_CMOVE && scratch)
10497 {
10498 ix86_expand_clear (scratch);
10499 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
10500 }
10501 else
10502 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
10503 }
10504
10505 void
10506 ix86_split_ashrdi (rtx *operands, rtx scratch)
10507 {
10508 rtx low[2], high[2];
10509 int count;
10510
10511 if (GET_CODE (operands[2]) == CONST_INT)
10512 {
10513 split_di (operands, 2, low, high);
10514 count = INTVAL (operands[2]) & 63;
10515
10516 if (count == 63)
10517 {
10518 emit_move_insn (high[0], high[1]);
10519 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10520 emit_move_insn (low[0], high[0]);
10521
10522 }
10523 else if (count >= 32)
10524 {
10525 emit_move_insn (low[0], high[1]);
10526 emit_move_insn (high[0], low[0]);
10527 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10528 if (count > 32)
10529 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
10530 }
10531 else
10532 {
10533 if (!rtx_equal_p (operands[0], operands[1]))
10534 emit_move_insn (operands[0], operands[1]);
10535 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10536 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
10537 }
10538 }
10539 else
10540 {
10541 if (!rtx_equal_p (operands[0], operands[1]))
10542 emit_move_insn (operands[0], operands[1]);
10543
10544 split_di (operands, 1, low, high);
10545
10546 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10547 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
10548
10549 if (TARGET_CMOVE && scratch)
10550 {
10551 emit_move_insn (scratch, high[0]);
10552 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
10553 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10554 scratch));
10555 }
10556 else
10557 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
10558 }
10559 }
10560
10561 void
10562 ix86_split_lshrdi (rtx *operands, rtx scratch)
10563 {
10564 rtx low[2], high[2];
10565 int count;
10566
10567 if (GET_CODE (operands[2]) == CONST_INT)
10568 {
10569 split_di (operands, 2, low, high);
10570 count = INTVAL (operands[2]) & 63;
10571
10572 if (count >= 32)
10573 {
10574 emit_move_insn (low[0], high[1]);
10575 ix86_expand_clear (high[0]);
10576
10577 if (count > 32)
10578 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
10579 }
10580 else
10581 {
10582 if (!rtx_equal_p (operands[0], operands[1]))
10583 emit_move_insn (operands[0], operands[1]);
10584 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10585 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
10586 }
10587 }
10588 else
10589 {
10590 if (!rtx_equal_p (operands[0], operands[1]))
10591 emit_move_insn (operands[0], operands[1]);
10592
10593 split_di (operands, 1, low, high);
10594
10595 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10596 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
10597
10598 /* Heh. By reversing the arguments, we can reuse this pattern. */
10599 if (TARGET_CMOVE && scratch)
10600 {
10601 ix86_expand_clear (scratch);
10602 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10603 scratch));
10604 }
10605 else
10606 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
10607 }
10608 }
10609
10610 /* Helper function for the string operations below. Test whether the bits
10611 of VARIABLE selected by VALUE are zero; if so, jump to the returned label. */
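/* For instance, ix86_expand_aligntest (destreg, 1) returns a label that is
   branched to when the low bit of destreg is clear, so the byte-sized copy
   or store it guards is skipped for destinations that are already 2-byte
   aligned. */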
10612 static rtx
10613 ix86_expand_aligntest (rtx variable, int value)
10614 {
10615 rtx label = gen_label_rtx ();
10616 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
10617 if (GET_MODE (variable) == DImode)
10618 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
10619 else
10620 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
10621 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
10622 1, label);
10623 return label;
10624 }
10625
10626 /* Decrease COUNTREG by VALUE. */
10627 static void
10628 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
10629 {
10630 if (GET_MODE (countreg) == DImode)
10631 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
10632 else
10633 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
10634 }
10635
10636 /* Zero extend possibly SImode EXP to Pmode register. */
10637 rtx
10638 ix86_zero_extend_to_Pmode (rtx exp)
10639 {
10640 rtx r;
10641 if (GET_MODE (exp) == VOIDmode)
10642 return force_reg (Pmode, exp);
10643 if (GET_MODE (exp) == Pmode)
10644 return copy_to_mode_reg (Pmode, exp);
10645 r = gen_reg_rtx (Pmode);
10646 emit_insn (gen_zero_extendsidi2 (r, exp));
10647 return r;
10648 }
10649
10650 /* Expand string move (memcpy) operation. Use i386 string operations when
10651 profitable. expand_clrmem contains similar code. */
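/* As a rough example, a compile-time count of 62 bytes with 4-byte
   alignment on a 32-bit target takes the constant-count branch below and
   emits approximately movl $15, %ecx ; rep movsl, followed by a single
   2-byte move for the tail. */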
10652 int
10653 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp)
10654 {
10655 rtx srcreg, destreg, countreg, srcexp, destexp;
10656 enum machine_mode counter_mode;
10657 HOST_WIDE_INT align = 0;
10658 unsigned HOST_WIDE_INT count = 0;
10659
10660 if (GET_CODE (align_exp) == CONST_INT)
10661 align = INTVAL (align_exp);
10662
10663 /* Can't use any of this if the user has appropriated esi or edi. */
10664 if (global_regs[4] || global_regs[5])
10665 return 0;
10666
10667 /* This simple hack avoids all inlining code and simplifies code below. */
10668 if (!TARGET_ALIGN_STRINGOPS)
10669 align = 64;
10670
10671 if (GET_CODE (count_exp) == CONST_INT)
10672 {
10673 count = INTVAL (count_exp);
10674 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
10675 return 0;
10676 }
10677
10678 /* Figure out proper mode for counter. For 32bits it is always SImode,
10679 for 64bits use SImode when possible, otherwise DImode.
10680 Set count to number of bytes copied when known at compile time. */
10681 if (!TARGET_64BIT
10682 || GET_MODE (count_exp) == SImode
10683 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
10684 counter_mode = SImode;
10685 else
10686 counter_mode = DImode;
10687
10688 if (counter_mode != SImode && counter_mode != DImode)
10689 abort ();
10690
10691 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
10692 if (destreg != XEXP (dst, 0))
10693 dst = replace_equiv_address_nv (dst, destreg);
10694 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
10695 if (srcreg != XEXP (src, 0))
10696 src = replace_equiv_address_nv (src, srcreg);
10697
10698 /* When optimizing for size emit simple rep ; movsb instruction for
10699 counts not divisible by 4. */
10700
10701 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
10702 {
10703 emit_insn (gen_cld ());
10704 countreg = ix86_zero_extend_to_Pmode (count_exp);
10705 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
10706 srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
10707 emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
10708 destexp, srcexp));
10709 }
10710
10711 /* For constant aligned (or small unaligned) copies use rep movsl
10712 followed by code copying the rest. For PentiumPro ensure 8 byte
10713 alignment to allow rep movsl acceleration. */
10714
10715 else if (count != 0
10716 && (align >= 8
10717 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
10718 || optimize_size || count < (unsigned int) 64))
10719 {
10720 unsigned HOST_WIDE_INT offset = 0;
10721 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10722 rtx srcmem, dstmem;
10723
10724 emit_insn (gen_cld ());
10725 if (count & ~(size - 1))
10726 {
10727 countreg = copy_to_mode_reg (counter_mode,
10728 GEN_INT ((count >> (size == 4 ? 2 : 3))
10729 & (TARGET_64BIT ? -1 : 0x3fffffff)));
10730 countreg = ix86_zero_extend_to_Pmode (countreg);
10731
10732 destexp = gen_rtx_ASHIFT (Pmode, countreg,
10733 GEN_INT (size == 4 ? 2 : 3));
10734 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
10735 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
10736
10737 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
10738 countreg, destexp, srcexp));
10739 offset = count & ~(size - 1);
10740 }
10741 if (size == 8 && (count & 0x04))
10742 {
10743 srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
10744 offset);
10745 dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
10746 offset);
10747 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10748 offset += 4;
10749 }
10750 if (count & 0x02)
10751 {
10752 srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
10753 offset);
10754 dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
10755 offset);
10756 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10757 offset += 2;
10758 }
10759 if (count & 0x01)
10760 {
10761 srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
10762 offset);
10763 dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
10764 offset);
10765 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10766 }
10767 }
10768 /* The generic code based on the glibc implementation:
10769 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
10770 allowing accelerated copying there)
10771 - copy the data using rep movsl
10772 - copy the rest. */
10773 else
10774 {
10775 rtx countreg2;
10776 rtx label = NULL;
10777 rtx srcmem, dstmem;
10778 int desired_alignment = (TARGET_PENTIUMPRO
10779 && (count == 0 || count >= (unsigned int) 260)
10780 ? 8 : UNITS_PER_WORD);
10781 /* Get rid of MEM_OFFSETs, they won't be accurate. */
10782 dst = change_address (dst, BLKmode, destreg);
10783 src = change_address (src, BLKmode, srcreg);
10784
10785 /* In case we don't know anything about the alignment, default to the
10786 library version, since it is usually equally fast and results in
10787 shorter code.
10788 
10789 Also emit a call when we know that the count is large and call overhead
10790 will not be important. */
10791 if (!TARGET_INLINE_ALL_STRINGOPS
10792 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
10793 return 0;
10794
10795 if (TARGET_SINGLE_STRINGOP)
10796 emit_insn (gen_cld ());
10797
10798 countreg2 = gen_reg_rtx (Pmode);
10799 countreg = copy_to_mode_reg (counter_mode, count_exp);
10800
10801 /* We don't use loops to align the destination or to copy parts smaller
10802 than 4 bytes, because gcc is able to optimize such code better (in
10803 case the destination or the count really is aligned, gcc is often
10804 able to predict the branches) and also it is friendlier to the
10805 hardware branch prediction.
10806 
10807 Using loops is beneficial for the generic case, because we can
10808 handle small counts using the loops. Many CPUs (such as Athlon)
10809 have large REP prefix setup costs.
10810 
10811 This is quite costly. Maybe we can revisit this decision later or
10812 add some customizability to this code. */
10813
10814 if (count == 0 && align < desired_alignment)
10815 {
10816 label = gen_label_rtx ();
10817 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
10818 LEU, 0, counter_mode, 1, label);
10819 }
10820 if (align <= 1)
10821 {
10822 rtx label = ix86_expand_aligntest (destreg, 1);
10823 srcmem = change_address (src, QImode, srcreg);
10824 dstmem = change_address (dst, QImode, destreg);
10825 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10826 ix86_adjust_counter (countreg, 1);
10827 emit_label (label);
10828 LABEL_NUSES (label) = 1;
10829 }
10830 if (align <= 2)
10831 {
10832 rtx label = ix86_expand_aligntest (destreg, 2);
10833 srcmem = change_address (src, HImode, srcreg);
10834 dstmem = change_address (dst, HImode, destreg);
10835 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10836 ix86_adjust_counter (countreg, 2);
10837 emit_label (label);
10838 LABEL_NUSES (label) = 1;
10839 }
10840 if (align <= 4 && desired_alignment > 4)
10841 {
10842 rtx label = ix86_expand_aligntest (destreg, 4);
10843 srcmem = change_address (src, SImode, srcreg);
10844 dstmem = change_address (dst, SImode, destreg);
10845 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10846 ix86_adjust_counter (countreg, 4);
10847 emit_label (label);
10848 LABEL_NUSES (label) = 1;
10849 }
10850
10851 if (label && desired_alignment > 4 && !TARGET_64BIT)
10852 {
10853 emit_label (label);
10854 LABEL_NUSES (label) = 1;
10855 label = NULL_RTX;
10856 }
10857 if (!TARGET_SINGLE_STRINGOP)
10858 emit_insn (gen_cld ());
10859 if (TARGET_64BIT)
10860 {
10861 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10862 GEN_INT (3)));
10863 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
10864 }
10865 else
10866 {
10867 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
10868 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
10869 }
10870 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
10871 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
10872 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
10873 countreg2, destexp, srcexp));
10874
10875 if (label)
10876 {
10877 emit_label (label);
10878 LABEL_NUSES (label) = 1;
10879 }
10880 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10881 {
10882 srcmem = change_address (src, SImode, srcreg);
10883 dstmem = change_address (dst, SImode, destreg);
10884 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10885 }
10886 if ((align <= 4 || count == 0) && TARGET_64BIT)
10887 {
10888 rtx label = ix86_expand_aligntest (countreg, 4);
10889 srcmem = change_address (src, SImode, srcreg);
10890 dstmem = change_address (dst, SImode, destreg);
10891 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10892 emit_label (label);
10893 LABEL_NUSES (label) = 1;
10894 }
10895 if (align > 2 && count != 0 && (count & 2))
10896 {
10897 srcmem = change_address (src, HImode, srcreg);
10898 dstmem = change_address (dst, HImode, destreg);
10899 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10900 }
10901 if (align <= 2 || count == 0)
10902 {
10903 rtx label = ix86_expand_aligntest (countreg, 2);
10904 srcmem = change_address (src, HImode, srcreg);
10905 dstmem = change_address (dst, HImode, destreg);
10906 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10907 emit_label (label);
10908 LABEL_NUSES (label) = 1;
10909 }
10910 if (align > 1 && count != 0 && (count & 1))
10911 {
10912 srcmem = change_address (src, QImode, srcreg);
10913 dstmem = change_address (dst, QImode, destreg);
10914 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10915 }
10916 if (align <= 1 || count == 0)
10917 {
10918 rtx label = ix86_expand_aligntest (countreg, 1);
10919 srcmem = change_address (src, QImode, srcreg);
10920 dstmem = change_address (dst, QImode, destreg);
10921 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10922 emit_label (label);
10923 LABEL_NUSES (label) = 1;
10924 }
10925 }
10926
10927 return 1;
10928 }
10929
10930 /* Expand string clear operation (bzero). Use i386 string operations when
10931 profitable. expand_movmem contains similar code. */
10932 int
10933 ix86_expand_clrmem (rtx dst, rtx count_exp, rtx align_exp)
10934 {
10935 rtx destreg, zeroreg, countreg, destexp;
10936 enum machine_mode counter_mode;
10937 HOST_WIDE_INT align = 0;
10938 unsigned HOST_WIDE_INT count = 0;
10939
10940 if (GET_CODE (align_exp) == CONST_INT)
10941 align = INTVAL (align_exp);
10942
10943 /* Can't use any of this if the user has appropriated esi. */
10944 if (global_regs[4])
10945 return 0;
10946
10947 /* This simple hack avoids all inlining code and simplifies code below. */
10948 if (!TARGET_ALIGN_STRINGOPS)
10949 align = 32;
10950
10951 if (GET_CODE (count_exp) == CONST_INT)
10952 {
10953 count = INTVAL (count_exp);
10954 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
10955 return 0;
10956 }
10957 /* Figure out proper mode for counter. For 32bits it is always SImode,
10958 for 64bits use SImode when possible, otherwise DImode.
10959 Set count to number of bytes copied when known at compile time. */
10960 if (!TARGET_64BIT
10961 || GET_MODE (count_exp) == SImode
10962 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
10963 counter_mode = SImode;
10964 else
10965 counter_mode = DImode;
10966
10967 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
10968 if (destreg != XEXP (dst, 0))
10969 dst = replace_equiv_address_nv (dst, destreg);
10970
10971
10972 /* When optimizing for size emit a simple rep ; stosb instruction for
10973 counts not divisible by 4. The movl $N, %ecx; rep; stosb
10974 sequence is 7 bytes long, so if optimizing for size and count is
10975 small enough that some stosl, stosw and stosb instructions without
10976 rep are shorter, fall back into the next if. */
10977
10978 if ((!optimize || optimize_size)
10979 && (count == 0
10980 || ((count & 0x03)
10981 && (!optimize_size || (count & 0x03) + (count >> 2) > 7))))
10982 {
10983 emit_insn (gen_cld ());
10984
10985 countreg = ix86_zero_extend_to_Pmode (count_exp);
10986 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
10987 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
10988 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
10989 }
10990 else if (count != 0
10991 && (align >= 8
10992 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
10993 || optimize_size || count < (unsigned int) 64))
10994 {
10995 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10996 unsigned HOST_WIDE_INT offset = 0;
10997
10998 emit_insn (gen_cld ());
10999
11000 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
11001 if (count & ~(size - 1))
11002 {
11003 unsigned HOST_WIDE_INT repcount;
11004 unsigned int max_nonrep;
11005
11006 repcount = count >> (size == 4 ? 2 : 3);
11007 if (!TARGET_64BIT)
11008 repcount &= 0x3fffffff;
11009
11010 /* movl $N, %ecx; rep; stosl is 7 bytes, while N x stosl is N bytes.
11011 movl $N, %ecx; rep; stosq is 8 bytes, while N x stosq is 2xN
11012 bytes. In both cases the latter seems to be faster for small
11013 values of N. */
11014 max_nonrep = size == 4 ? 7 : 4;
11015 if (!optimize_size)
11016 switch (ix86_tune)
11017 {
11018 case PROCESSOR_PENTIUM4:
11019 case PROCESSOR_NOCONA:
11020 max_nonrep = 3;
11021 break;
11022 default:
11023 break;
11024 }
11025
11026 if (repcount <= max_nonrep)
11027 while (repcount-- > 0)
11028 {
11029 rtx mem = adjust_automodify_address_nv (dst,
11030 GET_MODE (zeroreg),
11031 destreg, offset);
11032 emit_insn (gen_strset (destreg, mem, zeroreg));
11033 offset += size;
11034 }
11035 else
11036 {
11037 countreg = copy_to_mode_reg (counter_mode, GEN_INT (repcount));
11038 countreg = ix86_zero_extend_to_Pmode (countreg);
11039 destexp = gen_rtx_ASHIFT (Pmode, countreg,
11040 GEN_INT (size == 4 ? 2 : 3));
11041 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11042 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg,
11043 destexp));
11044 offset = count & ~(size - 1);
11045 }
11046 }
11047 if (size == 8 && (count & 0x04))
11048 {
11049 rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
11050 offset);
11051 emit_insn (gen_strset (destreg, mem,
11052 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11053 offset += 4;
11054 }
11055 if (count & 0x02)
11056 {
11057 rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
11058 offset);
11059 emit_insn (gen_strset (destreg, mem,
11060 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11061 offset += 2;
11062 }
11063 if (count & 0x01)
11064 {
11065 rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
11066 offset);
11067 emit_insn (gen_strset (destreg, mem,
11068 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11069 }
11070 }
11071 else
11072 {
11073 rtx countreg2;
11074 rtx label = NULL;
11075 /* Compute desired alignment of the string operation. */
11076 int desired_alignment = (TARGET_PENTIUMPRO
11077 && (count == 0 || count >= (unsigned int) 260)
11078 ? 8 : UNITS_PER_WORD);
11079
11080 /* In case we don't know anything about the alignment, default to the
11081 library version, since it is usually equally fast and results in
11082 shorter code.
11083 
11084 Also emit a call when we know that the count is large and call overhead
11085 will not be important. */
11086 if (!TARGET_INLINE_ALL_STRINGOPS
11087 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11088 return 0;
11089
11090 if (TARGET_SINGLE_STRINGOP)
11091 emit_insn (gen_cld ());
11092
11093 countreg2 = gen_reg_rtx (Pmode);
11094 countreg = copy_to_mode_reg (counter_mode, count_exp);
11095 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
11096 /* Get rid of MEM_OFFSET, it won't be accurate. */
11097 dst = change_address (dst, BLKmode, destreg);
11098
11099 if (count == 0 && align < desired_alignment)
11100 {
11101 label = gen_label_rtx ();
11102 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11103 LEU, 0, counter_mode, 1, label);
11104 }
11105 if (align <= 1)
11106 {
11107 rtx label = ix86_expand_aligntest (destreg, 1);
11108 emit_insn (gen_strset (destreg, dst,
11109 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11110 ix86_adjust_counter (countreg, 1);
11111 emit_label (label);
11112 LABEL_NUSES (label) = 1;
11113 }
11114 if (align <= 2)
11115 {
11116 rtx label = ix86_expand_aligntest (destreg, 2);
11117 emit_insn (gen_strset (destreg, dst,
11118 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11119 ix86_adjust_counter (countreg, 2);
11120 emit_label (label);
11121 LABEL_NUSES (label) = 1;
11122 }
11123 if (align <= 4 && desired_alignment > 4)
11124 {
11125 rtx label = ix86_expand_aligntest (destreg, 4);
11126 emit_insn (gen_strset (destreg, dst,
11127 (TARGET_64BIT
11128 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
11129 : zeroreg)));
11130 ix86_adjust_counter (countreg, 4);
11131 emit_label (label);
11132 LABEL_NUSES (label) = 1;
11133 }
11134
11135 if (label && desired_alignment > 4 && !TARGET_64BIT)
11136 {
11137 emit_label (label);
11138 LABEL_NUSES (label) = 1;
11139 label = NULL_RTX;
11140 }
11141
11142 if (!TARGET_SINGLE_STRINGOP)
11143 emit_insn (gen_cld ());
11144 if (TARGET_64BIT)
11145 {
11146 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11147 GEN_INT (3)));
11148 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
11149 }
11150 else
11151 {
11152 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
11153 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
11154 }
11155 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11156 emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));
11157
11158 if (label)
11159 {
11160 emit_label (label);
11161 LABEL_NUSES (label) = 1;
11162 }
11163
11164 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11165 emit_insn (gen_strset (destreg, dst,
11166 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11167 if (TARGET_64BIT && (align <= 4 || count == 0))
11168 {
11169 rtx label = ix86_expand_aligntest (countreg, 4);
11170 emit_insn (gen_strset (destreg, dst,
11171 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11172 emit_label (label);
11173 LABEL_NUSES (label) = 1;
11174 }
11175 if (align > 2 && count != 0 && (count & 2))
11176 emit_insn (gen_strset (destreg, dst,
11177 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11178 if (align <= 2 || count == 0)
11179 {
11180 rtx label = ix86_expand_aligntest (countreg, 2);
11181 emit_insn (gen_strset (destreg, dst,
11182 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11183 emit_label (label);
11184 LABEL_NUSES (label) = 1;
11185 }
11186 if (align > 1 && count != 0 && (count & 1))
11187 emit_insn (gen_strset (destreg, dst,
11188 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11189 if (align <= 1 || count == 0)
11190 {
11191 rtx label = ix86_expand_aligntest (countreg, 1);
11192 emit_insn (gen_strset (destreg, dst,
11193 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11194 emit_label (label);
11195 LABEL_NUSES (label) = 1;
11196 }
11197 }
11198 return 1;
11199 }
11200
11201 /* Expand strlen. */
11202 int
11203 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
11204 {
11205 rtx addr, scratch1, scratch2, scratch3, scratch4;
11206
11207 /* The generic case of the strlen expander is long. Avoid its
11208 expansion unless TARGET_INLINE_ALL_STRINGOPS. */
11209
11210 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11211 && !TARGET_INLINE_ALL_STRINGOPS
11212 && !optimize_size
11213 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
11214 return 0;
11215
11216 addr = force_reg (Pmode, XEXP (src, 0));
11217 scratch1 = gen_reg_rtx (Pmode);
11218
11219 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11220 && !optimize_size)
11221 {
11222 /* Well, it seems that some optimizer does not combine a call like
11223 foo(strlen(bar), strlen(bar));
11224 when the move and the subtraction are done here. It does calculate
11225 the length just once when these instructions are done inside of
11226 output_strlen_unroll(). But I think that since &bar[strlen(bar)] is
11227 often used and I use one fewer register for the lifetime of
11228 output_strlen_unroll(), this is better. */
11229
11230 emit_move_insn (out, addr);
11231
11232 ix86_expand_strlensi_unroll_1 (out, src, align);
11233
11234 /* strlensi_unroll_1 returns the address of the zero at the end of
11235 the string, like memchr(), so compute the length by subtracting
11236 the start address. */
11237 if (TARGET_64BIT)
11238 emit_insn (gen_subdi3 (out, out, addr));
11239 else
11240 emit_insn (gen_subsi3 (out, out, addr));
11241 }
11242 else
11243 {
11244 rtx unspec;
11245 scratch2 = gen_reg_rtx (Pmode);
11246 scratch3 = gen_reg_rtx (Pmode);
11247 scratch4 = force_reg (Pmode, constm1_rtx);
11248
11249 emit_move_insn (scratch3, addr);
11250 eoschar = force_reg (QImode, eoschar);
11251
11252 emit_insn (gen_cld ());
11253 src = replace_equiv_address_nv (src, scratch3);
11254
11255 /* If .md starts supporting :P, this can be done in .md. */
11256 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
11257 scratch4), UNSPEC_SCAS);
11258 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
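/* After repnz scasb the count register has been decremented once for every
   byte scanned, including the terminating zero.  Starting from -1 it
   therefore ends up holding -(len + 2); its one's complement is len + 1,
   and adding -1 below yields the length.  */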
11259 if (TARGET_64BIT)
11260 {
11261 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
11262 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
11263 }
11264 else
11265 {
11266 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
11267 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
11268 }
11269 }
11270 return 1;
11271 }
11272
11273 /* Expand the appropriate insns for doing strlen if not just doing
11274 repnz; scasb
11275
11276 out = result, initialized with the start address
11277 align_rtx = alignment of the address.
11278 scratch = scratch register, initialized with the start address when
11279 not aligned, otherwise undefined
11280
11281 This is just the body. It needs the initializations mentioned above and
11282 some address computing at the end. These things are done in i386.md. */
11283
11284 static void
11285 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
11286 {
11287 int align;
11288 rtx tmp;
11289 rtx align_2_label = NULL_RTX;
11290 rtx align_3_label = NULL_RTX;
11291 rtx align_4_label = gen_label_rtx ();
11292 rtx end_0_label = gen_label_rtx ();
11293 rtx mem;
11294 rtx tmpreg = gen_reg_rtx (SImode);
11295 rtx scratch = gen_reg_rtx (SImode);
11296 rtx cmp;
11297
11298 align = 0;
11299 if (GET_CODE (align_rtx) == CONST_INT)
11300 align = INTVAL (align_rtx);
11301
11302 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
11303
11304 /* Is there a known alignment and is it less than 4? */
11305 if (align < 4)
11306 {
11307 rtx scratch1 = gen_reg_rtx (Pmode);
11308 emit_move_insn (scratch1, out);
11309 /* Is there a known alignment and is it not 2? */
11310 if (align != 2)
11311 {
11312 align_3_label = gen_label_rtx (); /* Label when address is 3 mod 4 */
11313 align_2_label = gen_label_rtx (); /* Label when address is 2 mod 4 */
11314
11315 /* Leave just the 3 lower bits. */
11316 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
11317 NULL_RTX, 0, OPTAB_WIDEN);
11318
11319 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11320 Pmode, 1, align_4_label);
11321 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
11322 Pmode, 1, align_2_label);
11323 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
11324 Pmode, 1, align_3_label);
11325 }
11326 else
11327 {
11328 /* Since the alignment is 2, we have to check 2 or 0 bytes;
11329 check whether it is aligned to a 4-byte boundary. */
11330
11331 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
11332 NULL_RTX, 0, OPTAB_WIDEN);
11333
11334 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11335 Pmode, 1, align_4_label);
11336 }
11337
11338 mem = change_address (src, QImode, out);
11339
11340 /* Now compare the bytes. */
11341
11342 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
11343 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
11344 QImode, 1, end_0_label);
11345
11346 /* Increment the address. */
11347 if (TARGET_64BIT)
11348 emit_insn (gen_adddi3 (out, out, const1_rtx));
11349 else
11350 emit_insn (gen_addsi3 (out, out, const1_rtx));
11351
11352 /* Not needed with an alignment of 2 */
11353 if (align != 2)
11354 {
11355 emit_label (align_2_label);
11356
11357 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11358 end_0_label);
11359
11360 if (TARGET_64BIT)
11361 emit_insn (gen_adddi3 (out, out, const1_rtx));
11362 else
11363 emit_insn (gen_addsi3 (out, out, const1_rtx));
11364
11365 emit_label (align_3_label);
11366 }
11367
11368 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11369 end_0_label);
11370
11371 if (TARGET_64BIT)
11372 emit_insn (gen_adddi3 (out, out, const1_rtx));
11373 else
11374 emit_insn (gen_addsi3 (out, out, const1_rtx));
11375 }
11376
11377 /* Generate the loop that checks 4 bytes at a time. Aligning this loop
11378 is not worthwhile: it only enlarges the code and does not make it
11379 any faster. */
11380 emit_label (align_4_label);
11381
11382 mem = change_address (src, SImode, out);
11383 emit_move_insn (scratch, mem);
11384 if (TARGET_64BIT)
11385 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
11386 else
11387 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
11388
11389 /* This formula yields a nonzero result iff one of the bytes is zero.
11390 This saves three branches inside the loop and many cycles. */
11391
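/* Concretely, the sequence below computes
     (w - 0x01010101) & ~w & 0x80808080
   for the loaded word w.  The result is nonzero exactly when w contains a
   zero byte; the 0x80 bit of the lowest zero byte is always set and no bit
   below it ever is, so the first zero can be located from the result.  For
   example w = 0x41410041 gives 0x00008000.  */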
11392 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
11393 emit_insn (gen_one_cmplsi2 (scratch, scratch));
11394 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
11395 emit_insn (gen_andsi3 (tmpreg, tmpreg,
11396 gen_int_mode (0x80808080, SImode)));
11397 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
11398 align_4_label);
11399
11400 if (TARGET_CMOVE)
11401 {
11402 rtx reg = gen_reg_rtx (SImode);
11403 rtx reg2 = gen_reg_rtx (Pmode);
11404 emit_move_insn (reg, tmpreg);
11405 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
11406
11407 /* If zero is not in the first two bytes, move two bytes forward. */
11408 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11409 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11410 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11411 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
11412 gen_rtx_IF_THEN_ELSE (SImode, tmp,
11413 reg,
11414 tmpreg)));
11415 /* Emit lea manually to avoid clobbering of flags. */
11416 emit_insn (gen_rtx_SET (SImode, reg2,
11417 gen_rtx_PLUS (Pmode, out, const2_rtx)));
11418
11419 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11420 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11421 emit_insn (gen_rtx_SET (VOIDmode, out,
11422 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
11423 reg2,
11424 out)));
11425
11426 }
11427 else
11428 {
11429 rtx end_2_label = gen_label_rtx ();
11430 /* Is zero in the first two bytes? */
11431
11432 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11433 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11434 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
11435 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11436 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
11437 pc_rtx);
11438 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11439 JUMP_LABEL (tmp) = end_2_label;
11440
11441 /* Not in the first two. Move two bytes forward. */
11442 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
11443 if (TARGET_64BIT)
11444 emit_insn (gen_adddi3 (out, out, const2_rtx));
11445 else
11446 emit_insn (gen_addsi3 (out, out, const2_rtx));
11447
11448 emit_label (end_2_label);
11449
11450 }
11451
11452 /* Avoid branch in fixing the byte. */
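/* At this point OUT is either 3 or 4 bytes past the terminating zero, and
   the low byte of TMPREG has its top bit set exactly when the zero is the
   earlier byte of the remaining pair.  Doubling that byte moves the top bit
   into the carry flag, so subtracting 3 plus the carry from OUT lands
   directly on the zero byte.  */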
11453 tmpreg = gen_lowpart (QImode, tmpreg);
11454 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
11455 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
11456 if (TARGET_64BIT)
11457 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
11458 else
11459 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
11460
11461 emit_label (end_0_label);
11462 }
11463
11464 void
11465 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
11466 rtx callarg2 ATTRIBUTE_UNUSED,
11467 rtx pop, int sibcall)
11468 {
11469 rtx use = NULL, call;
11470
11471 if (pop == const0_rtx)
11472 pop = NULL;
11473 if (TARGET_64BIT && pop)
11474 abort ();
11475
11476 #if TARGET_MACHO
11477 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
11478 fnaddr = machopic_indirect_call_target (fnaddr);
11479 #else
11480 /* Static functions and indirect calls don't need the pic register. */
11481 if (! TARGET_64BIT && flag_pic
11482 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
11483 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
11484 use_reg (&use, pic_offset_table_rtx);
11485
11486 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
11487 {
11488 rtx al = gen_rtx_REG (QImode, 0);
11489 emit_move_insn (al, callarg2);
11490 use_reg (&use, al);
11491 }
11492 #endif /* TARGET_MACHO */
11493
11494 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
11495 {
11496 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11497 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11498 }
11499 if (sibcall && TARGET_64BIT
11500 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
11501 {
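/* R11 is a call-clobbered register that is not used for passing arguments
   in the x86-64 ABI, so it can safely hold the target address of a sibling
   call without disturbing the outgoing argument registers.  */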
11502 rtx addr;
11503 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11504 fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
11505 emit_move_insn (fnaddr, addr);
11506 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11507 }
11508
11509 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
11510 if (retval)
11511 call = gen_rtx_SET (VOIDmode, retval, call);
11512 if (pop)
11513 {
11514 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
11515 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
11516 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
11517 }
11518
11519 call = emit_call_insn (call);
11520 if (use)
11521 CALL_INSN_FUNCTION_USAGE (call) = use;
11522 }
11523
11524 \f
11525 /* Clear stack slot assignments remembered from previous functions.
11526 This is called from INIT_EXPANDERS once before RTL is emitted for each
11527 function. */
11528
11529 static struct machine_function *
11530 ix86_init_machine_status (void)
11531 {
11532 struct machine_function *f;
11533
11534 f = ggc_alloc_cleared (sizeof (struct machine_function));
11535 f->use_fast_prologue_epilogue_nregs = -1;
11536
11537 return f;
11538 }
11539
11540 /* Return a MEM corresponding to a stack slot with mode MODE.
11541 Allocate a new slot if necessary.
11542
11543 The RTL for a function can have several slots available: N is
11544 which slot to use. */
11545
11546 rtx
11547 assign_386_stack_local (enum machine_mode mode, int n)
11548 {
11549 struct stack_local_entry *s;
11550
11551 if (n < 0 || n >= MAX_386_STACK_LOCALS)
11552 abort ();
11553
11554 for (s = ix86_stack_locals; s; s = s->next)
11555 if (s->mode == mode && s->n == n)
11556 return s->rtl;
11557
11558 s = (struct stack_local_entry *)
11559 ggc_alloc (sizeof (struct stack_local_entry));
11560 s->n = n;
11561 s->mode = mode;
11562 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
11563
11564 s->next = ix86_stack_locals;
11565 ix86_stack_locals = s;
11566 return s->rtl;
11567 }
11568
11569 /* Construct the SYMBOL_REF for the tls_get_addr function. */
11570
11571 static GTY(()) rtx ix86_tls_symbol;
11572 rtx
11573 ix86_tls_get_addr (void)
11574 {
11575
11576 if (!ix86_tls_symbol)
11577 {
11578 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
11579 (TARGET_GNU_TLS && !TARGET_64BIT)
11580 ? "___tls_get_addr"
11581 : "__tls_get_addr");
11582 }
11583
11584 return ix86_tls_symbol;
11585 }
11586 \f
11587 /* Calculate the length of the memory address in the instruction
11588 encoding. Does not include the one-byte modrm, opcode, or prefix. */
11589
11590 int
11591 memory_address_length (rtx addr)
11592 {
11593 struct ix86_address parts;
11594 rtx base, index, disp;
11595 int len;
11596
11597 if (GET_CODE (addr) == PRE_DEC
11598 || GET_CODE (addr) == POST_INC
11599 || GET_CODE (addr) == PRE_MODIFY
11600 || GET_CODE (addr) == POST_MODIFY)
11601 return 0;
11602
11603 if (! ix86_decompose_address (addr, &parts))
11604 abort ();
11605
11606 base = parts.base;
11607 index = parts.index;
11608 disp = parts.disp;
11609 len = 0;
11610
11611 /* Rule of thumb:
11612 - esp as the base always wants an index,
11613 - ebp as the base always wants a displacement. */
11614
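/* Examples of what is being counted below (the modrm and opcode bytes are
   excluded): (%eax) adds nothing, 8(%ebp) adds 1 byte of disp8, a bare
   symbol adds 4 bytes of disp32, and any index register -- or %esp used as
   the base -- adds 1 byte for the SIB byte.  */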
11615 /* Register Indirect. */
11616 if (base && !index && !disp)
11617 {
11618 /* esp (for its index) and ebp (for its displacement) need
11619 the two-byte modrm form. */
11620 if (addr == stack_pointer_rtx
11621 || addr == arg_pointer_rtx
11622 || addr == frame_pointer_rtx
11623 || addr == hard_frame_pointer_rtx)
11624 len = 1;
11625 }
11626
11627 /* Direct Addressing. */
11628 else if (disp && !base && !index)
11629 len = 4;
11630
11631 else
11632 {
11633 /* Find the length of the displacement constant. */
11634 if (disp)
11635 {
11636 if (GET_CODE (disp) == CONST_INT
11637 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K')
11638 && base)
11639 len = 1;
11640 else
11641 len = 4;
11642 }
11643 /* ebp always wants a displacement. */
11644 else if (base == hard_frame_pointer_rtx)
11645 len = 1;
11646
11647 /* An index requires the two-byte modrm form.... */
11648 if (index
11649 /* ...like esp, which always wants an index. */
11650 || base == stack_pointer_rtx
11651 || base == arg_pointer_rtx
11652 || base == frame_pointer_rtx)
11653 len += 1;
11654 }
11655
11656 return len;
11657 }
11658
11659 /* Compute the default value for the "length_immediate" attribute. When
11660 SHORTFORM is set, expect that the insn has an 8-bit immediate alternative. */
11661 int
11662 ix86_attr_length_immediate_default (rtx insn, int shortform)
11663 {
11664 int len = 0;
11665 int i;
11666 extract_insn_cached (insn);
11667 for (i = recog_data.n_operands - 1; i >= 0; --i)
11668 if (CONSTANT_P (recog_data.operand[i]))
11669 {
11670 if (len)
11671 abort ();
11672 if (shortform
11673 && GET_CODE (recog_data.operand[i]) == CONST_INT
11674 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
11675 len = 1;
11676 else
11677 {
11678 switch (get_attr_mode (insn))
11679 {
11680 case MODE_QI:
11681 len += 1;
11682 break;
11683 case MODE_HI:
11684 len += 2;
11685 break;
11686 case MODE_SI:
11687 len += 4;
11688 break;
11689 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
11690 case MODE_DI:
11691 len += 4;
11692 break;
11693 default:
11694 fatal_insn ("unknown insn mode", insn);
11695 }
11696 }
11697 }
11698 return len;
11699 }
11700 /* Compute default value for "length_address" attribute. */
11701 int
11702 ix86_attr_length_address_default (rtx insn)
11703 {
11704 int i;
11705
11706 if (get_attr_type (insn) == TYPE_LEA)
11707 {
11708 rtx set = PATTERN (insn);
11709 if (GET_CODE (set) == SET)
11710 ;
11711 else if (GET_CODE (set) == PARALLEL
11712 && GET_CODE (XVECEXP (set, 0, 0)) == SET)
11713 set = XVECEXP (set, 0, 0);
11714 else
11715 {
11716 #ifdef ENABLE_CHECKING
11717 abort ();
11718 #endif
11719 return 0;
11720 }
11721
11722 return memory_address_length (SET_SRC (set));
11723 }
11724
11725 extract_insn_cached (insn);
11726 for (i = recog_data.n_operands - 1; i >= 0; --i)
11727 if (GET_CODE (recog_data.operand[i]) == MEM)
11728 {
11729 return memory_address_length (XEXP (recog_data.operand[i], 0));
11731 }
11732 return 0;
11733 }
11734 \f
11735 /* Return the maximum number of instructions a cpu can issue. */
11736
11737 static int
11738 ix86_issue_rate (void)
11739 {
11740 switch (ix86_tune)
11741 {
11742 case PROCESSOR_PENTIUM:
11743 case PROCESSOR_K6:
11744 return 2;
11745
11746 case PROCESSOR_PENTIUMPRO:
11747 case PROCESSOR_PENTIUM4:
11748 case PROCESSOR_ATHLON:
11749 case PROCESSOR_K8:
11750 case PROCESSOR_NOCONA:
11751 return 3;
11752
11753 default:
11754 return 1;
11755 }
11756 }
11757
11758 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads the flags
11759 set by DEP_INSN and nothing else that DEP_INSN sets. */
11760
11761 static int
11762 ix86_flags_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
11763 {
11764 rtx set, set2;
11765
11766 /* Simplify the test for uninteresting insns. */
11767 if (insn_type != TYPE_SETCC
11768 && insn_type != TYPE_ICMOV
11769 && insn_type != TYPE_FCMOV
11770 && insn_type != TYPE_IBR)
11771 return 0;
11772
11773 if ((set = single_set (dep_insn)) != 0)
11774 {
11775 set = SET_DEST (set);
11776 set2 = NULL_RTX;
11777 }
11778 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
11779 && XVECLEN (PATTERN (dep_insn), 0) == 2
11780 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
11781 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
11782 {
11783 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
11784 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
11785 }
11786 else
11787 return 0;
11788
11789 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
11790 return 0;
11791
11792 /* This test is true if the dependent insn reads the flags but
11793 not any other potentially set register. */
11794 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
11795 return 0;
11796
11797 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
11798 return 0;
11799
11800 return 1;
11801 }
11802
11803 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
11804 address with operands set by DEP_INSN. */
11805
11806 static int
11807 ix86_agi_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
11808 {
11809 rtx addr;
11810
11811 if (insn_type == TYPE_LEA
11812 && TARGET_PENTIUM)
11813 {
11814 addr = PATTERN (insn);
11815 if (GET_CODE (addr) == SET)
11816 ;
11817 else if (GET_CODE (addr) == PARALLEL
11818 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
11819 addr = XVECEXP (addr, 0, 0);
11820 else
11821 abort ();
11822 addr = SET_SRC (addr);
11823 }
11824 else
11825 {
11826 int i;
11827 extract_insn_cached (insn);
11828 for (i = recog_data.n_operands - 1; i >= 0; --i)
11829 if (GET_CODE (recog_data.operand[i]) == MEM)
11830 {
11831 addr = XEXP (recog_data.operand[i], 0);
11832 goto found;
11833 }
11834 return 0;
11835 found:;
11836 }
11837
11838 return modified_in_p (addr, dep_insn);
11839 }
11840
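/* Target hook for scheduling: adjust the COST of the dependence described
   by LINK between DEP_INSN and INSN, according to the processor currently
   being tuned for.  */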
11841 static int
11842 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
11843 {
11844 enum attr_type insn_type, dep_insn_type;
11845 enum attr_memory memory;
11846 rtx set, set2;
11847 int dep_insn_code_number;
11848
11849 /* Anti and output dependencies have zero cost on all CPUs. */
11850 if (REG_NOTE_KIND (link) != 0)
11851 return 0;
11852
11853 dep_insn_code_number = recog_memoized (dep_insn);
11854
11855 /* If we can't recognize the insns, we can't really do anything. */
11856 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
11857 return cost;
11858
11859 insn_type = get_attr_type (insn);
11860 dep_insn_type = get_attr_type (dep_insn);
11861
11862 switch (ix86_tune)
11863 {
11864 case PROCESSOR_PENTIUM:
11865 /* Address Generation Interlock adds a cycle of latency. */
11866 if (ix86_agi_dependant (insn, dep_insn, insn_type))
11867 cost += 1;
11868
11869 /* ??? Compares pair with jump/setcc. */
11870 if (ix86_flags_dependant (insn, dep_insn, insn_type))
11871 cost = 0;
11872
11873 /* Floating point stores require value to be ready one cycle earlier. */
11874 if (insn_type == TYPE_FMOV
11875 && get_attr_memory (insn) == MEMORY_STORE
11876 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11877 cost += 1;
11878 break;
11879
11880 case PROCESSOR_PENTIUMPRO:
11881 memory = get_attr_memory (insn);
11882
11883 /* INT->FP conversion is expensive. */
11884 if (get_attr_fp_int_src (dep_insn))
11885 cost += 5;
11886
11887 /* There is one cycle extra latency between an FP op and a store. */
11888 if (insn_type == TYPE_FMOV
11889 && (set = single_set (dep_insn)) != NULL_RTX
11890 && (set2 = single_set (insn)) != NULL_RTX
11891 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
11892 && GET_CODE (SET_DEST (set2)) == MEM)
11893 cost += 1;
11894
11895 /* Show the ability of the reorder buffer to hide the latency of a load
11896 by executing it in parallel with the previous instruction when the
11897 previous instruction is not needed to compute the address. */
11898 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11899 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11900 {
11901 /* Claim moves to take one cycle, as the core can issue one load
11902 at a time and the next load can start a cycle later. */
11903 if (dep_insn_type == TYPE_IMOV
11904 || dep_insn_type == TYPE_FMOV)
11905 cost = 1;
11906 else if (cost > 1)
11907 cost--;
11908 }
11909 break;
11910
11911 case PROCESSOR_K6:
11912 memory = get_attr_memory (insn);
11913
11914 /* The esp dependency is resolved before the instruction is really
11915 finished. */
11916 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
11917 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
11918 return 1;
11919
11920 /* INT->FP conversion is expensive. */
11921 if (get_attr_fp_int_src (dep_insn))
11922 cost += 5;
11923
11924 /* Show the ability of the reorder buffer to hide the latency of a load
11925 by executing it in parallel with the previous instruction when the
11926 previous instruction is not needed to compute the address. */
11927 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11928 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11929 {
11930 /* Claim moves to take one cycle, as the core can issue one load
11931 at a time and the next load can start a cycle later. */
11932 if (dep_insn_type == TYPE_IMOV
11933 || dep_insn_type == TYPE_FMOV)
11934 cost = 1;
11935 else if (cost > 2)
11936 cost -= 2;
11937 else
11938 cost = 1;
11939 }
11940 break;
11941
11942 case PROCESSOR_ATHLON:
11943 case PROCESSOR_K8:
11944 memory = get_attr_memory (insn);
11945
11946 /* Show the ability of the reorder buffer to hide the latency of a load
11947 by executing it in parallel with the previous instruction when the
11948 previous instruction is not needed to compute the address. */
11949 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11950 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11951 {
11952 enum attr_unit unit = get_attr_unit (insn);
11953 int loadcost = 3;
11954
11955 /* Because of the difference between the length of integer and
11956 floating unit pipeline preparation stages, the memory operands
11957 for floating point are cheaper.
11958
11959 ??? For Athlon the difference is most probably 2. */
11960 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
11961 loadcost = 3;
11962 else
11963 loadcost = TARGET_ATHLON ? 2 : 0;
11964
11965 if (cost >= loadcost)
11966 cost -= loadcost;
11967 else
11968 cost = 0;
11969 }
11970
11971 default:
11972 break;
11973 }
11974
11975 return cost;
11976 }
11977
11978 /* How many alternative schedules to try. This should be as wide as the
11979 scheduling freedom in the DFA, but no wider. Making this value too
11980 large results in extra work for the scheduler. */
11981
11982 static int
11983 ia32_multipass_dfa_lookahead (void)
11984 {
11985 if (ix86_tune == PROCESSOR_PENTIUM)
11986 return 2;
11987
11988 if (ix86_tune == PROCESSOR_PENTIUMPRO
11989 || ix86_tune == PROCESSOR_K6)
11990 return 1;
11991
11992 else
11993 return 0;
11994 }
11995
11996 \f
11997 /* Compute the alignment given to a constant that is being placed in memory.
11998 EXP is the constant and ALIGN is the alignment that the object would
11999 ordinarily have.
12000 The value of this function is used instead of that alignment to align
12001 the object. */
12002
12003 int
12004 ix86_constant_alignment (tree exp, int align)
12005 {
12006 if (TREE_CODE (exp) == REAL_CST)
12007 {
12008 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
12009 return 64;
12010 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
12011 return 128;
12012 }
12013 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
12014 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
12015 return BITS_PER_WORD;
12016
12017 return align;
12018 }
12019
12020 /* Compute the alignment for a static variable.
12021 TYPE is the data type, and ALIGN is the alignment that
12022 the object would ordinarily have. The value of this function is used
12023 instead of that alignment to align the object. */
12024
12025 int
12026 ix86_data_alignment (tree type, int align)
12027 {
12028 if (AGGREGATE_TYPE_P (type)
12029 && TYPE_SIZE (type)
12030 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12031 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
12032 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
12033 return 256;
12034
12035 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
12036 to a 16-byte boundary. */
12037 if (TARGET_64BIT)
12038 {
12039 if (AGGREGATE_TYPE_P (type)
12040 && TYPE_SIZE (type)
12041 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12042 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
12043 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12044 return 128;
12045 }
12046
12047 if (TREE_CODE (type) == ARRAY_TYPE)
12048 {
12049 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12050 return 64;
12051 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12052 return 128;
12053 }
12054 else if (TREE_CODE (type) == COMPLEX_TYPE)
12055 {
12056
12057 if (TYPE_MODE (type) == DCmode && align < 64)
12058 return 64;
12059 if (TYPE_MODE (type) == XCmode && align < 128)
12060 return 128;
12061 }
12062 else if ((TREE_CODE (type) == RECORD_TYPE
12063 || TREE_CODE (type) == UNION_TYPE
12064 || TREE_CODE (type) == QUAL_UNION_TYPE)
12065 && TYPE_FIELDS (type))
12066 {
12067 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12068 return 64;
12069 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12070 return 128;
12071 }
12072 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12073 || TREE_CODE (type) == INTEGER_TYPE)
12074 {
12075 if (TYPE_MODE (type) == DFmode && align < 64)
12076 return 64;
12077 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12078 return 128;
12079 }
12080
12081 return align;
12082 }
12083
12084 /* Compute the alignment for a local variable.
12085 TYPE is the data type, and ALIGN is the alignment that
12086 the object would ordinarily have. The value of this macro is used
12087 instead of that alignment to align the object. */
12088
12089 int
12090 ix86_local_alignment (tree type, int align)
12091 {
12092 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
12093 to a 16-byte boundary. */
12094 if (TARGET_64BIT)
12095 {
12096 if (AGGREGATE_TYPE_P (type)
12097 && TYPE_SIZE (type)
12098 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12099 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
12100 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12101 return 128;
12102 }
12103 if (TREE_CODE (type) == ARRAY_TYPE)
12104 {
12105 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12106 return 64;
12107 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12108 return 128;
12109 }
12110 else if (TREE_CODE (type) == COMPLEX_TYPE)
12111 {
12112 if (TYPE_MODE (type) == DCmode && align < 64)
12113 return 64;
12114 if (TYPE_MODE (type) == XCmode && align < 128)
12115 return 128;
12116 }
12117 else if ((TREE_CODE (type) == RECORD_TYPE
12118 || TREE_CODE (type) == UNION_TYPE
12119 || TREE_CODE (type) == QUAL_UNION_TYPE)
12120 && TYPE_FIELDS (type))
12121 {
12122 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12123 return 64;
12124 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12125 return 128;
12126 }
12127 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12128 || TREE_CODE (type) == INTEGER_TYPE)
12129 {
12130
12131 if (TYPE_MODE (type) == DFmode && align < 64)
12132 return 64;
12133 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12134 return 128;
12135 }
12136 return align;
12137 }
12138 \f
12139 /* Emit RTL insns to initialize the variable parts of a trampoline.
12140 FNADDR is an RTX for the address of the function's pure code.
12141 CXT is an RTX for the static chain value for the function. */
12142 void
12143 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
12144 {
12145 if (!TARGET_64BIT)
12146 {
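/* The 32-bit trampoline is "movl $cxt, %ecx; jmp <fnaddr>": byte 0 is the
   0xb9 opcode, bytes 1-4 the static chain value, byte 5 the 0xe9 opcode,
   and bytes 6-9 the jump displacement, measured from the end of the
   10-byte sequence.  */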
12147 /* Compute offset from the end of the jmp to the target function. */
12148 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
12149 plus_constant (tramp, 10),
12150 NULL_RTX, 1, OPTAB_DIRECT);
12151 emit_move_insn (gen_rtx_MEM (QImode, tramp),
12152 gen_int_mode (0xb9, QImode));
12153 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
12154 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
12155 gen_int_mode (0xe9, QImode));
12156 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
12157 }
12158 else
12159 {
12160 int offset = 0;
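/* The 64-bit trampoline is "mov $fnaddr, %r11; mov $cxt, %r10; jmp *%r11".
   The first move uses the 6-byte movl form when the address fits in 32
   bits zero-extended, and the 10-byte movabs form otherwise.  */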
12161 /* Try to load the address using the shorter movl instead of movabs.
12162 We may want to support movq for kernel mode, but the kernel does not
12163 use trampolines at the moment. */
12164 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
12165 {
12166 fnaddr = copy_to_mode_reg (DImode, fnaddr);
12167 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12168 gen_int_mode (0xbb41, HImode));
12169 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
12170 gen_lowpart (SImode, fnaddr));
12171 offset += 6;
12172 }
12173 else
12174 {
12175 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12176 gen_int_mode (0xbb49, HImode));
12177 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12178 fnaddr);
12179 offset += 10;
12180 }
12181 /* Load static chain using movabs to r10. */
12182 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12183 gen_int_mode (0xba49, HImode));
12184 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12185 cxt);
12186 offset += 10;
12187 /* Jump through r11. */
12188 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12189 gen_int_mode (0xff49, HImode));
12190 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
12191 gen_int_mode (0xe3, QImode));
12192 offset += 3;
12193 if (offset > TRAMPOLINE_SIZE)
12194 abort ();
12195 }
12196
12197 #ifdef ENABLE_EXECUTE_STACK
12198 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
12199 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
12200 #endif
12201 }
12202 \f
12203 /* Codes for all the SSE/MMX builtins. */
12204 enum ix86_builtins
12205 {
12206 IX86_BUILTIN_ADDPS,
12207 IX86_BUILTIN_ADDSS,
12208 IX86_BUILTIN_DIVPS,
12209 IX86_BUILTIN_DIVSS,
12210 IX86_BUILTIN_MULPS,
12211 IX86_BUILTIN_MULSS,
12212 IX86_BUILTIN_SUBPS,
12213 IX86_BUILTIN_SUBSS,
12214
12215 IX86_BUILTIN_CMPEQPS,
12216 IX86_BUILTIN_CMPLTPS,
12217 IX86_BUILTIN_CMPLEPS,
12218 IX86_BUILTIN_CMPGTPS,
12219 IX86_BUILTIN_CMPGEPS,
12220 IX86_BUILTIN_CMPNEQPS,
12221 IX86_BUILTIN_CMPNLTPS,
12222 IX86_BUILTIN_CMPNLEPS,
12223 IX86_BUILTIN_CMPNGTPS,
12224 IX86_BUILTIN_CMPNGEPS,
12225 IX86_BUILTIN_CMPORDPS,
12226 IX86_BUILTIN_CMPUNORDPS,
12227 IX86_BUILTIN_CMPNEPS,
12228 IX86_BUILTIN_CMPEQSS,
12229 IX86_BUILTIN_CMPLTSS,
12230 IX86_BUILTIN_CMPLESS,
12231 IX86_BUILTIN_CMPNEQSS,
12232 IX86_BUILTIN_CMPNLTSS,
12233 IX86_BUILTIN_CMPNLESS,
12234 IX86_BUILTIN_CMPNGTSS,
12235 IX86_BUILTIN_CMPNGESS,
12236 IX86_BUILTIN_CMPORDSS,
12237 IX86_BUILTIN_CMPUNORDSS,
12238 IX86_BUILTIN_CMPNESS,
12239
12240 IX86_BUILTIN_COMIEQSS,
12241 IX86_BUILTIN_COMILTSS,
12242 IX86_BUILTIN_COMILESS,
12243 IX86_BUILTIN_COMIGTSS,
12244 IX86_BUILTIN_COMIGESS,
12245 IX86_BUILTIN_COMINEQSS,
12246 IX86_BUILTIN_UCOMIEQSS,
12247 IX86_BUILTIN_UCOMILTSS,
12248 IX86_BUILTIN_UCOMILESS,
12249 IX86_BUILTIN_UCOMIGTSS,
12250 IX86_BUILTIN_UCOMIGESS,
12251 IX86_BUILTIN_UCOMINEQSS,
12252
12253 IX86_BUILTIN_CVTPI2PS,
12254 IX86_BUILTIN_CVTPS2PI,
12255 IX86_BUILTIN_CVTSI2SS,
12256 IX86_BUILTIN_CVTSI642SS,
12257 IX86_BUILTIN_CVTSS2SI,
12258 IX86_BUILTIN_CVTSS2SI64,
12259 IX86_BUILTIN_CVTTPS2PI,
12260 IX86_BUILTIN_CVTTSS2SI,
12261 IX86_BUILTIN_CVTTSS2SI64,
12262
12263 IX86_BUILTIN_MAXPS,
12264 IX86_BUILTIN_MAXSS,
12265 IX86_BUILTIN_MINPS,
12266 IX86_BUILTIN_MINSS,
12267
12268 IX86_BUILTIN_LOADUPS,
12269 IX86_BUILTIN_STOREUPS,
12270 IX86_BUILTIN_MOVSS,
12271
12272 IX86_BUILTIN_MOVHLPS,
12273 IX86_BUILTIN_MOVLHPS,
12274 IX86_BUILTIN_LOADHPS,
12275 IX86_BUILTIN_LOADLPS,
12276 IX86_BUILTIN_STOREHPS,
12277 IX86_BUILTIN_STORELPS,
12278
12279 IX86_BUILTIN_MASKMOVQ,
12280 IX86_BUILTIN_MOVMSKPS,
12281 IX86_BUILTIN_PMOVMSKB,
12282
12283 IX86_BUILTIN_MOVNTPS,
12284 IX86_BUILTIN_MOVNTQ,
12285
12286 IX86_BUILTIN_LOADDQU,
12287 IX86_BUILTIN_STOREDQU,
12288
12289 IX86_BUILTIN_PACKSSWB,
12290 IX86_BUILTIN_PACKSSDW,
12291 IX86_BUILTIN_PACKUSWB,
12292
12293 IX86_BUILTIN_PADDB,
12294 IX86_BUILTIN_PADDW,
12295 IX86_BUILTIN_PADDD,
12296 IX86_BUILTIN_PADDQ,
12297 IX86_BUILTIN_PADDSB,
12298 IX86_BUILTIN_PADDSW,
12299 IX86_BUILTIN_PADDUSB,
12300 IX86_BUILTIN_PADDUSW,
12301 IX86_BUILTIN_PSUBB,
12302 IX86_BUILTIN_PSUBW,
12303 IX86_BUILTIN_PSUBD,
12304 IX86_BUILTIN_PSUBQ,
12305 IX86_BUILTIN_PSUBSB,
12306 IX86_BUILTIN_PSUBSW,
12307 IX86_BUILTIN_PSUBUSB,
12308 IX86_BUILTIN_PSUBUSW,
12309
12310 IX86_BUILTIN_PAND,
12311 IX86_BUILTIN_PANDN,
12312 IX86_BUILTIN_POR,
12313 IX86_BUILTIN_PXOR,
12314
12315 IX86_BUILTIN_PAVGB,
12316 IX86_BUILTIN_PAVGW,
12317
12318 IX86_BUILTIN_PCMPEQB,
12319 IX86_BUILTIN_PCMPEQW,
12320 IX86_BUILTIN_PCMPEQD,
12321 IX86_BUILTIN_PCMPGTB,
12322 IX86_BUILTIN_PCMPGTW,
12323 IX86_BUILTIN_PCMPGTD,
12324
12325 IX86_BUILTIN_PMADDWD,
12326
12327 IX86_BUILTIN_PMAXSW,
12328 IX86_BUILTIN_PMAXUB,
12329 IX86_BUILTIN_PMINSW,
12330 IX86_BUILTIN_PMINUB,
12331
12332 IX86_BUILTIN_PMULHUW,
12333 IX86_BUILTIN_PMULHW,
12334 IX86_BUILTIN_PMULLW,
12335
12336 IX86_BUILTIN_PSADBW,
12337 IX86_BUILTIN_PSHUFW,
12338
12339 IX86_BUILTIN_PSLLW,
12340 IX86_BUILTIN_PSLLD,
12341 IX86_BUILTIN_PSLLQ,
12342 IX86_BUILTIN_PSRAW,
12343 IX86_BUILTIN_PSRAD,
12344 IX86_BUILTIN_PSRLW,
12345 IX86_BUILTIN_PSRLD,
12346 IX86_BUILTIN_PSRLQ,
12347 IX86_BUILTIN_PSLLWI,
12348 IX86_BUILTIN_PSLLDI,
12349 IX86_BUILTIN_PSLLQI,
12350 IX86_BUILTIN_PSRAWI,
12351 IX86_BUILTIN_PSRADI,
12352 IX86_BUILTIN_PSRLWI,
12353 IX86_BUILTIN_PSRLDI,
12354 IX86_BUILTIN_PSRLQI,
12355
12356 IX86_BUILTIN_PUNPCKHBW,
12357 IX86_BUILTIN_PUNPCKHWD,
12358 IX86_BUILTIN_PUNPCKHDQ,
12359 IX86_BUILTIN_PUNPCKLBW,
12360 IX86_BUILTIN_PUNPCKLWD,
12361 IX86_BUILTIN_PUNPCKLDQ,
12362
12363 IX86_BUILTIN_SHUFPS,
12364
12365 IX86_BUILTIN_RCPPS,
12366 IX86_BUILTIN_RCPSS,
12367 IX86_BUILTIN_RSQRTPS,
12368 IX86_BUILTIN_RSQRTSS,
12369 IX86_BUILTIN_SQRTPS,
12370 IX86_BUILTIN_SQRTSS,
12371
12372 IX86_BUILTIN_UNPCKHPS,
12373 IX86_BUILTIN_UNPCKLPS,
12374
12375 IX86_BUILTIN_ANDPS,
12376 IX86_BUILTIN_ANDNPS,
12377 IX86_BUILTIN_ORPS,
12378 IX86_BUILTIN_XORPS,
12379
12380 IX86_BUILTIN_EMMS,
12381 IX86_BUILTIN_LDMXCSR,
12382 IX86_BUILTIN_STMXCSR,
12383 IX86_BUILTIN_SFENCE,
12384
12385 /* 3DNow! Original */
12386 IX86_BUILTIN_FEMMS,
12387 IX86_BUILTIN_PAVGUSB,
12388 IX86_BUILTIN_PF2ID,
12389 IX86_BUILTIN_PFACC,
12390 IX86_BUILTIN_PFADD,
12391 IX86_BUILTIN_PFCMPEQ,
12392 IX86_BUILTIN_PFCMPGE,
12393 IX86_BUILTIN_PFCMPGT,
12394 IX86_BUILTIN_PFMAX,
12395 IX86_BUILTIN_PFMIN,
12396 IX86_BUILTIN_PFMUL,
12397 IX86_BUILTIN_PFRCP,
12398 IX86_BUILTIN_PFRCPIT1,
12399 IX86_BUILTIN_PFRCPIT2,
12400 IX86_BUILTIN_PFRSQIT1,
12401 IX86_BUILTIN_PFRSQRT,
12402 IX86_BUILTIN_PFSUB,
12403 IX86_BUILTIN_PFSUBR,
12404 IX86_BUILTIN_PI2FD,
12405 IX86_BUILTIN_PMULHRW,
12406
12407 /* 3DNow! Athlon Extensions */
12408 IX86_BUILTIN_PF2IW,
12409 IX86_BUILTIN_PFNACC,
12410 IX86_BUILTIN_PFPNACC,
12411 IX86_BUILTIN_PI2FW,
12412 IX86_BUILTIN_PSWAPDSI,
12413 IX86_BUILTIN_PSWAPDSF,
12414
12415 /* SSE2 */
12416 IX86_BUILTIN_ADDPD,
12417 IX86_BUILTIN_ADDSD,
12418 IX86_BUILTIN_DIVPD,
12419 IX86_BUILTIN_DIVSD,
12420 IX86_BUILTIN_MULPD,
12421 IX86_BUILTIN_MULSD,
12422 IX86_BUILTIN_SUBPD,
12423 IX86_BUILTIN_SUBSD,
12424
12425 IX86_BUILTIN_CMPEQPD,
12426 IX86_BUILTIN_CMPLTPD,
12427 IX86_BUILTIN_CMPLEPD,
12428 IX86_BUILTIN_CMPGTPD,
12429 IX86_BUILTIN_CMPGEPD,
12430 IX86_BUILTIN_CMPNEQPD,
12431 IX86_BUILTIN_CMPNLTPD,
12432 IX86_BUILTIN_CMPNLEPD,
12433 IX86_BUILTIN_CMPNGTPD,
12434 IX86_BUILTIN_CMPNGEPD,
12435 IX86_BUILTIN_CMPORDPD,
12436 IX86_BUILTIN_CMPUNORDPD,
12437 IX86_BUILTIN_CMPNEPD,
12438 IX86_BUILTIN_CMPEQSD,
12439 IX86_BUILTIN_CMPLTSD,
12440 IX86_BUILTIN_CMPLESD,
12441 IX86_BUILTIN_CMPNEQSD,
12442 IX86_BUILTIN_CMPNLTSD,
12443 IX86_BUILTIN_CMPNLESD,
12444 IX86_BUILTIN_CMPORDSD,
12445 IX86_BUILTIN_CMPUNORDSD,
12446 IX86_BUILTIN_CMPNESD,
12447
12448 IX86_BUILTIN_COMIEQSD,
12449 IX86_BUILTIN_COMILTSD,
12450 IX86_BUILTIN_COMILESD,
12451 IX86_BUILTIN_COMIGTSD,
12452 IX86_BUILTIN_COMIGESD,
12453 IX86_BUILTIN_COMINEQSD,
12454 IX86_BUILTIN_UCOMIEQSD,
12455 IX86_BUILTIN_UCOMILTSD,
12456 IX86_BUILTIN_UCOMILESD,
12457 IX86_BUILTIN_UCOMIGTSD,
12458 IX86_BUILTIN_UCOMIGESD,
12459 IX86_BUILTIN_UCOMINEQSD,
12460
12461 IX86_BUILTIN_MAXPD,
12462 IX86_BUILTIN_MAXSD,
12463 IX86_BUILTIN_MINPD,
12464 IX86_BUILTIN_MINSD,
12465
12466 IX86_BUILTIN_ANDPD,
12467 IX86_BUILTIN_ANDNPD,
12468 IX86_BUILTIN_ORPD,
12469 IX86_BUILTIN_XORPD,
12470
12471 IX86_BUILTIN_SQRTPD,
12472 IX86_BUILTIN_SQRTSD,
12473
12474 IX86_BUILTIN_UNPCKHPD,
12475 IX86_BUILTIN_UNPCKLPD,
12476
12477 IX86_BUILTIN_SHUFPD,
12478
12479 IX86_BUILTIN_LOADUPD,
12480 IX86_BUILTIN_STOREUPD,
12481 IX86_BUILTIN_MOVSD,
12482
12483 IX86_BUILTIN_LOADHPD,
12484 IX86_BUILTIN_LOADLPD,
12485
12486 IX86_BUILTIN_CVTDQ2PD,
12487 IX86_BUILTIN_CVTDQ2PS,
12488
12489 IX86_BUILTIN_CVTPD2DQ,
12490 IX86_BUILTIN_CVTPD2PI,
12491 IX86_BUILTIN_CVTPD2PS,
12492 IX86_BUILTIN_CVTTPD2DQ,
12493 IX86_BUILTIN_CVTTPD2PI,
12494
12495 IX86_BUILTIN_CVTPI2PD,
12496 IX86_BUILTIN_CVTSI2SD,
12497 IX86_BUILTIN_CVTSI642SD,
12498
12499 IX86_BUILTIN_CVTSD2SI,
12500 IX86_BUILTIN_CVTSD2SI64,
12501 IX86_BUILTIN_CVTSD2SS,
12502 IX86_BUILTIN_CVTSS2SD,
12503 IX86_BUILTIN_CVTTSD2SI,
12504 IX86_BUILTIN_CVTTSD2SI64,
12505
12506 IX86_BUILTIN_CVTPS2DQ,
12507 IX86_BUILTIN_CVTPS2PD,
12508 IX86_BUILTIN_CVTTPS2DQ,
12509
12510 IX86_BUILTIN_MOVNTI,
12511 IX86_BUILTIN_MOVNTPD,
12512 IX86_BUILTIN_MOVNTDQ,
12513
12514 /* SSE2 MMX */
12515 IX86_BUILTIN_MASKMOVDQU,
12516 IX86_BUILTIN_MOVMSKPD,
12517 IX86_BUILTIN_PMOVMSKB128,
12518
12519 IX86_BUILTIN_PACKSSWB128,
12520 IX86_BUILTIN_PACKSSDW128,
12521 IX86_BUILTIN_PACKUSWB128,
12522
12523 IX86_BUILTIN_PADDB128,
12524 IX86_BUILTIN_PADDW128,
12525 IX86_BUILTIN_PADDD128,
12526 IX86_BUILTIN_PADDQ128,
12527 IX86_BUILTIN_PADDSB128,
12528 IX86_BUILTIN_PADDSW128,
12529 IX86_BUILTIN_PADDUSB128,
12530 IX86_BUILTIN_PADDUSW128,
12531 IX86_BUILTIN_PSUBB128,
12532 IX86_BUILTIN_PSUBW128,
12533 IX86_BUILTIN_PSUBD128,
12534 IX86_BUILTIN_PSUBQ128,
12535 IX86_BUILTIN_PSUBSB128,
12536 IX86_BUILTIN_PSUBSW128,
12537 IX86_BUILTIN_PSUBUSB128,
12538 IX86_BUILTIN_PSUBUSW128,
12539
12540 IX86_BUILTIN_PAND128,
12541 IX86_BUILTIN_PANDN128,
12542 IX86_BUILTIN_POR128,
12543 IX86_BUILTIN_PXOR128,
12544
12545 IX86_BUILTIN_PAVGB128,
12546 IX86_BUILTIN_PAVGW128,
12547
12548 IX86_BUILTIN_PCMPEQB128,
12549 IX86_BUILTIN_PCMPEQW128,
12550 IX86_BUILTIN_PCMPEQD128,
12551 IX86_BUILTIN_PCMPGTB128,
12552 IX86_BUILTIN_PCMPGTW128,
12553 IX86_BUILTIN_PCMPGTD128,
12554
12555 IX86_BUILTIN_PMADDWD128,
12556
12557 IX86_BUILTIN_PMAXSW128,
12558 IX86_BUILTIN_PMAXUB128,
12559 IX86_BUILTIN_PMINSW128,
12560 IX86_BUILTIN_PMINUB128,
12561
12562 IX86_BUILTIN_PMULUDQ,
12563 IX86_BUILTIN_PMULUDQ128,
12564 IX86_BUILTIN_PMULHUW128,
12565 IX86_BUILTIN_PMULHW128,
12566 IX86_BUILTIN_PMULLW128,
12567
12568 IX86_BUILTIN_PSADBW128,
12569 IX86_BUILTIN_PSHUFHW,
12570 IX86_BUILTIN_PSHUFLW,
12571 IX86_BUILTIN_PSHUFD,
12572
12573 IX86_BUILTIN_PSLLW128,
12574 IX86_BUILTIN_PSLLD128,
12575 IX86_BUILTIN_PSLLQ128,
12576 IX86_BUILTIN_PSRAW128,
12577 IX86_BUILTIN_PSRAD128,
12578 IX86_BUILTIN_PSRLW128,
12579 IX86_BUILTIN_PSRLD128,
12580 IX86_BUILTIN_PSRLQ128,
12581 IX86_BUILTIN_PSLLDQI128,
12582 IX86_BUILTIN_PSLLWI128,
12583 IX86_BUILTIN_PSLLDI128,
12584 IX86_BUILTIN_PSLLQI128,
12585 IX86_BUILTIN_PSRAWI128,
12586 IX86_BUILTIN_PSRADI128,
12587 IX86_BUILTIN_PSRLDQI128,
12588 IX86_BUILTIN_PSRLWI128,
12589 IX86_BUILTIN_PSRLDI128,
12590 IX86_BUILTIN_PSRLQI128,
12591
12592 IX86_BUILTIN_PUNPCKHBW128,
12593 IX86_BUILTIN_PUNPCKHWD128,
12594 IX86_BUILTIN_PUNPCKHDQ128,
12595 IX86_BUILTIN_PUNPCKHQDQ128,
12596 IX86_BUILTIN_PUNPCKLBW128,
12597 IX86_BUILTIN_PUNPCKLWD128,
12598 IX86_BUILTIN_PUNPCKLDQ128,
12599 IX86_BUILTIN_PUNPCKLQDQ128,
12600
12601 IX86_BUILTIN_CLFLUSH,
12602 IX86_BUILTIN_MFENCE,
12603 IX86_BUILTIN_LFENCE,
12604
12605 /* Prescott New Instructions. */
12606 IX86_BUILTIN_ADDSUBPS,
12607 IX86_BUILTIN_HADDPS,
12608 IX86_BUILTIN_HSUBPS,
12609 IX86_BUILTIN_MOVSHDUP,
12610 IX86_BUILTIN_MOVSLDUP,
12611 IX86_BUILTIN_ADDSUBPD,
12612 IX86_BUILTIN_HADDPD,
12613 IX86_BUILTIN_HSUBPD,
12614 IX86_BUILTIN_LDDQU,
12615
12616 IX86_BUILTIN_MONITOR,
12617 IX86_BUILTIN_MWAIT,
12618
12619 IX86_BUILTIN_VEC_INIT_V2SI,
12620 IX86_BUILTIN_VEC_INIT_V4HI,
12621 IX86_BUILTIN_VEC_INIT_V8QI,
12622 IX86_BUILTIN_VEC_EXT_V2DF,
12623 IX86_BUILTIN_VEC_EXT_V2DI,
12624 IX86_BUILTIN_VEC_EXT_V4SF,
12625 IX86_BUILTIN_VEC_EXT_V4SI,
12626 IX86_BUILTIN_VEC_EXT_V8HI,
12627 IX86_BUILTIN_VEC_EXT_V4HI,
12628 IX86_BUILTIN_VEC_SET_V8HI,
12629 IX86_BUILTIN_VEC_SET_V4HI,
12630
12631 IX86_BUILTIN_MAX
12632 };
12633
12634 #define def_builtin(MASK, NAME, TYPE, CODE) \
12635 do { \
12636 if ((MASK) & target_flags \
12637 && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
12638 lang_hooks.builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
12639 NULL, NULL_TREE); \
12640 } while (0)
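/* For example, a two-operand SSE builtin such as __builtin_ia32_addps is
   ultimately registered through a call of the form
     def_builtin (MASK_SSE, "__builtin_ia32_addps", ftype, IX86_BUILTIN_ADDPS);
   where ftype is the V4SF x V4SF -> V4SF function type, so the builtin is
   only created when -msse is in effect (and, for MASK_64BIT builtins, only
   in 64-bit mode).  */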
12641
12642 /* Bits for builtin_description.flag. */
12643
12644 /* Set when we don't support the comparison natively, and should
12645 swap the operands of the comparison in order to support it. */
12646 #define BUILTIN_DESC_SWAP_OPERANDS 1
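/* For example, __builtin_ia32_cmpgtps in bdesc_2arg below is described as
   LT with BUILTIN_DESC_SWAP_OPERANDS set, since SSE only provides the
   less-than form of that comparison directly.  */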
12647
12648 struct builtin_description
12649 {
12650 const unsigned int mask;
12651 const enum insn_code icode;
12652 const char *const name;
12653 const enum ix86_builtins code;
12654 const enum rtx_code comparison;
12655 const unsigned int flag;
12656 };
12657
12658 static const struct builtin_description bdesc_comi[] =
12659 {
12660 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
12661 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
12662 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
12663 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
12664 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
12665 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
12666 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
12667 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
12668 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
12669 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
12670 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
12671 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
12672 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
12673 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
12674 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
12675 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
12676 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
12677 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
12678 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
12679 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
12680 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
12681 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
12682 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
12683 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
12684 };
12685
12686 static const struct builtin_description bdesc_2arg[] =
12687 {
12688 /* SSE */
12689 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
12690 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
12691 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
12692 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
12693 { MASK_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
12694 { MASK_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
12695 { MASK_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
12696 { MASK_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
12697
12698 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
12699 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
12700 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
12701 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT,
12702 BUILTIN_DESC_SWAP_OPERANDS },
12703 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE,
12704 BUILTIN_DESC_SWAP_OPERANDS },
12705 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
12706 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
12707 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
12708 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
12709 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE,
12710 BUILTIN_DESC_SWAP_OPERANDS },
12711 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT,
12712 BUILTIN_DESC_SWAP_OPERANDS },
12713 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
12714 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
12715 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
12716 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
12717 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
12718 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
12719 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
12720 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
12721 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE,
12722 BUILTIN_DESC_SWAP_OPERANDS },
12723 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT,
12724 BUILTIN_DESC_SWAP_OPERANDS },
12725 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
12726
12727 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
12728 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
12729 { MASK_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
12730 { MASK_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
12731
12732 { MASK_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
12733 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
12734 { MASK_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
12735 { MASK_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
12736
12737 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
12738 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
12739 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
12740 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
12741 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
12742
12743 /* MMX */
12744 { MASK_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
12745 { MASK_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
12746 { MASK_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
12747 { MASK_MMX, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
12748 { MASK_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
12749 { MASK_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
12750 { MASK_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
12751 { MASK_MMX, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
12752
12753 { MASK_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
12754 { MASK_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
12755 { MASK_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
12756 { MASK_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
12757 { MASK_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
12758 { MASK_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
12759 { MASK_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
12760 { MASK_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
12761
12762 { MASK_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
12763 { MASK_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
12764 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
12765
12766 { MASK_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
12767 { MASK_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
12768 { MASK_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
12769 { MASK_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
12770
12771 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
12772 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
12773
12774 { MASK_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
12775 { MASK_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
12776 { MASK_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
12777 { MASK_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
12778 { MASK_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
12779 { MASK_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
12780
12781 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
12782 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
12783 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
12784 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
12785
12786 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
12787 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
12788 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
12789 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
12790 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
12791 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
12792
12793 /* Special. */
12794 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
12795 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
12796 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
12797
12798 { MASK_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
12799 { MASK_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
12800 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
12801
12802 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
12803 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
12804 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
12805 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
12806 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
12807 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
12808
12809 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
12810 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
12811 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
12812 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
12813 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
12814 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
12815
12816 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
12817 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
12818 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
12819 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
12820
12821 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
12822 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
12823
12824 /* SSE2 */
12825 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
12826 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
12827 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
12828 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
12829 { MASK_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
12830 { MASK_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
12831 { MASK_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
12832 { MASK_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
12833
12834 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
12835 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
12836 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
12837 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT,
12838 BUILTIN_DESC_SWAP_OPERANDS },
12839 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE,
12840 BUILTIN_DESC_SWAP_OPERANDS },
12841 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
12842 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
12843 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
12844 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
12845 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE,
12846 BUILTIN_DESC_SWAP_OPERANDS },
12847 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT,
12848 BUILTIN_DESC_SWAP_OPERANDS },
12849 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
12850 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
12851 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
12852 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
12853 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
12854 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
12855 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
12856 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
12857 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },
12858
12859 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
12860 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
12861 { MASK_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
12862 { MASK_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
12863
12864 { MASK_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
12865 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
12866 { MASK_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
12867 { MASK_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
12868
12869 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
12870 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
12871 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
12872
12873 /* SSE2 MMX */
12874 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
12875 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
12876 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
12877 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
12878 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
12879 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
12880 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
12881 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
12882
12883 { MASK_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
12884 { MASK_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
12885 { MASK_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
12886 { MASK_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
12887 { MASK_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
12888 { MASK_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
12889 { MASK_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
12890 { MASK_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
12891
12892 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
12893 { MASK_SSE2, CODE_FOR_sse2_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
12894
12895 { MASK_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
12896 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
12897 { MASK_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
12898 { MASK_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
12899
12900 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
12901 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
12902
12903 { MASK_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
12904 { MASK_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
12905 { MASK_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
12906 { MASK_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
12907 { MASK_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
12908 { MASK_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
12909
12910 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
12911 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
12912 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
12913 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
12914
12915 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
12916 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
12917 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
12918 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
12919 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
12920 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
12921 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
12922 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
12923
12924 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
12925 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
12926 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
12927
12928 { MASK_SSE2, CODE_FOR_sse2_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
12929 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
12930
12931 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
12932 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },
12933
12934 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
12935 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
12936 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
12937
12938 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
12939 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
12940 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
12941
12942 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
12943 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
12944
12945 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
12946
12947 { MASK_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
12948 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
12949 { MASK_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
12950 { MASK_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
12951
12952 /* SSE3 */
12953 { MASK_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
12954 { MASK_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
12955 { MASK_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
12956 { MASK_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
12957 { MASK_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
12958 { MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
12959 };
12960
12961 static const struct builtin_description bdesc_1arg[] =
12962 {
12963 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
12964 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
12965
12966 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
12967 { MASK_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
12968 { MASK_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
12969
12970 { MASK_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
12971 { MASK_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
12972 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
12973 { MASK_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
12974 { MASK_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
12975 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
12976
12977 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
12978 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
12979
12980 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
12981
12982 { MASK_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
12983 { MASK_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
12984
12985 { MASK_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
12986 { MASK_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
12987 { MASK_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
12988 { MASK_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
12989 { MASK_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
12990
12991 { MASK_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
12992
12993 { MASK_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
12994 { MASK_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
12995 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
12996 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
12997
12998 { MASK_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
12999 { MASK_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
13000 { MASK_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
13001
13002 /* SSE3 */
13003 { MASK_SSE3, CODE_FOR_sse3_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
13004 { MASK_SSE3, CODE_FOR_sse3_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
13005 };
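/* An illustrative note on how these descriptor tables are consumed: an
   entry such as

     { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 }

   ties the IX86_BUILTIN_SQRTPS builtin code to the sqrtv4sf2 insn pattern
   and to the MASK_SSE target flag.  Entries in bdesc_2arg that carry a
   name string are registered automatically by the table walk in
   ix86_init_mmx_sse_builtins below; entries whose name field is 0, like
   the bdesc_1arg entry above, are instead registered explicitly with
   def_builtin (this one as "__builtin_ia32_sqrtps" with type
   v4sf_ftype_v4sf).  */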
13006
13007 static void
13008 ix86_init_builtins (void)
13009 {
13010 if (TARGET_MMX)
13011 ix86_init_mmx_sse_builtins ();
13012 }
13013
13014 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
13015 is zero. Otherwise, if TARGET_SSE is not set, only the MMX
13016 builtins are defined. */
13017 static void
13018 ix86_init_mmx_sse_builtins (void)
13019 {
13020 const struct builtin_description * d;
13021 size_t i;
13022
13023 tree V16QI_type_node = build_vector_type_for_mode (intQI_type_node, V16QImode);
13024 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
13025 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
13026 tree V2DI_type_node
13027 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
13028 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
13029 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
13030 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
13031 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
13032 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
13033 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
13034
13035 tree pchar_type_node = build_pointer_type (char_type_node);
13036 tree pcchar_type_node = build_pointer_type (
13037 build_type_variant (char_type_node, 1, 0));
13038 tree pfloat_type_node = build_pointer_type (float_type_node);
13039 tree pcfloat_type_node = build_pointer_type (
13040 build_type_variant (float_type_node, 1, 0));
13041 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
13042 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
13043 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
13044
13045 /* Comparisons. */
13046 tree int_ftype_v4sf_v4sf
13047 = build_function_type_list (integer_type_node,
13048 V4SF_type_node, V4SF_type_node, NULL_TREE);
13049 tree v4si_ftype_v4sf_v4sf
13050 = build_function_type_list (V4SI_type_node,
13051 V4SF_type_node, V4SF_type_node, NULL_TREE);
13052 /* MMX/SSE/integer conversions. */
13053 tree int_ftype_v4sf
13054 = build_function_type_list (integer_type_node,
13055 V4SF_type_node, NULL_TREE);
13056 tree int64_ftype_v4sf
13057 = build_function_type_list (long_long_integer_type_node,
13058 V4SF_type_node, NULL_TREE);
13059 tree int_ftype_v8qi
13060 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
13061 tree v4sf_ftype_v4sf_int
13062 = build_function_type_list (V4SF_type_node,
13063 V4SF_type_node, integer_type_node, NULL_TREE);
13064 tree v4sf_ftype_v4sf_int64
13065 = build_function_type_list (V4SF_type_node,
13066 V4SF_type_node, long_long_integer_type_node,
13067 NULL_TREE);
13068 tree v4sf_ftype_v4sf_v2si
13069 = build_function_type_list (V4SF_type_node,
13070 V4SF_type_node, V2SI_type_node, NULL_TREE);
13071
13072 /* Miscellaneous. */
13073 tree v8qi_ftype_v4hi_v4hi
13074 = build_function_type_list (V8QI_type_node,
13075 V4HI_type_node, V4HI_type_node, NULL_TREE);
13076 tree v4hi_ftype_v2si_v2si
13077 = build_function_type_list (V4HI_type_node,
13078 V2SI_type_node, V2SI_type_node, NULL_TREE);
13079 tree v4sf_ftype_v4sf_v4sf_int
13080 = build_function_type_list (V4SF_type_node,
13081 V4SF_type_node, V4SF_type_node,
13082 integer_type_node, NULL_TREE);
13083 tree v2si_ftype_v4hi_v4hi
13084 = build_function_type_list (V2SI_type_node,
13085 V4HI_type_node, V4HI_type_node, NULL_TREE);
13086 tree v4hi_ftype_v4hi_int
13087 = build_function_type_list (V4HI_type_node,
13088 V4HI_type_node, integer_type_node, NULL_TREE);
13089 tree v4hi_ftype_v4hi_di
13090 = build_function_type_list (V4HI_type_node,
13091 V4HI_type_node, long_long_unsigned_type_node,
13092 NULL_TREE);
13093 tree v2si_ftype_v2si_di
13094 = build_function_type_list (V2SI_type_node,
13095 V2SI_type_node, long_long_unsigned_type_node,
13096 NULL_TREE);
13097 tree void_ftype_void
13098 = build_function_type (void_type_node, void_list_node);
13099 tree void_ftype_unsigned
13100 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
13101 tree void_ftype_unsigned_unsigned
13102 = build_function_type_list (void_type_node, unsigned_type_node,
13103 unsigned_type_node, NULL_TREE);
13104 tree void_ftype_pcvoid_unsigned_unsigned
13105 = build_function_type_list (void_type_node, const_ptr_type_node,
13106 unsigned_type_node, unsigned_type_node,
13107 NULL_TREE);
13108 tree unsigned_ftype_void
13109 = build_function_type (unsigned_type_node, void_list_node);
13110 tree v2si_ftype_v4sf
13111 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
13112 /* Loads/stores. */
13113 tree void_ftype_v8qi_v8qi_pchar
13114 = build_function_type_list (void_type_node,
13115 V8QI_type_node, V8QI_type_node,
13116 pchar_type_node, NULL_TREE);
13117 tree v4sf_ftype_pcfloat
13118 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
13119 /* @@@ the type is bogus */
13120 tree v4sf_ftype_v4sf_pv2si
13121 = build_function_type_list (V4SF_type_node,
13122 V4SF_type_node, pv2si_type_node, NULL_TREE);
13123 tree void_ftype_pv2si_v4sf
13124 = build_function_type_list (void_type_node,
13125 pv2si_type_node, V4SF_type_node, NULL_TREE);
13126 tree void_ftype_pfloat_v4sf
13127 = build_function_type_list (void_type_node,
13128 pfloat_type_node, V4SF_type_node, NULL_TREE);
13129 tree void_ftype_pdi_di
13130 = build_function_type_list (void_type_node,
13131 pdi_type_node, long_long_unsigned_type_node,
13132 NULL_TREE);
13133 tree void_ftype_pv2di_v2di
13134 = build_function_type_list (void_type_node,
13135 pv2di_type_node, V2DI_type_node, NULL_TREE);
13136 /* Normal vector unops. */
13137 tree v4sf_ftype_v4sf
13138 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
13139
13140 /* Normal vector binops. */
13141 tree v4sf_ftype_v4sf_v4sf
13142 = build_function_type_list (V4SF_type_node,
13143 V4SF_type_node, V4SF_type_node, NULL_TREE);
13144 tree v8qi_ftype_v8qi_v8qi
13145 = build_function_type_list (V8QI_type_node,
13146 V8QI_type_node, V8QI_type_node, NULL_TREE);
13147 tree v4hi_ftype_v4hi_v4hi
13148 = build_function_type_list (V4HI_type_node,
13149 V4HI_type_node, V4HI_type_node, NULL_TREE);
13150 tree v2si_ftype_v2si_v2si
13151 = build_function_type_list (V2SI_type_node,
13152 V2SI_type_node, V2SI_type_node, NULL_TREE);
13153 tree di_ftype_di_di
13154 = build_function_type_list (long_long_unsigned_type_node,
13155 long_long_unsigned_type_node,
13156 long_long_unsigned_type_node, NULL_TREE);
13157
13158 tree v2si_ftype_v2sf
13159 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
13160 tree v2sf_ftype_v2si
13161 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
13162 tree v2si_ftype_v2si
13163 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
13164 tree v2sf_ftype_v2sf
13165 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
13166 tree v2sf_ftype_v2sf_v2sf
13167 = build_function_type_list (V2SF_type_node,
13168 V2SF_type_node, V2SF_type_node, NULL_TREE);
13169 tree v2si_ftype_v2sf_v2sf
13170 = build_function_type_list (V2SI_type_node,
13171 V2SF_type_node, V2SF_type_node, NULL_TREE);
13172 tree pint_type_node = build_pointer_type (integer_type_node);
13173 tree pdouble_type_node = build_pointer_type (double_type_node);
13174 tree pcdouble_type_node = build_pointer_type (
13175 build_type_variant (double_type_node, 1, 0));
13176 tree int_ftype_v2df_v2df
13177 = build_function_type_list (integer_type_node,
13178 V2DF_type_node, V2DF_type_node, NULL_TREE);
13179
13180 tree ti_ftype_ti_ti
13181 = build_function_type_list (intTI_type_node,
13182 intTI_type_node, intTI_type_node, NULL_TREE);
13183 tree void_ftype_pcvoid
13184 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
13185 tree v4sf_ftype_v4si
13186 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
13187 tree v4si_ftype_v4sf
13188 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
13189 tree v2df_ftype_v4si
13190 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
13191 tree v4si_ftype_v2df
13192 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
13193 tree v2si_ftype_v2df
13194 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
13195 tree v4sf_ftype_v2df
13196 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
13197 tree v2df_ftype_v2si
13198 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
13199 tree v2df_ftype_v4sf
13200 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
13201 tree int_ftype_v2df
13202 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
13203 tree int64_ftype_v2df
13204 = build_function_type_list (long_long_integer_type_node,
13205 V2DF_type_node, NULL_TREE);
13206 tree v2df_ftype_v2df_int
13207 = build_function_type_list (V2DF_type_node,
13208 V2DF_type_node, integer_type_node, NULL_TREE);
13209 tree v2df_ftype_v2df_int64
13210 = build_function_type_list (V2DF_type_node,
13211 V2DF_type_node, long_long_integer_type_node,
13212 NULL_TREE);
13213 tree v4sf_ftype_v4sf_v2df
13214 = build_function_type_list (V4SF_type_node,
13215 V4SF_type_node, V2DF_type_node, NULL_TREE);
13216 tree v2df_ftype_v2df_v4sf
13217 = build_function_type_list (V2DF_type_node,
13218 V2DF_type_node, V4SF_type_node, NULL_TREE);
13219 tree v2df_ftype_v2df_v2df_int
13220 = build_function_type_list (V2DF_type_node,
13221 V2DF_type_node, V2DF_type_node,
13222 integer_type_node,
13223 NULL_TREE);
13224 tree v2df_ftype_v2df_pcdouble
13225 = build_function_type_list (V2DF_type_node,
13226 V2DF_type_node, pcdouble_type_node, NULL_TREE);
13227 tree void_ftype_pdouble_v2df
13228 = build_function_type_list (void_type_node,
13229 pdouble_type_node, V2DF_type_node, NULL_TREE);
13230 tree void_ftype_pint_int
13231 = build_function_type_list (void_type_node,
13232 pint_type_node, integer_type_node, NULL_TREE);
13233 tree void_ftype_v16qi_v16qi_pchar
13234 = build_function_type_list (void_type_node,
13235 V16QI_type_node, V16QI_type_node,
13236 pchar_type_node, NULL_TREE);
13237 tree v2df_ftype_pcdouble
13238 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
13239 tree v2df_ftype_v2df_v2df
13240 = build_function_type_list (V2DF_type_node,
13241 V2DF_type_node, V2DF_type_node, NULL_TREE);
13242 tree v16qi_ftype_v16qi_v16qi
13243 = build_function_type_list (V16QI_type_node,
13244 V16QI_type_node, V16QI_type_node, NULL_TREE);
13245 tree v8hi_ftype_v8hi_v8hi
13246 = build_function_type_list (V8HI_type_node,
13247 V8HI_type_node, V8HI_type_node, NULL_TREE);
13248 tree v4si_ftype_v4si_v4si
13249 = build_function_type_list (V4SI_type_node,
13250 V4SI_type_node, V4SI_type_node, NULL_TREE);
13251 tree v2di_ftype_v2di_v2di
13252 = build_function_type_list (V2DI_type_node,
13253 V2DI_type_node, V2DI_type_node, NULL_TREE);
13254 tree v2di_ftype_v2df_v2df
13255 = build_function_type_list (V2DI_type_node,
13256 V2DF_type_node, V2DF_type_node, NULL_TREE);
13257 tree v2df_ftype_v2df
13258 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
13259 tree v2di_ftype_v2di_int
13260 = build_function_type_list (V2DI_type_node,
13261 V2DI_type_node, integer_type_node, NULL_TREE);
13262 tree v4si_ftype_v4si_int
13263 = build_function_type_list (V4SI_type_node,
13264 V4SI_type_node, integer_type_node, NULL_TREE);
13265 tree v8hi_ftype_v8hi_int
13266 = build_function_type_list (V8HI_type_node,
13267 V8HI_type_node, integer_type_node, NULL_TREE);
13268 tree v8hi_ftype_v8hi_v2di
13269 = build_function_type_list (V8HI_type_node,
13270 V8HI_type_node, V2DI_type_node, NULL_TREE);
13271 tree v4si_ftype_v4si_v2di
13272 = build_function_type_list (V4SI_type_node,
13273 V4SI_type_node, V2DI_type_node, NULL_TREE);
13274 tree v4si_ftype_v8hi_v8hi
13275 = build_function_type_list (V4SI_type_node,
13276 V8HI_type_node, V8HI_type_node, NULL_TREE);
13277 tree di_ftype_v8qi_v8qi
13278 = build_function_type_list (long_long_unsigned_type_node,
13279 V8QI_type_node, V8QI_type_node, NULL_TREE);
13280 tree di_ftype_v2si_v2si
13281 = build_function_type_list (long_long_unsigned_type_node,
13282 V2SI_type_node, V2SI_type_node, NULL_TREE);
13283 tree v2di_ftype_v16qi_v16qi
13284 = build_function_type_list (V2DI_type_node,
13285 V16QI_type_node, V16QI_type_node, NULL_TREE);
13286 tree v2di_ftype_v4si_v4si
13287 = build_function_type_list (V2DI_type_node,
13288 V4SI_type_node, V4SI_type_node, NULL_TREE);
13289 tree int_ftype_v16qi
13290 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
13291 tree v16qi_ftype_pcchar
13292 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
13293 tree void_ftype_pchar_v16qi
13294 = build_function_type_list (void_type_node,
13295 pchar_type_node, V16QI_type_node, NULL_TREE);
13296
13297 tree float80_type;
13298 tree float128_type;
13299 tree ftype;
13300
13301 /* The __float80 type. */
13302 if (TYPE_MODE (long_double_type_node) == XFmode)
13303 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
13304 "__float80");
13305 else
13306 {
13307 /* The __float80 type. */
13308 float80_type = make_node (REAL_TYPE);
13309 TYPE_PRECISION (float80_type) = 80;
13310 layout_type (float80_type);
13311 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
13312 }
13313
13314 float128_type = make_node (REAL_TYPE);
13315 TYPE_PRECISION (float128_type) = 128;
13316 layout_type (float128_type);
13317 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
13318
13319 /* Add all builtins that are more or less simple operations on two
13320 operands. */
13321 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
13322 {
13323 /* Use one of the operands; the target can have a different mode for
13324 mask-generating compares. */
13325 enum machine_mode mode;
13326 tree type;
13327
13328 if (d->name == 0)
13329 continue;
13330 mode = insn_data[d->icode].operand[1].mode;
13331
13332 switch (mode)
13333 {
13334 case V16QImode:
13335 type = v16qi_ftype_v16qi_v16qi;
13336 break;
13337 case V8HImode:
13338 type = v8hi_ftype_v8hi_v8hi;
13339 break;
13340 case V4SImode:
13341 type = v4si_ftype_v4si_v4si;
13342 break;
13343 case V2DImode:
13344 type = v2di_ftype_v2di_v2di;
13345 break;
13346 case V2DFmode:
13347 type = v2df_ftype_v2df_v2df;
13348 break;
13349 case TImode:
13350 type = ti_ftype_ti_ti;
13351 break;
13352 case V4SFmode:
13353 type = v4sf_ftype_v4sf_v4sf;
13354 break;
13355 case V8QImode:
13356 type = v8qi_ftype_v8qi_v8qi;
13357 break;
13358 case V4HImode:
13359 type = v4hi_ftype_v4hi_v4hi;
13360 break;
13361 case V2SImode:
13362 type = v2si_ftype_v2si_v2si;
13363 break;
13364 case DImode:
13365 type = di_ftype_di_di;
13366 break;
13367
13368 default:
13369 abort ();
13370 }
13371
13372 /* Override for comparisons. */
13373 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
13374 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
13375 type = v4si_ftype_v4sf_v4sf;
13376
13377 if (d->icode == CODE_FOR_sse2_maskcmpv2df3
13378 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
13379 type = v2di_ftype_v2df_v2df;
13380
13381 def_builtin (d->mask, d->name, type, d->code);
13382 }
13383
13384 /* Add the remaining MMX insns with somewhat more complicated types. */
13385 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
13386 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
13387 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
13388 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
13389
13390 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
13391 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
13392 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
13393
13394 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
13395 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
13396
13397 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
13398 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
13399
13400 /* comi/ucomi insns. */
13401 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
13402 if (d->mask == MASK_SSE2)
13403 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
13404 else
13405 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
13406
13407 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
13408 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
13409 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
13410
13411 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
13412 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
13413 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
13414 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
13415 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
13416 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
13417 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
13418 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
13419 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
13420 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
13421 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
13422
13423 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
13424
13425 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
13426 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
13427
13428 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
13429 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
13430 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
13431 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
13432
13433 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
13434 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
13435 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
13436 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
13437
13438 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
13439
13440 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
13441
13442 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
13443 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
13444 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
13445 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
13446 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
13447 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
13448
13449 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
13450
13451 /* Original 3DNow! */
13452 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
13453 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
13454 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
13455 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
13456 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
13457 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
13458 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
13459 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
13460 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
13461 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
13462 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
13463 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
13464 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
13465 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
13466 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
13467 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
13468 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
13469 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
13470 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
13471 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
13472
13473 /* 3DNow! extension as used in the Athlon CPU. */
13474 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
13475 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
13476 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
13477 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
13478 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
13479 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
13480
13481 /* SSE2 */
13482 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
13483
13484 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
13485 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
13486
13487 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
13488 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
13489
13490 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
13491 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
13492 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
13493 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
13494 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
13495
13496 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
13497 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
13498 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
13499 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
13500
13501 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
13502 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
13503
13504 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
13505
13506 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
13507 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
13508
13509 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
13510 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
13511 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
13512 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
13513 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
13514
13515 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
13516
13517 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
13518 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
13519 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
13520 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
13521
13522 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
13523 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
13524 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
13525
13526 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
13527 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
13528 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
13529 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
13530
13531 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
13532 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
13533 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
13534
13535 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
13536 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
13537
13538 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
13539 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
13540
13541 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
13542 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
13543 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
13544
13545 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
13546 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
13547 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
13548
13549 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
13550 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
13551
13552 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
13553 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
13554 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
13555 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
13556
13557 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
13558 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
13559 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
13560 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
13561
13562 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
13563 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
13564
13565 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
13566
13567 /* Prescott New Instructions. */
13568 def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
13569 void_ftype_pcvoid_unsigned_unsigned,
13570 IX86_BUILTIN_MONITOR);
13571 def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
13572 void_ftype_unsigned_unsigned,
13573 IX86_BUILTIN_MWAIT);
13574 def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
13575 v4sf_ftype_v4sf,
13576 IX86_BUILTIN_MOVSHDUP);
13577 def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
13578 v4sf_ftype_v4sf,
13579 IX86_BUILTIN_MOVSLDUP);
13580 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
13581 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
13582
13583 /* Access to the vec_init patterns. */
13584 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
13585 integer_type_node, NULL_TREE);
13586 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v2si",
13587 ftype, IX86_BUILTIN_VEC_INIT_V2SI);
13588
13589 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
13590 short_integer_type_node,
13591 short_integer_type_node,
13592 short_integer_type_node, NULL_TREE);
13593 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v4hi",
13594 ftype, IX86_BUILTIN_VEC_INIT_V4HI);
13595
13596 ftype = build_function_type_list (V8QI_type_node, char_type_node,
13597 char_type_node, char_type_node,
13598 char_type_node, char_type_node,
13599 char_type_node, char_type_node,
13600 char_type_node, NULL_TREE);
13601 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v8qi",
13602 ftype, IX86_BUILTIN_VEC_INIT_V8QI);
13603
13604 /* Access to the vec_extract patterns. */
13605 ftype = build_function_type_list (double_type_node, V2DF_type_node,
13606 integer_type_node, NULL_TREE);
13607 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2df",
13608 ftype, IX86_BUILTIN_VEC_EXT_V2DF);
13609
13610 ftype = build_function_type_list (long_long_integer_type_node,
13611 V2DI_type_node, integer_type_node,
13612 NULL_TREE);
13613 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2di",
13614 ftype, IX86_BUILTIN_VEC_EXT_V2DI);
13615
13616 ftype = build_function_type_list (float_type_node, V4SF_type_node,
13617 integer_type_node, NULL_TREE);
13618 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4sf",
13619 ftype, IX86_BUILTIN_VEC_EXT_V4SF);
13620
13621 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
13622 integer_type_node, NULL_TREE);
13623 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4si",
13624 ftype, IX86_BUILTIN_VEC_EXT_V4SI);
13625
13626 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
13627 integer_type_node, NULL_TREE);
13628 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v8hi",
13629 ftype, IX86_BUILTIN_VEC_EXT_V8HI);
13630
13631 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
13632 integer_type_node, NULL_TREE);
13633 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_ext_v4hi",
13634 ftype, IX86_BUILTIN_VEC_EXT_V4HI);
13635
13636 /* Access to the vec_set patterns. */
13637 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
13638 intHI_type_node,
13639 integer_type_node, NULL_TREE);
13640 def_builtin (MASK_SSE, "__builtin_ia32_vec_set_v8hi",
13641 ftype, IX86_BUILTIN_VEC_SET_V8HI);
13642
13643 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
13644 intHI_type_node,
13645 integer_type_node, NULL_TREE);
13646 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_set_v4hi",
13647 ftype, IX86_BUILTIN_VEC_SET_V4HI);
13648 }
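/* A user-level sketch of how one of the registrations above surfaces.
   With -mmmx the vec_init builtin defined just above can be called
   directly; the vector typedef below is local to this example and the
   argument order is assumed to be lowest element first:

     typedef int example_v2si __attribute__ ((vector_size (8)));

     example_v2si
     example_pack2 (int e0, int e1)
     {
       return __builtin_ia32_vec_init_v2si (e0, e1);
     }
*/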
13649
13650 /* Errors in the source file can cause expand_expr to return const0_rtx
13651 where we expect a vector. To avoid crashing, use one of the vector
13652 clear instructions. */
13653 static rtx
13654 safe_vector_operand (rtx x, enum machine_mode mode)
13655 {
13656 if (x == const0_rtx)
13657 x = CONST0_RTX (mode);
13658 return x;
13659 }
13660
13661 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
13662
13663 static rtx
13664 ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
13665 {
13666 rtx pat, xops[3];
13667 tree arg0 = TREE_VALUE (arglist);
13668 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13669 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13670 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13671 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13672 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13673 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
13674
13675 if (VECTOR_MODE_P (mode0))
13676 op0 = safe_vector_operand (op0, mode0);
13677 if (VECTOR_MODE_P (mode1))
13678 op1 = safe_vector_operand (op1, mode1);
13679
13680 if (optimize || !target
13681 || GET_MODE (target) != tmode
13682 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13683 target = gen_reg_rtx (tmode);
13684
13685 if (GET_MODE (op1) == SImode && mode1 == TImode)
13686 {
13687 rtx x = gen_reg_rtx (V4SImode);
13688 emit_insn (gen_sse2_loadd (x, op1));
13689 op1 = gen_lowpart (TImode, x);
13690 }
13691
13692 /* If the operands have modes other than the ones the insn pattern
13693 expects (VOIDmode constants are allowed through), abort. */
13694 if ((GET_MODE (op0) != mode0 && GET_MODE (op0) != VOIDmode)
13695 || (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode))
13696 abort ();
13697
13698 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
13699 op0 = copy_to_mode_reg (mode0, op0);
13700 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
13701 op1 = copy_to_mode_reg (mode1, op1);
13702
13703 /* ??? Using ix86_fixup_binary_operands is problematic when
13704 we've got mismatched modes. Fake it. */
13705
13706 xops[0] = target;
13707 xops[1] = op0;
13708 xops[2] = op1;
13709
13710 if (tmode == mode0 && tmode == mode1)
13711 {
13712 target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
13713 op0 = xops[1];
13714 op1 = xops[2];
13715 }
13716 else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
13717 {
13718 op0 = force_reg (mode0, op0);
13719 op1 = force_reg (mode1, op1);
13720 target = gen_reg_rtx (tmode);
13721 }
13722
13723 pat = GEN_FCN (icode) (target, op0, op1);
13724 if (! pat)
13725 return 0;
13726 emit_insn (pat);
13727 return target;
13728 }
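/* For example, the bdesc_2arg entry for "__builtin_ia32_pmullw" pairs
   IX86_BUILTIN_PMULLW with CODE_FOR_mmx_mulv4hi3, so a call with two
   V4HImode arguments reaches this routine with tmode, mode0 and mode1
   all V4HImode; both operands are forced into registers if the insn
   predicates require it, and gen_mmx_mulv4hi3 provides the pattern that
   is emitted (a sketch of the common case; the SImode-to-TImode fixup
   above only matters for the few insns that take a TImode operand).  */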
13729
13730 /* Subroutine of ix86_expand_builtin to take care of stores. */
13731
13732 static rtx
13733 ix86_expand_store_builtin (enum insn_code icode, tree arglist)
13734 {
13735 rtx pat;
13736 tree arg0 = TREE_VALUE (arglist);
13737 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13738 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13739 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13740 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
13741 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
13742
13743 if (VECTOR_MODE_P (mode1))
13744 op1 = safe_vector_operand (op1, mode1);
13745
13746 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13747 op1 = copy_to_mode_reg (mode1, op1);
13748
13749 pat = GEN_FCN (icode) (op0, op1);
13750 if (pat)
13751 emit_insn (pat);
13752 return 0;
13753 }
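/* For example, a store builtin such as "__builtin_ia32_storeups"
   (registered above with type void_ftype_pfloat_v4sf, and assumed to be
   dispatched to this routine by ix86_expand_builtin) supplies a pointer
   as arg0 and a vector as arg1: the pointer is wrapped in a MEM in the
   insn's destination mode, the vector is copied into a register, and the
   store insn is emitted; the builtin itself produces no value, hence the
   constant 0 return.  */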
13754
13755 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
13756
13757 static rtx
13758 ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
13759 rtx target, int do_load)
13760 {
13761 rtx pat;
13762 tree arg0 = TREE_VALUE (arglist);
13763 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13764 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13765 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13766
13767 if (optimize || !target
13768 || GET_MODE (target) != tmode
13769 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13770 target = gen_reg_rtx (tmode);
13771 if (do_load)
13772 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13773 else
13774 {
13775 if (VECTOR_MODE_P (mode0))
13776 op0 = safe_vector_operand (op0, mode0);
13777
13778 if ((optimize && !register_operand (op0, mode0))
13779 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13780 op0 = copy_to_mode_reg (mode0, op0);
13781 }
13782
13783 pat = GEN_FCN (icode) (target, op0);
13784 if (! pat)
13785 return 0;
13786 emit_insn (pat);
13787 return target;
13788 }
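/* The do_load flag distinguishes two kinds of one-argument builtins:
   when it is nonzero the argument is a pointer and is wrapped in a MEM
   (as assumed for "__builtin_ia32_loadups", registered above with type
   v4sf_ftype_pcfloat); when it is zero the argument is the value operand
   itself (as for "__builtin_ia32_sqrtps").  */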
13789
13790 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
13791 sqrtss, rsqrtss, rcpss. */
13792
13793 static rtx
13794 ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
13795 {
13796 rtx pat;
13797 tree arg0 = TREE_VALUE (arglist);
13798 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13799 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13800 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13801
13802 if (optimize || !target
13803 || GET_MODE (target) != tmode
13804 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13805 target = gen_reg_rtx (tmode);
13806
13807 if (VECTOR_MODE_P (mode0))
13808 op0 = safe_vector_operand (op0, mode0);
13809
13810 if ((optimize && !register_operand (op0, mode0))
13811 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13812 op0 = copy_to_mode_reg (mode0, op0);
13813
13814 op1 = op0;
13815 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
13816 op1 = copy_to_mode_reg (mode0, op1);
13817
13818 pat = GEN_FCN (icode) (target, op0, op1);
13819 if (! pat)
13820 return 0;
13821 emit_insn (pat);
13822 return target;
13823 }
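/* The single argument is used for both input operands because the scalar
   patterns behind sqrtss, rsqrtss and rcpss operate on element 0 only
   and take a second vector operand that supplies the untouched upper
   elements of the result; passing the argument twice keeps those upper
   elements equal to the input's.  */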
13824
13825 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
13826
13827 static rtx
13828 ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
13829 rtx target)
13830 {
13831 rtx pat;
13832 tree arg0 = TREE_VALUE (arglist);
13833 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13834 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13835 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13836 rtx op2;
13837 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
13838 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
13839 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
13840 enum rtx_code comparison = d->comparison;
13841
13842 if (VECTOR_MODE_P (mode0))
13843 op0 = safe_vector_operand (op0, mode0);
13844 if (VECTOR_MODE_P (mode1))
13845 op1 = safe_vector_operand (op1, mode1);
13846
13847 /* Swap operands if we have a comparison that isn't available in
13848 hardware. */
13849 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
13850 {
13851 rtx tmp = gen_reg_rtx (mode1);
13852 emit_move_insn (tmp, op1);
13853 op1 = op0;
13854 op0 = tmp;
13855 }
13856
13857 if (optimize || !target
13858 || GET_MODE (target) != tmode
13859 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
13860 target = gen_reg_rtx (tmode);
13861
13862 if ((optimize && !register_operand (op0, mode0))
13863 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
13864 op0 = copy_to_mode_reg (mode0, op0);
13865 if ((optimize && !register_operand (op1, mode1))
13866 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
13867 op1 = copy_to_mode_reg (mode1, op1);
13868
13869 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13870 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
13871 if (! pat)
13872 return 0;
13873 emit_insn (pat);
13874 return target;
13875 }
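/* For example, "__builtin_ia32_cmpgtpd" is described above with
   comparison code LT and the BUILTIN_DESC_SWAP_OPERANDS flag, since
   there is no native greater-than packed compare: this routine copies
   op1 into a fresh register, exchanges the two operands, and emits the
   masked LT compare, so a > b is computed as b < a.  */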
13876
13877 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
13878
13879 static rtx
13880 ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
13881 rtx target)
13882 {
13883 rtx pat;
13884 tree arg0 = TREE_VALUE (arglist);
13885 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13886 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13887 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13888 rtx op2;
13889 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
13890 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
13891 enum rtx_code comparison = d->comparison;
13892
13893 if (VECTOR_MODE_P (mode0))
13894 op0 = safe_vector_operand (op0, mode0);
13895 if (VECTOR_MODE_P (mode1))
13896 op1 = safe_vector_operand (op1, mode1);
13897
13898 /* Swap operands if we have a comparison that isn't available in
13899 hardware. */
13900 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
13901 {
13902 rtx tmp = op1;
13903 op1 = op0;
13904 op0 = tmp;
13905 }
13906
13907 target = gen_reg_rtx (SImode);
13908 emit_move_insn (target, const0_rtx);
13909 target = gen_rtx_SUBREG (QImode, target, 0);
13910
13911 if ((optimize && !register_operand (op0, mode0))
13912 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
13913 op0 = copy_to_mode_reg (mode0, op0);
13914 if ((optimize && !register_operand (op1, mode1))
13915 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
13916 op1 = copy_to_mode_reg (mode1, op1);
13917
13918 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13919 pat = GEN_FCN (d->icode) (op0, op1);
13920 if (! pat)
13921 return 0;
13922 emit_insn (pat);
13923 emit_insn (gen_rtx_SET (VOIDmode,
13924 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
13925 gen_rtx_fmt_ee (comparison, QImode,
13926 SET_DEST (pat),
13927 const0_rtx)));
13928
13929 return SUBREG_REG (target);
13930 }
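/* Illustrative example (editor's sketch): a COMI builtin such as

       int r = __builtin_ia32_comilt (a, b);

   first emits the comi pattern, which only sets the flags register, and
   then the STRICT_LOW_PART sequence above materializes the boolean as
   roughly

       (set (strict_low_part (subreg:QI (reg:SI r) 0))
            (lt:QI (reg flags) (const_int 0)))

   with the SImode result cleared to zero beforehand so its upper bytes
   are well defined.  */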
13931
13932 /* Return the integer constant in ARG. Constrain it to be in the range
13933 of the subparts of VEC_TYPE; issue an error if not. */
13934
13935 static int
13936 get_element_number (tree vec_type, tree arg)
13937 {
13938 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
13939
13940 if (!host_integerp (arg, 1)
13941 || (elt = tree_low_cst (arg, 1), elt > max))
13942 {
13943 error ("selector must be an integer constant in the range 0..%i", max);
13944 return 0;
13945 }
13946
13947 return elt;
13948 }
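/* Worked example (editor's note): for a V4SF vector type the subparts
   number 4, so MAX is 3 and a call written as, say,
   __builtin_ia32_vec_ext_v4sf (v, 4) is diagnosed here; returning 0
   keeps the expander from crashing on the bogus selector.  */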
13949
13950 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
13951 ix86_expand_vector_init. We DO have language-level syntax for this, in
13952 the form of (type){ init-list }. Except that since we can't place emms
13953 instructions from inside the compiler, we can't allow the use of MMX
13954 registers unless the user explicitly asks for it. So we do *not* define
13955 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
13956 we have builtins invoked by mmintrin.h that give us license to emit
13957 these sorts of instructions. */
13958
13959 static rtx
13960 ix86_expand_vec_init_builtin (tree type, tree arglist, rtx target)
13961 {
13962 enum machine_mode tmode = TYPE_MODE (type);
13963 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
13964 int i, n_elt = GET_MODE_NUNITS (tmode);
13965 rtvec v = rtvec_alloc (n_elt);
13966
13967 gcc_assert (VECTOR_MODE_P (tmode));
13968
13969 for (i = 0; i < n_elt; ++i, arglist = TREE_CHAIN (arglist))
13970 {
13971 rtx x = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
13972 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
13973 }
13974
13975 gcc_assert (arglist == NULL);
13976
13977 if (!target || !register_operand (target, tmode))
13978 target = gen_reg_rtx (tmode);
13979
13980 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
13981 return target;
13982 }
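/* Illustrative example (editor's sketch; the exact intrinsic wrapper is
   an assumption about mmintrin.h): _mm_set_pi16 is expected to expand
   through __builtin_ia32_vec_init_v4hi (w0, w1, w2, w3), which lands here
   with a 4-element arglist; each element is expanded, narrowed to HImode
   with gen_lowpart, collected into the PARALLEL and handed off to
   ix86_expand_vector_init.  */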
13983
13984 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
13985 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
13986 had a language-level syntax for referencing vector elements. */
13987
13988 static rtx
13989 ix86_expand_vec_ext_builtin (tree arglist, rtx target)
13990 {
13991 enum machine_mode tmode, mode0;
13992 tree arg0, arg1;
13993 int elt;
13994 rtx op0;
13995
13996 arg0 = TREE_VALUE (arglist);
13997 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13998
13999 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14000 elt = get_element_number (TREE_TYPE (arg0), arg1);
14001
14002 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
14003 mode0 = TYPE_MODE (TREE_TYPE (arg0));
14004 gcc_assert (VECTOR_MODE_P (mode0));
14005
14006 op0 = force_reg (mode0, op0);
14007
14008 if (optimize || !target || !register_operand (target, tmode))
14009 target = gen_reg_rtx (tmode);
14010
14011 ix86_expand_vector_extract (true, target, op0, elt);
14012
14013 return target;
14014 }
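/* Illustrative example (editor's sketch): _mm_extract_pi16 (v, 2) is
   assumed to arrive here as __builtin_ia32_vec_ext_v4hi (v, 2).  The
   selector is validated by get_element_number and the real work, such as
   choosing pextrw when it is available, is done by
   ix86_expand_vector_extract.  */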
14015
14016 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
14017 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
14018 a language-level syntax for referencing vector elements. */
14019
14020 static rtx
14021 ix86_expand_vec_set_builtin (tree arglist)
14022 {
14023 enum machine_mode tmode, mode1;
14024 tree arg0, arg1, arg2;
14025 int elt;
14026 rtx op0, op1;
14027
14028 arg0 = TREE_VALUE (arglist);
14029 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14030 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14031
14032 tmode = TYPE_MODE (TREE_TYPE (arg0));
14033 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
14034 gcc_assert (VECTOR_MODE_P (tmode));
14035
14036 op0 = expand_expr (arg0, NULL_RTX, tmode, 0);
14037 op1 = expand_expr (arg1, NULL_RTX, mode1, 0);
14038 elt = get_element_number (TREE_TYPE (arg0), arg2);
14039
14040 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
14041 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
14042
14043 op0 = force_reg (tmode, op0);
14044 op1 = force_reg (mode1, op1);
14045
14046 ix86_expand_vector_set (true, op0, op1, elt);
14047
14048 return op0;
14049 }
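/* Illustrative example (editor's sketch): _mm_insert_pi16 (v, x, 1) is
   assumed to arrive here as __builtin_ia32_vec_set_v4hi (v, x, 1).  Unlike
   the extract case, the modified vector is returned in OP0 itself rather
   than in a caller-supplied target.  */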
14050
14051 /* Expand an expression EXP that calls a built-in function,
14052 with result going to TARGET if that's convenient
14053 (and in mode MODE if that's convenient).
14054 SUBTARGET may be used as the target for computing one of EXP's operands.
14055 IGNORE is nonzero if the value is to be ignored. */
14056
14057 static rtx
14058 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
14059 enum machine_mode mode ATTRIBUTE_UNUSED,
14060 int ignore ATTRIBUTE_UNUSED)
14061 {
14062 const struct builtin_description *d;
14063 size_t i;
14064 enum insn_code icode;
14065 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
14066 tree arglist = TREE_OPERAND (exp, 1);
14067 tree arg0, arg1, arg2;
14068 rtx op0, op1, op2, pat;
14069 enum machine_mode tmode, mode0, mode1, mode2;
14070 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
14071
14072 switch (fcode)
14073 {
14074 case IX86_BUILTIN_EMMS:
14075 emit_insn (gen_mmx_emms ());
14076 return 0;
14077
14078 case IX86_BUILTIN_SFENCE:
14079 emit_insn (gen_sse_sfence ());
14080 return 0;
14081
14082 case IX86_BUILTIN_MASKMOVQ:
14083 case IX86_BUILTIN_MASKMOVDQU:
14084 icode = (fcode == IX86_BUILTIN_MASKMOVQ
14085 ? CODE_FOR_mmx_maskmovq
14086 : CODE_FOR_sse2_maskmovdqu);
14087 /* Note the arg order is different from the operand order. */
14088 arg1 = TREE_VALUE (arglist);
14089 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
14090 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14091 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14092 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14093 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14094 mode0 = insn_data[icode].operand[0].mode;
14095 mode1 = insn_data[icode].operand[1].mode;
14096 mode2 = insn_data[icode].operand[2].mode;
14097
14098 op0 = force_reg (Pmode, op0);
14099 op0 = gen_rtx_MEM (mode1, op0);
14100
14101 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
14102 op0 = copy_to_mode_reg (mode0, op0);
14103 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
14104 op1 = copy_to_mode_reg (mode1, op1);
14105 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
14106 op2 = copy_to_mode_reg (mode2, op2);
14107 pat = GEN_FCN (icode) (op0, op1, op2);
14108 if (! pat)
14109 return 0;
14110 emit_insn (pat);
14111 return 0;
14112
14113 case IX86_BUILTIN_SQRTSS:
14114 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, arglist, target);
14115 case IX86_BUILTIN_RSQRTSS:
14116 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, arglist, target);
14117 case IX86_BUILTIN_RCPSS:
14118 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, arglist, target);
14119
14120 case IX86_BUILTIN_LOADUPS:
14121 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
14122
14123 case IX86_BUILTIN_STOREUPS:
14124 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
14125
14126 case IX86_BUILTIN_LOADHPS:
14127 case IX86_BUILTIN_LOADLPS:
14128 case IX86_BUILTIN_LOADHPD:
14129 case IX86_BUILTIN_LOADLPD:
14130 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
14131 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
14132 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
14133 : CODE_FOR_sse2_loadlpd);
14134 arg0 = TREE_VALUE (arglist);
14135 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14136 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14137 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14138 tmode = insn_data[icode].operand[0].mode;
14139 mode0 = insn_data[icode].operand[1].mode;
14140 mode1 = insn_data[icode].operand[2].mode;
14141
14142 op0 = force_reg (mode0, op0);
14143 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
14144 if (optimize || target == 0
14145 || GET_MODE (target) != tmode
14146 || !register_operand (target, tmode))
14147 target = gen_reg_rtx (tmode);
14148 pat = GEN_FCN (icode) (target, op0, op1);
14149 if (! pat)
14150 return 0;
14151 emit_insn (pat);
14152 return target;
14153
14154 case IX86_BUILTIN_STOREHPS:
14155 case IX86_BUILTIN_STORELPS:
14156 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
14157 : CODE_FOR_sse_storelps);
14158 arg0 = TREE_VALUE (arglist);
14159 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14160 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14161 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14162 mode0 = insn_data[icode].operand[0].mode;
14163 mode1 = insn_data[icode].operand[1].mode;
14164
14165 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
14166 op1 = force_reg (mode1, op1);
14167
14168 pat = GEN_FCN (icode) (op0, op1);
14169 if (! pat)
14170 return 0;
14171 emit_insn (pat);
14172 return const0_rtx;
14173
14174 case IX86_BUILTIN_MOVNTPS:
14175 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
14176 case IX86_BUILTIN_MOVNTQ:
14177 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
14178
14179 case IX86_BUILTIN_LDMXCSR:
14180 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
14181 target = assign_386_stack_local (SImode, 0);
14182 emit_move_insn (target, op0);
14183 emit_insn (gen_sse_ldmxcsr (target));
14184 return 0;
14185
14186 case IX86_BUILTIN_STMXCSR:
14187 target = assign_386_stack_local (SImode, 0);
14188 emit_insn (gen_sse_stmxcsr (target));
14189 return copy_to_mode_reg (SImode, target);
14190
14191 case IX86_BUILTIN_SHUFPS:
14192 case IX86_BUILTIN_SHUFPD:
14193 icode = (fcode == IX86_BUILTIN_SHUFPS
14194 ? CODE_FOR_sse_shufps
14195 : CODE_FOR_sse2_shufpd);
14196 arg0 = TREE_VALUE (arglist);
14197 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14198 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14199 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14200 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14201 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14202 tmode = insn_data[icode].operand[0].mode;
14203 mode0 = insn_data[icode].operand[1].mode;
14204 mode1 = insn_data[icode].operand[2].mode;
14205 mode2 = insn_data[icode].operand[3].mode;
14206
14207 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14208 op0 = copy_to_mode_reg (mode0, op0);
14209 if ((optimize && !register_operand (op1, mode1))
14210 || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
14211 op1 = copy_to_mode_reg (mode1, op1);
14212 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14213 {
14214 /* @@@ better error message */
14215 error ("mask must be an immediate");
14216 return gen_reg_rtx (tmode);
14217 }
14218 if (optimize || target == 0
14219 || GET_MODE (target) != tmode
14220 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14221 target = gen_reg_rtx (tmode);
14222 pat = GEN_FCN (icode) (target, op0, op1, op2);
14223 if (! pat)
14224 return 0;
14225 emit_insn (pat);
14226 return target;
14227
14228 case IX86_BUILTIN_PSHUFW:
14229 case IX86_BUILTIN_PSHUFD:
14230 case IX86_BUILTIN_PSHUFHW:
14231 case IX86_BUILTIN_PSHUFLW:
14232 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
14233 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
14234 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
14235 : CODE_FOR_mmx_pshufw);
14236 arg0 = TREE_VALUE (arglist);
14237 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14238 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14239 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14240 tmode = insn_data[icode].operand[0].mode;
14241 mode1 = insn_data[icode].operand[1].mode;
14242 mode2 = insn_data[icode].operand[2].mode;
14243
14244 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14245 op0 = copy_to_mode_reg (mode1, op0);
14246 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14247 {
14248 /* @@@ better error message */
14249 error ("mask must be an immediate");
14250 return const0_rtx;
14251 }
14252 if (target == 0
14253 || GET_MODE (target) != tmode
14254 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14255 target = gen_reg_rtx (tmode);
14256 pat = GEN_FCN (icode) (target, op0, op1);
14257 if (! pat)
14258 return 0;
14259 emit_insn (pat);
14260 return target;
14261
14262 case IX86_BUILTIN_PSLLDQI128:
14263 case IX86_BUILTIN_PSRLDQI128:
14264 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
14265 : CODE_FOR_sse2_lshrti3);
14266 arg0 = TREE_VALUE (arglist);
14267 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14268 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14269 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14270 tmode = insn_data[icode].operand[0].mode;
14271 mode1 = insn_data[icode].operand[1].mode;
14272 mode2 = insn_data[icode].operand[2].mode;
14273
14274 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14275 {
14276 op0 = copy_to_reg (op0);
14277 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
14278 }
14279 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14280 {
14281 error ("shift must be an immediate");
14282 return const0_rtx;
14283 }
14284 target = gen_reg_rtx (V2DImode);
14285 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
14286 if (! pat)
14287 return 0;
14288 emit_insn (pat);
14289 return target;
14290
14291 case IX86_BUILTIN_FEMMS:
14292 emit_insn (gen_mmx_femms ());
14293 return NULL_RTX;
14294
14295 case IX86_BUILTIN_PAVGUSB:
14296 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, arglist, target);
14297
14298 case IX86_BUILTIN_PF2ID:
14299 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, arglist, target, 0);
14300
14301 case IX86_BUILTIN_PFACC:
14302 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, arglist, target);
14303
14304 case IX86_BUILTIN_PFADD:
14305 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, arglist, target);
14306
14307 case IX86_BUILTIN_PFCMPEQ:
14308 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, arglist, target);
14309
14310 case IX86_BUILTIN_PFCMPGE:
14311 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, arglist, target);
14312
14313 case IX86_BUILTIN_PFCMPGT:
14314 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, arglist, target);
14315
14316 case IX86_BUILTIN_PFMAX:
14317 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, arglist, target);
14318
14319 case IX86_BUILTIN_PFMIN:
14320 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, arglist, target);
14321
14322 case IX86_BUILTIN_PFMUL:
14323 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, arglist, target);
14324
14325 case IX86_BUILTIN_PFRCP:
14326 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, arglist, target, 0);
14327
14328 case IX86_BUILTIN_PFRCPIT1:
14329 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, arglist, target);
14330
14331 case IX86_BUILTIN_PFRCPIT2:
14332 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, arglist, target);
14333
14334 case IX86_BUILTIN_PFRSQIT1:
14335 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, arglist, target);
14336
14337 case IX86_BUILTIN_PFRSQRT:
14338 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, arglist, target, 0);
14339
14340 case IX86_BUILTIN_PFSUB:
14341 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, arglist, target);
14342
14343 case IX86_BUILTIN_PFSUBR:
14344 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, arglist, target);
14345
14346 case IX86_BUILTIN_PI2FD:
14347 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, arglist, target, 0);
14348
14349 case IX86_BUILTIN_PMULHRW:
14350 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, arglist, target);
14351
14352 case IX86_BUILTIN_PF2IW:
14353 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, arglist, target, 0);
14354
14355 case IX86_BUILTIN_PFNACC:
14356 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, arglist, target);
14357
14358 case IX86_BUILTIN_PFPNACC:
14359 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, arglist, target);
14360
14361 case IX86_BUILTIN_PI2FW:
14362 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, arglist, target, 0);
14363
14364 case IX86_BUILTIN_PSWAPDSI:
14365 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, arglist, target, 0);
14366
14367 case IX86_BUILTIN_PSWAPDSF:
14368 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, arglist, target, 0);
14369
14370 case IX86_BUILTIN_SQRTSD:
14371 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, arglist, target);
14372 case IX86_BUILTIN_LOADUPD:
14373 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
14374 case IX86_BUILTIN_STOREUPD:
14375 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
14376
14377 case IX86_BUILTIN_MFENCE:
14378 emit_insn (gen_sse2_mfence ());
14379 return 0;
14380 case IX86_BUILTIN_LFENCE:
14381 emit_insn (gen_sse2_lfence ());
14382 return 0;
14383
14384 case IX86_BUILTIN_CLFLUSH:
14385 arg0 = TREE_VALUE (arglist);
14386 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14387 icode = CODE_FOR_sse2_clflush;
14388 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
14389 op0 = copy_to_mode_reg (Pmode, op0);
14390
14391 emit_insn (gen_sse2_clflush (op0));
14392 return 0;
14393
14394 case IX86_BUILTIN_MOVNTPD:
14395 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
14396 case IX86_BUILTIN_MOVNTDQ:
14397 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
14398 case IX86_BUILTIN_MOVNTI:
14399 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
14400
14401 case IX86_BUILTIN_LOADDQU:
14402 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
14403 case IX86_BUILTIN_STOREDQU:
14404 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
14405
14406 case IX86_BUILTIN_MONITOR:
14407 arg0 = TREE_VALUE (arglist);
14408 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14409 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14410 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14411 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14412 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14413 if (!REG_P (op0))
14414 op0 = copy_to_mode_reg (SImode, op0);
14415 if (!REG_P (op1))
14416 op1 = copy_to_mode_reg (SImode, op1);
14417 if (!REG_P (op2))
14418 op2 = copy_to_mode_reg (SImode, op2);
14419 emit_insn (gen_sse3_monitor (op0, op1, op2));
14420 return 0;
14421
14422 case IX86_BUILTIN_MWAIT:
14423 arg0 = TREE_VALUE (arglist);
14424 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14425 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14426 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14427 if (!REG_P (op0))
14428 op0 = copy_to_mode_reg (SImode, op0);
14429 if (!REG_P (op1))
14430 op1 = copy_to_mode_reg (SImode, op1);
14431 emit_insn (gen_sse3_mwait (op0, op1));
14432 return 0;
14433
14434 case IX86_BUILTIN_LDDQU:
14435 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, arglist,
14436 target, 1);
14437
14438 case IX86_BUILTIN_VEC_INIT_V2SI:
14439 case IX86_BUILTIN_VEC_INIT_V4HI:
14440 case IX86_BUILTIN_VEC_INIT_V8QI:
14441 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), arglist, target);
14442
14443 case IX86_BUILTIN_VEC_EXT_V2DF:
14444 case IX86_BUILTIN_VEC_EXT_V2DI:
14445 case IX86_BUILTIN_VEC_EXT_V4SF:
14446 case IX86_BUILTIN_VEC_EXT_V4SI:
14447 case IX86_BUILTIN_VEC_EXT_V8HI:
14448 case IX86_BUILTIN_VEC_EXT_V4HI:
14449 return ix86_expand_vec_ext_builtin (arglist, target);
14450
14451 case IX86_BUILTIN_VEC_SET_V8HI:
14452 case IX86_BUILTIN_VEC_SET_V4HI:
14453 return ix86_expand_vec_set_builtin (arglist);
14454
14455 default:
14456 break;
14457 }
14458
14459 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
14460 if (d->code == fcode)
14461 {
14462 /* Compares are treated specially. */
14463 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
14464 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
14465 || d->icode == CODE_FOR_sse2_maskcmpv2df3
14466 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
14467 return ix86_expand_sse_compare (d, arglist, target);
14468
14469 return ix86_expand_binop_builtin (d->icode, arglist, target);
14470 }
14471
14472 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
14473 if (d->code == fcode)
14474 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
14475
14476 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
14477 if (d->code == fcode)
14478 return ix86_expand_sse_comi (d, arglist, target);
14479
14480 gcc_unreachable ();
14481 }
14482
14483 /* Store OPERAND into memory after reload has completed. This means
14484 that we can't easily use assign_stack_local. */
14485 rtx
14486 ix86_force_to_memory (enum machine_mode mode, rtx operand)
14487 {
14488 rtx result;
14489 if (!reload_completed)
14490 abort ();
14491 if (TARGET_RED_ZONE)
14492 {
14493 result = gen_rtx_MEM (mode,
14494 gen_rtx_PLUS (Pmode,
14495 stack_pointer_rtx,
14496 GEN_INT (-RED_ZONE_SIZE)));
14497 emit_move_insn (result, operand);
14498 }
14499 else if (!TARGET_RED_ZONE && TARGET_64BIT)
14500 {
14501 switch (mode)
14502 {
14503 case HImode:
14504 case SImode:
14505 operand = gen_lowpart (DImode, operand);
14506 /* FALLTHRU */
14507 case DImode:
14508 emit_insn (
14509 gen_rtx_SET (VOIDmode,
14510 gen_rtx_MEM (DImode,
14511 gen_rtx_PRE_DEC (DImode,
14512 stack_pointer_rtx)),
14513 operand));
14514 break;
14515 default:
14516 abort ();
14517 }
14518 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14519 }
14520 else
14521 {
14522 switch (mode)
14523 {
14524 case DImode:
14525 {
14526 rtx operands[2];
14527 split_di (&operand, 1, operands, operands + 1);
14528 emit_insn (
14529 gen_rtx_SET (VOIDmode,
14530 gen_rtx_MEM (SImode,
14531 gen_rtx_PRE_DEC (Pmode,
14532 stack_pointer_rtx)),
14533 operands[1]));
14534 emit_insn (
14535 gen_rtx_SET (VOIDmode,
14536 gen_rtx_MEM (SImode,
14537 gen_rtx_PRE_DEC (Pmode,
14538 stack_pointer_rtx)),
14539 operands[0]));
14540 }
14541 break;
14542 case HImode:
14543 /* It is better to store HImodes as SImodes. */
14544 if (!TARGET_PARTIAL_REG_STALL)
14545 operand = gen_lowpart (SImode, operand);
14546 /* FALLTHRU */
14547 case SImode:
14548 emit_insn (
14549 gen_rtx_SET (VOIDmode,
14550 gen_rtx_MEM (GET_MODE (operand),
14551 gen_rtx_PRE_DEC (SImode,
14552 stack_pointer_rtx)),
14553 operand));
14554 break;
14555 default:
14556 abort ();
14557 }
14558 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14559 }
14560 return result;
14561 }
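/* Illustrative example (editor's sketch): on a 32-bit target without a
   red zone, forcing an SImode value to memory emits roughly

       (set (mem:SI (pre_dec:SI (reg:SI sp))) (reg:SI operand))

   i.e. a push, and the returned slot is (mem:SI (reg:SI sp)).  The
   matching ix86_free_from_memory below then releases the slot again with
   an lea adjustment of the stack pointer.  */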
14562
14563 /* Free the operand from memory. */
14564 void
14565 ix86_free_from_memory (enum machine_mode mode)
14566 {
14567 if (!TARGET_RED_ZONE)
14568 {
14569 int size;
14570
14571 if (mode == DImode || TARGET_64BIT)
14572 size = 8;
14573 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
14574 size = 2;
14575 else
14576 size = 4;
14577 /* Use LEA to deallocate stack space. In peephole2 it will be converted
14578 to a pop or add instruction if registers are available. */
14579 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
14580 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14581 GEN_INT (size))));
14582 }
14583 }
14584
14585 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
14586 QImode must go into class Q_REGS.
14587 Narrow ALL_REGS to GENERAL_REGS. This lets movsf and movdf do
14588 mem-to-mem moves through integer regs. */
14589 enum reg_class
14590 ix86_preferred_reload_class (rtx x, enum reg_class class)
14591 {
14592 if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x)))
14593 return NO_REGS;
14594 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
14595 {
14596 /* SSE can't load any constant directly yet. */
14597 if (SSE_CLASS_P (class))
14598 return NO_REGS;
14599 /* Floats can load 0 and 1. */
14600 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
14601 {
14602 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
14603 if (MAYBE_SSE_CLASS_P (class))
14604 return (reg_class_subset_p (class, GENERAL_REGS)
14605 ? GENERAL_REGS : FLOAT_REGS);
14606 else
14607 return class;
14608 }
14609 /* General regs can load everything. */
14610 if (reg_class_subset_p (class, GENERAL_REGS))
14611 return GENERAL_REGS;
14612 /* In case we haven't resolved FLOAT or SSE yet, give up. */
14613 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
14614 return NO_REGS;
14615 }
14616 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
14617 return NO_REGS;
14618 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
14619 return Q_REGS;
14620 return class;
14621 }
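/* Illustrative example (editor's note): asking to reload the constant
   1.0 into an SSE class returns NO_REGS, so the value is forced to the
   constant pool and loaded from memory, whereas the same constant headed
   for the x87 stack keeps a float class because
   standard_80387_constant_p recognizes it (fld1).  */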
14622
14623 /* If we are copying between general and FP registers, we need a memory
14624 location. The same is true for SSE and MMX registers.
14625
14626 The macro can't work reliably when one of the CLASSES is a class containing
14627 registers from multiple units (SSE, MMX, integer). We avoid this by never
14628 combining those units in a single alternative in the machine description.
14629 Ensure that this constraint holds to avoid unexpected surprises.
14630
14631 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
14632 enforce these sanity checks. */
14633 int
14634 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
14635 enum machine_mode mode, int strict)
14636 {
14637 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
14638 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
14639 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
14640 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
14641 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
14642 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
14643 {
14644 if (strict)
14645 abort ();
14646 else
14647 return 1;
14648 }
14649 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
14650 || ((SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
14651 || MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
14652 && ((mode != SImode && (mode != DImode || !TARGET_64BIT))
14653 || (!TARGET_INTER_UNIT_MOVES && !optimize_size))));
14654 }
14655 /* Return the cost of moving data from a register in class CLASS1 to
14656 one in class CLASS2.
14657
14658 It is not required that the cost always equal 2 when FROM is the same as TO;
14659 on some machines it is expensive to move between registers if they are not
14660 general registers. */
14661 int
14662 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
14663 enum reg_class class2)
14664 {
14665 /* If we require secondary memory, compute the cost of the store followed
14666 by the load. To avoid bad register allocation choices, we need this
14667 to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
14668
14669 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
14670 {
14671 int cost = 1;
14672
14673 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
14674 MEMORY_MOVE_COST (mode, class1, 1));
14675 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
14676 MEMORY_MOVE_COST (mode, class2, 1));
14677
14678 /* When copying from a general purpose register we may emit multiple
14679 stores followed by a single load, causing a memory size mismatch stall.
14680 Count this as an arbitrarily high cost of 20. */
14681 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
14682 cost += 20;
14683
14684 /* In the case of FP/MMX moves, the registers actually overlap, and we
14685 have to switch modes in order to treat them differently. */
14686 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
14687 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
14688 cost += 20;
14689
14690 return cost;
14691 }
14692
14693 /* Moves between SSE/MMX and integer unit are expensive. */
14694 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
14695 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
14696 return ix86_cost->mmxsse_to_integer;
14697 if (MAYBE_FLOAT_CLASS_P (class1))
14698 return ix86_cost->fp_move;
14699 if (MAYBE_SSE_CLASS_P (class1))
14700 return ix86_cost->sse_move;
14701 if (MAYBE_MMX_CLASS_P (class1))
14702 return ix86_cost->mmx_move;
14703 return 2;
14704 }
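/* Worked example (editor's note): copying DImode between GENERAL_REGS and
   SSE_REGS on a 32-bit target needs secondary memory, so the cost becomes
   1 + MAX (store, load) for each class, plus 20 when copying from the
   general-register side, which needs two SImode stores against one vector
   load; this keeps the allocator from bouncing 64-bit values between the
   units.  */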
14705
14706 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
14707 int
14708 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
14709 {
14710 /* The flags register, and only the flags register, can hold CCmode values. */
14711 if (CC_REGNO_P (regno))
14712 return GET_MODE_CLASS (mode) == MODE_CC;
14713 if (GET_MODE_CLASS (mode) == MODE_CC
14714 || GET_MODE_CLASS (mode) == MODE_RANDOM
14715 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
14716 return 0;
14717 if (FP_REGNO_P (regno))
14718 return VALID_FP_MODE_P (mode);
14719 if (SSE_REGNO_P (regno))
14720 {
14721 /* We implement the move patterns for all vector modes into and
14722 out of SSE registers, even when no operation instructions
14723 are available. */
14724 return (VALID_SSE_REG_MODE (mode)
14725 || VALID_SSE2_REG_MODE (mode)
14726 || VALID_MMX_REG_MODE (mode)
14727 || VALID_MMX_REG_MODE_3DNOW (mode));
14728 }
14729 if (MMX_REGNO_P (regno))
14730 {
14731 /* We implement the move patterns for 3DNOW modes even in MMX mode,
14732 so if the register is available at all, then we can move data of
14733 the given mode into or out of it. */
14734 return (VALID_MMX_REG_MODE (mode)
14735 || VALID_MMX_REG_MODE_3DNOW (mode));
14736 }
14737 /* We handle both integers and floats in the general purpose registers.
14738 In the future we should be able to handle vector modes as well. */
14739 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
14740 return 0;
14741 /* Take care with QImode values - they can live in non-QI regs, but then
14742 they cause partial register stalls. */
14743 if (regno < 4 || mode != QImode || TARGET_64BIT)
14744 return 1;
14745 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
14746 }
14747
14748 /* Return the cost of moving data of mode M between a
14749 register and memory. A value of 2 is the default; this cost is
14750 relative to those in `REGISTER_MOVE_COST'.
14751
14752 If moving between registers and memory is more expensive than
14753 between two registers, you should define this macro to express the
14754 relative cost.
14755
14756 Model also increased moving costs of QImode registers in non
14757 Q_REGS classes.
14758 */
14759 int
14760 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
14761 {
14762 if (FLOAT_CLASS_P (class))
14763 {
14764 int index;
14765 switch (mode)
14766 {
14767 case SFmode:
14768 index = 0;
14769 break;
14770 case DFmode:
14771 index = 1;
14772 break;
14773 case XFmode:
14774 index = 2;
14775 break;
14776 default:
14777 return 100;
14778 }
14779 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
14780 }
14781 if (SSE_CLASS_P (class))
14782 {
14783 int index;
14784 switch (GET_MODE_SIZE (mode))
14785 {
14786 case 4:
14787 index = 0;
14788 break;
14789 case 8:
14790 index = 1;
14791 break;
14792 case 16:
14793 index = 2;
14794 break;
14795 default:
14796 return 100;
14797 }
14798 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
14799 }
14800 if (MMX_CLASS_P (class))
14801 {
14802 int index;
14803 switch (GET_MODE_SIZE (mode))
14804 {
14805 case 4:
14806 index = 0;
14807 break;
14808 case 8:
14809 index = 1;
14810 break;
14811 default:
14812 return 100;
14813 }
14814 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
14815 }
14816 switch (GET_MODE_SIZE (mode))
14817 {
14818 case 1:
14819 if (in)
14820 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
14821 : ix86_cost->movzbl_load);
14822 else
14823 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
14824 : ix86_cost->int_store[0] + 4);
14825 break;
14826 case 2:
14827 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
14828 default:
14829 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
14830 if (mode == TFmode)
14831 mode = XFmode;
14832 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
14833 * (((int) GET_MODE_SIZE (mode)
14834 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
14835 }
14836 }
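/* Worked example (editor's note): a DImode value in GENERAL_REGS on a
   32-bit target falls into the default case and costs int_load[2] or
   int_store[2] scaled by the two words it occupies, while an SFmode value
   in FLOAT_REGS is charged fp_load[0] or fp_store[0] directly.  */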
14837
14838 /* Compute a (partial) cost for rtx X. Return true if the complete
14839 cost has been computed, and false if subexpressions should be
14840 scanned. In either case, *TOTAL contains the cost result. */
14841
14842 static bool
14843 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
14844 {
14845 enum machine_mode mode = GET_MODE (x);
14846
14847 switch (code)
14848 {
14849 case CONST_INT:
14850 case CONST:
14851 case LABEL_REF:
14852 case SYMBOL_REF:
14853 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
14854 *total = 3;
14855 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
14856 *total = 2;
14857 else if (flag_pic && SYMBOLIC_CONST (x)
14858 && (!TARGET_64BIT
14859 || (GET_CODE (x) != LABEL_REF
14860 && (GET_CODE (x) != SYMBOL_REF
14861 || !SYMBOL_REF_LOCAL_P (x)))))
14862 *total = 1;
14863 else
14864 *total = 0;
14865 return true;
14866
14867 case CONST_DOUBLE:
14868 if (mode == VOIDmode)
14869 *total = 0;
14870 else
14871 switch (standard_80387_constant_p (x))
14872 {
14873 case 1: /* 0.0 */
14874 *total = 1;
14875 break;
14876 default: /* Other constants */
14877 *total = 2;
14878 break;
14879 case 0:
14880 case -1:
14881 /* Start with (MEM (SYMBOL_REF)), since that's where
14882 it'll probably end up. Add a penalty for size. */
14883 *total = (COSTS_N_INSNS (1)
14884 + (flag_pic != 0 && !TARGET_64BIT)
14885 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
14886 break;
14887 }
14888 return true;
14889
14890 case ZERO_EXTEND:
14891 /* The zero extension is often completely free on x86_64, so make
14892 it as cheap as possible. */
14893 if (TARGET_64BIT && mode == DImode
14894 && GET_MODE (XEXP (x, 0)) == SImode)
14895 *total = 1;
14896 else if (TARGET_ZERO_EXTEND_WITH_AND)
14897 *total = COSTS_N_INSNS (ix86_cost->add);
14898 else
14899 *total = COSTS_N_INSNS (ix86_cost->movzx);
14900 return false;
14901
14902 case SIGN_EXTEND:
14903 *total = COSTS_N_INSNS (ix86_cost->movsx);
14904 return false;
14905
14906 case ASHIFT:
14907 if (GET_CODE (XEXP (x, 1)) == CONST_INT
14908 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
14909 {
14910 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
14911 if (value == 1)
14912 {
14913 *total = COSTS_N_INSNS (ix86_cost->add);
14914 return false;
14915 }
14916 if ((value == 2 || value == 3)
14917 && ix86_cost->lea <= ix86_cost->shift_const)
14918 {
14919 *total = COSTS_N_INSNS (ix86_cost->lea);
14920 return false;
14921 }
14922 }
14923 /* FALLTHRU */
14924
14925 case ROTATE:
14926 case ASHIFTRT:
14927 case LSHIFTRT:
14928 case ROTATERT:
14929 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
14930 {
14931 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14932 {
14933 if (INTVAL (XEXP (x, 1)) > 32)
14934 *total = COSTS_N_INSNS(ix86_cost->shift_const + 2);
14935 else
14936 *total = COSTS_N_INSNS(ix86_cost->shift_const * 2);
14937 }
14938 else
14939 {
14940 if (GET_CODE (XEXP (x, 1)) == AND)
14941 *total = COSTS_N_INSNS(ix86_cost->shift_var * 2);
14942 else
14943 *total = COSTS_N_INSNS(ix86_cost->shift_var * 6 + 2);
14944 }
14945 }
14946 else
14947 {
14948 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14949 *total = COSTS_N_INSNS (ix86_cost->shift_const);
14950 else
14951 *total = COSTS_N_INSNS (ix86_cost->shift_var);
14952 }
14953 return false;
14954
14955 case MULT:
14956 if (FLOAT_MODE_P (mode))
14957 {
14958 *total = COSTS_N_INSNS (ix86_cost->fmul);
14959 return false;
14960 }
14961 else
14962 {
14963 rtx op0 = XEXP (x, 0);
14964 rtx op1 = XEXP (x, 1);
14965 int nbits;
14966 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14967 {
14968 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
14969 for (nbits = 0; value != 0; value &= value - 1)
14970 nbits++;
14971 }
14972 else
14973 /* This is arbitrary. */
14974 nbits = 7;
14975
14976 /* Compute costs correctly for widening multiplication. */
14977 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
14978 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
14979 == GET_MODE_SIZE (mode))
14980 {
14981 int is_mulwiden = 0;
14982 enum machine_mode inner_mode = GET_MODE (op0);
14983
14984 if (GET_CODE (op0) == GET_CODE (op1))
14985 is_mulwiden = 1, op1 = XEXP (op1, 0);
14986 else if (GET_CODE (op1) == CONST_INT)
14987 {
14988 if (GET_CODE (op0) == SIGN_EXTEND)
14989 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
14990 == INTVAL (op1);
14991 else
14992 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
14993 }
14994
14995 if (is_mulwiden)
14996 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
14997 }
14998
14999 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
15000 + nbits * ix86_cost->mult_bit)
15001 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code);
15002
15003 return true;
15004 }
15005
15006 case DIV:
15007 case UDIV:
15008 case MOD:
15009 case UMOD:
15010 if (FLOAT_MODE_P (mode))
15011 *total = COSTS_N_INSNS (ix86_cost->fdiv);
15012 else
15013 *total = COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)]);
15014 return false;
15015
15016 case PLUS:
15017 if (FLOAT_MODE_P (mode))
15018 *total = COSTS_N_INSNS (ix86_cost->fadd);
15019 else if (GET_MODE_CLASS (mode) == MODE_INT
15020 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
15021 {
15022 if (GET_CODE (XEXP (x, 0)) == PLUS
15023 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
15024 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
15025 && CONSTANT_P (XEXP (x, 1)))
15026 {
15027 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
15028 if (val == 2 || val == 4 || val == 8)
15029 {
15030 *total = COSTS_N_INSNS (ix86_cost->lea);
15031 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
15032 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
15033 outer_code);
15034 *total += rtx_cost (XEXP (x, 1), outer_code);
15035 return true;
15036 }
15037 }
15038 else if (GET_CODE (XEXP (x, 0)) == MULT
15039 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
15040 {
15041 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
15042 if (val == 2 || val == 4 || val == 8)
15043 {
15044 *total = COSTS_N_INSNS (ix86_cost->lea);
15045 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
15046 *total += rtx_cost (XEXP (x, 1), outer_code);
15047 return true;
15048 }
15049 }
15050 else if (GET_CODE (XEXP (x, 0)) == PLUS)
15051 {
15052 *total = COSTS_N_INSNS (ix86_cost->lea);
15053 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
15054 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
15055 *total += rtx_cost (XEXP (x, 1), outer_code);
15056 return true;
15057 }
15058 }
15059 /* FALLTHRU */
15060
15061 case MINUS:
15062 if (FLOAT_MODE_P (mode))
15063 {
15064 *total = COSTS_N_INSNS (ix86_cost->fadd);
15065 return false;
15066 }
15067 /* FALLTHRU */
15068
15069 case AND:
15070 case IOR:
15071 case XOR:
15072 if (!TARGET_64BIT && mode == DImode)
15073 {
15074 *total = (COSTS_N_INSNS (ix86_cost->add) * 2
15075 + (rtx_cost (XEXP (x, 0), outer_code)
15076 << (GET_MODE (XEXP (x, 0)) != DImode))
15077 + (rtx_cost (XEXP (x, 1), outer_code)
15078 << (GET_MODE (XEXP (x, 1)) != DImode)));
15079 return true;
15080 }
15081 /* FALLTHRU */
15082
15083 case NEG:
15084 if (FLOAT_MODE_P (mode))
15085 {
15086 *total = COSTS_N_INSNS (ix86_cost->fchs);
15087 return false;
15088 }
15089 /* FALLTHRU */
15090
15091 case NOT:
15092 if (!TARGET_64BIT && mode == DImode)
15093 *total = COSTS_N_INSNS (ix86_cost->add * 2);
15094 else
15095 *total = COSTS_N_INSNS (ix86_cost->add);
15096 return false;
15097
15098 case COMPARE:
15099 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
15100 && XEXP (XEXP (x, 0), 1) == const1_rtx
15101 && GET_CODE (XEXP (XEXP (x, 0), 2)) == CONST_INT
15102 && XEXP (x, 1) == const0_rtx)
15103 {
15104 /* This kind of construct is implemented using test[bwl].
15105 Treat it as if we had an AND. */
15106 *total = (COSTS_N_INSNS (ix86_cost->add)
15107 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
15108 + rtx_cost (const1_rtx, outer_code));
15109 return true;
15110 }
15111 return false;
15112
15113 case FLOAT_EXTEND:
15114 if (!TARGET_SSE_MATH
15115 || mode == XFmode
15116 || (mode == DFmode && !TARGET_SSE2))
15117 *total = 0;
15118 return false;
15119
15120 case ABS:
15121 if (FLOAT_MODE_P (mode))
15122 *total = COSTS_N_INSNS (ix86_cost->fabs);
15123 return false;
15124
15125 case SQRT:
15126 if (FLOAT_MODE_P (mode))
15127 *total = COSTS_N_INSNS (ix86_cost->fsqrt);
15128 return false;
15129
15130 case UNSPEC:
15131 if (XINT (x, 1) == UNSPEC_TP)
15132 *total = 0;
15133 return false;
15134
15135 default:
15136 return false;
15137 }
15138 }
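/* Worked example (editor's note): (ashift:SI (reg) (const_int 3)) is
   priced as an lea when ix86_cost->lea is no higher than the constant
   shift cost, mirroring the scaled-index forms accepted in the PLUS case
   above, while a DImode shift on a 32-bit target is charged roughly two
   constant shifts.  */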
15139
15140 #if TARGET_MACHO
15141
15142 static int current_machopic_label_num;
15143
15144 /* Given a symbol name and its associated stub, write out the
15145 definition of the stub. */
15146
15147 void
15148 machopic_output_stub (FILE *file, const char *symb, const char *stub)
15149 {
15150 unsigned int length;
15151 char *binder_name, *symbol_name, lazy_ptr_name[32];
15152 int label = ++current_machopic_label_num;
15153
15154 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
15155 symb = (*targetm.strip_name_encoding) (symb);
15156
15157 length = strlen (stub);
15158 binder_name = alloca (length + 32);
15159 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
15160
15161 length = strlen (symb);
15162 symbol_name = alloca (length + 32);
15163 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
15164
15165 sprintf (lazy_ptr_name, "L%d$lz", label);
15166
15167 if (MACHOPIC_PURE)
15168 machopic_picsymbol_stub_section ();
15169 else
15170 machopic_symbol_stub_section ();
15171
15172 fprintf (file, "%s:\n", stub);
15173 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
15174
15175 if (MACHOPIC_PURE)
15176 {
15177 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
15178 fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
15179 fprintf (file, "\tjmp %%edx\n");
15180 }
15181 else
15182 fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
15183
15184 fprintf (file, "%s:\n", binder_name);
15185
15186 if (MACHOPIC_PURE)
15187 {
15188 fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
15189 fprintf (file, "\tpushl %%eax\n");
15190 }
15191 else
15192 fprintf (file, "\t pushl $%s\n", lazy_ptr_name);
15193
15194 fprintf (file, "\tjmp dyld_stub_binding_helper\n");
15195
15196 machopic_lazy_symbol_ptr_section ();
15197 fprintf (file, "%s:\n", lazy_ptr_name);
15198 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
15199 fprintf (file, "\t.long %s\n", binder_name);
15200 }
15201 #endif /* TARGET_MACHO */
15202
15203 /* Order the registers for the register allocator. */
15204
15205 void
15206 x86_order_regs_for_local_alloc (void)
15207 {
15208 int pos = 0;
15209 int i;
15210
15211 /* First allocate the local general purpose registers. */
15212 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15213 if (GENERAL_REGNO_P (i) && call_used_regs[i])
15214 reg_alloc_order [pos++] = i;
15215
15216 /* Global general purpose registers. */
15217 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15218 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
15219 reg_alloc_order [pos++] = i;
15220
15221 /* x87 registers come first in case we are doing FP math
15222 using them. */
15223 if (!TARGET_SSE_MATH)
15224 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15225 reg_alloc_order [pos++] = i;
15226
15227 /* SSE registers. */
15228 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
15229 reg_alloc_order [pos++] = i;
15230 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
15231 reg_alloc_order [pos++] = i;
15232
15233 /* x87 registers. */
15234 if (TARGET_SSE_MATH)
15235 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15236 reg_alloc_order [pos++] = i;
15237
15238 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
15239 reg_alloc_order [pos++] = i;
15240
15241 /* Initialize the rest of the array, as we do not allocate some registers
15242 at all. */
15243 while (pos < FIRST_PSEUDO_REGISTER)
15244 reg_alloc_order [pos++] = 0;
15245 }
15246
15247 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
15248 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
15249 #endif
15250
15251 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
15252 struct attribute_spec.handler. */
15253 static tree
15254 ix86_handle_struct_attribute (tree *node, tree name,
15255 tree args ATTRIBUTE_UNUSED,
15256 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
15257 {
15258 tree *type = NULL;
15259 if (DECL_P (*node))
15260 {
15261 if (TREE_CODE (*node) == TYPE_DECL)
15262 type = &TREE_TYPE (*node);
15263 }
15264 else
15265 type = node;
15266
15267 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
15268 || TREE_CODE (*type) == UNION_TYPE)))
15269 {
15270 warning ("%qs attribute ignored", IDENTIFIER_POINTER (name));
15271 *no_add_attrs = true;
15272 }
15273
15274 else if ((is_attribute_p ("ms_struct", name)
15275 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
15276 || ((is_attribute_p ("gcc_struct", name)
15277 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
15278 {
15279 warning ("%qs incompatible attribute ignored",
15280 IDENTIFIER_POINTER (name));
15281 *no_add_attrs = true;
15282 }
15283
15284 return NULL_TREE;
15285 }
15286
15287 static bool
15288 ix86_ms_bitfield_layout_p (tree record_type)
15289 {
15290 return (TARGET_USE_MS_BITFIELD_LAYOUT &&
15291 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
15292 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
15293 }
15294
15295 /* Returns an expression indicating where the this parameter is
15296 located on entry to the FUNCTION. */
15297
15298 static rtx
15299 x86_this_parameter (tree function)
15300 {
15301 tree type = TREE_TYPE (function);
15302
15303 if (TARGET_64BIT)
15304 {
15305 int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
15306 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
15307 }
15308
15309 if (ix86_function_regparm (type, function) > 0)
15310 {
15311 tree parm;
15312
15313 parm = TYPE_ARG_TYPES (type);
15314 /* Figure out whether or not the function has a variable number of
15315 arguments. */
15316 for (; parm; parm = TREE_CHAIN (parm))
15317 if (TREE_VALUE (parm) == void_type_node)
15318 break;
15319 /* If not, the this parameter is in the first argument. */
15320 if (parm)
15321 {
15322 int regno = 0;
15323 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
15324 regno = 2;
15325 return gen_rtx_REG (SImode, regno);
15326 }
15327 }
15328
15329 if (aggregate_value_p (TREE_TYPE (type), type))
15330 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
15331 else
15332 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
15333 }
15334
15335 /* Determine whether x86_output_mi_thunk can succeed. */
15336
15337 static bool
15338 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
15339 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
15340 HOST_WIDE_INT vcall_offset, tree function)
15341 {
15342 /* 64-bit can handle anything. */
15343 if (TARGET_64BIT)
15344 return true;
15345
15346 /* For 32-bit, everything's fine if we have one free register. */
15347 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
15348 return true;
15349
15350 /* Need a free register for vcall_offset. */
15351 if (vcall_offset)
15352 return false;
15353
15354 /* Need a free register for GOT references. */
15355 if (flag_pic && !(*targetm.binds_local_p) (function))
15356 return false;
15357
15358 /* Otherwise ok. */
15359 return true;
15360 }
15361
15362 /* Output the assembler code for a thunk function. THUNK_DECL is the
15363 declaration for the thunk function itself, FUNCTION is the decl for
15364 the target function. DELTA is an immediate constant offset to be
15365 added to THIS. If VCALL_OFFSET is nonzero, the word at
15366 *(*this + vcall_offset) should be added to THIS. */
15367
15368 static void
15369 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
15370 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
15371 HOST_WIDE_INT vcall_offset, tree function)
15372 {
15373 rtx xops[3];
15374 rtx this = x86_this_parameter (function);
15375 rtx this_reg, tmp;
15376
15377 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
15378 pull it in now and let DELTA benefit. */
15379 if (REG_P (this))
15380 this_reg = this;
15381 else if (vcall_offset)
15382 {
15383 /* Put the this parameter into %eax. */
15384 xops[0] = this;
15385 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
15386 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15387 }
15388 else
15389 this_reg = NULL_RTX;
15390
15391 /* Adjust the this parameter by a fixed constant. */
15392 if (delta)
15393 {
15394 xops[0] = GEN_INT (delta);
15395 xops[1] = this_reg ? this_reg : this;
15396 if (TARGET_64BIT)
15397 {
15398 if (!x86_64_general_operand (xops[0], DImode))
15399 {
15400 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15401 xops[1] = tmp;
15402 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
15403 xops[0] = tmp;
15404 xops[1] = this;
15405 }
15406 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15407 }
15408 else
15409 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15410 }
15411
15412 /* Adjust the this parameter by a value stored in the vtable. */
15413 if (vcall_offset)
15414 {
15415 if (TARGET_64BIT)
15416 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15417 else
15418 {
15419 int tmp_regno = 2 /* ECX */;
15420 if (lookup_attribute ("fastcall",
15421 TYPE_ATTRIBUTES (TREE_TYPE (function))))
15422 tmp_regno = 0 /* EAX */;
15423 tmp = gen_rtx_REG (SImode, tmp_regno);
15424 }
15425
15426 xops[0] = gen_rtx_MEM (Pmode, this_reg);
15427 xops[1] = tmp;
15428 if (TARGET_64BIT)
15429 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15430 else
15431 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15432
15433 /* Adjust the this parameter. */
15434 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
15435 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
15436 {
15437 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
15438 xops[0] = GEN_INT (vcall_offset);
15439 xops[1] = tmp2;
15440 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15441 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
15442 }
15443 xops[1] = this_reg;
15444 if (TARGET_64BIT)
15445 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15446 else
15447 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15448 }
15449
15450 /* If necessary, drop THIS back to its stack slot. */
15451 if (this_reg && this_reg != this)
15452 {
15453 xops[0] = this_reg;
15454 xops[1] = this;
15455 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15456 }
15457
15458 xops[0] = XEXP (DECL_RTL (function), 0);
15459 if (TARGET_64BIT)
15460 {
15461 if (!flag_pic || (*targetm.binds_local_p) (function))
15462 output_asm_insn ("jmp\t%P0", xops);
15463 else
15464 {
15465 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
15466 tmp = gen_rtx_CONST (Pmode, tmp);
15467 tmp = gen_rtx_MEM (QImode, tmp);
15468 xops[0] = tmp;
15469 output_asm_insn ("jmp\t%A0", xops);
15470 }
15471 }
15472 else
15473 {
15474 if (!flag_pic || (*targetm.binds_local_p) (function))
15475 output_asm_insn ("jmp\t%P0", xops);
15476 else
15477 #if TARGET_MACHO
15478 if (TARGET_MACHO)
15479 {
15480 rtx sym_ref = XEXP (DECL_RTL (function), 0);
15481 tmp = (gen_rtx_SYMBOL_REF
15482 (Pmode,
15483 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
15484 tmp = gen_rtx_MEM (QImode, tmp);
15485 xops[0] = tmp;
15486 output_asm_insn ("jmp\t%0", xops);
15487 }
15488 else
15489 #endif /* TARGET_MACHO */
15490 {
15491 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
15492 output_set_got (tmp);
15493
15494 xops[1] = tmp;
15495 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
15496 output_asm_insn ("jmp\t{*}%1", xops);
15497 }
15498 }
15499 }
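/* Illustrative example (editor's sketch): a simple 32-bit, non-PIC thunk
   with delta == -4 and no vcall offset prints roughly

       addl $-4, 4(%esp)
       jmp  target

   i.e. THIS is adjusted directly in its stack slot and control transfers
   to the real function with the caller's return address left in place.  */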
15500
15501 static void
15502 x86_file_start (void)
15503 {
15504 default_file_start ();
15505 if (X86_FILE_START_VERSION_DIRECTIVE)
15506 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
15507 if (X86_FILE_START_FLTUSED)
15508 fputs ("\t.global\t__fltused\n", asm_out_file);
15509 if (ix86_asm_dialect == ASM_INTEL)
15510 fputs ("\t.intel_syntax\n", asm_out_file);
15511 }
15512
15513 int
15514 x86_field_alignment (tree field, int computed)
15515 {
15516 enum machine_mode mode;
15517 tree type = TREE_TYPE (field);
15518
15519 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
15520 return computed;
15521 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
15522 ? get_inner_array_type (type) : type);
15523 if (mode == DFmode || mode == DCmode
15524 || GET_MODE_CLASS (mode) == MODE_INT
15525 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
15526 return MIN (32, computed);
15527 return computed;
15528 }
15529
15530 /* Output assembler code to FILE to increment profiler label # LABELNO
15531 for profiling a function entry. */
15532 void
15533 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
15534 {
15535 if (TARGET_64BIT)
15536 if (flag_pic)
15537 {
15538 #ifndef NO_PROFILE_COUNTERS
15539 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
15540 #endif
15541 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
15542 }
15543 else
15544 {
15545 #ifndef NO_PROFILE_COUNTERS
15546 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
15547 #endif
15548 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15549 }
15550 else if (flag_pic)
15551 {
15552 #ifndef NO_PROFILE_COUNTERS
15553 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
15554 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
15555 #endif
15556 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
15557 }
15558 else
15559 {
15560 #ifndef NO_PROFILE_COUNTERS
15561 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
15562 PROFILE_COUNT_REGISTER);
15563 #endif
15564 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15565 }
15566 }
15567
15568 /* We don't have exact information about insn sizes, but we can safely
15569 assume that we know about all 1 byte insns and about memory address
15570 sizes. This is enough to eliminate unnecessary padding in
15571 99% of cases. */
15572
15573 static int
15574 min_insn_size (rtx insn)
15575 {
15576 int l = 0;
15577
15578 if (!INSN_P (insn) || !active_insn_p (insn))
15579 return 0;
15580
15581 /* Discard alignments we've emitted and jump table data. */
15582 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
15583 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
15584 return 0;
15585 if (GET_CODE (insn) == JUMP_INSN
15586 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
15587 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
15588 return 0;
15589
15590 /* Important case - calls are always 5 bytes.
15591 It is common to have many calls in a row. */
15592 if (GET_CODE (insn) == CALL_INSN
15593 && symbolic_reference_mentioned_p (PATTERN (insn))
15594 && !SIBLING_CALL_P (insn))
15595 return 5;
15596 if (get_attr_length (insn) <= 1)
15597 return 1;
15598
15599 /* For normal instructions we may rely on the sizes of addresses
15600 and the presence of a symbol to require 4 bytes of encoding.
15601 This is not the case for jumps, where references are PC relative. */
15602 if (GET_CODE (insn) != JUMP_INSN)
15603 {
15604 l = get_attr_length_address (insn);
15605 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
15606 l = 4;
15607 }
15608 if (l)
15609 return 1+l;
15610 else
15611 return 2;
15612 }
15613
15614 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
15615 window. */
15616
15617 static void
15618 ix86_avoid_jump_misspredicts (void)
15619 {
15620 rtx insn, start = get_insns ();
15621 int nbytes = 0, njumps = 0;
15622 int isjump = 0;
15623
15624 /* Look for all minimal intervals of instructions containing 4 jumps.
15625 The intervals are bounded by START and INSN. NBYTES is the total size
15626 of the instructions in the interval, including INSN and not including
15627 START. When NBYTES is smaller than 16 bytes, it is possible that
15628 START and INSN end up in the same 16-byte window.
15629
15630 The smallest offset in the window at which INSN can start is the case
15631 where START ends at offset 0. The offset of INSN is then NBYTES - sizeof (INSN).
15632 We add a p2align to the 16-byte window with maxskip 17 - NBYTES + sizeof (INSN).
15633 */
15634 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
15635 {
15636
15637 nbytes += min_insn_size (insn);
15638 if (dump_file)
15639 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
15640 INSN_UID (insn), min_insn_size (insn));
15641 if ((GET_CODE (insn) == JUMP_INSN
15642 && GET_CODE (PATTERN (insn)) != ADDR_VEC
15643 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
15644 || GET_CODE (insn) == CALL_INSN)
15645 njumps++;
15646 else
15647 continue;
15648
15649 while (njumps > 3)
15650 {
15651 start = NEXT_INSN (start);
15652 if ((GET_CODE (start) == JUMP_INSN
15653 && GET_CODE (PATTERN (start)) != ADDR_VEC
15654 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
15655 || GET_CODE (start) == CALL_INSN)
15656 njumps--, isjump = 1;
15657 else
15658 isjump = 0;
15659 nbytes -= min_insn_size (start);
15660 }
15661 if (njumps < 0)
15662 abort ();
15663 if (dump_file)
15664 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
15665 INSN_UID (start), INSN_UID (insn), nbytes);
15666
15667 if (njumps == 3 && isjump && nbytes < 16)
15668 {
15669 int padsize = 15 - nbytes + min_insn_size (insn);
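/* E.g. with NBYTES == 12 and a 2-byte INSN this pads INSN by
15 - 12 + 2 = 5 bytes. */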
15670
15671 if (dump_file)
15672 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
15673 INSN_UID (insn), padsize);
15674 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
15675 }
15676 }
15677 }
15678
15679 /* AMD Athlon works faster
15680 when RET is not the destination of a conditional jump or is not directly
15681 preceded by another jump instruction.  We avoid the penalty by emitting
15682 the longer form of the return instruction in such cases. */
15683 static void
15684 ix86_pad_returns (void)
15685 {
15686 edge e;
15687 edge_iterator ei;
15688
15689 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
15690 {
15691 basic_block bb = e->src;
15692 rtx ret = BB_END (bb);
15693 rtx prev;
15694 bool replace = false;
15695
15696 if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
15697 || !maybe_hot_bb_p (bb))
15698 continue;
15699 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
15700 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
15701 break;
15702 if (prev && GET_CODE (prev) == CODE_LABEL)
15703 {
15704 edge e;
15705 edge_iterator ei;
15706
15707 FOR_EACH_EDGE (e, ei, bb->preds)
15708 if (EDGE_FREQUENCY (e) && e->src->index >= 0
15709 && !(e->flags & EDGE_FALLTHRU))
15710 replace = true;
15711 }
15712 if (!replace)
15713 {
15714 prev = prev_active_insn (ret);
15715 if (prev
15716 && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
15717 || GET_CODE (prev) == CALL_INSN))
15718 replace = true;
15719 /* Empty functions get a branch mispredict even when the jump destination
15720 is not visible to us. */
15721 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
15722 replace = true;
15723 }
15724 if (replace)
15725 {
15726 emit_insn_before (gen_return_internal_long (), ret);
15727 delete_insn (ret);
15728 }
15729 }
15730 }
15731
15732 /* Implement machine specific optimizations. We implement padding of returns
15733 for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window. */
15734 static void
15735 ix86_reorg (void)
15736 {
15737 if (TARGET_ATHLON_K8 && optimize && !optimize_size)
15738 ix86_pad_returns ();
15739 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
15740 ix86_avoid_jump_misspredicts ();
15741 }
15742
15743 /* Return nonzero when a QImode register that must be represented via a REX
15744 prefix is used. */
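/* In 64-bit mode the low bytes of %rsp, %rbp, %rsi and %rdi (hard registers
4 and above), as well as %r8b-%r15b, can only be encoded with a REX prefix;
%al, %bl, %cl and %dl need none. */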
15745 bool
15746 x86_extended_QIreg_mentioned_p (rtx insn)
15747 {
15748 int i;
15749 extract_insn_cached (insn);
15750 for (i = 0; i < recog_data.n_operands; i++)
15751 if (REG_P (recog_data.operand[i])
15752 && REGNO (recog_data.operand[i]) >= 4)
15753 return true;
15754 return false;
15755 }
15756
15757 /* Return nonzero when P points to a register encoded via a REX prefix.
15758 Called via for_each_rtx. */
15759 static int
15760 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
15761 {
15762 unsigned int regno;
15763 if (!REG_P (*p))
15764 return 0;
15765 regno = REGNO (*p);
15766 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
15767 }
15768
15769 /* Return true when INSN mentions a register that must be encoded using a
15770 REX prefix. */
15771 bool
15772 x86_extended_reg_mentioned_p (rtx insn)
15773 {
15774 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
15775 }
15776
15777 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
15778 optabs would emit if we didn't have TFmode patterns. */
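/* Roughly, writing OUT for operands[0] and IN for operands[1], the expansion
below corresponds to

	if ((signed) IN >= 0)
	  OUT = (fp) IN;
	else
	  OUT = (fp) ((IN >> 1) | (IN & 1)) + (fp) ((IN >> 1) | (IN & 1));

i.e. a negative-looking input is halved (rounding to odd so the low bit is
not lost), converted, and then doubled. */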
15779
15780 void
15781 x86_emit_floatuns (rtx operands[2])
15782 {
15783 rtx neglab, donelab, i0, i1, f0, in, out;
15784 enum machine_mode mode, inmode;
15785
15786 inmode = GET_MODE (operands[1]);
15787 if (inmode != SImode
15788 && inmode != DImode)
15789 abort ();
15790
15791 out = operands[0];
15792 in = force_reg (inmode, operands[1]);
15793 mode = GET_MODE (out);
15794 neglab = gen_label_rtx ();
15795 donelab = gen_label_rtx ();
15796 i1 = gen_reg_rtx (Pmode);
15797 f0 = gen_reg_rtx (mode);
15798
15799 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
15800
15801 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
15802 emit_jump_insn (gen_jump (donelab));
15803 emit_barrier ();
15804
15805 emit_label (neglab);
15806
15807 i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15808 i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15809 i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
15810 expand_float (f0, i0, 0);
15811 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
15812
15813 emit_label (donelab);
15814 }
15815 \f
15816 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
15817 with all elements equal to VAR. Return true if successful. */
15818
15819 static bool
15820 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
15821 rtx target, rtx val)
15822 {
15823 enum machine_mode smode, wsmode, wvmode;
15824 rtx x;
15825
15826 switch (mode)
15827 {
15828 case V2SImode:
15829 case V2SFmode:
15830 if (!mmx_ok && !TARGET_SSE)
15831 return false;
15832 /* FALLTHRU */
15833
15834 case V2DFmode:
15835 case V2DImode:
15836 case V4SFmode:
15837 case V4SImode:
15838 val = force_reg (GET_MODE_INNER (mode), val);
15839 x = gen_rtx_VEC_DUPLICATE (mode, val);
15840 emit_insn (gen_rtx_SET (VOIDmode, target, x));
15841 return true;
15842
15843 case V4HImode:
15844 if (!mmx_ok)
15845 return false;
15846 val = gen_lowpart (SImode, val);
15847 x = gen_rtx_TRUNCATE (HImode, val);
15848 x = gen_rtx_VEC_DUPLICATE (mode, x);
15849 emit_insn (gen_rtx_SET (VOIDmode, target, x));
15850 return true;
15851
15852 case V8QImode:
15853 if (!mmx_ok)
15854 return false;
15855 smode = QImode;
15856 wsmode = HImode;
15857 wvmode = V4HImode;
15858 goto widen;
15859 case V8HImode:
15860 smode = HImode;
15861 wsmode = SImode;
15862 wvmode = V4SImode;
15863 goto widen;
15864 case V16QImode:
15865 smode = QImode;
15866 wsmode = HImode;
15867 wvmode = V8HImode;
15868 goto widen;
15869 widen:
15870 /* Replicate the value once into the next wider mode and recurse. */
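/* For instance, a V16QImode broadcast of V first builds the HImode value
(V << 8) | V and broadcasts it as V8HImode, which in turn is widened to
V4SImode the same way before the final VEC_DUPLICATE. */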
15871 val = convert_modes (wsmode, smode, val, true);
15872 x = expand_simple_binop (wsmode, ASHIFT, val,
15873 GEN_INT (GET_MODE_BITSIZE (smode)),
15874 NULL_RTX, 1, OPTAB_LIB_WIDEN);
15875 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
15876
15877 x = gen_reg_rtx (wvmode);
15878 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
15879 gcc_unreachable ();
15880 emit_move_insn (target, gen_lowpart (mode, x));
15881 return true;
15882
15883 default:
15884 return false;
15885 }
15886 }
15887
15888 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
15889 whose low element is VAR and whose other elements are zero. Return true
15890 if successful. */
15891
15892 static bool
15893 ix86_expand_vector_init_low_nonzero (bool mmx_ok, enum machine_mode mode,
15894 rtx target, rtx var)
15895 {
15896 enum machine_mode vsimode;
15897 rtx x;
15898
15899 switch (mode)
15900 {
15901 case V2SFmode:
15902 case V2SImode:
15903 if (!mmx_ok && !TARGET_SSE)
15904 return false;
15905 /* FALLTHRU */
15906
15907 case V2DFmode:
15908 case V2DImode:
15909 var = force_reg (GET_MODE_INNER (mode), var);
15910 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
15911 emit_insn (gen_rtx_SET (VOIDmode, target, x));
15912 return true;
15913
15914 case V4SFmode:
15915 case V4SImode:
15916 var = force_reg (GET_MODE_INNER (mode), var);
15917 x = gen_rtx_VEC_DUPLICATE (mode, var);
15918 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
15919 emit_insn (gen_rtx_SET (VOIDmode, target, x));
15920 return true;
15921
15922 case V8HImode:
15923 case V16QImode:
15924 vsimode = V4SImode;
15925 goto widen;
15926 case V4HImode:
15927 case V8QImode:
15928 if (!mmx_ok)
15929 return false;
15930 vsimode = V2SImode;
15931 goto widen;
15932 widen:
15933 /* Zero extend the variable element to SImode and recurse. */
15934 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
15935
15936 x = gen_reg_rtx (vsimode);
15937 if (!ix86_expand_vector_init_low_nonzero (mmx_ok, vsimode, x, var))
15938 gcc_unreachable ();
15939
15940 emit_move_insn (target, gen_lowpart (mode, x));
15941 return true;
15942
15943 default:
15944 return false;
15945 }
15946 }
15947
15948 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
15949 consisting of the values in VALS. It is known that all elements
15950 except ONE_VAR are constants. Return true if successful. */
15951
15952 static bool
15953 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
15954 rtx target, rtx vals, int one_var)
15955 {
15956 rtx var = XVECEXP (vals, 0, one_var);
15957 enum machine_mode wmode;
15958 rtx const_vec, x;
15959
15960 XVECEXP (vals, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
15961 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
15962
15963 switch (mode)
15964 {
15965 case V2DFmode:
15966 case V2DImode:
15967 case V2SFmode:
15968 case V2SImode:
15969 /* For the two element vectors, it's just as easy to use
15970 the general case. */
15971 return false;
15972
15973 case V4SFmode:
15974 case V4SImode:
15975 case V8HImode:
15976 case V4HImode:
15977 break;
15978
15979 case V16QImode:
15980 wmode = V8HImode;
15981 goto widen;
15982 case V8QImode:
15983 wmode = V4HImode;
15984 goto widen;
15985 widen:
15986 /* There's no way to set one QImode entry easily. Combine
15987 the variable value with its adjacent constant value, and
15988 promote to an HImode set. */
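/* E.g. if ONE_VAR is odd, the HImode value built below is
(var << 8) | (adjacent constant & 0xff); if it is even, it is
(adjacent constant << 8) | var. */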
15989 x = XVECEXP (vals, 0, one_var ^ 1);
15990 if (one_var & 1)
15991 {
15992 var = convert_modes (HImode, QImode, var, true);
15993 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
15994 NULL_RTX, 1, OPTAB_LIB_WIDEN);
15995 x = GEN_INT (INTVAL (x) & 0xff);
15996 }
15997 else
15998 {
15999 var = convert_modes (HImode, QImode, var, true);
16000 x = gen_int_mode (INTVAL (x) << 8, HImode);
16001 }
16002 if (x != const0_rtx)
16003 var = expand_simple_binop (HImode, IOR, var, x, var,
16004 1, OPTAB_LIB_WIDEN);
16005
16006 x = gen_reg_rtx (wmode);
16007 emit_move_insn (x, gen_lowpart (wmode, const_vec));
16008 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
16009
16010 emit_move_insn (target, gen_lowpart (mode, x));
16011 return true;
16012
16013 default:
16014 return false;
16015 }
16016
16017 emit_move_insn (target, const_vec);
16018 ix86_expand_vector_set (mmx_ok, target, var, one_var);
16019 return true;
16020 }
16021
16022 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
16023 all values variable, and none identical. */
16024
16025 static void
16026 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
16027 rtx target, rtx vals)
16028 {
16029 enum machine_mode half_mode = GET_MODE_INNER (mode);
16030 rtx op0 = NULL, op1 = NULL;
16031 bool use_vec_concat = false;
16032
16033 switch (mode)
16034 {
16035 case V2SFmode:
16036 case V2SImode:
16037 if (!mmx_ok && !TARGET_SSE)
16038 break;
16039 /* FALLTHRU */
16040
16041 case V2DFmode:
16042 case V2DImode:
16043 /* For the two element vectors, we always implement VEC_CONCAT. */
16044 op0 = XVECEXP (vals, 0, 0);
16045 op1 = XVECEXP (vals, 0, 1);
16046 use_vec_concat = true;
16047 break;
16048
16049 case V4SFmode:
16050 half_mode = V2SFmode;
16051 goto half;
16052 case V4SImode:
16053 half_mode = V2SImode;
16054 goto half;
16055 half:
16056 {
16057 rtvec v;
16058
16059 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
16060 Recurse to load the two halves. */
16061
16062 op0 = gen_reg_rtx (half_mode);
16063 v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
16064 ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
16065
16066 op1 = gen_reg_rtx (half_mode);
16067 v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
16068 ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
16069
16070 use_vec_concat = true;
16071 }
16072 break;
16073
16074 case V8HImode:
16075 case V16QImode:
16076 case V4HImode:
16077 case V8QImode:
16078 break;
16079
16080 default:
16081 gcc_unreachable ();
16082 }
16083
16084 if (use_vec_concat)
16085 {
16086 if (!register_operand (op0, half_mode))
16087 op0 = force_reg (half_mode, op0);
16088 if (!register_operand (op1, half_mode))
16089 op1 = force_reg (half_mode, op1);
16090
16091 emit_insn (gen_rtx_SET (VOIDmode, target,
16092 gen_rtx_VEC_CONCAT (mode, op0, op1)));
16093 }
16094 else
16095 {
16096 int i, j, n_elts, n_words, n_elt_per_word;
16097 enum machine_mode inner_mode;
16098 rtx words[4], shift;
16099
16100 inner_mode = GET_MODE_INNER (mode);
16101 n_elts = GET_MODE_NUNITS (mode);
16102 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
16103 n_elt_per_word = n_elts / n_words;
16104 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
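/* Each word is assembled with vector element 0 ending up in the least
significant bits; e.g. for V8HImode with a 32-bit word, word I below is
(elt[2*I + 1] << 16) | elt[2*I]. */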
16105
16106 for (i = 0; i < n_words; ++i)
16107 {
16108 rtx word = NULL_RTX;
16109
16110 for (j = 0; j < n_elt_per_word; ++j)
16111 {
16112 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
16113 elt = convert_modes (word_mode, inner_mode, elt, true);
16114
16115 if (j == 0)
16116 word = elt;
16117 else
16118 {
16119 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
16120 word, 1, OPTAB_LIB_WIDEN);
16121 word = expand_simple_binop (word_mode, IOR, word, elt,
16122 word, 1, OPTAB_LIB_WIDEN);
16123 }
16124 }
16125
16126 words[i] = word;
16127 }
16128
16129 if (n_words == 1)
16130 emit_move_insn (target, gen_lowpart (mode, words[0]));
16131 else if (n_words == 2)
16132 {
16133 rtx tmp = gen_reg_rtx (mode);
16134 emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
16135 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
16136 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
16137 emit_move_insn (target, tmp);
16138 }
16139 else if (n_words == 4)
16140 {
16141 rtx tmp = gen_reg_rtx (V4SImode);
16142 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
16143 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
16144 emit_move_insn (target, gen_lowpart (mode, tmp));
16145 }
16146 else
16147 gcc_unreachable ();
16148 }
16149 }
16150
16151 /* Initialize vector TARGET via VALS. Suppress the use of MMX
16152 instructions unless MMX_OK is true. */
16153
16154 void
16155 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
16156 {
16157 enum machine_mode mode = GET_MODE (target);
16158 enum machine_mode inner_mode = GET_MODE_INNER (mode);
16159 int n_elts = GET_MODE_NUNITS (mode);
16160 int n_var = 0, one_var = -1;
16161 bool all_same = true, all_const_zero = true;
16162 int i;
16163 rtx x;
16164
16165 for (i = 0; i < n_elts; ++i)
16166 {
16167 x = XVECEXP (vals, 0, i);
16168 if (!CONSTANT_P (x))
16169 n_var++, one_var = i;
16170 else if (x != CONST0_RTX (inner_mode))
16171 all_const_zero = false;
16172 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
16173 all_same = false;
16174 }
16175
16176 /* Constants are best loaded from the constant pool. */
16177 if (n_var == 0)
16178 {
16179 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
16180 return;
16181 }
16182
16183 /* If all values are identical, broadcast the value. */
16184 if (all_same
16185 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
16186 XVECEXP (vals, 0, 0)))
16187 return;
16188
16189 /* Values where only one field is non-constant are best loaded from
16190 the pool and overwritten via move later. */
16191 if (n_var == 1)
16192 {
16193 if (all_const_zero && one_var == 0
16194 && ix86_expand_vector_init_low_nonzero (mmx_ok, mode, target,
16195 XVECEXP (vals, 0, 0)))
16196 return;
16197
16198 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
16199 return;
16200 }
16201
16202 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
16203 }
16204
16205 void
16206 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
16207 {
16208 enum machine_mode mode = GET_MODE (target);
16209 enum machine_mode inner_mode = GET_MODE_INNER (mode);
16210 bool use_vec_merge = false;
16211 rtx tmp;
16212
16213 switch (mode)
16214 {
16215 case V2SFmode:
16216 case V2SImode:
16217 if (!mmx_ok)
16218 break;
16219 /* FALLTHRU */
16220
16221 case V2DFmode:
16222 case V2DImode:
16223 {
16224 rtx op0, op1;
16225
16226 /* For the two element vectors, we implement a VEC_CONCAT with
16227 the extraction of the other element. */
16228
16229 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
16230 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
16231
16232 if (elt == 0)
16233 op0 = val, op1 = tmp;
16234 else
16235 op0 = tmp, op1 = val;
16236
16237 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
16238 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
16239 }
16240 return;
16241
16242 case V4SFmode:
16243 switch (elt)
16244 {
16245 case 0:
16246 use_vec_merge = true;
16247 break;
16248
16249 case 1:
16250 /* tmp = op0 = A B C D */
16251 tmp = copy_to_reg (target);
16252
16253 /* op0 = C C D D */
16254 emit_insn (gen_sse_unpcklps (target, target, target));
16255
16256 /* op0 = C C D X */
16257 ix86_expand_vector_set (false, target, val, 0);
16258
16259 /* op0 = A B X D */
16260 emit_insn (gen_sse_shufps_1 (target, target, tmp,
16261 GEN_INT (1), GEN_INT (0),
16262 GEN_INT (2), GEN_INT (3)));
16263 return;
16264
16265 case 2:
16266 tmp = copy_to_reg (target);
16267 ix86_expand_vector_set (false, target, val, 0);
16268 emit_insn (gen_sse_shufps_1 (target, target, tmp,
16269 GEN_INT (0), GEN_INT (1),
16270 GEN_INT (0), GEN_INT (3)));
16271 return;
16272
16273 case 3:
16274 tmp = copy_to_reg (target);
16275 ix86_expand_vector_set (false, target, val, 0);
16276 emit_insn (gen_sse_shufps_1 (target, target, tmp,
16277 GEN_INT (0), GEN_INT (1),
16278 GEN_INT (2), GEN_INT (0)));
16279 return;
16280
16281 default:
16282 gcc_unreachable ();
16283 }
16284 break;
16285
16286 case V4SImode:
16287 /* Element 0 handled by vec_merge below. */
16288 if (elt == 0)
16289 {
16290 use_vec_merge = true;
16291 break;
16292 }
16293
16294 if (TARGET_SSE2)
16295 {
16296 /* With SSE2, use integer shuffles to swap element 0 and ELT,
16297 store into element 0, then shuffle them back. */
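/* E.g. for ELT == 2 the order below is {2, 1, 0, 3}; the first pshufd
exchanges elements 0 and 2, the store rewrites element 0, and the second,
identical pshufd swaps them back. */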
16298
16299 rtx order[4];
16300
16301 order[0] = GEN_INT (elt);
16302 order[1] = const1_rtx;
16303 order[2] = const2_rtx;
16304 order[3] = GEN_INT (3);
16305 order[elt] = const0_rtx;
16306
16307 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
16308 order[1], order[2], order[3]));
16309
16310 ix86_expand_vector_set (false, target, val, 0);
16311
16312 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
16313 order[1], order[2], order[3]));
16314 }
16315 else
16316 {
16317 /* For SSE1, we have to reuse the V4SF code. */
16318 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
16319 gen_lowpart (SFmode, val), elt);
16320 }
16321 return;
16322
16323 case V8HImode:
16324 use_vec_merge = TARGET_SSE2;
16325 break;
16326 case V4HImode:
16327 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
16328 break;
16329
16330 case V16QImode:
16331 case V8QImode:
16332 default:
16333 break;
16334 }
16335
16336 if (use_vec_merge)
16337 {
16338 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
16339 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
16340 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
16341 }
16342 else
16343 {
16344 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
16345
16346 emit_move_insn (mem, target);
16347
16348 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
16349 emit_move_insn (tmp, val);
16350
16351 emit_move_insn (target, mem);
16352 }
16353 }
16354
16355 void
16356 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
16357 {
16358 enum machine_mode mode = GET_MODE (vec);
16359 enum machine_mode inner_mode = GET_MODE_INNER (mode);
16360 bool use_vec_extr = false;
16361 rtx tmp;
16362
16363 switch (mode)
16364 {
16365 case V2SImode:
16366 case V2SFmode:
16367 if (!mmx_ok)
16368 break;
16369 /* FALLTHRU */
16370
16371 case V2DFmode:
16372 case V2DImode:
16373 use_vec_extr = true;
16374 break;
16375
16376 case V4SFmode:
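/* Bring the requested element into position 0 (shufps broadcasting ELT
for elements 1 and 3, unpckhps for element 2), then extract element 0
below. */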
16377 switch (elt)
16378 {
16379 case 0:
16380 tmp = vec;
16381 break;
16382
16383 case 1:
16384 case 3:
16385 tmp = gen_reg_rtx (mode);
16386 emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
16387 GEN_INT (elt), GEN_INT (elt),
16388 GEN_INT (elt), GEN_INT (elt)));
16389 break;
16390
16391 case 2:
16392 tmp = gen_reg_rtx (mode);
16393 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
16394 break;
16395
16396 default:
16397 gcc_unreachable ();
16398 }
16399 vec = tmp;
16400 use_vec_extr = true;
16401 elt = 0;
16402 break;
16403
16404 case V4SImode:
16405 if (TARGET_SSE2)
16406 {
16407 switch (elt)
16408 {
16409 case 0:
16410 tmp = vec;
16411 break;
16412
16413 case 1:
16414 case 3:
16415 tmp = gen_reg_rtx (mode);
16416 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
16417 GEN_INT (elt), GEN_INT (elt),
16418 GEN_INT (elt), GEN_INT (elt)));
16419 break;
16420
16421 case 2:
16422 tmp = gen_reg_rtx (mode);
16423 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
16424 break;
16425
16426 default:
16427 gcc_unreachable ();
16428 }
16429 vec = tmp;
16430 use_vec_extr = true;
16431 elt = 0;
16432 }
16433 else
16434 {
16435 /* For SSE1, we have to reuse the V4SF code. */
16436 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
16437 gen_lowpart (V4SFmode, vec), elt);
16438 return;
16439 }
16440 break;
16441
16442 case V8HImode:
16443 use_vec_extr = TARGET_SSE2;
16444 break;
16445 case V4HImode:
16446 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
16447 break;
16448
16449 case V16QImode:
16450 case V8QImode:
16451 /* ??? Could extract the appropriate HImode element and shift. */
16452 default:
16453 break;
16454 }
16455
16456 if (use_vec_extr)
16457 {
16458 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
16459 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
16460
16461 /* Let the rtl optimizers know about the zero extension performed. */
16462 if (inner_mode == HImode)
16463 {
16464 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
16465 target = gen_lowpart (SImode, target);
16466 }
16467
16468 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
16469 }
16470 else
16471 {
16472 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
16473
16474 emit_move_insn (mem, vec);
16475
16476 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
16477 emit_move_insn (target, tmp);
16478 }
16479 }
16480 \f
16481 /* Implements target hook vector_mode_supported_p. */
16482 static bool
16483 ix86_vector_mode_supported_p (enum machine_mode mode)
16484 {
16485 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
16486 return true;
16487 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
16488 return true;
16489 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
16490 return true;
16491 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
16492 return true;
16493 return false;
16494 }
16495
16496 /* Worker function for TARGET_MD_ASM_CLOBBERS.
16497
16498 We do this in the new i386 backend to maintain source compatibility
16499 with the old cc0-based compiler. */
16500
16501 static tree
16502 ix86_md_asm_clobbers (tree clobbers)
16503 {
16504 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
16505 clobbers);
16506 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
16507 clobbers);
16508 clobbers = tree_cons (NULL_TREE, build_string (7, "dirflag"),
16509 clobbers);
16510 return clobbers;
16511 }
16512
16513 /* Worker function for REVERSE_CONDITION. */
16514
16515 enum rtx_code
16516 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
16517 {
16518 return (mode != CCFPmode && mode != CCFPUmode
16519 ? reverse_condition (code)
16520 : reverse_condition_maybe_unordered (code));
16521 }
16522
16523 /* Output code to perform an x87 FP register move, from OPERANDS[1]
16524 to OPERANDS[0]. */
16525
16526 const char *
16527 output_387_reg_move (rtx insn, rtx *operands)
16528 {
16529 if (REG_P (operands[1])
16530 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
16531 {
16532 if (REGNO (operands[0]) == FIRST_STACK_REG
16533 && TARGET_USE_FFREEP)
16534 return "ffreep\t%y0";
16535 return "fstp\t%y0";
16536 }
16537 if (STACK_TOP_P (operands[0]))
16538 return "fld%z1\t%y1";
16539 return "fst\t%y0";
16540 }
16541
16542 /* Output code to perform a conditional jump to LABEL, if the C2 flag in the
16543 FP status register is set. */
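/* The C2 condition flag is bit 10 of the FP status word, i.e. bit 2 of the
high byte stored by fnstsw, hence the 0x04 mask in the non-SAHF path below;
with SAHF the flag is copied into PF, which the UNORDERED test checks. */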
16544
16545 void
16546 ix86_emit_fp_unordered_jump (rtx label)
16547 {
16548 rtx reg = gen_reg_rtx (HImode);
16549 rtx temp;
16550
16551 emit_insn (gen_x86_fnstsw_1 (reg));
16552
16553 if (TARGET_USE_SAHF)
16554 {
16555 emit_insn (gen_x86_sahf_1 (reg));
16556
16557 temp = gen_rtx_REG (CCmode, FLAGS_REG);
16558 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
16559 }
16560 else
16561 {
16562 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
16563
16564 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
16565 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
16566 }
16567
16568 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
16569 gen_rtx_LABEL_REF (VOIDmode, label),
16570 pc_rtx);
16571 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
16572 emit_jump_insn (temp);
16573 }
16574
16575 /* Output code to perform a log1p XFmode calculation. */
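/* The threshold compared against below, 0.29289..., is 1 - sqrt(2)/2: the
x87 fyl2xp1 instruction is only specified for arguments of magnitude below
that bound, so larger |op1| values fall back to fyl2x on 1 + op1.  Both
paths compute ln(2) * log2(1 + op1), i.e. log1p(op1). */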
16576
16577 void ix86_emit_i387_log1p (rtx op0, rtx op1)
16578 {
16579 rtx label1 = gen_label_rtx ();
16580 rtx label2 = gen_label_rtx ();
16581
16582 rtx tmp = gen_reg_rtx (XFmode);
16583 rtx tmp2 = gen_reg_rtx (XFmode);
16584
16585 emit_insn (gen_absxf2 (tmp, op1));
16586 emit_insn (gen_cmpxf (tmp,
16587 CONST_DOUBLE_FROM_REAL_VALUE (
16588 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
16589 XFmode)));
16590 emit_jump_insn (gen_bge (label1));
16591
16592 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
16593 emit_insn (gen_fyl2xp1_xf3 (op0, tmp2, op1));
16594 emit_jump (label2);
16595
16596 emit_label (label1);
16597 emit_move_insn (tmp, CONST1_RTX (XFmode));
16598 emit_insn (gen_addxf3 (tmp, op1, tmp));
16599 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
16600 emit_insn (gen_fyl2x_xf3 (op0, tmp2, tmp));
16601
16602 emit_label (label2);
16603 }
16604
16605 /* Solaris named-section hook. Parameters are as for
16606 named_section_real. */
16607
16608 static void
16609 i386_solaris_elf_named_section (const char *name, unsigned int flags,
16610 tree decl)
16611 {
16612 /* With Binutils 2.15, the "@unwind" marker must be specified on
16613 every occurrence of the ".eh_frame" section, not just the first
16614 one. */
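/* E.g. for a writable .eh_frame section this emits
	.section	.eh_frame,"aw",@unwind  */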
16615 if (TARGET_64BIT
16616 && strcmp (name, ".eh_frame") == 0)
16617 {
16618 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
16619 flags & SECTION_WRITE ? "aw" : "a");
16620 return;
16621 }
16622 default_elf_asm_named_section (name, flags, decl);
16623 }
16624
16625 #include "gt-i386.h"