1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "tm_p.h"
29 #include "regs.h"
30 #include "hard-reg-set.h"
31 #include "real.h"
32 #include "insn-config.h"
33 #include "conditions.h"
34 #include "output.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
37 #include "flags.h"
38 #include "except.h"
39 #include "function.h"
40 #include "recog.h"
41 #include "expr.h"
42 #include "optabs.h"
43 #include "toplev.h"
44 #include "basic-block.h"
45 #include "ggc.h"
46 #include "target.h"
47 #include "target-def.h"
48 #include "langhooks.h"
49 #include "cgraph.h"
50 #include "tree-gimple.h"
51
52 #ifndef CHECK_STACK_LIMIT
53 #define CHECK_STACK_LIMIT (-1)
54 #endif
55
56 /* Return index of given mode in mult and division cost tables. */
57 #define MODE_INDEX(mode) \
58 ((mode) == QImode ? 0 \
59 : (mode) == HImode ? 1 \
60 : (mode) == SImode ? 2 \
61 : (mode) == DImode ? 3 \
62 : 4)
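/* For example, MODE_INDEX (SImode) is 2 and MODE_INDEX (DImode) is 3, so a
   lookup such as ix86_cost->mult_init[MODE_INDEX (mode)] (assuming the
   mult_init/divide field names declared for struct processor_costs in
   i386.h) picks the SImode or DImode column of the five-entry multiply and
   divide cost arrays below; any other mode falls through to index 4.  */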
63
64 /* Processor costs (relative to an add) */
65 static const
66 struct processor_costs size_cost = { /* costs for tuning for size */
67 2, /* cost of an add instruction */
68 3, /* cost of a lea instruction */
69 2, /* variable shift costs */
70 3, /* constant shift costs */
71 {3, 3, 3, 3, 5}, /* cost of starting a multiply */
72 0, /* cost of multiply per each bit set */
73 {3, 3, 3, 3, 5}, /* cost of a divide/mod */
74 3, /* cost of movsx */
75 3, /* cost of movzx */
76 0, /* "large" insn */
77 2, /* MOVE_RATIO */
78 2, /* cost for loading QImode using movzbl */
79 {2, 2, 2}, /* cost of loading integer registers
80 in QImode, HImode and SImode.
81 Relative to reg-reg move (2). */
82 {2, 2, 2}, /* cost of storing integer registers */
83 2, /* cost of reg,reg fld/fst */
84 {2, 2, 2}, /* cost of loading fp registers
85 in SFmode, DFmode and XFmode */
86 {2, 2, 2}, /* cost of storing fp registers */
87 3, /* cost of moving MMX register */
88 {3, 3}, /* cost of loading MMX registers
89 in SImode and DImode */
90 {3, 3}, /* cost of storing MMX registers
91 in SImode and DImode */
92 3, /* cost of moving SSE register */
93 {3, 3, 3}, /* cost of loading SSE registers
94 in SImode, DImode and TImode */
95 {3, 3, 3}, /* cost of storing SSE registers
96 in SImode, DImode and TImode */
97 3, /* MMX or SSE register to integer */
98 0, /* size of prefetch block */
99 0, /* number of parallel prefetches */
100 1, /* Branch cost */
101 2, /* cost of FADD and FSUB insns. */
102 2, /* cost of FMUL instruction. */
103 2, /* cost of FDIV instruction. */
104 2, /* cost of FABS instruction. */
105 2, /* cost of FCHS instruction. */
106 2, /* cost of FSQRT instruction. */
107 };
108
109 /* Processor costs (relative to an add) */
110 static const
111 struct processor_costs i386_cost = { /* 386 specific costs */
112 1, /* cost of an add instruction */
113 1, /* cost of a lea instruction */
114 3, /* variable shift costs */
115 2, /* constant shift costs */
116 {6, 6, 6, 6, 6}, /* cost of starting a multiply */
117 1, /* cost of multiply per each bit set */
118 {23, 23, 23, 23, 23}, /* cost of a divide/mod */
119 3, /* cost of movsx */
120 2, /* cost of movzx */
121 15, /* "large" insn */
122 3, /* MOVE_RATIO */
123 4, /* cost for loading QImode using movzbl */
124 {2, 4, 2}, /* cost of loading integer registers
125 in QImode, HImode and SImode.
126 Relative to reg-reg move (2). */
127 {2, 4, 2}, /* cost of storing integer registers */
128 2, /* cost of reg,reg fld/fst */
129 {8, 8, 8}, /* cost of loading fp registers
130 in SFmode, DFmode and XFmode */
131 {8, 8, 8}, /* cost of storing fp registers */
132 2, /* cost of moving MMX register */
133 {4, 8}, /* cost of loading MMX registers
134 in SImode and DImode */
135 {4, 8}, /* cost of storing MMX registers
136 in SImode and DImode */
137 2, /* cost of moving SSE register */
138 {4, 8, 16}, /* cost of loading SSE registers
139 in SImode, DImode and TImode */
140 {4, 8, 16}, /* cost of storing SSE registers
141 in SImode, DImode and TImode */
142 3, /* MMX or SSE register to integer */
143 0, /* size of prefetch block */
144 0, /* number of parallel prefetches */
145 1, /* Branch cost */
146 23, /* cost of FADD and FSUB insns. */
147 27, /* cost of FMUL instruction. */
148 88, /* cost of FDIV instruction. */
149 22, /* cost of FABS instruction. */
150 24, /* cost of FCHS instruction. */
151 122, /* cost of FSQRT instruction. */
152 };
153
154 static const
155 struct processor_costs i486_cost = { /* 486 specific costs */
156 1, /* cost of an add instruction */
157 1, /* cost of a lea instruction */
158 3, /* variable shift costs */
159 2, /* constant shift costs */
160 {12, 12, 12, 12, 12}, /* cost of starting a multiply */
161 1, /* cost of multiply per each bit set */
162 {40, 40, 40, 40, 40}, /* cost of a divide/mod */
163 3, /* cost of movsx */
164 2, /* cost of movzx */
165 15, /* "large" insn */
166 3, /* MOVE_RATIO */
167 4, /* cost for loading QImode using movzbl */
168 {2, 4, 2}, /* cost of loading integer registers
169 in QImode, HImode and SImode.
170 Relative to reg-reg move (2). */
171 {2, 4, 2}, /* cost of storing integer registers */
172 2, /* cost of reg,reg fld/fst */
173 {8, 8, 8}, /* cost of loading fp registers
174 in SFmode, DFmode and XFmode */
175 {8, 8, 8}, /* cost of storing fp registers */
176 2, /* cost of moving MMX register */
177 {4, 8}, /* cost of loading MMX registers
178 in SImode and DImode */
179 {4, 8}, /* cost of storing MMX registers
180 in SImode and DImode */
181 2, /* cost of moving SSE register */
182 {4, 8, 16}, /* cost of loading SSE registers
183 in SImode, DImode and TImode */
184 {4, 8, 16}, /* cost of storing SSE registers
185 in SImode, DImode and TImode */
186 3, /* MMX or SSE register to integer */
187 0, /* size of prefetch block */
188 0, /* number of parallel prefetches */
189 1, /* Branch cost */
190 8, /* cost of FADD and FSUB insns. */
191 16, /* cost of FMUL instruction. */
192 73, /* cost of FDIV instruction. */
193 3, /* cost of FABS instruction. */
194 3, /* cost of FCHS instruction. */
195 83, /* cost of FSQRT instruction. */
196 };
197
198 static const
199 struct processor_costs pentium_cost = {
200 1, /* cost of an add instruction */
201 1, /* cost of a lea instruction */
202 4, /* variable shift costs */
203 1, /* constant shift costs */
204 {11, 11, 11, 11, 11}, /* cost of starting a multiply */
205 0, /* cost of multiply per each bit set */
206 {25, 25, 25, 25, 25}, /* cost of a divide/mod */
207 3, /* cost of movsx */
208 2, /* cost of movzx */
209 8, /* "large" insn */
210 6, /* MOVE_RATIO */
211 6, /* cost for loading QImode using movzbl */
212 {2, 4, 2}, /* cost of loading integer registers
213 in QImode, HImode and SImode.
214 Relative to reg-reg move (2). */
215 {2, 4, 2}, /* cost of storing integer registers */
216 2, /* cost of reg,reg fld/fst */
217 {2, 2, 6}, /* cost of loading fp registers
218 in SFmode, DFmode and XFmode */
219 {4, 4, 6}, /* cost of storing fp registers */
220 8, /* cost of moving MMX register */
221 {8, 8}, /* cost of loading MMX registers
222 in SImode and DImode */
223 {8, 8}, /* cost of storing MMX registers
224 in SImode and DImode */
225 2, /* cost of moving SSE register */
226 {4, 8, 16}, /* cost of loading SSE registers
227 in SImode, DImode and TImode */
228 {4, 8, 16}, /* cost of storing SSE registers
229 in SImode, DImode and TImode */
230 3, /* MMX or SSE register to integer */
231 0, /* size of prefetch block */
232 0, /* number of parallel prefetches */
233 2, /* Branch cost */
234 3, /* cost of FADD and FSUB insns. */
235 3, /* cost of FMUL instruction. */
236 39, /* cost of FDIV instruction. */
237 1, /* cost of FABS instruction. */
238 1, /* cost of FCHS instruction. */
239 70, /* cost of FSQRT instruction. */
240 };
241
242 static const
243 struct processor_costs pentiumpro_cost = {
244 1, /* cost of an add instruction */
245 1, /* cost of a lea instruction */
246 1, /* variable shift costs */
247 1, /* constant shift costs */
248 {4, 4, 4, 4, 4}, /* cost of starting a multiply */
249 0, /* cost of multiply per each bit set */
250 {17, 17, 17, 17, 17}, /* cost of a divide/mod */
251 1, /* cost of movsx */
252 1, /* cost of movzx */
253 8, /* "large" insn */
254 6, /* MOVE_RATIO */
255 2, /* cost for loading QImode using movzbl */
256 {4, 4, 4}, /* cost of loading integer registers
257 in QImode, HImode and SImode.
258 Relative to reg-reg move (2). */
259 {2, 2, 2}, /* cost of storing integer registers */
260 2, /* cost of reg,reg fld/fst */
261 {2, 2, 6}, /* cost of loading fp registers
262 in SFmode, DFmode and XFmode */
263 {4, 4, 6}, /* cost of storing fp registers */
264 2, /* cost of moving MMX register */
265 {2, 2}, /* cost of loading MMX registers
266 in SImode and DImode */
267 {2, 2}, /* cost of storing MMX registers
268 in SImode and DImode */
269 2, /* cost of moving SSE register */
270 {2, 2, 8}, /* cost of loading SSE registers
271 in SImode, DImode and TImode */
272 {2, 2, 8}, /* cost of storing SSE registers
273 in SImode, DImode and TImode */
274 3, /* MMX or SSE register to integer */
275 32, /* size of prefetch block */
276 6, /* number of parallel prefetches */
277 2, /* Branch cost */
278 3, /* cost of FADD and FSUB insns. */
279 5, /* cost of FMUL instruction. */
280 56, /* cost of FDIV instruction. */
281 2, /* cost of FABS instruction. */
282 2, /* cost of FCHS instruction. */
283 56, /* cost of FSQRT instruction. */
284 };
285
286 static const
287 struct processor_costs k6_cost = {
288 1, /* cost of an add instruction */
289 2, /* cost of a lea instruction */
290 1, /* variable shift costs */
291 1, /* constant shift costs */
292 {3, 3, 3, 3, 3}, /* cost of starting a multiply */
293 0, /* cost of multiply per each bit set */
294 {18, 18, 18, 18, 18}, /* cost of a divide/mod */
295 2, /* cost of movsx */
296 2, /* cost of movzx */
297 8, /* "large" insn */
298 4, /* MOVE_RATIO */
299 3, /* cost for loading QImode using movzbl */
300 {4, 5, 4}, /* cost of loading integer registers
301 in QImode, HImode and SImode.
302 Relative to reg-reg move (2). */
303 {2, 3, 2}, /* cost of storing integer registers */
304 4, /* cost of reg,reg fld/fst */
305 {6, 6, 6}, /* cost of loading fp registers
306 in SFmode, DFmode and XFmode */
307 {4, 4, 4}, /* cost of storing fp registers */
308 2, /* cost of moving MMX register */
309 {2, 2}, /* cost of loading MMX registers
310 in SImode and DImode */
311 {2, 2}, /* cost of storing MMX registers
312 in SImode and DImode */
313 2, /* cost of moving SSE register */
314 {2, 2, 8}, /* cost of loading SSE registers
315 in SImode, DImode and TImode */
316 {2, 2, 8}, /* cost of storing SSE registers
317 in SImode, DImode and TImode */
318 6, /* MMX or SSE register to integer */
319 32, /* size of prefetch block */
320 1, /* number of parallel prefetches */
321 1, /* Branch cost */
322 2, /* cost of FADD and FSUB insns. */
323 2, /* cost of FMUL instruction. */
324 56, /* cost of FDIV instruction. */
325 2, /* cost of FABS instruction. */
326 2, /* cost of FCHS instruction. */
327 56, /* cost of FSQRT instruction. */
328 };
329
330 static const
331 struct processor_costs athlon_cost = {
332 1, /* cost of an add instruction */
333 2, /* cost of a lea instruction */
334 1, /* variable shift costs */
335 1, /* constant shift costs */
336 {5, 5, 5, 5, 5}, /* cost of starting a multiply */
337 0, /* cost of multiply per each bit set */
338 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
339 1, /* cost of movsx */
340 1, /* cost of movzx */
341 8, /* "large" insn */
342 9, /* MOVE_RATIO */
343 4, /* cost for loading QImode using movzbl */
344 {3, 4, 3}, /* cost of loading integer registers
345 in QImode, HImode and SImode.
346 Relative to reg-reg move (2). */
347 {3, 4, 3}, /* cost of storing integer registers */
348 4, /* cost of reg,reg fld/fst */
349 {4, 4, 12}, /* cost of loading fp registers
350 in SFmode, DFmode and XFmode */
351 {6, 6, 8}, /* cost of storing fp registers */
352 2, /* cost of moving MMX register */
353 {4, 4}, /* cost of loading MMX registers
354 in SImode and DImode */
355 {4, 4}, /* cost of storing MMX registers
356 in SImode and DImode */
357 2, /* cost of moving SSE register */
358 {4, 4, 6}, /* cost of loading SSE registers
359 in SImode, DImode and TImode */
360 {4, 4, 5}, /* cost of storing SSE registers
361 in SImode, DImode and TImode */
362 5, /* MMX or SSE register to integer */
363 64, /* size of prefetch block */
364 6, /* number of parallel prefetches */
365 5, /* Branch cost */
366 4, /* cost of FADD and FSUB insns. */
367 4, /* cost of FMUL instruction. */
368 24, /* cost of FDIV instruction. */
369 2, /* cost of FABS instruction. */
370 2, /* cost of FCHS instruction. */
371 35, /* cost of FSQRT instruction. */
372 };
373
374 static const
375 struct processor_costs k8_cost = {
376 1, /* cost of an add instruction */
377 2, /* cost of a lea instruction */
378 1, /* variable shift costs */
379 1, /* constant shift costs */
380 {3, 4, 3, 4, 5}, /* cost of starting a multiply */
381 0, /* cost of multiply per each bit set */
382 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
383 1, /* cost of movsx */
384 1, /* cost of movzx */
385 8, /* "large" insn */
386 9, /* MOVE_RATIO */
387 4, /* cost for loading QImode using movzbl */
388 {3, 4, 3}, /* cost of loading integer registers
389 in QImode, HImode and SImode.
390 Relative to reg-reg move (2). */
391 {3, 4, 3}, /* cost of storing integer registers */
392 4, /* cost of reg,reg fld/fst */
393 {4, 4, 12}, /* cost of loading fp registers
394 in SFmode, DFmode and XFmode */
395 {6, 6, 8}, /* cost of storing fp registers */
396 2, /* cost of moving MMX register */
397 {3, 3}, /* cost of loading MMX registers
398 in SImode and DImode */
399 {4, 4}, /* cost of storing MMX registers
400 in SImode and DImode */
401 2, /* cost of moving SSE register */
402 {4, 3, 6}, /* cost of loading SSE registers
403 in SImode, DImode and TImode */
404 {4, 4, 5}, /* cost of storing SSE registers
405 in SImode, DImode and TImode */
406 5, /* MMX or SSE register to integer */
407 64, /* size of prefetch block */
408 6, /* number of parallel prefetches */
409 5, /* Branch cost */
410 4, /* cost of FADD and FSUB insns. */
411 4, /* cost of FMUL instruction. */
412 19, /* cost of FDIV instruction. */
413 2, /* cost of FABS instruction. */
414 2, /* cost of FCHS instruction. */
415 35, /* cost of FSQRT instruction. */
416 };
417
418 static const
419 struct processor_costs pentium4_cost = {
420 1, /* cost of an add instruction */
421 3, /* cost of a lea instruction */
422 4, /* variable shift costs */
423 4, /* constant shift costs */
424 {15, 15, 15, 15, 15}, /* cost of starting a multiply */
425 0, /* cost of multiply per each bit set */
426 {56, 56, 56, 56, 56}, /* cost of a divide/mod */
427 1, /* cost of movsx */
428 1, /* cost of movzx */
429 16, /* "large" insn */
430 6, /* MOVE_RATIO */
431 2, /* cost for loading QImode using movzbl */
432 {4, 5, 4}, /* cost of loading integer registers
433 in QImode, HImode and SImode.
434 Relative to reg-reg move (2). */
435 {2, 3, 2}, /* cost of storing integer registers */
436 2, /* cost of reg,reg fld/fst */
437 {2, 2, 6}, /* cost of loading fp registers
438 in SFmode, DFmode and XFmode */
439 {4, 4, 6}, /* cost of storing fp registers */
440 2, /* cost of moving MMX register */
441 {2, 2}, /* cost of loading MMX registers
442 in SImode and DImode */
443 {2, 2}, /* cost of storing MMX registers
444 in SImode and DImode */
445 12, /* cost of moving SSE register */
446 {12, 12, 12}, /* cost of loading SSE registers
447 in SImode, DImode and TImode */
448 {2, 2, 8}, /* cost of storing SSE registers
449 in SImode, DImode and TImode */
450 10, /* MMX or SSE register to integer */
451 64, /* size of prefetch block */
452 6, /* number of parallel prefetches */
453 2, /* Branch cost */
454 5, /* cost of FADD and FSUB insns. */
455 7, /* cost of FMUL instruction. */
456 43, /* cost of FDIV instruction. */
457 2, /* cost of FABS instruction. */
458 2, /* cost of FCHS instruction. */
459 43, /* cost of FSQRT instruction. */
460 };
461
462 static const
463 struct processor_costs nocona_cost = {
464 1, /* cost of an add instruction */
465 1, /* cost of a lea instruction */
466 1, /* variable shift costs */
467 1, /* constant shift costs */
468 {10, 10, 10, 10, 10}, /* cost of starting a multiply */
469 0, /* cost of multiply per each bit set */
470 {66, 66, 66, 66, 66}, /* cost of a divide/mod */
471 1, /* cost of movsx */
472 1, /* cost of movzx */
473 16, /* "large" insn */
474 9, /* MOVE_RATIO */
475 4, /* cost for loading QImode using movzbl */
476 {4, 4, 4}, /* cost of loading integer registers
477 in QImode, HImode and SImode.
478 Relative to reg-reg move (2). */
479 {4, 4, 4}, /* cost of storing integer registers */
480 3, /* cost of reg,reg fld/fst */
481 {12, 12, 12}, /* cost of loading fp registers
482 in SFmode, DFmode and XFmode */
483 {4, 4, 4}, /* cost of storing fp registers */
484 6, /* cost of moving MMX register */
485 {12, 12}, /* cost of loading MMX registers
486 in SImode and DImode */
487 {12, 12}, /* cost of storing MMX registers
488 in SImode and DImode */
489 6, /* cost of moving SSE register */
490 {12, 12, 12}, /* cost of loading SSE registers
491 in SImode, DImode and TImode */
492 {12, 12, 12}, /* cost of storing SSE registers
493 in SImode, DImode and TImode */
494 8, /* MMX or SSE register to integer */
495 128, /* size of prefetch block */
496 8, /* number of parallel prefetches */
497 1, /* Branch cost */
498 6, /* cost of FADD and FSUB insns. */
499 8, /* cost of FMUL instruction. */
500 40, /* cost of FDIV instruction. */
501 3, /* cost of FABS instruction. */
502 3, /* cost of FCHS instruction. */
503 44, /* cost of FSQRT instruction. */
504 };
505
506 const struct processor_costs *ix86_cost = &pentium_cost;
507
508 /* Processor feature/optimization bitmasks. */
509 #define m_386 (1<<PROCESSOR_I386)
510 #define m_486 (1<<PROCESSOR_I486)
511 #define m_PENT (1<<PROCESSOR_PENTIUM)
512 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
513 #define m_K6 (1<<PROCESSOR_K6)
514 #define m_ATHLON (1<<PROCESSOR_ATHLON)
515 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
516 #define m_K8 (1<<PROCESSOR_K8)
517 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
518 #define m_NOCONA (1<<PROCESSOR_NOCONA)
519
520 const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
521 const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
522 const int x86_zero_extend_with_and = m_486 | m_PENT;
523 const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA /* m_386 | m_K6 */;
524 const int x86_double_with_add = ~m_386;
525 const int x86_use_bit_test = m_386;
526 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
527 const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
528 const int x86_3dnow_a = m_ATHLON_K8;
529 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
530 /* Branch hints were put in P4 based on simulation results. But
531 after P4 was made, no performance benefit was observed with
532 branch hints. They also increase code size. As a result,
533 icc never generates branch hints. */
534 const int x86_branch_hints = 0;
535 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4 | m_NOCONA;
536 const int x86_partial_reg_stall = m_PPRO;
537 const int x86_use_loop = m_K6;
538 const int x86_use_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
539 const int x86_use_mov0 = m_K6;
540 const int x86_use_cltd = ~(m_PENT | m_K6);
541 const int x86_read_modify_write = ~m_PENT;
542 const int x86_read_modify = ~(m_PENT | m_PPRO);
543 const int x86_split_long_moves = m_PPRO;
544 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
545 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
546 const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
547 const int x86_qimode_math = ~(0);
548 const int x86_promote_qi_regs = 0;
549 const int x86_himode_math = ~(m_PPRO);
550 const int x86_promote_hi_regs = m_PPRO;
551 const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA;
552 const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA;
553 const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4 | m_NOCONA;
554 const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4 | m_NOCONA;
555 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO);
556 const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA;
557 const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA;
558 const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO;
559 const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO;
560 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO;
561 const int x86_decompose_lea = m_PENT4 | m_NOCONA;
562 const int x86_shift1 = ~m_486;
563 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
564 const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO;
565 /* Set for machines where the type and dependencies are resolved on SSE
566 register parts instead of whole registers, so we may maintain just the
567 lower part of scalar values in the proper format, leaving the upper part
568 undefined. */
569 const int x86_sse_split_regs = m_ATHLON_K8;
570 const int x86_sse_typeless_stores = m_ATHLON_K8;
571 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
572 const int x86_use_ffreep = m_ATHLON_K8;
573 const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
574 const int x86_inter_unit_moves = ~(m_ATHLON_K8);
575 const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_NOCONA | m_PPRO;
576 /* Some CPU cores are not able to predict more than 4 branch instructions in
577 a 16-byte window. */
578 const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
579 const int x86_schedule = m_PPRO | m_ATHLON_K8 | m_K6 | m_PENT;
580 const int x86_use_bt = m_ATHLON_K8;
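/* Rough sketch of how the bitmasks above are consumed (the actual macros
   live in i386.h): each per-feature word is tested against the bit of the
   chosen tuning target, approximately

       #define TUNEMASK (1 << ix86_tune)
       #define TARGET_USE_LEAVE (x86_use_leave & TUNEMASK)

   which matches the (x86_accumulate_outgoing_args & TUNEMASK) and
   (x86_arch_always_fancy_math_387 & (1 << ix86_arch)) tests performed in
   override_options below.  */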
581
582 /* If the average insn count for a single function invocation is
583 lower than this constant, emit fast (but longer) prologue and
584 epilogue code. */
585 #define FAST_PROLOGUE_INSN_COUNT 20
586
587 /* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
588 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
589 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
590 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
591
592 /* Array of the smallest class containing reg number REGNO, indexed by
593 REGNO. Used by REGNO_REG_CLASS in i386.h. */
594
595 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
596 {
597 /* ax, dx, cx, bx */
598 AREG, DREG, CREG, BREG,
599 /* si, di, bp, sp */
600 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
601 /* FP registers */
602 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
603 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
604 /* arg pointer */
605 NON_Q_REGS,
606 /* flags, fpsr, dirflag, frame */
607 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
608 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
609 SSE_REGS, SSE_REGS,
610 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
611 MMX_REGS, MMX_REGS,
612 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
613 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
614 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
615 SSE_REGS, SSE_REGS,
616 };
617
618 /* The "default" register map used in 32bit mode. */
619
620 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
621 {
622 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
623 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
624 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
625 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
626 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
627 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
628 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
629 };
630
631 static int const x86_64_int_parameter_registers[6] =
632 {
633 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
634 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
635 };
636
637 static int const x86_64_int_return_registers[4] =
638 {
639 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
640 };
641
642 /* The "default" register map used in 64bit mode. */
643 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
644 {
645 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
646 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
647 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
648 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
649 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
650 8,9,10,11,12,13,14,15, /* extended integer registers */
651 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
652 };
653
654 /* Define the register numbers to be used in Dwarf debugging information.
655 The SVR4 reference port C compiler uses the following register numbers
656 in its Dwarf output code:
657 0 for %eax (gcc regno = 0)
658 1 for %ecx (gcc regno = 2)
659 2 for %edx (gcc regno = 1)
660 3 for %ebx (gcc regno = 3)
661 4 for %esp (gcc regno = 7)
662 5 for %ebp (gcc regno = 6)
663 6 for %esi (gcc regno = 4)
664 7 for %edi (gcc regno = 5)
665 The following three DWARF register numbers are never generated by
666 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
667 believes these numbers have these meanings.
668 8 for %eip (no gcc equivalent)
669 9 for %eflags (gcc regno = 17)
670 10 for %trapno (no gcc equivalent)
671 It is not at all clear how we should number the FP stack registers
672 for the x86 architecture. If the version of SDB on x86/svr4 were
673 a bit less brain dead with respect to floating-point then we would
674 have a precedent to follow with respect to DWARF register numbers
675 for x86 FP registers, but the SDB on x86/svr4 is so completely
676 broken with respect to FP registers that it is hardly worth thinking
677 of it as something to strive for compatibility with.
678 The version of x86/svr4 SDB I have at the moment does (partially)
679 seem to believe that DWARF register number 11 is associated with
680 the x86 register %st(0), but that's about all. Higher DWARF
681 register numbers don't seem to be associated with anything in
682 particular, and even for DWARF regno 11, SDB only seems to under-
683 stand that it should say that a variable lives in %st(0) (when
684 asked via an `=' command) if we said it was in DWARF regno 11,
685 but SDB still prints garbage when asked for the value of the
686 variable in question (via a `/' command).
687 (Also note that the labels SDB prints for various FP stack regs
688 when doing an `x' command are all wrong.)
689 Note that these problems generally don't affect the native SVR4
690 C compiler because it doesn't allow the use of -O with -g and
691 because when it is *not* optimizing, it allocates a memory
692 location for each floating-point variable, and the memory
693 location is what gets described in the DWARF AT_location
694 attribute for the variable in question.
695 Regardless of the severe mental illness of the x86/svr4 SDB, we
696 do something sensible here and we use the following DWARF
697 register numbers. Note that these are all stack-top-relative
698 numbers.
699 11 for %st(0) (gcc regno = 8)
700 12 for %st(1) (gcc regno = 9)
701 13 for %st(2) (gcc regno = 10)
702 14 for %st(3) (gcc regno = 11)
703 15 for %st(4) (gcc regno = 12)
704 16 for %st(5) (gcc regno = 13)
705 17 for %st(6) (gcc regno = 14)
706 18 for %st(7) (gcc regno = 15)
707 */
708 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
709 {
710 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
711 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
712 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
713 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
714 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
715 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
716 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
717 };
718
719 /* Test and compare insns in i386.md store the information needed to
720 generate branch and scc insns here. */
721
722 rtx ix86_compare_op0 = NULL_RTX;
723 rtx ix86_compare_op1 = NULL_RTX;
724
725 #define MAX_386_STACK_LOCALS 3
726 /* Size of the register save area. */
727 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
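/* Worked out with the usual 64-bit values (REGPARM_MAX == 6 integer
   registers, UNITS_PER_WORD == 8, SSE_REGPARM_MAX == 8 from i386.h), this
   comes to 6*8 + 8*16 = 176 bytes: six 8-byte slots for the GP argument
   registers followed by eight 16-byte slots for the SSE argument registers
   in the va_arg register save area.  */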
728
729 /* Define the structure for the machine field in struct function. */
730
731 struct stack_local_entry GTY(())
732 {
733 unsigned short mode;
734 unsigned short n;
735 rtx rtl;
736 struct stack_local_entry *next;
737 };
738
739 /* Structure describing stack frame layout.
740 Stack grows downward:
741
742 [arguments]
743 <- ARG_POINTER
744 saved pc
745
746 saved frame pointer if frame_pointer_needed
747 <- HARD_FRAME_POINTER
748 [saved regs]
749
750 [padding1] \
751 )
752 [va_arg registers] (
753 > to_allocate <- FRAME_POINTER
754 [frame] (
755 )
756 [padding2] /
757 */
758 struct ix86_frame
759 {
760 int nregs;
761 int padding1;
762 int va_arg_size;
763 HOST_WIDE_INT frame;
764 int padding2;
765 int outgoing_arguments_size;
766 int red_zone_size;
767
768 HOST_WIDE_INT to_allocate;
769 /* The offsets relative to ARG_POINTER. */
770 HOST_WIDE_INT frame_pointer_offset;
771 HOST_WIDE_INT hard_frame_pointer_offset;
772 HOST_WIDE_INT stack_pointer_offset;
773
774 /* When save_regs_using_mov is set, emit prologue using
775 move instead of push instructions. */
776 bool save_regs_using_mov;
777 };
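/* Reading the diagram above: to_allocate is the combined size of padding1,
   the va_arg register save area, the frame proper and padding2, while the
   outgoing argument space and the red zone keep their own fields; the three
   *_offset members give the positions of FRAME_POINTER, HARD_FRAME_POINTER
   and the stack pointer, all measured from ARG_POINTER.  */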
778
779 /* Used to enable/disable debugging features. */
780 const char *ix86_debug_arg_string, *ix86_debug_addr_string;
781 /* Code model option as passed by user. */
782 const char *ix86_cmodel_string;
783 /* Parsed value. */
784 enum cmodel ix86_cmodel;
785 /* Asm dialect. */
786 const char *ix86_asm_string;
787 enum asm_dialect ix86_asm_dialect = ASM_ATT;
788 /* TLS dialect. */
789 const char *ix86_tls_dialect_string;
790 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
791
792 /* Which unit we are generating floating point math for. */
793 enum fpmath_unit ix86_fpmath;
794
795 /* Which cpu are we scheduling for. */
796 enum processor_type ix86_tune;
797 /* Which instruction set architecture to use. */
798 enum processor_type ix86_arch;
799
800 /* Strings to hold which cpu and instruction set architecture to use. */
801 const char *ix86_tune_string; /* for -mtune=<xxx> */
802 const char *ix86_arch_string; /* for -march=<xxx> */
803 const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
804
805 /* # of registers to use to pass arguments. */
806 const char *ix86_regparm_string;
807
808 /* True if the SSE prefetch instruction is not a NOP. */
809 int x86_prefetch_sse;
810
811 /* ix86_regparm_string as a number */
812 int ix86_regparm;
813
814 /* Alignment to use for loops and jumps: */
815
816 /* Power of two alignment for loops. */
817 const char *ix86_align_loops_string;
818
819 /* Power of two alignment for non-loop jumps. */
820 const char *ix86_align_jumps_string;
821
822 /* Power of two alignment for stack boundary in bytes. */
823 const char *ix86_preferred_stack_boundary_string;
824
825 /* Preferred alignment for stack boundary in bits. */
826 unsigned int ix86_preferred_stack_boundary;
827
828 /* Values 1-5: see jump.c */
829 int ix86_branch_cost;
830 const char *ix86_branch_cost_string;
831
832 /* Power of two alignment for functions. */
833 const char *ix86_align_funcs_string;
834
835 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
836 char internal_label_prefix[16];
837 int internal_label_prefix_len;
838 \f
839 static void output_pic_addr_const (FILE *, rtx, int);
840 static void put_condition_code (enum rtx_code, enum machine_mode,
841 int, int, FILE *);
842 static const char *get_some_local_dynamic_name (void);
843 static int get_some_local_dynamic_name_1 (rtx *, void *);
844 static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
845 static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
846 rtx *);
847 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
848 static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
849 enum machine_mode);
850 static rtx get_thread_pointer (int);
851 static rtx legitimize_tls_address (rtx, enum tls_model, int);
852 static void get_pc_thunk_name (char [32], unsigned int);
853 static rtx gen_push (rtx);
854 static int ix86_flags_dependant (rtx, rtx, enum attr_type);
855 static int ix86_agi_dependant (rtx, rtx, enum attr_type);
856 static struct machine_function * ix86_init_machine_status (void);
857 static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
858 static int ix86_nsaved_regs (void);
859 static void ix86_emit_save_regs (void);
860 static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
861 static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
862 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
863 static HOST_WIDE_INT ix86_GOT_alias_set (void);
864 static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
865 static rtx ix86_expand_aligntest (rtx, int);
866 static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
867 static int ix86_issue_rate (void);
868 static int ix86_adjust_cost (rtx, rtx, rtx, int);
869 static int ia32_multipass_dfa_lookahead (void);
870 static void ix86_init_mmx_sse_builtins (void);
871 static rtx x86_this_parameter (tree);
872 static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
873 HOST_WIDE_INT, tree);
874 static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
875 static void x86_file_start (void);
876 static void ix86_reorg (void);
877 static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
878 static tree ix86_build_builtin_va_list (void);
879 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
880 tree, int *, int);
881 static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
882 static bool ix86_vector_mode_supported_p (enum machine_mode);
883
884 static int ix86_address_cost (rtx);
885 static bool ix86_cannot_force_const_mem (rtx);
886 static rtx ix86_delegitimize_address (rtx);
887
888 struct builtin_description;
889 static rtx ix86_expand_sse_comi (const struct builtin_description *,
890 tree, rtx);
891 static rtx ix86_expand_sse_compare (const struct builtin_description *,
892 tree, rtx);
893 static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
894 static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
895 static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
896 static rtx ix86_expand_store_builtin (enum insn_code, tree);
897 static rtx safe_vector_operand (rtx, enum machine_mode);
898 static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
899 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
900 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
901 static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
902 static int ix86_fp_comparison_cost (enum rtx_code code);
903 static unsigned int ix86_select_alt_pic_regnum (void);
904 static int ix86_save_reg (unsigned int, int);
905 static void ix86_compute_frame_layout (struct ix86_frame *);
906 static int ix86_comp_type_attributes (tree, tree);
907 static int ix86_function_regparm (tree, tree);
908 const struct attribute_spec ix86_attribute_table[];
909 static bool ix86_function_ok_for_sibcall (tree, tree);
910 static tree ix86_handle_cdecl_attribute (tree *, tree, tree, int, bool *);
911 static tree ix86_handle_regparm_attribute (tree *, tree, tree, int, bool *);
912 static int ix86_value_regno (enum machine_mode);
913 static bool contains_128bit_aligned_vector_p (tree);
914 static rtx ix86_struct_value_rtx (tree, int);
915 static bool ix86_ms_bitfield_layout_p (tree);
916 static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
917 static int extended_reg_mentioned_1 (rtx *, void *);
918 static bool ix86_rtx_costs (rtx, int, int, int *);
919 static int min_insn_size (rtx);
920 static tree ix86_md_asm_clobbers (tree clobbers);
921 static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type);
922 static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
923 tree, bool);
924 static void ix86_init_builtins (void);
925 static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
926
927 /* This function is only used on Solaris. */
928 static void i386_solaris_elf_named_section (const char *, unsigned int, tree)
929 ATTRIBUTE_UNUSED;
930
931 /* Register class used for passing a given 64-bit part of the argument.
932 These represent classes as documented by the PS ABI, with the exception
933 of the SSESF and SSEDF classes, which are basically the SSE class, except
934 that gcc will use SFmode or DFmode moves instead of DImode to avoid reformatting penalties.
935
936 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
937 whenever possible (the upper half being padding).
938 */
939 enum x86_64_reg_class
940 {
941 X86_64_NO_CLASS,
942 X86_64_INTEGER_CLASS,
943 X86_64_INTEGERSI_CLASS,
944 X86_64_SSE_CLASS,
945 X86_64_SSESF_CLASS,
946 X86_64_SSEDF_CLASS,
947 X86_64_SSEUP_CLASS,
948 X86_64_X87_CLASS,
949 X86_64_X87UP_CLASS,
950 X86_64_COMPLEX_X87_CLASS,
951 X86_64_MEMORY_CLASS
952 };
953 static const char * const x86_64_reg_class_name[] = {
954 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
955 "sseup", "x87", "x87up", "cplx87", "no"
956 };
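/* A few illustrative classifications under these rules: a scalar 'double'
   argument is X86_64_SSEDF_CLASS, a 32-bit 'int' is X86_64_INTEGERSI_CLASS,
   a 64-bit 'long' is plain X86_64_INTEGER_CLASS, and a 16-byte __m128
   occupies two eightbytes classified X86_64_SSE_CLASS followed by
   X86_64_SSEUP_CLASS.  */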
957
958 #define MAX_CLASSES 4
959
960 /* Table of constants used by fldpi, fldln2, etc.... */
961 static REAL_VALUE_TYPE ext_80387_constants_table [5];
962 static bool ext_80387_constants_init = 0;
963 static void init_ext_80387_constants (void);
964 \f
965 /* Initialize the GCC target structure. */
966 #undef TARGET_ATTRIBUTE_TABLE
967 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
968 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
969 # undef TARGET_MERGE_DECL_ATTRIBUTES
970 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
971 #endif
972
973 #undef TARGET_COMP_TYPE_ATTRIBUTES
974 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
975
976 #undef TARGET_INIT_BUILTINS
977 #define TARGET_INIT_BUILTINS ix86_init_builtins
978 #undef TARGET_EXPAND_BUILTIN
979 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
980
981 #undef TARGET_ASM_FUNCTION_EPILOGUE
982 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
983
984 #undef TARGET_ASM_OPEN_PAREN
985 #define TARGET_ASM_OPEN_PAREN ""
986 #undef TARGET_ASM_CLOSE_PAREN
987 #define TARGET_ASM_CLOSE_PAREN ""
988
989 #undef TARGET_ASM_ALIGNED_HI_OP
990 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
991 #undef TARGET_ASM_ALIGNED_SI_OP
992 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
993 #ifdef ASM_QUAD
994 #undef TARGET_ASM_ALIGNED_DI_OP
995 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
996 #endif
997
998 #undef TARGET_ASM_UNALIGNED_HI_OP
999 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
1000 #undef TARGET_ASM_UNALIGNED_SI_OP
1001 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1002 #undef TARGET_ASM_UNALIGNED_DI_OP
1003 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1004
1005 #undef TARGET_SCHED_ADJUST_COST
1006 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1007 #undef TARGET_SCHED_ISSUE_RATE
1008 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1009 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1010 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1011 ia32_multipass_dfa_lookahead
1012
1013 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1014 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1015
1016 #ifdef HAVE_AS_TLS
1017 #undef TARGET_HAVE_TLS
1018 #define TARGET_HAVE_TLS true
1019 #endif
1020 #undef TARGET_CANNOT_FORCE_CONST_MEM
1021 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1022
1023 #undef TARGET_DELEGITIMIZE_ADDRESS
1024 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1025
1026 #undef TARGET_MS_BITFIELD_LAYOUT_P
1027 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1028
1029 #undef TARGET_ASM_OUTPUT_MI_THUNK
1030 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1031 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1032 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1033
1034 #undef TARGET_ASM_FILE_START
1035 #define TARGET_ASM_FILE_START x86_file_start
1036
1037 #undef TARGET_RTX_COSTS
1038 #define TARGET_RTX_COSTS ix86_rtx_costs
1039 #undef TARGET_ADDRESS_COST
1040 #define TARGET_ADDRESS_COST ix86_address_cost
1041
1042 #undef TARGET_FIXED_CONDITION_CODE_REGS
1043 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1044 #undef TARGET_CC_MODES_COMPATIBLE
1045 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1046
1047 #undef TARGET_MACHINE_DEPENDENT_REORG
1048 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1049
1050 #undef TARGET_BUILD_BUILTIN_VA_LIST
1051 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1052
1053 #undef TARGET_MD_ASM_CLOBBERS
1054 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1055
1056 #undef TARGET_PROMOTE_PROTOTYPES
1057 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1058 #undef TARGET_STRUCT_VALUE_RTX
1059 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1060 #undef TARGET_SETUP_INCOMING_VARARGS
1061 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1062 #undef TARGET_MUST_PASS_IN_STACK
1063 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
1064 #undef TARGET_PASS_BY_REFERENCE
1065 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
1066
1067 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1068 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1069
1070 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1071 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
1072
1073 #ifdef SUBTARGET_INSERT_ATTRIBUTES
1074 #undef TARGET_INSERT_ATTRIBUTES
1075 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
1076 #endif
1077
1078 struct gcc_target targetm = TARGET_INITIALIZER;
1079
1080 \f
1081 /* The svr4 ABI for the i386 says that records and unions are returned
1082 in memory. */
1083 #ifndef DEFAULT_PCC_STRUCT_RETURN
1084 #define DEFAULT_PCC_STRUCT_RETURN 1
1085 #endif
1086
1087 /* Sometimes certain combinations of command options do not make
1088 sense on a particular target machine. You can define a macro
1089 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1090 defined, is executed once just after all the command options have
1091 been parsed.
1092
1093 Don't use this macro to turn on various extra optimizations for
1094 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1095
1096 void
1097 override_options (void)
1098 {
1099 int i;
1100 int ix86_tune_defaulted = 0;
1101
1102 /* Comes from final.c -- no real reason to change it. */
1103 #define MAX_CODE_ALIGN 16
1104
1105 static struct ptt
1106 {
1107 const struct processor_costs *cost; /* Processor costs */
1108 const int target_enable; /* Target flags to enable. */
1109 const int target_disable; /* Target flags to disable. */
1110 const int align_loop; /* Default alignments. */
1111 const int align_loop_max_skip;
1112 const int align_jump;
1113 const int align_jump_max_skip;
1114 const int align_func;
1115 }
1116 const processor_target_table[PROCESSOR_max] =
1117 {
1118 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1119 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1120 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1121 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1122 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1123 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1124 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1125 {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
1126 {&nocona_cost, 0, 0, 0, 0, 0, 0, 0}
1127 };
1128
1129 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1130 static struct pta
1131 {
1132 const char *const name; /* processor name or nickname. */
1133 const enum processor_type processor;
1134 const enum pta_flags
1135 {
1136 PTA_SSE = 1,
1137 PTA_SSE2 = 2,
1138 PTA_SSE3 = 4,
1139 PTA_MMX = 8,
1140 PTA_PREFETCH_SSE = 16,
1141 PTA_3DNOW = 32,
1142 PTA_3DNOW_A = 64,
1143 PTA_64BIT = 128
1144 } flags;
1145 }
1146 const processor_alias_table[] =
1147 {
1148 {"i386", PROCESSOR_I386, 0},
1149 {"i486", PROCESSOR_I486, 0},
1150 {"i586", PROCESSOR_PENTIUM, 0},
1151 {"pentium", PROCESSOR_PENTIUM, 0},
1152 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1153 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1154 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1155 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1156 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1157 {"i686", PROCESSOR_PENTIUMPRO, 0},
1158 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1159 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1160 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1161 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1162 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1163 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1164 | PTA_MMX | PTA_PREFETCH_SSE},
1165 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1166 | PTA_MMX | PTA_PREFETCH_SSE},
1167 {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
1168 | PTA_MMX | PTA_PREFETCH_SSE},
1169 {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1170 | PTA_MMX | PTA_PREFETCH_SSE},
1171 {"k6", PROCESSOR_K6, PTA_MMX},
1172 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1173 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1174 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1175 | PTA_3DNOW_A},
1176 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1177 | PTA_3DNOW | PTA_3DNOW_A},
1178 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1179 | PTA_3DNOW_A | PTA_SSE},
1180 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1181 | PTA_3DNOW_A | PTA_SSE},
1182 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1183 | PTA_3DNOW_A | PTA_SSE},
1184 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1185 | PTA_SSE | PTA_SSE2 },
1186 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1187 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1188 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1189 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1190 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1191 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1192 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1193 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1194 };
1195
1196 int const pta_size = ARRAY_SIZE (processor_alias_table);
1197
1198 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1199 SUBTARGET_OVERRIDE_OPTIONS;
1200 #endif
1201
1202 /* Set the default values for switches whose default depends on TARGET_64BIT
1203 in case they weren't overwritten by command line options. */
1204 if (TARGET_64BIT)
1205 {
1206 if (flag_omit_frame_pointer == 2)
1207 flag_omit_frame_pointer = 1;
1208 if (flag_asynchronous_unwind_tables == 2)
1209 flag_asynchronous_unwind_tables = 1;
1210 if (flag_pcc_struct_return == 2)
1211 flag_pcc_struct_return = 0;
1212 }
1213 else
1214 {
1215 if (flag_omit_frame_pointer == 2)
1216 flag_omit_frame_pointer = 0;
1217 if (flag_asynchronous_unwind_tables == 2)
1218 flag_asynchronous_unwind_tables = 0;
1219 if (flag_pcc_struct_return == 2)
1220 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1221 }
1222
1223 if (!ix86_tune_string && ix86_arch_string)
1224 ix86_tune_string = ix86_arch_string;
1225 if (!ix86_tune_string)
1226 {
1227 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1228 ix86_tune_defaulted = 1;
1229 }
1230 if (!ix86_arch_string)
1231 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
1232
1233 if (ix86_cmodel_string != 0)
1234 {
1235 if (!strcmp (ix86_cmodel_string, "small"))
1236 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1237 else if (flag_pic)
1238 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1239 else if (!strcmp (ix86_cmodel_string, "32"))
1240 ix86_cmodel = CM_32;
1241 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1242 ix86_cmodel = CM_KERNEL;
1243 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
1244 ix86_cmodel = CM_MEDIUM;
1245 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1246 ix86_cmodel = CM_LARGE;
1247 else
1248 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1249 }
1250 else
1251 {
1252 ix86_cmodel = CM_32;
1253 if (TARGET_64BIT)
1254 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1255 }
1256 if (ix86_asm_string != 0)
1257 {
1258 if (!strcmp (ix86_asm_string, "intel"))
1259 ix86_asm_dialect = ASM_INTEL;
1260 else if (!strcmp (ix86_asm_string, "att"))
1261 ix86_asm_dialect = ASM_ATT;
1262 else
1263 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1264 }
1265 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1266 error ("code model %qs not supported in the %s bit mode",
1267 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1268 if (ix86_cmodel == CM_LARGE)
1269 sorry ("code model %<large%> not supported yet");
1270 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1271 sorry ("%i-bit mode not compiled in",
1272 (target_flags & MASK_64BIT) ? 64 : 32);
1273
1274 for (i = 0; i < pta_size; i++)
1275 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1276 {
1277 ix86_arch = processor_alias_table[i].processor;
1278 /* Default cpu tuning to the architecture. */
1279 ix86_tune = ix86_arch;
1280 if (processor_alias_table[i].flags & PTA_MMX
1281 && !(target_flags_explicit & MASK_MMX))
1282 target_flags |= MASK_MMX;
1283 if (processor_alias_table[i].flags & PTA_3DNOW
1284 && !(target_flags_explicit & MASK_3DNOW))
1285 target_flags |= MASK_3DNOW;
1286 if (processor_alias_table[i].flags & PTA_3DNOW_A
1287 && !(target_flags_explicit & MASK_3DNOW_A))
1288 target_flags |= MASK_3DNOW_A;
1289 if (processor_alias_table[i].flags & PTA_SSE
1290 && !(target_flags_explicit & MASK_SSE))
1291 target_flags |= MASK_SSE;
1292 if (processor_alias_table[i].flags & PTA_SSE2
1293 && !(target_flags_explicit & MASK_SSE2))
1294 target_flags |= MASK_SSE2;
1295 if (processor_alias_table[i].flags & PTA_SSE3
1296 && !(target_flags_explicit & MASK_SSE3))
1297 target_flags |= MASK_SSE3;
1298 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1299 x86_prefetch_sse = true;
1300 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1301 error ("CPU you selected does not support x86-64 "
1302 "instruction set");
1303 break;
1304 }
1305
1306 if (i == pta_size)
1307 error ("bad value (%s) for -march= switch", ix86_arch_string);
1308
1309 for (i = 0; i < pta_size; i++)
1310 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1311 {
1312 ix86_tune = processor_alias_table[i].processor;
1313 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1314 {
1315 if (ix86_tune_defaulted)
1316 {
1317 ix86_tune_string = "x86-64";
1318 for (i = 0; i < pta_size; i++)
1319 if (! strcmp (ix86_tune_string,
1320 processor_alias_table[i].name))
1321 break;
1322 ix86_tune = processor_alias_table[i].processor;
1323 }
1324 else
1325 error ("CPU you selected does not support x86-64 "
1326 "instruction set");
1327 }
1328 /* Intel CPUs have always interpreted SSE prefetch instructions as
1329 NOPs; so, we can enable SSE prefetch instructions even when
1330 -mtune (rather than -march) points us to a processor that has them.
1331 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
1332 higher processors. */
1333 if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
1334 x86_prefetch_sse = true;
1335 break;
1336 }
1337 if (i == pta_size)
1338 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1339
1340 if (optimize_size)
1341 ix86_cost = &size_cost;
1342 else
1343 ix86_cost = processor_target_table[ix86_tune].cost;
1344 target_flags |= processor_target_table[ix86_tune].target_enable;
1345 target_flags &= ~processor_target_table[ix86_tune].target_disable;
1346
1347 /* Arrange to set up i386_stack_locals for all functions. */
1348 init_machine_status = ix86_init_machine_status;
1349
1350 /* Validate -mregparm= value. */
1351 if (ix86_regparm_string)
1352 {
1353 i = atoi (ix86_regparm_string);
1354 if (i < 0 || i > REGPARM_MAX)
1355 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1356 else
1357 ix86_regparm = i;
1358 }
1359 else
1360 if (TARGET_64BIT)
1361 ix86_regparm = REGPARM_MAX;
1362
1363 /* If the user has provided any of the -malign-* options,
1364 warn and use that value only if -falign-* is not set.
1365 Remove this code in GCC 3.2 or later. */
1366 if (ix86_align_loops_string)
1367 {
1368 warning ("-malign-loops is obsolete, use -falign-loops");
1369 if (align_loops == 0)
1370 {
1371 i = atoi (ix86_align_loops_string);
1372 if (i < 0 || i > MAX_CODE_ALIGN)
1373 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1374 else
1375 align_loops = 1 << i;
1376 }
1377 }
1378
1379 if (ix86_align_jumps_string)
1380 {
1381 warning ("-malign-jumps is obsolete, use -falign-jumps");
1382 if (align_jumps == 0)
1383 {
1384 i = atoi (ix86_align_jumps_string);
1385 if (i < 0 || i > MAX_CODE_ALIGN)
1386 error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1387 else
1388 align_jumps = 1 << i;
1389 }
1390 }
1391
1392 if (ix86_align_funcs_string)
1393 {
1394 warning ("-malign-functions is obsolete, use -falign-functions");
1395 if (align_functions == 0)
1396 {
1397 i = atoi (ix86_align_funcs_string);
1398 if (i < 0 || i > MAX_CODE_ALIGN)
1399 error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1400 else
1401 align_functions = 1 << i;
1402 }
1403 }
1404
1405 /* Default align_* from the processor table. */
1406 if (align_loops == 0)
1407 {
1408 align_loops = processor_target_table[ix86_tune].align_loop;
1409 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
1410 }
1411 if (align_jumps == 0)
1412 {
1413 align_jumps = processor_target_table[ix86_tune].align_jump;
1414 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
1415 }
1416 if (align_functions == 0)
1417 {
1418 align_functions = processor_target_table[ix86_tune].align_func;
1419 }
1420
1421 /* Validate -mpreferred-stack-boundary= value, or provide default.
1422 The default of 128 bits is for Pentium III's SSE __m128, but we
1423 don't want additional code to keep the stack aligned when
1424 optimizing for code size. */
1425 ix86_preferred_stack_boundary = (optimize_size
1426 ? TARGET_64BIT ? 128 : 32
1427 : 128);
1428 if (ix86_preferred_stack_boundary_string)
1429 {
1430 i = atoi (ix86_preferred_stack_boundary_string);
1431 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1432 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1433 TARGET_64BIT ? 4 : 2);
1434 else
1435 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1436 }
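   /* For instance, -mpreferred-stack-boundary=4 yields (1 << 4) * BITS_PER_UNIT
      = 128 bits, i.e. a 16-byte-aligned stack, matching the 128-bit default
      chosen above for SSE's __m128.  */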
1437
1438 /* Validate -mbranch-cost= value, or provide default. */
1439 ix86_branch_cost = processor_target_table[ix86_tune].cost->branch_cost;
1440 if (ix86_branch_cost_string)
1441 {
1442 i = atoi (ix86_branch_cost_string);
1443 if (i < 0 || i > 5)
1444 error ("-mbranch-cost=%d is not between 0 and 5", i);
1445 else
1446 ix86_branch_cost = i;
1447 }
1448
1449 if (ix86_tls_dialect_string)
1450 {
1451 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1452 ix86_tls_dialect = TLS_DIALECT_GNU;
1453 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1454 ix86_tls_dialect = TLS_DIALECT_SUN;
1455 else
1456 error ("bad value (%s) for -mtls-dialect= switch",
1457 ix86_tls_dialect_string);
1458 }
1459
1460 /* Keep nonleaf frame pointers. */
1461 if (flag_omit_frame_pointer)
1462 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
1463 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
1464 flag_omit_frame_pointer = 1;
1465
1466 /* If we're doing fast math, we don't care about comparison order
1467 wrt NaNs. This lets us use a shorter comparison sequence. */
1468 if (flag_unsafe_math_optimizations)
1469 target_flags &= ~MASK_IEEE_FP;
1470
1471 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1472 since the insns won't need emulation. */
1473 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1474 target_flags &= ~MASK_NO_FANCY_MATH_387;
1475
1476 /* Likewise, if the target doesn't have a 387, or we've specified
1477 software floating point, don't use 387 inline intrinsics. */
1478 if (!TARGET_80387)
1479 target_flags |= MASK_NO_FANCY_MATH_387;
1480
1481 /* Turn on SSE2 builtins for -msse3. */
1482 if (TARGET_SSE3)
1483 target_flags |= MASK_SSE2;
1484
1485 /* Turn on SSE builtins for -msse2. */
1486 if (TARGET_SSE2)
1487 target_flags |= MASK_SSE;
1488
1489 /* Turn on MMX builtins for -msse. */
1490 if (TARGET_SSE)
1491 {
1492 target_flags |= MASK_MMX & ~target_flags_explicit;
1493 x86_prefetch_sse = true;
1494 }
1495
1496 /* Turn on MMX builtins for 3Dnow. */
1497 if (TARGET_3DNOW)
1498 target_flags |= MASK_MMX;
1499
1500 if (TARGET_64BIT)
1501 {
1502 if (TARGET_ALIGN_DOUBLE)
1503 error ("-malign-double makes no sense in the 64bit mode");
1504 if (TARGET_RTD)
1505 error ("-mrtd calling convention not supported in the 64bit mode");
1506
1507 /* Enable by default the SSE and MMX builtins. Do allow the user to
1508 explicitly disable any of these. In particular, disabling SSE and
1509 MMX for kernel code is extremely useful. */
1510 target_flags
1511 |= ((MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE)
1512 & ~target_flags_explicit);
1513
1514 if (TARGET_SSE)
1515 ix86_fpmath = FPMATH_SSE;
1516 }
1517 else
1518 {
1519 ix86_fpmath = FPMATH_387;
1520 /* i386 ABI does not specify a red zone. It still makes sense to use it
1521 when the programmer takes care to keep the stack from being destroyed. */
1522 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
1523 target_flags |= MASK_NO_RED_ZONE;
1524 }
1525
1526 if (ix86_fpmath_string != 0)
1527 {
1528 if (! strcmp (ix86_fpmath_string, "387"))
1529 ix86_fpmath = FPMATH_387;
1530 else if (! strcmp (ix86_fpmath_string, "sse"))
1531 {
1532 if (!TARGET_SSE)
1533 {
1534 warning ("SSE instruction set disabled, using 387 arithmetics");
1535 ix86_fpmath = FPMATH_387;
1536 }
1537 else
1538 ix86_fpmath = FPMATH_SSE;
1539 }
1540 else if (! strcmp (ix86_fpmath_string, "387,sse")
1541 || ! strcmp (ix86_fpmath_string, "sse,387"))
1542 {
1543 if (!TARGET_SSE)
1544 {
1545 warning ("SSE instruction set disabled, using 387 arithmetics");
1546 ix86_fpmath = FPMATH_387;
1547 }
1548 else if (!TARGET_80387)
1549 {
1550 warning ("387 instruction set disabled, using SSE arithmetics");
1551 ix86_fpmath = FPMATH_SSE;
1552 }
1553 else
1554 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1555 }
1556 else
1557 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1558 }
1559
1560 if ((x86_accumulate_outgoing_args & TUNEMASK)
1561 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1562 && !optimize_size)
1563 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1564
1565 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1566 {
1567 char *p;
1568 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1569 p = strchr (internal_label_prefix, 'X');
1570 internal_label_prefix_len = p - internal_label_prefix;
1571 *p = '\0';
1572 }
1573
1574 /* When the scheduling description is not available, disable the scheduler
1575 pass so it won't slow down compilation and make x87 code slower. */
1576 if (!TARGET_SCHEDULE)
1577 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
1578 }
1579 \f
1580 void
1581 optimization_options (int level, int size ATTRIBUTE_UNUSED)
1582 {
1583 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1584 make the problem with not enough registers even worse. */
1585 #ifdef INSN_SCHEDULING
1586 if (level > 1)
1587 flag_schedule_insns = 0;
1588 #endif
1589
1590 /* The default values of these switches depend on TARGET_64BIT, which is
1591 not known at this moment. Mark these values with 2 and let the user
1592 override them. In case there is no command line option specifying
1593 them, we will set the defaults in override_options. */
1594 if (optimize >= 1)
1595 flag_omit_frame_pointer = 2;
1596 flag_pcc_struct_return = 2;
1597 flag_asynchronous_unwind_tables = 2;
1598 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
1599 SUBTARGET_OPTIMIZATION_OPTIONS;
1600 #endif
1601 }
1602 \f
1603 /* Table of valid machine attributes. */
1604 const struct attribute_spec ix86_attribute_table[] =
1605 {
1606 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1607 /* Stdcall attribute says callee is responsible for popping arguments
1608 if they are not variable. */
1609 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1610 /* Fastcall attribute says callee is responsible for popping arguments
1611 if they are not variable. */
1612 { "fastcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1613 /* Cdecl attribute says the callee is a normal C declaration. */
1614 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1615 /* Regparm attribute specifies how many integer arguments are to be
1616 passed in registers. */
1617 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1618 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1619 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
1620 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
1621 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
1622 #endif
1623 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1624 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1625 #ifdef SUBTARGET_ATTRIBUTE_TABLE
1626 SUBTARGET_ATTRIBUTE_TABLE,
1627 #endif
1628 { NULL, 0, 0, false, false, false, NULL }
1629 };
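/* For illustration, user code might request these conventions like so
   (hypothetical declarations, sketching the effect of each attribute):

     int __attribute__ ((stdcall))  f (int a, int b);   callee pops the args
     int __attribute__ ((fastcall)) g (int a, int b);   a in ECX, b in EDX
     int __attribute__ ((regparm (3))) h (int a, int b, int c);
                                                         a, b, c in EAX, EDX, ECX  */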
1630
1631 /* Decide whether we can make a sibling call to a function. DECL is the
1632 declaration of the function being targeted by the call and EXP is the
1633 CALL_EXPR representing the call. */
1634
1635 static bool
1636 ix86_function_ok_for_sibcall (tree decl, tree exp)
1637 {
1638 /* If we are generating position-independent code, we cannot sibcall
1639 optimize any indirect call, or a direct call to a global function,
1640 as the PLT requires %ebx be live. */
1641 if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
1642 return false;
1643
1644 /* If we are returning floats on the 80387 register stack, we cannot
1645 make a sibcall from a function that doesn't return a float to a
1646 function that does or, conversely, from a function that does return
1647 a float to a function that doesn't; the necessary stack adjustment
1648 would not be executed. */
1649 if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp)))
1650 != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)))))
1651 return false;
1652
1653 /* If this call is indirect, we'll need to be able to use a call-clobbered
1654 register for the address of the target function. Make sure that all
1655 such registers are not used for passing parameters. */
1656 if (!decl && !TARGET_64BIT)
1657 {
1658 tree type;
1659
1660 /* We're looking at the CALL_EXPR; we need the type of the function. */
1661 type = TREE_OPERAND (exp, 0); /* pointer expression */
1662 type = TREE_TYPE (type); /* pointer type */
1663 type = TREE_TYPE (type); /* function type */
1664
1665 if (ix86_function_regparm (type, NULL) >= 3)
1666 {
1667 /* ??? Need to count the actual number of registers to be used,
1668 not the possible number of registers. Fix later. */
1669 return false;
1670 }
1671 }
1672
1673 /* Otherwise okay. That also includes certain types of indirect calls. */
1674 return true;
1675 }
1676
1677 /* Handle a "cdecl", "stdcall", or "fastcall" attribute;
1678 arguments as in struct attribute_spec.handler. */
1679 static tree
1680 ix86_handle_cdecl_attribute (tree *node, tree name,
1681 tree args ATTRIBUTE_UNUSED,
1682 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1683 {
1684 if (TREE_CODE (*node) != FUNCTION_TYPE
1685 && TREE_CODE (*node) != METHOD_TYPE
1686 && TREE_CODE (*node) != FIELD_DECL
1687 && TREE_CODE (*node) != TYPE_DECL)
1688 {
1689 warning ("%qs attribute only applies to functions",
1690 IDENTIFIER_POINTER (name));
1691 *no_add_attrs = true;
1692 }
1693 else
1694 {
1695 if (is_attribute_p ("fastcall", name))
1696 {
1697 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
1698 {
1699 error ("fastcall and stdcall attributes are not compatible");
1700 }
1701 else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
1702 {
1703 error ("fastcall and regparm attributes are not compatible");
1704 }
1705 }
1706 else if (is_attribute_p ("stdcall", name))
1707 {
1708 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1709 {
1710 error ("fastcall and stdcall attributes are not compatible");
1711 }
1712 }
1713 }
1714
1715 if (TARGET_64BIT)
1716 {
1717 warning ("%qs attribute ignored", IDENTIFIER_POINTER (name));
1718 *no_add_attrs = true;
1719 }
1720
1721 return NULL_TREE;
1722 }
1723
1724 /* Handle a "regparm" attribute;
1725 arguments as in struct attribute_spec.handler. */
1726 static tree
1727 ix86_handle_regparm_attribute (tree *node, tree name, tree args,
1728 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1729 {
1730 if (TREE_CODE (*node) != FUNCTION_TYPE
1731 && TREE_CODE (*node) != METHOD_TYPE
1732 && TREE_CODE (*node) != FIELD_DECL
1733 && TREE_CODE (*node) != TYPE_DECL)
1734 {
1735 warning ("%qs attribute only applies to functions",
1736 IDENTIFIER_POINTER (name));
1737 *no_add_attrs = true;
1738 }
1739 else
1740 {
1741 tree cst;
1742
1743 cst = TREE_VALUE (args);
1744 if (TREE_CODE (cst) != INTEGER_CST)
1745 {
1746 warning ("%qs attribute requires an integer constant argument",
1747 IDENTIFIER_POINTER (name));
1748 *no_add_attrs = true;
1749 }
1750 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1751 {
1752 warning ("argument to %qs attribute larger than %d",
1753 IDENTIFIER_POINTER (name), REGPARM_MAX);
1754 *no_add_attrs = true;
1755 }
1756
1757 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1758 {
1759 error ("fastcall and regparm attributes are not compatible");
1760 }
1761 }
1762
1763 return NULL_TREE;
1764 }
1765
1766 /* Return 0 if the attributes for two types are incompatible, 1 if they
1767 are compatible, and 2 if they are nearly compatible (which causes a
1768 warning to be generated). */
1769
1770 static int
1771 ix86_comp_type_attributes (tree type1, tree type2)
1772 {
1773 /* Check for mismatch of non-default calling convention. */
1774 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1775
1776 if (TREE_CODE (type1) != FUNCTION_TYPE)
1777 return 1;
1778
1779 /* Check for mismatched fastcall types */
1780 if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
1781 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
1782 return 0;
1783
1784 /* Check for mismatched return types (cdecl vs stdcall). */
1785 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1786 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1787 return 0;
1788 if (ix86_function_regparm (type1, NULL)
1789 != ix86_function_regparm (type2, NULL))
1790 return 0;
1791 return 1;
1792 }
1793 \f
1794 /* Return the regparm value for a function with the indicated TYPE and DECL.
1795 DECL may be NULL when calling function indirectly
1796 or considering a libcall. */
1797
1798 static int
1799 ix86_function_regparm (tree type, tree decl)
1800 {
1801 tree attr;
1802 int regparm = ix86_regparm;
1803 bool user_convention = false;
1804
1805 if (!TARGET_64BIT)
1806 {
1807 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1808 if (attr)
1809 {
1810 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1811 user_convention = true;
1812 }
1813
1814 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
1815 {
1816 regparm = 2;
1817 user_convention = true;
1818 }
1819
1820 /* Use register calling convention for local functions when possible. */
1821 if (!TARGET_64BIT && !user_convention && decl
1822 && flag_unit_at_a_time && !profile_flag)
1823 {
1824 struct cgraph_local_info *i = cgraph_local_info (decl);
1825 if (i && i->local)
1826 {
1827 /* We can't use regparm(3) for nested functions as these use
1828 static chain pointer in third argument. */
1829 if (DECL_CONTEXT (decl) && !DECL_NO_STATIC_CHAIN (decl))
1830 regparm = 2;
1831 else
1832 regparm = 3;
1833 }
1834 }
1835 }
1836 return regparm;
1837 }
1838
1839 /* Return true if EAX is live at the start of the function. Used by
1840 ix86_expand_prologue to determine if we need special help before
1841 calling allocate_stack_worker. */
1842
1843 static bool
1844 ix86_eax_live_at_start_p (void)
1845 {
1846 /* Cheat. Don't bother working forward from ix86_function_regparm
1847 to the function type to whether an actual argument is located in
1848 eax. Instead just look at cfg info, which is still close enough
1849 to correct at this point. This gives false positives for broken
1850 functions that might use uninitialized data that happens to be
1851 allocated in eax, but who cares? */
1852 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->global_live_at_end, 0);
1853 }
1854
1855 /* Value is the number of bytes of arguments automatically
1856 popped when returning from a subroutine call.
1857 FUNDECL is the declaration node of the function (as a tree),
1858 FUNTYPE is the data type of the function (as a tree),
1859 or for a library call it is an identifier node for the subroutine name.
1860 SIZE is the number of bytes of arguments passed on the stack.
1861
1862 On the 80386, the RTD insn may be used to pop them if the number
1863 of args is fixed, but if the number is variable then the caller
1864 must pop them all. RTD can't be used for library calls now
1865 because the library is compiled with the Unix compiler.
1866 Use of RTD is a selectable option, since it is incompatible with
1867 standard Unix calling sequences. If the option is not selected,
1868 the caller must always pop the args.
1869
1870 The attribute stdcall is equivalent to RTD on a per module basis. */
1871
1872 int
1873 ix86_return_pops_args (tree fundecl, tree funtype, int size)
1874 {
1875 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1876
1877 /* Cdecl functions override -mrtd, and never pop the stack. */
1878 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1879
1880 /* Stdcall and fastcall functions will pop the stack if not
1881 variable args. */
1882 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
1883 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
1884 rtd = 1;
1885
1886 if (rtd
1887 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1888 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1889 == void_type_node)))
1890 return size;
1891 }
1892
1893 /* Lose any fake structure return argument if it is passed on the stack. */
1894 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
1895 && !TARGET_64BIT
1896 && !KEEP_AGGREGATE_RETURN_POINTER)
1897 {
1898 int nregs = ix86_function_regparm (funtype, fundecl);
1899
1900 if (!nregs)
1901 return GET_MODE_SIZE (Pmode);
1902 }
1903
1904 return 0;
1905 }
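/* For example, given a hypothetical declaration
     void __attribute__ ((stdcall)) f (int a, int b);
   SIZE is 8 and the function above returns 8, so the callee pops its
   arguments with "ret $8"; a plain cdecl function returns 0 and the
   caller pops instead.  */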
1906 \f
1907 /* Argument support functions. */
1908
1909 /* Return true when register may be used to pass function parameters. */
1910 bool
1911 ix86_function_arg_regno_p (int regno)
1912 {
1913 int i;
1914 if (!TARGET_64BIT)
1915 return (regno < REGPARM_MAX
1916 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1917 if (SSE_REGNO_P (regno) && TARGET_SSE)
1918 return true;
1919 /* RAX is used as hidden argument to va_arg functions. */
1920 if (!regno)
1921 return true;
1922 for (i = 0; i < REGPARM_MAX; i++)
1923 if (regno == x86_64_int_parameter_registers[i])
1924 return true;
1925 return false;
1926 }
1927
1928 /* Return true if we do not know how to pass TYPE solely in registers. */
1929
1930 static bool
1931 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
1932 {
1933 if (must_pass_in_stack_var_size_or_pad (mode, type))
1934 return true;
1935
1936 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
1937 The layout_type routine is crafty and tries to trick us into passing
1938 currently unsupported vector types on the stack by using TImode. */
1939 return (!TARGET_64BIT && mode == TImode
1940 && type && TREE_CODE (type) != VECTOR_TYPE);
1941 }
1942
1943 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1944 for a call to a function whose data type is FNTYPE.
1945 For a library call, FNTYPE is 0. */
1946
1947 void
1948 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
1949 tree fntype, /* tree ptr for function decl */
1950 rtx libname, /* SYMBOL_REF of library name or 0 */
1951 tree fndecl)
1952 {
1953 static CUMULATIVE_ARGS zero_cum;
1954 tree param, next_param;
1955
1956 if (TARGET_DEBUG_ARG)
1957 {
1958 fprintf (stderr, "\ninit_cumulative_args (");
1959 if (fntype)
1960 fprintf (stderr, "fntype code = %s, ret code = %s",
1961 tree_code_name[(int) TREE_CODE (fntype)],
1962 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1963 else
1964 fprintf (stderr, "no fntype");
1965
1966 if (libname)
1967 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1968 }
1969
1970 *cum = zero_cum;
1971
1972 /* Set up the number of registers to use for passing arguments. */
1973 if (fntype)
1974 cum->nregs = ix86_function_regparm (fntype, fndecl);
1975 else
1976 cum->nregs = ix86_regparm;
1977 if (TARGET_SSE)
1978 cum->sse_nregs = SSE_REGPARM_MAX;
1979 if (TARGET_MMX)
1980 cum->mmx_nregs = MMX_REGPARM_MAX;
1981 cum->warn_sse = true;
1982 cum->warn_mmx = true;
1983 cum->maybe_vaarg = false;
1984
1985 /* Use ecx and edx registers if the function has the fastcall attribute. */
1986 if (fntype && !TARGET_64BIT)
1987 {
1988 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
1989 {
1990 cum->nregs = 2;
1991 cum->fastcall = 1;
1992 }
1993 }
1994
1995 /* Determine if this function has variable arguments. This is
1996 indicated by the last argument being 'void_type_node' if there
1997 are no variable arguments. If there are variable arguments, then
1998 we won't pass anything in registers in 32-bit mode. */
1999
2000 if (cum->nregs || cum->mmx_nregs || cum->sse_nregs)
2001 {
2002 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
2003 param != 0; param = next_param)
2004 {
2005 next_param = TREE_CHAIN (param);
2006 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
2007 {
2008 if (!TARGET_64BIT)
2009 {
2010 cum->nregs = 0;
2011 cum->sse_nregs = 0;
2012 cum->mmx_nregs = 0;
2013 cum->warn_sse = 0;
2014 cum->warn_mmx = 0;
2015 cum->fastcall = 0;
2016 }
2017 cum->maybe_vaarg = true;
2018 }
2019 }
2020 }
2021 if ((!fntype && !libname)
2022 || (fntype && !TYPE_ARG_TYPES (fntype)))
2023 cum->maybe_vaarg = 1;
2024
2025 if (TARGET_DEBUG_ARG)
2026 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
2027
2028 return;
2029 }
2030
2031 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
2032 But in the case of vector types, it is some vector mode.
2033
2034 When we have only some of our vector isa extensions enabled, then there
2035 are some modes for which vector_mode_supported_p is false. For these
2036 modes, the generic vector support in gcc will choose some non-vector mode
2037 in order to implement the type. By computing the natural mode, we'll
2038 select the proper ABI location for the operand and not depend on whatever
2039 the middle-end decides to do with these vector types. */
2040
2041 static enum machine_mode
2042 type_natural_mode (tree type)
2043 {
2044 enum machine_mode mode = TYPE_MODE (type);
2045
2046 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
2047 {
2048 HOST_WIDE_INT size = int_size_in_bytes (type);
2049 if ((size == 8 || size == 16)
2050 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
2051 && TYPE_VECTOR_SUBPARTS (type) > 1)
2052 {
2053 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
2054
2055 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
2056 mode = MIN_MODE_VECTOR_FLOAT;
2057 else
2058 mode = MIN_MODE_VECTOR_INT;
2059
2060 /* Get the mode which has this inner mode and number of units. */
2061 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
2062 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
2063 && GET_MODE_INNER (mode) == innermode)
2064 return mode;
2065
2066 abort ();
2067 }
2068 }
2069
2070 return mode;
2071 }
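/* For example, with a hypothetical
     typedef int v2si __attribute__ ((vector_size (8)));
   and MMX disabled, vector_mode_supported_p is false for V2SImode and the
   generic code falls back to a non-vector mode of the same size for the
   type; the loop above still recovers V2SImode as the natural mode, so the
   ABI slot is chosen as if the vector ISA were available.  */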
2072
2073 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
2074 this may not agree with the mode that the type system has chosen for the
2075 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
2076 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
2077
2078 static rtx
2079 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
2080 unsigned int regno)
2081 {
2082 rtx tmp;
2083
2084 if (orig_mode != BLKmode)
2085 tmp = gen_rtx_REG (orig_mode, regno);
2086 else
2087 {
2088 tmp = gen_rtx_REG (mode, regno);
2089 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
2090 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
2091 }
2092
2093 return tmp;
2094 }
2095
2096 /* x86-64 register passing implementation. See the x86-64 ABI for details.
2097 The goal of this code is to classify each 8-byte chunk of the incoming
2098 argument by register class and assign registers accordingly. */
2099
2100 /* Return the union class of CLASS1 and CLASS2.
2101 See the x86-64 PS ABI for details. */
2102
2103 static enum x86_64_reg_class
2104 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
2105 {
2106 /* Rule #1: If both classes are equal, this is the resulting class. */
2107 if (class1 == class2)
2108 return class1;
2109
2110 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
2111 the other class. */
2112 if (class1 == X86_64_NO_CLASS)
2113 return class2;
2114 if (class2 == X86_64_NO_CLASS)
2115 return class1;
2116
2117 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
2118 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
2119 return X86_64_MEMORY_CLASS;
2120
2121 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
2122 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
2123 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
2124 return X86_64_INTEGERSI_CLASS;
2125 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
2126 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
2127 return X86_64_INTEGER_CLASS;
2128
2129 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
2130 MEMORY is used. */
2131 if (class1 == X86_64_X87_CLASS
2132 || class1 == X86_64_X87UP_CLASS
2133 || class1 == X86_64_COMPLEX_X87_CLASS
2134 || class2 == X86_64_X87_CLASS
2135 || class2 == X86_64_X87UP_CLASS
2136 || class2 == X86_64_COMPLEX_X87_CLASS)
2137 return X86_64_MEMORY_CLASS;
2138
2139 /* Rule #6: Otherwise class SSE is used. */
2140 return X86_64_SSE_CLASS;
2141 }
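/* A short worked example of these rules: for a hypothetical
     struct s { int i; float f; };
   the single eightbyte starts out as the int's X86_64_INTEGERSI_CLASS and is
   then merged with the float's X86_64_SSE_CLASS (offset 4 is not 64-bit
   aligned, so not SSESF); rule #4 wins, the result is X86_64_INTEGER_CLASS,
   and the whole struct travels in one general-purpose register.  */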
2142
2143 /* Classify the argument of type TYPE and mode MODE.
2144 CLASSES will be filled by the register class used to pass each word
2145 of the operand. The number of words is returned. In case the parameter
2146 should be passed in memory, 0 is returned. As a special case for zero
2147 sized containers, classes[0] will be NO_CLASS and 1 is returned.
2148
2149 BIT_OFFSET is used internally for handling records; it gives the bit offset
2150 of the value within the record, taken modulo 256 to avoid overflow cases.
2151
2152 See the x86-64 PS ABI for details.
2153 */
2154
2155 static int
2156 classify_argument (enum machine_mode mode, tree type,
2157 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
2158 {
2159 HOST_WIDE_INT bytes =
2160 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2161 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2162
2163 /* Variable sized entities are always passed/returned in memory. */
2164 if (bytes < 0)
2165 return 0;
2166
2167 if (mode != VOIDmode
2168 && targetm.calls.must_pass_in_stack (mode, type))
2169 return 0;
2170
2171 if (type && AGGREGATE_TYPE_P (type))
2172 {
2173 int i;
2174 tree field;
2175 enum x86_64_reg_class subclasses[MAX_CLASSES];
2176
2177 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
2178 if (bytes > 16)
2179 return 0;
2180
2181 for (i = 0; i < words; i++)
2182 classes[i] = X86_64_NO_CLASS;
2183
2184 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
2185 signal the memory class, so handle this as a special case. */
2186 if (!words)
2187 {
2188 classes[0] = X86_64_NO_CLASS;
2189 return 1;
2190 }
2191
2192 /* Classify each field of record and merge classes. */
2193 if (TREE_CODE (type) == RECORD_TYPE)
2194 {
2195 /* For classes first merge in the field of the subclasses. */
2196 if (TYPE_BINFO (type))
2197 {
2198 tree binfo, base_binfo;
2199 int basenum;
2200
2201 for (binfo = TYPE_BINFO (type), basenum = 0;
2202 BINFO_BASE_ITERATE (binfo, basenum, base_binfo); basenum++)
2203 {
2204 int num;
2205 int offset = tree_low_cst (BINFO_OFFSET (base_binfo), 0) * 8;
2206 tree type = BINFO_TYPE (base_binfo);
2207
2208 num = classify_argument (TYPE_MODE (type),
2209 type, subclasses,
2210 (offset + bit_offset) % 256);
2211 if (!num)
2212 return 0;
2213 for (i = 0; i < num; i++)
2214 {
2215 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2216 classes[i + pos] =
2217 merge_classes (subclasses[i], classes[i + pos]);
2218 }
2219 }
2220 }
2221 /* And now merge in the fields of the structure. */
2222 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2223 {
2224 if (TREE_CODE (field) == FIELD_DECL)
2225 {
2226 int num;
2227
2228 /* Bitfields are always classified as integer. Handle them
2229 early, since later code would consider them to be
2230 misaligned integers. */
2231 if (DECL_BIT_FIELD (field))
2232 {
2233 for (i = int_bit_position (field) / 8 / 8;
2234 i < (int_bit_position (field)
2235 + tree_low_cst (DECL_SIZE (field), 0)
2236 + 63) / 8 / 8; i++)
2237 classes[i] =
2238 merge_classes (X86_64_INTEGER_CLASS,
2239 classes[i]);
2240 }
2241 else
2242 {
2243 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2244 TREE_TYPE (field), subclasses,
2245 (int_bit_position (field)
2246 + bit_offset) % 256);
2247 if (!num)
2248 return 0;
2249 for (i = 0; i < num; i++)
2250 {
2251 int pos =
2252 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
2253 classes[i + pos] =
2254 merge_classes (subclasses[i], classes[i + pos]);
2255 }
2256 }
2257 }
2258 }
2259 }
2260 /* Arrays are handled as small records. */
2261 else if (TREE_CODE (type) == ARRAY_TYPE)
2262 {
2263 int num;
2264 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2265 TREE_TYPE (type), subclasses, bit_offset);
2266 if (!num)
2267 return 0;
2268
2269 /* The partial classes are now full classes. */
2270 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2271 subclasses[0] = X86_64_SSE_CLASS;
2272 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
2273 subclasses[0] = X86_64_INTEGER_CLASS;
2274
2275 for (i = 0; i < words; i++)
2276 classes[i] = subclasses[i % num];
2277 }
2278 /* Unions are similar to RECORD_TYPE but offset is always 0. */
2279 else if (TREE_CODE (type) == UNION_TYPE
2280 || TREE_CODE (type) == QUAL_UNION_TYPE)
2281 {
2282 /* For classes first merge in the field of the subclasses. */
2283 if (TYPE_BINFO (type))
2284 {
2285 tree binfo, base_binfo;
2286 int basenum;
2287
2288 for (binfo = TYPE_BINFO (type), basenum = 0;
2289 BINFO_BASE_ITERATE (binfo, basenum, base_binfo); basenum++)
2290 {
2291 int num;
2292 int offset = tree_low_cst (BINFO_OFFSET (base_binfo), 0) * 8;
2293 tree type = BINFO_TYPE (base_binfo);
2294
2295 num = classify_argument (TYPE_MODE (type),
2296 type, subclasses,
2297 (offset + (bit_offset % 64)) % 256);
2298 if (!num)
2299 return 0;
2300 for (i = 0; i < num; i++)
2301 {
2302 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2303 classes[i + pos] =
2304 merge_classes (subclasses[i], classes[i + pos]);
2305 }
2306 }
2307 }
2308 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2309 {
2310 if (TREE_CODE (field) == FIELD_DECL)
2311 {
2312 int num;
2313 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2314 TREE_TYPE (field), subclasses,
2315 bit_offset);
2316 if (!num)
2317 return 0;
2318 for (i = 0; i < num; i++)
2319 classes[i] = merge_classes (subclasses[i], classes[i]);
2320 }
2321 }
2322 }
2323 else
2324 abort ();
2325
2326 /* Final merger cleanup. */
2327 for (i = 0; i < words; i++)
2328 {
2329 /* If one class is MEMORY, everything should be passed in
2330 memory. */
2331 if (classes[i] == X86_64_MEMORY_CLASS)
2332 return 0;
2333
2334 /* X86_64_SSEUP_CLASS should always be preceded by
2335 X86_64_SSE_CLASS. */
2336 if (classes[i] == X86_64_SSEUP_CLASS
2337 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
2338 classes[i] = X86_64_SSE_CLASS;
2339
2340 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
2341 if (classes[i] == X86_64_X87UP_CLASS
2342 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
2343 classes[i] = X86_64_SSE_CLASS;
2344 }
2345 return words;
2346 }
2347
2348 /* Compute alignment needed. We align all types to natural boundaries with
2349 exception of XFmode that is aligned to 64bits. */
2350 if (mode != VOIDmode && mode != BLKmode)
2351 {
2352 int mode_alignment = GET_MODE_BITSIZE (mode);
2353
2354 if (mode == XFmode)
2355 mode_alignment = 128;
2356 else if (mode == XCmode)
2357 mode_alignment = 256;
2358 if (COMPLEX_MODE_P (mode))
2359 mode_alignment /= 2;
2360 /* Misaligned fields are always returned in memory. */
2361 if (bit_offset % mode_alignment)
2362 return 0;
2363 }
2364
2365 /* for V1xx modes, just use the base mode */
2366 if (VECTOR_MODE_P (mode)
2367 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
2368 mode = GET_MODE_INNER (mode);
2369
2370 /* Classification of atomic types. */
2371 switch (mode)
2372 {
2373 case DImode:
2374 case SImode:
2375 case HImode:
2376 case QImode:
2377 case CSImode:
2378 case CHImode:
2379 case CQImode:
2380 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2381 classes[0] = X86_64_INTEGERSI_CLASS;
2382 else
2383 classes[0] = X86_64_INTEGER_CLASS;
2384 return 1;
2385 case CDImode:
2386 case TImode:
2387 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2388 return 2;
2389 case CTImode:
2390 return 0;
2391 case SFmode:
2392 if (!(bit_offset % 64))
2393 classes[0] = X86_64_SSESF_CLASS;
2394 else
2395 classes[0] = X86_64_SSE_CLASS;
2396 return 1;
2397 case DFmode:
2398 classes[0] = X86_64_SSEDF_CLASS;
2399 return 1;
2400 case XFmode:
2401 classes[0] = X86_64_X87_CLASS;
2402 classes[1] = X86_64_X87UP_CLASS;
2403 return 2;
2404 case TFmode:
2405 classes[0] = X86_64_SSE_CLASS;
2406 classes[1] = X86_64_SSEUP_CLASS;
2407 return 2;
2408 case SCmode:
2409 classes[0] = X86_64_SSE_CLASS;
2410 return 1;
2411 case DCmode:
2412 classes[0] = X86_64_SSEDF_CLASS;
2413 classes[1] = X86_64_SSEDF_CLASS;
2414 return 2;
2415 case XCmode:
2416 classes[0] = X86_64_COMPLEX_X87_CLASS;
2417 return 1;
2418 case TCmode:
2419 /* This mode is larger than 16 bytes. */
2420 return 0;
2421 case V4SFmode:
2422 case V4SImode:
2423 case V16QImode:
2424 case V8HImode:
2425 case V2DFmode:
2426 case V2DImode:
2427 classes[0] = X86_64_SSE_CLASS;
2428 classes[1] = X86_64_SSEUP_CLASS;
2429 return 2;
2430 case V2SFmode:
2431 case V2SImode:
2432 case V4HImode:
2433 case V8QImode:
2434 classes[0] = X86_64_SSE_CLASS;
2435 return 1;
2436 case BLKmode:
2437 case VOIDmode:
2438 return 0;
2439 default:
2440 if (VECTOR_MODE_P (mode))
2441 {
2442 if (bytes > 16)
2443 return 0;
2444 if (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT)
2445 {
2446 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2447 classes[0] = X86_64_INTEGERSI_CLASS;
2448 else
2449 classes[0] = X86_64_INTEGER_CLASS;
2450 classes[1] = X86_64_INTEGER_CLASS;
2451 return 1 + (bytes > 8);
2452 }
2453 }
2454 abort ();
2455 }
2456 }
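/* Putting the pieces together, a hypothetical
     struct s { double d; long l; };
   classifies as two eightbytes: the double yields X86_64_SSEDF_CLASS and the
   long yields X86_64_INTEGER_CLASS, so examine_argument below asks for one
   SSE and one integer register and the value is passed half in an XMM
   register and half in a general-purpose register.  */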
2457
2458 /* Examine the argument and return the number of registers required in each
2459 class. Return 0 iff the parameter should be passed in memory. */
2460 static int
2461 examine_argument (enum machine_mode mode, tree type, int in_return,
2462 int *int_nregs, int *sse_nregs)
2463 {
2464 enum x86_64_reg_class class[MAX_CLASSES];
2465 int n = classify_argument (mode, type, class, 0);
2466
2467 *int_nregs = 0;
2468 *sse_nregs = 0;
2469 if (!n)
2470 return 0;
2471 for (n--; n >= 0; n--)
2472 switch (class[n])
2473 {
2474 case X86_64_INTEGER_CLASS:
2475 case X86_64_INTEGERSI_CLASS:
2476 (*int_nregs)++;
2477 break;
2478 case X86_64_SSE_CLASS:
2479 case X86_64_SSESF_CLASS:
2480 case X86_64_SSEDF_CLASS:
2481 (*sse_nregs)++;
2482 break;
2483 case X86_64_NO_CLASS:
2484 case X86_64_SSEUP_CLASS:
2485 break;
2486 case X86_64_X87_CLASS:
2487 case X86_64_X87UP_CLASS:
2488 if (!in_return)
2489 return 0;
2490 break;
2491 case X86_64_COMPLEX_X87_CLASS:
2492 return in_return ? 2 : 0;
2493 case X86_64_MEMORY_CLASS:
2494 abort ();
2495 }
2496 return 1;
2497 }
2498
2499 /* Construct container for the argument used by GCC interface. See
2500 FUNCTION_ARG for the detailed description. */
2501
2502 static rtx
2503 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
2504 tree type, int in_return, int nintregs, int nsseregs,
2505 const int *intreg, int sse_regno)
2506 {
2507 enum machine_mode tmpmode;
2508 int bytes =
2509 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2510 enum x86_64_reg_class class[MAX_CLASSES];
2511 int n;
2512 int i;
2513 int nexps = 0;
2514 int needed_sseregs, needed_intregs;
2515 rtx exp[MAX_CLASSES];
2516 rtx ret;
2517
2518 n = classify_argument (mode, type, class, 0);
2519 if (TARGET_DEBUG_ARG)
2520 {
2521 if (!n)
2522 fprintf (stderr, "Memory class\n");
2523 else
2524 {
2525 fprintf (stderr, "Classes:");
2526 for (i = 0; i < n; i++)
2527 {
2528 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2529 }
2530 fprintf (stderr, "\n");
2531 }
2532 }
2533 if (!n)
2534 return NULL;
2535 if (!examine_argument (mode, type, in_return, &needed_intregs,
2536 &needed_sseregs))
2537 return NULL;
2538 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2539 return NULL;
2540
2541 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
2542 some less clueful developer tries to use floating-point anyway. */
2543 if (needed_sseregs && !TARGET_SSE)
2544 {
2545 static bool issued_error;
2546 if (!issued_error)
2547 {
2548 issued_error = true;
2549 if (in_return)
2550 error ("SSE register return with SSE disabled");
2551 else
2552 error ("SSE register argument with SSE disabled");
2553 }
2554 return NULL;
2555 }
2556
2557 /* First construct simple cases. Avoid SCmode, since we want to use a
2558 single register to pass this type. */
2559 if (n == 1 && mode != SCmode)
2560 switch (class[0])
2561 {
2562 case X86_64_INTEGER_CLASS:
2563 case X86_64_INTEGERSI_CLASS:
2564 return gen_rtx_REG (mode, intreg[0]);
2565 case X86_64_SSE_CLASS:
2566 case X86_64_SSESF_CLASS:
2567 case X86_64_SSEDF_CLASS:
2568 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
2569 case X86_64_X87_CLASS:
2570 case X86_64_COMPLEX_X87_CLASS:
2571 return gen_rtx_REG (mode, FIRST_STACK_REG);
2572 case X86_64_NO_CLASS:
2573 /* Zero sized array, struct or class. */
2574 return NULL;
2575 default:
2576 abort ();
2577 }
2578 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
2579 && mode != BLKmode)
2580 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2581 if (n == 2
2582 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2583 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
2584 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2585 && class[1] == X86_64_INTEGER_CLASS
2586 && (mode == CDImode || mode == TImode || mode == TFmode)
2587 && intreg[0] + 1 == intreg[1])
2588 return gen_rtx_REG (mode, intreg[0]);
2589
2590 /* Otherwise figure out the entries of the PARALLEL. */
2591 for (i = 0; i < n; i++)
2592 {
2593 switch (class[i])
2594 {
2595 case X86_64_NO_CLASS:
2596 break;
2597 case X86_64_INTEGER_CLASS:
2598 case X86_64_INTEGERSI_CLASS:
2599 /* Merge TImodes on aligned occasions here too. */
2600 if (i * 8 + 8 > bytes)
2601 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2602 else if (class[i] == X86_64_INTEGERSI_CLASS)
2603 tmpmode = SImode;
2604 else
2605 tmpmode = DImode;
2606 /* We've requested 24 bytes, which we don't have a mode for. Use DImode. */
2607 if (tmpmode == BLKmode)
2608 tmpmode = DImode;
2609 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2610 gen_rtx_REG (tmpmode, *intreg),
2611 GEN_INT (i*8));
2612 intreg++;
2613 break;
2614 case X86_64_SSESF_CLASS:
2615 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2616 gen_rtx_REG (SFmode,
2617 SSE_REGNO (sse_regno)),
2618 GEN_INT (i*8));
2619 sse_regno++;
2620 break;
2621 case X86_64_SSEDF_CLASS:
2622 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2623 gen_rtx_REG (DFmode,
2624 SSE_REGNO (sse_regno)),
2625 GEN_INT (i*8));
2626 sse_regno++;
2627 break;
2628 case X86_64_SSE_CLASS:
2629 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2630 tmpmode = TImode;
2631 else
2632 tmpmode = DImode;
2633 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2634 gen_rtx_REG (tmpmode,
2635 SSE_REGNO (sse_regno)),
2636 GEN_INT (i*8));
2637 if (tmpmode == TImode)
2638 i++;
2639 sse_regno++;
2640 break;
2641 default:
2642 abort ();
2643 }
2644 }
2645 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2646 for (i = 0; i < nexps; i++)
2647 XVECEXP (ret, 0, i) = exp [i];
2648 return ret;
2649 }
2650
2651 /* Update the data in CUM to advance over an argument
2652 of mode MODE and data type TYPE.
2653 (TYPE is null for libcalls where that information may not be available.) */
2654
2655 void
2656 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
2657 tree type, int named)
2658 {
2659 int bytes =
2660 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2661 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2662
2663 if (type)
2664 mode = type_natural_mode (type);
2665
2666 if (TARGET_DEBUG_ARG)
2667 fprintf (stderr, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
2668 "mode=%s, named=%d)\n\n",
2669 words, cum->words, cum->nregs, cum->sse_nregs,
2670 GET_MODE_NAME (mode), named);
2671
2672 if (TARGET_64BIT)
2673 {
2674 int int_nregs, sse_nregs;
2675 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2676 cum->words += words;
2677 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2678 {
2679 cum->nregs -= int_nregs;
2680 cum->sse_nregs -= sse_nregs;
2681 cum->regno += int_nregs;
2682 cum->sse_regno += sse_nregs;
2683 }
2684 else
2685 cum->words += words;
2686 }
2687 else
2688 {
2689 switch (mode)
2690 {
2691 default:
2692 break;
2693
2694 case BLKmode:
2695 if (bytes < 0)
2696 break;
2697 /* FALLTHRU */
2698
2699 case DImode:
2700 case SImode:
2701 case HImode:
2702 case QImode:
2703 cum->words += words;
2704 cum->nregs -= words;
2705 cum->regno += words;
2706
2707 if (cum->nregs <= 0)
2708 {
2709 cum->nregs = 0;
2710 cum->regno = 0;
2711 }
2712 break;
2713
2714 case TImode:
2715 case V16QImode:
2716 case V8HImode:
2717 case V4SImode:
2718 case V2DImode:
2719 case V4SFmode:
2720 case V2DFmode:
2721 if (!type || !AGGREGATE_TYPE_P (type))
2722 {
2723 cum->sse_words += words;
2724 cum->sse_nregs -= 1;
2725 cum->sse_regno += 1;
2726 if (cum->sse_nregs <= 0)
2727 {
2728 cum->sse_nregs = 0;
2729 cum->sse_regno = 0;
2730 }
2731 }
2732 break;
2733
2734 case V8QImode:
2735 case V4HImode:
2736 case V2SImode:
2737 case V2SFmode:
2738 if (!type || !AGGREGATE_TYPE_P (type))
2739 {
2740 cum->mmx_words += words;
2741 cum->mmx_nregs -= 1;
2742 cum->mmx_regno += 1;
2743 if (cum->mmx_nregs <= 0)
2744 {
2745 cum->mmx_nregs = 0;
2746 cum->mmx_regno = 0;
2747 }
2748 }
2749 break;
2750 }
2751 }
2752 }
2753
2754 /* Define where to put the arguments to a function.
2755 Value is zero to push the argument on the stack,
2756 or a hard register in which to store the argument.
2757
2758 MODE is the argument's machine mode.
2759 TYPE is the data type of the argument (as a tree).
2760 This is null for libcalls where that information may
2761 not be available.
2762 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2763 the preceding args and about the function being called.
2764 NAMED is nonzero if this argument is a named parameter
2765 (otherwise it is an extra parameter matching an ellipsis). */
2766
2767 rtx
2768 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode orig_mode,
2769 tree type, int named)
2770 {
2771 enum machine_mode mode = orig_mode;
2772 rtx ret = NULL_RTX;
2773 int bytes =
2774 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2775 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2776 static bool warnedsse, warnedmmx;
2777
2778 /* To simplify the code below, represent vector types with a vector mode
2779 even if MMX/SSE are not active. */
2780 if (type && TREE_CODE (type) == VECTOR_TYPE)
2781 mode = type_natural_mode (type);
2782
2783 /* Handle a hidden AL argument containing number of registers for varargs
2784 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
2785 any AL settings. */
2786 if (mode == VOIDmode)
2787 {
2788 if (TARGET_64BIT)
2789 return GEN_INT (cum->maybe_vaarg
2790 ? (cum->sse_nregs < 0
2791 ? SSE_REGPARM_MAX
2792 : cum->sse_regno)
2793 : -1);
2794 else
2795 return constm1_rtx;
2796 }
2797 if (TARGET_64BIT)
2798 ret = construct_container (mode, orig_mode, type, 0, cum->nregs,
2799 cum->sse_nregs,
2800 &x86_64_int_parameter_registers [cum->regno],
2801 cum->sse_regno);
2802 else
2803 switch (mode)
2804 {
2805 /* For now, pass fp/complex values on the stack. */
2806 default:
2807 break;
2808
2809 case BLKmode:
2810 if (bytes < 0)
2811 break;
2812 /* FALLTHRU */
2813 case DImode:
2814 case SImode:
2815 case HImode:
2816 case QImode:
2817 if (words <= cum->nregs)
2818 {
2819 int regno = cum->regno;
2820
2821 /* Fastcall allocates the first two DWORD (SImode) or
2822 smaller arguments to ECX and EDX. */
2823 if (cum->fastcall)
2824 {
2825 if (mode == BLKmode || mode == DImode)
2826 break;
2827
2828 /* ECX, not EAX, is the first allocated register. */
2829 if (regno == 0)
2830 regno = 2;
2831 }
2832 ret = gen_rtx_REG (mode, regno);
2833 }
2834 break;
2835 case TImode:
2836 case V16QImode:
2837 case V8HImode:
2838 case V4SImode:
2839 case V2DImode:
2840 case V4SFmode:
2841 case V2DFmode:
2842 if (!type || !AGGREGATE_TYPE_P (type))
2843 {
2844 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
2845 {
2846 warnedsse = true;
2847 warning ("SSE vector argument without SSE enabled "
2848 "changes the ABI");
2849 }
2850 if (cum->sse_nregs)
2851 ret = gen_reg_or_parallel (mode, orig_mode,
2852 cum->sse_regno + FIRST_SSE_REG);
2853 }
2854 break;
2855 case V8QImode:
2856 case V4HImode:
2857 case V2SImode:
2858 case V2SFmode:
2859 if (!type || !AGGREGATE_TYPE_P (type))
2860 {
2861 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
2862 {
2863 warnedmmx = true;
2864 warning ("MMX vector argument without MMX enabled "
2865 "changes the ABI");
2866 }
2867 if (cum->mmx_nregs)
2868 ret = gen_reg_or_parallel (mode, orig_mode,
2869 cum->mmx_regno + FIRST_MMX_REG);
2870 }
2871 break;
2872 }
2873
2874 if (TARGET_DEBUG_ARG)
2875 {
2876 fprintf (stderr,
2877 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2878 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2879
2880 if (ret)
2881 print_simple_rtl (stderr, ret);
2882 else
2883 fprintf (stderr, ", stack");
2884
2885 fprintf (stderr, " )\n");
2886 }
2887
2888 return ret;
2889 }
2890
2891 /* A C expression that indicates when an argument must be passed by
2892 reference. If nonzero for an argument, a copy of that argument is
2893 made in memory and a pointer to the argument is passed instead of
2894 the argument itself. The pointer is passed in whatever way is
2895 appropriate for passing a pointer to that type. */
2896
2897 static bool
2898 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
2899 enum machine_mode mode ATTRIBUTE_UNUSED,
2900 tree type, bool named ATTRIBUTE_UNUSED)
2901 {
2902 if (!TARGET_64BIT)
2903 return 0;
2904
2905 if (type && int_size_in_bytes (type) == -1)
2906 {
2907 if (TARGET_DEBUG_ARG)
2908 fprintf (stderr, "function_arg_pass_by_reference\n");
2909 return 1;
2910 }
2911
2912 return 0;
2913 }
2914
2915 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
2916 ABI. Only called if TARGET_SSE. */
2917 static bool
2918 contains_128bit_aligned_vector_p (tree type)
2919 {
2920 enum machine_mode mode = TYPE_MODE (type);
2921 if (SSE_REG_MODE_P (mode)
2922 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
2923 return true;
2924 if (TYPE_ALIGN (type) < 128)
2925 return false;
2926
2927 if (AGGREGATE_TYPE_P (type))
2928 {
2929 /* Walk the aggregates recursively. */
2930 if (TREE_CODE (type) == RECORD_TYPE
2931 || TREE_CODE (type) == UNION_TYPE
2932 || TREE_CODE (type) == QUAL_UNION_TYPE)
2933 {
2934 tree field;
2935
2936 if (TYPE_BINFO (type))
2937 {
2938 tree binfo, base_binfo;
2939 int i;
2940
2941 for (binfo = TYPE_BINFO (type), i = 0;
2942 BINFO_BASE_ITERATE (binfo, i, base_binfo); i++)
2943 if (contains_128bit_aligned_vector_p (BINFO_TYPE (base_binfo)))
2944 return true;
2945 }
2946 /* And now check the fields of the structure. */
2947 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2948 {
2949 if (TREE_CODE (field) == FIELD_DECL
2950 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
2951 return true;
2952 }
2953 }
2954 /* Just in case some language passes arrays by value. */
2955 else if (TREE_CODE (type) == ARRAY_TYPE)
2956 {
2957 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
2958 return true;
2959 }
2960 else
2961 abort ();
2962 }
2963 return false;
2964 }
2965
2966 /* Gives the alignment boundary, in bits, of an argument with the
2967 specified mode and type. */
2968
2969 int
2970 ix86_function_arg_boundary (enum machine_mode mode, tree type)
2971 {
2972 int align;
2973 if (type)
2974 align = TYPE_ALIGN (type);
2975 else
2976 align = GET_MODE_ALIGNMENT (mode);
2977 if (align < PARM_BOUNDARY)
2978 align = PARM_BOUNDARY;
2979 if (!TARGET_64BIT)
2980 {
2981 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
2982 make an exception for SSE modes since these require 128bit
2983 alignment.
2984
2985 The handling here differs from field_alignment. ICC aligns MMX
2986 arguments to 4 byte boundaries, while structure fields are aligned
2987 to 8 byte boundaries. */
2988 if (!TARGET_SSE)
2989 align = PARM_BOUNDARY;
2990 else if (!type)
2991 {
2992 if (!SSE_REG_MODE_P (mode))
2993 align = PARM_BOUNDARY;
2994 }
2995 else
2996 {
2997 if (!contains_128bit_aligned_vector_p (type))
2998 align = PARM_BOUNDARY;
2999 }
3000 }
3001 if (align > 128)
3002 align = 128;
3003 return align;
3004 }
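/* For instance, on 32-bit targets with SSE enabled a __m128 argument
   (V4SFmode, a 16-byte aligned type) keeps the 128-bit boundary computed
   above, while a plain int or double argument falls back to PARM_BOUNDARY,
   i.e. 32 bits.  */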
3005
3006 /* Return true if N is a possible register number of function value. */
3007 bool
3008 ix86_function_value_regno_p (int regno)
3009 {
3010 if (!TARGET_64BIT)
3011 {
3012 return ((regno) == 0
3013 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
3014 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
3015 }
3016 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
3017 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
3018 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
3019 }
3020
3021 /* Define how to find the value returned by a function.
3022 VALTYPE is the data type of the value (as a tree).
3023 If the precise function being called is known, FUNC is its FUNCTION_DECL;
3024 otherwise, FUNC is 0. */
3025 rtx
3026 ix86_function_value (tree valtype)
3027 {
3028 enum machine_mode natmode = type_natural_mode (valtype);
3029
3030 if (TARGET_64BIT)
3031 {
3032 rtx ret = construct_container (natmode, TYPE_MODE (valtype), valtype,
3033 1, REGPARM_MAX, SSE_REGPARM_MAX,
3034 x86_64_int_return_registers, 0);
3035 /* For zero sized structures, construct_container returns NULL, but we
3036 need to keep the rest of the compiler happy by returning a meaningful value. */
3037 if (!ret)
3038 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
3039 return ret;
3040 }
3041 else
3042 return gen_rtx_REG (TYPE_MODE (valtype), ix86_value_regno (natmode));
3043 }
3044
3045 /* Return true iff type is returned in memory. */
3046 int
3047 ix86_return_in_memory (tree type)
3048 {
3049 int needed_intregs, needed_sseregs, size;
3050 enum machine_mode mode = type_natural_mode (type);
3051
3052 if (TARGET_64BIT)
3053 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
3054
3055 if (mode == BLKmode)
3056 return 1;
3057
3058 size = int_size_in_bytes (type);
3059
3060 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
3061 return 0;
3062
3063 if (VECTOR_MODE_P (mode) || mode == TImode)
3064 {
3065 /* User-created vectors small enough to fit in EAX. */
3066 if (size < 8)
3067 return 0;
3068
3069 /* MMX/3dNow values are returned on the stack, since we've
3070 got to EMMS/FEMMS before returning. */
3071 if (size == 8)
3072 return 1;
3073
3074 /* SSE values are returned in XMM0, except when it doesn't exist. */
3075 if (size == 16)
3076 return (TARGET_SSE ? 0 : 1);
3077 }
3078
3079 if (mode == XFmode)
3080 return 0;
3081
3082 if (size > 12)
3083 return 1;
3084 return 0;
3085 }
3086
3087 /* When returning SSE vector types, we have a choice of either
3088 (1) being abi incompatible with a -march switch, or
3089 (2) generating an error.
3090 Given no good solution, I think the safest thing is one warning.
3091 The user won't be able to use -Werror, but....
3092
3093 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
3094 called in response to actually generating a caller or callee that
3095 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
3096 via aggregate_value_p for general type probing from tree-ssa. */
3097
3098 static rtx
3099 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
3100 {
3101 static bool warned;
3102
3103 if (!TARGET_SSE && type && !warned)
3104 {
3105 /* Look at the return type of the function, not the function type. */
3106 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
3107
3108 if (mode == TImode
3109 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3110 {
3111 warned = true;
3112 warning ("SSE vector return without SSE enabled changes the ABI");
3113 }
3114 }
3115
3116 return NULL;
3117 }
3118
3119 /* Define how to find the value returned by a library function
3120 assuming the value has mode MODE. */
3121 rtx
3122 ix86_libcall_value (enum machine_mode mode)
3123 {
3124 if (TARGET_64BIT)
3125 {
3126 switch (mode)
3127 {
3128 case SFmode:
3129 case SCmode:
3130 case DFmode:
3131 case DCmode:
3132 case TFmode:
3133 return gen_rtx_REG (mode, FIRST_SSE_REG);
3134 case XFmode:
3135 case XCmode:
3136 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
3137 case TCmode:
3138 return NULL;
3139 default:
3140 return gen_rtx_REG (mode, 0);
3141 }
3142 }
3143 else
3144 return gen_rtx_REG (mode, ix86_value_regno (mode));
3145 }
3146
3147 /* Given a mode, return the register to use for a return value. */
3148
3149 static int
3150 ix86_value_regno (enum machine_mode mode)
3151 {
3152 /* Floating point return values in %st(0). */
3153 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
3154 return FIRST_FLOAT_REG;
3155 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
3156 we prevent this case when sse is not available. */
3157 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3158 return FIRST_SSE_REG;
3159 /* Everything else in %eax. */
3160 return 0;
3161 }
3162 \f
3163 /* Create the va_list data type. */
3164
3165 static tree
3166 ix86_build_builtin_va_list (void)
3167 {
3168 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
3169
3170 /* For i386 we use a plain pointer to the argument area. */
3171 if (!TARGET_64BIT)
3172 return build_pointer_type (char_type_node);
3173
3174 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
3175 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
3176
3177 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
3178 unsigned_type_node);
3179 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
3180 unsigned_type_node);
3181 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
3182 ptr_type_node);
3183 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
3184 ptr_type_node);
3185
3186 DECL_FIELD_CONTEXT (f_gpr) = record;
3187 DECL_FIELD_CONTEXT (f_fpr) = record;
3188 DECL_FIELD_CONTEXT (f_ovf) = record;
3189 DECL_FIELD_CONTEXT (f_sav) = record;
3190
3191 TREE_CHAIN (record) = type_decl;
3192 TYPE_NAME (record) = type_decl;
3193 TYPE_FIELDS (record) = f_gpr;
3194 TREE_CHAIN (f_gpr) = f_fpr;
3195 TREE_CHAIN (f_fpr) = f_ovf;
3196 TREE_CHAIN (f_ovf) = f_sav;
3197
3198 layout_type (record);
3199
3200 /* The correct type is an array type of one element. */
3201 return build_array_type (record, build_index_type (size_zero_node));
3202 }
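/* The record built above corresponds roughly to the following C declaration
   (a sketch of the x86-64 va_list layout; the real type is constructed
   through the tree nodes above):

     struct __va_list_tag {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     };
     typedef struct __va_list_tag va_list[1];  */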
3203
3204 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
3205
3206 static void
3207 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3208 tree type, int *pretend_size ATTRIBUTE_UNUSED,
3209 int no_rtl)
3210 {
3211 CUMULATIVE_ARGS next_cum;
3212 rtx save_area = NULL_RTX, mem;
3213 rtx label;
3214 rtx label_ref;
3215 rtx tmp_reg;
3216 rtx nsse_reg;
3217 int set;
3218 tree fntype;
3219 int stdarg_p;
3220 int i;
3221
3222 if (!TARGET_64BIT)
3223 return;
3224
3225 /* Indicate to allocate space on the stack for varargs save area. */
3226 ix86_save_varrargs_registers = 1;
3227
3228 cfun->stack_alignment_needed = 128;
3229
3230 fntype = TREE_TYPE (current_function_decl);
3231 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
3232 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
3233 != void_type_node));
3234
3235 /* For varargs, we do not want to skip the dummy va_dcl argument.
3236 For stdargs, we do want to skip the last named argument. */
3237 next_cum = *cum;
3238 if (stdarg_p)
3239 function_arg_advance (&next_cum, mode, type, 1);
3240
3241 if (!no_rtl)
3242 save_area = frame_pointer_rtx;
3243
3244 set = get_varargs_alias_set ();
3245
3246 for (i = next_cum.regno; i < ix86_regparm; i++)
3247 {
3248 mem = gen_rtx_MEM (Pmode,
3249 plus_constant (save_area, i * UNITS_PER_WORD));
3250 set_mem_alias_set (mem, set);
3251 emit_move_insn (mem, gen_rtx_REG (Pmode,
3252 x86_64_int_parameter_registers[i]));
3253 }
3254
3255 if (next_cum.sse_nregs)
3256 {
3257 /* Now emit code to save SSE registers. The AX parameter contains the
3258 number of SSE parameter registers used to call this function. We use
3259 the sse_prologue_save insn template, which produces a computed jump
3260 across the SSE saves. We need some preparation work to get this working. */
3261
3262 label = gen_label_rtx ();
3263 label_ref = gen_rtx_LABEL_REF (Pmode, label);
3264
3265 /* Compute the address to jump to:
3266 label - 4*eax + nnamed_sse_arguments*4 */
3267 tmp_reg = gen_reg_rtx (Pmode);
3268 nsse_reg = gen_reg_rtx (Pmode);
3269 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
3270 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3271 gen_rtx_MULT (Pmode, nsse_reg,
3272 GEN_INT (4))));
3273 if (next_cum.sse_regno)
3274 emit_move_insn
3275 (nsse_reg,
3276 gen_rtx_CONST (DImode,
3277 gen_rtx_PLUS (DImode,
3278 label_ref,
3279 GEN_INT (next_cum.sse_regno * 4))));
3280 else
3281 emit_move_insn (nsse_reg, label_ref);
3282 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
3283
3284 /* Compute the address of the memory block we save into. We always use a
3285 pointer pointing 127 bytes after the first byte to store - this is
3286 needed to keep the instruction size limited to 4 bytes. */
3287 tmp_reg = gen_reg_rtx (Pmode);
3288 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3289 plus_constant (save_area,
3290 8 * REGPARM_MAX + 127)));
3291 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
3292 set_mem_alias_set (mem, set);
3293 set_mem_align (mem, BITS_PER_WORD);
3294
3295 /* And finally do the dirty job! */
3296 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
3297 GEN_INT (next_cum.sse_regno), label));
3298 }
3299
3300 }
3301
3302 /* Implement va_start. */
3303
3304 void
3305 ix86_va_start (tree valist, rtx nextarg)
3306 {
3307 HOST_WIDE_INT words, n_gpr, n_fpr;
3308 tree f_gpr, f_fpr, f_ovf, f_sav;
3309 tree gpr, fpr, ovf, sav, t;
3310
3311 /* Only the 64-bit target needs anything special. */
3312 if (!TARGET_64BIT)
3313 {
3314 std_expand_builtin_va_start (valist, nextarg);
3315 return;
3316 }
3317
3318 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3319 f_fpr = TREE_CHAIN (f_gpr);
3320 f_ovf = TREE_CHAIN (f_fpr);
3321 f_sav = TREE_CHAIN (f_ovf);
3322
3323 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3324 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
3325 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
3326 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
3327 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
3328
3329 /* Count number of gp and fp argument registers used. */
3330 words = current_function_args_info.words;
3331 n_gpr = current_function_args_info.regno;
3332 n_fpr = current_function_args_info.sse_regno;
3333
3334 if (TARGET_DEBUG_ARG)
3335 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
3336 (int) words, (int) n_gpr, (int) n_fpr);
3337
3338 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
3339 build_int_cst (NULL_TREE, n_gpr * 8));
3340 TREE_SIDE_EFFECTS (t) = 1;
3341 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3342
3343 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
3344 build_int_cst (NULL_TREE, n_fpr * 16 + 8*REGPARM_MAX));
3345 TREE_SIDE_EFFECTS (t) = 1;
3346 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3347
3348 /* Find the overflow area. */
3349 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
3350 if (words != 0)
3351 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
3352 build_int_cst (NULL_TREE, words * UNITS_PER_WORD));
3353 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3354 TREE_SIDE_EFFECTS (t) = 1;
3355 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3356
3357 /* Find the register save area.
3358 The prologue of the function saves it right above the stack frame. */
3359 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
3360 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
3361 TREE_SIDE_EFFECTS (t) = 1;
3362 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3363 }
3364
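/* Informally, for a 64-bit function the expansion above boils down to
   (field names as in the va_list sketch near ix86_build_builtin_va_list):

       ap->gp_offset = <named GP register args used> * 8;
       ap->fp_offset = REGPARM_MAX * 8 + <named SSE register args used> * 16;
       ap->overflow_arg_area = <incoming argument area> + <stack words used> * 8;
       ap->reg_save_area = <register save area set up by the prologue>;  */
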
3365 /* Implement va_arg. */
3366
3367 tree
3368 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
3369 {
3370 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
3371 tree f_gpr, f_fpr, f_ovf, f_sav;
3372 tree gpr, fpr, ovf, sav, t;
3373 int size, rsize;
3374 tree lab_false, lab_over = NULL_TREE;
3375 tree addr, t2;
3376 rtx container;
3377 int indirect_p = 0;
3378 tree ptrtype;
3379 enum machine_mode nat_mode;
3380
3381 /* Only the 64-bit target needs anything special. */
3382 if (!TARGET_64BIT)
3383 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
3384
3385 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3386 f_fpr = TREE_CHAIN (f_gpr);
3387 f_ovf = TREE_CHAIN (f_fpr);
3388 f_sav = TREE_CHAIN (f_ovf);
3389
3390 valist = build_va_arg_indirect_ref (valist);
3391 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
3392 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
3393 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
3394 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
3395
3396 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
3397 if (indirect_p)
3398 type = build_pointer_type (type);
3399 size = int_size_in_bytes (type);
3400 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3401
3402 nat_mode = type_natural_mode (type);
3403 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
3404 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
3405
3406 /* Pull the value out of the saved registers. */
3407
3408 addr = create_tmp_var (ptr_type_node, "addr");
3409 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
3410
3411 if (container)
3412 {
3413 int needed_intregs, needed_sseregs;
3414 bool need_temp;
3415 tree int_addr, sse_addr;
3416
3417 lab_false = create_artificial_label ();
3418 lab_over = create_artificial_label ();
3419
3420 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
3421
3422 need_temp = (!REG_P (container)
3423 && ((needed_intregs && TYPE_ALIGN (type) > 64)
3424 || TYPE_ALIGN (type) > 128));
3425
3426 /* In case we are passing a structure, verify that it is a consecutive block
3427 in the register save area. If not, we need to do moves. */
3428 if (!need_temp && !REG_P (container))
3429 {
3430 /* Verify that all registers are strictly consecutive */
3431 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
3432 {
3433 int i;
3434
3435 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3436 {
3437 rtx slot = XVECEXP (container, 0, i);
3438 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
3439 || INTVAL (XEXP (slot, 1)) != i * 16)
3440 need_temp = 1;
3441 }
3442 }
3443 else
3444 {
3445 int i;
3446
3447 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3448 {
3449 rtx slot = XVECEXP (container, 0, i);
3450 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
3451 || INTVAL (XEXP (slot, 1)) != i * 8)
3452 need_temp = 1;
3453 }
3454 }
3455 }
3456 if (!need_temp)
3457 {
3458 int_addr = addr;
3459 sse_addr = addr;
3460 }
3461 else
3462 {
3463 int_addr = create_tmp_var (ptr_type_node, "int_addr");
3464 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
3465 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
3466 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
3467 }
3468
3469 /* First ensure that we fit completely in registers. */
3470 if (needed_intregs)
3471 {
3472 t = build_int_cst (TREE_TYPE (gpr),
3473 (REGPARM_MAX - needed_intregs + 1) * 8);
3474 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
3475 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
3476 t = build (COND_EXPR, void_type_node, t, t2, NULL_TREE);
3477 gimplify_and_add (t, pre_p);
3478 }
3479 if (needed_sseregs)
3480 {
3481 t = build_int_cst (TREE_TYPE (fpr),
3482 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
3483 + REGPARM_MAX * 8);
3484 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
3485 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
3486 t = build (COND_EXPR, void_type_node, t, t2, NULL_TREE);
3487 gimplify_and_add (t, pre_p);
3488 }
3489
3490 /* Compute index to start of area used for integer regs. */
3491 if (needed_intregs)
3492 {
3493 /* int_addr = gpr + sav; */
3494 t = fold_convert (ptr_type_node, gpr);
3495 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
3496 t = build2 (MODIFY_EXPR, void_type_node, int_addr, t);
3497 gimplify_and_add (t, pre_p);
3498 }
3499 if (needed_sseregs)
3500 {
3501 /* sse_addr = fpr + sav; */
3502 t = fold_convert (ptr_type_node, fpr);
3503 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
3504 t = build2 (MODIFY_EXPR, void_type_node, sse_addr, t);
3505 gimplify_and_add (t, pre_p);
3506 }
3507 if (need_temp)
3508 {
3509 int i;
3510 tree temp = create_tmp_var (type, "va_arg_tmp");
3511
3512 /* addr = &temp; */
3513 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
3514 t = build2 (MODIFY_EXPR, void_type_node, addr, t);
3515 gimplify_and_add (t, pre_p);
3516
3517 for (i = 0; i < XVECLEN (container, 0); i++)
3518 {
3519 rtx slot = XVECEXP (container, 0, i);
3520 rtx reg = XEXP (slot, 0);
3521 enum machine_mode mode = GET_MODE (reg);
3522 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
3523 tree addr_type = build_pointer_type (piece_type);
3524 tree src_addr, src;
3525 int src_offset;
3526 tree dest_addr, dest;
3527
3528 if (SSE_REGNO_P (REGNO (reg)))
3529 {
3530 src_addr = sse_addr;
3531 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
3532 }
3533 else
3534 {
3535 src_addr = int_addr;
3536 src_offset = REGNO (reg) * 8;
3537 }
3538 src_addr = fold_convert (addr_type, src_addr);
3539 src_addr = fold (build2 (PLUS_EXPR, addr_type, src_addr,
3540 size_int (src_offset)));
3541 src = build_va_arg_indirect_ref (src_addr);
3542
3543 dest_addr = fold_convert (addr_type, addr);
3544 dest_addr = fold (build2 (PLUS_EXPR, addr_type, dest_addr,
3545 size_int (INTVAL (XEXP (slot, 1)))));
3546 dest = build_va_arg_indirect_ref (dest_addr);
3547
3548 t = build2 (MODIFY_EXPR, void_type_node, dest, src);
3549 gimplify_and_add (t, pre_p);
3550 }
3551 }
3552
3553 if (needed_intregs)
3554 {
3555 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
3556 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
3557 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
3558 gimplify_and_add (t, pre_p);
3559 }
3560 if (needed_sseregs)
3561 {
3562 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
3563 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
3564 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
3565 gimplify_and_add (t, pre_p);
3566 }
3567
3568 t = build1 (GOTO_EXPR, void_type_node, lab_over);
3569 gimplify_and_add (t, pre_p);
3570
3571 t = build1 (LABEL_EXPR, void_type_node, lab_false);
3572 append_to_statement_list (t, pre_p);
3573 }
3574
3575 /* ... otherwise out of the overflow area. */
3576
3577 /* Care for on-stack alignment if needed. */
3578 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
3579 t = ovf;
3580 else
3581 {
3582 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
3583 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf,
3584 build_int_cst (TREE_TYPE (ovf), align - 1));
3585 t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
3586 build_int_cst (TREE_TYPE (t), -align));
3587 }
3588 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
3589
3590 t2 = build2 (MODIFY_EXPR, void_type_node, addr, t);
3591 gimplify_and_add (t2, pre_p);
3592
3593 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
3594 build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD));
3595 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3596 gimplify_and_add (t, pre_p);
3597
3598 if (container)
3599 {
3600 t = build1 (LABEL_EXPR, void_type_node, lab_over);
3601 append_to_statement_list (t, pre_p);
3602 }
3603
3604 ptrtype = build_pointer_type (type);
3605 addr = fold_convert (ptrtype, addr);
3606
3607 if (indirect_p)
3608 addr = build_va_arg_indirect_ref (addr);
3609 return build_va_arg_indirect_ref (addr);
3610 }
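
/* Schematically, for a value that may be passed in registers the GIMPLE
   built above behaves like the following pseudo-code (GP-register case;
   the SSE case is analogous with 16-byte slots starting at fp_offset):

       if (ap->gp_offset >= (REGPARM_MAX - needed_intregs + 1) * 8)
         goto lab_false;
       addr = ap->reg_save_area + ap->gp_offset;
       ap->gp_offset += needed_intregs * 8;
       goto lab_over;
     lab_false:
       addr = align (ap->overflow_arg_area);
       ap->overflow_arg_area = addr + rounded argument size;
     lab_over:
       result = *(TYPE *) addr;  */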
3611 \f
3612 /* Return nonzero if OPNUM's MEM should be matched
3613 in movabs* patterns. */
3614
3615 int
3616 ix86_check_movabs (rtx insn, int opnum)
3617 {
3618 rtx set, mem;
3619
3620 set = PATTERN (insn);
3621 if (GET_CODE (set) == PARALLEL)
3622 set = XVECEXP (set, 0, 0);
3623 if (GET_CODE (set) != SET)
3624 abort ();
3625 mem = XEXP (set, opnum);
3626 while (GET_CODE (mem) == SUBREG)
3627 mem = SUBREG_REG (mem);
3628 if (GET_CODE (mem) != MEM)
3629 abort ();
3630 return (volatile_ok || !MEM_VOLATILE_P (mem));
3631 }
3632 \f
3633 /* Initialize the table of extra 80387 mathematical constants. */
3634
3635 static void
3636 init_ext_80387_constants (void)
3637 {
3638 static const char * cst[5] =
3639 {
3640 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
3641 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
3642 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
3643 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
3644 "3.1415926535897932385128089594061862044", /* 4: fldpi */
3645 };
3646 int i;
3647
3648 for (i = 0; i < 5; i++)
3649 {
3650 real_from_string (&ext_80387_constants_table[i], cst[i]);
3651 /* Ensure each constant is rounded to XFmode precision. */
3652 real_convert (&ext_80387_constants_table[i],
3653 XFmode, &ext_80387_constants_table[i]);
3654 }
3655
3656 ext_80387_constants_init = 1;
3657 }
3658
3659 /* Return true if the constant is something that can be loaded with
3660 a special instruction. */
3661
3662 int
3663 standard_80387_constant_p (rtx x)
3664 {
3665 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
3666 return -1;
3667
3668 if (x == CONST0_RTX (GET_MODE (x)))
3669 return 1;
3670 if (x == CONST1_RTX (GET_MODE (x)))
3671 return 2;
3672
3673 /* For XFmode constants, try to find a special 80387 instruction when
3674 optimizing for size or on those CPUs that benefit from them. */
3675 if (GET_MODE (x) == XFmode
3676 && (optimize_size || x86_ext_80387_constants & TUNEMASK))
3677 {
3678 REAL_VALUE_TYPE r;
3679 int i;
3680
3681 if (! ext_80387_constants_init)
3682 init_ext_80387_constants ();
3683
3684 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3685 for (i = 0; i < 5; i++)
3686 if (real_identical (&r, &ext_80387_constants_table[i]))
3687 return i + 3;
3688 }
3689
3690 return 0;
3691 }
3692
3693 /* Return the opcode of the special instruction to be used to load
3694 the constant X. */
3695
3696 const char *
3697 standard_80387_constant_opcode (rtx x)
3698 {
3699 switch (standard_80387_constant_p (x))
3700 {
3701 case 1:
3702 return "fldz";
3703 case 2:
3704 return "fld1";
3705 case 3:
3706 return "fldlg2";
3707 case 4:
3708 return "fldln2";
3709 case 5:
3710 return "fldl2e";
3711 case 6:
3712 return "fldl2t";
3713 case 7:
3714 return "fldpi";
3715 }
3716 abort ();
3717 }
3718
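/* For example, a CONST_DOUBLE equal to 1.0 makes standard_80387_constant_p
   return 2, so the opcode printed here is "fld1"; an XFmode constant equal
   to pi matches slot 4 of the table above and yields 7, i.e. "fldpi",
   when the extended constants are enabled. */
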
3719 /* Return the CONST_DOUBLE representing the 80387 constant that is
3720 loaded by the specified special instruction. The argument IDX
3721 matches the return value from standard_80387_constant_p. */
3722
3723 rtx
3724 standard_80387_constant_rtx (int idx)
3725 {
3726 int i;
3727
3728 if (! ext_80387_constants_init)
3729 init_ext_80387_constants ();
3730
3731 switch (idx)
3732 {
3733 case 3:
3734 case 4:
3735 case 5:
3736 case 6:
3737 case 7:
3738 i = idx - 3;
3739 break;
3740
3741 default:
3742 abort ();
3743 }
3744
3745 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
3746 XFmode);
3747 }
3748
3749 /* Return 1 if X is an FP constant that we can load into an SSE register
3750 without using memory. */
3751 int
3752 standard_sse_constant_p (rtx x)
3753 {
3754 if (x == const0_rtx)
3755 return 1;
3756 return (x == CONST0_RTX (GET_MODE (x)));
3757 }
3758
3759 /* Returns 1 if OP contains a symbol reference */
3760
3761 int
3762 symbolic_reference_mentioned_p (rtx op)
3763 {
3764 const char *fmt;
3765 int i;
3766
3767 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
3768 return 1;
3769
3770 fmt = GET_RTX_FORMAT (GET_CODE (op));
3771 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3772 {
3773 if (fmt[i] == 'E')
3774 {
3775 int j;
3776
3777 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3778 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3779 return 1;
3780 }
3781
3782 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3783 return 1;
3784 }
3785
3786 return 0;
3787 }
3788
3789 /* Return 1 if it is appropriate to emit `ret' instructions in the
3790 body of a function. Do this only if the epilogue is simple, needing a
3791 couple of insns. Prior to reloading, we can't tell how many registers
3792 must be saved, so return 0 then. Return 0 if there is no frame
3793 marker to de-allocate. */
3794
3795 int
3796 ix86_can_use_return_insn_p (void)
3797 {
3798 struct ix86_frame frame;
3799
3800 if (! reload_completed || frame_pointer_needed)
3801 return 0;
3802
3803 /* Don't allow more than 32K of pop, since that's all we can do
3804 with one instruction. */
3805 if (current_function_pops_args
3806 && current_function_args_size >= 32768)
3807 return 0;
3808
3809 ix86_compute_frame_layout (&frame);
3810 return frame.to_allocate == 0 && frame.nregs == 0;
3811 }
3812 \f
3813 /* Value should be nonzero if functions must have frame pointers.
3814 Zero means the frame pointer need not be set up (and parms may
3815 be accessed via the stack pointer) in functions that seem suitable. */
3816
3817 int
3818 ix86_frame_pointer_required (void)
3819 {
3820 /* If we accessed previous frames, then the generated code expects
3821 to be able to access the saved ebp value in our frame. */
3822 if (cfun->machine->accesses_prev_frame)
3823 return 1;
3824
3825 /* Several x86 OSes need a frame pointer for other reasons,
3826 usually pertaining to setjmp. */
3827 if (SUBTARGET_FRAME_POINTER_REQUIRED)
3828 return 1;
3829
3830 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
3831 the frame pointer by default. Turn it back on now if we've not
3832 got a leaf function. */
3833 if (TARGET_OMIT_LEAF_FRAME_POINTER
3834 && (!current_function_is_leaf))
3835 return 1;
3836
3837 if (current_function_profile)
3838 return 1;
3839
3840 return 0;
3841 }
3842
3843 /* Record that the current function accesses previous call frames. */
3844
3845 void
3846 ix86_setup_frame_addresses (void)
3847 {
3848 cfun->machine->accesses_prev_frame = 1;
3849 }
3850 \f
3851 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
3852 # define USE_HIDDEN_LINKONCE 1
3853 #else
3854 # define USE_HIDDEN_LINKONCE 0
3855 #endif
3856
3857 static int pic_labels_used;
3858
3859 /* Fills in the label name that should be used for a pc thunk for
3860 the given register. */
3861
3862 static void
3863 get_pc_thunk_name (char name[32], unsigned int regno)
3864 {
3865 if (USE_HIDDEN_LINKONCE)
3866 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
3867 else
3868 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
3869 }
3870
3871
3872 /* This function emits the pc thunks used by -fpic code: each thunk loads
3873 its register with the return address of the caller and then returns. */
3874
3875 void
3876 ix86_file_end (void)
3877 {
3878 rtx xops[2];
3879 int regno;
3880
3881 for (regno = 0; regno < 8; ++regno)
3882 {
3883 char name[32];
3884
3885 if (! ((pic_labels_used >> regno) & 1))
3886 continue;
3887
3888 get_pc_thunk_name (name, regno);
3889
3890 if (USE_HIDDEN_LINKONCE)
3891 {
3892 tree decl;
3893
3894 decl = build_decl (FUNCTION_DECL, get_identifier (name),
3895 error_mark_node);
3896 TREE_PUBLIC (decl) = 1;
3897 TREE_STATIC (decl) = 1;
3898 DECL_ONE_ONLY (decl) = 1;
3899
3900 (*targetm.asm_out.unique_section) (decl, 0);
3901 named_section (decl, NULL, 0);
3902
3903 (*targetm.asm_out.globalize_label) (asm_out_file, name);
3904 fputs ("\t.hidden\t", asm_out_file);
3905 assemble_name (asm_out_file, name);
3906 fputc ('\n', asm_out_file);
3907 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
3908 }
3909 else
3910 {
3911 text_section ();
3912 ASM_OUTPUT_LABEL (asm_out_file, name);
3913 }
3914
3915 xops[0] = gen_rtx_REG (SImode, regno);
3916 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
3917 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
3918 output_asm_insn ("ret", xops);
3919 }
3920
3921 if (NEED_INDICATE_EXEC_STACK)
3922 file_end_indicate_exec_stack ();
3923 }
3924
3925 /* Emit code for the SET_GOT patterns. */
3926
3927 const char *
3928 output_set_got (rtx dest)
3929 {
3930 rtx xops[3];
3931
3932 xops[0] = dest;
3933 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
3934
3935 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
3936 {
3937 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
3938
3939 if (!flag_pic)
3940 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
3941 else
3942 output_asm_insn ("call\t%a2", xops);
3943
3944 #if TARGET_MACHO
3945 /* Output the "canonical" label name ("Lxx$pb") here too. This
3946 is what will be referred to by the Mach-O PIC subsystem. */
3947 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
3948 #endif
3949 (*targetm.asm_out.internal_label) (asm_out_file, "L",
3950 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
3951
3952 if (flag_pic)
3953 output_asm_insn ("pop{l}\t%0", xops);
3954 }
3955 else
3956 {
3957 char name[32];
3958 get_pc_thunk_name (name, REGNO (dest));
3959 pic_labels_used |= 1 << REGNO (dest);
3960
3961 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
3962 xops[2] = gen_rtx_MEM (QImode, xops[2]);
3963 output_asm_insn ("call\t%X2", xops);
3964 }
3965
3966 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
3967 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
3968 else if (!TARGET_MACHO)
3969 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
3970
3971 return "";
3972 }
3973
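/* As a rough illustration, when %ebx is the destination the non-thunk
   path above emits a sequence along the lines of

       call .L2
   .L2: popl %ebx
        addl $_GLOBAL_OFFSET_TABLE_+[.-.L2], %ebx

   while with deep branch prediction it instead calls the per-register
   thunk that ix86_file_end writes out:

       call __i686.get_pc_thunk.bx
       addl $_GLOBAL_OFFSET_TABLE_, %ebx  */
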
3974 /* Generate a "push" pattern for input ARG. */
3975
3976 static rtx
3977 gen_push (rtx arg)
3978 {
3979 return gen_rtx_SET (VOIDmode,
3980 gen_rtx_MEM (Pmode,
3981 gen_rtx_PRE_DEC (Pmode,
3982 stack_pointer_rtx)),
3983 arg);
3984 }
3985
3986 /* Return >= 0 if there is an unused call-clobbered register available
3987 for the entire function. */
3988
3989 static unsigned int
3990 ix86_select_alt_pic_regnum (void)
3991 {
3992 if (current_function_is_leaf && !current_function_profile)
3993 {
3994 int i;
3995 for (i = 2; i >= 0; --i)
3996 if (!regs_ever_live[i])
3997 return i;
3998 }
3999
4000 return INVALID_REGNUM;
4001 }
4002
4003 /* Return 1 if we need to save REGNO. */
4004 static int
4005 ix86_save_reg (unsigned int regno, int maybe_eh_return)
4006 {
4007 if (pic_offset_table_rtx
4008 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4009 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4010 || current_function_profile
4011 || current_function_calls_eh_return
4012 || current_function_uses_const_pool))
4013 {
4014 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4015 return 0;
4016 return 1;
4017 }
4018
4019 if (current_function_calls_eh_return && maybe_eh_return)
4020 {
4021 unsigned i;
4022 for (i = 0; ; i++)
4023 {
4024 unsigned test = EH_RETURN_DATA_REGNO (i);
4025 if (test == INVALID_REGNUM)
4026 break;
4027 if (test == regno)
4028 return 1;
4029 }
4030 }
4031
4032 return (regs_ever_live[regno]
4033 && !call_used_regs[regno]
4034 && !fixed_regs[regno]
4035 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4036 }
4037
4038 /* Return number of registers to be saved on the stack. */
4039
4040 static int
4041 ix86_nsaved_regs (void)
4042 {
4043 int nregs = 0;
4044 int regno;
4045
4046 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4047 if (ix86_save_reg (regno, true))
4048 nregs++;
4049 return nregs;
4050 }
4051
4052 /* Return the offset between two registers, one to be eliminated, and the other
4053 its replacement, at the start of a routine. */
4054
4055 HOST_WIDE_INT
4056 ix86_initial_elimination_offset (int from, int to)
4057 {
4058 struct ix86_frame frame;
4059 ix86_compute_frame_layout (&frame);
4060
4061 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4062 return frame.hard_frame_pointer_offset;
4063 else if (from == FRAME_POINTER_REGNUM
4064 && to == HARD_FRAME_POINTER_REGNUM)
4065 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
4066 else
4067 {
4068 if (to != STACK_POINTER_REGNUM)
4069 abort ();
4070 else if (from == ARG_POINTER_REGNUM)
4071 return frame.stack_pointer_offset;
4072 else if (from != FRAME_POINTER_REGNUM)
4073 abort ();
4074 else
4075 return frame.stack_pointer_offset - frame.frame_pointer_offset;
4076 }
4077 }
4078
4079 /* Fill structure ix86_frame about frame of currently computed function. */
4080
4081 static void
4082 ix86_compute_frame_layout (struct ix86_frame *frame)
4083 {
4084 HOST_WIDE_INT total_size;
4085 unsigned int stack_alignment_needed;
4086 HOST_WIDE_INT offset;
4087 unsigned int preferred_alignment;
4088 HOST_WIDE_INT size = get_frame_size ();
4089
4090 frame->nregs = ix86_nsaved_regs ();
4091 total_size = size;
4092
4093 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
4094 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4095
4096 /* During the reload iteration the number of registers saved can change.
4097 Recompute the value as needed. Do not recompute when the number of registers
4098 didn't change, as reload does multiple calls to the function and does not
4099 expect the decision to change within a single iteration. */
4100 if (!optimize_size
4101 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
4102 {
4103 int count = frame->nregs;
4104
4105 cfun->machine->use_fast_prologue_epilogue_nregs = count;
4106 /* The fast prologue uses moves instead of pushes to save registers. This
4107 is significantly longer, but it also executes faster, as modern hardware
4108 can execute the moves in parallel but can't do that for push/pop.
4109
4110 Be careful about choosing which prologue to emit: when the function takes
4111 many instructions to execute, we may as well use the slow version; the
4112 same holds when the function is known to be outside a hot spot (this is
4113 known with feedback only). Weight the size of the function by the number
4114 of registers to save, as it is cheap to use one or two push instructions
4115 but very slow to use many of them. */
4116 if (count)
4117 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
4118 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
4119 || (flag_branch_probabilities
4120 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
4121 cfun->machine->use_fast_prologue_epilogue = false;
4122 else
4123 cfun->machine->use_fast_prologue_epilogue
4124 = !expensive_function_p (count);
4125 }
4126 if (TARGET_PROLOGUE_USING_MOVE
4127 && cfun->machine->use_fast_prologue_epilogue)
4128 frame->save_regs_using_mov = true;
4129 else
4130 frame->save_regs_using_mov = false;
4131
4132
4133 /* Skip return address and saved base pointer. */
4134 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4135
4136 frame->hard_frame_pointer_offset = offset;
4137
4138 /* Do some sanity checking of stack_alignment_needed and
4139 preferred_alignment, since the i386 port is the only one using these
4140 features, which may break easily. */
4141
4142 if (size && !stack_alignment_needed)
4143 abort ();
4144 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
4145 abort ();
4146 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4147 abort ();
4148 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4149 abort ();
4150
4151 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
4152 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
4153
4154 /* Register save area */
4155 offset += frame->nregs * UNITS_PER_WORD;
4156
4157 /* Va-arg area */
4158 if (ix86_save_varrargs_registers)
4159 {
4160 offset += X86_64_VARARGS_SIZE;
4161 frame->va_arg_size = X86_64_VARARGS_SIZE;
4162 }
4163 else
4164 frame->va_arg_size = 0;
4165
4166 /* Align start of frame for local function. */
4167 frame->padding1 = ((offset + stack_alignment_needed - 1)
4168 & -stack_alignment_needed) - offset;
4169
4170 offset += frame->padding1;
4171
4172 /* Frame pointer points here. */
4173 frame->frame_pointer_offset = offset;
4174
4175 offset += size;
4176
4177 /* Add the outgoing arguments area. It can be skipped if we eliminated
4178 all the function calls as dead code.
4179 Skipping is however impossible when the function calls alloca, as the
4180 alloca expander assumes that the last current_function_outgoing_args_size
4181 bytes of the stack frame are unused. */
4182 if (ACCUMULATE_OUTGOING_ARGS
4183 && (!current_function_is_leaf || current_function_calls_alloca))
4184 {
4185 offset += current_function_outgoing_args_size;
4186 frame->outgoing_arguments_size = current_function_outgoing_args_size;
4187 }
4188 else
4189 frame->outgoing_arguments_size = 0;
4190
4191 /* Align stack boundary. Only needed if we're calling another function
4192 or using alloca. */
4193 if (!current_function_is_leaf || current_function_calls_alloca)
4194 frame->padding2 = ((offset + preferred_alignment - 1)
4195 & -preferred_alignment) - offset;
4196 else
4197 frame->padding2 = 0;
4198
4199 offset += frame->padding2;
4200
4201 /* We've reached end of stack frame. */
4202 frame->stack_pointer_offset = offset;
4203
4204 /* Size prologue needs to allocate. */
4205 frame->to_allocate =
4206 (size + frame->padding1 + frame->padding2
4207 + frame->outgoing_arguments_size + frame->va_arg_size);
4208
4209 if ((!frame->to_allocate && frame->nregs <= 1)
4210 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
4211 frame->save_regs_using_mov = false;
4212
4213 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
4214 && current_function_is_leaf)
4215 {
4216 frame->red_zone_size = frame->to_allocate;
4217 if (frame->save_regs_using_mov)
4218 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
4219 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
4220 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
4221 }
4222 else
4223 frame->red_zone_size = 0;
4224 frame->to_allocate -= frame->red_zone_size;
4225 frame->stack_pointer_offset -= frame->red_zone_size;
4226 #if 0
4227 fprintf (stderr, "nregs: %i\n", frame->nregs);
4228 fprintf (stderr, "size: %i\n", size);
4229 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
4230 fprintf (stderr, "padding1: %i\n", frame->padding1);
4231 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
4232 fprintf (stderr, "padding2: %i\n", frame->padding2);
4233 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
4234 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
4235 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
4236 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
4237 frame->hard_frame_pointer_offset);
4238 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
4239 #endif
4240 }
4241
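/* A rough picture of the frame computed above, from higher to lower
   addresses:

       return address
       saved frame pointer           (only if frame_pointer_needed)
                                      <- hard_frame_pointer_offset
       register save area            (nregs * UNITS_PER_WORD)
       varargs register save area    (only if ix86_save_varrargs_registers)
       padding1                      (up to stack_alignment_needed)
                                      <- frame_pointer_offset
       local variables               (get_frame_size ())
       outgoing argument area        (only with ACCUMULATE_OUTGOING_ARGS)
       padding2                      (up to preferred_alignment)
                                      <- stack_pointer_offset  */
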
4242 /* Emit code to save registers in the prologue. */
4243
4244 static void
4245 ix86_emit_save_regs (void)
4246 {
4247 int regno;
4248 rtx insn;
4249
4250 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4251 if (ix86_save_reg (regno, true))
4252 {
4253 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
4254 RTX_FRAME_RELATED_P (insn) = 1;
4255 }
4256 }
4257
4258 /* Emit code to save registers using MOV insns. First register
4259 is restored from POINTER + OFFSET. */
4260 static void
4261 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
4262 {
4263 int regno;
4264 rtx insn;
4265
4266 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4267 if (ix86_save_reg (regno, true))
4268 {
4269 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
4270 Pmode, offset),
4271 gen_rtx_REG (Pmode, regno));
4272 RTX_FRAME_RELATED_P (insn) = 1;
4273 offset += UNITS_PER_WORD;
4274 }
4275 }
4276
4277 /* Expand prologue or epilogue stack adjustment.
4278 The pattern exists to put a dependency on all ebp-based memory accesses.
4279 STYLE should be negative if instructions should be marked as frame related,
4280 zero if the %r11 register is live and cannot be freely used, and positive
4281 otherwise. */
4282
4283 static void
4284 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
4285 {
4286 rtx insn;
4287
4288 if (! TARGET_64BIT)
4289 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
4290 else if (x86_64_immediate_operand (offset, DImode))
4291 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
4292 else
4293 {
4294 rtx r11;
4295 /* r11 is used by indirect sibcall return as well, set before the
4296 epilogue and used after the epilogue. ATM indirect sibcall
4297 shouldn't be used together with huge frame sizes in one
4298 function because of the frame_size check in sibcall.c. */
4299 if (style == 0)
4300 abort ();
4301 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
4302 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
4303 if (style < 0)
4304 RTX_FRAME_RELATED_P (insn) = 1;
4305 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
4306 offset));
4307 }
4308 if (style < 0)
4309 RTX_FRAME_RELATED_P (insn) = 1;
4310 }
4311
4312 /* Expand the prologue into a bunch of separate insns. */
4313
4314 void
4315 ix86_expand_prologue (void)
4316 {
4317 rtx insn;
4318 bool pic_reg_used;
4319 struct ix86_frame frame;
4320 HOST_WIDE_INT allocate;
4321
4322 ix86_compute_frame_layout (&frame);
4323
4324 /* Note: AT&T enter does NOT have reversed args. Enter is probably
4325 slower on all targets. Also sdb doesn't like it. */
4326
4327 if (frame_pointer_needed)
4328 {
4329 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
4330 RTX_FRAME_RELATED_P (insn) = 1;
4331
4332 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
4333 RTX_FRAME_RELATED_P (insn) = 1;
4334 }
4335
4336 allocate = frame.to_allocate;
4337
4338 if (!frame.save_regs_using_mov)
4339 ix86_emit_save_regs ();
4340 else
4341 allocate += frame.nregs * UNITS_PER_WORD;
4342
4343 /* When using the red zone we may start register saving before allocating
4344 the stack frame, saving one cycle of the prologue. */
4345 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
4346 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
4347 : stack_pointer_rtx,
4348 -frame.nregs * UNITS_PER_WORD);
4349
4350 if (allocate == 0)
4351 ;
4352 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
4353 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
4354 GEN_INT (-allocate), -1);
4355 else
4356 {
4357 /* Only valid for Win32. */
4358 rtx eax = gen_rtx_REG (SImode, 0);
4359 bool eax_live = ix86_eax_live_at_start_p ();
4360 rtx t;
4361
4362 if (TARGET_64BIT)
4363 abort ();
4364
4365 if (eax_live)
4366 {
4367 emit_insn (gen_push (eax));
4368 allocate -= 4;
4369 }
4370
4371 emit_move_insn (eax, GEN_INT (allocate));
4372
4373 insn = emit_insn (gen_allocate_stack_worker (eax));
4374 RTX_FRAME_RELATED_P (insn) = 1;
4375 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
4376 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
4377 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
4378 t, REG_NOTES (insn));
4379
4380 if (eax_live)
4381 {
4382 if (frame_pointer_needed)
4383 t = plus_constant (hard_frame_pointer_rtx,
4384 allocate
4385 - frame.to_allocate
4386 - frame.nregs * UNITS_PER_WORD);
4387 else
4388 t = plus_constant (stack_pointer_rtx, allocate);
4389 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
4390 }
4391 }
4392
4393 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
4394 {
4395 if (!frame_pointer_needed || !frame.to_allocate)
4396 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
4397 else
4398 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
4399 -frame.nregs * UNITS_PER_WORD);
4400 }
4401
4402 pic_reg_used = false;
4403 if (pic_offset_table_rtx
4404 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4405 || current_function_profile))
4406 {
4407 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
4408
4409 if (alt_pic_reg_used != INVALID_REGNUM)
4410 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
4411
4412 pic_reg_used = true;
4413 }
4414
4415 if (pic_reg_used)
4416 {
4417 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
4418
4419 /* Even with accurate pre-reload life analysis, we can wind up
4420 deleting all references to the pic register after reload.
4421 Consider if cross-jumping unifies two sides of a branch
4422 controlled by a comparison vs the only read from a global.
4423 In which case, allow the set_got to be deleted, though we're
4424 too late to do anything about the ebx save in the prologue. */
4425 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
4426 }
4427
4428 /* Prevent function calls from being scheduled before the call to mcount.
4429 In the pic_reg_used case, make sure that the GOT load isn't deleted. */
4430 if (current_function_profile)
4431 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
4432 }
4433
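/* For a typical 32-bit function with a frame pointer and push-based
   register saving, the insns emitted above correspond to assembly
   roughly like:

       pushl %ebp
       movl  %esp, %ebp
       pushl <each register ix86_save_reg approves>
       subl  $<frame.to_allocate>, %esp
       <set_got sequence, if a PIC register is needed>  */
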
4434 /* Emit code to restore saved registers using MOV insns. First register
4435 is restored from POINTER + OFFSET. */
4436 static void
4437 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
4438 int maybe_eh_return)
4439 {
4440 int regno;
4441 rtx base_address = gen_rtx_MEM (Pmode, pointer);
4442
4443 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4444 if (ix86_save_reg (regno, maybe_eh_return))
4445 {
4446 /* Ensure that adjust_address won't be forced to produce a pointer
4447 out of the range allowed by the x86-64 instruction set. */
4448 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
4449 {
4450 rtx r11;
4451
4452 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
4453 emit_move_insn (r11, GEN_INT (offset));
4454 emit_insn (gen_adddi3 (r11, r11, pointer));
4455 base_address = gen_rtx_MEM (Pmode, r11);
4456 offset = 0;
4457 }
4458 emit_move_insn (gen_rtx_REG (Pmode, regno),
4459 adjust_address (base_address, Pmode, offset));
4460 offset += UNITS_PER_WORD;
4461 }
4462 }
4463
4464 /* Restore function stack, frame, and registers. */
4465
4466 void
4467 ix86_expand_epilogue (int style)
4468 {
4469 int regno;
4470 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
4471 struct ix86_frame frame;
4472 HOST_WIDE_INT offset;
4473
4474 ix86_compute_frame_layout (&frame);
4475
4476 /* Calculate start of saved registers relative to ebp. Special care
4477 must be taken for the normal return case of a function using
4478 eh_return: the eax and edx registers are marked as saved, but not
4479 restored along this path. */
4480 offset = frame.nregs;
4481 if (current_function_calls_eh_return && style != 2)
4482 offset -= 2;
4483 offset *= -UNITS_PER_WORD;
4484
4485 /* If we're only restoring one register and sp is not valid, then
4486 use a move instruction to restore the register, since it's
4487 less work than reloading sp and popping the register.
4488
4489 The default code results in a stack adjustment using an add/lea instruction,
4490 while this code results in a LEAVE instruction (or discrete equivalent),
4491 so it is profitable in some other cases as well, especially when there
4492 are no registers to restore. We also use this code when TARGET_USE_LEAVE
4493 and there is exactly one register to pop. This heuristic may need some
4494 tuning in the future. */
4495 if ((!sp_valid && frame.nregs <= 1)
4496 || (TARGET_EPILOGUE_USING_MOVE
4497 && cfun->machine->use_fast_prologue_epilogue
4498 && (frame.nregs > 1 || frame.to_allocate))
4499 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
4500 || (frame_pointer_needed && TARGET_USE_LEAVE
4501 && cfun->machine->use_fast_prologue_epilogue
4502 && frame.nregs == 1)
4503 || current_function_calls_eh_return)
4504 {
4505 /* Restore registers. We can use ebp or esp to address the memory
4506 locations. If both are available, default to ebp, since offsets
4507 are known to be small. The only exception is esp pointing directly to
4508 the end of the block of saved registers, where we may simplify the
4509 addressing mode. */
4510
4511 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
4512 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
4513 frame.to_allocate, style == 2);
4514 else
4515 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
4516 offset, style == 2);
4517
4518 /* eh_return epilogues need %ecx added to the stack pointer. */
4519 if (style == 2)
4520 {
4521 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
4522
4523 if (frame_pointer_needed)
4524 {
4525 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
4526 tmp = plus_constant (tmp, UNITS_PER_WORD);
4527 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
4528
4529 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
4530 emit_move_insn (hard_frame_pointer_rtx, tmp);
4531
4532 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
4533 const0_rtx, style);
4534 }
4535 else
4536 {
4537 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
4538 tmp = plus_constant (tmp, (frame.to_allocate
4539 + frame.nregs * UNITS_PER_WORD));
4540 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
4541 }
4542 }
4543 else if (!frame_pointer_needed)
4544 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
4545 GEN_INT (frame.to_allocate
4546 + frame.nregs * UNITS_PER_WORD),
4547 style);
4548 /* If not an i386, mov & pop is faster than "leave". */
4549 else if (TARGET_USE_LEAVE || optimize_size
4550 || !cfun->machine->use_fast_prologue_epilogue)
4551 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4552 else
4553 {
4554 pro_epilogue_adjust_stack (stack_pointer_rtx,
4555 hard_frame_pointer_rtx,
4556 const0_rtx, style);
4557 if (TARGET_64BIT)
4558 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4559 else
4560 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4561 }
4562 }
4563 else
4564 {
4565 /* First step is to deallocate the stack frame so that we can
4566 pop the registers. */
4567 if (!sp_valid)
4568 {
4569 if (!frame_pointer_needed)
4570 abort ();
4571 pro_epilogue_adjust_stack (stack_pointer_rtx,
4572 hard_frame_pointer_rtx,
4573 GEN_INT (offset), style);
4574 }
4575 else if (frame.to_allocate)
4576 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
4577 GEN_INT (frame.to_allocate), style);
4578
4579 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4580 if (ix86_save_reg (regno, false))
4581 {
4582 if (TARGET_64BIT)
4583 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
4584 else
4585 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
4586 }
4587 if (frame_pointer_needed)
4588 {
4589 /* Leave results in shorter dependency chains on CPUs that are
4590 able to grok it fast. */
4591 if (TARGET_USE_LEAVE)
4592 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4593 else if (TARGET_64BIT)
4594 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4595 else
4596 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4597 }
4598 }
4599
4600 /* Sibcall epilogues don't want a return instruction. */
4601 if (style == 0)
4602 return;
4603
4604 if (current_function_pops_args && current_function_args_size)
4605 {
4606 rtx popc = GEN_INT (current_function_pops_args);
4607
4608 /* i386 can only pop 64K bytes. If asked to pop more, pop
4609 return address, do explicit add, and jump indirectly to the
4610 caller. */
4611
4612 if (current_function_pops_args >= 65536)
4613 {
4614 rtx ecx = gen_rtx_REG (SImode, 2);
4615
4616 /* There is no "pascal" calling convention in 64bit ABI. */
4617 if (TARGET_64BIT)
4618 abort ();
4619
4620 emit_insn (gen_popsi1 (ecx));
4621 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
4622 emit_jump_insn (gen_return_indirect_internal (ecx));
4623 }
4624 else
4625 emit_jump_insn (gen_return_pop_internal (popc));
4626 }
4627 else
4628 emit_jump_insn (gen_return_internal ());
4629 }
4630
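/* Typical 32-bit shapes of the epilogue emitted above, roughly:

       with a frame pointer and TARGET_USE_LEAVE:
           leave
           ret
       with a frame pointer, discrete equivalent:
           movl %ebp, %esp
           popl %ebp
           ret
       without a frame pointer:
           addl $<frame.to_allocate>, %esp
           popl <each saved register>
           ret  */
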
4631 /* Reset from the function's potential modifications. */
4632
4633 static void
4634 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
4635 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
4636 {
4637 if (pic_offset_table_rtx)
4638 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
4639 }
4640 \f
4641 /* Extract the parts of an RTL expression that is a valid memory address
4642 for an instruction. Return 0 if the structure of the address is
4643 grossly off. Return -1 if the address contains ASHIFT, so it is not
4644 strictly valid, but still used for computing length of lea instruction. */
4645
4646 int
4647 ix86_decompose_address (rtx addr, struct ix86_address *out)
4648 {
4649 rtx base = NULL_RTX;
4650 rtx index = NULL_RTX;
4651 rtx disp = NULL_RTX;
4652 HOST_WIDE_INT scale = 1;
4653 rtx scale_rtx = NULL_RTX;
4654 int retval = 1;
4655 enum ix86_address_seg seg = SEG_DEFAULT;
4656
4657 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
4658 base = addr;
4659 else if (GET_CODE (addr) == PLUS)
4660 {
4661 rtx addends[4], op;
4662 int n = 0, i;
4663
4664 op = addr;
4665 do
4666 {
4667 if (n >= 4)
4668 return 0;
4669 addends[n++] = XEXP (op, 1);
4670 op = XEXP (op, 0);
4671 }
4672 while (GET_CODE (op) == PLUS);
4673 if (n >= 4)
4674 return 0;
4675 addends[n] = op;
4676
4677 for (i = n; i >= 0; --i)
4678 {
4679 op = addends[i];
4680 switch (GET_CODE (op))
4681 {
4682 case MULT:
4683 if (index)
4684 return 0;
4685 index = XEXP (op, 0);
4686 scale_rtx = XEXP (op, 1);
4687 break;
4688
4689 case UNSPEC:
4690 if (XINT (op, 1) == UNSPEC_TP
4691 && TARGET_TLS_DIRECT_SEG_REFS
4692 && seg == SEG_DEFAULT)
4693 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
4694 else
4695 return 0;
4696 break;
4697
4698 case REG:
4699 case SUBREG:
4700 if (!base)
4701 base = op;
4702 else if (!index)
4703 index = op;
4704 else
4705 return 0;
4706 break;
4707
4708 case CONST:
4709 case CONST_INT:
4710 case SYMBOL_REF:
4711 case LABEL_REF:
4712 if (disp)
4713 return 0;
4714 disp = op;
4715 break;
4716
4717 default:
4718 return 0;
4719 }
4720 }
4721 }
4722 else if (GET_CODE (addr) == MULT)
4723 {
4724 index = XEXP (addr, 0); /* index*scale */
4725 scale_rtx = XEXP (addr, 1);
4726 }
4727 else if (GET_CODE (addr) == ASHIFT)
4728 {
4729 rtx tmp;
4730
4731 /* We're called for lea too, which implements ashift on occasion. */
4732 index = XEXP (addr, 0);
4733 tmp = XEXP (addr, 1);
4734 if (GET_CODE (tmp) != CONST_INT)
4735 return 0;
4736 scale = INTVAL (tmp);
4737 if ((unsigned HOST_WIDE_INT) scale > 3)
4738 return 0;
4739 scale = 1 << scale;
4740 retval = -1;
4741 }
4742 else
4743 disp = addr; /* displacement */
4744
4745 /* Extract the integral value of scale. */
4746 if (scale_rtx)
4747 {
4748 if (GET_CODE (scale_rtx) != CONST_INT)
4749 return 0;
4750 scale = INTVAL (scale_rtx);
4751 }
4752
4753 /* Allow the arg pointer and stack pointer as index if there is no scaling. */
4754 if (base && index && scale == 1
4755 && (index == arg_pointer_rtx
4756 || index == frame_pointer_rtx
4757 || (REG_P (index) && REGNO (index) == STACK_POINTER_REGNUM)))
4758 {
4759 rtx tmp = base;
4760 base = index;
4761 index = tmp;
4762 }
4763
4764 /* Special case: %ebp cannot be encoded as a base without a displacement. */
4765 if ((base == hard_frame_pointer_rtx
4766 || base == frame_pointer_rtx
4767 || base == arg_pointer_rtx) && !disp)
4768 disp = const0_rtx;
4769
4770 /* Special case: on K6, [%esi] makes the instruction vector decoded.
4771 Avoid this by transforming to [%esi+0]. */
4772 if (ix86_tune == PROCESSOR_K6 && !optimize_size
4773 && base && !index && !disp
4774 && REG_P (base)
4775 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
4776 disp = const0_rtx;
4777
4778 /* Special case: encode reg+reg instead of reg*2. */
4779 if (!base && index && scale && scale == 2)
4780 base = index, scale = 1;
4781
4782 /* Special case: scaling cannot be encoded without base or displacement. */
4783 if (!base && !disp && index && scale != 1)
4784 disp = const0_rtx;
4785
4786 out->base = base;
4787 out->index = index;
4788 out->disp = disp;
4789 out->scale = scale;
4790 out->seg = seg;
4791
4792 return retval;
4793 }
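
/* As an illustrative example, an address such as

       (plus (plus (mult (reg %esi) (const_int 4)) (reg %ebx))
             (const_int 12))

   decomposes into base = %ebx, index = %esi, scale = 4 and disp = 12,
   i.e. the operand printed as "12(%ebx,%esi,4)" in AT&T syntax.  */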
4794 \f
4795 /* Return cost of the memory address x.
4796 For i386, it is better to use a complex address than to let gcc copy
4797 the address into a reg and make a new pseudo. But not if the address
4798 requires two regs - that would mean more pseudos with longer
4799 lifetimes. */
4800 static int
4801 ix86_address_cost (rtx x)
4802 {
4803 struct ix86_address parts;
4804 int cost = 1;
4805
4806 if (!ix86_decompose_address (x, &parts))
4807 abort ();
4808
4809 /* More complex memory references are better. */
4810 if (parts.disp && parts.disp != const0_rtx)
4811 cost--;
4812 if (parts.seg != SEG_DEFAULT)
4813 cost--;
4814
4815 /* Attempt to minimize number of registers in the address. */
4816 if ((parts.base
4817 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
4818 || (parts.index
4819 && (!REG_P (parts.index)
4820 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
4821 cost++;
4822
4823 if (parts.base
4824 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
4825 && parts.index
4826 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
4827 && parts.base != parts.index)
4828 cost++;
4829
4830 /* The AMD K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
4831 since its predecode logic can't detect the length of instructions
4832 and they degenerate to vector decoding. Increase the cost of such
4833 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
4834 to split such addresses or even refuse such addresses at all.
4835
4836 The following addressing modes are affected:
4837 [base+scale*index]
4838 [scale*index+disp]
4839 [base+index]
4840
4841 The first and last case may be avoidable by explicitly coding the zero
4842 in the memory address, but I don't have an AMD-K6 machine handy to check
4843 this theory. */
4844
4845 if (TARGET_K6
4846 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
4847 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
4848 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
4849 cost += 10;
4850
4851 return cost;
4852 }
4853 \f
4854 /* If X is a machine specific address (i.e. a symbol or label being
4855 referenced as a displacement from the GOT implemented using an
4856 UNSPEC), then return the base term. Otherwise return X. */
4857
4858 rtx
4859 ix86_find_base_term (rtx x)
4860 {
4861 rtx term;
4862
4863 if (TARGET_64BIT)
4864 {
4865 if (GET_CODE (x) != CONST)
4866 return x;
4867 term = XEXP (x, 0);
4868 if (GET_CODE (term) == PLUS
4869 && (GET_CODE (XEXP (term, 1)) == CONST_INT
4870 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
4871 term = XEXP (term, 0);
4872 if (GET_CODE (term) != UNSPEC
4873 || XINT (term, 1) != UNSPEC_GOTPCREL)
4874 return x;
4875
4876 term = XVECEXP (term, 0, 0);
4877
4878 if (GET_CODE (term) != SYMBOL_REF
4879 && GET_CODE (term) != LABEL_REF)
4880 return x;
4881
4882 return term;
4883 }
4884
4885 term = ix86_delegitimize_address (x);
4886
4887 if (GET_CODE (term) != SYMBOL_REF
4888 && GET_CODE (term) != LABEL_REF)
4889 return x;
4890
4891 return term;
4892 }
4893
4894 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
4895 this is used to form addresses for local data when -fPIC is in
4896 use. */
4897
4898 static bool
4899 darwin_local_data_pic (rtx disp)
4900 {
4901 if (GET_CODE (disp) == MINUS)
4902 {
4903 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
4904 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
4905 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
4906 {
4907 const char *sym_name = XSTR (XEXP (disp, 1), 0);
4908 if (! strcmp (sym_name, "<pic base>"))
4909 return true;
4910 }
4911 }
4912
4913 return false;
4914 }
4915 \f
4916 /* Determine if a given RTX is a valid constant. We already know this
4917 satisfies CONSTANT_P. */
4918
4919 bool
4920 legitimate_constant_p (rtx x)
4921 {
4922 switch (GET_CODE (x))
4923 {
4924 case CONST:
4925 x = XEXP (x, 0);
4926
4927 if (GET_CODE (x) == PLUS)
4928 {
4929 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
4930 return false;
4931 x = XEXP (x, 0);
4932 }
4933
4934 if (TARGET_MACHO && darwin_local_data_pic (x))
4935 return true;
4936
4937 /* Only some unspecs are valid as "constants". */
4938 if (GET_CODE (x) == UNSPEC)
4939 switch (XINT (x, 1))
4940 {
4941 case UNSPEC_TPOFF:
4942 case UNSPEC_NTPOFF:
4943 return local_exec_symbolic_operand (XVECEXP (x, 0, 0), Pmode);
4944 case UNSPEC_DTPOFF:
4945 return local_dynamic_symbolic_operand (XVECEXP (x, 0, 0), Pmode);
4946 default:
4947 return false;
4948 }
4949
4950 /* We must have drilled down to a symbol. */
4951 if (!symbolic_operand (x, Pmode))
4952 return false;
4953 /* FALLTHRU */
4954
4955 case SYMBOL_REF:
4956 /* TLS symbols are never valid. */
4957 if (tls_symbolic_operand (x, Pmode))
4958 return false;
4959 break;
4960
4961 default:
4962 break;
4963 }
4964
4965 /* Otherwise we handle everything else in the move patterns. */
4966 return true;
4967 }
4968
4969 /* Determine if it's legal to put X into the constant pool. This
4970 is not possible for the address of thread-local symbols, which
4971 is checked above. */
4972
4973 static bool
4974 ix86_cannot_force_const_mem (rtx x)
4975 {
4976 return !legitimate_constant_p (x);
4977 }
4978
4979 /* Determine if a given RTX is a valid constant address. */
4980
4981 bool
4982 constant_address_p (rtx x)
4983 {
4984 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
4985 }
4986
4987 /* Nonzero if the constant value X is a legitimate general operand
4988 when generating PIC code. It is given that flag_pic is on and
4989 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
4990
4991 bool
4992 legitimate_pic_operand_p (rtx x)
4993 {
4994 rtx inner;
4995
4996 switch (GET_CODE (x))
4997 {
4998 case CONST:
4999 inner = XEXP (x, 0);
5000
5001 /* Only some unspecs are valid as "constants". */
5002 if (GET_CODE (inner) == UNSPEC)
5003 switch (XINT (inner, 1))
5004 {
5005 case UNSPEC_TPOFF:
5006 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5007 default:
5008 return false;
5009 }
5010 /* FALLTHRU */
5011
5012 case SYMBOL_REF:
5013 case LABEL_REF:
5014 return legitimate_pic_address_disp_p (x);
5015
5016 default:
5017 return true;
5018 }
5019 }
5020
5021 /* Determine if a given CONST RTX is a valid memory displacement
5022 in PIC mode. */
5023
5024 int
5025 legitimate_pic_address_disp_p (rtx disp)
5026 {
5027 bool saw_plus;
5028
5029 /* In 64bit mode we can allow direct addresses of symbols and labels
5030 when they are not dynamic symbols. */
5031 if (TARGET_64BIT)
5032 {
5033 /* TLS references should always be enclosed in UNSPEC. */
5034 if (tls_symbolic_operand (disp, GET_MODE (disp)))
5035 return 0;
5036 if (GET_CODE (disp) == SYMBOL_REF
5037 && ix86_cmodel == CM_SMALL_PIC
5038 && SYMBOL_REF_LOCAL_P (disp))
5039 return 1;
5040 if (GET_CODE (disp) == LABEL_REF)
5041 return 1;
5042 if (GET_CODE (disp) == CONST
5043 && GET_CODE (XEXP (disp, 0)) == PLUS)
5044 {
5045 rtx op0 = XEXP (XEXP (disp, 0), 0);
5046 rtx op1 = XEXP (XEXP (disp, 0), 1);
5047
5048 /* TLS references should always be enclosed in UNSPEC. */
5049 if (tls_symbolic_operand (op0, GET_MODE (op0)))
5050 return 0;
5051 if (((GET_CODE (op0) == SYMBOL_REF
5052 && ix86_cmodel == CM_SMALL_PIC
5053 && SYMBOL_REF_LOCAL_P (op0))
5054 || GET_CODE (op0) == LABEL_REF)
5055 && GET_CODE (op1) == CONST_INT
5056 && INTVAL (op1) < 16*1024*1024
5057 && INTVAL (op1) >= -16*1024*1024)
5058 return 1;
5059 }
5060 }
5061 if (GET_CODE (disp) != CONST)
5062 return 0;
5063 disp = XEXP (disp, 0);
5064
5065 if (TARGET_64BIT)
5066 {
5067 /* It is not safe for us to allow PLUS expressions. This limits the allowed
5068 distance of GOT tables. We should not need these anyway. */
5069 if (GET_CODE (disp) != UNSPEC
5070 || XINT (disp, 1) != UNSPEC_GOTPCREL)
5071 return 0;
5072
5073 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
5074 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
5075 return 0;
5076 return 1;
5077 }
5078
5079 saw_plus = false;
5080 if (GET_CODE (disp) == PLUS)
5081 {
5082 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
5083 return 0;
5084 disp = XEXP (disp, 0);
5085 saw_plus = true;
5086 }
5087
5088 if (TARGET_MACHO && darwin_local_data_pic (disp))
5089 return 1;
5090
5091 if (GET_CODE (disp) != UNSPEC)
5092 return 0;
5093
5094 switch (XINT (disp, 1))
5095 {
5096 case UNSPEC_GOT:
5097 if (saw_plus)
5098 return false;
5099 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
5100 case UNSPEC_GOTOFF:
5101 if (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
5102 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
5103 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5104 return false;
5105 case UNSPEC_GOTTPOFF:
5106 case UNSPEC_GOTNTPOFF:
5107 case UNSPEC_INDNTPOFF:
5108 if (saw_plus)
5109 return false;
5110 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5111 case UNSPEC_NTPOFF:
5112 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5113 case UNSPEC_DTPOFF:
5114 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5115 }
5116
5117 return 0;
5118 }
5119
5120 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5121 memory address for an instruction. The MODE argument is the machine mode
5122 for the MEM expression that wants to use this address.
5123
5124 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
5125 convert common non-canonical forms to canonical form so that they will
5126 be recognized. */
5127
5128 int
5129 legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
5130 {
5131 struct ix86_address parts;
5132 rtx base, index, disp;
5133 HOST_WIDE_INT scale;
5134 const char *reason = NULL;
5135 rtx reason_rtx = NULL_RTX;
5136
5137 if (TARGET_DEBUG_ADDR)
5138 {
5139 fprintf (stderr,
5140 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
5141 GET_MODE_NAME (mode), strict);
5142 debug_rtx (addr);
5143 }
5144
5145 if (ix86_decompose_address (addr, &parts) <= 0)
5146 {
5147 reason = "decomposition failed";
5148 goto report_error;
5149 }
5150
5151 base = parts.base;
5152 index = parts.index;
5153 disp = parts.disp;
5154 scale = parts.scale;
5155
5156 /* Validate base register.
5157
5158 Don't allow SUBREG's here, it can lead to spill failures when the base
5159 is one word out of a two word structure, which is represented internally
5160 as a DImode int. */
5161
5162 if (base)
5163 {
5164 reason_rtx = base;
5165
5166 if (GET_CODE (base) != REG)
5167 {
5168 reason = "base is not a register";
5169 goto report_error;
5170 }
5171
5172 if (GET_MODE (base) != Pmode)
5173 {
5174 reason = "base is not in Pmode";
5175 goto report_error;
5176 }
5177
5178 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
5179 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
5180 {
5181 reason = "base is not valid";
5182 goto report_error;
5183 }
5184 }
5185
5186 /* Validate index register.
5187
5188 Don't allow SUBREGs here; they can lead to spill failures when the index
5189 is one word out of a two-word structure, which is represented internally
5190 as a DImode int. */
5191
5192 if (index)
5193 {
5194 reason_rtx = index;
5195
5196 if (GET_CODE (index) != REG)
5197 {
5198 reason = "index is not a register";
5199 goto report_error;
5200 }
5201
5202 if (GET_MODE (index) != Pmode)
5203 {
5204 reason = "index is not in Pmode";
5205 goto report_error;
5206 }
5207
5208 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
5209 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
5210 {
5211 reason = "index is not valid";
5212 goto report_error;
5213 }
5214 }
5215
5216 /* Validate scale factor. */
5217 if (scale != 1)
5218 {
5219 reason_rtx = GEN_INT (scale);
5220 if (!index)
5221 {
5222 reason = "scale without index";
5223 goto report_error;
5224 }
5225
5226 if (scale != 2 && scale != 4 && scale != 8)
5227 {
5228 reason = "scale is not a valid multiplier";
5229 goto report_error;
5230 }
5231 }
5232
5233 /* Validate displacement. */
5234 if (disp)
5235 {
5236 reason_rtx = disp;
5237
5238 if (GET_CODE (disp) == CONST
5239 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
5240 switch (XINT (XEXP (disp, 0), 1))
5241 {
5242 case UNSPEC_GOT:
5243 case UNSPEC_GOTOFF:
5244 case UNSPEC_GOTPCREL:
5245 if (!flag_pic)
5246 abort ();
5247 goto is_legitimate_pic;
5248
5249 case UNSPEC_GOTTPOFF:
5250 case UNSPEC_GOTNTPOFF:
5251 case UNSPEC_INDNTPOFF:
5252 case UNSPEC_NTPOFF:
5253 case UNSPEC_DTPOFF:
5254 break;
5255
5256 default:
5257 reason = "invalid address unspec";
5258 goto report_error;
5259 }
5260
5261 else if (flag_pic && (SYMBOLIC_CONST (disp)
5262 #if TARGET_MACHO
5263 && !machopic_operand_p (disp)
5264 #endif
5265 ))
5266 {
5267 is_legitimate_pic:
5268 if (TARGET_64BIT && (index || base))
5269 {
5270 /* foo@dtpoff(%rX) is ok. */
5271 if (GET_CODE (disp) != CONST
5272 || GET_CODE (XEXP (disp, 0)) != PLUS
5273 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
5274 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
5275 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
5276 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
5277 {
5278 reason = "non-constant pic memory reference";
5279 goto report_error;
5280 }
5281 }
5282 else if (! legitimate_pic_address_disp_p (disp))
5283 {
5284 reason = "displacement is an invalid pic construct";
5285 goto report_error;
5286 }
5287
5288 /* This code used to verify that a symbolic pic displacement
5289 includes the pic_offset_table_rtx register.
5290
5291 While this is a good idea, unfortunately these constructs may
5292 be created by the "adds using lea" optimization for incorrect
5293 code like:
5294
5295 int a;
5296 int foo(int i)
5297 {
5298 return *(&a+i);
5299 }
5300
5301 This code is nonsensical, but it results in addressing the
5302 GOT table with pic_offset_table_rtx as the base. We can't
5303 easily refuse it, since it gets matched by the "addsi3"
5304 pattern, which is later split to an lea when the output
5305 register differs from the input. While this could be handled
5306 by a separate addsi pattern for this case that never results
5307 in an lea, disabling this test seems to be the easier and
5308 correct fix for the crash. */
5309 }
5310 else if (GET_CODE (disp) != LABEL_REF
5311 && GET_CODE (disp) != CONST_INT
5312 && (GET_CODE (disp) != CONST
5313 || !legitimate_constant_p (disp))
5314 && (GET_CODE (disp) != SYMBOL_REF
5315 || !legitimate_constant_p (disp)))
5316 {
5317 reason = "displacement is not constant";
5318 goto report_error;
5319 }
5320 else if (TARGET_64BIT
5321 && !x86_64_immediate_operand (disp, VOIDmode))
5322 {
5323 reason = "displacement is out of range";
5324 goto report_error;
5325 }
5326 }
5327
5328 /* Everything looks valid. */
5329 if (TARGET_DEBUG_ADDR)
5330 fprintf (stderr, "Success.\n");
5331 return TRUE;
5332
5333 report_error:
5334 if (TARGET_DEBUG_ADDR)
5335 {
5336 fprintf (stderr, "Error: %s\n", reason);
5337 debug_rtx (reason_rtx);
5338 }
5339 return FALSE;
5340 }
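
/* Illustrative example (not part of the original sources): a canonical
   base + index*scale + displacement address such as

       (plus:SI (plus:SI (reg:SI %ebx) (mult:SI (reg:SI %eax) (const_int 4)))
                (const_int 8))

   decomposes into base = %ebx, index = %eax, scale = 4, disp = 8 and is
   accepted, whereas a scale of 3 or an index register that is not in
   Pmode is rejected with one of the "reason" strings above.  */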
5341 \f
5342 /* Return a unique alias set for the GOT. */
5343
5344 static HOST_WIDE_INT
5345 ix86_GOT_alias_set (void)
5346 {
5347 static HOST_WIDE_INT set = -1;
5348 if (set == -1)
5349 set = new_alias_set ();
5350 return set;
5351 }
5352
5353 /* Return a legitimate reference for ORIG (an address) using the
5354 register REG. If REG is 0, a new pseudo is generated.
5355
5356 There are two types of references that must be handled:
5357
5358 1. Global data references must load the address from the GOT, via
5359 the PIC reg. An insn is emitted to do this load, and the reg is
5360 returned.
5361
5362 2. Static data references, constant pool addresses, and code labels
5363 compute the address as an offset from the GOT, whose base is in
5364 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
5365 differentiate them from global data objects. The returned
5366 address is the PIC reg + an unspec constant.
5367
5368 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
5369 reg also appears in the address. */
5370
5371 static rtx
5372 legitimize_pic_address (rtx orig, rtx reg)
5373 {
5374 rtx addr = orig;
5375 rtx new = orig;
5376 rtx base;
5377
5378 #if TARGET_MACHO
5379 if (reg == 0)
5380 reg = gen_reg_rtx (Pmode);
5381 /* Use the generic Mach-O PIC machinery. */
5382 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
5383 #endif
5384
5385 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
5386 new = addr;
5387 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
5388 {
5389 /* This symbol may be referenced via a displacement from the PIC
5390 base address (@GOTOFF). */
5391
5392 if (reload_in_progress)
5393 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5394 if (GET_CODE (addr) == CONST)
5395 addr = XEXP (addr, 0);
5396 if (GET_CODE (addr) == PLUS)
5397 {
5398 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
5399 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
5400 }
5401 else
5402 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
5403 new = gen_rtx_CONST (Pmode, new);
5404 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5405
5406 if (reg != 0)
5407 {
5408 emit_move_insn (reg, new);
5409 new = reg;
5410 }
5411 }
5412 else if (GET_CODE (addr) == SYMBOL_REF)
5413 {
5414 if (TARGET_64BIT)
5415 {
5416 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
5417 new = gen_rtx_CONST (Pmode, new);
5418 new = gen_const_mem (Pmode, new);
5419 set_mem_alias_set (new, ix86_GOT_alias_set ());
5420
5421 if (reg == 0)
5422 reg = gen_reg_rtx (Pmode);
5423 /* Use gen_movsi directly; otherwise the address is loaded
5424 into a register for CSE. We don't want to CSE these addresses;
5425 instead we CSE addresses from the GOT table, so skip this. */
5426 emit_insn (gen_movsi (reg, new));
5427 new = reg;
5428 }
5429 else
5430 {
5431 /* This symbol must be referenced via a load from the
5432 Global Offset Table (@GOT). */
5433
5434 if (reload_in_progress)
5435 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5436 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
5437 new = gen_rtx_CONST (Pmode, new);
5438 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5439 new = gen_const_mem (Pmode, new);
5440 set_mem_alias_set (new, ix86_GOT_alias_set ());
5441
5442 if (reg == 0)
5443 reg = gen_reg_rtx (Pmode);
5444 emit_move_insn (reg, new);
5445 new = reg;
5446 }
5447 }
5448 else
5449 {
5450 if (GET_CODE (addr) == CONST)
5451 {
5452 addr = XEXP (addr, 0);
5453
5454 /* We must match stuff we generated before. Assume the only
5455 unspecs that can get here are ours. Not that we could do
5456 anything with them anyway.... */
5457 if (GET_CODE (addr) == UNSPEC
5458 || (GET_CODE (addr) == PLUS
5459 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
5460 return orig;
5461 if (GET_CODE (addr) != PLUS)
5462 abort ();
5463 }
5464 if (GET_CODE (addr) == PLUS)
5465 {
5466 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
5467
5468 /* Check first to see if this is a constant offset from a @GOTOFF
5469 symbol reference. */
5470 if (local_symbolic_operand (op0, Pmode)
5471 && GET_CODE (op1) == CONST_INT)
5472 {
5473 if (!TARGET_64BIT)
5474 {
5475 if (reload_in_progress)
5476 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5477 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
5478 UNSPEC_GOTOFF);
5479 new = gen_rtx_PLUS (Pmode, new, op1);
5480 new = gen_rtx_CONST (Pmode, new);
5481 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5482
5483 if (reg != 0)
5484 {
5485 emit_move_insn (reg, new);
5486 new = reg;
5487 }
5488 }
5489 else
5490 {
5491 if (INTVAL (op1) < -16*1024*1024
5492 || INTVAL (op1) >= 16*1024*1024)
5493 new = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
5494 }
5495 }
5496 else
5497 {
5498 base = legitimize_pic_address (XEXP (addr, 0), reg);
5499 new = legitimize_pic_address (XEXP (addr, 1),
5500 base == reg ? NULL_RTX : reg);
5501
5502 if (GET_CODE (new) == CONST_INT)
5503 new = plus_constant (base, INTVAL (new));
5504 else
5505 {
5506 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
5507 {
5508 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
5509 new = XEXP (new, 1);
5510 }
5511 new = gen_rtx_PLUS (Pmode, base, new);
5512 }
5513 }
5514 }
5515 }
5516 return new;
5517 }
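
/* Illustrative sketch (not part of the original sources; names are
   hypothetical): under 32-bit -fpic a local symbol is rewritten into the
   @GOTOFF form, roughly

       rtx sym = gen_rtx_SYMBOL_REF (Pmode, "local_var");
       rtx pic = legitimize_pic_address (sym, NULL_RTX);

   yielding (plus pic_offset_table_rtx
                  (const (unspec [(symbol_ref "local_var")] UNSPEC_GOTOFF))),
   whereas a non-local symbol instead becomes a load from its GOT slot.  */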
5518 \f
5519 /* Load the thread pointer. If TO_REG is true, force it into a register. */
5520
5521 static rtx
5522 get_thread_pointer (int to_reg)
5523 {
5524 rtx tp, reg, insn;
5525
5526 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
5527 if (!to_reg)
5528 return tp;
5529
5530 reg = gen_reg_rtx (Pmode);
5531 insn = gen_rtx_SET (VOIDmode, reg, tp);
5532 insn = emit_insn (insn);
5533
5534 return reg;
5535 }
5536
5537 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
5538 false if we expect this to be used for a memory address and true if
5539 we expect to load the address into a register. */
5540
5541 static rtx
5542 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
5543 {
5544 rtx dest, base, off, pic;
5545 int type;
5546
5547 switch (model)
5548 {
5549 case TLS_MODEL_GLOBAL_DYNAMIC:
5550 dest = gen_reg_rtx (Pmode);
5551 if (TARGET_64BIT)
5552 {
5553 rtx rax = gen_rtx_REG (Pmode, 0), insns;
5554
5555 start_sequence ();
5556 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
5557 insns = get_insns ();
5558 end_sequence ();
5559
5560 emit_libcall_block (insns, dest, rax, x);
5561 }
5562 else
5563 emit_insn (gen_tls_global_dynamic_32 (dest, x));
5564 break;
5565
5566 case TLS_MODEL_LOCAL_DYNAMIC:
5567 base = gen_reg_rtx (Pmode);
5568 if (TARGET_64BIT)
5569 {
5570 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
5571
5572 start_sequence ();
5573 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
5574 insns = get_insns ();
5575 end_sequence ();
5576
5577 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
5578 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
5579 emit_libcall_block (insns, base, rax, note);
5580 }
5581 else
5582 emit_insn (gen_tls_local_dynamic_base_32 (base));
5583
5584 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
5585 off = gen_rtx_CONST (Pmode, off);
5586
5587 return gen_rtx_PLUS (Pmode, base, off);
5588
5589 case TLS_MODEL_INITIAL_EXEC:
5590 if (TARGET_64BIT)
5591 {
5592 pic = NULL;
5593 type = UNSPEC_GOTNTPOFF;
5594 }
5595 else if (flag_pic)
5596 {
5597 if (reload_in_progress)
5598 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5599 pic = pic_offset_table_rtx;
5600 type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
5601 }
5602 else if (!TARGET_GNU_TLS)
5603 {
5604 pic = gen_reg_rtx (Pmode);
5605 emit_insn (gen_set_got (pic));
5606 type = UNSPEC_GOTTPOFF;
5607 }
5608 else
5609 {
5610 pic = NULL;
5611 type = UNSPEC_INDNTPOFF;
5612 }
5613
5614 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
5615 off = gen_rtx_CONST (Pmode, off);
5616 if (pic)
5617 off = gen_rtx_PLUS (Pmode, pic, off);
5618 off = gen_const_mem (Pmode, off);
5619 set_mem_alias_set (off, ix86_GOT_alias_set ());
5620
5621 if (TARGET_64BIT || TARGET_GNU_TLS)
5622 {
5623 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
5624 off = force_reg (Pmode, off);
5625 return gen_rtx_PLUS (Pmode, base, off);
5626 }
5627 else
5628 {
5629 base = get_thread_pointer (true);
5630 dest = gen_reg_rtx (Pmode);
5631 emit_insn (gen_subsi3 (dest, base, off));
5632 }
5633 break;
5634
5635 case TLS_MODEL_LOCAL_EXEC:
5636 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
5637 (TARGET_64BIT || TARGET_GNU_TLS)
5638 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
5639 off = gen_rtx_CONST (Pmode, off);
5640
5641 if (TARGET_64BIT || TARGET_GNU_TLS)
5642 {
5643 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
5644 return gen_rtx_PLUS (Pmode, base, off);
5645 }
5646 else
5647 {
5648 base = get_thread_pointer (true);
5649 dest = gen_reg_rtx (Pmode);
5650 emit_insn (gen_subsi3 (dest, base, off));
5651 }
5652 break;
5653
5654 default:
5655 abort ();
5656 }
5657
5658 return dest;
5659 }
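
/* Illustrative sketch (not part of the original sources): for the
   initial-exec model with 32-bit GNU TLS and -fpic the result is roughly

       thread pointer + MEM (pic reg + const (unspec [sym] UNSPEC_GOTNTPOFF))

   and for the local-exec model simply

       thread pointer + const (unspec [sym] UNSPEC_NTPOFF)

   while the non-GNU variants use UNSPEC_GOTTPOFF/UNSPEC_TPOFF and subtract
   the offset from the thread pointer instead.  */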
5660
5661 /* Try machine-dependent ways of modifying an illegitimate address
5662 to be legitimate. If we find one, return the new, valid address.
5663 This macro is used in only one place: `memory_address' in explow.c.
5664
5665 OLDX is the address as it was before break_out_memory_refs was called.
5666 In some cases it is useful to look at this to decide what needs to be done.
5667
5668 MODE and WIN are passed so that this macro can use
5669 GO_IF_LEGITIMATE_ADDRESS.
5670
5671 It is always safe for this macro to do nothing. It exists to recognize
5672 opportunities to optimize the output.
5673
5674 For the 80386, we handle X+REG by loading X into a register R and
5675 using R+REG. R will go in a general reg and indexing will be used.
5676 However, if REG is a broken-out memory address or multiplication,
5677 nothing needs to be done because REG can certainly go in a general reg.
5678
5679 When -fpic is used, special handling is needed for symbolic references.
5680 See comments by legitimize_pic_address in i386.c for details. */
5681
5682 rtx
5683 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
5684 {
5685 int changed = 0;
5686 unsigned log;
5687
5688 if (TARGET_DEBUG_ADDR)
5689 {
5690 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
5691 GET_MODE_NAME (mode));
5692 debug_rtx (x);
5693 }
5694
5695 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
5696 if (log)
5697 return legitimize_tls_address (x, log, false);
5698 if (GET_CODE (x) == CONST
5699 && GET_CODE (XEXP (x, 0)) == PLUS
5700 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
5701 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
5702 {
5703 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
5704 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
5705 }
5706
5707 if (flag_pic && SYMBOLIC_CONST (x))
5708 return legitimize_pic_address (x, 0);
5709
5710 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
5711 if (GET_CODE (x) == ASHIFT
5712 && GET_CODE (XEXP (x, 1)) == CONST_INT
5713 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
5714 {
5715 changed = 1;
5716 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
5717 GEN_INT (1 << log));
5718 }
5719
5720 if (GET_CODE (x) == PLUS)
5721 {
5722 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
5723
5724 if (GET_CODE (XEXP (x, 0)) == ASHIFT
5725 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
5726 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
5727 {
5728 changed = 1;
5729 XEXP (x, 0) = gen_rtx_MULT (Pmode,
5730 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
5731 GEN_INT (1 << log));
5732 }
5733
5734 if (GET_CODE (XEXP (x, 1)) == ASHIFT
5735 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
5736 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
5737 {
5738 changed = 1;
5739 XEXP (x, 1) = gen_rtx_MULT (Pmode,
5740 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
5741 GEN_INT (1 << log));
5742 }
5743
5744 /* Put multiply first if it isn't already. */
5745 if (GET_CODE (XEXP (x, 1)) == MULT)
5746 {
5747 rtx tmp = XEXP (x, 0);
5748 XEXP (x, 0) = XEXP (x, 1);
5749 XEXP (x, 1) = tmp;
5750 changed = 1;
5751 }
5752
5753 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
5754 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
5755 created by virtual register instantiation, register elimination, and
5756 similar optimizations. */
5757 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
5758 {
5759 changed = 1;
5760 x = gen_rtx_PLUS (Pmode,
5761 gen_rtx_PLUS (Pmode, XEXP (x, 0),
5762 XEXP (XEXP (x, 1), 0)),
5763 XEXP (XEXP (x, 1), 1));
5764 }
5765
5766 /* Canonicalize
5767 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
5768 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
5769 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
5770 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5771 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
5772 && CONSTANT_P (XEXP (x, 1)))
5773 {
5774 rtx constant;
5775 rtx other = NULL_RTX;
5776
5777 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5778 {
5779 constant = XEXP (x, 1);
5780 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
5781 }
5782 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
5783 {
5784 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
5785 other = XEXP (x, 1);
5786 }
5787 else
5788 constant = 0;
5789
5790 if (constant)
5791 {
5792 changed = 1;
5793 x = gen_rtx_PLUS (Pmode,
5794 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
5795 XEXP (XEXP (XEXP (x, 0), 1), 0)),
5796 plus_constant (other, INTVAL (constant)));
5797 }
5798 }
5799
5800 if (changed && legitimate_address_p (mode, x, FALSE))
5801 return x;
5802
5803 if (GET_CODE (XEXP (x, 0)) == MULT)
5804 {
5805 changed = 1;
5806 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
5807 }
5808
5809 if (GET_CODE (XEXP (x, 1)) == MULT)
5810 {
5811 changed = 1;
5812 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
5813 }
5814
5815 if (changed
5816 && GET_CODE (XEXP (x, 1)) == REG
5817 && GET_CODE (XEXP (x, 0)) == REG)
5818 return x;
5819
5820 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
5821 {
5822 changed = 1;
5823 x = legitimize_pic_address (x, 0);
5824 }
5825
5826 if (changed && legitimate_address_p (mode, x, FALSE))
5827 return x;
5828
5829 if (GET_CODE (XEXP (x, 0)) == REG)
5830 {
5831 rtx temp = gen_reg_rtx (Pmode);
5832 rtx val = force_operand (XEXP (x, 1), temp);
5833 if (val != temp)
5834 emit_move_insn (temp, val);
5835
5836 XEXP (x, 1) = temp;
5837 return x;
5838 }
5839
5840 else if (GET_CODE (XEXP (x, 1)) == REG)
5841 {
5842 rtx temp = gen_reg_rtx (Pmode);
5843 rtx val = force_operand (XEXP (x, 0), temp);
5844 if (val != temp)
5845 emit_move_insn (temp, val);
5846
5847 XEXP (x, 0) = temp;
5848 return x;
5849 }
5850 }
5851
5852 return x;
5853 }
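
/* Illustrative example (not part of the original sources): the
   reassociation above rewrites an address such as

       (plus (mult (reg A) (const_int 4)) (plus (reg B) (const_int 16)))

   into

       (plus (plus (mult (reg A) (const_int 4)) (reg B)) (const_int 16))

   which matches the base + index*scale + disp form accepted by
   legitimate_address_p.  */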
5854 \f
5855 /* Print an integer constant expression in assembler syntax. Addition
5856 and subtraction are the only arithmetic that may appear in these
5857 expressions. FILE is the stdio stream to write to, X is the rtx, and
5858 CODE is the operand print code from the output string. */
5859
5860 static void
5861 output_pic_addr_const (FILE *file, rtx x, int code)
5862 {
5863 char buf[256];
5864
5865 switch (GET_CODE (x))
5866 {
5867 case PC:
5868 if (flag_pic)
5869 putc ('.', file);
5870 else
5871 abort ();
5872 break;
5873
5874 case SYMBOL_REF:
5875 /* Mark the decl as referenced so that cgraph will output the function. */
5876 if (SYMBOL_REF_DECL (x))
5877 mark_decl_referenced (SYMBOL_REF_DECL (x));
5878
5879 assemble_name (file, XSTR (x, 0));
5880 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
5881 fputs ("@PLT", file);
5882 break;
5883
5884 case LABEL_REF:
5885 x = XEXP (x, 0);
5886 /* FALLTHRU */
5887 case CODE_LABEL:
5888 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
5889 assemble_name (asm_out_file, buf);
5890 break;
5891
5892 case CONST_INT:
5893 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
5894 break;
5895
5896 case CONST:
5897 /* This used to output parentheses around the expression,
5898 but that does not work on the 386 (either ATT or BSD assembler). */
5899 output_pic_addr_const (file, XEXP (x, 0), code);
5900 break;
5901
5902 case CONST_DOUBLE:
5903 if (GET_MODE (x) == VOIDmode)
5904 {
5905 /* We can use %d if the number is <32 bits and positive. */
5906 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
5907 fprintf (file, "0x%lx%08lx",
5908 (unsigned long) CONST_DOUBLE_HIGH (x),
5909 (unsigned long) CONST_DOUBLE_LOW (x));
5910 else
5911 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
5912 }
5913 else
5914 /* We can't handle floating point constants;
5915 PRINT_OPERAND must handle them. */
5916 output_operand_lossage ("floating constant misused");
5917 break;
5918
5919 case PLUS:
5920 /* Some assemblers need integer constants to appear first. */
5921 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
5922 {
5923 output_pic_addr_const (file, XEXP (x, 0), code);
5924 putc ('+', file);
5925 output_pic_addr_const (file, XEXP (x, 1), code);
5926 }
5927 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5928 {
5929 output_pic_addr_const (file, XEXP (x, 1), code);
5930 putc ('+', file);
5931 output_pic_addr_const (file, XEXP (x, 0), code);
5932 }
5933 else
5934 abort ();
5935 break;
5936
5937 case MINUS:
5938 if (!TARGET_MACHO)
5939 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
5940 output_pic_addr_const (file, XEXP (x, 0), code);
5941 putc ('-', file);
5942 output_pic_addr_const (file, XEXP (x, 1), code);
5943 if (!TARGET_MACHO)
5944 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
5945 break;
5946
5947 case UNSPEC:
5948 if (XVECLEN (x, 0) != 1)
5949 abort ();
5950 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
5951 switch (XINT (x, 1))
5952 {
5953 case UNSPEC_GOT:
5954 fputs ("@GOT", file);
5955 break;
5956 case UNSPEC_GOTOFF:
5957 fputs ("@GOTOFF", file);
5958 break;
5959 case UNSPEC_GOTPCREL:
5960 fputs ("@GOTPCREL(%rip)", file);
5961 break;
5962 case UNSPEC_GOTTPOFF:
5963 /* FIXME: This might be @TPOFF in Sun ld too. */
5964 fputs ("@GOTTPOFF", file);
5965 break;
5966 case UNSPEC_TPOFF:
5967 fputs ("@TPOFF", file);
5968 break;
5969 case UNSPEC_NTPOFF:
5970 if (TARGET_64BIT)
5971 fputs ("@TPOFF", file);
5972 else
5973 fputs ("@NTPOFF", file);
5974 break;
5975 case UNSPEC_DTPOFF:
5976 fputs ("@DTPOFF", file);
5977 break;
5978 case UNSPEC_GOTNTPOFF:
5979 if (TARGET_64BIT)
5980 fputs ("@GOTTPOFF(%rip)", file);
5981 else
5982 fputs ("@GOTNTPOFF", file);
5983 break;
5984 case UNSPEC_INDNTPOFF:
5985 fputs ("@INDNTPOFF", file);
5986 break;
5987 default:
5988 output_operand_lossage ("invalid UNSPEC as operand");
5989 break;
5990 }
5991 break;
5992
5993 default:
5994 output_operand_lossage ("invalid expression as operand");
5995 }
5996 }
5997
5998 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
5999 We need to emit DTP-relative relocations. */
6000
6001 void
6002 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
6003 {
6004 fputs (ASM_LONG, file);
6005 output_addr_const (file, x);
6006 fputs ("@DTPOFF", file);
6007 switch (size)
6008 {
6009 case 4:
6010 break;
6011 case 8:
6012 fputs (", 0", file);
6013 break;
6014 default:
6015 abort ();
6016 }
6017 }
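
/* Illustrative example (not part of the original sources), assuming
   ASM_LONG expands to "\t.long\t": for SIZE == 4 this emits

       .long   sym@DTPOFF

   and for SIZE == 8 the second, upper word is padded with zero:

       .long   sym@DTPOFF, 0  */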
6018
6019 /* In the name of slightly smaller debug output, and to cater to
6020 general assembler lossage, recognize PIC+GOTOFF and turn it back
6021 into a direct symbol reference. */
6022
6023 static rtx
6024 ix86_delegitimize_address (rtx orig_x)
6025 {
6026 rtx x = orig_x, y;
6027
6028 if (GET_CODE (x) == MEM)
6029 x = XEXP (x, 0);
6030
6031 if (TARGET_64BIT)
6032 {
6033 if (GET_CODE (x) != CONST
6034 || GET_CODE (XEXP (x, 0)) != UNSPEC
6035 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
6036 || GET_CODE (orig_x) != MEM)
6037 return orig_x;
6038 return XVECEXP (XEXP (x, 0), 0, 0);
6039 }
6040
6041 if (GET_CODE (x) != PLUS
6042 || GET_CODE (XEXP (x, 1)) != CONST)
6043 return orig_x;
6044
6045 if (GET_CODE (XEXP (x, 0)) == REG
6046 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6047 /* %ebx + GOT/GOTOFF */
6048 y = NULL;
6049 else if (GET_CODE (XEXP (x, 0)) == PLUS)
6050 {
6051 /* %ebx + %reg * scale + GOT/GOTOFF */
6052 y = XEXP (x, 0);
6053 if (GET_CODE (XEXP (y, 0)) == REG
6054 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
6055 y = XEXP (y, 1);
6056 else if (GET_CODE (XEXP (y, 1)) == REG
6057 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
6058 y = XEXP (y, 0);
6059 else
6060 return orig_x;
6061 if (GET_CODE (y) != REG
6062 && GET_CODE (y) != MULT
6063 && GET_CODE (y) != ASHIFT)
6064 return orig_x;
6065 }
6066 else
6067 return orig_x;
6068
6069 x = XEXP (XEXP (x, 1), 0);
6070 if (GET_CODE (x) == UNSPEC
6071 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6072 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
6073 {
6074 if (y)
6075 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
6076 return XVECEXP (x, 0, 0);
6077 }
6078
6079 if (GET_CODE (x) == PLUS
6080 && GET_CODE (XEXP (x, 0)) == UNSPEC
6081 && GET_CODE (XEXP (x, 1)) == CONST_INT
6082 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6083 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
6084 && GET_CODE (orig_x) != MEM)))
6085 {
6086 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
6087 if (y)
6088 return gen_rtx_PLUS (Pmode, y, x);
6089 return x;
6090 }
6091
6092 return orig_x;
6093 }
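
/* Illustrative example (not part of the original sources): given the
   legitimized reference

       (mem (plus (reg %ebx) (const (unspec [(symbol_ref "sym")] UNSPEC_GOT))))

   this returns the bare (symbol_ref "sym"); a non-MEM address using
   UNSPEC_GOTOFF is stripped the same way, with any index term that was
   folded in re-attached as a PLUS.  */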
6094 \f
6095 static void
6096 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
6097 int fp, FILE *file)
6098 {
6099 const char *suffix;
6100
6101 if (mode == CCFPmode || mode == CCFPUmode)
6102 {
6103 enum rtx_code second_code, bypass_code;
6104 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
6105 if (bypass_code != UNKNOWN || second_code != UNKNOWN)
6106 abort ();
6107 code = ix86_fp_compare_code_to_integer (code);
6108 mode = CCmode;
6109 }
6110 if (reverse)
6111 code = reverse_condition (code);
6112
6113 switch (code)
6114 {
6115 case EQ:
6116 suffix = "e";
6117 break;
6118 case NE:
6119 suffix = "ne";
6120 break;
6121 case GT:
6122 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
6123 abort ();
6124 suffix = "g";
6125 break;
6126 case GTU:
6127 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
6128 Those same assemblers have the same but opposite lossage on cmov. */
6129 if (mode != CCmode)
6130 abort ();
6131 suffix = fp ? "nbe" : "a";
6132 break;
6133 case LT:
6134 if (mode == CCNOmode || mode == CCGOCmode)
6135 suffix = "s";
6136 else if (mode == CCmode || mode == CCGCmode)
6137 suffix = "l";
6138 else
6139 abort ();
6140 break;
6141 case LTU:
6142 if (mode != CCmode)
6143 abort ();
6144 suffix = "b";
6145 break;
6146 case GE:
6147 if (mode == CCNOmode || mode == CCGOCmode)
6148 suffix = "ns";
6149 else if (mode == CCmode || mode == CCGCmode)
6150 suffix = "ge";
6151 else
6152 abort ();
6153 break;
6154 case GEU:
6155 /* ??? As above. */
6156 if (mode != CCmode)
6157 abort ();
6158 suffix = fp ? "nb" : "ae";
6159 break;
6160 case LE:
6161 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
6162 abort ();
6163 suffix = "le";
6164 break;
6165 case LEU:
6166 if (mode != CCmode)
6167 abort ();
6168 suffix = "be";
6169 break;
6170 case UNORDERED:
6171 suffix = fp ? "u" : "p";
6172 break;
6173 case ORDERED:
6174 suffix = fp ? "nu" : "np";
6175 break;
6176 default:
6177 abort ();
6178 }
6179 fputs (suffix, file);
6180 }
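
/* Illustrative examples (not part of the original sources): EQ prints
   "e", signed GT prints "g", unsigned GTU prints "a" (or "nbe" when FP
   is set), and with REVERSE set EQ prints "ne" instead; an output
   template such as "j%C1\t%l0" therefore expands to "je", "jg", "ja",
   ... depending on the comparison operand.  */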
6181
6182 /* Print the name of register X to FILE based on its machine mode and number.
6183 If CODE is 'w', pretend the mode is HImode.
6184 If CODE is 'b', pretend the mode is QImode.
6185 If CODE is 'k', pretend the mode is SImode.
6186 If CODE is 'q', pretend the mode is DImode.
6187 If CODE is 'h', pretend the reg is the `high' byte register.
6188 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
6189
6190 void
6191 print_reg (rtx x, int code, FILE *file)
6192 {
6193 if (REGNO (x) == ARG_POINTER_REGNUM
6194 || REGNO (x) == FRAME_POINTER_REGNUM
6195 || REGNO (x) == FLAGS_REG
6196 || REGNO (x) == FPSR_REG)
6197 abort ();
6198
6199 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
6200 putc ('%', file);
6201
6202 if (code == 'w' || MMX_REG_P (x))
6203 code = 2;
6204 else if (code == 'b')
6205 code = 1;
6206 else if (code == 'k')
6207 code = 4;
6208 else if (code == 'q')
6209 code = 8;
6210 else if (code == 'y')
6211 code = 3;
6212 else if (code == 'h')
6213 code = 0;
6214 else
6215 code = GET_MODE_SIZE (GET_MODE (x));
6216
6217 /* Irritatingly, AMD extended registers use a different naming convention
6218 from the normal registers. */
6219 if (REX_INT_REG_P (x))
6220 {
6221 if (!TARGET_64BIT)
6222 abort ();
6223 switch (code)
6224 {
6225 case 0:
6226 error ("extended registers have no high halves");
6227 break;
6228 case 1:
6229 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
6230 break;
6231 case 2:
6232 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
6233 break;
6234 case 4:
6235 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
6236 break;
6237 case 8:
6238 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
6239 break;
6240 default:
6241 error ("unsupported operand size for extended register");
6242 break;
6243 }
6244 return;
6245 }
6246 switch (code)
6247 {
6248 case 3:
6249 if (STACK_TOP_P (x))
6250 {
6251 fputs ("st(0)", file);
6252 break;
6253 }
6254 /* FALLTHRU */
6255 case 8:
6256 case 4:
6257 case 12:
6258 if (! ANY_FP_REG_P (x))
6259 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
6260 /* FALLTHRU */
6261 case 16:
6262 case 2:
6263 normal:
6264 fputs (hi_reg_name[REGNO (x)], file);
6265 break;
6266 case 1:
6267 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
6268 goto normal;
6269 fputs (qi_reg_name[REGNO (x)], file);
6270 break;
6271 case 0:
6272 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
6273 goto normal;
6274 fputs (qi_high_reg_name[REGNO (x)], file);
6275 break;
6276 default:
6277 abort ();
6278 }
6279 }
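
/* Illustrative examples (not part of the original sources): for the
   %eax hard register, code 'b' prints "%al", 'w' prints "%ax", 'k'
   prints "%eax" and 'q' prints "%rax" in 64-bit mode; for the extended
   register r8, code 'k' prints "%r8d" and 'b' prints "%r8b", following
   the separate AMD64 naming scheme handled above.  */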
6280
6281 /* Locate some local-dynamic symbol still in use by this function
6282 so that we can print its name in some tls_local_dynamic_base
6283 pattern. */
6284
6285 static const char *
6286 get_some_local_dynamic_name (void)
6287 {
6288 rtx insn;
6289
6290 if (cfun->machine->some_ld_name)
6291 return cfun->machine->some_ld_name;
6292
6293 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
6294 if (INSN_P (insn)
6295 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
6296 return cfun->machine->some_ld_name;
6297
6298 abort ();
6299 }
6300
6301 static int
6302 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
6303 {
6304 rtx x = *px;
6305
6306 if (GET_CODE (x) == SYMBOL_REF
6307 && local_dynamic_symbolic_operand (x, Pmode))
6308 {
6309 cfun->machine->some_ld_name = XSTR (x, 0);
6310 return 1;
6311 }
6312
6313 return 0;
6314 }
6315
6316 /* Meaning of CODE:
6317 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
6318 C -- print opcode suffix for set/cmov insn.
6319 c -- like C, but print reversed condition
6320 F,f -- likewise, but for floating-point.
6321 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
6322 otherwise nothing
6323 R -- print the prefix for register names.
6324 z -- print the opcode suffix for the size of the current operand.
6325 * -- print a star (in certain assembler syntax)
6326 A -- print an absolute memory reference.
6327 w -- print the operand as if it's a "word" (HImode) even if it isn't.
6328 s -- print a shift double count, followed by the assembler's argument
6329 delimiter.
6330 b -- print the QImode name of the register for the indicated operand.
6331 %b0 would print %al if operands[0] is reg 0.
6332 w -- likewise, print the HImode name of the register.
6333 k -- likewise, print the SImode name of the register.
6334 q -- likewise, print the DImode name of the register.
6335 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
6336 y -- print "st(0)" instead of "st" as a register.
6337 D -- print condition for SSE cmp instruction.
6338 P -- if PIC, print an @PLT suffix.
6339 X -- don't print any sort of PIC '@' suffix for a symbol.
6340 & -- print some in-use local-dynamic symbol name.
6341 H -- print a memory address offset by 8; used for sse high-parts
6342 */
6343
6344 void
6345 print_operand (FILE *file, rtx x, int code)
6346 {
6347 if (code)
6348 {
6349 switch (code)
6350 {
6351 case '*':
6352 if (ASSEMBLER_DIALECT == ASM_ATT)
6353 putc ('*', file);
6354 return;
6355
6356 case '&':
6357 assemble_name (file, get_some_local_dynamic_name ());
6358 return;
6359
6360 case 'A':
6361 if (ASSEMBLER_DIALECT == ASM_ATT)
6362 putc ('*', file);
6363 else if (ASSEMBLER_DIALECT == ASM_INTEL)
6364 {
6365 /* Intel syntax. For absolute addresses, registers should not
6366 be surrounded by brackets. */
6367 if (GET_CODE (x) != REG)
6368 {
6369 putc ('[', file);
6370 PRINT_OPERAND (file, x, 0);
6371 putc (']', file);
6372 return;
6373 }
6374 }
6375 else
6376 abort ();
6377
6378 PRINT_OPERAND (file, x, 0);
6379 return;
6380
6381
6382 case 'L':
6383 if (ASSEMBLER_DIALECT == ASM_ATT)
6384 putc ('l', file);
6385 return;
6386
6387 case 'W':
6388 if (ASSEMBLER_DIALECT == ASM_ATT)
6389 putc ('w', file);
6390 return;
6391
6392 case 'B':
6393 if (ASSEMBLER_DIALECT == ASM_ATT)
6394 putc ('b', file);
6395 return;
6396
6397 case 'Q':
6398 if (ASSEMBLER_DIALECT == ASM_ATT)
6399 putc ('l', file);
6400 return;
6401
6402 case 'S':
6403 if (ASSEMBLER_DIALECT == ASM_ATT)
6404 putc ('s', file);
6405 return;
6406
6407 case 'T':
6408 if (ASSEMBLER_DIALECT == ASM_ATT)
6409 putc ('t', file);
6410 return;
6411
6412 case 'z':
6413 /* 387 opcodes don't get size suffixes if the operands are
6414 registers. */
6415 if (STACK_REG_P (x))
6416 return;
6417
6418 /* Likewise if using Intel opcodes. */
6419 if (ASSEMBLER_DIALECT == ASM_INTEL)
6420 return;
6421
6422 /* Derive the opcode suffix from the size of the operand. */
6423 switch (GET_MODE_SIZE (GET_MODE (x)))
6424 {
6425 case 2:
6426 #ifdef HAVE_GAS_FILDS_FISTS
6427 putc ('s', file);
6428 #endif
6429 return;
6430
6431 case 4:
6432 if (GET_MODE (x) == SFmode)
6433 {
6434 putc ('s', file);
6435 return;
6436 }
6437 else
6438 putc ('l', file);
6439 return;
6440
6441 case 12:
6442 case 16:
6443 putc ('t', file);
6444 return;
6445
6446 case 8:
6447 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
6448 {
6449 #ifdef GAS_MNEMONICS
6450 putc ('q', file);
6451 #else
6452 putc ('l', file);
6453 putc ('l', file);
6454 #endif
6455 }
6456 else
6457 putc ('l', file);
6458 return;
6459
6460 default:
6461 abort ();
6462 }
6463
6464 case 'b':
6465 case 'w':
6466 case 'k':
6467 case 'q':
6468 case 'h':
6469 case 'y':
6470 case 'X':
6471 case 'P':
6472 break;
6473
6474 case 's':
6475 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
6476 {
6477 PRINT_OPERAND (file, x, 0);
6478 putc (',', file);
6479 }
6480 return;
6481
6482 case 'D':
6483 /* A little bit of braindamage here. The SSE compare instructions
6484 use completely different names for the comparisons than the
6485 fp conditional moves do. */
6486 switch (GET_CODE (x))
6487 {
6488 case EQ:
6489 case UNEQ:
6490 fputs ("eq", file);
6491 break;
6492 case LT:
6493 case UNLT:
6494 fputs ("lt", file);
6495 break;
6496 case LE:
6497 case UNLE:
6498 fputs ("le", file);
6499 break;
6500 case UNORDERED:
6501 fputs ("unord", file);
6502 break;
6503 case NE:
6504 case LTGT:
6505 fputs ("neq", file);
6506 break;
6507 case UNGE:
6508 case GE:
6509 fputs ("nlt", file);
6510 break;
6511 case UNGT:
6512 case GT:
6513 fputs ("nle", file);
6514 break;
6515 case ORDERED:
6516 fputs ("ord", file);
6517 break;
6518 default:
6519 abort ();
6520 break;
6521 }
6522 return;
6523 case 'O':
6524 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
6525 if (ASSEMBLER_DIALECT == ASM_ATT)
6526 {
6527 switch (GET_MODE (x))
6528 {
6529 case HImode: putc ('w', file); break;
6530 case SImode:
6531 case SFmode: putc ('l', file); break;
6532 case DImode:
6533 case DFmode: putc ('q', file); break;
6534 default: abort ();
6535 }
6536 putc ('.', file);
6537 }
6538 #endif
6539 return;
6540 case 'C':
6541 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
6542 return;
6543 case 'F':
6544 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
6545 if (ASSEMBLER_DIALECT == ASM_ATT)
6546 putc ('.', file);
6547 #endif
6548 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
6549 return;
6550
6551 /* Like above, but reverse condition */
6552 case 'c':
6553 /* Check to see if argument to %c is really a constant
6554 and not a condition code which needs to be reversed. */
6555 if (!COMPARISON_P (x))
6556 {
6557 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
6558 return;
6559 }
6560 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
6561 return;
6562 case 'f':
6563 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
6564 if (ASSEMBLER_DIALECT == ASM_ATT)
6565 putc ('.', file);
6566 #endif
6567 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
6568 return;
6569
6570 case 'H':
6571 /* It doesn't actually matter what mode we use here, as we're
6572 only going to use this for printing. */
6573 x = adjust_address_nv (x, DImode, 8);
6574 break;
6575
6576 case '+':
6577 {
6578 rtx x;
6579
6580 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
6581 return;
6582
6583 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
6584 if (x)
6585 {
6586 int pred_val = INTVAL (XEXP (x, 0));
6587
6588 if (pred_val < REG_BR_PROB_BASE * 45 / 100
6589 || pred_val > REG_BR_PROB_BASE * 55 / 100)
6590 {
6591 int taken = pred_val > REG_BR_PROB_BASE / 2;
6592 int cputaken = final_forward_branch_p (current_output_insn) == 0;
6593
6594 /* Emit hints only in the case where the default branch prediction
6595 heuristics would fail. */
6596 if (taken != cputaken)
6597 {
6598 /* We use 3e (DS) prefix for taken branches and
6599 2e (CS) prefix for not taken branches. */
6600 if (taken)
6601 fputs ("ds ; ", file);
6602 else
6603 fputs ("cs ; ", file);
6604 }
6605 }
6606 }
6607 return;
6608 }
6609 default:
6610 output_operand_lossage ("invalid operand code '%c'", code);
6611 }
6612 }
6613
6614 if (GET_CODE (x) == REG)
6615 print_reg (x, code, file);
6616
6617 else if (GET_CODE (x) == MEM)
6618 {
6619 /* No `byte ptr' prefix for call instructions. */
6620 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
6621 {
6622 const char * size;
6623 switch (GET_MODE_SIZE (GET_MODE (x)))
6624 {
6625 case 1: size = "BYTE"; break;
6626 case 2: size = "WORD"; break;
6627 case 4: size = "DWORD"; break;
6628 case 8: size = "QWORD"; break;
6629 case 12: size = "XWORD"; break;
6630 case 16: size = "XMMWORD"; break;
6631 default:
6632 abort ();
6633 }
6634
6635 /* Check for explicit size override (codes 'b', 'w' and 'k') */
6636 if (code == 'b')
6637 size = "BYTE";
6638 else if (code == 'w')
6639 size = "WORD";
6640 else if (code == 'k')
6641 size = "DWORD";
6642
6643 fputs (size, file);
6644 fputs (" PTR ", file);
6645 }
6646
6647 x = XEXP (x, 0);
6648 /* Avoid (%rip) for call operands. */
6649 if (CONSTANT_ADDRESS_P (x) && code == 'P'
6650 && GET_CODE (x) != CONST_INT)
6651 output_addr_const (file, x);
6652 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
6653 output_operand_lossage ("invalid constraints for operand");
6654 else
6655 output_address (x);
6656 }
6657
6658 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
6659 {
6660 REAL_VALUE_TYPE r;
6661 long l;
6662
6663 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6664 REAL_VALUE_TO_TARGET_SINGLE (r, l);
6665
6666 if (ASSEMBLER_DIALECT == ASM_ATT)
6667 putc ('$', file);
6668 fprintf (file, "0x%08lx", l);
6669 }
6670
6671 /* These float cases don't actually occur as immediate operands. */
6672 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
6673 {
6674 char dstr[30];
6675
6676 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
6677 fprintf (file, "%s", dstr);
6678 }
6679
6680 else if (GET_CODE (x) == CONST_DOUBLE
6681 && GET_MODE (x) == XFmode)
6682 {
6683 char dstr[30];
6684
6685 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
6686 fprintf (file, "%s", dstr);
6687 }
6688
6689 else
6690 {
6691 if (code != 'P')
6692 {
6693 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
6694 {
6695 if (ASSEMBLER_DIALECT == ASM_ATT)
6696 putc ('$', file);
6697 }
6698 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
6699 || GET_CODE (x) == LABEL_REF)
6700 {
6701 if (ASSEMBLER_DIALECT == ASM_ATT)
6702 putc ('$', file);
6703 else
6704 fputs ("OFFSET FLAT:", file);
6705 }
6706 }
6707 if (GET_CODE (x) == CONST_INT)
6708 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6709 else if (flag_pic)
6710 output_pic_addr_const (file, x, code);
6711 else
6712 output_addr_const (file, x);
6713 }
6714 }
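
/* Illustrative examples (not part of the original sources): with an
   SImode general register operand, "%k0" prints the "%eax"-style name
   and "%b0" the "%al"-style byte name; a CONST_INT operand is printed
   with a leading '$' in AT&T syntax, and a 4-byte MEM operand in Intel
   syntax is prefixed with a "DWORD PTR" size override.  */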
6715 \f
6716 /* Print a memory operand whose address is ADDR. */
6717
6718 void
6719 print_operand_address (FILE *file, rtx addr)
6720 {
6721 struct ix86_address parts;
6722 rtx base, index, disp;
6723 int scale;
6724
6725 if (! ix86_decompose_address (addr, &parts))
6726 abort ();
6727
6728 base = parts.base;
6729 index = parts.index;
6730 disp = parts.disp;
6731 scale = parts.scale;
6732
6733 switch (parts.seg)
6734 {
6735 case SEG_DEFAULT:
6736 break;
6737 case SEG_FS:
6738 case SEG_GS:
6739 if (USER_LABEL_PREFIX[0] == 0)
6740 putc ('%', file);
6741 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
6742 break;
6743 default:
6744 abort ();
6745 }
6746
6747 if (!base && !index)
6748 {
6749 /* A displacement-only address requires special attention. */
6750
6751 if (GET_CODE (disp) == CONST_INT)
6752 {
6753 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
6754 {
6755 if (USER_LABEL_PREFIX[0] == 0)
6756 putc ('%', file);
6757 fputs ("ds:", file);
6758 }
6759 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
6760 }
6761 else if (flag_pic)
6762 output_pic_addr_const (file, disp, 0);
6763 else
6764 output_addr_const (file, disp);
6765
6766 /* Use the one-byte-shorter RIP-relative addressing for 64-bit mode. */
6767 if (TARGET_64BIT
6768 && ((GET_CODE (disp) == SYMBOL_REF
6769 && ! tls_symbolic_operand (disp, GET_MODE (disp)))
6770 || GET_CODE (disp) == LABEL_REF
6771 || (GET_CODE (disp) == CONST
6772 && GET_CODE (XEXP (disp, 0)) == PLUS
6773 && (GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
6774 || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
6775 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)))
6776 fputs ("(%rip)", file);
6777 }
6778 else
6779 {
6780 if (ASSEMBLER_DIALECT == ASM_ATT)
6781 {
6782 if (disp)
6783 {
6784 if (flag_pic)
6785 output_pic_addr_const (file, disp, 0);
6786 else if (GET_CODE (disp) == LABEL_REF)
6787 output_asm_label (disp);
6788 else
6789 output_addr_const (file, disp);
6790 }
6791
6792 putc ('(', file);
6793 if (base)
6794 print_reg (base, 0, file);
6795 if (index)
6796 {
6797 putc (',', file);
6798 print_reg (index, 0, file);
6799 if (scale != 1)
6800 fprintf (file, ",%d", scale);
6801 }
6802 putc (')', file);
6803 }
6804 else
6805 {
6806 rtx offset = NULL_RTX;
6807
6808 if (disp)
6809 {
6810 /* Pull out the offset of a symbol; print any symbol itself. */
6811 if (GET_CODE (disp) == CONST
6812 && GET_CODE (XEXP (disp, 0)) == PLUS
6813 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
6814 {
6815 offset = XEXP (XEXP (disp, 0), 1);
6816 disp = gen_rtx_CONST (VOIDmode,
6817 XEXP (XEXP (disp, 0), 0));
6818 }
6819
6820 if (flag_pic)
6821 output_pic_addr_const (file, disp, 0);
6822 else if (GET_CODE (disp) == LABEL_REF)
6823 output_asm_label (disp);
6824 else if (GET_CODE (disp) == CONST_INT)
6825 offset = disp;
6826 else
6827 output_addr_const (file, disp);
6828 }
6829
6830 putc ('[', file);
6831 if (base)
6832 {
6833 print_reg (base, 0, file);
6834 if (offset)
6835 {
6836 if (INTVAL (offset) >= 0)
6837 putc ('+', file);
6838 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6839 }
6840 }
6841 else if (offset)
6842 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6843 else
6844 putc ('0', file);
6845
6846 if (index)
6847 {
6848 putc ('+', file);
6849 print_reg (index, 0, file);
6850 if (scale != 1)
6851 fprintf (file, "*%d", scale);
6852 }
6853 putc (']', file);
6854 }
6855 }
6856 }
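
/* Illustrative example (not part of the original sources): the address

       (plus (plus (reg %ebp) (mult (reg %eax) (const_int 4))) (const_int -8))

   is printed as "-8(%ebp,%eax,4)" in AT&T syntax and, modulo the
   register prefix, as "[ebp-8+eax*4]" in Intel syntax by the two
   branches above.  */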
6857
6858 bool
6859 output_addr_const_extra (FILE *file, rtx x)
6860 {
6861 rtx op;
6862
6863 if (GET_CODE (x) != UNSPEC)
6864 return false;
6865
6866 op = XVECEXP (x, 0, 0);
6867 switch (XINT (x, 1))
6868 {
6869 case UNSPEC_GOTTPOFF:
6870 output_addr_const (file, op);
6871 /* FIXME: This might be @TPOFF in Sun ld. */
6872 fputs ("@GOTTPOFF", file);
6873 break;
6874 case UNSPEC_TPOFF:
6875 output_addr_const (file, op);
6876 fputs ("@TPOFF", file);
6877 break;
6878 case UNSPEC_NTPOFF:
6879 output_addr_const (file, op);
6880 if (TARGET_64BIT)
6881 fputs ("@TPOFF", file);
6882 else
6883 fputs ("@NTPOFF", file);
6884 break;
6885 case UNSPEC_DTPOFF:
6886 output_addr_const (file, op);
6887 fputs ("@DTPOFF", file);
6888 break;
6889 case UNSPEC_GOTNTPOFF:
6890 output_addr_const (file, op);
6891 if (TARGET_64BIT)
6892 fputs ("@GOTTPOFF(%rip)", file);
6893 else
6894 fputs ("@GOTNTPOFF", file);
6895 break;
6896 case UNSPEC_INDNTPOFF:
6897 output_addr_const (file, op);
6898 fputs ("@INDNTPOFF", file);
6899 break;
6900
6901 default:
6902 return false;
6903 }
6904
6905 return true;
6906 }
6907 \f
6908 /* Split one or more DImode RTL references into pairs of SImode
6909 references. The RTL can be REG, offsettable MEM, integer constant, or
6910 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
6911 split and "num" is its length. lo_half and hi_half are output arrays
6912 that parallel "operands". */
6913
6914 void
6915 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
6916 {
6917 while (num--)
6918 {
6919 rtx op = operands[num];
6920
6921 /* simplify_subreg refuses to split volatile memory addresses,
6922 but we still have to handle them. */
6923 if (GET_CODE (op) == MEM)
6924 {
6925 lo_half[num] = adjust_address (op, SImode, 0);
6926 hi_half[num] = adjust_address (op, SImode, 4);
6927 }
6928 else
6929 {
6930 lo_half[num] = simplify_gen_subreg (SImode, op,
6931 GET_MODE (op) == VOIDmode
6932 ? DImode : GET_MODE (op), 0);
6933 hi_half[num] = simplify_gen_subreg (SImode, op,
6934 GET_MODE (op) == VOIDmode
6935 ? DImode : GET_MODE (op), 4);
6936 }
6937 }
6938 }
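
/* Illustrative sketch (not part of the original sources): splitting a
   DImode memory operand, e.g.

       rtx ops[1] = { gen_rtx_MEM (DImode, stack_pointer_rtx) };
       rtx lo[1], hi[1];
       split_di (ops, 1, lo, hi);

   leaves lo[0] as the SImode MEM at offset 0 and hi[0] as the SImode MEM
   at offset 4, while REG, CONST_INT and CONST_DOUBLE operands are split
   via simplify_gen_subreg instead.  */
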
6939 /* Split one or more TImode RTL references into pairs of DImode
6940 references. The RTL can be REG, offsettable MEM, integer constant, or
6941 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
6942 split and "num" is its length. lo_half and hi_half are output arrays
6943 that parallel "operands". */
6944
6945 void
6946 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
6947 {
6948 while (num--)
6949 {
6950 rtx op = operands[num];
6951
6952 /* simplify_subreg refuses to split volatile memory addresses, but we
6953 still have to handle them. */
6954 if (GET_CODE (op) == MEM)
6955 {
6956 lo_half[num] = adjust_address (op, DImode, 0);
6957 hi_half[num] = adjust_address (op, DImode, 8);
6958 }
6959 else
6960 {
6961 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
6962 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
6963 }
6964 }
6965 }
6966 \f
6967 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
6968 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
6969 is the expression of the binary operation. The output may either be
6970 emitted here, or returned to the caller, like all output_* functions.
6971
6972 There is no guarantee that the operands are the same mode, as they
6973 might be within FLOAT or FLOAT_EXTEND expressions. */
6974
6975 #ifndef SYSV386_COMPAT
6976 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
6977 wants to fix the assemblers because that causes incompatibility
6978 with gcc. No-one wants to fix gcc because that causes
6979 incompatibility with assemblers... You can use the option of
6980 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
6981 #define SYSV386_COMPAT 1
6982 #endif
6983
6984 const char *
6985 output_387_binary_op (rtx insn, rtx *operands)
6986 {
6987 static char buf[30];
6988 const char *p;
6989 const char *ssep;
6990 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
6991
6992 #ifdef ENABLE_CHECKING
6993 /* Even if we do not want to check the inputs, this documents the input
6994 constraints, which helps in understanding the following code. */
6995 if (STACK_REG_P (operands[0])
6996 && ((REG_P (operands[1])
6997 && REGNO (operands[0]) == REGNO (operands[1])
6998 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
6999 || (REG_P (operands[2])
7000 && REGNO (operands[0]) == REGNO (operands[2])
7001 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
7002 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
7003 ; /* ok */
7004 else if (!is_sse)
7005 abort ();
7006 #endif
7007
7008 switch (GET_CODE (operands[3]))
7009 {
7010 case PLUS:
7011 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7012 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7013 p = "fiadd";
7014 else
7015 p = "fadd";
7016 ssep = "add";
7017 break;
7018
7019 case MINUS:
7020 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7021 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7022 p = "fisub";
7023 else
7024 p = "fsub";
7025 ssep = "sub";
7026 break;
7027
7028 case MULT:
7029 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7030 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7031 p = "fimul";
7032 else
7033 p = "fmul";
7034 ssep = "mul";
7035 break;
7036
7037 case DIV:
7038 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7039 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7040 p = "fidiv";
7041 else
7042 p = "fdiv";
7043 ssep = "div";
7044 break;
7045
7046 default:
7047 abort ();
7048 }
7049
7050 if (is_sse)
7051 {
7052 strcpy (buf, ssep);
7053 if (GET_MODE (operands[0]) == SFmode)
7054 strcat (buf, "ss\t{%2, %0|%0, %2}");
7055 else
7056 strcat (buf, "sd\t{%2, %0|%0, %2}");
7057 return buf;
7058 }
7059 strcpy (buf, p);
7060
7061 switch (GET_CODE (operands[3]))
7062 {
7063 case MULT:
7064 case PLUS:
7065 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
7066 {
7067 rtx temp = operands[2];
7068 operands[2] = operands[1];
7069 operands[1] = temp;
7070 }
7071
7072 /* We now know operands[0] == operands[1]. */
7073
7074 if (GET_CODE (operands[2]) == MEM)
7075 {
7076 p = "%z2\t%2";
7077 break;
7078 }
7079
7080 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7081 {
7082 if (STACK_TOP_P (operands[0]))
7083 /* How is it that we are storing to a dead operand[2]?
7084 Well, presumably operands[1] is dead too. We can't
7085 store the result to st(0) as st(0) gets popped on this
7086 instruction. Instead store to operands[2] (which I
7087 think has to be st(1)). st(1) will be popped later.
7088 gcc <= 2.8.1 didn't have this check and generated
7089 assembly code that the Unixware assembler rejected. */
7090 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7091 else
7092 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7093 break;
7094 }
7095
7096 if (STACK_TOP_P (operands[0]))
7097 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7098 else
7099 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7100 break;
7101
7102 case MINUS:
7103 case DIV:
7104 if (GET_CODE (operands[1]) == MEM)
7105 {
7106 p = "r%z1\t%1";
7107 break;
7108 }
7109
7110 if (GET_CODE (operands[2]) == MEM)
7111 {
7112 p = "%z2\t%2";
7113 break;
7114 }
7115
7116 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7117 {
7118 #if SYSV386_COMPAT
7119 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7120 derived assemblers, confusingly reverse the direction of
7121 the operation for fsub{r} and fdiv{r} when the
7122 destination register is not st(0). The Intel assembler
7123 doesn't have this brain damage. Read !SYSV386_COMPAT to
7124 figure out what the hardware really does. */
7125 if (STACK_TOP_P (operands[0]))
7126 p = "{p\t%0, %2|rp\t%2, %0}";
7127 else
7128 p = "{rp\t%2, %0|p\t%0, %2}";
7129 #else
7130 if (STACK_TOP_P (operands[0]))
7131 /* As above for fmul/fadd, we can't store to st(0). */
7132 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7133 else
7134 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7135 #endif
7136 break;
7137 }
7138
7139 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
7140 {
7141 #if SYSV386_COMPAT
7142 if (STACK_TOP_P (operands[0]))
7143 p = "{rp\t%0, %1|p\t%1, %0}";
7144 else
7145 p = "{p\t%1, %0|rp\t%0, %1}";
7146 #else
7147 if (STACK_TOP_P (operands[0]))
7148 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7149 else
7150 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7151 #endif
7152 break;
7153 }
7154
7155 if (STACK_TOP_P (operands[0]))
7156 {
7157 if (STACK_TOP_P (operands[1]))
7158 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7159 else
7160 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
7161 break;
7162 }
7163 else if (STACK_TOP_P (operands[1]))
7164 {
7165 #if SYSV386_COMPAT
7166 p = "{\t%1, %0|r\t%0, %1}";
7167 #else
7168 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
7169 #endif
7170 }
7171 else
7172 {
7173 #if SYSV386_COMPAT
7174 p = "{r\t%2, %0|\t%0, %2}";
7175 #else
7176 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7177 #endif
7178 }
7179 break;
7180
7181 default:
7182 abort ();
7183 }
7184
7185 strcat (buf, p);
7186 return buf;
7187 }
7188
7189 /* Output code to initialize control word copies used by trunc?f?i and
7190 rounding patterns. CURRENT_MODE is set to the current control word,
7191 while NEW_MODE is set to the new control word. */
7192
7193 void
7194 emit_i387_cw_initialization (rtx current_mode, rtx new_mode, int mode)
7195 {
7196 rtx reg = gen_reg_rtx (HImode);
7197
7198 emit_insn (gen_x86_fnstcw_1 (current_mode));
7199 emit_move_insn (reg, current_mode);
7200
7201 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
7202 && !TARGET_64BIT)
7203 {
7204 switch (mode)
7205 {
7206 case I387_CW_FLOOR:
7207 /* round down toward -oo */
7208 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
7209 break;
7210
7211 case I387_CW_CEIL:
7212 /* round up toward +oo */
7213 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
7214 break;
7215
7216 case I387_CW_TRUNC:
7217 /* round toward zero (truncate) */
7218 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
7219 break;
7220
7221 case I387_CW_MASK_PM:
7222 /* mask precision exception for nearbyint() */
7223 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
7224 break;
7225
7226 default:
7227 abort();
7228 }
7229 }
7230 else
7231 {
7232 switch (mode)
7233 {
7234 case I387_CW_FLOOR:
7235 /* round down toward -oo */
7236 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
7237 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
7238 break;
7239
7240 case I387_CW_CEIL:
7241 /* round up toward +oo */
7242 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
7243 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
7244 break;
7245
7246 case I387_CW_TRUNC:
7247 /* round toward zero (truncate) */
7248 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
7249 break;
7250
7251 case I387_CW_MASK_PM:
7252 /* mask precision exception for nearbyint() */
7253 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
7254 break;
7255
7256 default:
7257 abort();
7258 }
7259 }
7260
7261 emit_move_insn (new_mode, reg);
7262 }
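
/* Illustrative note (not part of the original sources): the rounding
   control field is bits 10-11 of the i387 control word, so the generic
   path above masks out 0x0c00 and then ORs in 0x0400 (round toward -oo),
   0x0800 (round toward +oo), or simply ORs 0x0c00 (truncate, where no
   prior masking is needed), while 0x0020 sets the precision-exception
   mask bit used for nearbyint().  */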
7263
7264 /* Output code for INSN to convert a float to a signed int. OPERANDS
7265 are the insn operands. The output may be [HSD]Imode and the input
7266 operand may be [SDX]Fmode. */
7267
7268 const char *
7269 output_fix_trunc (rtx insn, rtx *operands)
7270 {
7271 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7272 int dimode_p = GET_MODE (operands[0]) == DImode;
7273
7274 /* Jump through a hoop or two for DImode, since the hardware has no
7275 non-popping instruction. We used to do this a different way, but
7276 that was somewhat fragile and broke with post-reload splitters. */
7277 if (dimode_p && !stack_top_dies)
7278 output_asm_insn ("fld\t%y1", operands);
7279
7280 if (!STACK_TOP_P (operands[1]))
7281 abort ();
7282
7283 if (GET_CODE (operands[0]) != MEM)
7284 abort ();
7285
7286 output_asm_insn ("fldcw\t%3", operands);
7287 if (stack_top_dies || dimode_p)
7288 output_asm_insn ("fistp%z0\t%0", operands);
7289 else
7290 output_asm_insn ("fist%z0\t%0", operands);
7291 output_asm_insn ("fldcw\t%2", operands);
7292
7293 return "";
7294 }
7295
7296 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
7297 should be used. UNORDERED_P is true when fucom should be used. */
7298
7299 const char *
7300 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
7301 {
7302 int stack_top_dies;
7303 rtx cmp_op0, cmp_op1;
7304 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
7305
7306 if (eflags_p)
7307 {
7308 cmp_op0 = operands[0];
7309 cmp_op1 = operands[1];
7310 }
7311 else
7312 {
7313 cmp_op0 = operands[1];
7314 cmp_op1 = operands[2];
7315 }
7316
7317 if (is_sse)
7318 {
7319 if (GET_MODE (operands[0]) == SFmode)
7320 if (unordered_p)
7321 return "ucomiss\t{%1, %0|%0, %1}";
7322 else
7323 return "comiss\t{%1, %0|%0, %1}";
7324 else
7325 if (unordered_p)
7326 return "ucomisd\t{%1, %0|%0, %1}";
7327 else
7328 return "comisd\t{%1, %0|%0, %1}";
7329 }
7330
7331 if (! STACK_TOP_P (cmp_op0))
7332 abort ();
7333
7334 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7335
7336 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
7337 {
7338 if (stack_top_dies)
7339 {
7340 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
7341 return TARGET_USE_FFREEP ? "ffreep\t%y1" : "fstp\t%y1";
7342 }
7343 else
7344 return "ftst\n\tfnstsw\t%0";
7345 }
7346
7347 if (STACK_REG_P (cmp_op1)
7348 && stack_top_dies
7349 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
7350 && REGNO (cmp_op1) != FIRST_STACK_REG)
7351 {
7352 /* If both the top of the 387 stack and the other operand (which is
7353 also a stack register) die, then this must be an
7354 `fcompp' float compare. */
7355
7356 if (eflags_p)
7357 {
7358 /* There is no double popping fcomi variant. Fortunately,
7359 eflags is immune from the fstp's cc clobbering. */
7360 if (unordered_p)
7361 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
7362 else
7363 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
7364 return TARGET_USE_FFREEP ? "ffreep\t%y0" : "fstp\t%y0";
7365 }
7366 else
7367 {
7368 if (unordered_p)
7369 return "fucompp\n\tfnstsw\t%0";
7370 else
7371 return "fcompp\n\tfnstsw\t%0";
7372 }
7373 }
7374 else
7375 {
7376 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
7377
7378 static const char * const alt[16] =
7379 {
7380 "fcom%z2\t%y2\n\tfnstsw\t%0",
7381 "fcomp%z2\t%y2\n\tfnstsw\t%0",
7382 "fucom%z2\t%y2\n\tfnstsw\t%0",
7383 "fucomp%z2\t%y2\n\tfnstsw\t%0",
7384
7385 "ficom%z2\t%y2\n\tfnstsw\t%0",
7386 "ficomp%z2\t%y2\n\tfnstsw\t%0",
7387 NULL,
7388 NULL,
7389
7390 "fcomi\t{%y1, %0|%0, %y1}",
7391 "fcomip\t{%y1, %0|%0, %y1}",
7392 "fucomi\t{%y1, %0|%0, %y1}",
7393 "fucomip\t{%y1, %0|%0, %y1}",
7394
7395 NULL,
7396 NULL,
7397 NULL,
7398 NULL
7399 };
7400
7401 int mask;
7402 const char *ret;
7403
7404 mask = eflags_p << 3;
7405 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
7406 mask |= unordered_p << 1;
7407 mask |= stack_top_dies;
7408
7409 if (mask >= 16)
7410 abort ();
7411 ret = alt[mask];
7412 if (ret == NULL)
7413 abort ();
7414
7415 return ret;
7416 }
7417 }
7418
7419 void
7420 ix86_output_addr_vec_elt (FILE *file, int value)
7421 {
7422 const char *directive = ASM_LONG;
7423
7424 if (TARGET_64BIT)
7425 {
7426 #ifdef ASM_QUAD
7427 directive = ASM_QUAD;
7428 #else
7429 abort ();
7430 #endif
7431 }
7432
7433 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
7434 }
7435
7436 void
7437 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
7438 {
7439 if (TARGET_64BIT)
7440 fprintf (file, "%s%s%d-%s%d\n",
7441 ASM_LONG, LPREFIX, value, LPREFIX, rel);
7442 else if (HAVE_AS_GOTOFF_IN_DATA)
7443 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
7444 #if TARGET_MACHO
7445 else if (TARGET_MACHO)
7446 {
7447 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
7448 machopic_output_function_base_name (file);
7449 fprintf(file, "\n");
7450 }
7451 #endif
7452 else
7453 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
7454 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
7455 }
7456 \f
7457 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
7458 for the target. */
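/* Note: "xor reg, reg" has the shorter encoding but clobbers the flags,
   which is why the expander below attaches a flags-register clobber to
   the set; "mov $0, reg" is longer but leaves the flags intact and is
   used when TARGET_USE_MOV0 is set and we are not optimizing for size.  */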
7459
7460 void
7461 ix86_expand_clear (rtx dest)
7462 {
7463 rtx tmp;
7464
7465 /* We play register width games, which are only valid after reload. */
7466 if (!reload_completed)
7467 abort ();
7468
7469 /* Avoid HImode and its attendant prefix byte. */
7470 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
7471 dest = gen_rtx_REG (SImode, REGNO (dest));
7472
7473 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
7474
7475 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
7476 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
7477 {
7478 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
7479 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
7480 }
7481
7482 emit_insn (tmp);
7483 }
7484
7485 /* X is an unchanging MEM. If it is a constant pool reference, return
7486 the constant pool rtx, else NULL. */
7487
7488 rtx
7489 maybe_get_pool_constant (rtx x)
7490 {
7491 x = ix86_delegitimize_address (XEXP (x, 0));
7492
7493 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7494 return get_pool_constant (x);
7495
7496 return NULL_RTX;
7497 }
7498
7499 void
7500 ix86_expand_move (enum machine_mode mode, rtx operands[])
7501 {
7502 int strict = (reload_in_progress || reload_completed);
7503 rtx op0, op1;
7504 enum tls_model model;
7505
7506 op0 = operands[0];
7507 op1 = operands[1];
7508
7509 if (GET_CODE (op1) == SYMBOL_REF)
7510 {
7511 model = SYMBOL_REF_TLS_MODEL (op1);
7512 if (model)
7513 {
7514 op1 = legitimize_tls_address (op1, model, true);
7515 op1 = force_operand (op1, op0);
7516 if (op1 == op0)
7517 return;
7518 }
7519 }
7520 else if (GET_CODE (op1) == CONST
7521 && GET_CODE (XEXP (op1, 0)) == PLUS
7522 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
7523 {
7524 model = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1, 0), 0));
7525 if (model)
7526 {
7527 rtx addend = XEXP (XEXP (op1, 0), 1);
7528 op1 = legitimize_tls_address (XEXP (XEXP (op1, 0), 0), model, true);
7529 op1 = force_operand (op1, NULL);
7530 op1 = expand_simple_binop (Pmode, PLUS, op1, addend,
7531 op0, 1, OPTAB_DIRECT);
7532 if (op1 == op0)
7533 return;
7534 }
7535 }
7536
7537 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
7538 {
7539 #if TARGET_MACHO
7540 if (MACHOPIC_PURE)
7541 {
7542 rtx temp = ((reload_in_progress
7543 || ((op0 && GET_CODE (op0) == REG)
7544 && mode == Pmode))
7545 ? op0 : gen_reg_rtx (Pmode));
7546 op1 = machopic_indirect_data_reference (op1, temp);
7547 op1 = machopic_legitimize_pic_address (op1, mode,
7548 temp == op1 ? 0 : temp);
7549 }
7550 else if (MACHOPIC_INDIRECT)
7551 op1 = machopic_indirect_data_reference (op1, 0);
7552 if (op0 == op1)
7553 return;
7554 #else
7555 if (GET_CODE (op0) == MEM)
7556 op1 = force_reg (Pmode, op1);
7557 else
7558 op1 = legitimize_address (op1, op1, Pmode);
7559 #endif /* TARGET_MACHO */
7560 }
7561 else
7562 {
7563 if (GET_CODE (op0) == MEM
7564 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
7565 || !push_operand (op0, mode))
7566 && GET_CODE (op1) == MEM)
7567 op1 = force_reg (mode, op1);
7568
7569 if (push_operand (op0, mode)
7570 && ! general_no_elim_operand (op1, mode))
7571 op1 = copy_to_mode_reg (mode, op1);
7572
7573 /* Force large constants in 64-bit compilation into a register
7574 to get them CSEed. */
7575 if (TARGET_64BIT && mode == DImode
7576 && immediate_operand (op1, mode)
7577 && !x86_64_zext_immediate_operand (op1, VOIDmode)
7578 && !register_operand (op0, mode)
7579 && optimize && !reload_completed && !reload_in_progress)
7580 op1 = copy_to_mode_reg (mode, op1);
7581
7582 if (FLOAT_MODE_P (mode))
7583 {
7584 /* If we are loading a floating point constant to a register,
7585 force the value to memory now, since we'll get better code
7586 out the back end. */
7587
7588 if (strict)
7589 ;
7590 else if (GET_CODE (op1) == CONST_DOUBLE)
7591 {
7592 op1 = validize_mem (force_const_mem (mode, op1));
7593 if (!register_operand (op0, mode))
7594 {
7595 rtx temp = gen_reg_rtx (mode);
7596 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
7597 emit_move_insn (op0, temp);
7598 return;
7599 }
7600 }
7601 }
7602 }
7603
7604 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
7605 }
7606
7607 void
7608 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
7609 {
7610 rtx op0 = operands[0], op1 = operands[1];
7611
7612 /* Force constants other than zero into memory. We do not know how
7613 the instructions used to build constants modify the upper 64 bits
7614 of the register; once we have that information, we may be able
7615 to handle some of them more efficiently. */
7616 if ((reload_in_progress | reload_completed) == 0
7617 && register_operand (op0, mode)
7618 && CONSTANT_P (op1) && op1 != CONST0_RTX (mode))
7619 op1 = validize_mem (force_const_mem (mode, op1));
7620
7621 /* Make operand1 a register if it isn't already. */
7622 if (!no_new_pseudos
7623 && !register_operand (op0, mode)
7624 && !register_operand (op1, mode))
7625 {
7626 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
7627 return;
7628 }
7629
7630 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
7631 }
7632
7633 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
7634 straight to ix86_expand_vector_move. */
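/* In outline, the strategy below is: use movups when optimizing for size
   (smallest encoding); use movdqu for SSE2 integer vector modes; split
   V2DFmode accesses into loadlpd/loadhpd (or storelpd/storehpd) halves,
   clearing or clobbering the destination first to break the dependency
   on its previous contents; and split the remaining FP modes the same
   way with loadlps/loadhps (or storelps/storehps).  */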
7635
7636 void
7637 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
7638 {
7639 rtx op0, op1, m;
7640
7641 op0 = operands[0];
7642 op1 = operands[1];
7643
7644 if (MEM_P (op1))
7645 {
7646 /* If we're optimizing for size, movups is the smallest. */
7647 if (optimize_size)
7648 {
7649 op0 = gen_lowpart (V4SFmode, op0);
7650 op1 = gen_lowpart (V4SFmode, op1);
7651 emit_insn (gen_sse_movups (op0, op1));
7652 return;
7653 }
7654
7655 /* ??? If we have typed data, then it would appear that using
7656 movdqu is the only way to get unaligned data loaded with
7657 integer type. */
7658 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
7659 {
7660 op0 = gen_lowpart (V16QImode, op0);
7661 op1 = gen_lowpart (V16QImode, op1);
7662 emit_insn (gen_sse2_movdqu (op0, op1));
7663 return;
7664 }
7665
7666 if (TARGET_SSE2 && mode == V2DFmode)
7667 {
7668 rtx zero;
7669
7670 /* When SSE registers are split into halves, we can avoid
7671 writing to the top half twice. */
7672 if (TARGET_SSE_SPLIT_REGS)
7673 {
7674 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
7675 zero = op0;
7676 }
7677 else
7678 {
7679 /* ??? Not sure about the best option for the Intel chips.
7680 The following would seem to satisfy; the register is
7681 entirely cleared, breaking the dependency chain. We
7682 then store to the upper half, with a dependency depth
7683 of one. A rumor has it that Intel recommends two movsd
7684 followed by an unpacklpd, but this is unconfirmed. And
7685 given that the dependency depth of the unpacklpd would
7686 still be one, I'm not sure why this would be better. */
7687 zero = CONST0_RTX (V2DFmode);
7688 }
7689
7690 m = adjust_address (op1, DFmode, 0);
7691 emit_insn (gen_sse2_loadlpd (op0, zero, m));
7692 m = adjust_address (op1, DFmode, 8);
7693 emit_insn (gen_sse2_loadhpd (op0, op0, m));
7694 }
7695 else
7696 {
7697 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
7698 emit_move_insn (op0, CONST0_RTX (mode));
7699 else
7700 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
7701
7702 m = adjust_address (op1, V2SFmode, 0);
7703 emit_insn (gen_sse_loadlps (op0, op0, m));
7704 m = adjust_address (op1, V2SFmode, 8);
7705 emit_insn (gen_sse_loadhps (op0, op0, m));
7706 }
7707 }
7708 else if (MEM_P (op0))
7709 {
7710 /* If we're optimizing for size, movups is the smallest. */
7711 if (optimize_size)
7712 {
7713 op0 = gen_lowpart (V4SFmode, op0);
7714 op1 = gen_lowpart (V4SFmode, op1);
7715 emit_insn (gen_sse_movups (op0, op1));
7716 return;
7717 }
7718
7719 /* ??? Similar to above, only less clear because of quote
7720 typeless stores unquote. */
7721 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
7722 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
7723 {
7724 op0 = gen_lowpart (V16QImode, op0);
7725 op1 = gen_lowpart (V16QImode, op1);
7726 emit_insn (gen_sse2_movdqu (op0, op1));
7727 return;
7728 }
7729
7730 if (TARGET_SSE2 && mode == V2DFmode)
7731 {
7732 m = adjust_address (op0, DFmode, 0);
7733 emit_insn (gen_sse2_storelpd (m, op1));
7734 m = adjust_address (op0, DFmode, 8);
7735 emit_insn (gen_sse2_storehpd (m, op1));
7736 }
7737 else
7738 {
7739 if (mode != V4SFmode)
7740 op1 = gen_lowpart (V4SFmode, op1);
7741 m = adjust_address (op0, V2SFmode, 0);
7742 emit_insn (gen_sse_storelps (m, op1));
7743 m = adjust_address (op0, V2SFmode, 8);
7744 emit_insn (gen_sse_storehps (m, op1));
7745 }
7746 }
7747 else
7748 gcc_unreachable ();
7749 }
7750
7751
7752 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
7753 destination to use for the operation. If different from the true
7754 destination in operands[0], a copy operation will be required. */
7755
7756 rtx
7757 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
7758 rtx operands[])
7759 {
7760 int matching_memory;
7761 rtx src1, src2, dst;
7762
7763 dst = operands[0];
7764 src1 = operands[1];
7765 src2 = operands[2];
7766
7767 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
7768 if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
7769 && (rtx_equal_p (dst, src2)
7770 || immediate_operand (src1, mode)))
7771 {
7772 rtx temp = src1;
7773 src1 = src2;
7774 src2 = temp;
7775 }
7776
7777 /* If the destination is memory, and we do not have matching source
7778 operands, do things in registers. */
7779 matching_memory = 0;
7780 if (GET_CODE (dst) == MEM)
7781 {
7782 if (rtx_equal_p (dst, src1))
7783 matching_memory = 1;
7784 else if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
7785 && rtx_equal_p (dst, src2))
7786 matching_memory = 2;
7787 else
7788 dst = gen_reg_rtx (mode);
7789 }
7790
7791 /* Both source operands cannot be in memory. */
7792 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
7793 {
7794 if (matching_memory != 2)
7795 src2 = force_reg (mode, src2);
7796 else
7797 src1 = force_reg (mode, src1);
7798 }
7799
7800 /* If the operation is not commutable, source 1 cannot be a constant
7801 or non-matching memory. */
7802 if ((CONSTANT_P (src1)
7803 || (!matching_memory && GET_CODE (src1) == MEM))
7804 && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
7805 src1 = force_reg (mode, src1);
7806
7807 /* If optimizing, copy to regs to improve CSE */
7808 if (optimize && ! no_new_pseudos)
7809 {
7810 if (GET_CODE (dst) == MEM)
7811 dst = gen_reg_rtx (mode);
7812 if (GET_CODE (src1) == MEM)
7813 src1 = force_reg (mode, src1);
7814 if (GET_CODE (src2) == MEM)
7815 src2 = force_reg (mode, src2);
7816 }
7817
7818 operands[1] = src1;
7819 operands[2] = src2;
7820 return dst;
7821 }
7822
7823 /* Similarly, but assume that the destination has already been
7824 set up properly. */
7825
7826 void
7827 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
7828 enum machine_mode mode, rtx operands[])
7829 {
7830 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
7831 gcc_assert (dst == operands[0]);
7832 }
7833
7834 /* Attempt to expand a binary operator. Make the expansion closer to the
7835 actual machine than just general_operand, which would allow 3 separate
7836 memory references (one output, two input) in a single insn. */
7837
7838 void
7839 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
7840 rtx operands[])
7841 {
7842 rtx src1, src2, dst, op, clob;
7843
7844 dst = ix86_fixup_binary_operands (code, mode, operands);
7845 src1 = operands[1];
7846 src2 = operands[2];
7847
7848 /* Emit the instruction. */
7849
7850 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
7851 if (reload_in_progress)
7852 {
7853 /* Reload doesn't know about the flags register, and doesn't know that
7854 it doesn't want to clobber it. We can only do this with PLUS. */
7855 if (code != PLUS)
7856 abort ();
7857 emit_insn (op);
7858 }
7859 else
7860 {
7861 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
7862 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
7863 }
7864
7865 /* Fix up the destination if needed. */
7866 if (dst != operands[0])
7867 emit_move_insn (operands[0], dst);
7868 }
7869
7870 /* Return TRUE or FALSE depending on whether the binary operator meets the
7871 appropriate constraints. */
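/* The checks below reflect the two-address nature of x86 arithmetic
   instructions (dst = dst OP src): at most one operand may be in memory,
   a memory destination must match one of the sources, and for a
   non-commutative operation a memory source must be the matching one.  */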
7872
7873 int
7874 ix86_binary_operator_ok (enum rtx_code code,
7875 enum machine_mode mode ATTRIBUTE_UNUSED,
7876 rtx operands[3])
7877 {
7878 /* Both source operands cannot be in memory. */
7879 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
7880 return 0;
7881 /* If the operation is not commutable, source 1 cannot be a constant. */
7882 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
7883 return 0;
7884 /* If the destination is memory, we must have a matching source operand. */
7885 if (GET_CODE (operands[0]) == MEM
7886 && ! (rtx_equal_p (operands[0], operands[1])
7887 || (GET_RTX_CLASS (code) == RTX_COMM_ARITH
7888 && rtx_equal_p (operands[0], operands[2]))))
7889 return 0;
7890 /* If the operation is not commutable and the source 1 is memory, we must
7891 have a matching destination. */
7892 if (GET_CODE (operands[1]) == MEM
7893 && GET_RTX_CLASS (code) != RTX_COMM_ARITH
7894 && ! rtx_equal_p (operands[0], operands[1]))
7895 return 0;
7896 return 1;
7897 }
7898
7899 /* Attempt to expand a unary operator. Make the expansion closer to the
7900 actual machine than just general_operand, which would allow 2 separate
7901 memory references (one output, one input) in a single insn. */
7902
7903 void
7904 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
7905 rtx operands[])
7906 {
7907 int matching_memory;
7908 rtx src, dst, op, clob;
7909
7910 dst = operands[0];
7911 src = operands[1];
7912
7913 /* If the destination is memory, and we do not have matching source
7914 operands, do things in registers. */
7915 matching_memory = 0;
7916 if (MEM_P (dst))
7917 {
7918 if (rtx_equal_p (dst, src))
7919 matching_memory = 1;
7920 else
7921 dst = gen_reg_rtx (mode);
7922 }
7923
7924 /* When source operand is memory, destination must match. */
7925 if (MEM_P (src) && !matching_memory)
7926 src = force_reg (mode, src);
7927
7928 /* If optimizing, copy to regs to improve CSE. */
7929 if (optimize && ! no_new_pseudos)
7930 {
7931 if (GET_CODE (dst) == MEM)
7932 dst = gen_reg_rtx (mode);
7933 if (GET_CODE (src) == MEM)
7934 src = force_reg (mode, src);
7935 }
7936
7937 /* Emit the instruction. */
7938
7939 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
7940 if (reload_in_progress || code == NOT)
7941 {
7942 /* Reload doesn't know about the flags register, and doesn't know that
7943 it doesn't want to clobber it. */
7944 if (code != NOT)
7945 abort ();
7946 emit_insn (op);
7947 }
7948 else
7949 {
7950 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
7951 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
7952 }
7953
7954 /* Fix up the destination if needed. */
7955 if (dst != operands[0])
7956 emit_move_insn (operands[0], dst);
7957 }
7958
7959 /* Return TRUE or FALSE depending on whether the unary operator meets the
7960 appropriate constraints. */
7961
7962 int
7963 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
7964 enum machine_mode mode ATTRIBUTE_UNUSED,
7965 rtx operands[2] ATTRIBUTE_UNUSED)
7966 {
7967 /* If one of operands is memory, source and destination must match. */
7968 if ((GET_CODE (operands[0]) == MEM
7969 || GET_CODE (operands[1]) == MEM)
7970 && ! rtx_equal_p (operands[0], operands[1]))
7971 return FALSE;
7972 return TRUE;
7973 }
7974
7975 /* Generate code for floating point ABS or NEG. */
7976
7977 void
7978 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
7979 rtx operands[])
7980 {
7981 rtx mask, set, use, clob, dst, src;
7982 bool matching_memory;
7983 bool use_sse = false;
7984 bool vector_mode = VECTOR_MODE_P (mode);
7985 enum machine_mode elt_mode = mode;
7986 enum machine_mode vec_mode = VOIDmode;
7987
7988 if (vector_mode)
7989 {
7990 elt_mode = GET_MODE_INNER (mode);
7991 vec_mode = mode;
7992 use_sse = true;
7993 }
7994 if (TARGET_SSE_MATH)
7995 {
7996 if (mode == SFmode)
7997 {
7998 use_sse = true;
7999 vec_mode = V4SFmode;
8000 }
8001 else if (mode == DFmode && TARGET_SSE2)
8002 {
8003 use_sse = true;
8004 vec_mode = V2DFmode;
8005 }
8006 }
8007
8008 /* NEG and ABS performed with SSE use bitwise mask operations.
8009 Create the appropriate mask now. */
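/* The idea: for NEG the mask has only the sign bit of each element set,
   so XORing it into the operand flips the sign; for ABS the mask is the
   bitwise complement (see the code == ABS case below), so ANDing it
   clears the sign bit.  Vector modes emit the XOR/AND directly further
   down; scalar SSE modes carry the mask via a USE, presumably for the
   splitter of the absneg pattern to consume.  */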
8010 if (use_sse)
8011 {
8012 HOST_WIDE_INT hi, lo;
8013 int shift = 63;
8014 rtvec v;
8015
8016 /* Find the sign bit, sign extended to 2*HWI. */
8017 if (elt_mode == SFmode)
8018 lo = 0x80000000, hi = lo < 0;
8019 else if (HOST_BITS_PER_WIDE_INT >= 64)
8020 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
8021 else
8022 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
8023
8024 /* If we're looking for the absolute value, then we want
8025 the complement. */
8026 if (code == ABS)
8027 lo = ~lo, hi = ~hi;
8028
8029 /* Force this value into the low part of a fp vector constant. */
8030 mask = immed_double_const (lo, hi, elt_mode == SFmode ? SImode : DImode);
8031 mask = gen_lowpart (elt_mode, mask);
8032
8033 switch (mode)
8034 {
8035 case SFmode:
8036 v = gen_rtvec (4, mask, CONST0_RTX (SFmode),
8037 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
8038 break;
8039
8040 case DFmode:
8041 v = gen_rtvec (2, mask, CONST0_RTX (DFmode));
8042 break;
8043
8044 case V4SFmode:
8045 v = gen_rtvec (4, mask, mask, mask, mask);
8046 break;
8047
8048 case V2DFmode:
8049 v = gen_rtvec (2, mask, mask);
8050 break;
8051
8052 default:
8053 gcc_unreachable ();
8054 }
8055
8056 mask = gen_rtx_CONST_VECTOR (vec_mode, v);
8057 mask = force_reg (vec_mode, mask);
8058 }
8059 else
8060 {
8061 /* When not using SSE, we don't use the mask, but prefer to keep the
8062 same general form of the insn pattern to reduce duplication when
8063 it comes time to split. */
8064 mask = const0_rtx;
8065 }
8066
8067 dst = operands[0];
8068 src = operands[1];
8069
8070 /* If the destination is memory, and we don't have matching source
8071 operands, do things in registers. */
8072 matching_memory = false;
8073 if (MEM_P (dst))
8074 {
8075 if (rtx_equal_p (dst, src) && (!optimize || no_new_pseudos))
8076 matching_memory = true;
8077 else
8078 dst = gen_reg_rtx (mode);
8079 }
8080 if (MEM_P (src) && !matching_memory)
8081 src = force_reg (mode, src);
8082
8083 if (vector_mode)
8084 {
8085 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
8086 set = gen_rtx_SET (VOIDmode, dst, set);
8087 emit_insn (set);
8088 }
8089 else
8090 {
8091 set = gen_rtx_fmt_e (code, mode, src);
8092 set = gen_rtx_SET (VOIDmode, dst, set);
8093 use = gen_rtx_USE (VOIDmode, mask);
8094 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8095 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (3, set, use, clob)));
8096 }
8097
8098 if (dst != operands[0])
8099 emit_move_insn (operands[0], dst);
8100 }
8101
8102 /* Return TRUE or FALSE depending on whether the first SET in INSN
8103 has source and destination with matching CC modes, and whether the
8104 CC mode is at least as constrained as REQ_MODE. */
8105
8106 int
8107 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
8108 {
8109 rtx set;
8110 enum machine_mode set_mode;
8111
8112 set = PATTERN (insn);
8113 if (GET_CODE (set) == PARALLEL)
8114 set = XVECEXP (set, 0, 0);
8115 if (GET_CODE (set) != SET)
8116 abort ();
8117 if (GET_CODE (SET_SRC (set)) != COMPARE)
8118 abort ();
8119
8120 set_mode = GET_MODE (SET_DEST (set));
8121 switch (set_mode)
8122 {
8123 case CCNOmode:
8124 if (req_mode != CCNOmode
8125 && (req_mode != CCmode
8126 || XEXP (SET_SRC (set), 1) != const0_rtx))
8127 return 0;
8128 break;
8129 case CCmode:
8130 if (req_mode == CCGCmode)
8131 return 0;
8132 /* FALLTHRU */
8133 case CCGCmode:
8134 if (req_mode == CCGOCmode || req_mode == CCNOmode)
8135 return 0;
8136 /* FALLTHRU */
8137 case CCGOCmode:
8138 if (req_mode == CCZmode)
8139 return 0;
8140 /* FALLTHRU */
8141 case CCZmode:
8142 break;
8143
8144 default:
8145 abort ();
8146 }
8147
8148 return (GET_MODE (SET_SRC (set)) == set_mode);
8149 }
8150
8151 /* Generate insn patterns to do an integer compare of OPERANDS. */
8152
8153 static rtx
8154 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
8155 {
8156 enum machine_mode cmpmode;
8157 rtx tmp, flags;
8158
8159 cmpmode = SELECT_CC_MODE (code, op0, op1);
8160 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
8161
8162 /* This is very simple, but making the interface the same as in the
8163 FP case makes the rest of the code easier. */
8164 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
8165 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
8166
8167 /* Return the test that should be put into the flags user, i.e.
8168 the bcc, scc, or cmov instruction. */
8169 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
8170 }
8171
8172 /* Figure out whether to use ordered or unordered fp comparisons.
8173 Return the appropriate mode to use. */
8174
8175 enum machine_mode
8176 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
8177 {
8178 /* ??? In order to make all comparisons reversible, we do all comparisons
8179 non-trapping when compiling for IEEE. Once gcc is able to distinguish
8180 between trapping and nontrapping forms of all comparisons, we can make
8181 inequality comparisons trapping again, since that results in better code
8182 when using FCOM based compares. */
8183 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
8184 }
8185
8186 enum machine_mode
8187 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
8188 {
8189 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8190 return ix86_fp_compare_mode (code);
8191 switch (code)
8192 {
8193 /* Only zero flag is needed. */
8194 case EQ: /* ZF=0 */
8195 case NE: /* ZF!=0 */
8196 return CCZmode;
8197 /* Codes needing carry flag. */
8198 case GEU: /* CF=0 */
8199 case GTU: /* CF=0 & ZF=0 */
8200 case LTU: /* CF=1 */
8201 case LEU: /* CF=1 | ZF=1 */
8202 return CCmode;
8203 /* Codes possibly doable only with sign flag when
8204 comparing against zero. */
8205 case GE: /* SF=OF or SF=0 */
8206 case LT: /* SF<>OF or SF=1 */
8207 if (op1 == const0_rtx)
8208 return CCGOCmode;
8209 else
8210 /* For other cases Carry flag is not required. */
8211 return CCGCmode;
8212 /* Codes doable only with the sign flag when comparing
8213 against zero, but we have no jump instruction for it,
8214 so we need to use relational tests against overflow,
8215 which thus needs to be zero. */
8216 case GT: /* ZF=0 & SF=OF */
8217 case LE: /* ZF=1 | SF<>OF */
8218 if (op1 == const0_rtx)
8219 return CCNOmode;
8220 else
8221 return CCGCmode;
8222 /* The strcmp pattern does (use flags), and combine may ask us for the
8223 proper mode. */
8224 case USE:
8225 return CCmode;
8226 default:
8227 abort ();
8228 }
8229 }
8230
8231 /* Return the fixed registers used for condition codes. */
8232
8233 static bool
8234 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
8235 {
8236 *p1 = FLAGS_REG;
8237 *p2 = FPSR_REG;
8238 return true;
8239 }
8240
8241 /* If two condition code modes are compatible, return a condition code
8242 mode which is compatible with both. Otherwise, return
8243 VOIDmode. */
8244
8245 static enum machine_mode
8246 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
8247 {
8248 if (m1 == m2)
8249 return m1;
8250
8251 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
8252 return VOIDmode;
8253
8254 if ((m1 == CCGCmode && m2 == CCGOCmode)
8255 || (m1 == CCGOCmode && m2 == CCGCmode))
8256 return CCGCmode;
8257
8258 switch (m1)
8259 {
8260 default:
8261 abort ();
8262
8263 case CCmode:
8264 case CCGCmode:
8265 case CCGOCmode:
8266 case CCNOmode:
8267 case CCZmode:
8268 switch (m2)
8269 {
8270 default:
8271 return VOIDmode;
8272
8273 case CCmode:
8274 case CCGCmode:
8275 case CCGOCmode:
8276 case CCNOmode:
8277 case CCZmode:
8278 return CCmode;
8279 }
8280
8281 case CCFPmode:
8282 case CCFPUmode:
8283 /* These are only compatible with themselves, which we already
8284 checked above. */
8285 return VOIDmode;
8286 }
8287 }
8288
8289 /* Return true if we should use an FCOMI instruction for this fp comparison. */
8290
8291 int
8292 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
8293 {
8294 enum rtx_code swapped_code = swap_condition (code);
8295 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
8296 || (ix86_fp_comparison_cost (swapped_code)
8297 == ix86_fp_comparison_fcomi_cost (swapped_code)));
8298 }
8299
8300 /* Swap, force into registers, or otherwise massage the two operands
8301 to a fp comparison. The operands are updated in place; the new
8302 comparison code is returned. */
8303
8304 static enum rtx_code
8305 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
8306 {
8307 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
8308 rtx op0 = *pop0, op1 = *pop1;
8309 enum machine_mode op_mode = GET_MODE (op0);
8310 int is_sse = SSE_REG_P (op0) || SSE_REG_P (op1);
8311
8312 /* All of the unordered compare instructions only work on registers.
8313 The same is true of the fcomi compare instructions. The same is
8314 true of the XFmode compare instructions if not comparing with
8315 zero (ftst insn is used in this case). */
8316
8317 if (!is_sse
8318 && (fpcmp_mode == CCFPUmode
8319 || (op_mode == XFmode
8320 && ! (standard_80387_constant_p (op0) == 1
8321 || standard_80387_constant_p (op1) == 1))
8322 || ix86_use_fcomi_compare (code)))
8323 {
8324 op0 = force_reg (op_mode, op0);
8325 op1 = force_reg (op_mode, op1);
8326 }
8327 else
8328 {
8329 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
8330 things around if they appear profitable, otherwise force op0
8331 into a register. */
8332
8333 if (standard_80387_constant_p (op0) == 0
8334 || (GET_CODE (op0) == MEM
8335 && ! (standard_80387_constant_p (op1) == 0
8336 || GET_CODE (op1) == MEM)))
8337 {
8338 rtx tmp;
8339 tmp = op0, op0 = op1, op1 = tmp;
8340 code = swap_condition (code);
8341 }
8342
8343 if (GET_CODE (op0) != REG)
8344 op0 = force_reg (op_mode, op0);
8345
8346 if (CONSTANT_P (op1))
8347 {
8348 int tmp = standard_80387_constant_p (op1);
8349 if (tmp == 0)
8350 op1 = validize_mem (force_const_mem (op_mode, op1));
8351 else if (tmp == 1)
8352 {
8353 if (TARGET_CMOVE)
8354 op1 = force_reg (op_mode, op1);
8355 }
8356 else
8357 op1 = force_reg (op_mode, op1);
8358 }
8359 }
8360
8361 /* Try to rearrange the comparison to make it cheaper. */
8362 if (ix86_fp_comparison_cost (code)
8363 > ix86_fp_comparison_cost (swap_condition (code))
8364 && (GET_CODE (op1) == REG || !no_new_pseudos))
8365 {
8366 rtx tmp;
8367 tmp = op0, op0 = op1, op1 = tmp;
8368 code = swap_condition (code);
8369 if (GET_CODE (op0) != REG)
8370 op0 = force_reg (op_mode, op0);
8371 }
8372
8373 *pop0 = op0;
8374 *pop1 = op1;
8375 return code;
8376 }
8377
8378 /* Convert the comparison codes we use to represent FP comparisons to the
8379 integer code that will result in a proper branch. Return UNKNOWN if no
8380 such code is available. */
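/* After an fcomi/fucomi (or comiss/ucomiss) the carry and zero flags are
   set the way an unsigned integer comparison would set them, hence
   GT/GE/UNLT/UNLE map onto GTU/GEU/LTU/LEU below, while UNEQ and LTGT
   only need the zero flag.  */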
8381
8382 enum rtx_code
8383 ix86_fp_compare_code_to_integer (enum rtx_code code)
8384 {
8385 switch (code)
8386 {
8387 case GT:
8388 return GTU;
8389 case GE:
8390 return GEU;
8391 case ORDERED:
8392 case UNORDERED:
8393 return code;
8394 break;
8395 case UNEQ:
8396 return EQ;
8397 break;
8398 case UNLT:
8399 return LTU;
8400 break;
8401 case UNLE:
8402 return LEU;
8403 break;
8404 case LTGT:
8405 return NE;
8406 break;
8407 default:
8408 return UNKNOWN;
8409 }
8410 }
8411
8412 /* Split comparison code CODE into comparisons we can do using branch
8413 instructions. BYPASS_CODE is the comparison code for a branch that will
8414 branch around FIRST_CODE and SECOND_CODE. If one of the branches
8415 is not required, its value is set to UNKNOWN.
8416 We never require more than two branches. */
8417
8418 void
8419 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
8420 enum rtx_code *first_code,
8421 enum rtx_code *second_code)
8422 {
8423 *first_code = code;
8424 *bypass_code = UNKNOWN;
8425 *second_code = UNKNOWN;
8426
8427 /* The fcomi comparison sets flags as follows:
8428
8429 cmp ZF PF CF
8430 > 0 0 0
8431 < 0 0 1
8432 = 1 0 0
8433 un 1 1 1 */
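/* For example, a plain LT wants CF=1, but an unordered result also sets
   CF (and PF), so LT is rewritten below as UNLT plus an UNORDERED bypass
   branch; NE wants ZF=0, which an unordered result violates, so it
   becomes LTGT plus a second branch taken on UNORDERED.  */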
8434
8435 switch (code)
8436 {
8437 case GT: /* GTU - CF=0 & ZF=0 */
8438 case GE: /* GEU - CF=0 */
8439 case ORDERED: /* PF=0 */
8440 case UNORDERED: /* PF=1 */
8441 case UNEQ: /* EQ - ZF=1 */
8442 case UNLT: /* LTU - CF=1 */
8443 case UNLE: /* LEU - CF=1 | ZF=1 */
8444 case LTGT: /* EQ - ZF=0 */
8445 break;
8446 case LT: /* LTU - CF=1 - fails on unordered */
8447 *first_code = UNLT;
8448 *bypass_code = UNORDERED;
8449 break;
8450 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
8451 *first_code = UNLE;
8452 *bypass_code = UNORDERED;
8453 break;
8454 case EQ: /* EQ - ZF=1 - fails on unordered */
8455 *first_code = UNEQ;
8456 *bypass_code = UNORDERED;
8457 break;
8458 case NE: /* NE - ZF=0 - fails on unordered */
8459 *first_code = LTGT;
8460 *second_code = UNORDERED;
8461 break;
8462 case UNGE: /* GEU - CF=0 - fails on unordered */
8463 *first_code = GE;
8464 *second_code = UNORDERED;
8465 break;
8466 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
8467 *first_code = GT;
8468 *second_code = UNORDERED;
8469 break;
8470 default:
8471 abort ();
8472 }
8473 if (!TARGET_IEEE_FP)
8474 {
8475 *second_code = UNKNOWN;
8476 *bypass_code = UNKNOWN;
8477 }
8478 }
8479
8480 /* Return the cost of a comparison done using fcom + arithmetic operations on AX.
8481 All following functions use the number of instructions as a cost metric.
8482 In the future this should be tweaked to compute bytes for optimize_size and
8483 take into account the performance of various instructions on various CPUs. */
8484 static int
8485 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
8486 {
8487 if (!TARGET_IEEE_FP)
8488 return 4;
8489 /* The cost of code output by ix86_expand_fp_compare. */
8490 switch (code)
8491 {
8492 case UNLE:
8493 case UNLT:
8494 case LTGT:
8495 case GT:
8496 case GE:
8497 case UNORDERED:
8498 case ORDERED:
8499 case UNEQ:
8500 return 4;
8501 break;
8502 case LT:
8503 case NE:
8504 case EQ:
8505 case UNGE:
8506 return 5;
8507 break;
8508 case LE:
8509 case UNGT:
8510 return 6;
8511 break;
8512 default:
8513 abort ();
8514 }
8515 }
8516
8517 /* Return cost of comparison done using fcomi operation.
8518 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8519 static int
8520 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
8521 {
8522 enum rtx_code bypass_code, first_code, second_code;
8523 /* Return arbitrarily high cost when instruction is not supported - this
8524 prevents gcc from using it. */
8525 if (!TARGET_CMOVE)
8526 return 1024;
8527 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8528 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
8529 }
8530
8531 /* Return cost of comparison done using sahf operation.
8532 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8533 static int
8534 ix86_fp_comparison_sahf_cost (enum rtx_code code)
8535 {
8536 enum rtx_code bypass_code, first_code, second_code;
8537 /* Return an arbitrarily high cost when the instruction is not preferred - this
8538 prevents gcc from using it. */
8539 if (!TARGET_USE_SAHF && !optimize_size)
8540 return 1024;
8541 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8542 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
8543 }
8544
8545 /* Compute cost of the comparison done using any method.
8546 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8547 static int
8548 ix86_fp_comparison_cost (enum rtx_code code)
8549 {
8550 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
8551 int min;
8552
8553 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
8554 sahf_cost = ix86_fp_comparison_sahf_cost (code);
8555
8556 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
8557 if (min > sahf_cost)
8558 min = sahf_cost;
8559 if (min > fcomi_cost)
8560 min = fcomi_cost;
8561 return min;
8562 }
8563
8564 /* Generate insn patterns to do a floating point compare of OPERANDS. */
8565
8566 static rtx
8567 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
8568 rtx *second_test, rtx *bypass_test)
8569 {
8570 enum machine_mode fpcmp_mode, intcmp_mode;
8571 rtx tmp, tmp2;
8572 int cost = ix86_fp_comparison_cost (code);
8573 enum rtx_code bypass_code, first_code, second_code;
8574
8575 fpcmp_mode = ix86_fp_compare_mode (code);
8576 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
8577
8578 if (second_test)
8579 *second_test = NULL_RTX;
8580 if (bypass_test)
8581 *bypass_test = NULL_RTX;
8582
8583 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8584
8585 /* Do fcomi/sahf based test when profitable. */
8586 if ((bypass_code == UNKNOWN || bypass_test)
8587 && (second_code == UNKNOWN || second_test)
8588 && ix86_fp_comparison_arithmetics_cost (code) > cost)
8589 {
8590 if (TARGET_CMOVE)
8591 {
8592 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8593 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
8594 tmp);
8595 emit_insn (tmp);
8596 }
8597 else
8598 {
8599 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8600 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8601 if (!scratch)
8602 scratch = gen_reg_rtx (HImode);
8603 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8604 emit_insn (gen_x86_sahf_1 (scratch));
8605 }
8606
8607 /* The FP codes work out to act like unsigned. */
8608 intcmp_mode = fpcmp_mode;
8609 code = first_code;
8610 if (bypass_code != UNKNOWN)
8611 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
8612 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8613 const0_rtx);
8614 if (second_code != UNKNOWN)
8615 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
8616 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8617 const0_rtx);
8618 }
8619 else
8620 {
8621 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
8622 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8623 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8624 if (!scratch)
8625 scratch = gen_reg_rtx (HImode);
8626 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8627
8628 /* In the unordered case, we have to check C2 for NaN's, which
8629 doesn't happen to work out to anything nice combination-wise.
8630 So do some bit twiddling on the value we've got in AH to come
8631 up with an appropriate set of condition codes. */
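/* For reference: after the fnstsw, AH holds status-word bits 8-15, i.e.
   C0 in bit 0 (0x01), C2 in bit 2 (0x04) and C3 in bit 6 (0x40); the
   0x45, 0x44, 0x40, 0x05, 0x04 and 0x01 constants below test the
   corresponding combinations of those condition bits.  */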
8632
8633 intcmp_mode = CCNOmode;
8634 switch (code)
8635 {
8636 case GT:
8637 case UNGT:
8638 if (code == GT || !TARGET_IEEE_FP)
8639 {
8640 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8641 code = EQ;
8642 }
8643 else
8644 {
8645 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8646 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8647 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
8648 intcmp_mode = CCmode;
8649 code = GEU;
8650 }
8651 break;
8652 case LT:
8653 case UNLT:
8654 if (code == LT && TARGET_IEEE_FP)
8655 {
8656 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8657 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
8658 intcmp_mode = CCmode;
8659 code = EQ;
8660 }
8661 else
8662 {
8663 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
8664 code = NE;
8665 }
8666 break;
8667 case GE:
8668 case UNGE:
8669 if (code == GE || !TARGET_IEEE_FP)
8670 {
8671 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
8672 code = EQ;
8673 }
8674 else
8675 {
8676 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8677 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8678 GEN_INT (0x01)));
8679 code = NE;
8680 }
8681 break;
8682 case LE:
8683 case UNLE:
8684 if (code == LE && TARGET_IEEE_FP)
8685 {
8686 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8687 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8688 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8689 intcmp_mode = CCmode;
8690 code = LTU;
8691 }
8692 else
8693 {
8694 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8695 code = NE;
8696 }
8697 break;
8698 case EQ:
8699 case UNEQ:
8700 if (code == EQ && TARGET_IEEE_FP)
8701 {
8702 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8703 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8704 intcmp_mode = CCmode;
8705 code = EQ;
8706 }
8707 else
8708 {
8709 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8710 code = NE;
8711 break;
8712 }
8713 break;
8714 case NE:
8715 case LTGT:
8716 if (code == NE && TARGET_IEEE_FP)
8717 {
8718 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8719 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8720 GEN_INT (0x40)));
8721 code = NE;
8722 }
8723 else
8724 {
8725 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8726 code = EQ;
8727 }
8728 break;
8729
8730 case UNORDERED:
8731 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8732 code = NE;
8733 break;
8734 case ORDERED:
8735 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8736 code = EQ;
8737 break;
8738
8739 default:
8740 abort ();
8741 }
8742 }
8743
8744 /* Return the test that should be put into the flags user, i.e.
8745 the bcc, scc, or cmov instruction. */
8746 return gen_rtx_fmt_ee (code, VOIDmode,
8747 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8748 const0_rtx);
8749 }
8750
8751 rtx
8752 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
8753 {
8754 rtx op0, op1, ret;
8755 op0 = ix86_compare_op0;
8756 op1 = ix86_compare_op1;
8757
8758 if (second_test)
8759 *second_test = NULL_RTX;
8760 if (bypass_test)
8761 *bypass_test = NULL_RTX;
8762
8763 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8764 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
8765 second_test, bypass_test);
8766 else
8767 ret = ix86_expand_int_compare (code, op0, op1);
8768
8769 return ret;
8770 }
8771
8772 /* Return true if the CODE will result in nontrivial jump sequence. */
8773 bool
8774 ix86_fp_jump_nontrivial_p (enum rtx_code code)
8775 {
8776 enum rtx_code bypass_code, first_code, second_code;
8777 if (!TARGET_CMOVE)
8778 return true;
8779 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8780 return bypass_code != UNKNOWN || second_code != UNKNOWN;
8781 }
8782
8783 void
8784 ix86_expand_branch (enum rtx_code code, rtx label)
8785 {
8786 rtx tmp;
8787
8788 switch (GET_MODE (ix86_compare_op0))
8789 {
8790 case QImode:
8791 case HImode:
8792 case SImode:
8793 simple:
8794 tmp = ix86_expand_compare (code, NULL, NULL);
8795 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8796 gen_rtx_LABEL_REF (VOIDmode, label),
8797 pc_rtx);
8798 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
8799 return;
8800
8801 case SFmode:
8802 case DFmode:
8803 case XFmode:
8804 {
8805 rtvec vec;
8806 int use_fcomi;
8807 enum rtx_code bypass_code, first_code, second_code;
8808
8809 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
8810 &ix86_compare_op1);
8811
8812 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8813
8814 /* Check whether we will use the natural sequence with one jump. If
8815 so, we can expand the jump early. Otherwise delay expansion by
8816 creating a compound insn so as not to confuse the optimizers. */
8817 if (bypass_code == UNKNOWN && second_code == UNKNOWN
8818 && TARGET_CMOVE)
8819 {
8820 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
8821 gen_rtx_LABEL_REF (VOIDmode, label),
8822 pc_rtx, NULL_RTX, NULL_RTX);
8823 }
8824 else
8825 {
8826 tmp = gen_rtx_fmt_ee (code, VOIDmode,
8827 ix86_compare_op0, ix86_compare_op1);
8828 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8829 gen_rtx_LABEL_REF (VOIDmode, label),
8830 pc_rtx);
8831 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
8832
8833 use_fcomi = ix86_use_fcomi_compare (code);
8834 vec = rtvec_alloc (3 + !use_fcomi);
8835 RTVEC_ELT (vec, 0) = tmp;
8836 RTVEC_ELT (vec, 1)
8837 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
8838 RTVEC_ELT (vec, 2)
8839 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
8840 if (! use_fcomi)
8841 RTVEC_ELT (vec, 3)
8842 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
8843
8844 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
8845 }
8846 return;
8847 }
8848
8849 case DImode:
8850 if (TARGET_64BIT)
8851 goto simple;
8852 /* Expand DImode branch into multiple compare+branch. */
8853 {
8854 rtx lo[2], hi[2], label2;
8855 enum rtx_code code1, code2, code3;
8856
8857 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
8858 {
8859 tmp = ix86_compare_op0;
8860 ix86_compare_op0 = ix86_compare_op1;
8861 ix86_compare_op1 = tmp;
8862 code = swap_condition (code);
8863 }
8864 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
8865 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
8866
8867 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
8868 avoid two branches. This costs one extra insn, so disable when
8869 optimizing for size. */
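/* That is, a == b iff ((hi(a) ^ hi(b)) | (lo(a) ^ lo(b))) == 0, which
   needs only a single compare against zero.  */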
8870
8871 if ((code == EQ || code == NE)
8872 && (!optimize_size
8873 || hi[1] == const0_rtx || lo[1] == const0_rtx))
8874 {
8875 rtx xor0, xor1;
8876
8877 xor1 = hi[0];
8878 if (hi[1] != const0_rtx)
8879 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
8880 NULL_RTX, 0, OPTAB_WIDEN);
8881
8882 xor0 = lo[0];
8883 if (lo[1] != const0_rtx)
8884 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
8885 NULL_RTX, 0, OPTAB_WIDEN);
8886
8887 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
8888 NULL_RTX, 0, OPTAB_WIDEN);
8889
8890 ix86_compare_op0 = tmp;
8891 ix86_compare_op1 = const0_rtx;
8892 ix86_expand_branch (code, label);
8893 return;
8894 }
8895
8896 /* Otherwise, if we are doing less-than or greater-or-equal-than,
8897 op1 is a constant, and its low word is zero, then we can just
8898 examine the high word. */
8899
8900 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
8901 switch (code)
8902 {
8903 case LT: case LTU: case GE: case GEU:
8904 ix86_compare_op0 = hi[0];
8905 ix86_compare_op1 = hi[1];
8906 ix86_expand_branch (code, label);
8907 return;
8908 default:
8909 break;
8910 }
8911
8912 /* Otherwise, we need two or three jumps. */
8913
8914 label2 = gen_label_rtx ();
8915
8916 code1 = code;
8917 code2 = swap_condition (code);
8918 code3 = unsigned_condition (code);
8919
8920 switch (code)
8921 {
8922 case LT: case GT: case LTU: case GTU:
8923 break;
8924
8925 case LE: code1 = LT; code2 = GT; break;
8926 case GE: code1 = GT; code2 = LT; break;
8927 case LEU: code1 = LTU; code2 = GTU; break;
8928 case GEU: code1 = GTU; code2 = LTU; break;
8929
8930 case EQ: code1 = UNKNOWN; code2 = NE; break;
8931 case NE: code2 = UNKNOWN; break;
8932
8933 default:
8934 abort ();
8935 }
8936
8937 /*
8938 * a < b =>
8939 * if (hi(a) < hi(b)) goto true;
8940 * if (hi(a) > hi(b)) goto false;
8941 * if (lo(a) < lo(b)) goto true;
8942 * false:
8943 */
8944
8945 ix86_compare_op0 = hi[0];
8946 ix86_compare_op1 = hi[1];
8947
8948 if (code1 != UNKNOWN)
8949 ix86_expand_branch (code1, label);
8950 if (code2 != UNKNOWN)
8951 ix86_expand_branch (code2, label2);
8952
8953 ix86_compare_op0 = lo[0];
8954 ix86_compare_op1 = lo[1];
8955 ix86_expand_branch (code3, label);
8956
8957 if (code2 != UNKNOWN)
8958 emit_label (label2);
8959 return;
8960 }
8961
8962 default:
8963 abort ();
8964 }
8965 }
8966
8967 /* Split branch based on floating point condition. */
8968 void
8969 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
8970 rtx target1, rtx target2, rtx tmp, rtx pushed)
8971 {
8972 rtx second, bypass;
8973 rtx label = NULL_RTX;
8974 rtx condition;
8975 int bypass_probability = -1, second_probability = -1, probability = -1;
8976 rtx i;
8977
8978 if (target2 != pc_rtx)
8979 {
8980 rtx tmp = target2;
8981 code = reverse_condition_maybe_unordered (code);
8982 target2 = target1;
8983 target1 = tmp;
8984 }
8985
8986 condition = ix86_expand_fp_compare (code, op1, op2,
8987 tmp, &second, &bypass);
8988
8989 /* Remove pushed operand from stack. */
8990 if (pushed)
8991 ix86_free_from_memory (GET_MODE (pushed));
8992
8993 if (split_branch_probability >= 0)
8994 {
8995 /* Distribute the probabilities across the jumps.
8996 Assume that BYPASS and SECOND always test
8997 for UNORDERED. */
8998 probability = split_branch_probability;
8999
9000 /* A value of 1 is low enough that the probability does not need
9001 to be updated. Later we may run some experiments and see
9002 if unordered values are more frequent in practice. */
9003 if (bypass)
9004 bypass_probability = 1;
9005 if (second)
9006 second_probability = 1;
9007 }
9008 if (bypass != NULL_RTX)
9009 {
9010 label = gen_label_rtx ();
9011 i = emit_jump_insn (gen_rtx_SET
9012 (VOIDmode, pc_rtx,
9013 gen_rtx_IF_THEN_ELSE (VOIDmode,
9014 bypass,
9015 gen_rtx_LABEL_REF (VOIDmode,
9016 label),
9017 pc_rtx)));
9018 if (bypass_probability >= 0)
9019 REG_NOTES (i)
9020 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9021 GEN_INT (bypass_probability),
9022 REG_NOTES (i));
9023 }
9024 i = emit_jump_insn (gen_rtx_SET
9025 (VOIDmode, pc_rtx,
9026 gen_rtx_IF_THEN_ELSE (VOIDmode,
9027 condition, target1, target2)));
9028 if (probability >= 0)
9029 REG_NOTES (i)
9030 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9031 GEN_INT (probability),
9032 REG_NOTES (i));
9033 if (second != NULL_RTX)
9034 {
9035 i = emit_jump_insn (gen_rtx_SET
9036 (VOIDmode, pc_rtx,
9037 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
9038 target2)));
9039 if (second_probability >= 0)
9040 REG_NOTES (i)
9041 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9042 GEN_INT (second_probability),
9043 REG_NOTES (i));
9044 }
9045 if (label != NULL_RTX)
9046 emit_label (label);
9047 }
9048
9049 int
9050 ix86_expand_setcc (enum rtx_code code, rtx dest)
9051 {
9052 rtx ret, tmp, tmpreg, equiv;
9053 rtx second_test, bypass_test;
9054
9055 if (GET_MODE (ix86_compare_op0) == DImode
9056 && !TARGET_64BIT)
9057 return 0; /* FAIL */
9058
9059 if (GET_MODE (dest) != QImode)
9060 abort ();
9061
9062 ret = ix86_expand_compare (code, &second_test, &bypass_test);
9063 PUT_MODE (ret, QImode);
9064
9065 tmp = dest;
9066 tmpreg = dest;
9067
9068 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
9069 if (bypass_test || second_test)
9070 {
9071 rtx test = second_test;
9072 int bypass = 0;
9073 rtx tmp2 = gen_reg_rtx (QImode);
9074 if (bypass_test)
9075 {
9076 if (second_test)
9077 abort ();
9078 test = bypass_test;
9079 bypass = 1;
9080 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
9081 }
9082 PUT_MODE (test, QImode);
9083 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
9084
9085 if (bypass)
9086 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
9087 else
9088 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
9089 }
9090
9091 /* Attach a REG_EQUAL note describing the comparison result. */
9092 equiv = simplify_gen_relational (code, QImode,
9093 GET_MODE (ix86_compare_op0),
9094 ix86_compare_op0, ix86_compare_op1);
9095 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
9096
9097 return 1; /* DONE */
9098 }
9099
9100 /* Expand a comparison setting or clearing the carry flag. Return true when
9101 successful and set *POP to the comparison operation. */
9102 static bool
9103 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
9104 {
9105 enum machine_mode mode =
9106 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
9107
9108 /* Do not handle DImode compares that go through a special path. Also we can't
9109 deal with FP compares yet, though it would be possible to add this. */
9110 if ((mode == DImode && !TARGET_64BIT))
9111 return false;
9112 if (FLOAT_MODE_P (mode))
9113 {
9114 rtx second_test = NULL, bypass_test = NULL;
9115 rtx compare_op, compare_seq;
9116
9117 /* Shortcut: the following common codes never translate into carry flag compares. */
9118 if (code == EQ || code == NE || code == UNEQ || code == LTGT
9119 || code == ORDERED || code == UNORDERED)
9120 return false;
9121
9122 /* These comparisons require zero flag; swap operands so they won't. */
9123 if ((code == GT || code == UNLE || code == LE || code == UNGT)
9124 && !TARGET_IEEE_FP)
9125 {
9126 rtx tmp = op0;
9127 op0 = op1;
9128 op1 = tmp;
9129 code = swap_condition (code);
9130 }
9131
9132 /* Try to expand the comparison and verify that we end up with a carry-flag-
9133 based comparison. This fails to be true only when we decide to expand the
9134 comparison using arithmetic, which is not a common scenario. */
9135 start_sequence ();
9136 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9137 &second_test, &bypass_test);
9138 compare_seq = get_insns ();
9139 end_sequence ();
9140
9141 if (second_test || bypass_test)
9142 return false;
9143 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9144 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9145 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
9146 else
9147 code = GET_CODE (compare_op);
9148 if (code != LTU && code != GEU)
9149 return false;
9150 emit_insn (compare_seq);
9151 *pop = compare_op;
9152 return true;
9153 }
9154 if (!INTEGRAL_MODE_P (mode))
9155 return false;
9156 switch (code)
9157 {
9158 case LTU:
9159 case GEU:
9160 break;
9161
9162 /* Convert a==0 into (unsigned)a<1. */
9163 case EQ:
9164 case NE:
9165 if (op1 != const0_rtx)
9166 return false;
9167 op1 = const1_rtx;
9168 code = (code == EQ ? LTU : GEU);
9169 break;
9170
9171 /* Convert a>b into b<a or a>=b+1. */
9172 case GTU:
9173 case LEU:
9174 if (GET_CODE (op1) == CONST_INT)
9175 {
9176 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
9177 /* Bail out on overflow. We still can swap operands but that
9178 would force loading the constant into a register. */
9179 if (op1 == const0_rtx
9180 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
9181 return false;
9182 code = (code == GTU ? GEU : LTU);
9183 }
9184 else
9185 {
9186 rtx tmp = op1;
9187 op1 = op0;
9188 op0 = tmp;
9189 code = (code == GTU ? LTU : GEU);
9190 }
9191 break;
9192
9193 /* Convert a>=0 into (unsigned)a<0x80000000. */
9194 case LT:
9195 case GE:
9196 if (mode == DImode || op1 != const0_rtx)
9197 return false;
9198 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
9199 code = (code == LT ? GEU : LTU);
9200 break;
9201 case LE:
9202 case GT:
9203 if (mode == DImode || op1 != constm1_rtx)
9204 return false;
9205 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
9206 code = (code == LE ? GEU : LTU);
9207 break;
9208
9209 default:
9210 return false;
9211 }
9212 /* Swapping operands may cause the constant to appear as the first operand. */
9213 if (!nonimmediate_operand (op0, VOIDmode))
9214 {
9215 if (no_new_pseudos)
9216 return false;
9217 op0 = force_reg (mode, op0);
9218 }
9219 ix86_compare_op0 = op0;
9220 ix86_compare_op1 = op1;
9221 *pop = ix86_expand_compare (code, NULL, NULL);
9222 if (GET_CODE (*pop) != LTU && GET_CODE (*pop) != GEU)
9223 abort ();
9224 return true;
9225 }
9226
9227 int
9228 ix86_expand_int_movcc (rtx operands[])
9229 {
9230 enum rtx_code code = GET_CODE (operands[1]), compare_code;
9231 rtx compare_seq, compare_op;
9232 rtx second_test, bypass_test;
9233 enum machine_mode mode = GET_MODE (operands[0]);
9234 bool sign_bit_compare_p = false;
9235
9236 start_sequence ();
9237 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9238 compare_seq = get_insns ();
9239 end_sequence ();
9240
9241 compare_code = GET_CODE (compare_op);
9242
9243 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
9244 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
9245 sign_bit_compare_p = true;
9246
9247 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
9248 HImode insns, we'd be swallowed in word prefix ops. */
9249
9250 if ((mode != HImode || TARGET_FAST_PREFIX)
9251 && (mode != DImode || TARGET_64BIT)
9252 && GET_CODE (operands[2]) == CONST_INT
9253 && GET_CODE (operands[3]) == CONST_INT)
9254 {
9255 rtx out = operands[0];
9256 HOST_WIDE_INT ct = INTVAL (operands[2]);
9257 HOST_WIDE_INT cf = INTVAL (operands[3]);
9258 HOST_WIDE_INT diff;
9259
9260 diff = ct - cf;
9261 /* Sign-bit compares are better done using shifts than using
9262 sbb. */
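/* The sbb idiom: after a compare that leaves the result in the carry
   flag, "sbb reg, reg" computes reg - reg - CF, i.e. 0 when CF is clear
   and -1 (all ones) when CF is set; the add/or/not/and arithmetic below
   then maps that 0 / -1 value onto the two constants ct and cf.  */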
9263 if (sign_bit_compare_p
9264 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
9265 ix86_compare_op1, &compare_op))
9266 {
9267 /* Detect overlap between destination and compare sources. */
9268 rtx tmp = out;
9269
9270 if (!sign_bit_compare_p)
9271 {
9272 bool fpcmp = false;
9273
9274 compare_code = GET_CODE (compare_op);
9275
9276 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9277 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9278 {
9279 fpcmp = true;
9280 compare_code = ix86_fp_compare_code_to_integer (compare_code);
9281 }
9282
9283 /* To simplify the rest of the code, restrict to the GEU case. */
9284 if (compare_code == LTU)
9285 {
9286 HOST_WIDE_INT tmp = ct;
9287 ct = cf;
9288 cf = tmp;
9289 compare_code = reverse_condition (compare_code);
9290 code = reverse_condition (code);
9291 }
9292 else
9293 {
9294 if (fpcmp)
9295 PUT_CODE (compare_op,
9296 reverse_condition_maybe_unordered
9297 (GET_CODE (compare_op)));
9298 else
9299 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
9300 }
9301 diff = ct - cf;
9302
9303 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
9304 || reg_overlap_mentioned_p (out, ix86_compare_op1))
9305 tmp = gen_reg_rtx (mode);
9306
9307 if (mode == DImode)
9308 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
9309 else
9310 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
9311 }
9312 else
9313 {
9314 if (code == GT || code == GE)
9315 code = reverse_condition (code);
9316 else
9317 {
9318 HOST_WIDE_INT tmp = ct;
9319 ct = cf;
9320 cf = tmp;
9321 diff = ct - cf;
9322 }
9323 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
9324 ix86_compare_op1, VOIDmode, 0, -1);
9325 }
9326
9327 if (diff == 1)
9328 {
9329 /*
9330 * cmpl op0,op1
9331 * sbbl dest,dest
9332 * [addl dest, ct]
9333 *
9334 * Size 5 - 8.
9335 */
9336 if (ct)
9337 tmp = expand_simple_binop (mode, PLUS,
9338 tmp, GEN_INT (ct),
9339 copy_rtx (tmp), 1, OPTAB_DIRECT);
9340 }
9341 else if (cf == -1)
9342 {
9343 /*
9344 * cmpl op0,op1
9345 * sbbl dest,dest
9346 * orl $ct, dest
9347 *
9348 * Size 8.
9349 */
9350 tmp = expand_simple_binop (mode, IOR,
9351 tmp, GEN_INT (ct),
9352 copy_rtx (tmp), 1, OPTAB_DIRECT);
9353 }
9354 else if (diff == -1 && ct)
9355 {
9356 /*
9357 * cmpl op0,op1
9358 * sbbl dest,dest
9359 * notl dest
9360 * [addl dest, cf]
9361 *
9362 * Size 8 - 11.
9363 */
9364 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9365 if (cf)
9366 tmp = expand_simple_binop (mode, PLUS,
9367 copy_rtx (tmp), GEN_INT (cf),
9368 copy_rtx (tmp), 1, OPTAB_DIRECT);
9369 }
9370 else
9371 {
9372 /*
9373 * cmpl op0,op1
9374 * sbbl dest,dest
9375 * [notl dest]
9376 * andl cf - ct, dest
9377 * [addl dest, ct]
9378 *
9379 * Size 8 - 11.
9380 */
9381
9382 if (cf == 0)
9383 {
9384 cf = ct;
9385 ct = 0;
9386 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9387 }
9388
9389 tmp = expand_simple_binop (mode, AND,
9390 copy_rtx (tmp),
9391 gen_int_mode (cf - ct, mode),
9392 copy_rtx (tmp), 1, OPTAB_DIRECT);
9393 if (ct)
9394 tmp = expand_simple_binop (mode, PLUS,
9395 copy_rtx (tmp), GEN_INT (ct),
9396 copy_rtx (tmp), 1, OPTAB_DIRECT);
9397 }
9398
9399 if (!rtx_equal_p (tmp, out))
9400 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
9401
9402 return 1; /* DONE */
9403 }
9404
9405 if (diff < 0)
9406 {
9407 HOST_WIDE_INT tmp;
9408 tmp = ct, ct = cf, cf = tmp;
9409 diff = -diff;
9410 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9411 {
9412 /* We may be reversing an unordered compare to a normal compare, which
9413 is not valid in general (we may convert a non-trapping condition
9414 into a trapping one); however, on i386 we currently emit all
9415 comparisons unordered. */
9416 compare_code = reverse_condition_maybe_unordered (compare_code);
9417 code = reverse_condition_maybe_unordered (code);
9418 }
9419 else
9420 {
9421 compare_code = reverse_condition (compare_code);
9422 code = reverse_condition (code);
9423 }
9424 }
9425
9426 compare_code = UNKNOWN;
9427 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
9428 && GET_CODE (ix86_compare_op1) == CONST_INT)
9429 {
9430 if (ix86_compare_op1 == const0_rtx
9431 && (code == LT || code == GE))
9432 compare_code = code;
9433 else if (ix86_compare_op1 == constm1_rtx)
9434 {
9435 if (code == LE)
9436 compare_code = LT;
9437 else if (code == GT)
9438 compare_code = GE;
9439 }
9440 }
9441
9442 /* Optimize dest = (op0 < 0) ? -1 : cf. */
9443 if (compare_code != UNKNOWN
9444 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
9445 && (cf == -1 || ct == -1))
9446 {
9447 /* If lea code below could be used, only optimize
9448 if it results in a 2 insn sequence. */
9449
9450 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
9451 || diff == 3 || diff == 5 || diff == 9)
9452 || (compare_code == LT && ct == -1)
9453 || (compare_code == GE && cf == -1))
9454 {
9455 /*
9456 * notl op1 (if necessary)
9457 * sarl $31, op1
9458 * orl cf, op1
9459 */
9460 if (ct != -1)
9461 {
9462 cf = ct;
9463 ct = -1;
9464 code = reverse_condition (code);
9465 }
9466
9467 out = emit_store_flag (out, code, ix86_compare_op0,
9468 ix86_compare_op1, VOIDmode, 0, -1);
9469
9470 out = expand_simple_binop (mode, IOR,
9471 out, GEN_INT (cf),
9472 out, 1, OPTAB_DIRECT);
9473 if (out != operands[0])
9474 emit_move_insn (operands[0], out);
9475
9476 return 1; /* DONE */
9477 }
9478 }
9479
9480
9481 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
9482 || diff == 3 || diff == 5 || diff == 9)
9483 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
9484 && (mode != DImode
9485 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
9486 {
9487 /*
9488 * xorl dest,dest
9489 * cmpl op1,op2
9490 * setcc dest
9491 * lea cf(dest*(ct-cf)),dest
9492 *
9493 * Size 14.
9494 *
9495 * This also catches the degenerate setcc-only case.
9496 */
9497
9498 rtx tmp;
9499 int nops;
9500
9501 out = emit_store_flag (out, code, ix86_compare_op0,
9502 ix86_compare_op1, VOIDmode, 0, 1);
9503
9504 nops = 0;
9505 /* On x86_64 the lea instruction operates on Pmode, so we need
9506 to do the arithmetic in the proper mode to match. */
9507 if (diff == 1)
9508 tmp = copy_rtx (out);
9509 else
9510 {
9511 rtx out1;
9512 out1 = copy_rtx (out);
9513 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
9514 nops++;
9515 if (diff & 1)
9516 {
9517 tmp = gen_rtx_PLUS (mode, tmp, out1);
9518 nops++;
9519 }
9520 }
9521 if (cf != 0)
9522 {
9523 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
9524 nops++;
9525 }
9526 if (!rtx_equal_p (tmp, out))
9527 {
9528 if (nops == 1)
9529 out = force_operand (tmp, copy_rtx (out));
9530 else
9531 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
9532 }
9533 if (!rtx_equal_p (out, operands[0]))
9534 emit_move_insn (operands[0], copy_rtx (out));
9535
9536 return 1; /* DONE */
9537 }
9538
9539 /*
9540 * General case: Jumpful:
9541 * xorl dest,dest cmpl op1, op2
9542 * cmpl op1, op2 movl ct, dest
9543 * setcc dest jcc 1f
9544 * decl dest movl cf, dest
9545 * andl (cf-ct),dest 1:
9546 * addl ct,dest
9547 *
9548 * Size 20. Size 14.
9549 *
9550 * This is reasonably steep, but branch mispredict costs are
9551 * high on modern cpus, so consider failing only if optimizing
9552 * for space.
9553 */
9554
9555 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
9556 && BRANCH_COST >= 2)
9557 {
9558 if (cf == 0)
9559 {
9560 cf = ct;
9561 ct = 0;
9562 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9563 /* We may be reversing an unordered compare to a normal compare,
9564 which is not valid in general (we may convert a non-trapping
9565 condition into a trapping one); however, on i386 we currently
9566 emit all comparisons unordered. */
9567 code = reverse_condition_maybe_unordered (code);
9568 else
9569 {
9570 code = reverse_condition (code);
9571 if (compare_code != UNKNOWN)
9572 compare_code = reverse_condition (compare_code);
9573 }
9574 }
9575
9576 if (compare_code != UNKNOWN)
9577 {
9578 /* notl op1 (if needed)
9579 sarl $31, op1
9580 andl (cf-ct), op1
9581 addl ct, op1
9582
9583 For x < 0 (resp. x <= -1) there will be no notl,
9584 so if possible swap the constants to get rid of the
9585 complement.
9586 True/false will be -1/0 while code below (store flag
9587 followed by decrement) is 0/-1, so the constants need
9588 to be exchanged once more. */
9589
9590 if (compare_code == GE || !cf)
9591 {
9592 code = reverse_condition (code);
9593 compare_code = LT;
9594 }
9595 else
9596 {
9597 HOST_WIDE_INT tmp = cf;
9598 cf = ct;
9599 ct = tmp;
9600 }
9601
9602 out = emit_store_flag (out, code, ix86_compare_op0,
9603 ix86_compare_op1, VOIDmode, 0, -1);
9604 }
9605 else
9606 {
9607 out = emit_store_flag (out, code, ix86_compare_op0,
9608 ix86_compare_op1, VOIDmode, 0, 1);
9609
9610 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
9611 copy_rtx (out), 1, OPTAB_DIRECT);
9612 }
9613
9614 out = expand_simple_binop (mode, AND, copy_rtx (out),
9615 gen_int_mode (cf - ct, mode),
9616 copy_rtx (out), 1, OPTAB_DIRECT);
9617 if (ct)
9618 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
9619 copy_rtx (out), 1, OPTAB_DIRECT);
9620 if (!rtx_equal_p (out, operands[0]))
9621 emit_move_insn (operands[0], copy_rtx (out));
9622
9623 return 1; /* DONE */
9624 }
9625 }
9626
9627 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
9628 {
9629 /* Try a few things more with specific constants and a variable. */
9630
9631 optab op;
9632 rtx var, orig_out, out, tmp;
9633
9634 if (BRANCH_COST <= 2)
9635 return 0; /* FAIL */
9636
9637 /* If one of the two operands is an interesting constant, load a
9638 constant with the above and mask it in with a logical operation. */
9639
9640 if (GET_CODE (operands[2]) == CONST_INT)
9641 {
9642 var = operands[3];
9643 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
9644 operands[3] = constm1_rtx, op = and_optab;
9645 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
9646 operands[3] = const0_rtx, op = ior_optab;
9647 else
9648 return 0; /* FAIL */
9649 }
9650 else if (GET_CODE (operands[3]) == CONST_INT)
9651 {
9652 var = operands[2];
9653 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
9654 operands[2] = constm1_rtx, op = and_optab;
9655 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
9656 operands[2] = const0_rtx, op = ior_optab;
9657 else
9658 return 0; /* FAIL */
9659 }
9660 else
9661 return 0; /* FAIL */
9662
9663 orig_out = operands[0];
9664 tmp = gen_reg_rtx (mode);
9665 operands[0] = tmp;
9666
9667 /* Recurse to get the constant loaded. */
9668 if (ix86_expand_int_movcc (operands) == 0)
9669 return 0; /* FAIL */
9670
9671 /* Mask in the interesting variable. */
9672 out = expand_binop (mode, op, var, tmp, orig_out, 0,
9673 OPTAB_WIDEN);
9674 if (!rtx_equal_p (out, orig_out))
9675 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
9676
9677 return 1; /* DONE */
9678 }
9679
9680 /*
9681 * For comparison with above,
9682 *
9683 * movl cf,dest
9684 * movl ct,tmp
9685 * cmpl op1,op2
9686 * cmovcc tmp,dest
9687 *
9688 * Size 15.
9689 */
9690
9691 if (! nonimmediate_operand (operands[2], mode))
9692 operands[2] = force_reg (mode, operands[2]);
9693 if (! nonimmediate_operand (operands[3], mode))
9694 operands[3] = force_reg (mode, operands[3]);
9695
9696 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9697 {
9698 rtx tmp = gen_reg_rtx (mode);
9699 emit_move_insn (tmp, operands[3]);
9700 operands[3] = tmp;
9701 }
9702 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9703 {
9704 rtx tmp = gen_reg_rtx (mode);
9705 emit_move_insn (tmp, operands[2]);
9706 operands[2] = tmp;
9707 }
9708
9709 if (! register_operand (operands[2], VOIDmode)
9710 && (mode == QImode
9711 || ! register_operand (operands[3], VOIDmode)))
9712 operands[2] = force_reg (mode, operands[2]);
9713
9714 if (mode == QImode
9715 && ! register_operand (operands[3], VOIDmode))
9716 operands[3] = force_reg (mode, operands[3]);
9717
9718 emit_insn (compare_seq);
9719 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9720 gen_rtx_IF_THEN_ELSE (mode,
9721 compare_op, operands[2],
9722 operands[3])));
9723 if (bypass_test)
9724 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
9725 gen_rtx_IF_THEN_ELSE (mode,
9726 bypass_test,
9727 copy_rtx (operands[3]),
9728 copy_rtx (operands[0]))));
9729 if (second_test)
9730 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
9731 gen_rtx_IF_THEN_ELSE (mode,
9732 second_test,
9733 copy_rtx (operands[2]),
9734 copy_rtx (operands[0]))));
9735
9736 return 1; /* DONE */
9737 }
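
/* For illustration only -- minimal standalone C sketches of the
   branch-free selections the expander above builds from sbb, setcc,
   sar and lea.  The helper names are hypothetical and nothing below is
   referenced by the compiler; COND stands for the already-evaluated
   comparison result.  */

/* General masked form, "xor; cmp; setcc; dec; and; add":
   dest = cond ? ct : cf.  */
static int
masked_select (int cond, int ct, int cf)
{
  int dest = cond ? 1 : 0;      /* setcc */
  dest -= 1;                    /* decl: 1 -> 0, 0 -> -1 */
  dest &= cf - ct;              /* andl (cf - ct) */
  dest += ct;                   /* addl ct */
  return dest;                  /* == cond ? ct : cf */
}

/* LEA form, usable when ct - cf is 1, 2, 3, 4, 5, 8 or 9:
   dest = cf + cond * (ct - cf).  */
static int
lea_select (int cond, int ct, int cf)
{
  return cf + (cond ? 1 : 0) * (ct - cf);
}

/* Sign-bit form for dest = (x < 0) ? -1 : cf; the 0 / -1 mask is what
   "sarl $31" materializes on SImode.  */
static int
sign_bit_select (int x, int cf)
{
  int mask = -(x < 0);
  return mask | cf;
}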
9738
9739 int
9740 ix86_expand_fp_movcc (rtx operands[])
9741 {
9742 enum machine_mode mode = GET_MODE (operands[0]);
9743 enum rtx_code code = GET_CODE (operands[1]);
9744 rtx tmp, compare_op, second_test, bypass_test;
9745
9746 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
9747 {
9748 rtx cmp_op0, cmp_op1, if_true, if_false;
9749 rtx clob;
9750 enum machine_mode vmode, cmode;
9751 bool is_minmax = false;
9752
9753 cmp_op0 = ix86_compare_op0;
9754 cmp_op1 = ix86_compare_op1;
9755 if_true = operands[2];
9756 if_false = operands[3];
9757
9758 /* Since we've no cmove for sse registers, don't force bad register
9759 allocation just to gain access to it. Deny movcc when the
9760 comparison mode doesn't match the move mode. */
9761 cmode = GET_MODE (cmp_op0);
9762 if (cmode == VOIDmode)
9763 cmode = GET_MODE (cmp_op1);
9764 if (cmode != mode)
9765 return 0;
9766
9767 /* Massage condition to satisfy sse_comparison_operator. In case we
9768 are in non-ieee mode, try to canonicalize the destination operand
9769 to be first in the comparison - this helps reload to avoid extra
9770 moves. */
9771 if (!sse_comparison_operator (operands[1], VOIDmode)
9772 || ((COMMUTATIVE_P (operands[1]) || !TARGET_IEEE_FP)
9773 && rtx_equal_p (operands[0], cmp_op1)))
9774 {
9775 tmp = cmp_op0;
9776 cmp_op0 = cmp_op1;
9777 cmp_op1 = tmp;
9778 code = swap_condition (code);
9779 }
9780
9781 /* Detect conditional moves that exactly match min/max operational
9782 semantics. Note that this is IEEE safe, as long as we don't
9783 interchange the operands. That is why we keep this in the form
9784 of an IF_THEN_ELSE instead of reducing to SMIN/SMAX. */
9785 if ((code == LT || code == UNGE) && REG_P (cmp_op0) && REG_P (cmp_op1))
9786 {
9787 if (((cmp_op0 == if_true && cmp_op1 == if_false)
9788 || (cmp_op0 == if_false && cmp_op1 == if_true)))
9789 {
9790 is_minmax = true;
9791 if (code == UNGE)
9792 {
9793 code = LT;
9794 tmp = if_true;
9795 if_true = if_false;
9796 if_false = tmp;
9797 }
9798 }
9799 }
9800
9801 if (mode == SFmode)
9802 vmode = V4SFmode;
9803 else if (mode == DFmode)
9804 vmode = V2DFmode;
9805 else
9806 gcc_unreachable ();
9807
9808 cmp_op0 = force_reg (mode, cmp_op0);
9809 if (!nonimmediate_operand (cmp_op1, mode))
9810 cmp_op1 = force_reg (mode, cmp_op1);
9811
9812 tmp = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
9813 gcc_assert (sse_comparison_operator (tmp, VOIDmode));
9814
9815 tmp = gen_rtx_IF_THEN_ELSE (mode, tmp, if_true, if_false);
9816 tmp = gen_rtx_SET (VOIDmode, operands[0], tmp);
9817
9818 if (!is_minmax)
9819 {
9820 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (vmode));
9821 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
9822 }
9823
9824 emit_insn (tmp);
9825 return 1;
9826 }
9827
9828 /* The floating point conditional move instructions don't directly
9829 support conditions resulting from a signed integer comparison. */
9830
9831 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9832
9836 if (!fcmov_comparison_operator (compare_op, VOIDmode))
9837 {
9838 if (second_test != NULL || bypass_test != NULL)
9839 abort ();
9840 tmp = gen_reg_rtx (QImode);
9841 ix86_expand_setcc (code, tmp);
9842 code = NE;
9843 ix86_compare_op0 = tmp;
9844 ix86_compare_op1 = const0_rtx;
9845 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9846 }
9847 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9848 {
9849 tmp = gen_reg_rtx (mode);
9850 emit_move_insn (tmp, operands[3]);
9851 operands[3] = tmp;
9852 }
9853 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9854 {
9855 tmp = gen_reg_rtx (mode);
9856 emit_move_insn (tmp, operands[2]);
9857 operands[2] = tmp;
9858 }
9859
9860 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9861 gen_rtx_IF_THEN_ELSE (mode, compare_op,
9862 operands[2], operands[3])));
9863 if (bypass_test)
9864 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9865 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
9866 operands[3], operands[0])));
9867 if (second_test)
9868 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9869 gen_rtx_IF_THEN_ELSE (mode, second_test,
9870 operands[2], operands[0])));
9871
9872 return 1;
9873 }
9874
9875 void
9876 ix86_split_sse_movcc (rtx operands[])
9877 {
9878 rtx dest, scratch, cmp, op_true, op_false, x;
9879 enum machine_mode mode, vmode;
9880
9881 /* Note that the operator CMP has been set up with matching constraints
9882 such that dest is valid for the comparison. Unless one of the true
9883 or false operands is zero, the true operand has already been placed
9884 in SCRATCH. */
9885 dest = operands[0];
9886 scratch = operands[1];
9887 op_true = operands[2];
9888 op_false = operands[3];
9889 cmp = operands[4];
9890
9891 mode = GET_MODE (dest);
9892 vmode = GET_MODE (scratch);
9893
9894 emit_insn (gen_rtx_SET (VOIDmode, dest, cmp));
9895
9896 dest = simplify_gen_subreg (vmode, dest, mode, 0);
9897
9898 if (op_false == CONST0_RTX (mode))
9899 {
9900 op_true = simplify_gen_subreg (vmode, op_true, mode, 0);
9901 x = gen_rtx_AND (vmode, dest, op_true);
9902 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9903 }
9904 else
9905 {
9906 op_false = simplify_gen_subreg (vmode, op_false, mode, 0);
9907
9908 if (op_true == CONST0_RTX (mode))
9909 {
9910 x = gen_rtx_NOT (vmode, dest);
9911 x = gen_rtx_AND (vmode, x, op_false);
9912 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9913 }
9914 else
9915 {
9916 x = gen_rtx_AND (vmode, scratch, dest);
9917 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
9918
9919 x = gen_rtx_NOT (vmode, dest);
9920 x = gen_rtx_AND (vmode, x, op_false);
9921 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9922
9923 x = gen_rtx_IOR (vmode, dest, scratch);
9924 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9925 }
9926 }
9927 }
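
/* For illustration only -- a minimal C sketch of the bitwise blend the
   splitter above emits.  The comparison leaves an all-ones or all-zeros
   mask in the destination; the select is then and / andnot / or.  The
   helper name is hypothetical and works on one 32-bit lane.  */
static unsigned int
sse_blend_lane (unsigned int mask, unsigned int op_true, unsigned int op_false)
{
  /* MASK is expected to be 0xffffffff or 0.  */
  return (mask & op_true) | (~mask & op_false);
}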
9928
9929 /* Expand conditional increment or decrement using adc/sbb instructions.
9930 The default case using setcc followed by the conditional move can be
9931 done by generic code. */
9932 int
9933 ix86_expand_int_addcc (rtx operands[])
9934 {
9935 enum rtx_code code = GET_CODE (operands[1]);
9936 rtx compare_op;
9937 rtx val = const0_rtx;
9938 bool fpcmp = false;
9939 enum machine_mode mode = GET_MODE (operands[0]);
9940
9941 if (operands[3] != const1_rtx
9942 && operands[3] != constm1_rtx)
9943 return 0;
9944 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
9945 ix86_compare_op1, &compare_op))
9946 return 0;
9947 code = GET_CODE (compare_op);
9948
9949 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9950 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9951 {
9952 fpcmp = true;
9953 code = ix86_fp_compare_code_to_integer (code);
9954 }
9955
9956 if (code != LTU)
9957 {
9958 val = constm1_rtx;
9959 if (fpcmp)
9960 PUT_CODE (compare_op,
9961 reverse_condition_maybe_unordered
9962 (GET_CODE (compare_op)));
9963 else
9964 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
9965 }
9966 PUT_MODE (compare_op, mode);
9967
9968 /* Construct either adc or sbb insn. */
9969 if ((code == LTU) == (operands[3] == constm1_rtx))
9970 {
9971 switch (GET_MODE (operands[0]))
9972 {
9973 case QImode:
9974 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
9975 break;
9976 case HImode:
9977 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
9978 break;
9979 case SImode:
9980 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
9981 break;
9982 case DImode:
9983 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
9984 break;
9985 default:
9986 abort ();
9987 }
9988 }
9989 else
9990 {
9991 switch (GET_MODE (operands[0]))
9992 {
9993 case QImode:
9994 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
9995 break;
9996 case HImode:
9997 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
9998 break;
9999 case SImode:
10000 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
10001 break;
10002 case DImode:
10003 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10004 break;
10005 default:
10006 abort ();
10007 }
10008 }
10009 return 1; /* DONE */
10010 }
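
/* For illustration only -- a minimal C sketch of what the adc/sbb forms
   above compute: an increment (or decrement) folded into the carry flag
   left by an unsigned comparison, with no branch and no setcc.  The
   helper name is hypothetical.  */
static unsigned int
conditional_increment (unsigned int a, unsigned int b, unsigned int op2)
{
  /* After comparing A against B, the carry flag is set exactly when
     a < b (unsigned); "adc op2, 0" then adds that carry.  */
  return op2 + (a < b ? 1u : 0u);
}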
10011
10012
10013 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
10014 works for floating point parameters and non-offsettable memories.
10015 For pushes, it returns just stack offsets; the values will be saved
10016 in the right order. At most three parts are generated. */
10017
10018 static int
10019 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
10020 {
10021 int size;
10022
10023 if (!TARGET_64BIT)
10024 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
10025 else
10026 size = (GET_MODE_SIZE (mode) + 4) / 8;
10027
10028 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
10029 abort ();
10030 if (size < 2 || size > 3)
10031 abort ();
10032
10033 /* Optimize constant pool references to immediates. This is used by fp
10034 moves, which force all constants to memory to allow combining. */
10035 if (GET_CODE (operand) == MEM && MEM_READONLY_P (operand))
10036 {
10037 rtx tmp = maybe_get_pool_constant (operand);
10038 if (tmp)
10039 operand = tmp;
10040 }
10041
10042 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
10043 {
10044 /* The only non-offsettable memories we handle are pushes. */
10045 if (! push_operand (operand, VOIDmode))
10046 abort ();
10047
10048 operand = copy_rtx (operand);
10049 PUT_MODE (operand, Pmode);
10050 parts[0] = parts[1] = parts[2] = operand;
10051 }
10052 else if (!TARGET_64BIT)
10053 {
10054 if (mode == DImode)
10055 split_di (&operand, 1, &parts[0], &parts[1]);
10056 else
10057 {
10058 if (REG_P (operand))
10059 {
10060 if (!reload_completed)
10061 abort ();
10062 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
10063 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
10064 if (size == 3)
10065 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
10066 }
10067 else if (offsettable_memref_p (operand))
10068 {
10069 operand = adjust_address (operand, SImode, 0);
10070 parts[0] = operand;
10071 parts[1] = adjust_address (operand, SImode, 4);
10072 if (size == 3)
10073 parts[2] = adjust_address (operand, SImode, 8);
10074 }
10075 else if (GET_CODE (operand) == CONST_DOUBLE)
10076 {
10077 REAL_VALUE_TYPE r;
10078 long l[4];
10079
10080 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10081 switch (mode)
10082 {
10083 case XFmode:
10084 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10085 parts[2] = gen_int_mode (l[2], SImode);
10086 break;
10087 case DFmode:
10088 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
10089 break;
10090 default:
10091 abort ();
10092 }
10093 parts[1] = gen_int_mode (l[1], SImode);
10094 parts[0] = gen_int_mode (l[0], SImode);
10095 }
10096 else
10097 abort ();
10098 }
10099 }
10100 else
10101 {
10102 if (mode == TImode)
10103 split_ti (&operand, 1, &parts[0], &parts[1]);
10104 if (mode == XFmode || mode == TFmode)
10105 {
10106 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
10107 if (REG_P (operand))
10108 {
10109 if (!reload_completed)
10110 abort ();
10111 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
10112 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
10113 }
10114 else if (offsettable_memref_p (operand))
10115 {
10116 operand = adjust_address (operand, DImode, 0);
10117 parts[0] = operand;
10118 parts[1] = adjust_address (operand, upper_mode, 8);
10119 }
10120 else if (GET_CODE (operand) == CONST_DOUBLE)
10121 {
10122 REAL_VALUE_TYPE r;
10123 long l[4];
10124
10125 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10126 real_to_target (l, &r, mode);
10127
10128 /* Do not use shift by 32 to avoid warning on 32bit systems. */
10129 if (HOST_BITS_PER_WIDE_INT >= 64)
10130 parts[0]
10131 = gen_int_mode
10132 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
10133 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
10134 DImode);
10135 else
10136 parts[0] = immed_double_const (l[0], l[1], DImode);
10137
10138 if (upper_mode == SImode)
10139 parts[1] = gen_int_mode (l[2], SImode);
10140 else if (HOST_BITS_PER_WIDE_INT >= 64)
10141 parts[1]
10142 = gen_int_mode
10143 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
10144 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
10145 DImode);
10146 else
10147 parts[1] = immed_double_const (l[2], l[3], DImode);
10148 }
10149 else
10150 abort ();
10151 }
10152 }
10153
10154 return size;
10155 }
10156
10157 /* Emit insns to perform a move or push of DI, DF, and XF values.
10158 Return false when normal moves are needed; true when all required
10159 insns have been emitted. Operands 2-4 contain the input values
10160 in the correct order; operands 5-7 contain the output values. */
10161
10162 void
10163 ix86_split_long_move (rtx operands[])
10164 {
10165 rtx part[2][3];
10166 int nparts;
10167 int push = 0;
10168 int collisions = 0;
10169 enum machine_mode mode = GET_MODE (operands[0]);
10170
10171 /* The DFmode expanders may ask us to move a double.
10172 For a 64-bit target this is a single move. By hiding that fact
10173 here we simplify the i386.md splitters. */
10174 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
10175 {
10176 /* Optimize constant pool references to immediates. This is used by
10177 fp moves, which force all constants to memory to allow combining. */
10178
10179 if (GET_CODE (operands[1]) == MEM
10180 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
10181 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
10182 operands[1] = get_pool_constant (XEXP (operands[1], 0));
10183 if (push_operand (operands[0], VOIDmode))
10184 {
10185 operands[0] = copy_rtx (operands[0]);
10186 PUT_MODE (operands[0], Pmode);
10187 }
10188 else
10189 operands[0] = gen_lowpart (DImode, operands[0]);
10190 operands[1] = gen_lowpart (DImode, operands[1]);
10191 emit_move_insn (operands[0], operands[1]);
10192 return;
10193 }
10194
10195 /* The only non-offsettable memory we handle is push. */
10196 if (push_operand (operands[0], VOIDmode))
10197 push = 1;
10198 else if (GET_CODE (operands[0]) == MEM
10199 && ! offsettable_memref_p (operands[0]))
10200 abort ();
10201
10202 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
10203 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
10204
10205 /* When emitting push, take care for source operands on the stack. */
10206 if (push && GET_CODE (operands[1]) == MEM
10207 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
10208 {
10209 if (nparts == 3)
10210 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
10211 XEXP (part[1][2], 0));
10212 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
10213 XEXP (part[1][1], 0));
10214 }
10215
10216 /* We need to do the copy in the right order in case an address register
10217 of the source overlaps the destination. */
10218 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
10219 {
10220 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
10221 collisions++;
10222 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10223 collisions++;
10224 if (nparts == 3
10225 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
10226 collisions++;
10227
10228 /* Collision in the middle part can be handled by reordering. */
10229 if (collisions == 1 && nparts == 3
10230 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10231 {
10232 rtx tmp;
10233 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
10234 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
10235 }
10236
10237 /* If there are more collisions, we can't handle it by reordering.
10238 Do an lea to the last part and use only one colliding move. */
10239 else if (collisions > 1)
10240 {
10241 rtx base;
10242
10243 collisions = 1;
10244
10245 base = part[0][nparts - 1];
10246
10247 /* Handle the case when the last part isn't valid for lea.
10248 Happens in 64-bit mode storing the 12-byte XFmode. */
10249 if (GET_MODE (base) != Pmode)
10250 base = gen_rtx_REG (Pmode, REGNO (base));
10251
10252 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
10253 part[1][0] = replace_equiv_address (part[1][0], base);
10254 part[1][1] = replace_equiv_address (part[1][1],
10255 plus_constant (base, UNITS_PER_WORD));
10256 if (nparts == 3)
10257 part[1][2] = replace_equiv_address (part[1][2],
10258 plus_constant (base, 8));
10259 }
10260 }
10261
10262 if (push)
10263 {
10264 if (!TARGET_64BIT)
10265 {
10266 if (nparts == 3)
10267 {
10268 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
10269 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
10270 emit_move_insn (part[0][2], part[1][2]);
10271 }
10272 }
10273 else
10274 {
10275 /* In 64-bit mode we don't have a 32-bit push available. In case this is
10276 a register, it is OK - we will just use the larger counterpart. We also
10277 retype memory - this comes from an attempt to avoid the REX prefix on
10278 moving the second half of a TFmode value. */
10279 if (GET_MODE (part[1][1]) == SImode)
10280 {
10281 if (GET_CODE (part[1][1]) == MEM)
10282 part[1][1] = adjust_address (part[1][1], DImode, 0);
10283 else if (REG_P (part[1][1]))
10284 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
10285 else
10286 abort ();
10287 if (GET_MODE (part[1][0]) == SImode)
10288 part[1][0] = part[1][1];
10289 }
10290 }
10291 emit_move_insn (part[0][1], part[1][1]);
10292 emit_move_insn (part[0][0], part[1][0]);
10293 return;
10294 }
10295
10296 /* Choose correct order to not overwrite the source before it is copied. */
10297 if ((REG_P (part[0][0])
10298 && REG_P (part[1][1])
10299 && (REGNO (part[0][0]) == REGNO (part[1][1])
10300 || (nparts == 3
10301 && REGNO (part[0][0]) == REGNO (part[1][2]))))
10302 || (collisions > 0
10303 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
10304 {
10305 if (nparts == 3)
10306 {
10307 operands[2] = part[0][2];
10308 operands[3] = part[0][1];
10309 operands[4] = part[0][0];
10310 operands[5] = part[1][2];
10311 operands[6] = part[1][1];
10312 operands[7] = part[1][0];
10313 }
10314 else
10315 {
10316 operands[2] = part[0][1];
10317 operands[3] = part[0][0];
10318 operands[5] = part[1][1];
10319 operands[6] = part[1][0];
10320 }
10321 }
10322 else
10323 {
10324 if (nparts == 3)
10325 {
10326 operands[2] = part[0][0];
10327 operands[3] = part[0][1];
10328 operands[4] = part[0][2];
10329 operands[5] = part[1][0];
10330 operands[6] = part[1][1];
10331 operands[7] = part[1][2];
10332 }
10333 else
10334 {
10335 operands[2] = part[0][0];
10336 operands[3] = part[0][1];
10337 operands[5] = part[1][0];
10338 operands[6] = part[1][1];
10339 }
10340 }
10341
10342 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
10343 if (optimize_size)
10344 {
10345 if (GET_CODE (operands[5]) == CONST_INT
10346 && operands[5] != const0_rtx
10347 && REG_P (operands[2]))
10348 {
10349 if (GET_CODE (operands[6]) == CONST_INT
10350 && INTVAL (operands[6]) == INTVAL (operands[5]))
10351 operands[6] = operands[2];
10352
10353 if (nparts == 3
10354 && GET_CODE (operands[7]) == CONST_INT
10355 && INTVAL (operands[7]) == INTVAL (operands[5]))
10356 operands[7] = operands[2];
10357 }
10358
10359 if (nparts == 3
10360 && GET_CODE (operands[6]) == CONST_INT
10361 && operands[6] != const0_rtx
10362 && REG_P (operands[3])
10363 && GET_CODE (operands[7]) == CONST_INT
10364 && INTVAL (operands[7]) == INTVAL (operands[6]))
10365 operands[7] = operands[3];
10366 }
10367
10368 emit_move_insn (operands[2], operands[5]);
10369 emit_move_insn (operands[3], operands[6]);
10370 if (nparts == 3)
10371 emit_move_insn (operands[4], operands[7]);
10372
10373 return;
10374 }
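
/* For illustration only -- a minimal C sketch of the ordering problem
   handled above.  When a multi-word value is moved one word at a time
   and the first destination word aliases a later source word, moving
   low-to-high would clobber the source before it is read, so the parts
   are emitted high-to-low instead.  The helper name is hypothetical.  */
static void
move_two_words (unsigned int *dst_lo, unsigned int *dst_hi,
                const unsigned int *src_lo, const unsigned int *src_hi)
{
  if (dst_lo == src_hi)
    {
      /* Destination low word aliases the source high word:
         copy the high word first.  */
      *dst_hi = *src_hi;
      *dst_lo = *src_lo;
    }
  else
    {
      *dst_lo = *src_lo;
      *dst_hi = *src_hi;
    }
}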
10375
10376 /* Helper function of ix86_split_ashldi used to generate an SImode
10377 left shift by a constant, either using a single shift or
10378 a sequence of add instructions. */
10379
10380 static void
10381 ix86_expand_ashlsi3_const (rtx operand, int count)
10382 {
10383 if (count == 1)
10384 emit_insn (gen_addsi3 (operand, operand, operand));
10385 else if (!optimize_size
10386 && count * ix86_cost->add <= ix86_cost->shift_const)
10387 {
10388 int i;
10389 for (i=0; i<count; i++)
10390 emit_insn (gen_addsi3 (operand, operand, operand));
10391 }
10392 else
10393 emit_insn (gen_ashlsi3 (operand, operand, GEN_INT (count)));
10394 }
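
/* For illustration only -- a minimal C sketch of the add-instead-of-shift
   choice above: x << count equals count doublings, which is preferred
   whenever count * add cost does not exceed the constant-shift cost.
   The helper name is hypothetical.  */
static unsigned int
shift_left_by_adds (unsigned int x, int count)
{
  while (count-- > 0)
    x += x;                     /* addl x, x  ==  x <<= 1 */
  return x;
}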
10395
10396 void
10397 ix86_split_ashldi (rtx *operands, rtx scratch)
10398 {
10399 rtx low[2], high[2];
10400 int count;
10401
10402 if (GET_CODE (operands[2]) == CONST_INT)
10403 {
10404 split_di (operands, 2, low, high);
10405 count = INTVAL (operands[2]) & 63;
10406
10407 if (count >= 32)
10408 {
10409 emit_move_insn (high[0], low[1]);
10410 emit_move_insn (low[0], const0_rtx);
10411
10412 if (count > 32)
10413 ix86_expand_ashlsi3_const (high[0], count - 32);
10414 }
10415 else
10416 {
10417 if (!rtx_equal_p (operands[0], operands[1]))
10418 emit_move_insn (operands[0], operands[1]);
10419 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
10420 ix86_expand_ashlsi3_const (low[0], count);
10421 }
10422 return;
10423 }
10424
10425 split_di (operands, 1, low, high);
10426
10427 if (operands[1] == const1_rtx)
10428 {
10429 /* Assuming we've chosen QImode-capable registers, 1LL << N
10430 can be done with two 32-bit shifts, no branches, no cmoves. */
10431 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
10432 {
10433 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
10434
10435 ix86_expand_clear (low[0]);
10436 ix86_expand_clear (high[0]);
10437 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (32)));
10438
10439 d = gen_lowpart (QImode, low[0]);
10440 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
10441 s = gen_rtx_EQ (QImode, flags, const0_rtx);
10442 emit_insn (gen_rtx_SET (VOIDmode, d, s));
10443
10444 d = gen_lowpart (QImode, high[0]);
10445 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
10446 s = gen_rtx_NE (QImode, flags, const0_rtx);
10447 emit_insn (gen_rtx_SET (VOIDmode, d, s));
10448 }
10449
10450 /* Otherwise, we can get the same results by manually performing
10451 a bit extract operation on bit 5, and then performing the two
10452 shifts. The two methods of getting 0/1 into low/high are exactly
10453 the same size. Avoiding the shift in the bit extract case helps
10454 pentium4 a bit; no one else seems to care much either way. */
10455 else
10456 {
10457 rtx x;
10458
10459 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
10460 x = gen_rtx_ZERO_EXTEND (SImode, operands[2]);
10461 else
10462 x = gen_lowpart (SImode, operands[2]);
10463 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
10464
10465 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (5)));
10466 emit_insn (gen_andsi3 (high[0], high[0], GEN_INT (1)));
10467 emit_move_insn (low[0], high[0]);
10468 emit_insn (gen_xorsi3 (low[0], low[0], GEN_INT (1)));
10469 }
10470
10471 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
10472 emit_insn (gen_ashlsi3 (high[0], high[0], operands[2]));
10473 return;
10474 }
10475
10476 if (operands[1] == constm1_rtx)
10477 {
10478 /* For -1LL << N, we can avoid the shld instruction, because we
10479 know that we're shifting 0...31 ones into a -1. */
10480 emit_move_insn (low[0], constm1_rtx);
10481 if (optimize_size)
10482 emit_move_insn (high[0], low[0]);
10483 else
10484 emit_move_insn (high[0], constm1_rtx);
10485 }
10486 else
10487 {
10488 if (!rtx_equal_p (operands[0], operands[1]))
10489 emit_move_insn (operands[0], operands[1]);
10490
10491 split_di (operands, 1, low, high);
10492 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
10493 }
10494
10495 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
10496
10497 if (TARGET_CMOVE && scratch)
10498 {
10499 ix86_expand_clear (scratch);
10500 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
10501 }
10502 else
10503 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
10504 }
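
/* For illustration only -- a minimal C sketch of the DImode left shift
   assembled above from SImode pieces.  For count >= 32 the old low word
   becomes the high word (shifted by count - 32) and the low word is
   cleared; for 0 < count < 32 the high word also takes bits shifted in
   from the low word (the shld step).  The helper name is hypothetical
   and assumes 32-bit unsigned int.  */
static unsigned long long
shift_left_64_via_32 (unsigned int lo, unsigned int hi, unsigned int count)
{
  count &= 63;
  if (count >= 32)
    {
      hi = lo << (count - 32);
      lo = 0;
    }
  else if (count > 0)
    {
      hi = (hi << count) | (lo >> (32 - count));        /* shld */
      lo = lo << count;
    }
  return ((unsigned long long) hi << 32) | lo;
}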
10505
10506 void
10507 ix86_split_ashrdi (rtx *operands, rtx scratch)
10508 {
10509 rtx low[2], high[2];
10510 int count;
10511
10512 if (GET_CODE (operands[2]) == CONST_INT)
10513 {
10514 split_di (operands, 2, low, high);
10515 count = INTVAL (operands[2]) & 63;
10516
10517 if (count == 63)
10518 {
10519 emit_move_insn (high[0], high[1]);
10520 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10521 emit_move_insn (low[0], high[0]);
10522
10523 }
10524 else if (count >= 32)
10525 {
10526 emit_move_insn (low[0], high[1]);
10527 emit_move_insn (high[0], low[0]);
10528 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10529 if (count > 32)
10530 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
10531 }
10532 else
10533 {
10534 if (!rtx_equal_p (operands[0], operands[1]))
10535 emit_move_insn (operands[0], operands[1]);
10536 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10537 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
10538 }
10539 }
10540 else
10541 {
10542 if (!rtx_equal_p (operands[0], operands[1]))
10543 emit_move_insn (operands[0], operands[1]);
10544
10545 split_di (operands, 1, low, high);
10546
10547 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10548 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
10549
10550 if (TARGET_CMOVE && scratch)
10551 {
10552 emit_move_insn (scratch, high[0]);
10553 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
10554 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10555 scratch));
10556 }
10557 else
10558 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
10559 }
10560 }
10561
10562 void
10563 ix86_split_lshrdi (rtx *operands, rtx scratch)
10564 {
10565 rtx low[2], high[2];
10566 int count;
10567
10568 if (GET_CODE (operands[2]) == CONST_INT)
10569 {
10570 split_di (operands, 2, low, high);
10571 count = INTVAL (operands[2]) & 63;
10572
10573 if (count >= 32)
10574 {
10575 emit_move_insn (low[0], high[1]);
10576 ix86_expand_clear (high[0]);
10577
10578 if (count > 32)
10579 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
10580 }
10581 else
10582 {
10583 if (!rtx_equal_p (operands[0], operands[1]))
10584 emit_move_insn (operands[0], operands[1]);
10585 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10586 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
10587 }
10588 }
10589 else
10590 {
10591 if (!rtx_equal_p (operands[0], operands[1]))
10592 emit_move_insn (operands[0], operands[1]);
10593
10594 split_di (operands, 1, low, high);
10595
10596 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10597 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
10598
10599 /* Heh. By reversing the arguments, we can reuse this pattern. */
10600 if (TARGET_CMOVE && scratch)
10601 {
10602 ix86_expand_clear (scratch);
10603 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10604 scratch));
10605 }
10606 else
10607 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
10608 }
10609 }
10610
10611 /* Helper function for the string operations below. Test whether VARIABLE
10612 is aligned to VALUE bytes. If so, jump to the label. */
10613 static rtx
10614 ix86_expand_aligntest (rtx variable, int value)
10615 {
10616 rtx label = gen_label_rtx ();
10617 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
10618 if (GET_MODE (variable) == DImode)
10619 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
10620 else
10621 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
10622 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
10623 1, label);
10624 return label;
10625 }
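
/* For illustration only -- a minimal C sketch of the test emitted above:
   AND the address with the low bits of interest and skip the fix-up code
   (jump to the label) when the result is zero.  The helper name is
   hypothetical.  */
static int
aligntest_would_skip (unsigned long variable, int value)
{
  return (variable & value) == 0;
}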
10626
10627 /* Adjust COUNTER by the VALUE. */
10628 static void
10629 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
10630 {
10631 if (GET_MODE (countreg) == DImode)
10632 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
10633 else
10634 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
10635 }
10636
10637 /* Zero extend possibly SImode EXP to Pmode register. */
10638 rtx
10639 ix86_zero_extend_to_Pmode (rtx exp)
10640 {
10641 rtx r;
10642 if (GET_MODE (exp) == VOIDmode)
10643 return force_reg (Pmode, exp);
10644 if (GET_MODE (exp) == Pmode)
10645 return copy_to_mode_reg (Pmode, exp);
10646 r = gen_reg_rtx (Pmode);
10647 emit_insn (gen_zero_extendsidi2 (r, exp));
10648 return r;
10649 }
10650
10651 /* Expand string move (memcpy) operation. Use i386 string operations when
10652 profitable. expand_clrmem contains similar code. */
10653 int
10654 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp)
10655 {
10656 rtx srcreg, destreg, countreg, srcexp, destexp;
10657 enum machine_mode counter_mode;
10658 HOST_WIDE_INT align = 0;
10659 unsigned HOST_WIDE_INT count = 0;
10660
10661 if (GET_CODE (align_exp) == CONST_INT)
10662 align = INTVAL (align_exp);
10663
10664 /* Can't use any of this if the user has appropriated esi or edi. */
10665 if (global_regs[4] || global_regs[5])
10666 return 0;
10667
10668 /* This simple hack avoids all inlining code and simplifies code below. */
10669 if (!TARGET_ALIGN_STRINGOPS)
10670 align = 64;
10671
10672 if (GET_CODE (count_exp) == CONST_INT)
10673 {
10674 count = INTVAL (count_exp);
10675 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
10676 return 0;
10677 }
10678
10679 /* Figure out proper mode for counter. For 32bits it is always SImode,
10680 for 64bits use SImode when possible, otherwise DImode.
10681 Set count to number of bytes copied when known at compile time. */
10682 if (!TARGET_64BIT
10683 || GET_MODE (count_exp) == SImode
10684 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
10685 counter_mode = SImode;
10686 else
10687 counter_mode = DImode;
10688
10689 if (counter_mode != SImode && counter_mode != DImode)
10690 abort ();
10691
10692 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
10693 if (destreg != XEXP (dst, 0))
10694 dst = replace_equiv_address_nv (dst, destreg);
10695 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
10696 if (srcreg != XEXP (src, 0))
10697 src = replace_equiv_address_nv (src, srcreg);
10698
10699 /* When optimizing for size emit simple rep ; movsb instruction for
10700 counts not divisible by 4. */
10701
10702 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
10703 {
10704 emit_insn (gen_cld ());
10705 countreg = ix86_zero_extend_to_Pmode (count_exp);
10706 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
10707 srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
10708 emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
10709 destexp, srcexp));
10710 }
10711
10712 /* For constant aligned (or small unaligned) copies use rep movsl
10713 followed by code copying the rest. For PentiumPro ensure 8 byte
10714 alignment to allow rep movsl acceleration. */
10715
10716 else if (count != 0
10717 && (align >= 8
10718 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
10719 || optimize_size || count < (unsigned int) 64))
10720 {
10721 unsigned HOST_WIDE_INT offset = 0;
10722 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10723 rtx srcmem, dstmem;
10724
10725 emit_insn (gen_cld ());
10726 if (count & ~(size - 1))
10727 {
10728 countreg = copy_to_mode_reg (counter_mode,
10729 GEN_INT ((count >> (size == 4 ? 2 : 3))
10730 & (TARGET_64BIT ? -1 : 0x3fffffff)));
10731 countreg = ix86_zero_extend_to_Pmode (countreg);
10732
10733 destexp = gen_rtx_ASHIFT (Pmode, countreg,
10734 GEN_INT (size == 4 ? 2 : 3));
10735 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
10736 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
10737
10738 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
10739 countreg, destexp, srcexp));
10740 offset = count & ~(size - 1);
10741 }
10742 if (size == 8 && (count & 0x04))
10743 {
10744 srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
10745 offset);
10746 dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
10747 offset);
10748 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10749 offset += 4;
10750 }
10751 if (count & 0x02)
10752 {
10753 srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
10754 offset);
10755 dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
10756 offset);
10757 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10758 offset += 2;
10759 }
10760 if (count & 0x01)
10761 {
10762 srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
10763 offset);
10764 dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
10765 offset);
10766 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10767 }
10768 }
10769 /* The generic code based on the glibc implementation:
10770 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
10771 allowing accelerated copying there)
10772 - copy the data using rep movsl
10773 - copy the rest. */
10774 else
10775 {
10776 rtx countreg2;
10777 rtx label = NULL;
10778 rtx srcmem, dstmem;
10779 int desired_alignment = (TARGET_PENTIUMPRO
10780 && (count == 0 || count >= (unsigned int) 260)
10781 ? 8 : UNITS_PER_WORD);
10782 /* Get rid of MEM_OFFSETs, they won't be accurate. */
10783 dst = change_address (dst, BLKmode, destreg);
10784 src = change_address (src, BLKmode, srcreg);
10785
10786 /* In case we don't know anything about the alignment, default to the
10787 library version, since it is usually equally fast and results in
10788 shorter code.
10789 
10790 Also emit a call when we know that the count is large and call overhead
10791 will not be important. */
10792 if (!TARGET_INLINE_ALL_STRINGOPS
10793 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
10794 return 0;
10795
10796 if (TARGET_SINGLE_STRINGOP)
10797 emit_insn (gen_cld ());
10798
10799 countreg2 = gen_reg_rtx (Pmode);
10800 countreg = copy_to_mode_reg (counter_mode, count_exp);
10801
10802 /* We don't use loops to align destination and to copy parts smaller
10803 than 4 bytes, because gcc is able to optimize such code better (in
10804 the case the destination or the count really is aligned, gcc is often
10805 able to predict the branches) and also it is friendlier to the
10806 hardware branch prediction.
10807
10808 Using loops is beneficial for generic case, because we can
10809 handle small counts using the loops. Many CPUs (such as Athlon)
10810 have large REP prefix setup costs.
10811
10812 This is quite costly. Maybe we can revisit this decision later or
10813 add some customizability to this code. */
10814
10815 if (count == 0 && align < desired_alignment)
10816 {
10817 label = gen_label_rtx ();
10818 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
10819 LEU, 0, counter_mode, 1, label);
10820 }
10821 if (align <= 1)
10822 {
10823 rtx label = ix86_expand_aligntest (destreg, 1);
10824 srcmem = change_address (src, QImode, srcreg);
10825 dstmem = change_address (dst, QImode, destreg);
10826 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10827 ix86_adjust_counter (countreg, 1);
10828 emit_label (label);
10829 LABEL_NUSES (label) = 1;
10830 }
10831 if (align <= 2)
10832 {
10833 rtx label = ix86_expand_aligntest (destreg, 2);
10834 srcmem = change_address (src, HImode, srcreg);
10835 dstmem = change_address (dst, HImode, destreg);
10836 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10837 ix86_adjust_counter (countreg, 2);
10838 emit_label (label);
10839 LABEL_NUSES (label) = 1;
10840 }
10841 if (align <= 4 && desired_alignment > 4)
10842 {
10843 rtx label = ix86_expand_aligntest (destreg, 4);
10844 srcmem = change_address (src, SImode, srcreg);
10845 dstmem = change_address (dst, SImode, destreg);
10846 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10847 ix86_adjust_counter (countreg, 4);
10848 emit_label (label);
10849 LABEL_NUSES (label) = 1;
10850 }
10851
10852 if (label && desired_alignment > 4 && !TARGET_64BIT)
10853 {
10854 emit_label (label);
10855 LABEL_NUSES (label) = 1;
10856 label = NULL_RTX;
10857 }
10858 if (!TARGET_SINGLE_STRINGOP)
10859 emit_insn (gen_cld ());
10860 if (TARGET_64BIT)
10861 {
10862 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10863 GEN_INT (3)));
10864 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
10865 }
10866 else
10867 {
10868 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
10869 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
10870 }
10871 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
10872 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
10873 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
10874 countreg2, destexp, srcexp));
10875
10876 if (label)
10877 {
10878 emit_label (label);
10879 LABEL_NUSES (label) = 1;
10880 }
10881 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10882 {
10883 srcmem = change_address (src, SImode, srcreg);
10884 dstmem = change_address (dst, SImode, destreg);
10885 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10886 }
10887 if ((align <= 4 || count == 0) && TARGET_64BIT)
10888 {
10889 rtx label = ix86_expand_aligntest (countreg, 4);
10890 srcmem = change_address (src, SImode, srcreg);
10891 dstmem = change_address (dst, SImode, destreg);
10892 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10893 emit_label (label);
10894 LABEL_NUSES (label) = 1;
10895 }
10896 if (align > 2 && count != 0 && (count & 2))
10897 {
10898 srcmem = change_address (src, HImode, srcreg);
10899 dstmem = change_address (dst, HImode, destreg);
10900 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10901 }
10902 if (align <= 2 || count == 0)
10903 {
10904 rtx label = ix86_expand_aligntest (countreg, 2);
10905 srcmem = change_address (src, HImode, srcreg);
10906 dstmem = change_address (dst, HImode, destreg);
10907 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10908 emit_label (label);
10909 LABEL_NUSES (label) = 1;
10910 }
10911 if (align > 1 && count != 0 && (count & 1))
10912 {
10913 srcmem = change_address (src, QImode, srcreg);
10914 dstmem = change_address (dst, QImode, destreg);
10915 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10916 }
10917 if (align <= 1 || count == 0)
10918 {
10919 rtx label = ix86_expand_aligntest (countreg, 1);
10920 srcmem = change_address (src, QImode, srcreg);
10921 dstmem = change_address (dst, QImode, destreg);
10922 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10923 emit_label (label);
10924 LABEL_NUSES (label) = 1;
10925 }
10926 }
10927
10928 return 1;
10929 }
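
/* For illustration only -- a minimal C sketch of the inline copy strategy
   the expander above implements with rep movs plus fix-up moves: align
   the destination, copy whole 32-bit words (the rep movsl part), then
   mop up the remaining 4/2/1-byte tail.  The helper name is
   hypothetical.  */
static void
inline_copy_sketch (unsigned char *dst, const unsigned char *src,
                    unsigned long count)
{
  /* Head: byte copies until the destination is 4-byte aligned.  */
  while (count > 0 && ((unsigned long) dst & 3) != 0)
    {
      *dst++ = *src++;
      count--;
    }
  /* Bulk: whole words; the real code uses a single rep movsl here.  */
  while (count >= 4)
    {
      dst[0] = src[0]; dst[1] = src[1]; dst[2] = src[2]; dst[3] = src[3];
      dst += 4;
      src += 4;
      count -= 4;
    }
  /* Tail: at most three bytes remain.  */
  while (count > 0)
    {
      *dst++ = *src++;
      count--;
    }
}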
10930
10931 /* Expand string clear operation (bzero). Use i386 string operations when
10932 profitable. expand_movmem contains similar code. */
10933 int
10934 ix86_expand_clrmem (rtx dst, rtx count_exp, rtx align_exp)
10935 {
10936 rtx destreg, zeroreg, countreg, destexp;
10937 enum machine_mode counter_mode;
10938 HOST_WIDE_INT align = 0;
10939 unsigned HOST_WIDE_INT count = 0;
10940
10941 if (GET_CODE (align_exp) == CONST_INT)
10942 align = INTVAL (align_exp);
10943
10944 /* Can't use any of this if the user has appropriated edi. */
10945 if (global_regs[5])
10946 return 0;
10947
10948 /* This simple hack avoids all inlining code and simplifies code below. */
10949 if (!TARGET_ALIGN_STRINGOPS)
10950 align = 32;
10951
10952 if (GET_CODE (count_exp) == CONST_INT)
10953 {
10954 count = INTVAL (count_exp);
10955 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
10956 return 0;
10957 }
10958 /* Figure out proper mode for counter. For 32bits it is always SImode,
10959 for 64bits use SImode when possible, otherwise DImode.
10960 Set count to number of bytes copied when known at compile time. */
10961 if (!TARGET_64BIT
10962 || GET_MODE (count_exp) == SImode
10963 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
10964 counter_mode = SImode;
10965 else
10966 counter_mode = DImode;
10967
10968 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
10969 if (destreg != XEXP (dst, 0))
10970 dst = replace_equiv_address_nv (dst, destreg);
10971
10972
10973 /* When optimizing for size emit simple rep ; stosb instruction for
10974 counts not divisible by 4. The movl $N, %ecx; rep; stosb
10975 sequence is 7 bytes long, so if optimizing for size and count is
10976 small enough that some stosl, stosw and stosb instructions without
10977 rep are shorter, fall back into the next if. */
10978
10979 if ((!optimize || optimize_size)
10980 && (count == 0
10981 || ((count & 0x03)
10982 && (!optimize_size || (count & 0x03) + (count >> 2) > 7))))
10983 {
10984 emit_insn (gen_cld ());
10985
10986 countreg = ix86_zero_extend_to_Pmode (count_exp);
10987 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
10988 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
10989 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
10990 }
10991 else if (count != 0
10992 && (align >= 8
10993 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
10994 || optimize_size || count < (unsigned int) 64))
10995 {
10996 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10997 unsigned HOST_WIDE_INT offset = 0;
10998
10999 emit_insn (gen_cld ());
11000
11001 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
11002 if (count & ~(size - 1))
11003 {
11004 unsigned HOST_WIDE_INT repcount;
11005 unsigned int max_nonrep;
11006
11007 repcount = count >> (size == 4 ? 2 : 3);
11008 if (!TARGET_64BIT)
11009 repcount &= 0x3fffffff;
11010
11011 /* movl $N, %ecx; rep; stosl is 7 bytes, while N x stosl is N bytes.
11012 movl $N, %ecx; rep; stosq is 8 bytes, while N x stosq is 2xN
11013 bytes. In both cases the latter seems to be faster for small
11014 values of N. */
11015 max_nonrep = size == 4 ? 7 : 4;
11016 if (!optimize_size)
11017 switch (ix86_tune)
11018 {
11019 case PROCESSOR_PENTIUM4:
11020 case PROCESSOR_NOCONA:
11021 max_nonrep = 3;
11022 break;
11023 default:
11024 break;
11025 }
11026
11027 if (repcount <= max_nonrep)
11028 while (repcount-- > 0)
11029 {
11030 rtx mem = adjust_automodify_address_nv (dst,
11031 GET_MODE (zeroreg),
11032 destreg, offset);
11033 emit_insn (gen_strset (destreg, mem, zeroreg));
11034 offset += size;
11035 }
11036 else
11037 {
11038 countreg = copy_to_mode_reg (counter_mode, GEN_INT (repcount));
11039 countreg = ix86_zero_extend_to_Pmode (countreg);
11040 destexp = gen_rtx_ASHIFT (Pmode, countreg,
11041 GEN_INT (size == 4 ? 2 : 3));
11042 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11043 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg,
11044 destexp));
11045 offset = count & ~(size - 1);
11046 }
11047 }
11048 if (size == 8 && (count & 0x04))
11049 {
11050 rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
11051 offset);
11052 emit_insn (gen_strset (destreg, mem,
11053 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11054 offset += 4;
11055 }
11056 if (count & 0x02)
11057 {
11058 rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
11059 offset);
11060 emit_insn (gen_strset (destreg, mem,
11061 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11062 offset += 2;
11063 }
11064 if (count & 0x01)
11065 {
11066 rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
11067 offset);
11068 emit_insn (gen_strset (destreg, mem,
11069 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11070 }
11071 }
11072 else
11073 {
11074 rtx countreg2;
11075 rtx label = NULL;
11076 /* Compute desired alignment of the string operation. */
11077 int desired_alignment = (TARGET_PENTIUMPRO
11078 && (count == 0 || count >= (unsigned int) 260)
11079 ? 8 : UNITS_PER_WORD);
11080
11081 /* In case we don't know anything about the alignment, default to the
11082 library version, since it is usually equally fast and results in
11083 shorter code.
11084 
11085 Also emit a call when we know that the count is large and call overhead
11086 will not be important. */
11087 if (!TARGET_INLINE_ALL_STRINGOPS
11088 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11089 return 0;
11090
11091 if (TARGET_SINGLE_STRINGOP)
11092 emit_insn (gen_cld ());
11093
11094 countreg2 = gen_reg_rtx (Pmode);
11095 countreg = copy_to_mode_reg (counter_mode, count_exp);
11096 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
11097 /* Get rid of MEM_OFFSET, it won't be accurate. */
11098 dst = change_address (dst, BLKmode, destreg);
11099
11100 if (count == 0 && align < desired_alignment)
11101 {
11102 label = gen_label_rtx ();
11103 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11104 LEU, 0, counter_mode, 1, label);
11105 }
11106 if (align <= 1)
11107 {
11108 rtx label = ix86_expand_aligntest (destreg, 1);
11109 emit_insn (gen_strset (destreg, dst,
11110 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11111 ix86_adjust_counter (countreg, 1);
11112 emit_label (label);
11113 LABEL_NUSES (label) = 1;
11114 }
11115 if (align <= 2)
11116 {
11117 rtx label = ix86_expand_aligntest (destreg, 2);
11118 emit_insn (gen_strset (destreg, dst,
11119 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11120 ix86_adjust_counter (countreg, 2);
11121 emit_label (label);
11122 LABEL_NUSES (label) = 1;
11123 }
11124 if (align <= 4 && desired_alignment > 4)
11125 {
11126 rtx label = ix86_expand_aligntest (destreg, 4);
11127 emit_insn (gen_strset (destreg, dst,
11128 (TARGET_64BIT
11129 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
11130 : zeroreg)));
11131 ix86_adjust_counter (countreg, 4);
11132 emit_label (label);
11133 LABEL_NUSES (label) = 1;
11134 }
11135
11136 if (label && desired_alignment > 4 && !TARGET_64BIT)
11137 {
11138 emit_label (label);
11139 LABEL_NUSES (label) = 1;
11140 label = NULL_RTX;
11141 }
11142
11143 if (!TARGET_SINGLE_STRINGOP)
11144 emit_insn (gen_cld ());
11145 if (TARGET_64BIT)
11146 {
11147 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11148 GEN_INT (3)));
11149 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
11150 }
11151 else
11152 {
11153 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
11154 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
11155 }
11156 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11157 emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));
11158
11159 if (label)
11160 {
11161 emit_label (label);
11162 LABEL_NUSES (label) = 1;
11163 }
11164
11165 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11166 emit_insn (gen_strset (destreg, dst,
11167 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11168 if (TARGET_64BIT && (align <= 4 || count == 0))
11169 {
11170 rtx label = ix86_expand_aligntest (countreg, 4);
11171 emit_insn (gen_strset (destreg, dst,
11172 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11173 emit_label (label);
11174 LABEL_NUSES (label) = 1;
11175 }
11176 if (align > 2 && count != 0 && (count & 2))
11177 emit_insn (gen_strset (destreg, dst,
11178 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11179 if (align <= 2 || count == 0)
11180 {
11181 rtx label = ix86_expand_aligntest (countreg, 2);
11182 emit_insn (gen_strset (destreg, dst,
11183 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11184 emit_label (label);
11185 LABEL_NUSES (label) = 1;
11186 }
11187 if (align > 1 && count != 0 && (count & 1))
11188 emit_insn (gen_strset (destreg, dst,
11189 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11190 if (align <= 1 || count == 0)
11191 {
11192 rtx label = ix86_expand_aligntest (countreg, 1);
11193 emit_insn (gen_strset (destreg, dst,
11194 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11195 emit_label (label);
11196 LABEL_NUSES (label) = 1;
11197 }
11198 }
11199 return 1;
11200 }
11201
11202 /* Expand strlen. */
11203 int
11204 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
11205 {
11206 rtx addr, scratch1, scratch2, scratch3, scratch4;
11207
11208 /* The generic case of the strlen expander is long. Avoid expanding
11209 it unless TARGET_INLINE_ALL_STRINGOPS. */
11210
11211 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11212 && !TARGET_INLINE_ALL_STRINGOPS
11213 && !optimize_size
11214 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
11215 return 0;
11216
11217 addr = force_reg (Pmode, XEXP (src, 0));
11218 scratch1 = gen_reg_rtx (Pmode);
11219
11220 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11221 && !optimize_size)
11222 {
11223 /* It seems that some optimizer does not combine a call like
11224 foo(strlen(bar), strlen(bar));
11225 when the move and the subtraction are done here. It does calculate
11226 the length just once when these instructions are done inside of
11227 output_strlen_unroll(). But I think that since &bar[strlen(bar)] is
11228 often used and I use one fewer register for the lifetime of
11229 output_strlen_unroll(), this is better. */
11230
11231 emit_move_insn (out, addr);
11232
11233 ix86_expand_strlensi_unroll_1 (out, src, align);
11234
11235 /* strlensi_unroll_1 returns the address of the zero at the end of
11236 the string, like memchr(), so compute the length by subtracting
11237 the start address. */
11238 if (TARGET_64BIT)
11239 emit_insn (gen_subdi3 (out, out, addr));
11240 else
11241 emit_insn (gen_subsi3 (out, out, addr));
11242 }
11243 else
11244 {
11245 rtx unspec;
11246 scratch2 = gen_reg_rtx (Pmode);
11247 scratch3 = gen_reg_rtx (Pmode);
11248 scratch4 = force_reg (Pmode, constm1_rtx);
11249
11250 emit_move_insn (scratch3, addr);
11251 eoschar = force_reg (QImode, eoschar);
11252
11253 emit_insn (gen_cld ());
11254 src = replace_equiv_address_nv (src, scratch3);
11255
11256 /* If .md starts supporting :P, this can be done in .md. */
11257 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
11258 scratch4), UNSPEC_SCAS);
11259 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
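/* A sketch of the arithmetic that follows: the count register starts at -1
   and repnz scasb decrements it once per byte scanned, including the
   terminating zero, so the length is ~count - 1, computed below as the
   one's complement followed by adding -1. */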
11260 if (TARGET_64BIT)
11261 {
11262 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
11263 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
11264 }
11265 else
11266 {
11267 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
11268 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
11269 }
11270 }
11271 return 1;
11272 }
11273
11274 /* Expand the appropriate insns for doing strlen if not just doing
11275 repnz; scasb
11276
11277 out = result, initialized with the start address
11278 align_rtx = alignment of the address.
11279 scratch = scratch register, initialized with the start address when
11280 not aligned, otherwise undefined
11281
11282 This is just the body. It needs the initializations mentioned above and
11283 some address computing at the end. These things are done in i386.md. */
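/* Rough shape of the expansion below (a sketch, not the exact insns):
   check up to 3 leading bytes one at a time until OUT is 4-byte aligned;
   loop: load a word, OUT += 4,
   loop again while ((word - 0x01010101) & ~word & 0x80808080) == 0;
   finally locate the zero byte within the word and adjust OUT. */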
11284
11285 static void
11286 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
11287 {
11288 int align;
11289 rtx tmp;
11290 rtx align_2_label = NULL_RTX;
11291 rtx align_3_label = NULL_RTX;
11292 rtx align_4_label = gen_label_rtx ();
11293 rtx end_0_label = gen_label_rtx ();
11294 rtx mem;
11295 rtx tmpreg = gen_reg_rtx (SImode);
11296 rtx scratch = gen_reg_rtx (SImode);
11297 rtx cmp;
11298
11299 align = 0;
11300 if (GET_CODE (align_rtx) == CONST_INT)
11301 align = INTVAL (align_rtx);
11302
11303 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
11304
11305 /* Is there a known alignment and is it less than 4? */
11306 if (align < 4)
11307 {
11308 rtx scratch1 = gen_reg_rtx (Pmode);
11309 emit_move_insn (scratch1, out);
11310 /* Is there a known alignment and is it not 2? */
11311 if (align != 2)
11312 {
11313 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
11314 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
11315
11316 /* Leave just the 3 lower bits. */
11317 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
11318 NULL_RTX, 0, OPTAB_WIDEN);
11319
11320 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11321 Pmode, 1, align_4_label);
11322 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
11323 Pmode, 1, align_2_label);
11324 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
11325 Pmode, 1, align_3_label);
11326 }
11327 else
11328 {
11329 /* Since the alignment is 2, we have to check 2 or 0 bytes;
11330 check whether the pointer is aligned to a 4-byte boundary. */
11331
11332 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
11333 NULL_RTX, 0, OPTAB_WIDEN);
11334
11335 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11336 Pmode, 1, align_4_label);
11337 }
11338
11339 mem = change_address (src, QImode, out);
11340
11341 /* Now compare the bytes. */
11342
11343 /* Compare the first n unaligned bytes one byte at a time. */
11344 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
11345 QImode, 1, end_0_label);
11346
11347 /* Increment the address. */
11348 if (TARGET_64BIT)
11349 emit_insn (gen_adddi3 (out, out, const1_rtx));
11350 else
11351 emit_insn (gen_addsi3 (out, out, const1_rtx));
11352
11353 /* Not needed with an alignment of 2 */
11354 if (align != 2)
11355 {
11356 emit_label (align_2_label);
11357
11358 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11359 end_0_label);
11360
11361 if (TARGET_64BIT)
11362 emit_insn (gen_adddi3 (out, out, const1_rtx));
11363 else
11364 emit_insn (gen_addsi3 (out, out, const1_rtx));
11365
11366 emit_label (align_3_label);
11367 }
11368
11369 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11370 end_0_label);
11371
11372 if (TARGET_64BIT)
11373 emit_insn (gen_adddi3 (out, out, const1_rtx));
11374 else
11375 emit_insn (gen_addsi3 (out, out, const1_rtx));
11376 }
11377
11378 /* Generate a loop to check 4 bytes at a time. It is not a good idea
11379 to align this loop; doing so only enlarges the program and does not
11380 speed it up. */
11381 emit_label (align_4_label);
11382
11383 mem = change_address (src, SImode, out);
11384 emit_move_insn (scratch, mem);
11385 if (TARGET_64BIT)
11386 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
11387 else
11388 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
11389
11390 /* This formula yields a nonzero result iff one of the bytes is zero.
11391 This saves three branches inside the loop and many cycles. */
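/* For illustration (not emitted code): with the word 0x12003456,
   subtracting 0x01010101 gives 0x10ff3355; ANDing that with the complement
   0xedffcba9 and with 0x80808080 leaves 0x00800000, flagging the zero byte,
   while a word with no zero byte always yields 0. */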
11392
11393 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
11394 emit_insn (gen_one_cmplsi2 (scratch, scratch));
11395 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
11396 emit_insn (gen_andsi3 (tmpreg, tmpreg,
11397 gen_int_mode (0x80808080, SImode)));
11398 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
11399 align_4_label);
11400
11401 if (TARGET_CMOVE)
11402 {
11403 rtx reg = gen_reg_rtx (SImode);
11404 rtx reg2 = gen_reg_rtx (Pmode);
11405 emit_move_insn (reg, tmpreg);
11406 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
11407
11408 /* If zero is not in the first two bytes, move two bytes forward. */
11409 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11410 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11411 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11412 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
11413 gen_rtx_IF_THEN_ELSE (SImode, tmp,
11414 reg,
11415 tmpreg)));
11416 /* Emit lea manually to avoid clobbering of flags. */
11417 emit_insn (gen_rtx_SET (SImode, reg2,
11418 gen_rtx_PLUS (Pmode, out, const2_rtx)));
11419
11420 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11421 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11422 emit_insn (gen_rtx_SET (VOIDmode, out,
11423 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
11424 reg2,
11425 out)));
11426
11427 }
11428 else
11429 {
11430 rtx end_2_label = gen_label_rtx ();
11431 /* Is zero in the first two bytes? */
11432
11433 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11434 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11435 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
11436 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11437 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
11438 pc_rtx);
11439 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11440 JUMP_LABEL (tmp) = end_2_label;
11441
11442 /* Not in the first two. Move two bytes forward. */
11443 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
11444 if (TARGET_64BIT)
11445 emit_insn (gen_adddi3 (out, out, const2_rtx));
11446 else
11447 emit_insn (gen_addsi3 (out, out, const2_rtx));
11448
11449 emit_label (end_2_label);
11450
11451 }
11452
11453 /* Avoid a branch in the final byte adjustment. */
11454 tmpreg = gen_lowpart (QImode, tmpreg);
11455 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
11456 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
11457 if (TARGET_64BIT)
11458 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
11459 else
11460 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
11461
11462 emit_label (end_0_label);
11463 }
11464
11465 void
11466 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
11467 rtx callarg2 ATTRIBUTE_UNUSED,
11468 rtx pop, int sibcall)
11469 {
11470 rtx use = NULL, call;
11471
11472 if (pop == const0_rtx)
11473 pop = NULL;
11474 if (TARGET_64BIT && pop)
11475 abort ();
11476
11477 #if TARGET_MACHO
11478 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
11479 fnaddr = machopic_indirect_call_target (fnaddr);
11480 #else
11481 /* Static functions and indirect calls don't need the pic register. */
11482 if (! TARGET_64BIT && flag_pic
11483 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
11484 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
11485 use_reg (&use, pic_offset_table_rtx);
11486
11487 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
11488 {
11489 rtx al = gen_rtx_REG (QImode, 0);
11490 emit_move_insn (al, callarg2);
11491 use_reg (&use, al);
11492 }
11493 #endif /* TARGET_MACHO */
11494
11495 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
11496 {
11497 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11498 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11499 }
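/* For a 64-bit indirect sibcall the target address has to end up in a
   register that is call-clobbered but not used for argument passing or the
   static chain; R11 is the natural choice. */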
11500 if (sibcall && TARGET_64BIT
11501 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
11502 {
11503 rtx addr;
11504 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11505 fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
11506 emit_move_insn (fnaddr, addr);
11507 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11508 }
11509
11510 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
11511 if (retval)
11512 call = gen_rtx_SET (VOIDmode, retval, call);
11513 if (pop)
11514 {
11515 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
11516 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
11517 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
11518 }
11519
11520 call = emit_call_insn (call);
11521 if (use)
11522 CALL_INSN_FUNCTION_USAGE (call) = use;
11523 }
11524
11525 \f
11526 /* Clear stack slot assignments remembered from previous functions.
11527 This is called from INIT_EXPANDERS once before RTL is emitted for each
11528 function. */
11529
11530 static struct machine_function *
11531 ix86_init_machine_status (void)
11532 {
11533 struct machine_function *f;
11534
11535 f = ggc_alloc_cleared (sizeof (struct machine_function));
11536 f->use_fast_prologue_epilogue_nregs = -1;
11537
11538 return f;
11539 }
11540
11541 /* Return a MEM corresponding to a stack slot with mode MODE.
11542 Allocate a new slot if necessary.
11543
11544 The RTL for a function can have several slots available: N is
11545 which slot to use. */
11546
11547 rtx
11548 assign_386_stack_local (enum machine_mode mode, int n)
11549 {
11550 struct stack_local_entry *s;
11551
11552 if (n < 0 || n >= MAX_386_STACK_LOCALS)
11553 abort ();
11554
11555 for (s = ix86_stack_locals; s; s = s->next)
11556 if (s->mode == mode && s->n == n)
11557 return s->rtl;
11558
11559 s = (struct stack_local_entry *)
11560 ggc_alloc (sizeof (struct stack_local_entry));
11561 s->n = n;
11562 s->mode = mode;
11563 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
11564
11565 s->next = ix86_stack_locals;
11566 ix86_stack_locals = s;
11567 return s->rtl;
11568 }
11569
11570 /* Construct the SYMBOL_REF for the tls_get_addr function. */
11571
11572 static GTY(()) rtx ix86_tls_symbol;
11573 rtx
11574 ix86_tls_get_addr (void)
11575 {
11576
11577 if (!ix86_tls_symbol)
11578 {
11579 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
11580 (TARGET_GNU_TLS && !TARGET_64BIT)
11581 ? "___tls_get_addr"
11582 : "__tls_get_addr");
11583 }
11584
11585 return ix86_tls_symbol;
11586 }
11587 \f
11588 /* Calculate the length of the memory address in the instruction
11589 encoding. Does not include the one-byte modrm, opcode, or prefix. */
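/* For illustration, byte counts under the rules below (32-bit addressing,
   not counting the modrm byte itself): (%eax) -> 0, 8(%ebp) -> 1,
   (%esp) -> 1, (%ebx,%ecx,4) -> 1, 8(%eax,%ecx,4) -> 2, and a bare
   symbol or disp32 -> 4. */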
11590
11591 int
11592 memory_address_length (rtx addr)
11593 {
11594 struct ix86_address parts;
11595 rtx base, index, disp;
11596 int len;
11597
11598 if (GET_CODE (addr) == PRE_DEC
11599 || GET_CODE (addr) == POST_INC
11600 || GET_CODE (addr) == PRE_MODIFY
11601 || GET_CODE (addr) == POST_MODIFY)
11602 return 0;
11603
11604 if (! ix86_decompose_address (addr, &parts))
11605 abort ();
11606
11607 base = parts.base;
11608 index = parts.index;
11609 disp = parts.disp;
11610 len = 0;
11611
11612 /* Rule of thumb:
11613 - esp as the base always wants an index,
11614 - ebp as the base always wants a displacement. */
11615
11616 /* Register Indirect. */
11617 if (base && !index && !disp)
11618 {
11619 /* esp (for its index) and ebp (for its displacement) need
11620 the two-byte modrm form. */
11621 if (addr == stack_pointer_rtx
11622 || addr == arg_pointer_rtx
11623 || addr == frame_pointer_rtx
11624 || addr == hard_frame_pointer_rtx)
11625 len = 1;
11626 }
11627
11628 /* Direct Addressing. */
11629 else if (disp && !base && !index)
11630 len = 4;
11631
11632 else
11633 {
11634 /* Find the length of the displacement constant. */
11635 if (disp)
11636 {
11637 if (GET_CODE (disp) == CONST_INT
11638 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K')
11639 && base)
11640 len = 1;
11641 else
11642 len = 4;
11643 }
11644 /* ebp always wants a displacement. */
11645 else if (base == hard_frame_pointer_rtx)
11646 len = 1;
11647
11648 /* An index requires the two-byte modrm form.... */
11649 if (index
11650 /* ...like esp, which always wants an index. */
11651 || base == stack_pointer_rtx
11652 || base == arg_pointer_rtx
11653 || base == frame_pointer_rtx)
11654 len += 1;
11655 }
11656
11657 return len;
11658 }
11659
11660 /* Compute the default value for the "length_immediate" attribute. When
11661 SHORTFORM is set, expect the insn to have an 8-bit immediate alternative. */
11662 int
11663 ix86_attr_length_immediate_default (rtx insn, int shortform)
11664 {
11665 int len = 0;
11666 int i;
11667 extract_insn_cached (insn);
11668 for (i = recog_data.n_operands - 1; i >= 0; --i)
11669 if (CONSTANT_P (recog_data.operand[i]))
11670 {
11671 if (len)
11672 abort ();
11673 if (shortform
11674 && GET_CODE (recog_data.operand[i]) == CONST_INT
11675 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
11676 len = 1;
11677 else
11678 {
11679 switch (get_attr_mode (insn))
11680 {
11681 case MODE_QI:
11682 len += 1;
11683 break;
11684 case MODE_HI:
11685 len += 2;
11686 break;
11687 case MODE_SI:
11688 len += 4;
11689 break;
11690 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
11691 case MODE_DI:
11692 len += 4;
11693 break;
11694 default:
11695 fatal_insn ("unknown insn mode", insn);
11696 }
11697 }
11698 }
11699 return len;
11700 }
11701 /* Compute the default value for the "length_address" attribute. */
11702 int
11703 ix86_attr_length_address_default (rtx insn)
11704 {
11705 int i;
11706
11707 if (get_attr_type (insn) == TYPE_LEA)
11708 {
11709 rtx set = PATTERN (insn);
11710 if (GET_CODE (set) == SET)
11711 ;
11712 else if (GET_CODE (set) == PARALLEL
11713 && GET_CODE (XVECEXP (set, 0, 0)) == SET)
11714 set = XVECEXP (set, 0, 0);
11715 else
11716 {
11717 #ifdef ENABLE_CHECKING
11718 abort ();
11719 #endif
11720 return 0;
11721 }
11722
11723 return memory_address_length (SET_SRC (set));
11724 }
11725
11726 extract_insn_cached (insn);
11727 for (i = recog_data.n_operands - 1; i >= 0; --i)
11728 if (GET_CODE (recog_data.operand[i]) == MEM)
11729 {
11730 return memory_address_length (XEXP (recog_data.operand[i], 0));
11732 }
11733 return 0;
11734 }
11735 \f
11736 /* Return the maximum number of instructions a cpu can issue. */
11737
11738 static int
11739 ix86_issue_rate (void)
11740 {
11741 switch (ix86_tune)
11742 {
11743 case PROCESSOR_PENTIUM:
11744 case PROCESSOR_K6:
11745 return 2;
11746
11747 case PROCESSOR_PENTIUMPRO:
11748 case PROCESSOR_PENTIUM4:
11749 case PROCESSOR_ATHLON:
11750 case PROCESSOR_K8:
11751 case PROCESSOR_NOCONA:
11752 return 3;
11753
11754 default:
11755 return 1;
11756 }
11757 }
11758
11759 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads the flags
11760 set by DEP_INSN and nothing else set by DEP_INSN. */
11761
11762 static int
11763 ix86_flags_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
11764 {
11765 rtx set, set2;
11766
11767 /* Simplify the test for uninteresting insns. */
11768 if (insn_type != TYPE_SETCC
11769 && insn_type != TYPE_ICMOV
11770 && insn_type != TYPE_FCMOV
11771 && insn_type != TYPE_IBR)
11772 return 0;
11773
11774 if ((set = single_set (dep_insn)) != 0)
11775 {
11776 set = SET_DEST (set);
11777 set2 = NULL_RTX;
11778 }
11779 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
11780 && XVECLEN (PATTERN (dep_insn), 0) == 2
11781 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
11782 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
11783 {
11784 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
11785 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
11786 }
11787 else
11788 return 0;
11789
11790 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
11791 return 0;
11792
11793 /* This test is true if the dependent insn reads the flags but
11794 not any other potentially set register. */
11795 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
11796 return 0;
11797
11798 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
11799 return 0;
11800
11801 return 1;
11802 }
11803
11804 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
11805 address with operands set by DEP_INSN. */
11806
11807 static int
11808 ix86_agi_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
11809 {
11810 rtx addr;
11811
11812 if (insn_type == TYPE_LEA
11813 && TARGET_PENTIUM)
11814 {
11815 addr = PATTERN (insn);
11816 if (GET_CODE (addr) == SET)
11817 ;
11818 else if (GET_CODE (addr) == PARALLEL
11819 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
11820 addr = XVECEXP (addr, 0, 0);
11821 else
11822 abort ();
11823 addr = SET_SRC (addr);
11824 }
11825 else
11826 {
11827 int i;
11828 extract_insn_cached (insn);
11829 for (i = recog_data.n_operands - 1; i >= 0; --i)
11830 if (GET_CODE (recog_data.operand[i]) == MEM)
11831 {
11832 addr = XEXP (recog_data.operand[i], 0);
11833 goto found;
11834 }
11835 return 0;
11836 found:;
11837 }
11838
11839 return modified_in_p (addr, dep_insn);
11840 }
11841
11842 static int
11843 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
11844 {
11845 enum attr_type insn_type, dep_insn_type;
11846 enum attr_memory memory;
11847 rtx set, set2;
11848 int dep_insn_code_number;
11849
11850 /* Anti and output dependencies have zero cost on all CPUs. */
11851 if (REG_NOTE_KIND (link) != 0)
11852 return 0;
11853
11854 dep_insn_code_number = recog_memoized (dep_insn);
11855
11856 /* If we can't recognize the insns, we can't really do anything. */
11857 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
11858 return cost;
11859
11860 insn_type = get_attr_type (insn);
11861 dep_insn_type = get_attr_type (dep_insn);
11862
11863 switch (ix86_tune)
11864 {
11865 case PROCESSOR_PENTIUM:
11866 /* Address Generation Interlock adds a cycle of latency. */
11867 if (ix86_agi_dependant (insn, dep_insn, insn_type))
11868 cost += 1;
11869
11870 /* ??? Compares pair with jump/setcc. */
11871 if (ix86_flags_dependant (insn, dep_insn, insn_type))
11872 cost = 0;
11873
11874 /* Floating point stores require the value to be ready one cycle earlier. */
11875 if (insn_type == TYPE_FMOV
11876 && get_attr_memory (insn) == MEMORY_STORE
11877 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11878 cost += 1;
11879 break;
11880
11881 case PROCESSOR_PENTIUMPRO:
11882 memory = get_attr_memory (insn);
11883
11884 /* INT->FP conversion is expensive. */
11885 if (get_attr_fp_int_src (dep_insn))
11886 cost += 5;
11887
11888 /* There is one cycle extra latency between an FP op and a store. */
11889 if (insn_type == TYPE_FMOV
11890 && (set = single_set (dep_insn)) != NULL_RTX
11891 && (set2 = single_set (insn)) != NULL_RTX
11892 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
11893 && GET_CODE (SET_DEST (set2)) == MEM)
11894 cost += 1;
11895
11896 /* Account for the ability of the reorder buffer to hide the latency of a
11897 load by executing it in parallel with the previous instruction, in case
11898 the previous instruction is not needed to compute the address. */
11899 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11900 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11901 {
11902 /* Claim moves to take one cycle, as the core can issue one load
11903 at a time and the next load can start a cycle later. */
11904 if (dep_insn_type == TYPE_IMOV
11905 || dep_insn_type == TYPE_FMOV)
11906 cost = 1;
11907 else if (cost > 1)
11908 cost--;
11909 }
11910 break;
11911
11912 case PROCESSOR_K6:
11913 memory = get_attr_memory (insn);
11914
11915 /* The esp dependency is resolved before the instruction is really
11916 finished. */
11917 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
11918 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
11919 return 1;
11920
11921 /* INT->FP conversion is expensive. */
11922 if (get_attr_fp_int_src (dep_insn))
11923 cost += 5;
11924
11925 /* Account for the ability of the reorder buffer to hide the latency of a
11926 load by executing it in parallel with the previous instruction, in case
11927 the previous instruction is not needed to compute the address. */
11928 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11929 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11930 {
11931 /* Claim moves to take one cycle, as the core can issue one load
11932 at a time and the next load can start a cycle later. */
11933 if (dep_insn_type == TYPE_IMOV
11934 || dep_insn_type == TYPE_FMOV)
11935 cost = 1;
11936 else if (cost > 2)
11937 cost -= 2;
11938 else
11939 cost = 1;
11940 }
11941 break;
11942
11943 case PROCESSOR_ATHLON:
11944 case PROCESSOR_K8:
11945 memory = get_attr_memory (insn);
11946
11947 /* Account for the ability of the reorder buffer to hide the latency of a
11948 load by executing it in parallel with the previous instruction, in case
11949 the previous instruction is not needed to compute the address. */
11950 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11951 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11952 {
11953 enum attr_unit unit = get_attr_unit (insn);
11954 int loadcost = 3;
11955
11956 /* Because of the difference in length between the integer and
11957 floating-point unit pipeline preparation stages, the memory operands
11958 for floating point are cheaper.
11959 
11960 ??? For Athlon the difference is most probably 2. */
11961 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
11962 loadcost = 3;
11963 else
11964 loadcost = TARGET_ATHLON ? 2 : 0;
11965
11966 if (cost >= loadcost)
11967 cost -= loadcost;
11968 else
11969 cost = 0;
11970 }
11971
11972 default:
11973 break;
11974 }
11975
11976 return cost;
11977 }
11978
11979 /* How many alternative schedules to try. This should be as wide as the
11980 scheduling freedom in the DFA, but no wider. Making this value too
11981 large results in extra work for the scheduler. */
11982
11983 static int
11984 ia32_multipass_dfa_lookahead (void)
11985 {
11986 if (ix86_tune == PROCESSOR_PENTIUM)
11987 return 2;
11988
11989 if (ix86_tune == PROCESSOR_PENTIUMPRO
11990 || ix86_tune == PROCESSOR_K6)
11991 return 1;
11992
11993 else
11994 return 0;
11995 }
11996
11997 \f
11998 /* Compute the alignment given to a constant that is being placed in memory.
11999 EXP is the constant and ALIGN is the alignment that the object would
12000 ordinarily have.
12001 The value of this function is used instead of that alignment to align
12002 the object. */
12003
12004 int
12005 ix86_constant_alignment (tree exp, int align)
12006 {
12007 if (TREE_CODE (exp) == REAL_CST)
12008 {
12009 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
12010 return 64;
12011 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
12012 return 128;
12013 }
12014 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
12015 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
12016 return BITS_PER_WORD;
12017
12018 return align;
12019 }
12020
12021 /* Compute the alignment for a static variable.
12022 TYPE is the data type, and ALIGN is the alignment that
12023 the object would ordinarily have. The value of this function is used
12024 instead of that alignment to align the object. */
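/* For example, under the first rule below an aggregate of 32 bytes or
   more, such as "static char buf[40]", is given 256-bit alignment when it
   would otherwise get less. */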
12025
12026 int
12027 ix86_data_alignment (tree type, int align)
12028 {
12029 if (AGGREGATE_TYPE_P (type)
12030 && TYPE_SIZE (type)
12031 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12032 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
12033 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
12034 return 256;
12035
12036 /* The x86-64 ABI requires arrays of 16 bytes or larger to be aligned
12037 to a 16-byte boundary. */
12038 if (TARGET_64BIT)
12039 {
12040 if (AGGREGATE_TYPE_P (type)
12041 && TYPE_SIZE (type)
12042 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12043 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
12044 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12045 return 128;
12046 }
12047
12048 if (TREE_CODE (type) == ARRAY_TYPE)
12049 {
12050 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12051 return 64;
12052 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12053 return 128;
12054 }
12055 else if (TREE_CODE (type) == COMPLEX_TYPE)
12056 {
12057
12058 if (TYPE_MODE (type) == DCmode && align < 64)
12059 return 64;
12060 if (TYPE_MODE (type) == XCmode && align < 128)
12061 return 128;
12062 }
12063 else if ((TREE_CODE (type) == RECORD_TYPE
12064 || TREE_CODE (type) == UNION_TYPE
12065 || TREE_CODE (type) == QUAL_UNION_TYPE)
12066 && TYPE_FIELDS (type))
12067 {
12068 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12069 return 64;
12070 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12071 return 128;
12072 }
12073 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12074 || TREE_CODE (type) == INTEGER_TYPE)
12075 {
12076 if (TYPE_MODE (type) == DFmode && align < 64)
12077 return 64;
12078 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12079 return 128;
12080 }
12081
12082 return align;
12083 }
12084
12085 /* Compute the alignment for a local variable.
12086 TYPE is the data type, and ALIGN is the alignment that
12087 the object would ordinarily have. The value of this macro is used
12088 instead of that alignment to align the object. */
12089
12090 int
12091 ix86_local_alignment (tree type, int align)
12092 {
12093 /* The x86-64 ABI requires arrays of 16 bytes or larger to be aligned
12094 to a 16-byte boundary. */
12095 if (TARGET_64BIT)
12096 {
12097 if (AGGREGATE_TYPE_P (type)
12098 && TYPE_SIZE (type)
12099 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12100 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
12101 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12102 return 128;
12103 }
12104 if (TREE_CODE (type) == ARRAY_TYPE)
12105 {
12106 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12107 return 64;
12108 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12109 return 128;
12110 }
12111 else if (TREE_CODE (type) == COMPLEX_TYPE)
12112 {
12113 if (TYPE_MODE (type) == DCmode && align < 64)
12114 return 64;
12115 if (TYPE_MODE (type) == XCmode && align < 128)
12116 return 128;
12117 }
12118 else if ((TREE_CODE (type) == RECORD_TYPE
12119 || TREE_CODE (type) == UNION_TYPE
12120 || TREE_CODE (type) == QUAL_UNION_TYPE)
12121 && TYPE_FIELDS (type))
12122 {
12123 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12124 return 64;
12125 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12126 return 128;
12127 }
12128 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12129 || TREE_CODE (type) == INTEGER_TYPE)
12130 {
12131
12132 if (TYPE_MODE (type) == DFmode && align < 64)
12133 return 64;
12134 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12135 return 128;
12136 }
12137 return align;
12138 }
12139 \f
12140 /* Emit RTL insns to initialize the variable parts of a trampoline.
12141 FNADDR is an RTX for the address of the function's pure code.
12142 CXT is an RTX for the static chain value for the function. */
12143 void
12144 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
12145 {
12146 if (!TARGET_64BIT)
12147 {
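/* The bytes stored below assemble to (a sketch of the layout):
   0: b9 <cxt:4> movl $CXT, %ecx
   5: e9 <disp:4> jmp FNADDR (disp taken relative to tramp + 10) */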
12148 /* Compute offset from the end of the jmp to the target function. */
12149 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
12150 plus_constant (tramp, 10),
12151 NULL_RTX, 1, OPTAB_DIRECT);
12152 emit_move_insn (gen_rtx_MEM (QImode, tramp),
12153 gen_int_mode (0xb9, QImode));
12154 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
12155 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
12156 gen_int_mode (0xe9, QImode));
12157 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
12158 }
12159 else
12160 {
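/* The bytes stored below assemble to (a sketch; the shorter movl form is
   used when the address fits in 32 bits):
   41 bb <fnaddr:4> movl $FNADDR, %r11d (or 49 bb <fnaddr:8>, movabs)
   49 ba <cxt:8> movabs $CXT, %r10
   49 ff e3 jmp *%r11 */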
12161 int offset = 0;
12162 /* Try to load the address using the shorter movl instead of movabs.
12163 We may want to support movq for kernel mode, but the kernel does not
12164 use trampolines at the moment. */
12165 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
12166 {
12167 fnaddr = copy_to_mode_reg (DImode, fnaddr);
12168 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12169 gen_int_mode (0xbb41, HImode));
12170 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
12171 gen_lowpart (SImode, fnaddr));
12172 offset += 6;
12173 }
12174 else
12175 {
12176 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12177 gen_int_mode (0xbb49, HImode));
12178 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12179 fnaddr);
12180 offset += 10;
12181 }
12182 /* Load static chain using movabs to r10. */
12183 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12184 gen_int_mode (0xba49, HImode));
12185 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12186 cxt);
12187 offset += 10;
12188 /* Jump to r11. */
12189 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12190 gen_int_mode (0xff49, HImode));
12191 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
12192 gen_int_mode (0xe3, QImode));
12193 offset += 3;
12194 if (offset > TRAMPOLINE_SIZE)
12195 abort ();
12196 }
12197
12198 #ifdef ENABLE_EXECUTE_STACK
12199 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
12200 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
12201 #endif
12202 }
12203 \f
12204 /* Codes for all the SSE/MMX builtins. */
12205 enum ix86_builtins
12206 {
12207 IX86_BUILTIN_ADDPS,
12208 IX86_BUILTIN_ADDSS,
12209 IX86_BUILTIN_DIVPS,
12210 IX86_BUILTIN_DIVSS,
12211 IX86_BUILTIN_MULPS,
12212 IX86_BUILTIN_MULSS,
12213 IX86_BUILTIN_SUBPS,
12214 IX86_BUILTIN_SUBSS,
12215
12216 IX86_BUILTIN_CMPEQPS,
12217 IX86_BUILTIN_CMPLTPS,
12218 IX86_BUILTIN_CMPLEPS,
12219 IX86_BUILTIN_CMPGTPS,
12220 IX86_BUILTIN_CMPGEPS,
12221 IX86_BUILTIN_CMPNEQPS,
12222 IX86_BUILTIN_CMPNLTPS,
12223 IX86_BUILTIN_CMPNLEPS,
12224 IX86_BUILTIN_CMPNGTPS,
12225 IX86_BUILTIN_CMPNGEPS,
12226 IX86_BUILTIN_CMPORDPS,
12227 IX86_BUILTIN_CMPUNORDPS,
12228 IX86_BUILTIN_CMPNEPS,
12229 IX86_BUILTIN_CMPEQSS,
12230 IX86_BUILTIN_CMPLTSS,
12231 IX86_BUILTIN_CMPLESS,
12232 IX86_BUILTIN_CMPNEQSS,
12233 IX86_BUILTIN_CMPNLTSS,
12234 IX86_BUILTIN_CMPNLESS,
12235 IX86_BUILTIN_CMPNGTSS,
12236 IX86_BUILTIN_CMPNGESS,
12237 IX86_BUILTIN_CMPORDSS,
12238 IX86_BUILTIN_CMPUNORDSS,
12239 IX86_BUILTIN_CMPNESS,
12240
12241 IX86_BUILTIN_COMIEQSS,
12242 IX86_BUILTIN_COMILTSS,
12243 IX86_BUILTIN_COMILESS,
12244 IX86_BUILTIN_COMIGTSS,
12245 IX86_BUILTIN_COMIGESS,
12246 IX86_BUILTIN_COMINEQSS,
12247 IX86_BUILTIN_UCOMIEQSS,
12248 IX86_BUILTIN_UCOMILTSS,
12249 IX86_BUILTIN_UCOMILESS,
12250 IX86_BUILTIN_UCOMIGTSS,
12251 IX86_BUILTIN_UCOMIGESS,
12252 IX86_BUILTIN_UCOMINEQSS,
12253
12254 IX86_BUILTIN_CVTPI2PS,
12255 IX86_BUILTIN_CVTPS2PI,
12256 IX86_BUILTIN_CVTSI2SS,
12257 IX86_BUILTIN_CVTSI642SS,
12258 IX86_BUILTIN_CVTSS2SI,
12259 IX86_BUILTIN_CVTSS2SI64,
12260 IX86_BUILTIN_CVTTPS2PI,
12261 IX86_BUILTIN_CVTTSS2SI,
12262 IX86_BUILTIN_CVTTSS2SI64,
12263
12264 IX86_BUILTIN_MAXPS,
12265 IX86_BUILTIN_MAXSS,
12266 IX86_BUILTIN_MINPS,
12267 IX86_BUILTIN_MINSS,
12268
12269 IX86_BUILTIN_LOADUPS,
12270 IX86_BUILTIN_STOREUPS,
12271 IX86_BUILTIN_MOVSS,
12272
12273 IX86_BUILTIN_MOVHLPS,
12274 IX86_BUILTIN_MOVLHPS,
12275 IX86_BUILTIN_LOADHPS,
12276 IX86_BUILTIN_LOADLPS,
12277 IX86_BUILTIN_STOREHPS,
12278 IX86_BUILTIN_STORELPS,
12279
12280 IX86_BUILTIN_MASKMOVQ,
12281 IX86_BUILTIN_MOVMSKPS,
12282 IX86_BUILTIN_PMOVMSKB,
12283
12284 IX86_BUILTIN_MOVNTPS,
12285 IX86_BUILTIN_MOVNTQ,
12286
12287 IX86_BUILTIN_LOADDQU,
12288 IX86_BUILTIN_STOREDQU,
12289
12290 IX86_BUILTIN_PACKSSWB,
12291 IX86_BUILTIN_PACKSSDW,
12292 IX86_BUILTIN_PACKUSWB,
12293
12294 IX86_BUILTIN_PADDB,
12295 IX86_BUILTIN_PADDW,
12296 IX86_BUILTIN_PADDD,
12297 IX86_BUILTIN_PADDQ,
12298 IX86_BUILTIN_PADDSB,
12299 IX86_BUILTIN_PADDSW,
12300 IX86_BUILTIN_PADDUSB,
12301 IX86_BUILTIN_PADDUSW,
12302 IX86_BUILTIN_PSUBB,
12303 IX86_BUILTIN_PSUBW,
12304 IX86_BUILTIN_PSUBD,
12305 IX86_BUILTIN_PSUBQ,
12306 IX86_BUILTIN_PSUBSB,
12307 IX86_BUILTIN_PSUBSW,
12308 IX86_BUILTIN_PSUBUSB,
12309 IX86_BUILTIN_PSUBUSW,
12310
12311 IX86_BUILTIN_PAND,
12312 IX86_BUILTIN_PANDN,
12313 IX86_BUILTIN_POR,
12314 IX86_BUILTIN_PXOR,
12315
12316 IX86_BUILTIN_PAVGB,
12317 IX86_BUILTIN_PAVGW,
12318
12319 IX86_BUILTIN_PCMPEQB,
12320 IX86_BUILTIN_PCMPEQW,
12321 IX86_BUILTIN_PCMPEQD,
12322 IX86_BUILTIN_PCMPGTB,
12323 IX86_BUILTIN_PCMPGTW,
12324 IX86_BUILTIN_PCMPGTD,
12325
12326 IX86_BUILTIN_PMADDWD,
12327
12328 IX86_BUILTIN_PMAXSW,
12329 IX86_BUILTIN_PMAXUB,
12330 IX86_BUILTIN_PMINSW,
12331 IX86_BUILTIN_PMINUB,
12332
12333 IX86_BUILTIN_PMULHUW,
12334 IX86_BUILTIN_PMULHW,
12335 IX86_BUILTIN_PMULLW,
12336
12337 IX86_BUILTIN_PSADBW,
12338 IX86_BUILTIN_PSHUFW,
12339
12340 IX86_BUILTIN_PSLLW,
12341 IX86_BUILTIN_PSLLD,
12342 IX86_BUILTIN_PSLLQ,
12343 IX86_BUILTIN_PSRAW,
12344 IX86_BUILTIN_PSRAD,
12345 IX86_BUILTIN_PSRLW,
12346 IX86_BUILTIN_PSRLD,
12347 IX86_BUILTIN_PSRLQ,
12348 IX86_BUILTIN_PSLLWI,
12349 IX86_BUILTIN_PSLLDI,
12350 IX86_BUILTIN_PSLLQI,
12351 IX86_BUILTIN_PSRAWI,
12352 IX86_BUILTIN_PSRADI,
12353 IX86_BUILTIN_PSRLWI,
12354 IX86_BUILTIN_PSRLDI,
12355 IX86_BUILTIN_PSRLQI,
12356
12357 IX86_BUILTIN_PUNPCKHBW,
12358 IX86_BUILTIN_PUNPCKHWD,
12359 IX86_BUILTIN_PUNPCKHDQ,
12360 IX86_BUILTIN_PUNPCKLBW,
12361 IX86_BUILTIN_PUNPCKLWD,
12362 IX86_BUILTIN_PUNPCKLDQ,
12363
12364 IX86_BUILTIN_SHUFPS,
12365
12366 IX86_BUILTIN_RCPPS,
12367 IX86_BUILTIN_RCPSS,
12368 IX86_BUILTIN_RSQRTPS,
12369 IX86_BUILTIN_RSQRTSS,
12370 IX86_BUILTIN_SQRTPS,
12371 IX86_BUILTIN_SQRTSS,
12372
12373 IX86_BUILTIN_UNPCKHPS,
12374 IX86_BUILTIN_UNPCKLPS,
12375
12376 IX86_BUILTIN_ANDPS,
12377 IX86_BUILTIN_ANDNPS,
12378 IX86_BUILTIN_ORPS,
12379 IX86_BUILTIN_XORPS,
12380
12381 IX86_BUILTIN_EMMS,
12382 IX86_BUILTIN_LDMXCSR,
12383 IX86_BUILTIN_STMXCSR,
12384 IX86_BUILTIN_SFENCE,
12385
12386 /* 3DNow! Original */
12387 IX86_BUILTIN_FEMMS,
12388 IX86_BUILTIN_PAVGUSB,
12389 IX86_BUILTIN_PF2ID,
12390 IX86_BUILTIN_PFACC,
12391 IX86_BUILTIN_PFADD,
12392 IX86_BUILTIN_PFCMPEQ,
12393 IX86_BUILTIN_PFCMPGE,
12394 IX86_BUILTIN_PFCMPGT,
12395 IX86_BUILTIN_PFMAX,
12396 IX86_BUILTIN_PFMIN,
12397 IX86_BUILTIN_PFMUL,
12398 IX86_BUILTIN_PFRCP,
12399 IX86_BUILTIN_PFRCPIT1,
12400 IX86_BUILTIN_PFRCPIT2,
12401 IX86_BUILTIN_PFRSQIT1,
12402 IX86_BUILTIN_PFRSQRT,
12403 IX86_BUILTIN_PFSUB,
12404 IX86_BUILTIN_PFSUBR,
12405 IX86_BUILTIN_PI2FD,
12406 IX86_BUILTIN_PMULHRW,
12407
12408 /* 3DNow! Athlon Extensions */
12409 IX86_BUILTIN_PF2IW,
12410 IX86_BUILTIN_PFNACC,
12411 IX86_BUILTIN_PFPNACC,
12412 IX86_BUILTIN_PI2FW,
12413 IX86_BUILTIN_PSWAPDSI,
12414 IX86_BUILTIN_PSWAPDSF,
12415
12416 /* SSE2 */
12417 IX86_BUILTIN_ADDPD,
12418 IX86_BUILTIN_ADDSD,
12419 IX86_BUILTIN_DIVPD,
12420 IX86_BUILTIN_DIVSD,
12421 IX86_BUILTIN_MULPD,
12422 IX86_BUILTIN_MULSD,
12423 IX86_BUILTIN_SUBPD,
12424 IX86_BUILTIN_SUBSD,
12425
12426 IX86_BUILTIN_CMPEQPD,
12427 IX86_BUILTIN_CMPLTPD,
12428 IX86_BUILTIN_CMPLEPD,
12429 IX86_BUILTIN_CMPGTPD,
12430 IX86_BUILTIN_CMPGEPD,
12431 IX86_BUILTIN_CMPNEQPD,
12432 IX86_BUILTIN_CMPNLTPD,
12433 IX86_BUILTIN_CMPNLEPD,
12434 IX86_BUILTIN_CMPNGTPD,
12435 IX86_BUILTIN_CMPNGEPD,
12436 IX86_BUILTIN_CMPORDPD,
12437 IX86_BUILTIN_CMPUNORDPD,
12438 IX86_BUILTIN_CMPNEPD,
12439 IX86_BUILTIN_CMPEQSD,
12440 IX86_BUILTIN_CMPLTSD,
12441 IX86_BUILTIN_CMPLESD,
12442 IX86_BUILTIN_CMPNEQSD,
12443 IX86_BUILTIN_CMPNLTSD,
12444 IX86_BUILTIN_CMPNLESD,
12445 IX86_BUILTIN_CMPORDSD,
12446 IX86_BUILTIN_CMPUNORDSD,
12447 IX86_BUILTIN_CMPNESD,
12448
12449 IX86_BUILTIN_COMIEQSD,
12450 IX86_BUILTIN_COMILTSD,
12451 IX86_BUILTIN_COMILESD,
12452 IX86_BUILTIN_COMIGTSD,
12453 IX86_BUILTIN_COMIGESD,
12454 IX86_BUILTIN_COMINEQSD,
12455 IX86_BUILTIN_UCOMIEQSD,
12456 IX86_BUILTIN_UCOMILTSD,
12457 IX86_BUILTIN_UCOMILESD,
12458 IX86_BUILTIN_UCOMIGTSD,
12459 IX86_BUILTIN_UCOMIGESD,
12460 IX86_BUILTIN_UCOMINEQSD,
12461
12462 IX86_BUILTIN_MAXPD,
12463 IX86_BUILTIN_MAXSD,
12464 IX86_BUILTIN_MINPD,
12465 IX86_BUILTIN_MINSD,
12466
12467 IX86_BUILTIN_ANDPD,
12468 IX86_BUILTIN_ANDNPD,
12469 IX86_BUILTIN_ORPD,
12470 IX86_BUILTIN_XORPD,
12471
12472 IX86_BUILTIN_SQRTPD,
12473 IX86_BUILTIN_SQRTSD,
12474
12475 IX86_BUILTIN_UNPCKHPD,
12476 IX86_BUILTIN_UNPCKLPD,
12477
12478 IX86_BUILTIN_SHUFPD,
12479
12480 IX86_BUILTIN_LOADUPD,
12481 IX86_BUILTIN_STOREUPD,
12482 IX86_BUILTIN_MOVSD,
12483
12484 IX86_BUILTIN_LOADHPD,
12485 IX86_BUILTIN_LOADLPD,
12486
12487 IX86_BUILTIN_CVTDQ2PD,
12488 IX86_BUILTIN_CVTDQ2PS,
12489
12490 IX86_BUILTIN_CVTPD2DQ,
12491 IX86_BUILTIN_CVTPD2PI,
12492 IX86_BUILTIN_CVTPD2PS,
12493 IX86_BUILTIN_CVTTPD2DQ,
12494 IX86_BUILTIN_CVTTPD2PI,
12495
12496 IX86_BUILTIN_CVTPI2PD,
12497 IX86_BUILTIN_CVTSI2SD,
12498 IX86_BUILTIN_CVTSI642SD,
12499
12500 IX86_BUILTIN_CVTSD2SI,
12501 IX86_BUILTIN_CVTSD2SI64,
12502 IX86_BUILTIN_CVTSD2SS,
12503 IX86_BUILTIN_CVTSS2SD,
12504 IX86_BUILTIN_CVTTSD2SI,
12505 IX86_BUILTIN_CVTTSD2SI64,
12506
12507 IX86_BUILTIN_CVTPS2DQ,
12508 IX86_BUILTIN_CVTPS2PD,
12509 IX86_BUILTIN_CVTTPS2DQ,
12510
12511 IX86_BUILTIN_MOVNTI,
12512 IX86_BUILTIN_MOVNTPD,
12513 IX86_BUILTIN_MOVNTDQ,
12514
12515 /* SSE2 MMX */
12516 IX86_BUILTIN_MASKMOVDQU,
12517 IX86_BUILTIN_MOVMSKPD,
12518 IX86_BUILTIN_PMOVMSKB128,
12519
12520 IX86_BUILTIN_PACKSSWB128,
12521 IX86_BUILTIN_PACKSSDW128,
12522 IX86_BUILTIN_PACKUSWB128,
12523
12524 IX86_BUILTIN_PADDB128,
12525 IX86_BUILTIN_PADDW128,
12526 IX86_BUILTIN_PADDD128,
12527 IX86_BUILTIN_PADDQ128,
12528 IX86_BUILTIN_PADDSB128,
12529 IX86_BUILTIN_PADDSW128,
12530 IX86_BUILTIN_PADDUSB128,
12531 IX86_BUILTIN_PADDUSW128,
12532 IX86_BUILTIN_PSUBB128,
12533 IX86_BUILTIN_PSUBW128,
12534 IX86_BUILTIN_PSUBD128,
12535 IX86_BUILTIN_PSUBQ128,
12536 IX86_BUILTIN_PSUBSB128,
12537 IX86_BUILTIN_PSUBSW128,
12538 IX86_BUILTIN_PSUBUSB128,
12539 IX86_BUILTIN_PSUBUSW128,
12540
12541 IX86_BUILTIN_PAND128,
12542 IX86_BUILTIN_PANDN128,
12543 IX86_BUILTIN_POR128,
12544 IX86_BUILTIN_PXOR128,
12545
12546 IX86_BUILTIN_PAVGB128,
12547 IX86_BUILTIN_PAVGW128,
12548
12549 IX86_BUILTIN_PCMPEQB128,
12550 IX86_BUILTIN_PCMPEQW128,
12551 IX86_BUILTIN_PCMPEQD128,
12552 IX86_BUILTIN_PCMPGTB128,
12553 IX86_BUILTIN_PCMPGTW128,
12554 IX86_BUILTIN_PCMPGTD128,
12555
12556 IX86_BUILTIN_PMADDWD128,
12557
12558 IX86_BUILTIN_PMAXSW128,
12559 IX86_BUILTIN_PMAXUB128,
12560 IX86_BUILTIN_PMINSW128,
12561 IX86_BUILTIN_PMINUB128,
12562
12563 IX86_BUILTIN_PMULUDQ,
12564 IX86_BUILTIN_PMULUDQ128,
12565 IX86_BUILTIN_PMULHUW128,
12566 IX86_BUILTIN_PMULHW128,
12567 IX86_BUILTIN_PMULLW128,
12568
12569 IX86_BUILTIN_PSADBW128,
12570 IX86_BUILTIN_PSHUFHW,
12571 IX86_BUILTIN_PSHUFLW,
12572 IX86_BUILTIN_PSHUFD,
12573
12574 IX86_BUILTIN_PSLLW128,
12575 IX86_BUILTIN_PSLLD128,
12576 IX86_BUILTIN_PSLLQ128,
12577 IX86_BUILTIN_PSRAW128,
12578 IX86_BUILTIN_PSRAD128,
12579 IX86_BUILTIN_PSRLW128,
12580 IX86_BUILTIN_PSRLD128,
12581 IX86_BUILTIN_PSRLQ128,
12582 IX86_BUILTIN_PSLLDQI128,
12583 IX86_BUILTIN_PSLLWI128,
12584 IX86_BUILTIN_PSLLDI128,
12585 IX86_BUILTIN_PSLLQI128,
12586 IX86_BUILTIN_PSRAWI128,
12587 IX86_BUILTIN_PSRADI128,
12588 IX86_BUILTIN_PSRLDQI128,
12589 IX86_BUILTIN_PSRLWI128,
12590 IX86_BUILTIN_PSRLDI128,
12591 IX86_BUILTIN_PSRLQI128,
12592
12593 IX86_BUILTIN_PUNPCKHBW128,
12594 IX86_BUILTIN_PUNPCKHWD128,
12595 IX86_BUILTIN_PUNPCKHDQ128,
12596 IX86_BUILTIN_PUNPCKHQDQ128,
12597 IX86_BUILTIN_PUNPCKLBW128,
12598 IX86_BUILTIN_PUNPCKLWD128,
12599 IX86_BUILTIN_PUNPCKLDQ128,
12600 IX86_BUILTIN_PUNPCKLQDQ128,
12601
12602 IX86_BUILTIN_CLFLUSH,
12603 IX86_BUILTIN_MFENCE,
12604 IX86_BUILTIN_LFENCE,
12605
12606 /* Prescott New Instructions. */
12607 IX86_BUILTIN_ADDSUBPS,
12608 IX86_BUILTIN_HADDPS,
12609 IX86_BUILTIN_HSUBPS,
12610 IX86_BUILTIN_MOVSHDUP,
12611 IX86_BUILTIN_MOVSLDUP,
12612 IX86_BUILTIN_ADDSUBPD,
12613 IX86_BUILTIN_HADDPD,
12614 IX86_BUILTIN_HSUBPD,
12615 IX86_BUILTIN_LDDQU,
12616
12617 IX86_BUILTIN_MONITOR,
12618 IX86_BUILTIN_MWAIT,
12619
12620 IX86_BUILTIN_VEC_INIT_V2SI,
12621 IX86_BUILTIN_VEC_INIT_V4HI,
12622 IX86_BUILTIN_VEC_INIT_V8QI,
12623 IX86_BUILTIN_VEC_EXT_V2DF,
12624 IX86_BUILTIN_VEC_EXT_V2DI,
12625 IX86_BUILTIN_VEC_EXT_V4SF,
12626 IX86_BUILTIN_VEC_EXT_V4SI,
12627 IX86_BUILTIN_VEC_EXT_V8HI,
12628 IX86_BUILTIN_VEC_EXT_V4HI,
12629 IX86_BUILTIN_VEC_SET_V8HI,
12630 IX86_BUILTIN_VEC_SET_V4HI,
12631
12632 IX86_BUILTIN_MAX
12633 };
12634
12635 #define def_builtin(MASK, NAME, TYPE, CODE) \
12636 do { \
12637 if ((MASK) & target_flags \
12638 && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
12639 lang_hooks.builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
12640 NULL, NULL_TREE); \
12641 } while (0)
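/* Illustrative use (a sketch only; the real registrations are driven by
   the tables below and by ix86_init_mmx_sse_builtins):
   def_builtin (MASK_SSE, "__builtin_ia32_addps",
   v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ADDPS); */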
12642
12643 /* Bits for builtin_description.flag. */
12644
12645 /* Set when we don't support the comparison natively, and should
12646 swap the comparison operands in order to support it. */
12647 #define BUILTIN_DESC_SWAP_OPERANDS 1
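/* For example, there is no native "compare greater" SSE comparison, so
   __builtin_ia32_cmpgtps is described in the tables below as LT with
   BUILTIN_DESC_SWAP_OPERANDS set. */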
12648
12649 struct builtin_description
12650 {
12651 const unsigned int mask;
12652 const enum insn_code icode;
12653 const char *const name;
12654 const enum ix86_builtins code;
12655 const enum rtx_code comparison;
12656 const unsigned int flag;
12657 };
12658
12659 static const struct builtin_description bdesc_comi[] =
12660 {
12661 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
12662 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
12663 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
12664 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
12665 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
12666 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
12667 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
12668 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
12669 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
12670 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
12671 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
12672 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
12673 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
12674 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
12675 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
12676 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
12677 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
12678 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
12679 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
12680 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
12681 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
12682 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
12683 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
12684 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
12685 };
12686
12687 static const struct builtin_description bdesc_2arg[] =
12688 {
12689 /* SSE */
12690 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
12691 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
12692 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
12693 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
12694 { MASK_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
12695 { MASK_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
12696 { MASK_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
12697 { MASK_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
12698
12699 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
12700 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
12701 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
12702 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT,
12703 BUILTIN_DESC_SWAP_OPERANDS },
12704 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE,
12705 BUILTIN_DESC_SWAP_OPERANDS },
12706 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
12707 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
12708 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
12709 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
12710 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE,
12711 BUILTIN_DESC_SWAP_OPERANDS },
12712 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT,
12713 BUILTIN_DESC_SWAP_OPERANDS },
12714 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
12715 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
12716 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
12717 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
12718 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
12719 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
12720 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
12721 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
12722 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE,
12723 BUILTIN_DESC_SWAP_OPERANDS },
12724 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT,
12725 BUILTIN_DESC_SWAP_OPERANDS },
12726 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
12727
12728 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
12729 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
12730 { MASK_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
12731 { MASK_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
12732
12733 { MASK_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
12734 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
12735 { MASK_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
12736 { MASK_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
12737
12738 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
12739 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
12740 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
12741 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
12742 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
12743
12744 /* MMX */
12745 { MASK_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
12746 { MASK_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
12747 { MASK_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
12748 { MASK_MMX, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
12749 { MASK_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
12750 { MASK_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
12751 { MASK_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
12752 { MASK_MMX, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
12753
12754 { MASK_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
12755 { MASK_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
12756 { MASK_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
12757 { MASK_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
12758 { MASK_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
12759 { MASK_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
12760 { MASK_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
12761 { MASK_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
12762
12763 { MASK_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
12764 { MASK_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
12765 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
12766
12767 { MASK_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
12768 { MASK_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
12769 { MASK_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
12770 { MASK_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
12771
12772 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
12773 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
12774
12775 { MASK_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
12776 { MASK_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
12777 { MASK_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
12778 { MASK_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
12779 { MASK_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
12780 { MASK_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
12781
12782 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
12783 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
12784 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
12785 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
12786
12787 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
12788 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
12789 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
12790 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
12791 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
12792 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
12793
12794 /* Special. */
12795 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
12796 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
12797 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
12798
12799 { MASK_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
12800 { MASK_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
12801 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
12802
12803 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
12804 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
12805 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
12806 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
12807 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
12808 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
12809
12810 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
12811 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
12812 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
12813 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
12814 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
12815 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
12816
12817 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
12818 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
12819 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
12820 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
12821
12822 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
12823 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
12824
12825 /* SSE2 */
12826 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
12827 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
12828 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
12829 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
12830 { MASK_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
12831 { MASK_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
12832 { MASK_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
12833 { MASK_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
12834
12835 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
12836 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
12837 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
12838 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT,
12839 BUILTIN_DESC_SWAP_OPERANDS },
12840 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE,
12841 BUILTIN_DESC_SWAP_OPERANDS },
12842 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
12843 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
12844 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
12845 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
12846 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE,
12847 BUILTIN_DESC_SWAP_OPERANDS },
12848 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT,
12849 BUILTIN_DESC_SWAP_OPERANDS },
12850 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
12851 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
12852 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
12853 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
12854 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
12855 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
12856 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
12857 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
12858 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },
12859
12860 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
12861 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
12862 { MASK_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
12863 { MASK_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
12864
12865 { MASK_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
12866 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
12867 { MASK_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
12868 { MASK_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
12869
12870 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
12871 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
12872 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
12873
12874 /* SSE2 MMX */
12875 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
12876 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
12877 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
12878 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
12879 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
12880 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
12881 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
12882 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
12883
12884 { MASK_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
12885 { MASK_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
12886 { MASK_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
12887 { MASK_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
12888 { MASK_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
12889 { MASK_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
12890 { MASK_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
12891 { MASK_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
12892
12893 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
12894 { MASK_SSE2, CODE_FOR_sse2_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
12895
12896 { MASK_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
12897 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
12898 { MASK_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
12899 { MASK_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
12900
12901 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
12902 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
12903
12904 { MASK_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
12905 { MASK_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
12906 { MASK_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
12907 { MASK_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
12908 { MASK_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
12909 { MASK_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
12910
12911 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
12912 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
12913 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
12914 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
12915
12916 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
12917 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
12918 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
12919 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
12920 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
12921 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
12922 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
12923 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
12924
12925 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
12926 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
12927 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
12928
12929 { MASK_SSE2, CODE_FOR_sse2_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
12930 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
12931
12932 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
12933 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },
12934
12935 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
12936 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
12937 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
12938
12939 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
12940 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
12941 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
12942
12943 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
12944 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
12945
12946 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
12947
12948 { MASK_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
12949 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
12950 { MASK_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
12951 { MASK_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
12952
12953 /* SSE3 MMX */
12954 { MASK_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
12955 { MASK_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
12956 { MASK_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
12957 { MASK_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
12958 { MASK_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
12959 { MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
12960 };
12961
12962 static const struct builtin_description bdesc_1arg[] =
12963 {
12964 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
12965 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
12966
12967 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
12968 { MASK_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
12969 { MASK_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
12970
12971 { MASK_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
12972 { MASK_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
12973 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
12974 { MASK_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
12975 { MASK_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
12976 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
12977
12978 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
12979 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
12980
12981 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
12982
12983 { MASK_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
12984 { MASK_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
12985
12986 { MASK_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
12987 { MASK_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
12988 { MASK_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
12989 { MASK_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
12990 { MASK_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
12991
12992 { MASK_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
12993
12994 { MASK_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
12995 { MASK_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
12996 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
12997 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
12998
12999 { MASK_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
13000 { MASK_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
13001 { MASK_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
13002
13003 /* SSE3 */
13004 { MASK_SSE3, CODE_FOR_sse3_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
13005 { MASK_SSE3, CODE_FOR_sse3_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
13006 };
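/* A sketch of how the tables above are consumed (assuming the usual i386
   builtin flow): each entry pairs a target mask, an insn code, an optional
   builtin name, an IX86_BUILTIN_* enumerator, a comparison code and flags.
   Named bdesc_2arg entries are registered generically by the table walk in
   ix86_init_mmx_sse_builtins below; entries whose name is 0 are registered
   by hand with more specific prototypes, and the IX86_BUILTIN_* code is
   what the expanders key on later.  */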
13007
13008 static void
13009 ix86_init_builtins (void)
13010 {
13011 if (TARGET_MMX)
13012 ix86_init_mmx_sse_builtins ();
13013 }
13014
13015 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
13016 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
13017 builtins. */
13018 static void
13019 ix86_init_mmx_sse_builtins (void)
13020 {
13021 const struct builtin_description * d;
13022 size_t i;
13023
13024 tree V16QI_type_node = build_vector_type_for_mode (intQI_type_node, V16QImode);
13025 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
13026 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
13027 tree V2DI_type_node
13028 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
13029 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
13030 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
13031 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
13032 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
13033 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
13034 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
13035
13036 tree pchar_type_node = build_pointer_type (char_type_node);
13037 tree pcchar_type_node = build_pointer_type (
13038 build_type_variant (char_type_node, 1, 0));
13039 tree pfloat_type_node = build_pointer_type (float_type_node);
13040 tree pcfloat_type_node = build_pointer_type (
13041 build_type_variant (float_type_node, 1, 0));
13042 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
13043 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
13044 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
13045
13046 /* Comparisons. */
13047 tree int_ftype_v4sf_v4sf
13048 = build_function_type_list (integer_type_node,
13049 V4SF_type_node, V4SF_type_node, NULL_TREE);
13050 tree v4si_ftype_v4sf_v4sf
13051 = build_function_type_list (V4SI_type_node,
13052 V4SF_type_node, V4SF_type_node, NULL_TREE);
13053 /* MMX/SSE/integer conversions. */
13054 tree int_ftype_v4sf
13055 = build_function_type_list (integer_type_node,
13056 V4SF_type_node, NULL_TREE);
13057 tree int64_ftype_v4sf
13058 = build_function_type_list (long_long_integer_type_node,
13059 V4SF_type_node, NULL_TREE);
13060 tree int_ftype_v8qi
13061 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
13062 tree v4sf_ftype_v4sf_int
13063 = build_function_type_list (V4SF_type_node,
13064 V4SF_type_node, integer_type_node, NULL_TREE);
13065 tree v4sf_ftype_v4sf_int64
13066 = build_function_type_list (V4SF_type_node,
13067 V4SF_type_node, long_long_integer_type_node,
13068 NULL_TREE);
13069 tree v4sf_ftype_v4sf_v2si
13070 = build_function_type_list (V4SF_type_node,
13071 V4SF_type_node, V2SI_type_node, NULL_TREE);
13072
13073 /* Miscellaneous. */
13074 tree v8qi_ftype_v4hi_v4hi
13075 = build_function_type_list (V8QI_type_node,
13076 V4HI_type_node, V4HI_type_node, NULL_TREE);
13077 tree v4hi_ftype_v2si_v2si
13078 = build_function_type_list (V4HI_type_node,
13079 V2SI_type_node, V2SI_type_node, NULL_TREE);
13080 tree v4sf_ftype_v4sf_v4sf_int
13081 = build_function_type_list (V4SF_type_node,
13082 V4SF_type_node, V4SF_type_node,
13083 integer_type_node, NULL_TREE);
13084 tree v2si_ftype_v4hi_v4hi
13085 = build_function_type_list (V2SI_type_node,
13086 V4HI_type_node, V4HI_type_node, NULL_TREE);
13087 tree v4hi_ftype_v4hi_int
13088 = build_function_type_list (V4HI_type_node,
13089 V4HI_type_node, integer_type_node, NULL_TREE);
13090 tree v4hi_ftype_v4hi_di
13091 = build_function_type_list (V4HI_type_node,
13092 V4HI_type_node, long_long_unsigned_type_node,
13093 NULL_TREE);
13094 tree v2si_ftype_v2si_di
13095 = build_function_type_list (V2SI_type_node,
13096 V2SI_type_node, long_long_unsigned_type_node,
13097 NULL_TREE);
13098 tree void_ftype_void
13099 = build_function_type (void_type_node, void_list_node);
13100 tree void_ftype_unsigned
13101 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
13102 tree void_ftype_unsigned_unsigned
13103 = build_function_type_list (void_type_node, unsigned_type_node,
13104 unsigned_type_node, NULL_TREE);
13105 tree void_ftype_pcvoid_unsigned_unsigned
13106 = build_function_type_list (void_type_node, const_ptr_type_node,
13107 unsigned_type_node, unsigned_type_node,
13108 NULL_TREE);
13109 tree unsigned_ftype_void
13110 = build_function_type (unsigned_type_node, void_list_node);
13111 tree v2si_ftype_v4sf
13112 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
13113 /* Loads/stores. */
13114 tree void_ftype_v8qi_v8qi_pchar
13115 = build_function_type_list (void_type_node,
13116 V8QI_type_node, V8QI_type_node,
13117 pchar_type_node, NULL_TREE);
13118 tree v4sf_ftype_pcfloat
13119 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
13120 /* @@@ the type is bogus */
13121 tree v4sf_ftype_v4sf_pv2si
13122 = build_function_type_list (V4SF_type_node,
13123 V4SF_type_node, pv2si_type_node, NULL_TREE);
13124 tree void_ftype_pv2si_v4sf
13125 = build_function_type_list (void_type_node,
13126 pv2si_type_node, V4SF_type_node, NULL_TREE);
13127 tree void_ftype_pfloat_v4sf
13128 = build_function_type_list (void_type_node,
13129 pfloat_type_node, V4SF_type_node, NULL_TREE);
13130 tree void_ftype_pdi_di
13131 = build_function_type_list (void_type_node,
13132 pdi_type_node, long_long_unsigned_type_node,
13133 NULL_TREE);
13134 tree void_ftype_pv2di_v2di
13135 = build_function_type_list (void_type_node,
13136 pv2di_type_node, V2DI_type_node, NULL_TREE);
13137 /* Normal vector unops. */
13138 tree v4sf_ftype_v4sf
13139 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
13140
13141 /* Normal vector binops. */
13142 tree v4sf_ftype_v4sf_v4sf
13143 = build_function_type_list (V4SF_type_node,
13144 V4SF_type_node, V4SF_type_node, NULL_TREE);
13145 tree v8qi_ftype_v8qi_v8qi
13146 = build_function_type_list (V8QI_type_node,
13147 V8QI_type_node, V8QI_type_node, NULL_TREE);
13148 tree v4hi_ftype_v4hi_v4hi
13149 = build_function_type_list (V4HI_type_node,
13150 V4HI_type_node, V4HI_type_node, NULL_TREE);
13151 tree v2si_ftype_v2si_v2si
13152 = build_function_type_list (V2SI_type_node,
13153 V2SI_type_node, V2SI_type_node, NULL_TREE);
13154 tree di_ftype_di_di
13155 = build_function_type_list (long_long_unsigned_type_node,
13156 long_long_unsigned_type_node,
13157 long_long_unsigned_type_node, NULL_TREE);
13158
13159 tree v2si_ftype_v2sf
13160 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
13161 tree v2sf_ftype_v2si
13162 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
13163 tree v2si_ftype_v2si
13164 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
13165 tree v2sf_ftype_v2sf
13166 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
13167 tree v2sf_ftype_v2sf_v2sf
13168 = build_function_type_list (V2SF_type_node,
13169 V2SF_type_node, V2SF_type_node, NULL_TREE);
13170 tree v2si_ftype_v2sf_v2sf
13171 = build_function_type_list (V2SI_type_node,
13172 V2SF_type_node, V2SF_type_node, NULL_TREE);
13173 tree pint_type_node = build_pointer_type (integer_type_node);
13174 tree pdouble_type_node = build_pointer_type (double_type_node);
13175 tree pcdouble_type_node = build_pointer_type (
13176 build_type_variant (double_type_node, 1, 0));
13177 tree int_ftype_v2df_v2df
13178 = build_function_type_list (integer_type_node,
13179 V2DF_type_node, V2DF_type_node, NULL_TREE);
13180
13181 tree ti_ftype_ti_ti
13182 = build_function_type_list (intTI_type_node,
13183 intTI_type_node, intTI_type_node, NULL_TREE);
13184 tree void_ftype_pcvoid
13185 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
13186 tree v4sf_ftype_v4si
13187 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
13188 tree v4si_ftype_v4sf
13189 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
13190 tree v2df_ftype_v4si
13191 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
13192 tree v4si_ftype_v2df
13193 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
13194 tree v2si_ftype_v2df
13195 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
13196 tree v4sf_ftype_v2df
13197 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
13198 tree v2df_ftype_v2si
13199 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
13200 tree v2df_ftype_v4sf
13201 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
13202 tree int_ftype_v2df
13203 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
13204 tree int64_ftype_v2df
13205 = build_function_type_list (long_long_integer_type_node,
13206 V2DF_type_node, NULL_TREE);
13207 tree v2df_ftype_v2df_int
13208 = build_function_type_list (V2DF_type_node,
13209 V2DF_type_node, integer_type_node, NULL_TREE);
13210 tree v2df_ftype_v2df_int64
13211 = build_function_type_list (V2DF_type_node,
13212 V2DF_type_node, long_long_integer_type_node,
13213 NULL_TREE);
13214 tree v4sf_ftype_v4sf_v2df
13215 = build_function_type_list (V4SF_type_node,
13216 V4SF_type_node, V2DF_type_node, NULL_TREE);
13217 tree v2df_ftype_v2df_v4sf
13218 = build_function_type_list (V2DF_type_node,
13219 V2DF_type_node, V4SF_type_node, NULL_TREE);
13220 tree v2df_ftype_v2df_v2df_int
13221 = build_function_type_list (V2DF_type_node,
13222 V2DF_type_node, V2DF_type_node,
13223 integer_type_node,
13224 NULL_TREE);
13225 tree v2df_ftype_v2df_pcdouble
13226 = build_function_type_list (V2DF_type_node,
13227 V2DF_type_node, pcdouble_type_node, NULL_TREE);
13228 tree void_ftype_pdouble_v2df
13229 = build_function_type_list (void_type_node,
13230 pdouble_type_node, V2DF_type_node, NULL_TREE);
13231 tree void_ftype_pint_int
13232 = build_function_type_list (void_type_node,
13233 pint_type_node, integer_type_node, NULL_TREE);
13234 tree void_ftype_v16qi_v16qi_pchar
13235 = build_function_type_list (void_type_node,
13236 V16QI_type_node, V16QI_type_node,
13237 pchar_type_node, NULL_TREE);
13238 tree v2df_ftype_pcdouble
13239 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
13240 tree v2df_ftype_v2df_v2df
13241 = build_function_type_list (V2DF_type_node,
13242 V2DF_type_node, V2DF_type_node, NULL_TREE);
13243 tree v16qi_ftype_v16qi_v16qi
13244 = build_function_type_list (V16QI_type_node,
13245 V16QI_type_node, V16QI_type_node, NULL_TREE);
13246 tree v8hi_ftype_v8hi_v8hi
13247 = build_function_type_list (V8HI_type_node,
13248 V8HI_type_node, V8HI_type_node, NULL_TREE);
13249 tree v4si_ftype_v4si_v4si
13250 = build_function_type_list (V4SI_type_node,
13251 V4SI_type_node, V4SI_type_node, NULL_TREE);
13252 tree v2di_ftype_v2di_v2di
13253 = build_function_type_list (V2DI_type_node,
13254 V2DI_type_node, V2DI_type_node, NULL_TREE);
13255 tree v2di_ftype_v2df_v2df
13256 = build_function_type_list (V2DI_type_node,
13257 V2DF_type_node, V2DF_type_node, NULL_TREE);
13258 tree v2df_ftype_v2df
13259 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
13260 tree v2di_ftype_v2di_int
13261 = build_function_type_list (V2DI_type_node,
13262 V2DI_type_node, integer_type_node, NULL_TREE);
13263 tree v4si_ftype_v4si_int
13264 = build_function_type_list (V4SI_type_node,
13265 V4SI_type_node, integer_type_node, NULL_TREE);
13266 tree v8hi_ftype_v8hi_int
13267 = build_function_type_list (V8HI_type_node,
13268 V8HI_type_node, integer_type_node, NULL_TREE);
13269 tree v8hi_ftype_v8hi_v2di
13270 = build_function_type_list (V8HI_type_node,
13271 V8HI_type_node, V2DI_type_node, NULL_TREE);
13272 tree v4si_ftype_v4si_v2di
13273 = build_function_type_list (V4SI_type_node,
13274 V4SI_type_node, V2DI_type_node, NULL_TREE);
13275 tree v4si_ftype_v8hi_v8hi
13276 = build_function_type_list (V4SI_type_node,
13277 V8HI_type_node, V8HI_type_node, NULL_TREE);
13278 tree di_ftype_v8qi_v8qi
13279 = build_function_type_list (long_long_unsigned_type_node,
13280 V8QI_type_node, V8QI_type_node, NULL_TREE);
13281 tree di_ftype_v2si_v2si
13282 = build_function_type_list (long_long_unsigned_type_node,
13283 V2SI_type_node, V2SI_type_node, NULL_TREE);
13284 tree v2di_ftype_v16qi_v16qi
13285 = build_function_type_list (V2DI_type_node,
13286 V16QI_type_node, V16QI_type_node, NULL_TREE);
13287 tree v2di_ftype_v4si_v4si
13288 = build_function_type_list (V2DI_type_node,
13289 V4SI_type_node, V4SI_type_node, NULL_TREE);
13290 tree int_ftype_v16qi
13291 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
13292 tree v16qi_ftype_pcchar
13293 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
13294 tree void_ftype_pchar_v16qi
13295 = build_function_type_list (void_type_node,
13296 pchar_type_node, V16QI_type_node, NULL_TREE);
13297
13298 tree float80_type;
13299 tree float128_type;
13300 tree ftype;
13301
13302 /* The __float80 type. */
13303 if (TYPE_MODE (long_double_type_node) == XFmode)
13304 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
13305 "__float80");
13306 else
13307 {
13308 /* The __float80 type. */
13309 float80_type = make_node (REAL_TYPE);
13310 TYPE_PRECISION (float80_type) = 80;
13311 layout_type (float80_type);
13312 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
13313 }
13314
13315 float128_type = make_node (REAL_TYPE);
13316 TYPE_PRECISION (float128_type) = 128;
13317 layout_type (float128_type);
13318 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
13319
13320 /* Add all builtins that are more or less simple operations on two
13321 operands. */
13322 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
13323 {
13324 /* Use one of the operands; the target can have a different mode for
13325 mask-generating compares. */
13326 enum machine_mode mode;
13327 tree type;
13328
13329 if (d->name == 0)
13330 continue;
13331 mode = insn_data[d->icode].operand[1].mode;
13332
13333 switch (mode)
13334 {
13335 case V16QImode:
13336 type = v16qi_ftype_v16qi_v16qi;
13337 break;
13338 case V8HImode:
13339 type = v8hi_ftype_v8hi_v8hi;
13340 break;
13341 case V4SImode:
13342 type = v4si_ftype_v4si_v4si;
13343 break;
13344 case V2DImode:
13345 type = v2di_ftype_v2di_v2di;
13346 break;
13347 case V2DFmode:
13348 type = v2df_ftype_v2df_v2df;
13349 break;
13350 case TImode:
13351 type = ti_ftype_ti_ti;
13352 break;
13353 case V4SFmode:
13354 type = v4sf_ftype_v4sf_v4sf;
13355 break;
13356 case V8QImode:
13357 type = v8qi_ftype_v8qi_v8qi;
13358 break;
13359 case V4HImode:
13360 type = v4hi_ftype_v4hi_v4hi;
13361 break;
13362 case V2SImode:
13363 type = v2si_ftype_v2si_v2si;
13364 break;
13365 case DImode:
13366 type = di_ftype_di_di;
13367 break;
13368
13369 default:
13370 abort ();
13371 }
13372
13373 /* Override for comparisons. */
13374 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
13375 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
13376 type = v4si_ftype_v4sf_v4sf;
13377
13378 if (d->icode == CODE_FOR_sse2_maskcmpv2df3
13379 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
13380 type = v2di_ftype_v2df_v2df;
13381
13382 def_builtin (d->mask, d->name, type, d->code);
13383 }
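/* For instance (a sketch of a single iteration of the loop above, assuming
   CODE_FOR_addv16qi3 takes V16QImode inputs): the bdesc_2arg entry for
   "__builtin_ia32_paddb128" selects v16qi_ftype_v16qi_v16qi from the
   switch, so the builtin is registered roughly as

     v16qi __builtin_ia32_paddb128 (v16qi, v16qi);

   guarded by MASK_SSE2.  */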
13384
13385 /* Add the remaining MMX insns with somewhat more complicated types. */
13386 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
13387 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
13388 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
13389 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
13390
13391 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
13392 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
13393 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
13394
13395 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
13396 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
13397
13398 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
13399 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
13400
13401 /* comi/ucomi insns. */
13402 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
13403 if (d->mask == MASK_SSE2)
13404 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
13405 else
13406 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
13407
13408 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
13409 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
13410 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
13411
13412 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
13413 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
13414 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
13415 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
13416 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
13417 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
13418 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
13419 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
13420 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
13421 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
13422 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
13423
13424 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
13425
13426 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
13427 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
13428
13429 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
13430 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
13431 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
13432 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
13433
13434 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
13435 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
13436 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
13437 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
13438
13439 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
13440
13441 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
13442
13443 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
13444 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
13445 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
13446 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
13447 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
13448 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
13449
13450 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
13451
13452 /* Original 3DNow! */
13453 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
13454 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
13455 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
13456 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
13457 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
13458 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
13459 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
13460 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
13461 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
13462 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
13463 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
13464 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
13465 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
13466 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
13467 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
13468 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
13469 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
13470 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
13471 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
13472 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
13473
13474 /* 3DNow! extension as used in the Athlon CPU. */
13475 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
13476 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
13477 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
13478 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
13479 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
13480 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
13481
13482 /* SSE2 */
13483 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
13484
13485 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
13486 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
13487
13488 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
13489 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
13490
13491 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
13492 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
13493 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
13494 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
13495 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
13496
13497 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
13498 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
13499 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
13500 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
13501
13502 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
13503 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
13504
13505 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
13506
13507 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
13508 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
13509
13510 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
13511 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
13512 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
13513 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
13514 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
13515
13516 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
13517
13518 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
13519 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
13520 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
13521 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
13522
13523 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
13524 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
13525 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
13526
13527 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
13528 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
13529 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
13530 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
13531
13532 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
13533 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
13534 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
13535
13536 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
13537 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
13538
13539 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
13540 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
13541
13542 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
13543 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
13544 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
13545
13546 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
13547 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
13548 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
13549
13550 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
13551 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
13552
13553 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
13554 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
13555 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
13556 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
13557
13558 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
13559 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
13560 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
13561 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
13562
13563 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
13564 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
13565
13566 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
13567
13568 /* Prescott New Instructions. */
13569 def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
13570 void_ftype_pcvoid_unsigned_unsigned,
13571 IX86_BUILTIN_MONITOR);
13572 def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
13573 void_ftype_unsigned_unsigned,
13574 IX86_BUILTIN_MWAIT);
13575 def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
13576 v4sf_ftype_v4sf,
13577 IX86_BUILTIN_MOVSHDUP);
13578 def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
13579 v4sf_ftype_v4sf,
13580 IX86_BUILTIN_MOVSLDUP);
13581 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
13582 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
13583
13584 /* Access to the vec_init patterns. */
13585 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
13586 integer_type_node, NULL_TREE);
13587 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v2si",
13588 ftype, IX86_BUILTIN_VEC_INIT_V2SI);
13589
13590 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
13591 short_integer_type_node,
13592 short_integer_type_node,
13593 short_integer_type_node, NULL_TREE);
13594 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v4hi",
13595 ftype, IX86_BUILTIN_VEC_INIT_V4HI);
13596
13597 ftype = build_function_type_list (V8QI_type_node, char_type_node,
13598 char_type_node, char_type_node,
13599 char_type_node, char_type_node,
13600 char_type_node, char_type_node,
13601 char_type_node, NULL_TREE);
13602 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v8qi",
13603 ftype, IX86_BUILTIN_VEC_INIT_V8QI);
13604
13605 /* Access to the vec_extract patterns. */
13606 ftype = build_function_type_list (double_type_node, V2DF_type_node,
13607 integer_type_node, NULL_TREE);
13608 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2df",
13609 ftype, IX86_BUILTIN_VEC_EXT_V2DF);
13610
13611 ftype = build_function_type_list (long_long_integer_type_node,
13612 V2DI_type_node, integer_type_node,
13613 NULL_TREE);
13614 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2di",
13615 ftype, IX86_BUILTIN_VEC_EXT_V2DI);
13616
13617 ftype = build_function_type_list (float_type_node, V4SF_type_node,
13618 integer_type_node, NULL_TREE);
13619 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4sf",
13620 ftype, IX86_BUILTIN_VEC_EXT_V4SF);
13621
13622 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
13623 integer_type_node, NULL_TREE);
13624 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4si",
13625 ftype, IX86_BUILTIN_VEC_EXT_V4SI);
13626
13627 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
13628 integer_type_node, NULL_TREE);
13629 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v8hi",
13630 ftype, IX86_BUILTIN_VEC_EXT_V8HI);
13631
13632 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
13633 integer_type_node, NULL_TREE);
13634 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_ext_v4hi",
13635 ftype, IX86_BUILTIN_VEC_EXT_V4HI);
13636
13637 /* Access to the vec_set patterns. */
13638 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
13639 intHI_type_node,
13640 integer_type_node, NULL_TREE);
13641 def_builtin (MASK_SSE, "__builtin_ia32_vec_set_v8hi",
13642 ftype, IX86_BUILTIN_VEC_SET_V8HI);
13643
13644 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
13645 intHI_type_node,
13646 integer_type_node, NULL_TREE);
13647 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_set_v4hi",
13648 ftype, IX86_BUILTIN_VEC_SET_V4HI);
13649 }
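/* Usage sketch for the registered builtins (user code, assuming -msse2;
   in practice the intrinsics in emmintrin.h wrap these):

     typedef double __v2df __attribute__ ((vector_size (16)));

     __v2df
     add_pd (__v2df a, __v2df b)
     {
       return __builtin_ia32_addpd (a, b);
     }
*/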
13650
13651 /* Errors in the source file can cause expand_expr to return const0_rtx
13652 where we expect a vector. To avoid crashing, use one of the vector
13653 clear instructions. */
13654 static rtx
13655 safe_vector_operand (rtx x, enum machine_mode mode)
13656 {
13657 if (x == const0_rtx)
13658 x = CONST0_RTX (mode);
13659 return x;
13660 }
13661
13662 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
13663
13664 static rtx
13665 ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
13666 {
13667 rtx pat, xops[3];
13668 tree arg0 = TREE_VALUE (arglist);
13669 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13670 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13671 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13672 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13673 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13674 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
13675
13676 if (VECTOR_MODE_P (mode0))
13677 op0 = safe_vector_operand (op0, mode0);
13678 if (VECTOR_MODE_P (mode1))
13679 op1 = safe_vector_operand (op1, mode1);
13680
13681 if (optimize || !target
13682 || GET_MODE (target) != tmode
13683 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13684 target = gen_reg_rtx (tmode);
13685
13686 if (GET_MODE (op1) == SImode && mode1 == TImode)
13687 {
13688 rtx x = gen_reg_rtx (V4SImode);
13689 emit_insn (gen_sse2_loadd (x, op1));
13690 op1 = gen_lowpart (TImode, x);
13691 }
13692
13693 /* In case the insn wants input operands in modes different from
13694 the result, abort. */
13695 if ((GET_MODE (op0) != mode0 && GET_MODE (op0) != VOIDmode)
13696 || (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode))
13697 abort ();
13698
13699 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
13700 op0 = copy_to_mode_reg (mode0, op0);
13701 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
13702 op1 = copy_to_mode_reg (mode1, op1);
13703
13704 /* ??? Using ix86_fixup_binary_operands is problematic when
13705 we've got mismatched modes. Fake it. */
13706
13707 xops[0] = target;
13708 xops[1] = op0;
13709 xops[2] = op1;
13710
13711 if (tmode == mode0 && tmode == mode1)
13712 {
13713 target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
13714 op0 = xops[1];
13715 op1 = xops[2];
13716 }
13717 else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
13718 {
13719 op0 = force_reg (mode0, op0);
13720 op1 = force_reg (mode1, op1);
13721 target = gen_reg_rtx (tmode);
13722 }
13723
13724 pat = GEN_FCN (icode) (target, op0, op1);
13725 if (! pat)
13726 return 0;
13727 emit_insn (pat);
13728 return target;
13729 }
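/* A sketch of the flow above for a simple case such as
   __builtin_ia32_paddw128 (assuming CODE_FOR_addv8hi3 wants V8HImode
   operands): both argument trees are expanded to rtx, coerced to satisfy
   the pattern's predicates, passed through ix86_fixup_binary_operands when
   all three modes agree, and the pattern is emitted into a (possibly
   fresh) target register which is returned.  */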
13730
13731 /* Subroutine of ix86_expand_builtin to take care of stores. */
13732
13733 static rtx
13734 ix86_expand_store_builtin (enum insn_code icode, tree arglist)
13735 {
13736 rtx pat;
13737 tree arg0 = TREE_VALUE (arglist);
13738 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13739 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13740 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13741 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
13742 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
13743
13744 if (VECTOR_MODE_P (mode1))
13745 op1 = safe_vector_operand (op1, mode1);
13746
13747 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13748 op1 = copy_to_mode_reg (mode1, op1);
13749
13750 pat = GEN_FCN (icode) (op0, op1);
13751 if (pat)
13752 emit_insn (pat);
13753 return 0;
13754 }
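/* Sketch: for a store builtin such as __builtin_ia32_storeupd the first
   argument is a pointer, so it is wrapped in a MEM of the pattern's mode;
   the value operand is forced into a register and the store pattern is
   emitted.  The builtin produces no value, hence the 0 return.  */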
13755
13756 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
13757
13758 static rtx
13759 ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
13760 rtx target, int do_load)
13761 {
13762 rtx pat;
13763 tree arg0 = TREE_VALUE (arglist);
13764 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13765 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13766 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13767
13768 if (optimize || !target
13769 || GET_MODE (target) != tmode
13770 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13771 target = gen_reg_rtx (tmode);
13772 if (do_load)
13773 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13774 else
13775 {
13776 if (VECTOR_MODE_P (mode0))
13777 op0 = safe_vector_operand (op0, mode0);
13778
13779 if ((optimize && !register_operand (op0, mode0))
13780 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13781 op0 = copy_to_mode_reg (mode0, op0);
13782 }
13783
13784 pat = GEN_FCN (icode) (target, op0);
13785 if (! pat)
13786 return 0;
13787 emit_insn (pat);
13788 return target;
13789 }
13790
13791 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
13792 sqrtss, rsqrtss, rcpss. */
13793
13794 static rtx
13795 ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
13796 {
13797 rtx pat;
13798 tree arg0 = TREE_VALUE (arglist);
13799 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13800 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13801 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13802
13803 if (optimize || !target
13804 || GET_MODE (target) != tmode
13805 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13806 target = gen_reg_rtx (tmode);
13807
13808 if (VECTOR_MODE_P (mode0))
13809 op0 = safe_vector_operand (op0, mode0);
13810
13811 if ((optimize && !register_operand (op0, mode0))
13812 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13813 op0 = copy_to_mode_reg (mode0, op0);
13814
13815 op1 = op0;
13816 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
13817 op1 = copy_to_mode_reg (mode0, op1);
13818
13819 pat = GEN_FCN (icode) (target, op0, op1);
13820 if (! pat)
13821 return 0;
13822 emit_insn (pat);
13823 return target;
13824 }
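/* Sketch: the vm patterns behind sqrtss, rsqrtss and rcpss take two vector
   operands (the untouched upper elements are carried through from one of
   them), so the single source supplied by the builtin is reused for both
   operands here.  */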
13825
13826 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
13827
13828 static rtx
13829 ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
13830 rtx target)
13831 {
13832 rtx pat;
13833 tree arg0 = TREE_VALUE (arglist);
13834 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13835 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13836 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13837 rtx op2;
13838 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
13839 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
13840 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
13841 enum rtx_code comparison = d->comparison;
13842
13843 if (VECTOR_MODE_P (mode0))
13844 op0 = safe_vector_operand (op0, mode0);
13845 if (VECTOR_MODE_P (mode1))
13846 op1 = safe_vector_operand (op1, mode1);
13847
13848 /* Swap operands if we have a comparison that isn't available in
13849 hardware. */
13850 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
13851 {
13852 rtx tmp = gen_reg_rtx (mode1);
13853 emit_move_insn (tmp, op1);
13854 op1 = op0;
13855 op0 = tmp;
13856 }
13857
13858 if (optimize || !target
13859 || GET_MODE (target) != tmode
13860 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
13861 target = gen_reg_rtx (tmode);
13862
13863 if ((optimize && !register_operand (op0, mode0))
13864 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
13865 op0 = copy_to_mode_reg (mode0, op0);
13866 if ((optimize && !register_operand (op1, mode1))
13867 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
13868 op1 = copy_to_mode_reg (mode1, op1);
13869
13870 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13871 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
13872 if (! pat)
13873 return 0;
13874 emit_insn (pat);
13875 return target;
13876 }
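/* Example (taken from the bdesc_2arg table above): __builtin_ia32_cmpgtpd
   is listed with comparison LT plus BUILTIN_DESC_SWAP_OPERANDS, so a > b
   is emitted here as the available cmpltpd pattern with the operands
   exchanged.  */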
13877
13878 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
13879
13880 static rtx
13881 ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
13882 rtx target)
13883 {
13884 rtx pat;
13885 tree arg0 = TREE_VALUE (arglist);
13886 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13887 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13888 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13889 rtx op2;
13890 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
13891 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
13892 enum rtx_code comparison = d->comparison;
13893
13894 if (VECTOR_MODE_P (mode0))
13895 op0 = safe_vector_operand (op0, mode0);
13896 if (VECTOR_MODE_P (mode1))
13897 op1 = safe_vector_operand (op1, mode1);
13898
13899 /* Swap operands if we have a comparison that isn't available in
13900 hardware. */
13901 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
13902 {
13903 rtx tmp = op1;
13904 op1 = op0;
13905 op0 = tmp;
13906 }
13907
13908 target = gen_reg_rtx (SImode);
13909 emit_move_insn (target, const0_rtx);
13910 target = gen_rtx_SUBREG (QImode, target, 0);
13911
13912 if ((optimize && !register_operand (op0, mode0))
13913 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
13914 op0 = copy_to_mode_reg (mode0, op0);
13915 if ((optimize && !register_operand (op1, mode1))
13916 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
13917 op1 = copy_to_mode_reg (mode1, op1);
13918
13919 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13920 pat = GEN_FCN (d->icode) (op0, op1);
13921 if (! pat)
13922 return 0;
13923 emit_insn (pat);
13924 emit_insn (gen_rtx_SET (VOIDmode,
13925 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
13926 gen_rtx_fmt_ee (comparison, QImode,
13927 SET_DEST (pat),
13928 const0_rtx)));
13929
13930 return SUBREG_REG (target);
13931 }
13932
13933 /* Return the integer constant in ARG. Constrain it to be in the range
13934 of the subparts of VEC_TYPE; issue an error if not. */
13935
13936 static int
13937 get_element_number (tree vec_type, tree arg)
13938 {
13939 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
13940
13941 if (!host_integerp (arg, 1)
13942 || (elt = tree_low_cst (arg, 1), elt > max))
13943 {
13944 error ("selector must be an integer constant in the range 0..%i", max);
13945 return 0;
13946 }
13947
13948 return elt;
13949 }
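/* Worked example (editorial sketch): for a V4SF vector type
   TYPE_VECTOR_SUBPARTS is 4, so max is 3 and any selector outside the range
   0..3 triggers the error above and falls back to element 0.  */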
13950
13951 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
13952 ix86_expand_vector_init. We DO have language-level syntax for this, in
13953 the form of (type){ init-list }. Except that since we can't place emms
13954 instructions from inside the compiler, we can't allow the use of MMX
13955 registers unless the user explicitly asks for it. So we do *not* define
13956 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
13957 we have builtins invoked by mmintrin.h that give us license to emit
13958 these sorts of instructions. */
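/* Illustrative example (editorial sketch; the intrinsic name is used only as
   an example): mmintrin.h implements _mm_set_pi16 and friends on top of
   builtins such as __builtin_ia32_vec_init_v4hi, which are expanded by
   ix86_expand_vec_init_builtin below rather than through a vec_init pattern
   in mmx.md, exactly as described above.  */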
13959
13960 static rtx
13961 ix86_expand_vec_init_builtin (tree type, tree arglist, rtx target)
13962 {
13963 enum machine_mode tmode = TYPE_MODE (type);
13964 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
13965 int i, n_elt = GET_MODE_NUNITS (tmode);
13966 rtvec v = rtvec_alloc (n_elt);
13967
13968 gcc_assert (VECTOR_MODE_P (tmode));
13969
13970 for (i = 0; i < n_elt; ++i, arglist = TREE_CHAIN (arglist))
13971 {
13972 rtx x = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
13973 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
13974 }
13975
13976 gcc_assert (arglist == NULL);
13977
13978 if (!target || !register_operand (target, tmode))
13979 target = gen_reg_rtx (tmode);
13980
13981 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
13982 return target;
13983 }
13984
13985 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
13986 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
13987 had a language-level syntax for referencing vector elements. */
13988
13989 static rtx
13990 ix86_expand_vec_ext_builtin (tree arglist, rtx target)
13991 {
13992 enum machine_mode tmode, mode0;
13993 tree arg0, arg1;
13994 int elt;
13995 rtx op0;
13996
13997 arg0 = TREE_VALUE (arglist);
13998 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13999
14000 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14001 elt = get_element_number (TREE_TYPE (arg0), arg1);
14002
14003 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
14004 mode0 = TYPE_MODE (TREE_TYPE (arg0));
14005 gcc_assert (VECTOR_MODE_P (mode0));
14006
14007 op0 = force_reg (mode0, op0);
14008
14009 if (optimize || !target || !register_operand (target, tmode))
14010 target = gen_reg_rtx (tmode);
14011
14012 ix86_expand_vector_extract (true, target, op0, elt);
14013
14014 return target;
14015 }
14016
14017 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
14018 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
14019 a language-level syntax for referencing vector elements. */
14020
14021 static rtx
14022 ix86_expand_vec_set_builtin (tree arglist)
14023 {
14024 enum machine_mode tmode, mode1;
14025 tree arg0, arg1, arg2;
14026 int elt;
14027 rtx op0, op1;
14028
14029 arg0 = TREE_VALUE (arglist);
14030 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14031 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14032
14033 tmode = TYPE_MODE (TREE_TYPE (arg0));
14034 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
14035 gcc_assert (VECTOR_MODE_P (tmode));
14036
14037 op0 = expand_expr (arg0, NULL_RTX, tmode, 0);
14038 op1 = expand_expr (arg1, NULL_RTX, mode1, 0);
14039 elt = get_element_number (TREE_TYPE (arg0), arg2);
14040
14041 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
14042 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
14043
14044 op0 = force_reg (tmode, op0);
14045 op1 = force_reg (mode1, op1);
14046
14047 ix86_expand_vector_set (true, op0, op1, elt);
14048
14049 return op0;
14050 }
14051
14052 /* Expand an expression EXP that calls a built-in function,
14053 with result going to TARGET if that's convenient
14054 (and in mode MODE if that's convenient).
14055 SUBTARGET may be used as the target for computing one of EXP's operands.
14056 IGNORE is nonzero if the value is to be ignored. */
14057
14058 static rtx
14059 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
14060 enum machine_mode mode ATTRIBUTE_UNUSED,
14061 int ignore ATTRIBUTE_UNUSED)
14062 {
14063 const struct builtin_description *d;
14064 size_t i;
14065 enum insn_code icode;
14066 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
14067 tree arglist = TREE_OPERAND (exp, 1);
14068 tree arg0, arg1, arg2;
14069 rtx op0, op1, op2, pat;
14070 enum machine_mode tmode, mode0, mode1, mode2;
14071 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
14072
14073 switch (fcode)
14074 {
14075 case IX86_BUILTIN_EMMS:
14076 emit_insn (gen_mmx_emms ());
14077 return 0;
14078
14079 case IX86_BUILTIN_SFENCE:
14080 emit_insn (gen_sse_sfence ());
14081 return 0;
14082
14083 case IX86_BUILTIN_MASKMOVQ:
14084 case IX86_BUILTIN_MASKMOVDQU:
14085 icode = (fcode == IX86_BUILTIN_MASKMOVQ
14086 ? CODE_FOR_mmx_maskmovq
14087 : CODE_FOR_sse2_maskmovdqu);
14088 /* Note the arg order is different from the operand order. */
14089 arg1 = TREE_VALUE (arglist);
14090 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
14091 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14092 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14093 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14094 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14095 mode0 = insn_data[icode].operand[0].mode;
14096 mode1 = insn_data[icode].operand[1].mode;
14097 mode2 = insn_data[icode].operand[2].mode;
14098
14099 op0 = force_reg (Pmode, op0);
14100 op0 = gen_rtx_MEM (mode1, op0);
14101
14102 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
14103 op0 = copy_to_mode_reg (mode0, op0);
14104 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
14105 op1 = copy_to_mode_reg (mode1, op1);
14106 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
14107 op2 = copy_to_mode_reg (mode2, op2);
14108 pat = GEN_FCN (icode) (op0, op1, op2);
14109 if (! pat)
14110 return 0;
14111 emit_insn (pat);
14112 return 0;
14113
14114 case IX86_BUILTIN_SQRTSS:
14115 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, arglist, target);
14116 case IX86_BUILTIN_RSQRTSS:
14117 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, arglist, target);
14118 case IX86_BUILTIN_RCPSS:
14119 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, arglist, target);
14120
14121 case IX86_BUILTIN_LOADUPS:
14122 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
14123
14124 case IX86_BUILTIN_STOREUPS:
14125 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
14126
14127 case IX86_BUILTIN_LOADHPS:
14128 case IX86_BUILTIN_LOADLPS:
14129 case IX86_BUILTIN_LOADHPD:
14130 case IX86_BUILTIN_LOADLPD:
14131 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
14132 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
14133 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
14134 : CODE_FOR_sse2_loadlpd);
14135 arg0 = TREE_VALUE (arglist);
14136 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14137 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14138 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14139 tmode = insn_data[icode].operand[0].mode;
14140 mode0 = insn_data[icode].operand[1].mode;
14141 mode1 = insn_data[icode].operand[2].mode;
14142
14143 op0 = force_reg (mode0, op0);
14144 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
14145 if (optimize || target == 0
14146 || GET_MODE (target) != tmode
14147 || !register_operand (target, tmode))
14148 target = gen_reg_rtx (tmode);
14149 pat = GEN_FCN (icode) (target, op0, op1);
14150 if (! pat)
14151 return 0;
14152 emit_insn (pat);
14153 return target;
14154
14155 case IX86_BUILTIN_STOREHPS:
14156 case IX86_BUILTIN_STORELPS:
14157 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
14158 : CODE_FOR_sse_storelps);
14159 arg0 = TREE_VALUE (arglist);
14160 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14161 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14162 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14163 mode0 = insn_data[icode].operand[0].mode;
14164 mode1 = insn_data[icode].operand[1].mode;
14165
14166 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
14167 op1 = force_reg (mode1, op1);
14168
14169 pat = GEN_FCN (icode) (op0, op1);
14170 if (! pat)
14171 return 0;
14172 emit_insn (pat);
14173 return const0_rtx;
14174
14175 case IX86_BUILTIN_MOVNTPS:
14176 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
14177 case IX86_BUILTIN_MOVNTQ:
14178 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
14179
14180 case IX86_BUILTIN_LDMXCSR:
14181 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
14182 target = assign_386_stack_local (SImode, 0);
14183 emit_move_insn (target, op0);
14184 emit_insn (gen_sse_ldmxcsr (target));
14185 return 0;
14186
14187 case IX86_BUILTIN_STMXCSR:
14188 target = assign_386_stack_local (SImode, 0);
14189 emit_insn (gen_sse_stmxcsr (target));
14190 return copy_to_mode_reg (SImode, target);
14191
14192 case IX86_BUILTIN_SHUFPS:
14193 case IX86_BUILTIN_SHUFPD:
14194 icode = (fcode == IX86_BUILTIN_SHUFPS
14195 ? CODE_FOR_sse_shufps
14196 : CODE_FOR_sse2_shufpd);
14197 arg0 = TREE_VALUE (arglist);
14198 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14199 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14200 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14201 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14202 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14203 tmode = insn_data[icode].operand[0].mode;
14204 mode0 = insn_data[icode].operand[1].mode;
14205 mode1 = insn_data[icode].operand[2].mode;
14206 mode2 = insn_data[icode].operand[3].mode;
14207
14208 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14209 op0 = copy_to_mode_reg (mode0, op0);
14210 if ((optimize && !register_operand (op1, mode1))
14211 || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
14212 op1 = copy_to_mode_reg (mode1, op1);
14213 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14214 {
14215 /* @@@ better error message */
14216 error ("mask must be an immediate");
14217 return gen_reg_rtx (tmode);
14218 }
14219 if (optimize || target == 0
14220 || GET_MODE (target) != tmode
14221 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14222 target = gen_reg_rtx (tmode);
14223 pat = GEN_FCN (icode) (target, op0, op1, op2);
14224 if (! pat)
14225 return 0;
14226 emit_insn (pat);
14227 return target;
14228
14229 case IX86_BUILTIN_PSHUFW:
14230 case IX86_BUILTIN_PSHUFD:
14231 case IX86_BUILTIN_PSHUFHW:
14232 case IX86_BUILTIN_PSHUFLW:
14233 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
14234 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
14235 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
14236 : CODE_FOR_mmx_pshufw);
14237 arg0 = TREE_VALUE (arglist);
14238 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14239 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14240 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14241 tmode = insn_data[icode].operand[0].mode;
14242 mode1 = insn_data[icode].operand[1].mode;
14243 mode2 = insn_data[icode].operand[2].mode;
14244
14245 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14246 op0 = copy_to_mode_reg (mode1, op0);
14247 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14248 {
14249 /* @@@ better error message */
14250 error ("mask must be an immediate");
14251 return const0_rtx;
14252 }
14253 if (target == 0
14254 || GET_MODE (target) != tmode
14255 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14256 target = gen_reg_rtx (tmode);
14257 pat = GEN_FCN (icode) (target, op0, op1);
14258 if (! pat)
14259 return 0;
14260 emit_insn (pat);
14261 return target;
14262
14263 case IX86_BUILTIN_PSLLDQI128:
14264 case IX86_BUILTIN_PSRLDQI128:
14265 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
14266 : CODE_FOR_sse2_lshrti3);
14267 arg0 = TREE_VALUE (arglist);
14268 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14269 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14270 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14271 tmode = insn_data[icode].operand[0].mode;
14272 mode1 = insn_data[icode].operand[1].mode;
14273 mode2 = insn_data[icode].operand[2].mode;
14274
14275 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14276 {
14277 op0 = copy_to_reg (op0);
14278 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
14279 }
14280 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14281 {
14282 error ("shift must be an immediate");
14283 return const0_rtx;
14284 }
14285 target = gen_reg_rtx (V2DImode);
14286 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
14287 if (! pat)
14288 return 0;
14289 emit_insn (pat);
14290 return target;
14291
14292 case IX86_BUILTIN_FEMMS:
14293 emit_insn (gen_mmx_femms ());
14294 return NULL_RTX;
14295
14296 case IX86_BUILTIN_PAVGUSB:
14297 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, arglist, target);
14298
14299 case IX86_BUILTIN_PF2ID:
14300 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, arglist, target, 0);
14301
14302 case IX86_BUILTIN_PFACC:
14303 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, arglist, target);
14304
14305 case IX86_BUILTIN_PFADD:
14306 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, arglist, target);
14307
14308 case IX86_BUILTIN_PFCMPEQ:
14309 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, arglist, target);
14310
14311 case IX86_BUILTIN_PFCMPGE:
14312 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, arglist, target);
14313
14314 case IX86_BUILTIN_PFCMPGT:
14315 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, arglist, target);
14316
14317 case IX86_BUILTIN_PFMAX:
14318 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, arglist, target);
14319
14320 case IX86_BUILTIN_PFMIN:
14321 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, arglist, target);
14322
14323 case IX86_BUILTIN_PFMUL:
14324 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, arglist, target);
14325
14326 case IX86_BUILTIN_PFRCP:
14327 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, arglist, target, 0);
14328
14329 case IX86_BUILTIN_PFRCPIT1:
14330 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, arglist, target);
14331
14332 case IX86_BUILTIN_PFRCPIT2:
14333 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, arglist, target);
14334
14335 case IX86_BUILTIN_PFRSQIT1:
14336 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, arglist, target);
14337
14338 case IX86_BUILTIN_PFRSQRT:
14339 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, arglist, target, 0);
14340
14341 case IX86_BUILTIN_PFSUB:
14342 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, arglist, target);
14343
14344 case IX86_BUILTIN_PFSUBR:
14345 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, arglist, target);
14346
14347 case IX86_BUILTIN_PI2FD:
14348 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, arglist, target, 0);
14349
14350 case IX86_BUILTIN_PMULHRW:
14351 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, arglist, target);
14352
14353 case IX86_BUILTIN_PF2IW:
14354 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, arglist, target, 0);
14355
14356 case IX86_BUILTIN_PFNACC:
14357 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, arglist, target);
14358
14359 case IX86_BUILTIN_PFPNACC:
14360 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, arglist, target);
14361
14362 case IX86_BUILTIN_PI2FW:
14363 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, arglist, target, 0);
14364
14365 case IX86_BUILTIN_PSWAPDSI:
14366 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, arglist, target, 0);
14367
14368 case IX86_BUILTIN_PSWAPDSF:
14369 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, arglist, target, 0);
14370
14371 case IX86_BUILTIN_SQRTSD:
14372 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, arglist, target);
14373 case IX86_BUILTIN_LOADUPD:
14374 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
14375 case IX86_BUILTIN_STOREUPD:
14376 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
14377
14378 case IX86_BUILTIN_MFENCE:
14379 emit_insn (gen_sse2_mfence ());
14380 return 0;
14381 case IX86_BUILTIN_LFENCE:
14382 emit_insn (gen_sse2_lfence ());
14383 return 0;
14384
14385 case IX86_BUILTIN_CLFLUSH:
14386 arg0 = TREE_VALUE (arglist);
14387 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14388 icode = CODE_FOR_sse2_clflush;
14389 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
14390 op0 = copy_to_mode_reg (Pmode, op0);
14391
14392 emit_insn (gen_sse2_clflush (op0));
14393 return 0;
14394
14395 case IX86_BUILTIN_MOVNTPD:
14396 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
14397 case IX86_BUILTIN_MOVNTDQ:
14398 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
14399 case IX86_BUILTIN_MOVNTI:
14400 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
14401
14402 case IX86_BUILTIN_LOADDQU:
14403 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
14404 case IX86_BUILTIN_STOREDQU:
14405 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
14406
14407 case IX86_BUILTIN_MONITOR:
14408 arg0 = TREE_VALUE (arglist);
14409 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14410 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14411 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14412 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14413 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14414 if (!REG_P (op0))
14415 op0 = copy_to_mode_reg (SImode, op0);
14416 if (!REG_P (op1))
14417 op1 = copy_to_mode_reg (SImode, op1);
14418 if (!REG_P (op2))
14419 op2 = copy_to_mode_reg (SImode, op2);
14420 emit_insn (gen_sse3_monitor (op0, op1, op2));
14421 return 0;
14422
14423 case IX86_BUILTIN_MWAIT:
14424 arg0 = TREE_VALUE (arglist);
14425 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14426 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14427 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14428 if (!REG_P (op0))
14429 op0 = copy_to_mode_reg (SImode, op0);
14430 if (!REG_P (op1))
14431 op1 = copy_to_mode_reg (SImode, op1);
14432 emit_insn (gen_sse3_mwait (op0, op1));
14433 return 0;
14434
14435 case IX86_BUILTIN_LDDQU:
14436 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, arglist,
14437 target, 1);
14438
14439 case IX86_BUILTIN_VEC_INIT_V2SI:
14440 case IX86_BUILTIN_VEC_INIT_V4HI:
14441 case IX86_BUILTIN_VEC_INIT_V8QI:
14442 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), arglist, target);
14443
14444 case IX86_BUILTIN_VEC_EXT_V2DF:
14445 case IX86_BUILTIN_VEC_EXT_V2DI:
14446 case IX86_BUILTIN_VEC_EXT_V4SF:
14447 case IX86_BUILTIN_VEC_EXT_V4SI:
14448 case IX86_BUILTIN_VEC_EXT_V8HI:
14449 case IX86_BUILTIN_VEC_EXT_V4HI:
14450 return ix86_expand_vec_ext_builtin (arglist, target);
14451
14452 case IX86_BUILTIN_VEC_SET_V8HI:
14453 case IX86_BUILTIN_VEC_SET_V4HI:
14454 return ix86_expand_vec_set_builtin (arglist);
14455
14456 default:
14457 break;
14458 }
14459
14460 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
14461 if (d->code == fcode)
14462 {
14463 /* Compares are treated specially. */
14464 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
14465 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
14466 || d->icode == CODE_FOR_sse2_maskcmpv2df3
14467 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
14468 return ix86_expand_sse_compare (d, arglist, target);
14469
14470 return ix86_expand_binop_builtin (d->icode, arglist, target);
14471 }
14472
14473 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
14474 if (d->code == fcode)
14475 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
14476
14477 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
14478 if (d->code == fcode)
14479 return ix86_expand_sse_comi (d, arglist, target);
14480
14481 gcc_unreachable ();
14482 }
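/* Editorial note on the fallback above: a builtin code not handled by the
   switch is looked up in the bdesc_2arg, bdesc_1arg and bdesc_comi tables in
   turn, so reaching gcc_unreachable () means a builtin was registered without
   a matching expander entry.  */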
14483
14484 /* Store OPERAND to memory after reload is completed. This means
14485 that we can't easily use assign_stack_local. */
14486 rtx
14487 ix86_force_to_memory (enum machine_mode mode, rtx operand)
14488 {
14489 rtx result;
14490 if (!reload_completed)
14491 abort ();
14492 if (TARGET_RED_ZONE)
14493 {
14494 result = gen_rtx_MEM (mode,
14495 gen_rtx_PLUS (Pmode,
14496 stack_pointer_rtx,
14497 GEN_INT (-RED_ZONE_SIZE)));
14498 emit_move_insn (result, operand);
14499 }
14500 else if (!TARGET_RED_ZONE && TARGET_64BIT)
14501 {
14502 switch (mode)
14503 {
14504 case HImode:
14505 case SImode:
14506 operand = gen_lowpart (DImode, operand);
14507 /* FALLTHRU */
14508 case DImode:
14509 emit_insn (
14510 gen_rtx_SET (VOIDmode,
14511 gen_rtx_MEM (DImode,
14512 gen_rtx_PRE_DEC (DImode,
14513 stack_pointer_rtx)),
14514 operand));
14515 break;
14516 default:
14517 abort ();
14518 }
14519 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14520 }
14521 else
14522 {
14523 switch (mode)
14524 {
14525 case DImode:
14526 {
14527 rtx operands[2];
14528 split_di (&operand, 1, operands, operands + 1);
14529 emit_insn (
14530 gen_rtx_SET (VOIDmode,
14531 gen_rtx_MEM (SImode,
14532 gen_rtx_PRE_DEC (Pmode,
14533 stack_pointer_rtx)),
14534 operands[1]));
14535 emit_insn (
14536 gen_rtx_SET (VOIDmode,
14537 gen_rtx_MEM (SImode,
14538 gen_rtx_PRE_DEC (Pmode,
14539 stack_pointer_rtx)),
14540 operands[0]));
14541 }
14542 break;
14543 case HImode:
14544 /* It is better to store HImodes as SImodes. */
14545 if (!TARGET_PARTIAL_REG_STALL)
14546 operand = gen_lowpart (SImode, operand);
14547 /* FALLTHRU */
14548 case SImode:
14549 emit_insn (
14550 gen_rtx_SET (VOIDmode,
14551 gen_rtx_MEM (GET_MODE (operand),
14552 gen_rtx_PRE_DEC (SImode,
14553 stack_pointer_rtx)),
14554 operand));
14555 break;
14556 default:
14557 abort ();
14558 }
14559 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14560 }
14561 return result;
14562 }
14563
14564 /* Free the operand from memory. */
14565 void
14566 ix86_free_from_memory (enum machine_mode mode)
14567 {
14568 if (!TARGET_RED_ZONE)
14569 {
14570 int size;
14571
14572 if (mode == DImode || TARGET_64BIT)
14573 size = 8;
14574 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
14575 size = 2;
14576 else
14577 size = 4;
14578 /* Use LEA to deallocate stack space. In peephole2 it will be converted
14579 to a pop or add instruction if registers are available. */
14580 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
14581 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14582 GEN_INT (size))));
14583 }
14584 }
14585
14586 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
14587 QImode must go into class Q_REGS.
14588 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
14589 movdf to do mem-to-mem moves through integer regs. */
14590 enum reg_class
14591 ix86_preferred_reload_class (rtx x, enum reg_class class)
14592 {
14593 if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x)))
14594 return NO_REGS;
14595 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
14596 {
14597 /* SSE can't load any constant directly yet. */
14598 if (SSE_CLASS_P (class))
14599 return NO_REGS;
14600 /* Floats can load 0 and 1. */
14601 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
14602 {
14603 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
14604 if (MAYBE_SSE_CLASS_P (class))
14605 return (reg_class_subset_p (class, GENERAL_REGS)
14606 ? GENERAL_REGS : FLOAT_REGS);
14607 else
14608 return class;
14609 }
14610 /* General regs can load everything. */
14611 if (reg_class_subset_p (class, GENERAL_REGS))
14612 return GENERAL_REGS;
14613 /* In case we haven't resolved FLOAT or SSE yet, give up. */
14614 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
14615 return NO_REGS;
14616 }
14617 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
14618 return NO_REGS;
14619 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
14620 return Q_REGS;
14621 return class;
14622 }
14623
14624 /* If we are copying between general and FP registers, we need a memory
14625 location. The same is true for SSE and MMX registers.
14626
14627 The macro can't work reliably when one of the CLASSES is a class containing
14628 registers from multiple units (SSE, MMX, integer). We avoid this by never
14629 combining those units in single alternative in the machine description.
14630 Ensure that this constraint holds to avoid unexpected surprises.
14631
14632 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
14633 enforce these sanity checks. */
14634 int
14635 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
14636 enum machine_mode mode, int strict)
14637 {
14638 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
14639 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
14640 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
14641 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
14642 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
14643 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
14644 {
14645 if (strict)
14646 abort ();
14647 else
14648 return 1;
14649 }
14650 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
14651 || ((SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
14652 || MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
14653 && ((mode != SImode && (mode != DImode || !TARGET_64BIT))
14654 || (!TARGET_INTER_UNIT_MOVES && !optimize_size))));
14655 }
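/* Illustrative examples (editorial sketch): copying a DFmode value between
   SSE_REGS and FLOAT_REGS always needs secondary memory, because the classes
   differ in FLOAT_CLASS_P; an SImode copy between SSE_REGS and GENERAL_REGS
   needs it only when direct inter-unit moves are disabled
   (!TARGET_INTER_UNIT_MOVES) and we are not optimizing for size.  */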
14656 /* Return the cost of moving data from a register in class CLASS1 to
14657 one in class CLASS2.
14658
14659 It is not required that the cost always equal 2 when FROM is the same as TO;
14660 on some machines it is expensive to move between registers if they are not
14661 general registers. */
14662 int
14663 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
14664 enum reg_class class2)
14665 {
14666 /* In case we require secondary memory, compute the cost of the store followed
14667 by the load. In order to avoid bad register allocation choices, we need
14668 this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
14669
14670 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
14671 {
14672 int cost = 1;
14673
14674 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
14675 MEMORY_MOVE_COST (mode, class1, 1));
14676 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
14677 MEMORY_MOVE_COST (mode, class2, 1));
14678
14679 /* When copying from a general purpose register we may emit multiple
14680 stores followed by a single load, causing a memory size mismatch stall.
14681 Count this as an arbitrarily high cost of 20. */
14682 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
14683 cost += 20;
14684
14685 /* In the case of FP/MMX moves, the registers actually overlap, and we
14686 have to switch modes in order to treat them differently. */
14687 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
14688 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
14689 cost += 20;
14690
14691 return cost;
14692 }
14693
14694 /* Moves between the SSE/MMX units and the integer unit are expensive. */
14695 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
14696 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
14697 return ix86_cost->mmxsse_to_integer;
14698 if (MAYBE_FLOAT_CLASS_P (class1))
14699 return ix86_cost->fp_move;
14700 if (MAYBE_SSE_CLASS_P (class1))
14701 return ix86_cost->sse_move;
14702 if (MAYBE_MMX_CLASS_P (class1))
14703 return ix86_cost->mmx_move;
14704 return 2;
14705 }
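/* Worked example (editorial sketch of the formula above): when a DImode copy
   from GENERAL_REGS to SSE_REGS goes through secondary memory, the cost is
       1 + MAX (integer load, integer store) + MAX (SSE load, SSE store)
   plus 20 when the source class needs more hard registers than the
   destination (the store/load size mismatch case); when no memory is needed
   the move is priced directly from ix86_cost->mmxsse_to_integer.  */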
14706
14707 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
14708 int
14709 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
14710 {
14711 /* Only the flags registers can hold CCmode values, and they can hold nothing else. */
14712 if (CC_REGNO_P (regno))
14713 return GET_MODE_CLASS (mode) == MODE_CC;
14714 if (GET_MODE_CLASS (mode) == MODE_CC
14715 || GET_MODE_CLASS (mode) == MODE_RANDOM
14716 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
14717 return 0;
14718 if (FP_REGNO_P (regno))
14719 return VALID_FP_MODE_P (mode);
14720 if (SSE_REGNO_P (regno))
14721 {
14722 /* We implement the move patterns for all vector modes into and
14723 out of SSE registers, even when no operation instructions
14724 are available. */
14725 return (VALID_SSE_REG_MODE (mode)
14726 || VALID_SSE2_REG_MODE (mode)
14727 || VALID_MMX_REG_MODE (mode)
14728 || VALID_MMX_REG_MODE_3DNOW (mode));
14729 }
14730 if (MMX_REGNO_P (regno))
14731 {
14732 /* We implement the move patterns for 3DNOW modes even in MMX mode,
14733 so if the register is available at all, then we can move data of
14734 the given mode into or out of it. */
14735 return (VALID_MMX_REG_MODE (mode)
14736 || VALID_MMX_REG_MODE_3DNOW (mode));
14737 }
14738 /* We handle both integers and floats in the general purpose registers.
14739 In the future we should be able to handle vector modes as well. */
14740 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
14741 return 0;
14742 /* Take care with QImode values - they can be in non-QI regs, but then
14743 they do cause partial register stalls. */
14744 if (regno < 4 || mode != QImode || TARGET_64BIT)
14745 return 1;
14746 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
14747 }
14748
14749 /* Return the cost of moving data of mode M between a
14750 register and memory. A value of 2 is the default; this cost is
14751 relative to those in `REGISTER_MOVE_COST'.
14752
14753 If moving between registers and memory is more expensive than
14754 between two registers, you should define this macro to express the
14755 relative cost.
14756
14757 Also model the increased cost of moving QImode registers in non
14758 Q_REGS classes.
14759 */
14760 int
14761 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
14762 {
14763 if (FLOAT_CLASS_P (class))
14764 {
14765 int index;
14766 switch (mode)
14767 {
14768 case SFmode:
14769 index = 0;
14770 break;
14771 case DFmode:
14772 index = 1;
14773 break;
14774 case XFmode:
14775 index = 2;
14776 break;
14777 default:
14778 return 100;
14779 }
14780 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
14781 }
14782 if (SSE_CLASS_P (class))
14783 {
14784 int index;
14785 switch (GET_MODE_SIZE (mode))
14786 {
14787 case 4:
14788 index = 0;
14789 break;
14790 case 8:
14791 index = 1;
14792 break;
14793 case 16:
14794 index = 2;
14795 break;
14796 default:
14797 return 100;
14798 }
14799 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
14800 }
14801 if (MMX_CLASS_P (class))
14802 {
14803 int index;
14804 switch (GET_MODE_SIZE (mode))
14805 {
14806 case 4:
14807 index = 0;
14808 break;
14809 case 8:
14810 index = 1;
14811 break;
14812 default:
14813 return 100;
14814 }
14815 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
14816 }
14817 switch (GET_MODE_SIZE (mode))
14818 {
14819 case 1:
14820 if (in)
14821 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
14822 : ix86_cost->movzbl_load);
14823 else
14824 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
14825 : ix86_cost->int_store[0] + 4);
14826 break;
14827 case 2:
14828 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
14829 default:
14830 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
14831 if (mode == TFmode)
14832 mode = XFmode;
14833 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
14834 * (((int) GET_MODE_SIZE (mode)
14835 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
14836 }
14837 }
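/* Worked example (editorial sketch): an SFmode load into FLOAT_REGS costs
   ix86_cost->fp_load[0], an 8 byte load into SSE_REGS costs
   ix86_cost->sse_load[1], and a 1 byte load into a non-Q_REGS integer class
   is priced as a movzbl load, following the table indices chosen above.  */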
14838
14839 /* Compute a (partial) cost for rtx X. Return true if the complete
14840 cost has been computed, and false if subexpressions should be
14841 scanned. In either case, *TOTAL contains the cost result. */
14842
14843 static bool
14844 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
14845 {
14846 enum machine_mode mode = GET_MODE (x);
14847
14848 switch (code)
14849 {
14850 case CONST_INT:
14851 case CONST:
14852 case LABEL_REF:
14853 case SYMBOL_REF:
14854 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
14855 *total = 3;
14856 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
14857 *total = 2;
14858 else if (flag_pic && SYMBOLIC_CONST (x)
14859 && (!TARGET_64BIT
14860 || (GET_CODE (x) != LABEL_REF
14861 && (GET_CODE (x) != SYMBOL_REF
14862 || !SYMBOL_REF_LOCAL_P (x)))))
14863 *total = 1;
14864 else
14865 *total = 0;
14866 return true;
14867
14868 case CONST_DOUBLE:
14869 if (mode == VOIDmode)
14870 *total = 0;
14871 else
14872 switch (standard_80387_constant_p (x))
14873 {
14874 case 1: /* 0.0 */
14875 *total = 1;
14876 break;
14877 default: /* Other constants */
14878 *total = 2;
14879 break;
14880 case 0:
14881 case -1:
14882 /* Start with (MEM (SYMBOL_REF)), since that's where
14883 it'll probably end up. Add a penalty for size. */
14884 *total = (COSTS_N_INSNS (1)
14885 + (flag_pic != 0 && !TARGET_64BIT)
14886 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
14887 break;
14888 }
14889 return true;
14890
14891 case ZERO_EXTEND:
14892 /* The zero extension is often completely free on x86_64, so make
14893 it as cheap as possible. */
14894 if (TARGET_64BIT && mode == DImode
14895 && GET_MODE (XEXP (x, 0)) == SImode)
14896 *total = 1;
14897 else if (TARGET_ZERO_EXTEND_WITH_AND)
14898 *total = COSTS_N_INSNS (ix86_cost->add);
14899 else
14900 *total = COSTS_N_INSNS (ix86_cost->movzx);
14901 return false;
14902
14903 case SIGN_EXTEND:
14904 *total = COSTS_N_INSNS (ix86_cost->movsx);
14905 return false;
14906
14907 case ASHIFT:
14908 if (GET_CODE (XEXP (x, 1)) == CONST_INT
14909 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
14910 {
14911 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
14912 if (value == 1)
14913 {
14914 *total = COSTS_N_INSNS (ix86_cost->add);
14915 return false;
14916 }
14917 if ((value == 2 || value == 3)
14918 && ix86_cost->lea <= ix86_cost->shift_const)
14919 {
14920 *total = COSTS_N_INSNS (ix86_cost->lea);
14921 return false;
14922 }
14923 }
14924 /* FALLTHRU */
14925
14926 case ROTATE:
14927 case ASHIFTRT:
14928 case LSHIFTRT:
14929 case ROTATERT:
14930 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
14931 {
14932 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14933 {
14934 if (INTVAL (XEXP (x, 1)) > 32)
14935 *total = COSTS_N_INSNS(ix86_cost->shift_const + 2);
14936 else
14937 *total = COSTS_N_INSNS(ix86_cost->shift_const * 2);
14938 }
14939 else
14940 {
14941 if (GET_CODE (XEXP (x, 1)) == AND)
14942 *total = COSTS_N_INSNS(ix86_cost->shift_var * 2);
14943 else
14944 *total = COSTS_N_INSNS(ix86_cost->shift_var * 6 + 2);
14945 }
14946 }
14947 else
14948 {
14949 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14950 *total = COSTS_N_INSNS (ix86_cost->shift_const);
14951 else
14952 *total = COSTS_N_INSNS (ix86_cost->shift_var);
14953 }
14954 return false;
14955
14956 case MULT:
14957 if (FLOAT_MODE_P (mode))
14958 {
14959 *total = COSTS_N_INSNS (ix86_cost->fmul);
14960 return false;
14961 }
14962 else
14963 {
14964 rtx op0 = XEXP (x, 0);
14965 rtx op1 = XEXP (x, 1);
14966 int nbits;
14967 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14968 {
14969 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
14970 for (nbits = 0; value != 0; value &= value - 1)
14971 nbits++;
14972 }
14973 else
14974 /* This is arbitrary. */
14975 nbits = 7;
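/* Editorial note: the loop above is a population count, so e.g. multiplying
   by the constant 10 (binary 1010) yields nbits == 2 and is priced below as
   mult_init[MODE_INDEX (mode)] + 2 * mult_bit.  */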
14976
14977 /* Compute costs correctly for widening multiplication. */
14978 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
14979 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
14980 == GET_MODE_SIZE (mode))
14981 {
14982 int is_mulwiden = 0;
14983 enum machine_mode inner_mode = GET_MODE (op0);
14984
14985 if (GET_CODE (op0) == GET_CODE (op1))
14986 is_mulwiden = 1, op1 = XEXP (op1, 0);
14987 else if (GET_CODE (op1) == CONST_INT)
14988 {
14989 if (GET_CODE (op0) == SIGN_EXTEND)
14990 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
14991 == INTVAL (op1);
14992 else
14993 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
14994 }
14995
14996 if (is_mulwiden)
14997 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
14998 }
14999
15000 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
15001 + nbits * ix86_cost->mult_bit)
15002 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code);
15003
15004 return true;
15005 }
15006
15007 case DIV:
15008 case UDIV:
15009 case MOD:
15010 case UMOD:
15011 if (FLOAT_MODE_P (mode))
15012 *total = COSTS_N_INSNS (ix86_cost->fdiv);
15013 else
15014 *total = COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)]);
15015 return false;
15016
15017 case PLUS:
15018 if (FLOAT_MODE_P (mode))
15019 *total = COSTS_N_INSNS (ix86_cost->fadd);
15020 else if (GET_MODE_CLASS (mode) == MODE_INT
15021 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
15022 {
15023 if (GET_CODE (XEXP (x, 0)) == PLUS
15024 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
15025 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
15026 && CONSTANT_P (XEXP (x, 1)))
15027 {
15028 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
15029 if (val == 2 || val == 4 || val == 8)
15030 {
15031 *total = COSTS_N_INSNS (ix86_cost->lea);
15032 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
15033 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
15034 outer_code);
15035 *total += rtx_cost (XEXP (x, 1), outer_code);
15036 return true;
15037 }
15038 }
15039 else if (GET_CODE (XEXP (x, 0)) == MULT
15040 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
15041 {
15042 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
15043 if (val == 2 || val == 4 || val == 8)
15044 {
15045 *total = COSTS_N_INSNS (ix86_cost->lea);
15046 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
15047 *total += rtx_cost (XEXP (x, 1), outer_code);
15048 return true;
15049 }
15050 }
15051 else if (GET_CODE (XEXP (x, 0)) == PLUS)
15052 {
15053 *total = COSTS_N_INSNS (ix86_cost->lea);
15054 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
15055 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
15056 *total += rtx_cost (XEXP (x, 1), outer_code);
15057 return true;
15058 }
15059 }
15060 /* FALLTHRU */
15061
15062 case MINUS:
15063 if (FLOAT_MODE_P (mode))
15064 {
15065 *total = COSTS_N_INSNS (ix86_cost->fadd);
15066 return false;
15067 }
15068 /* FALLTHRU */
15069
15070 case AND:
15071 case IOR:
15072 case XOR:
15073 if (!TARGET_64BIT && mode == DImode)
15074 {
15075 *total = (COSTS_N_INSNS (ix86_cost->add) * 2
15076 + (rtx_cost (XEXP (x, 0), outer_code)
15077 << (GET_MODE (XEXP (x, 0)) != DImode))
15078 + (rtx_cost (XEXP (x, 1), outer_code)
15079 << (GET_MODE (XEXP (x, 1)) != DImode)));
15080 return true;
15081 }
15082 /* FALLTHRU */
15083
15084 case NEG:
15085 if (FLOAT_MODE_P (mode))
15086 {
15087 *total = COSTS_N_INSNS (ix86_cost->fchs);
15088 return false;
15089 }
15090 /* FALLTHRU */
15091
15092 case NOT:
15093 if (!TARGET_64BIT && mode == DImode)
15094 *total = COSTS_N_INSNS (ix86_cost->add * 2);
15095 else
15096 *total = COSTS_N_INSNS (ix86_cost->add);
15097 return false;
15098
15099 case COMPARE:
15100 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
15101 && XEXP (XEXP (x, 0), 1) == const1_rtx
15102 && GET_CODE (XEXP (XEXP (x, 0), 2)) == CONST_INT
15103 && XEXP (x, 1) == const0_rtx)
15104 {
15105 /* This kind of construct is implemented using test[bwl].
15106 Treat it as if we had an AND. */
15107 *total = (COSTS_N_INSNS (ix86_cost->add)
15108 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
15109 + rtx_cost (const1_rtx, outer_code));
15110 return true;
15111 }
15112 return false;
15113
15114 case FLOAT_EXTEND:
15115 if (!TARGET_SSE_MATH
15116 || mode == XFmode
15117 || (mode == DFmode && !TARGET_SSE2))
15118 *total = 0;
15119 return false;
15120
15121 case ABS:
15122 if (FLOAT_MODE_P (mode))
15123 *total = COSTS_N_INSNS (ix86_cost->fabs);
15124 return false;
15125
15126 case SQRT:
15127 if (FLOAT_MODE_P (mode))
15128 *total = COSTS_N_INSNS (ix86_cost->fsqrt);
15129 return false;
15130
15131 case UNSPEC:
15132 if (XINT (x, 1) == UNSPEC_TP)
15133 *total = 0;
15134 return false;
15135
15136 default:
15137 return false;
15138 }
15139 }
15140
15141 #if TARGET_MACHO
15142
15143 static int current_machopic_label_num;
15144
15145 /* Given a symbol name and its associated stub, write out the
15146 definition of the stub. */
15147
15148 void
15149 machopic_output_stub (FILE *file, const char *symb, const char *stub)
15150 {
15151 unsigned int length;
15152 char *binder_name, *symbol_name, lazy_ptr_name[32];
15153 int label = ++current_machopic_label_num;
15154
15155 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
15156 symb = (*targetm.strip_name_encoding) (symb);
15157
15158 length = strlen (stub);
15159 binder_name = alloca (length + 32);
15160 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
15161
15162 length = strlen (symb);
15163 symbol_name = alloca (length + 32);
15164 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
15165
15166 sprintf (lazy_ptr_name, "L%d$lz", label);
15167
15168 if (MACHOPIC_PURE)
15169 machopic_picsymbol_stub_section ();
15170 else
15171 machopic_symbol_stub_section ();
15172
15173 fprintf (file, "%s:\n", stub);
15174 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
15175
15176 if (MACHOPIC_PURE)
15177 {
15178 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
15179 fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
15180 fprintf (file, "\tjmp %%edx\n");
15181 }
15182 else
15183 fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
15184
15185 fprintf (file, "%s:\n", binder_name);
15186
15187 if (MACHOPIC_PURE)
15188 {
15189 fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
15190 fprintf (file, "\tpushl %%eax\n");
15191 }
15192 else
15193 fprintf (file, "\t pushl $%s\n", lazy_ptr_name);
15194
15195 fprintf (file, "\tjmp dyld_stub_binding_helper\n");
15196
15197 machopic_lazy_symbol_ptr_section ();
15198 fprintf (file, "%s:\n", lazy_ptr_name);
15199 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
15200 fprintf (file, "\t.long %s\n", binder_name);
15201 }
15202 #endif /* TARGET_MACHO */
15203
15204 /* Order the registers for register allocator. */
15205
15206 void
15207 x86_order_regs_for_local_alloc (void)
15208 {
15209 int pos = 0;
15210 int i;
15211
15212 /* First allocate the local general purpose registers. */
15213 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15214 if (GENERAL_REGNO_P (i) && call_used_regs[i])
15215 reg_alloc_order [pos++] = i;
15216
15217 /* Global general purpose registers. */
15218 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15219 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
15220 reg_alloc_order [pos++] = i;
15221
15222 /* x87 registers come first in case we are doing FP math
15223 using them. */
15224 if (!TARGET_SSE_MATH)
15225 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15226 reg_alloc_order [pos++] = i;
15227
15228 /* SSE registers. */
15229 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
15230 reg_alloc_order [pos++] = i;
15231 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
15232 reg_alloc_order [pos++] = i;
15233
15234 /* x87 registers. */
15235 if (TARGET_SSE_MATH)
15236 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15237 reg_alloc_order [pos++] = i;
15238
15239 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
15240 reg_alloc_order [pos++] = i;
15241
15242 /* Initialize the rest of the array, as some registers are never allocated
15243 at all. */
15244 while (pos < FIRST_PSEUDO_REGISTER)
15245 reg_alloc_order [pos++] = 0;
15246 }
15247
15248 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
15249 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
15250 #endif
15251
15252 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
15253 struct attribute_spec.handler. */
15254 static tree
15255 ix86_handle_struct_attribute (tree *node, tree name,
15256 tree args ATTRIBUTE_UNUSED,
15257 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
15258 {
15259 tree *type = NULL;
15260 if (DECL_P (*node))
15261 {
15262 if (TREE_CODE (*node) == TYPE_DECL)
15263 type = &TREE_TYPE (*node);
15264 }
15265 else
15266 type = node;
15267
15268 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
15269 || TREE_CODE (*type) == UNION_TYPE)))
15270 {
15271 warning ("%qs attribute ignored", IDENTIFIER_POINTER (name));
15272 *no_add_attrs = true;
15273 }
15274
15275 else if ((is_attribute_p ("ms_struct", name)
15276 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
15277 || ((is_attribute_p ("gcc_struct", name)
15278 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
15279 {
15280 warning ("%qs incompatible attribute ignored",
15281 IDENTIFIER_POINTER (name));
15282 *no_add_attrs = true;
15283 }
15284
15285 return NULL_TREE;
15286 }
15287
15288 static bool
15289 ix86_ms_bitfield_layout_p (tree record_type)
15290 {
15291 return (TARGET_USE_MS_BITFIELD_LAYOUT &&
15292 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
15293 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
15294 }
15295
15296 /* Returns an expression indicating where the this parameter is
15297 located on entry to the FUNCTION. */
15298
15299 static rtx
15300 x86_this_parameter (tree function)
15301 {
15302 tree type = TREE_TYPE (function);
15303
15304 if (TARGET_64BIT)
15305 {
15306 int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
15307 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
15308 }
15309
15310 if (ix86_function_regparm (type, function) > 0)
15311 {
15312 tree parm;
15313
15314 parm = TYPE_ARG_TYPES (type);
15315 /* Figure out whether or not the function has a variable number of
15316 arguments. */
15317 for (; parm; parm = TREE_CHAIN (parm))
15318 if (TREE_VALUE (parm) == void_type_node)
15319 break;
15320 /* If not, the this parameter is in the first argument. */
15321 if (parm)
15322 {
15323 int regno = 0;
15324 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
15325 regno = 2;
15326 return gen_rtx_REG (SImode, regno);
15327 }
15328 }
15329
15330 if (aggregate_value_p (TREE_TYPE (type), type))
15331 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
15332 else
15333 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
15334 }
15335
15336 /* Determine whether x86_output_mi_thunk can succeed. */
15337
15338 static bool
15339 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
15340 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
15341 HOST_WIDE_INT vcall_offset, tree function)
15342 {
15343 /* 64-bit can handle anything. */
15344 if (TARGET_64BIT)
15345 return true;
15346
15347 /* For 32-bit, everything's fine if we have one free register. */
15348 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
15349 return true;
15350
15351 /* Need a free register for vcall_offset. */
15352 if (vcall_offset)
15353 return false;
15354
15355 /* Need a free register for GOT references. */
15356 if (flag_pic && !(*targetm.binds_local_p) (function))
15357 return false;
15358
15359 /* Otherwise ok. */
15360 return true;
15361 }
15362
15363 /* Output the assembler code for a thunk function. THUNK_DECL is the
15364 declaration for the thunk function itself, FUNCTION is the decl for
15365 the target function. DELTA is an immediate constant offset to be
15366 added to THIS. If VCALL_OFFSET is nonzero, the word at
15367 *(*this + vcall_offset) should be added to THIS. */
15368
15369 static void
15370 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
15371 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
15372 HOST_WIDE_INT vcall_offset, tree function)
15373 {
15374 rtx xops[3];
15375 rtx this = x86_this_parameter (function);
15376 rtx this_reg, tmp;
15377
15378 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
15379 pull it in now and let DELTA benefit. */
15380 if (REG_P (this))
15381 this_reg = this;
15382 else if (vcall_offset)
15383 {
15384 /* Put the this parameter into %eax. */
15385 xops[0] = this;
15386 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
15387 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15388 }
15389 else
15390 this_reg = NULL_RTX;
15391
15392 /* Adjust the this parameter by a fixed constant. */
15393 if (delta)
15394 {
15395 xops[0] = GEN_INT (delta);
15396 xops[1] = this_reg ? this_reg : this;
15397 if (TARGET_64BIT)
15398 {
15399 if (!x86_64_general_operand (xops[0], DImode))
15400 {
15401 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15402 xops[1] = tmp;
15403 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
15404 xops[0] = tmp;
15405 xops[1] = this;
15406 }
15407 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15408 }
15409 else
15410 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15411 }
15412
15413 /* Adjust the this parameter by a value stored in the vtable. */
15414 if (vcall_offset)
15415 {
15416 if (TARGET_64BIT)
15417 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15418 else
15419 {
15420 int tmp_regno = 2 /* ECX */;
15421 if (lookup_attribute ("fastcall",
15422 TYPE_ATTRIBUTES (TREE_TYPE (function))))
15423 tmp_regno = 0 /* EAX */;
15424 tmp = gen_rtx_REG (SImode, tmp_regno);
15425 }
15426
15427 xops[0] = gen_rtx_MEM (Pmode, this_reg);
15428 xops[1] = tmp;
15429 if (TARGET_64BIT)
15430 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15431 else
15432 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15433
15434 /* Adjust the this parameter. */
15435 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
15436 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
15437 {
15438 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
15439 xops[0] = GEN_INT (vcall_offset);
15440 xops[1] = tmp2;
15441 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15442 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
15443 }
15444 xops[1] = this_reg;
15445 if (TARGET_64BIT)
15446 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15447 else
15448 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15449 }
15450
15451 /* If necessary, drop THIS back to its stack slot. */
15452 if (this_reg && this_reg != this)
15453 {
15454 xops[0] = this_reg;
15455 xops[1] = this;
15456 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15457 }
15458
15459 xops[0] = XEXP (DECL_RTL (function), 0);
15460 if (TARGET_64BIT)
15461 {
15462 if (!flag_pic || (*targetm.binds_local_p) (function))
15463 output_asm_insn ("jmp\t%P0", xops);
15464 else
15465 {
15466 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
15467 tmp = gen_rtx_CONST (Pmode, tmp);
15468 tmp = gen_rtx_MEM (QImode, tmp);
15469 xops[0] = tmp;
15470 output_asm_insn ("jmp\t%A0", xops);
15471 }
15472 }
15473 else
15474 {
15475 if (!flag_pic || (*targetm.binds_local_p) (function))
15476 output_asm_insn ("jmp\t%P0", xops);
15477 else
15478 #if TARGET_MACHO
15479 if (TARGET_MACHO)
15480 {
15481 rtx sym_ref = XEXP (DECL_RTL (function), 0);
15482 tmp = (gen_rtx_SYMBOL_REF
15483 (Pmode,
15484 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
15485 tmp = gen_rtx_MEM (QImode, tmp);
15486 xops[0] = tmp;
15487 output_asm_insn ("jmp\t%0", xops);
15488 }
15489 else
15490 #endif /* TARGET_MACHO */
15491 {
15492 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
15493 output_set_got (tmp);
15494
15495 xops[1] = tmp;
15496 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
15497 output_asm_insn ("jmp\t{*}%1", xops);
15498 }
15499 }
15500 }
15501
15502 static void
15503 x86_file_start (void)
15504 {
15505 default_file_start ();
15506 if (X86_FILE_START_VERSION_DIRECTIVE)
15507 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
15508 if (X86_FILE_START_FLTUSED)
15509 fputs ("\t.global\t__fltused\n", asm_out_file);
15510 if (ix86_asm_dialect == ASM_INTEL)
15511 fputs ("\t.intel_syntax\n", asm_out_file);
15512 }
15513
15514 int
15515 x86_field_alignment (tree field, int computed)
15516 {
15517 enum machine_mode mode;
15518 tree type = TREE_TYPE (field);
15519
15520 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
15521 return computed;
15522 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
15523 ? get_inner_array_type (type) : type);
15524 if (mode == DFmode || mode == DCmode
15525 || GET_MODE_CLASS (mode) == MODE_INT
15526 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
15527 return MIN (32, computed);
15528 return computed;
15529 }
15530
15531 /* Output assembler code to FILE to increment profiler label # LABELNO
15532 for profiling a function entry. */
15533 void
15534 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
15535 {
15536 if (TARGET_64BIT)
15537 if (flag_pic)
15538 {
15539 #ifndef NO_PROFILE_COUNTERS
15540 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
15541 #endif
15542 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
15543 }
15544 else
15545 {
15546 #ifndef NO_PROFILE_COUNTERS
15547 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
15548 #endif
15549 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15550 }
15551 else if (flag_pic)
15552 {
15553 #ifndef NO_PROFILE_COUNTERS
15554 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
15555 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
15556 #endif
15557 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
15558 }
15559 else
15560 {
15561 #ifndef NO_PROFILE_COUNTERS
15562 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
15563 PROFILE_COUNT_REGISTER);
15564 #endif
15565 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15566 }
15567 }
15568
15569 /* We don't have exact information about the insn sizes, but we may assume
15570 quite safely that we are informed about all 1 byte insns and memory
15571 address sizes. This is enough to eliminate unnecessary padding in
15572 99% of cases. */
15573
15574 static int
15575 min_insn_size (rtx insn)
15576 {
15577 int l = 0;
15578
15579 if (!INSN_P (insn) || !active_insn_p (insn))
15580 return 0;
15581
15582 /* Discard alignments we've emitted, and jump instructions. */
15583 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
15584 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
15585 return 0;
15586 if (GET_CODE (insn) == JUMP_INSN
15587 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
15588 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
15589 return 0;
15590
15591 /* Important case - calls are always 5 bytes.
15592 It is common to have many calls in a row. */
15593 if (GET_CODE (insn) == CALL_INSN
15594 && symbolic_reference_mentioned_p (PATTERN (insn))
15595 && !SIBLING_CALL_P (insn))
15596 return 5;
15597 if (get_attr_length (insn) <= 1)
15598 return 1;
15599
15600 /* For normal instructions we may rely on the sizes of addresses
15601 and on the presence of a symbol to require 4 bytes of encoding.
15602 This is not the case for jumps, where references are PC relative. */
15603 if (GET_CODE (insn) != JUMP_INSN)
15604 {
15605 l = get_attr_length_address (insn);
15606 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
15607 l = 4;
15608 }
15609 if (l)
15610 return 1+l;
15611 else
15612 return 2;
15613 }
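/* Illustrative summary (editorial sketch): with the rules above a direct
   symbolic call counts as 5 bytes, a 1 byte insn as 1, a non-jump insn whose
   address encoding takes l bytes as 1 + l (with l bumped to 4 when a symbol
   is referenced), and anything else conservatively as 2 bytes.  */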
15614
15615 /* The AMD K8 core mispredicts jumps when there are more than 3 jumps in a
15616 16 byte window. */
15617
15618 static void
15619 ix86_avoid_jump_misspredicts (void)
15620 {
15621 rtx insn, start = get_insns ();
15622 int nbytes = 0, njumps = 0;
15623 int isjump = 0;
15624
15625 /* Look for all minimal intervals of instructions containing 4 jumps.
15626 The intervals are bounded by START and INSN. NBYTES is the total
15627 size of the instructions in the interval including INSN and not including
15628 START. When NBYTES is smaller than 16 bytes, it is possible
15629 that the ends of START and INSN land in the same 16 byte page.
15630
15631 The smallest offset in the page at which INSN can start occurs when START
15632 ends at offset 0. The offset of INSN is then NBYTES - sizeof (INSN).
15633 We add a p2align to the 16 byte window with maxskip 15 - NBYTES + sizeof (INSN),
15634 matching the padsize computation below. */
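/* Illustrative arithmetic (editorial sketch): if the interval ending in a
   2 byte jump holds nbytes == 14, the code below computes
   padsize = 15 - 14 + 2 = 3, so up to 3 bytes of padding are inserted before
   the fourth jump to keep it out of the 16 byte window holding the other
   three.  */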
15635 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
15636 {
15637
15638 nbytes += min_insn_size (insn);
15639 if (dump_file)
15640 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
15641 INSN_UID (insn), min_insn_size (insn));
15642 if ((GET_CODE (insn) == JUMP_INSN
15643 && GET_CODE (PATTERN (insn)) != ADDR_VEC
15644 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
15645 || GET_CODE (insn) == CALL_INSN)
15646 njumps++;
15647 else
15648 continue;
15649
15650 while (njumps > 3)
15651 {
15652 start = NEXT_INSN (start);
15653 if ((GET_CODE (start) == JUMP_INSN
15654 && GET_CODE (PATTERN (start)) != ADDR_VEC
15655 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
15656 || GET_CODE (start) == CALL_INSN)
15657 njumps--, isjump = 1;
15658 else
15659 isjump = 0;
15660 nbytes -= min_insn_size (start);
15661 }
15662 if (njumps < 0)
15663 abort ();
15664 if (dump_file)
15665 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
15666 INSN_UID (start), INSN_UID (insn), nbytes);
15667
15668 if (njumps == 3 && isjump && nbytes < 16)
15669 {
15670 int padsize = 15 - nbytes + min_insn_size (insn);
15671
15672 if (dump_file)
15673 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
15674 INSN_UID (insn), padsize);
15675 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
15676 }
15677 }
15678 }
15679
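/* Worked example of the padding computation (editor's sketch): suppose the
   fourth jump closes an interval whose estimated size is nbytes == 12 and
   the jump itself is estimated at min_insn_size (insn) == 2.  Then
   padsize = 15 - 12 + 2 = 5, and an align insn with operand 5 is emitted
   before the jump so that it can no longer land in the same 16-byte window
   as the previous three jumps.  */
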
15680 /* AMD Athlon works faster
15681 when RET is not the destination of a conditional jump and is not directly
15682 preceded by another jump instruction. We avoid the penalty by inserting a
15683 NOP just before such RET instructions. */
15684 static void
15685 ix86_pad_returns (void)
15686 {
15687 edge e;
15688 edge_iterator ei;
15689
15690 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
15691 {
15692 basic_block bb = e->src;
15693 rtx ret = BB_END (bb);
15694 rtx prev;
15695 bool replace = false;
15696
15697 if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
15698 || !maybe_hot_bb_p (bb))
15699 continue;
15700 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
15701 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
15702 break;
15703 if (prev && GET_CODE (prev) == CODE_LABEL)
15704 {
15705 edge e;
15706 edge_iterator ei;
15707
15708 FOR_EACH_EDGE (e, ei, bb->preds)
15709 if (EDGE_FREQUENCY (e) && e->src->index >= 0
15710 && !(e->flags & EDGE_FALLTHRU))
15711 replace = true;
15712 }
15713 if (!replace)
15714 {
15715 prev = prev_active_insn (ret);
15716 if (prev
15717 && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
15718 || GET_CODE (prev) == CALL_INSN))
15719 replace = true;
15720 /* Empty functions get a branch mispredict even when the jump destination
15721 is not visible to us. */
15722 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
15723 replace = true;
15724 }
15725 if (replace)
15726 {
15727 emit_insn_before (gen_return_internal_long (), ret);
15728 delete_insn (ret);
15729 }
15730 }
15731 }
15732
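/* Illustrative example (editor's sketch): with this pass, a hot epilogue
   such as

	jne	.L5
	ret

   has its bare "ret" replaced via gen_return_internal_long which - assuming
   the usual definition of that pattern on this port - emits a rep-prefixed
   return ("rep ; ret"), so the return is no longer a one-byte instruction
   immediately preceded by a jump and the K7/K8 predictor penalty is
   avoided.  */
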
15733 /* Implement machine specific optimizations. We implement padding of returns
15734 for K8 CPUs and a pass to avoid 4 jumps in a single 16 byte window. */
15735 static void
15736 ix86_reorg (void)
15737 {
15738 if (TARGET_ATHLON_K8 && optimize && !optimize_size)
15739 ix86_pad_returns ();
15740 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
15741 ix86_avoid_jump_misspredicts ();
15742 }
15743
15744 /* Return nonzero when a QImode register that must be represented via a REX
15745 prefix is used. */
15746 bool
15747 x86_extended_QIreg_mentioned_p (rtx insn)
15748 {
15749 int i;
15750 extract_insn_cached (insn);
15751 for (i = 0; i < recog_data.n_operands; i++)
15752 if (REG_P (recog_data.operand[i])
15753 && REGNO (recog_data.operand[i]) >= 4)
15754 return true;
15755 return false;
15756 }
15757
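/* Example (editor's note): hard registers 0-3 (ax, dx, cx, bx) have byte
   subregisters that are encodable without REX; anything from register
   number 4 upwards (si, di, bp, sp and the r8-r15 extensions) can only be
   accessed as a byte register with a REX prefix, e.g. "movb %sil, (%rdi)".
   That is what the REGNO >= 4 test above detects.  */
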
15758 /* Return nonzero when P points to a register encoded via a REX prefix.
15759 Called via for_each_rtx. */
15760 static int
15761 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
15762 {
15763 unsigned int regno;
15764 if (!REG_P (*p))
15765 return 0;
15766 regno = REGNO (*p);
15767 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
15768 }
15769
15770 /* Return true when INSN mentions a register that must be encoded using a
15771 REX prefix. */
15772 bool
15773 x86_extended_reg_mentioned_p (rtx insn)
15774 {
15775 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
15776 }
15777
15778 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
15779 optabs would emit if we didn't have TFmode patterns. */
15780
15781 void
15782 x86_emit_floatuns (rtx operands[2])
15783 {
15784 rtx neglab, donelab, i0, i1, f0, in, out;
15785 enum machine_mode mode, inmode;
15786
15787 inmode = GET_MODE (operands[1]);
15788 if (inmode != SImode
15789 && inmode != DImode)
15790 abort ();
15791
15792 out = operands[0];
15793 in = force_reg (inmode, operands[1]);
15794 mode = GET_MODE (out);
15795 neglab = gen_label_rtx ();
15796 donelab = gen_label_rtx ();
15797 i1 = gen_reg_rtx (Pmode);
15798 f0 = gen_reg_rtx (mode);
15799
15800 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
15801
15802 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
15803 emit_jump_insn (gen_jump (donelab));
15804 emit_barrier ();
15805
15806 emit_label (neglab);
15807
15808 i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15809 i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15810 i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
15811 expand_float (f0, i0, 0);
15812 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
15813
15814 emit_label (donelab);
15815 }
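
/* Editor's sketch of the scalar logic the expander above implements, as
   plain C for illustration only; the identifier floatuns_sketch is invented
   here and is not part of GCC.  */
#if 0
static double
floatuns_sketch (unsigned long long u)
{
  unsigned long long i0;
  double f0;

  if ((long long) u >= 0)
    /* Fits in the signed range; an ordinary signed conversion suffices.  */
    return (double) (long long) u;

  /* Halve the value, folding the discarded low bit back in so the final
     rounding stays correct; convert as signed, then double the result.  */
  i0 = (u >> 1) | (u & 1);
  f0 = (double) (long long) i0;
  return f0 + f0;
}
#endif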
15816 \f
15817 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
15818 with all elements equal to VAR. Return true if successful. */
15819
15820 static bool
15821 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
15822 rtx target, rtx val)
15823 {
15824 enum machine_mode smode, wsmode, wvmode;
15825 rtx x;
15826
15827 switch (mode)
15828 {
15829 case V2SImode:
15830 case V2SFmode:
15831 if (!mmx_ok && !TARGET_SSE)
15832 return false;
15833 /* FALLTHRU */
15834
15835 case V2DFmode:
15836 case V2DImode:
15837 case V4SFmode:
15838 case V4SImode:
15839 val = force_reg (GET_MODE_INNER (mode), val);
15840 x = gen_rtx_VEC_DUPLICATE (mode, val);
15841 emit_insn (gen_rtx_SET (VOIDmode, target, x));
15842 return true;
15843
15844 case V4HImode:
15845 if (!mmx_ok)
15846 return false;
15847 val = gen_lowpart (SImode, val);
15848 x = gen_rtx_TRUNCATE (HImode, val);
15849 x = gen_rtx_VEC_DUPLICATE (mode, x);
15850 emit_insn (gen_rtx_SET (VOIDmode, target, x));
15851 return true;
15852
15853 case V8QImode:
15854 if (!mmx_ok)
15855 return false;
15856 smode = QImode;
15857 wsmode = HImode;
15858 wvmode = V4HImode;
15859 goto widen;
15860 case V8HImode:
15861 smode = HImode;
15862 wsmode = SImode;
15863 wvmode = V4SImode;
15864 goto widen;
15865 case V16QImode:
15866 smode = QImode;
15867 wsmode = HImode;
15868 wvmode = V8HImode;
15869 goto widen;
15870 widen:
15871 /* Replicate the value once into the next wider mode and recurse. */
15872 val = convert_modes (wsmode, smode, val, true);
15873 x = expand_simple_binop (wsmode, ASHIFT, val,
15874 GEN_INT (GET_MODE_BITSIZE (smode)),
15875 NULL_RTX, 1, OPTAB_LIB_WIDEN);
15876 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
15877
15878 x = gen_reg_rtx (wvmode);
15879 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
15880 gcc_unreachable ();
15881 emit_move_insn (target, gen_lowpart (mode, x));
15882 return true;
15883
15884 default:
15885 return false;
15886 }
15887 }
15888
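/* Worked example of the widening trick above (editor's note): to build a
   V16QImode vector of sixteen copies of the byte B, the byte is first
   replicated within a scalar - val = B | (B << 8) - giving an HImode value
   whose two halves both equal B; the function then recurses to build a
   V8HImode duplicate of that HImode value, and the result reinterpreted as
   V16QImode is the desired sixteen copies of B.  */
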
15889 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
15890 whose low element is VAR, and other elements are zero. Return true
15891 if successful. */
15892
15893 static bool
15894 ix86_expand_vector_init_low_nonzero (bool mmx_ok, enum machine_mode mode,
15895 rtx target, rtx var)
15896 {
15897 enum machine_mode vsimode;
15898 rtx x;
15899
15900 switch (mode)
15901 {
15902 case V2SFmode:
15903 case V2SImode:
15904 if (!mmx_ok && !TARGET_SSE)
15905 return false;
15906 /* FALLTHRU */
15907
15908 case V2DFmode:
15909 case V2DImode:
15910 var = force_reg (GET_MODE_INNER (mode), var);
15911 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
15912 emit_insn (gen_rtx_SET (VOIDmode, target, x));
15913 return true;
15914
15915 case V4SFmode:
15916 case V4SImode:
15917 var = force_reg (GET_MODE_INNER (mode), var);
15918 x = gen_rtx_VEC_DUPLICATE (mode, var);
15919 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
15920 emit_insn (gen_rtx_SET (VOIDmode, target, x));
15921 return true;
15922
15923 case V8HImode:
15924 case V16QImode:
15925 vsimode = V4SImode;
15926 goto widen;
15927 case V4HImode:
15928 case V8QImode:
15929 if (!mmx_ok)
15930 return false;
15931 vsimode = V2SImode;
15932 goto widen;
15933 widen:
15934 /* Zero extend the variable element to SImode and recurse. */
15935 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
15936
15937 x = gen_reg_rtx (vsimode);
15938 if (!ix86_expand_vector_init_low_nonzero (mmx_ok, vsimode, x, var))
15939 gcc_unreachable ();
15940
15941 emit_move_insn (target, gen_lowpart (mode, x));
15942 return true;
15943
15944 default:
15945 return false;
15946 }
15947 }
15948
15949 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
15950 consisting of the values in VALS. It is known that all elements
15951 except ONE_VAR are constants. Return true if successful. */
15952
15953 static bool
15954 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
15955 rtx target, rtx vals, int one_var)
15956 {
15957 rtx var = XVECEXP (vals, 0, one_var);
15958 enum machine_mode wmode;
15959 rtx const_vec, x;
15960
15961 XVECEXP (vals, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
15962 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
15963
15964 switch (mode)
15965 {
15966 case V2DFmode:
15967 case V2DImode:
15968 case V2SFmode:
15969 case V2SImode:
15970 /* For the two element vectors, it's just as easy to use
15971 the general case. */
15972 return false;
15973
15974 case V4SFmode:
15975 case V4SImode:
15976 case V8HImode:
15977 case V4HImode:
15978 break;
15979
15980 case V16QImode:
15981 wmode = V8HImode;
15982 goto widen;
15983 case V8QImode:
15984 wmode = V4HImode;
15985 goto widen;
15986 widen:
15987 /* There's no way to set one QImode entry easily. Combine
15988 the variable value with its adjacent constant value, and
15989 promote to an HImode set. */
15990 x = XVECEXP (vals, 0, one_var ^ 1);
15991 if (one_var & 1)
15992 {
15993 var = convert_modes (HImode, QImode, var, true);
15994 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
15995 NULL_RTX, 1, OPTAB_LIB_WIDEN);
15996 x = GEN_INT (INTVAL (x) & 0xff);
15997 }
15998 else
15999 {
16000 var = convert_modes (HImode, QImode, var, true);
16001 x = gen_int_mode (INTVAL (x) << 8, HImode);
16002 }
16003 if (x != const0_rtx)
16004 var = expand_simple_binop (HImode, IOR, var, x, var,
16005 1, OPTAB_LIB_WIDEN);
16006
16007 x = gen_reg_rtx (wmode);
16008 emit_move_insn (x, gen_lowpart (wmode, const_vec));
16009 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
16010
16011 emit_move_insn (target, gen_lowpart (mode, x));
16012 return true;
16013
16014 default:
16015 return false;
16016 }
16017
16018 emit_move_insn (target, const_vec);
16019 ix86_expand_vector_set (mmx_ok, target, var, one_var);
16020 return true;
16021 }
16022
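/* Worked example of the QImode promotion above (editor's note): for a
   V16QImode vector whose only variable element is number 5 (an odd
   position), the variable byte is shifted left by 8 and IORed with the
   constant byte at position 4; the combined HImode value is then inserted
   as element 5 >> 1 == 2 of the V8HImode view of the constant vector held
   in a scratch register, and the whole register is copied back to the
   target in the original mode.  */
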
16023 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
16024 all values variable, and none identical. */
16025
16026 static void
16027 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
16028 rtx target, rtx vals)
16029 {
16030 enum machine_mode half_mode = GET_MODE_INNER (mode);
16031 rtx op0 = NULL, op1 = NULL;
16032 bool use_vec_concat = false;
16033
16034 switch (mode)
16035 {
16036 case V2SFmode:
16037 case V2SImode:
16038 if (!mmx_ok && !TARGET_SSE)
16039 break;
16040 /* FALLTHRU */
16041
16042 case V2DFmode:
16043 case V2DImode:
16044 /* For the two element vectors, we always implement VEC_CONCAT. */
16045 op0 = XVECEXP (vals, 0, 0);
16046 op1 = XVECEXP (vals, 0, 1);
16047 use_vec_concat = true;
16048 break;
16049
16050 case V4SFmode:
16051 half_mode = V2SFmode;
16052 goto half;
16053 case V4SImode:
16054 half_mode = V2SImode;
16055 goto half;
16056 half:
16057 {
16058 rtvec v;
16059
16060 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
16061 Recurse to load the two halves. */
16062
16063 op0 = gen_reg_rtx (half_mode);
16064 v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
16065 ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
16066
16067 op1 = gen_reg_rtx (half_mode);
16068 v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
16069 ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
16070
16071 use_vec_concat = true;
16072 }
16073 break;
16074
16075 case V8HImode:
16076 case V16QImode:
16077 case V4HImode:
16078 case V8QImode:
16079 break;
16080
16081 default:
16082 gcc_unreachable ();
16083 }
16084
16085 if (use_vec_concat)
16086 {
16087 if (!register_operand (op0, half_mode))
16088 op0 = force_reg (half_mode, op0);
16089 if (!register_operand (op1, half_mode))
16090 op1 = force_reg (half_mode, op1);
16091
16092 emit_insn (gen_rtx_SET (VOIDmode, target,
16093 gen_rtx_VEC_CONCAT (mode, op0, op1)));
16094 }
16095 else
16096 {
16097 int i, j, n_elts, n_words, n_elt_per_word;
16098 enum machine_mode inner_mode;
16099 rtx words[4], shift;
16100
16101 inner_mode = GET_MODE_INNER (mode);
16102 n_elts = GET_MODE_NUNITS (mode);
16103 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
16104 n_elt_per_word = n_elts / n_words;
16105 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
16106
16107 for (i = 0; i < n_words; ++i)
16108 {
16109 rtx word = NULL_RTX;
16110
16111 for (j = 0; j < n_elt_per_word; ++j)
16112 {
16113 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
16114 elt = convert_modes (word_mode, inner_mode, elt, true);
16115
16116 if (j == 0)
16117 word = elt;
16118 else
16119 {
16120 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
16121 word, 1, OPTAB_LIB_WIDEN);
16122 word = expand_simple_binop (word_mode, IOR, word, elt,
16123 word, 1, OPTAB_LIB_WIDEN);
16124 }
16125 }
16126
16127 words[i] = word;
16128 }
16129
16130 if (n_words == 1)
16131 emit_move_insn (target, gen_lowpart (mode, words[0]));
16132 else if (n_words == 2)
16133 {
16134 rtx tmp = gen_reg_rtx (mode);
16135 emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
16136 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
16137 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
16138 emit_move_insn (target, tmp);
16139 }
16140 else if (n_words == 4)
16141 {
16142 rtx tmp = gen_reg_rtx (V4SImode);
16143 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
16144 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
16145 emit_move_insn (target, gen_lowpart (mode, tmp));
16146 }
16147 else
16148 gcc_unreachable ();
16149 }
16150 }
16151
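/* Worked example of the word-building loop above (editor's note): for a
   V8HImode vector {e0, ..., e7} on a 32-bit target (word_mode == SImode),
   n_words is 4 and each word packs two elements, lowest-indexed element in
   the least significant half:

	words[0] = (e1 << 16) | e0;
	words[1] = (e3 << 16) | e2;  ... and so on.

   The four words are then recombined through the n_words == 4 path, which
   builds a V4SImode vector and copies it back in the original mode.  */
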
16152 /* Initialize vector TARGET via VALS. Suppress the use of MMX
16153 instructions unless MMX_OK is true. */
16154
16155 void
16156 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
16157 {
16158 enum machine_mode mode = GET_MODE (target);
16159 enum machine_mode inner_mode = GET_MODE_INNER (mode);
16160 int n_elts = GET_MODE_NUNITS (mode);
16161 int n_var = 0, one_var = -1;
16162 bool all_same = true, all_const_zero = true;
16163 int i;
16164 rtx x;
16165
16166 for (i = 0; i < n_elts; ++i)
16167 {
16168 x = XVECEXP (vals, 0, i);
16169 if (!CONSTANT_P (x))
16170 n_var++, one_var = i;
16171 else if (x != CONST0_RTX (inner_mode))
16172 all_const_zero = false;
16173 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
16174 all_same = false;
16175 }
16176
16177 /* Constants are best loaded from the constant pool. */
16178 if (n_var == 0)
16179 {
16180 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
16181 return;
16182 }
16183
16184 /* If all values are identical, broadcast the value. */
16185 if (all_same
16186 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
16187 XVECEXP (vals, 0, 0)))
16188 return;
16189
16190 /* Values where only one field is non-constant are best loaded from
16191 the pool and overwritten via move later. */
16192 if (n_var == 1)
16193 {
16194 if (all_const_zero && one_var == 0
16195 && ix86_expand_vector_init_low_nonzero (mmx_ok, mode, target,
16196 XVECEXP (vals, 0, 0)))
16197 return;
16198
16199 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
16200 return;
16201 }
16202
16203 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
16204 }
16205
16206 void
16207 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
16208 {
16209 enum machine_mode mode = GET_MODE (target);
16210 enum machine_mode inner_mode = GET_MODE_INNER (mode);
16211 bool use_vec_merge = false;
16212 rtx tmp;
16213
16214 switch (mode)
16215 {
16216 case V2SFmode:
16217 case V2SImode:
16218 if (!mmx_ok)
16219 break;
16220 /* FALLTHRU */
16221
16222 case V2DFmode:
16223 case V2DImode:
16224 {
16225 rtx op0, op1;
16226
16227 /* For the two element vectors, we implement a VEC_CONCAT with
16228 the extraction of the other element. */
16229
16230 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
16231 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
16232
16233 if (elt == 0)
16234 op0 = val, op1 = tmp;
16235 else
16236 op0 = tmp, op1 = val;
16237
16238 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
16239 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
16240 }
16241 return;
16242
16243 case V4SFmode:
16244 switch (elt)
16245 {
16246 case 0:
16247 use_vec_merge = true;
16248 break;
16249
16250 case 1:
16251 /* tmp = op0 = A B C D */
16252 tmp = copy_to_reg (target);
16253
16254 /* op0 = C C D D */
16255 emit_insn (gen_sse_unpcklps (target, target, target));
16256
16257 /* op0 = C C D X */
16258 ix86_expand_vector_set (false, target, val, 0);
16259
16260 /* op0 = A B X D */
16261 emit_insn (gen_sse_shufps_1 (target, target, tmp,
16262 GEN_INT (1), GEN_INT (0),
16263 GEN_INT (2), GEN_INT (3)));
16264 return;
16265
16266 case 2:
16267 tmp = copy_to_reg (target);
16268 ix86_expand_vector_set (false, target, val, 0);
16269 emit_insn (gen_sse_shufps_1 (target, target, tmp,
16270 GEN_INT (0), GEN_INT (1),
16271 GEN_INT (0), GEN_INT (3)));
16272 return;
16273
16274 case 3:
16275 tmp = copy_to_reg (target);
16276 ix86_expand_vector_set (false, target, val, 0);
16277 emit_insn (gen_sse_shufps_1 (target, target, tmp,
16278 GEN_INT (0), GEN_INT (1),
16279 GEN_INT (2), GEN_INT (0)));
16280 return;
16281
16282 default:
16283 gcc_unreachable ();
16284 }
16285 break;
16286
16287 case V4SImode:
16288 /* Element 0 handled by vec_merge below. */
16289 if (elt == 0)
16290 {
16291 use_vec_merge = true;
16292 break;
16293 }
16294
16295 if (TARGET_SSE2)
16296 {
16297 /* With SSE2, use integer shuffles to swap element 0 and ELT,
16298 store into element 0, then shuffle them back. */
16299
16300 rtx order[4];
16301
16302 order[0] = GEN_INT (elt);
16303 order[1] = const1_rtx;
16304 order[2] = const2_rtx;
16305 order[3] = GEN_INT (3);
16306 order[elt] = const0_rtx;
16307
16308 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
16309 order[1], order[2], order[3]));
16310
16311 ix86_expand_vector_set (false, target, val, 0);
16312
16313 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
16314 order[1], order[2], order[3]));
16315 }
16316 else
16317 {
16318 /* For SSE1, we have to reuse the V4SF code. */
16319 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
16320 gen_lowpart (SFmode, val), elt);
16321 }
16322 return;
16323
16324 case V8HImode:
16325 use_vec_merge = TARGET_SSE2;
16326 break;
16327 case V4HImode:
16328 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
16329 break;
16330
16331 case V16QImode:
16332 case V8QImode:
16333 default:
16334 break;
16335 }
16336
16337 if (use_vec_merge)
16338 {
16339 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
16340 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
16341 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
16342 }
16343 else
16344 {
16345 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
16346
16347 emit_move_insn (mem, target);
16348
16349 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
16350 emit_move_insn (tmp, val);
16351
16352 emit_move_insn (target, mem);
16353 }
16354 }
16355
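/* Editor's note on the two fallback strategies above: when use_vec_merge is
   set (e.g. V8HImode with SSE2), the VEC_DUPLICATE / VEC_MERGE pair with
   mask (1 << elt) is intended to match the machine description's
   pinsrw-style single-element insert.  Otherwise the vector is spilled to a
   stack temporary, the element at byte offset
   elt * GET_MODE_SIZE (inner_mode) is overwritten with an ordinary scalar
   store, and the whole vector is reloaded.  */
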
16356 void
16357 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
16358 {
16359 enum machine_mode mode = GET_MODE (vec);
16360 enum machine_mode inner_mode = GET_MODE_INNER (mode);
16361 bool use_vec_extr = false;
16362 rtx tmp;
16363
16364 switch (mode)
16365 {
16366 case V2SImode:
16367 case V2SFmode:
16368 if (!mmx_ok)
16369 break;
16370 /* FALLTHRU */
16371
16372 case V2DFmode:
16373 case V2DImode:
16374 use_vec_extr = true;
16375 break;
16376
16377 case V4SFmode:
16378 switch (elt)
16379 {
16380 case 0:
16381 tmp = vec;
16382 break;
16383
16384 case 1:
16385 case 3:
16386 tmp = gen_reg_rtx (mode);
16387 emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
16388 GEN_INT (elt), GEN_INT (elt),
16389 GEN_INT (elt), GEN_INT (elt)));
16390 break;
16391
16392 case 2:
16393 tmp = gen_reg_rtx (mode);
16394 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
16395 break;
16396
16397 default:
16398 gcc_unreachable ();
16399 }
16400 vec = tmp;
16401 use_vec_extr = true;
16402 elt = 0;
16403 break;
16404
16405 case V4SImode:
16406 if (TARGET_SSE2)
16407 {
16408 switch (elt)
16409 {
16410 case 0:
16411 tmp = vec;
16412 break;
16413
16414 case 1:
16415 case 3:
16416 tmp = gen_reg_rtx (mode);
16417 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
16418 GEN_INT (elt), GEN_INT (elt),
16419 GEN_INT (elt), GEN_INT (elt)));
16420 break;
16421
16422 case 2:
16423 tmp = gen_reg_rtx (mode);
16424 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
16425 break;
16426
16427 default:
16428 gcc_unreachable ();
16429 }
16430 vec = tmp;
16431 use_vec_extr = true;
16432 elt = 0;
16433 }
16434 else
16435 {
16436 /* For SSE1, we have to reuse the V4SF code. */
16437 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
16438 gen_lowpart (V4SFmode, vec), elt);
16439 return;
16440 }
16441 break;
16442
16443 case V8HImode:
16444 use_vec_extr = TARGET_SSE2;
16445 break;
16446 case V4HImode:
16447 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
16448 break;
16449
16450 case V16QImode:
16451 case V8QImode:
16452 /* ??? Could extract the appropriate HImode element and shift. */
16453 default:
16454 break;
16455 }
16456
16457 if (use_vec_extr)
16458 {
16459 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
16460 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
16461
16462 /* Let the rtl optimizers know about the zero extension performed. */
16463 if (inner_mode == HImode)
16464 {
16465 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
16466 target = gen_lowpart (SImode, target);
16467 }
16468
16469 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
16470 }
16471 else
16472 {
16473 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
16474
16475 emit_move_insn (mem, vec);
16476
16477 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
16478 emit_move_insn (target, tmp);
16479 }
16480 }
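
/* Editor's note on the HImode special case above: the SSE2 pextrw
   instruction writes its result zero-extended into a 32-bit register, so
   wrapping the VEC_SELECT in a ZERO_EXTEND to SImode (and writing the
   SImode lowpart of TARGET) records that fact for the RTL optimizers and
   lets later redundant zero extensions be deleted.  */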
16481 \f
16482 /* Implements target hook vector_mode_supported_p. */
16483 static bool
16484 ix86_vector_mode_supported_p (enum machine_mode mode)
16485 {
16486 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
16487 return true;
16488 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
16489 return true;
16490 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
16491 return true;
16492 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
16493 return true;
16494 return false;
16495 }
16496
16497 /* Worker function for TARGET_MD_ASM_CLOBBERS.
16498
16499 We do this in the new i386 backend to maintain source compatibility
16500 with the old cc0-based compiler. */
16501
16502 static tree
16503 ix86_md_asm_clobbers (tree clobbers)
16504 {
16505 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
16506 clobbers);
16507 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
16508 clobbers);
16509 clobbers = tree_cons (NULL_TREE, build_string (7, "dirflag"),
16510 clobbers);
16511 return clobbers;
16512 }
16513
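/* Illustrative example (editor's note): with this hook, a user-level

	asm volatile ("cld; rep movsb"
		      : : "S" (src), "D" (dst), "c" (n) : "memory");

   is treated as if "flags", "fpsr" and "dirflag" had also been listed as
   clobbers, matching what the old cc0-based compiler assumed about every
   asm statement.  (The operands shown are only an example.)  */
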
16514 /* Worker function for REVERSE_CONDITION. */
16515
16516 enum rtx_code
16517 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
16518 {
16519 return (mode != CCFPmode && mode != CCFPUmode
16520 ? reverse_condition (code)
16521 : reverse_condition_maybe_unordered (code));
16522 }
16523
16524 /* Output code to perform an x87 FP register move, from OPERANDS[1]
16525 to OPERANDS[0]. */
16526
16527 const char *
16528 output_387_reg_move (rtx insn, rtx *operands)
16529 {
16530 if (REG_P (operands[1])
16531 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
16532 {
16533 if (REGNO (operands[0]) == FIRST_STACK_REG
16534 && TARGET_USE_FFREEP)
16535 return "ffreep\t%y0";
16536 return "fstp\t%y0";
16537 }
16538 if (STACK_TOP_P (operands[0]))
16539 return "fld%z1\t%y1";
16540 return "fst\t%y0";
16541 }
16542
16543 /* Output code to perform a conditional jump to LABEL, if the C2 flag in the
16544 FP status register is set. */
16545
16546 void
16547 ix86_emit_fp_unordered_jump (rtx label)
16548 {
16549 rtx reg = gen_reg_rtx (HImode);
16550 rtx temp;
16551
16552 emit_insn (gen_x86_fnstsw_1 (reg));
16553
16554 if (TARGET_USE_SAHF)
16555 {
16556 emit_insn (gen_x86_sahf_1 (reg));
16557
16558 temp = gen_rtx_REG (CCmode, FLAGS_REG);
16559 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
16560 }
16561 else
16562 {
16563 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
16564
16565 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
16566 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
16567 }
16568
16569 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
16570 gen_rtx_LABEL_REF (VOIDmode, label),
16571 pc_rtx);
16572 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
16573 emit_jump_insn (temp);
16574 }
16575
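/* Illustrative example (editor's sketch): assuming the fnstsw pattern stores
   the FP status word into %ax, the sequence emitted above is roughly

	fnstsw	%ax
	sahf
	jp	label		; C2 maps to PF after sahf

   when TARGET_USE_SAHF, and otherwise

	fnstsw	%ax
	testb	$4, %ah		; C2 is bit 2 of the high status byte
	jne	label
   */
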
16576 /* Output code to perform a log1p XFmode calculation. */
16577
16578 void ix86_emit_i387_log1p (rtx op0, rtx op1)
16579 {
16580 rtx label1 = gen_label_rtx ();
16581 rtx label2 = gen_label_rtx ();
16582
16583 rtx tmp = gen_reg_rtx (XFmode);
16584 rtx tmp2 = gen_reg_rtx (XFmode);
16585
16586 emit_insn (gen_absxf2 (tmp, op1));
16587 emit_insn (gen_cmpxf (tmp,
16588 CONST_DOUBLE_FROM_REAL_VALUE (
16589 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
16590 XFmode)));
16591 emit_jump_insn (gen_bge (label1));
16592
16593 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
16594 emit_insn (gen_fyl2xp1_xf3 (op0, tmp2, op1));
16595 emit_jump (label2);
16596
16597 emit_label (label1);
16598 emit_move_insn (tmp, CONST1_RTX (XFmode));
16599 emit_insn (gen_addxf3 (tmp, op1, tmp));
16600 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
16601 emit_insn (gen_fyl2x_xf3 (op0, tmp2, tmp));
16602
16603 emit_label (label2);
16604 }
16605
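/* Editor's note on the math above: the comparison threshold
   0.29289321881345... is 1 - sqrt(2)/2.  For |op1| below it, op1 lies in
   the domain for which fyl2xp1 is specified, and computing log2 (1 + op1)
   directly from op1 avoids the cancellation that forming 1 + op1 explicitly
   would cause for tiny arguments; the result is
   op0 = ln(2) * log2 (1 + op1) = log1p (op1), with ln(2) supplied by the
   fldln2 constant.  For larger |op1| the fallback forms 1 + op1 explicitly
   and uses fyl2x instead.  */
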
16606 /* Solaris named-section hook. Parameters are as for
16607 named_section_real. */
16608
16609 static void
16610 i386_solaris_elf_named_section (const char *name, unsigned int flags,
16611 tree decl)
16612 {
16613 /* With Binutils 2.15, the "@unwind" marker must be specified on
16614 every occurrence of the ".eh_frame" section, not just the first
16615 one. */
16616 if (TARGET_64BIT
16617 && strcmp (name, ".eh_frame") == 0)
16618 {
16619 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
16620 flags & SECTION_WRITE ? "aw" : "a");
16621 return;
16622 }
16623 default_elf_asm_named_section (name, flags, decl);
16624 }
16625
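/* Example of the effect (editor's note): on a 64-bit Solaris target the
   hook above prints

	.section	.eh_frame,"a",@unwind

   (or "aw" when the section is writable) every time .eh_frame is switched
   to, so the @unwind marker appears on every occurrence as Binutils 2.15
   requires.  */
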
16626 #include "gt-i386.h"