[gcc.git] / gcc / config / i386 / i386.c
1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "tm_p.h"
29 #include "regs.h"
30 #include "hard-reg-set.h"
31 #include "real.h"
32 #include "insn-config.h"
33 #include "conditions.h"
34 #include "output.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
37 #include "flags.h"
38 #include "except.h"
39 #include "function.h"
40 #include "recog.h"
41 #include "expr.h"
42 #include "optabs.h"
43 #include "toplev.h"
44 #include "basic-block.h"
45 #include "ggc.h"
46 #include "target.h"
47 #include "target-def.h"
48 #include "langhooks.h"
49 #include "cgraph.h"
50 #include "tree-gimple.h"
51
52 #ifndef CHECK_STACK_LIMIT
53 #define CHECK_STACK_LIMIT (-1)
54 #endif
55
56 /* Return index of given mode in mult and division cost tables. */
57 #define MODE_INDEX(mode) \
58 ((mode) == QImode ? 0 \
59 : (mode) == HImode ? 1 \
60 : (mode) == SImode ? 2 \
61 : (mode) == DImode ? 3 \
62 : 4)
63
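/* Illustrative sketch (editorial; assumes the mult_init field name of the
   cost tables below): the index computed by MODE_INDEX selects the per-mode
   entry, so the cost of starting an SImode multiply on the current tuning
   target would be looked up roughly as
       ix86_cost->mult_init[MODE_INDEX (SImode)]
   i.e. entry 2; any mode not listed falls through to entry 4.  */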
64 /* Processor costs (relative to an add) */
65 static const
66 struct processor_costs size_cost = { /* costs for tuning for size */
67 2, /* cost of an add instruction */
68 3, /* cost of a lea instruction */
69 2, /* variable shift costs */
70 3, /* constant shift costs */
71 {3, 3, 3, 3, 5}, /* cost of starting a multiply */
72 0, /* cost of multiply per each bit set */
73 {3, 3, 3, 3, 5}, /* cost of a divide/mod */
74 3, /* cost of movsx */
75 3, /* cost of movzx */
76 0, /* "large" insn */
77 2, /* MOVE_RATIO */
78 2, /* cost for loading QImode using movzbl */
79 {2, 2, 2}, /* cost of loading integer registers
80 in QImode, HImode and SImode.
81 Relative to reg-reg move (2). */
82 {2, 2, 2}, /* cost of storing integer registers */
83 2, /* cost of reg,reg fld/fst */
84 {2, 2, 2}, /* cost of loading fp registers
85 in SFmode, DFmode and XFmode */
86 {2, 2, 2}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
87 3, /* cost of moving MMX register */
88 {3, 3}, /* cost of loading MMX registers
89 in SImode and DImode */
90 {3, 3}, /* cost of storing MMX registers
91 in SImode and DImode */
92 3, /* cost of moving SSE register */
93 {3, 3, 3}, /* cost of loading SSE registers
94 in SImode, DImode and TImode */
95 {3, 3, 3}, /* cost of storing SSE registers
96 in SImode, DImode and TImode */
97 3, /* MMX or SSE register to integer */
98 0, /* size of prefetch block */
99 0, /* number of parallel prefetches */
100 1, /* Branch cost */
101 2, /* cost of FADD and FSUB insns. */
102 2, /* cost of FMUL instruction. */
103 2, /* cost of FDIV instruction. */
104 2, /* cost of FABS instruction. */
105 2, /* cost of FCHS instruction. */
106 2, /* cost of FSQRT instruction. */
107 };
108
109 /* Processor costs (relative to an add) */
110 static const
111 struct processor_costs i386_cost = { /* 386 specific costs */
112 1, /* cost of an add instruction */
113 1, /* cost of a lea instruction */
114 3, /* variable shift costs */
115 2, /* constant shift costs */
116 {6, 6, 6, 6, 6}, /* cost of starting a multiply */
117 1, /* cost of multiply per each bit set */
118 {23, 23, 23, 23, 23}, /* cost of a divide/mod */
119 3, /* cost of movsx */
120 2, /* cost of movzx */
121 15, /* "large" insn */
122 3, /* MOVE_RATIO */
123 4, /* cost for loading QImode using movzbl */
124 {2, 4, 2}, /* cost of loading integer registers
125 in QImode, HImode and SImode.
126 Relative to reg-reg move (2). */
127 {2, 4, 2}, /* cost of storing integer registers */
128 2, /* cost of reg,reg fld/fst */
129 {8, 8, 8}, /* cost of loading fp registers
130 in SFmode, DFmode and XFmode */
131 {8, 8, 8}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
132 2, /* cost of moving MMX register */
133 {4, 8}, /* cost of loading MMX registers
134 in SImode and DImode */
135 {4, 8}, /* cost of storing MMX registers
136 in SImode and DImode */
137 2, /* cost of moving SSE register */
138 {4, 8, 16}, /* cost of loading SSE registers
139 in SImode, DImode and TImode */
140 {4, 8, 16}, /* cost of storing SSE registers
141 in SImode, DImode and TImode */
142 3, /* MMX or SSE register to integer */
143 0, /* size of prefetch block */
144 0, /* number of parallel prefetches */
145 1, /* Branch cost */
146 23, /* cost of FADD and FSUB insns. */
147 27, /* cost of FMUL instruction. */
148 88, /* cost of FDIV instruction. */
149 22, /* cost of FABS instruction. */
150 24, /* cost of FCHS instruction. */
151 122, /* cost of FSQRT instruction. */
152 };
153
154 static const
155 struct processor_costs i486_cost = { /* 486 specific costs */
156 1, /* cost of an add instruction */
157 1, /* cost of a lea instruction */
158 3, /* variable shift costs */
159 2, /* constant shift costs */
160 {12, 12, 12, 12, 12}, /* cost of starting a multiply */
161 1, /* cost of multiply per each bit set */
162 {40, 40, 40, 40, 40}, /* cost of a divide/mod */
163 3, /* cost of movsx */
164 2, /* cost of movzx */
165 15, /* "large" insn */
166 3, /* MOVE_RATIO */
167 4, /* cost for loading QImode using movzbl */
168 {2, 4, 2}, /* cost of loading integer registers
169 in QImode, HImode and SImode.
170 Relative to reg-reg move (2). */
171 {2, 4, 2}, /* cost of storing integer registers */
172 2, /* cost of reg,reg fld/fst */
173 {8, 8, 8}, /* cost of loading fp registers
174 in SFmode, DFmode and XFmode */
175 {8, 8, 8}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
176 2, /* cost of moving MMX register */
177 {4, 8}, /* cost of loading MMX registers
178 in SImode and DImode */
179 {4, 8}, /* cost of storing MMX registers
180 in SImode and DImode */
181 2, /* cost of moving SSE register */
182 {4, 8, 16}, /* cost of loading SSE registers
183 in SImode, DImode and TImode */
184 {4, 8, 16}, /* cost of storing SSE registers
185 in SImode, DImode and TImode */
186 3, /* MMX or SSE register to integer */
187 0, /* size of prefetch block */
188 0, /* number of parallel prefetches */
189 1, /* Branch cost */
190 8, /* cost of FADD and FSUB insns. */
191 16, /* cost of FMUL instruction. */
192 73, /* cost of FDIV instruction. */
193 3, /* cost of FABS instruction. */
194 3, /* cost of FCHS instruction. */
195 83, /* cost of FSQRT instruction. */
196 };
197
198 static const
199 struct processor_costs pentium_cost = {
200 1, /* cost of an add instruction */
201 1, /* cost of a lea instruction */
202 4, /* variable shift costs */
203 1, /* constant shift costs */
204 {11, 11, 11, 11, 11}, /* cost of starting a multiply */
205 0, /* cost of multiply per each bit set */
206 {25, 25, 25, 25, 25}, /* cost of a divide/mod */
207 3, /* cost of movsx */
208 2, /* cost of movzx */
209 8, /* "large" insn */
210 6, /* MOVE_RATIO */
211 6, /* cost for loading QImode using movzbl */
212 {2, 4, 2}, /* cost of loading integer registers
213 in QImode, HImode and SImode.
214 Relative to reg-reg move (2). */
215 {2, 4, 2}, /* cost of storing integer registers */
216 2, /* cost of reg,reg fld/fst */
217 {2, 2, 6}, /* cost of loading fp registers
218 in SFmode, DFmode and XFmode */
219 {4, 4, 6}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
220 8, /* cost of moving MMX register */
221 {8, 8}, /* cost of loading MMX registers
222 in SImode and DImode */
223 {8, 8}, /* cost of storing MMX registers
224 in SImode and DImode */
225 2, /* cost of moving SSE register */
226 {4, 8, 16}, /* cost of loading SSE registers
227 in SImode, DImode and TImode */
228 {4, 8, 16}, /* cost of storing SSE registers
229 in SImode, DImode and TImode */
230 3, /* MMX or SSE register to integer */
231 0, /* size of prefetch block */
232 0, /* number of parallel prefetches */
233 2, /* Branch cost */
234 3, /* cost of FADD and FSUB insns. */
235 3, /* cost of FMUL instruction. */
236 39, /* cost of FDIV instruction. */
237 1, /* cost of FABS instruction. */
238 1, /* cost of FCHS instruction. */
239 70, /* cost of FSQRT instruction. */
240 };
241
242 static const
243 struct processor_costs pentiumpro_cost = {
244 1, /* cost of an add instruction */
245 1, /* cost of a lea instruction */
246 1, /* variable shift costs */
247 1, /* constant shift costs */
248 {4, 4, 4, 4, 4}, /* cost of starting a multiply */
249 0, /* cost of multiply per each bit set */
250 {17, 17, 17, 17, 17}, /* cost of a divide/mod */
251 1, /* cost of movsx */
252 1, /* cost of movzx */
253 8, /* "large" insn */
254 6, /* MOVE_RATIO */
255 2, /* cost for loading QImode using movzbl */
256 {4, 4, 4}, /* cost of loading integer registers
257 in QImode, HImode and SImode.
258 Relative to reg-reg move (2). */
259 {2, 2, 2}, /* cost of storing integer registers */
260 2, /* cost of reg,reg fld/fst */
261 {2, 2, 6}, /* cost of loading fp registers
262 in SFmode, DFmode and XFmode */
263 {4, 4, 6}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
264 2, /* cost of moving MMX register */
265 {2, 2}, /* cost of loading MMX registers
266 in SImode and DImode */
267 {2, 2}, /* cost of storing MMX registers
268 in SImode and DImode */
269 2, /* cost of moving SSE register */
270 {2, 2, 8}, /* cost of loading SSE registers
271 in SImode, DImode and TImode */
272 {2, 2, 8}, /* cost of storing SSE registers
273 in SImode, DImode and TImode */
274 3, /* MMX or SSE register to integer */
275 32, /* size of prefetch block */
276 6, /* number of parallel prefetches */
277 2, /* Branch cost */
278 3, /* cost of FADD and FSUB insns. */
279 5, /* cost of FMUL instruction. */
280 56, /* cost of FDIV instruction. */
281 2, /* cost of FABS instruction. */
282 2, /* cost of FCHS instruction. */
283 56, /* cost of FSQRT instruction. */
284 };
285
286 static const
287 struct processor_costs k6_cost = {
288 1, /* cost of an add instruction */
289 2, /* cost of a lea instruction */
290 1, /* variable shift costs */
291 1, /* constant shift costs */
292 {3, 3, 3, 3, 3}, /* cost of starting a multiply */
293 0, /* cost of multiply per each bit set */
294 {18, 18, 18, 18, 18}, /* cost of a divide/mod */
295 2, /* cost of movsx */
296 2, /* cost of movzx */
297 8, /* "large" insn */
298 4, /* MOVE_RATIO */
299 3, /* cost for loading QImode using movzbl */
300 {4, 5, 4}, /* cost of loading integer registers
301 in QImode, HImode and SImode.
302 Relative to reg-reg move (2). */
303 {2, 3, 2}, /* cost of storing integer registers */
304 4, /* cost of reg,reg fld/fst */
305 {6, 6, 6}, /* cost of loading fp registers
306 in SFmode, DFmode and XFmode */
307 {4, 4, 4}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
308 2, /* cost of moving MMX register */
309 {2, 2}, /* cost of loading MMX registers
310 in SImode and DImode */
311 {2, 2}, /* cost of storing MMX registers
312 in SImode and DImode */
313 2, /* cost of moving SSE register */
314 {2, 2, 8}, /* cost of loading SSE registers
315 in SImode, DImode and TImode */
316 {2, 2, 8}, /* cost of storing SSE registers
317 in SImode, DImode and TImode */
318 6, /* MMX or SSE register to integer */
319 32, /* size of prefetch block */
320 1, /* number of parallel prefetches */
321 1, /* Branch cost */
322 2, /* cost of FADD and FSUB insns. */
323 2, /* cost of FMUL instruction. */
324 56, /* cost of FDIV instruction. */
325 2, /* cost of FABS instruction. */
326 2, /* cost of FCHS instruction. */
327 56, /* cost of FSQRT instruction. */
328 };
329
330 static const
331 struct processor_costs athlon_cost = {
332 1, /* cost of an add instruction */
333 2, /* cost of a lea instruction */
334 1, /* variable shift costs */
335 1, /* constant shift costs */
336 {5, 5, 5, 5, 5}, /* cost of starting a multiply */
337 0, /* cost of multiply per each bit set */
338 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
339 1, /* cost of movsx */
340 1, /* cost of movzx */
341 8, /* "large" insn */
342 9, /* MOVE_RATIO */
343 4, /* cost for loading QImode using movzbl */
344 {3, 4, 3}, /* cost of loading integer registers
345 in QImode, HImode and SImode.
346 Relative to reg-reg move (2). */
347 {3, 4, 3}, /* cost of storing integer registers */
348 4, /* cost of reg,reg fld/fst */
349 {4, 4, 12}, /* cost of loading fp registers
350 in SFmode, DFmode and XFmode */
351 {6, 6, 8}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
352 2, /* cost of moving MMX register */
353 {4, 4}, /* cost of loading MMX registers
354 in SImode and DImode */
355 {4, 4}, /* cost of storing MMX registers
356 in SImode and DImode */
357 2, /* cost of moving SSE register */
358 {4, 4, 6}, /* cost of loading SSE registers
359 in SImode, DImode and TImode */
360 {4, 4, 5}, /* cost of storing SSE registers
361 in SImode, DImode and TImode */
362 5, /* MMX or SSE register to integer */
363 64, /* size of prefetch block */
364 6, /* number of parallel prefetches */
365 5, /* Branch cost */
366 4, /* cost of FADD and FSUB insns. */
367 4, /* cost of FMUL instruction. */
368 24, /* cost of FDIV instruction. */
369 2, /* cost of FABS instruction. */
370 2, /* cost of FCHS instruction. */
371 35, /* cost of FSQRT instruction. */
372 };
373
374 static const
375 struct processor_costs k8_cost = {
376 1, /* cost of an add instruction */
377 2, /* cost of a lea instruction */
378 1, /* variable shift costs */
379 1, /* constant shift costs */
380 {3, 4, 3, 4, 5}, /* cost of starting a multiply */
381 0, /* cost of multiply per each bit set */
382 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
383 1, /* cost of movsx */
384 1, /* cost of movzx */
385 8, /* "large" insn */
386 9, /* MOVE_RATIO */
387 4, /* cost for loading QImode using movzbl */
388 {3, 4, 3}, /* cost of loading integer registers
389 in QImode, HImode and SImode.
390 Relative to reg-reg move (2). */
391 {3, 4, 3}, /* cost of storing integer registers */
392 4, /* cost of reg,reg fld/fst */
393 {4, 4, 12}, /* cost of loading fp registers
394 in SFmode, DFmode and XFmode */
395 {6, 6, 8}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
396 2, /* cost of moving MMX register */
397 {3, 3}, /* cost of loading MMX registers
398 in SImode and DImode */
399 {4, 4}, /* cost of storing MMX registers
400 in SImode and DImode */
401 2, /* cost of moving SSE register */
402 {4, 3, 6}, /* cost of loading SSE registers
403 in SImode, DImode and TImode */
404 {4, 4, 5}, /* cost of storing SSE registers
405 in SImode, DImode and TImode */
406 5, /* MMX or SSE register to integer */
407 64, /* size of prefetch block */
408 6, /* number of parallel prefetches */
409 5, /* Branch cost */
410 4, /* cost of FADD and FSUB insns. */
411 4, /* cost of FMUL instruction. */
412 19, /* cost of FDIV instruction. */
413 2, /* cost of FABS instruction. */
414 2, /* cost of FCHS instruction. */
415 35, /* cost of FSQRT instruction. */
416 };
417
418 static const
419 struct processor_costs pentium4_cost = {
420 1, /* cost of an add instruction */
421 3, /* cost of a lea instruction */
422 4, /* variable shift costs */
423 4, /* constant shift costs */
424 {15, 15, 15, 15, 15}, /* cost of starting a multiply */
425 0, /* cost of multiply per each bit set */
426 {56, 56, 56, 56, 56}, /* cost of a divide/mod */
427 1, /* cost of movsx */
428 1, /* cost of movzx */
429 16, /* "large" insn */
430 6, /* MOVE_RATIO */
431 2, /* cost for loading QImode using movzbl */
432 {4, 5, 4}, /* cost of loading integer registers
433 in QImode, HImode and SImode.
434 Relative to reg-reg move (2). */
435 {2, 3, 2}, /* cost of storing integer registers */
436 2, /* cost of reg,reg fld/fst */
437 {2, 2, 6}, /* cost of loading fp registers
438 in SFmode, DFmode and XFmode */
439 {4, 4, 6}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
440 2, /* cost of moving MMX register */
441 {2, 2}, /* cost of loading MMX registers
442 in SImode and DImode */
443 {2, 2}, /* cost of storing MMX registers
444 in SImode and DImode */
445 12, /* cost of moving SSE register */
446 {12, 12, 12}, /* cost of loading SSE registers
447 in SImode, DImode and TImode */
448 {2, 2, 8}, /* cost of storing SSE registers
449 in SImode, DImode and TImode */
450 10, /* MMX or SSE register to integer */
451 64, /* size of prefetch block */
452 6, /* number of parallel prefetches */
453 2, /* Branch cost */
454 5, /* cost of FADD and FSUB insns. */
455 7, /* cost of FMUL instruction. */
456 43, /* cost of FDIV instruction. */
457 2, /* cost of FABS instruction. */
458 2, /* cost of FCHS instruction. */
459 43, /* cost of FSQRT instruction. */
460 };
461
462 static const
463 struct processor_costs nocona_cost = {
464 1, /* cost of an add instruction */
465 1, /* cost of a lea instruction */
466 1, /* variable shift costs */
467 1, /* constant shift costs */
468 {10, 10, 10, 10, 10}, /* cost of starting a multiply */
469 0, /* cost of multiply per each bit set */
470 {66, 66, 66, 66, 66}, /* cost of a divide/mod */
471 1, /* cost of movsx */
472 1, /* cost of movzx */
473 16, /* "large" insn */
474 9, /* MOVE_RATIO */
475 4, /* cost for loading QImode using movzbl */
476 {4, 4, 4}, /* cost of loading integer registers
477 in QImode, HImode and SImode.
478 Relative to reg-reg move (2). */
479 {4, 4, 4}, /* cost of storing integer registers */
480 3, /* cost of reg,reg fld/fst */
481 {12, 12, 12}, /* cost of loading fp registers
482 in SFmode, DFmode and XFmode */
483 {4, 4, 4}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
484 6, /* cost of moving MMX register */
485 {12, 12}, /* cost of loading MMX registers
486 in SImode and DImode */
487 {12, 12}, /* cost of storing MMX registers
488 in SImode and DImode */
489 6, /* cost of moving SSE register */
490 {12, 12, 12}, /* cost of loading SSE registers
491 in SImode, DImode and TImode */
492 {12, 12, 12}, /* cost of storing SSE registers
493 in SImode, DImode and TImode */
494 8, /* MMX or SSE register to integer */
495 128, /* size of prefetch block */
496 8, /* number of parallel prefetches */
497 1, /* Branch cost */
498 6, /* cost of FADD and FSUB insns. */
499 8, /* cost of FMUL instruction. */
500 40, /* cost of FDIV instruction. */
501 3, /* cost of FABS instruction. */
502 3, /* cost of FCHS instruction. */
503 44, /* cost of FSQRT instruction. */
504 };
505
506 const struct processor_costs *ix86_cost = &pentium_cost;
507
508 /* Processor feature/optimization bitmasks. */
509 #define m_386 (1<<PROCESSOR_I386)
510 #define m_486 (1<<PROCESSOR_I486)
511 #define m_PENT (1<<PROCESSOR_PENTIUM)
512 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
513 #define m_K6 (1<<PROCESSOR_K6)
514 #define m_ATHLON (1<<PROCESSOR_ATHLON)
515 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
516 #define m_K8 (1<<PROCESSOR_K8)
517 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
518 #define m_NOCONA (1<<PROCESSOR_NOCONA)
519
520 const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
521 const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
522 const int x86_zero_extend_with_and = m_486 | m_PENT;
523 const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA /* m_386 | m_K6 */;
524 const int x86_double_with_add = ~m_386;
525 const int x86_use_bit_test = m_386;
526 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
527 const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
528 const int x86_fisttp = m_NOCONA;
529 const int x86_3dnow_a = m_ATHLON_K8;
530 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
531 /* Branch hints were put in P4 based on simulation results.  But
532    after P4 was made, no performance benefit was observed with
533    branch hints.  They also increase code size.  As a result,
534    icc never generates branch hints.  */
535 const int x86_branch_hints = 0;
536 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4 | m_NOCONA;
537 const int x86_partial_reg_stall = m_PPRO;
538 const int x86_use_loop = m_K6;
539 const int x86_use_himode_fiop = m_386 | m_486 | m_K6;
540 const int x86_use_simode_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
541 const int x86_use_mov0 = m_K6;
542 const int x86_use_cltd = ~(m_PENT | m_K6);
543 const int x86_read_modify_write = ~m_PENT;
544 const int x86_read_modify = ~(m_PENT | m_PPRO);
545 const int x86_split_long_moves = m_PPRO;
546 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
547 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
548 const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
549 const int x86_qimode_math = ~(0);
550 const int x86_promote_qi_regs = 0;
551 const int x86_himode_math = ~(m_PPRO);
552 const int x86_promote_hi_regs = m_PPRO;
553 const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA;
554 const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA;
555 const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4 | m_NOCONA;
556 const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4 | m_NOCONA;
557 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO);
558 const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA;
559 const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA;
560 const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO;
561 const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO;
562 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO;
563 const int x86_decompose_lea = m_PENT4 | m_NOCONA;
564 const int x86_shift1 = ~m_486;
565 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
566 const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO;
567 /* Set for machines where the type and dependencies are resolved on SSE
568    register parts instead of whole registers, so we may maintain just the
569    lower part of scalar values in the proper format, leaving the upper part
570    undefined.  */
571 const int x86_sse_split_regs = m_ATHLON_K8;
572 const int x86_sse_typeless_stores = m_ATHLON_K8;
573 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
574 const int x86_use_ffreep = m_ATHLON_K8;
575 const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
576
577 /* ??? Allowing interunit moves makes it all too easy for the compiler to put
578    integer data in xmm registers, which results in pretty abysmal code.  */
579 const int x86_inter_unit_moves = 0 /* ~(m_ATHLON_K8) */;
580
581 const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_NOCONA | m_PPRO;
582 /* Some CPU cores are not able to predict more than 4 branch instructions in
583 the 16 byte window. */
584 const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
585 const int x86_schedule = m_PPRO | m_ATHLON_K8 | m_K6 | m_PENT;
586 const int x86_use_bt = m_ATHLON_K8;
587
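/* Illustrative sketch (editorial; the exact macro names are assumptions, not
   taken from this file): these tuning bitmasks are consumed through TARGET_*
   convenience macros in i386.h, roughly of the form
       #define TUNEMASK (1 << ix86_tune)
       #define TARGET_USE_LEAVE (x86_use_leave & TUNEMASK)
   so a feature flag takes effect only when its bit is set for the processor
   selected by -mtune.  */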
588 /* If the average insn count for a single function invocation is
589    lower than this constant, emit fast (but longer) prologue and
590    epilogue code.  */
591 #define FAST_PROLOGUE_INSN_COUNT 20
592
593 /* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
594 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
595 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
596 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
597
598 /* Array of the smallest class containing reg number REGNO, indexed by
599 REGNO. Used by REGNO_REG_CLASS in i386.h. */
600
601 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
602 {
603 /* ax, dx, cx, bx */
604 AREG, DREG, CREG, BREG,
605 /* si, di, bp, sp */
606 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
607 /* FP registers */
608 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
609 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
610 /* arg pointer */
611 NON_Q_REGS,
612 /* flags, fpsr, dirflag, frame */
613 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
614 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
615 SSE_REGS, SSE_REGS,
616 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
617 MMX_REGS, MMX_REGS,
618 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
619 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
620 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
621 SSE_REGS, SSE_REGS,
622 };
623
624 /* The "default" register map used in 32bit mode. */
625
626 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
627 {
628 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
629 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
630 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
631 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
632 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
633 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
634 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
635 };
636
637 static int const x86_64_int_parameter_registers[6] =
638 {
639 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
640 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
641 };
642
643 static int const x86_64_int_return_registers[4] =
644 {
645 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
646 };
647
648 /* The "default" register map used in 64bit mode. */
649 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
650 {
651 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
652 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
653 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
654 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
655 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
656 8,9,10,11,12,13,14,15, /* extended integer registers */
657 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
658 };
659
660 /* Define the register numbers to be used in Dwarf debugging information.
661 The SVR4 reference port C compiler uses the following register numbers
662 in its Dwarf output code:
663 0 for %eax (gcc regno = 0)
664 1 for %ecx (gcc regno = 2)
665 2 for %edx (gcc regno = 1)
666 3 for %ebx (gcc regno = 3)
667 4 for %esp (gcc regno = 7)
668 5 for %ebp (gcc regno = 6)
669 6 for %esi (gcc regno = 4)
670 7 for %edi (gcc regno = 5)
671 The following three DWARF register numbers are never generated by
672 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
673 believes these numbers have these meanings.
674 8 for %eip (no gcc equivalent)
675 9 for %eflags (gcc regno = 17)
676 10 for %trapno (no gcc equivalent)
677 It is not at all clear how we should number the FP stack registers
678 for the x86 architecture. If the version of SDB on x86/svr4 were
679 a bit less brain dead with respect to floating-point then we would
680 have a precedent to follow with respect to DWARF register numbers
681 for x86 FP registers, but the SDB on x86/svr4 is so completely
682 broken with respect to FP registers that it is hardly worth thinking
683 of it as something to strive for compatibility with.
684 The version of x86/svr4 SDB I have at the moment does (partially)
685 seem to believe that DWARF register number 11 is associated with
686 the x86 register %st(0), but that's about all. Higher DWARF
687 register numbers don't seem to be associated with anything in
688 particular, and even for DWARF regno 11, SDB only seems to under-
689 stand that it should say that a variable lives in %st(0) (when
690 asked via an `=' command) if we said it was in DWARF regno 11,
691 but SDB still prints garbage when asked for the value of the
692 variable in question (via a `/' command).
693 (Also note that the labels SDB prints for various FP stack regs
694 when doing an `x' command are all wrong.)
695 Note that these problems generally don't affect the native SVR4
696 C compiler because it doesn't allow the use of -O with -g and
697 because when it is *not* optimizing, it allocates a memory
698 location for each floating-point variable, and the memory
699 location is what gets described in the DWARF AT_location
700 attribute for the variable in question.
701 Regardless of the severe mental illness of the x86/svr4 SDB, we
702 do something sensible here and we use the following DWARF
703 register numbers. Note that these are all stack-top-relative
704 numbers.
705 11 for %st(0) (gcc regno = 8)
706 12 for %st(1) (gcc regno = 9)
707 13 for %st(2) (gcc regno = 10)
708 14 for %st(3) (gcc regno = 11)
709 15 for %st(4) (gcc regno = 12)
710 16 for %st(5) (gcc regno = 13)
711 17 for %st(6) (gcc regno = 14)
712 18 for %st(7) (gcc regno = 15)
713 */
714 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
715 {
716 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
717 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
718 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
719 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
720 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
721 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
722 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
723 };
724
725 /* Test and compare insns in i386.md store the information needed to
726 generate branch and scc insns here. */
727
728 rtx ix86_compare_op0 = NULL_RTX;
729 rtx ix86_compare_op1 = NULL_RTX;
730
731 #define MAX_386_STACK_LOCALS 3
732 /* Size of the register save area. */
733 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
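/* Worked example (editorial; assumes the 64-bit values REGPARM_MAX == 6,
   SSE_REGPARM_MAX == 8 and UNITS_PER_WORD == 8): the register save area is
   6*8 + 8*16 = 176 bytes, i.e. six general-purpose word slots followed by
   eight 16-byte SSE slots, matching the x86-64 psABI layout.  */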
734
735 /* Define the structure for the machine field in struct function. */
736
737 struct stack_local_entry GTY(())
738 {
739 unsigned short mode;
740 unsigned short n;
741 rtx rtl;
742 struct stack_local_entry *next;
743 };
744
745 /* Structure describing stack frame layout.
746 Stack grows downward:
747
748 [arguments]
749 <- ARG_POINTER
750 saved pc
751
752 saved frame pointer if frame_pointer_needed
753 <- HARD_FRAME_POINTER
754 [saved regs]
755
756 [padding1] \
757 )
758 [va_arg registers] (
759 > to_allocate <- FRAME_POINTER
760 [frame] (
761 )
762 [padding2] /
763 */
764 struct ix86_frame
765 {
766 int nregs;
767 int padding1;
768 int va_arg_size;
769 HOST_WIDE_INT frame;
770 int padding2;
771 int outgoing_arguments_size;
772 int red_zone_size;
773
774 HOST_WIDE_INT to_allocate;
775 /* The offsets relative to ARG_POINTER. */
776 HOST_WIDE_INT frame_pointer_offset;
777 HOST_WIDE_INT hard_frame_pointer_offset;
778 HOST_WIDE_INT stack_pointer_offset;
779
780 /* When save_regs_using_mov is set, emit prologue using
781 move instead of push instructions. */
782 bool save_regs_using_mov;
783 };
784
785 /* Used to enable/disable debugging features. */
786 const char *ix86_debug_arg_string, *ix86_debug_addr_string;
787 /* Code model option as passed by user. */
788 const char *ix86_cmodel_string;
789 /* Parsed value. */
790 enum cmodel ix86_cmodel;
791 /* Asm dialect. */
792 const char *ix86_asm_string;
793 enum asm_dialect ix86_asm_dialect = ASM_ATT;
794 /* TLS dialect. */
795 const char *ix86_tls_dialect_string;
796 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
797
798 /* Which unit we are generating floating point math for. */
799 enum fpmath_unit ix86_fpmath;
800
801 /* Which CPU we are scheduling for. */
802 enum processor_type ix86_tune;
803 /* Which instruction set architecture to use. */
804 enum processor_type ix86_arch;
805
806 /* Strings to hold which cpu and instruction set architecture to use. */
807 const char *ix86_tune_string; /* for -mtune=<xxx> */
808 const char *ix86_arch_string; /* for -march=<xxx> */
809 const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
810
811 /* # of registers to use to pass arguments. */
812 const char *ix86_regparm_string;
813
814 /* Nonzero if the SSE prefetch instruction is not a NOP. */
815 int x86_prefetch_sse;
816
817 /* ix86_regparm_string as a number */
818 int ix86_regparm;
819
820 /* Alignment to use for loops and jumps: */
821
822 /* Power of two alignment for loops. */
823 const char *ix86_align_loops_string;
824
825 /* Power of two alignment for non-loop jumps. */
826 const char *ix86_align_jumps_string;
827
828 /* Power of two alignment for stack boundary in bytes. */
829 const char *ix86_preferred_stack_boundary_string;
830
831 /* Preferred alignment for stack boundary in bits. */
832 unsigned int ix86_preferred_stack_boundary;
833
834 /* Values 1-5: see jump.c */
835 int ix86_branch_cost;
836 const char *ix86_branch_cost_string;
837
838 /* Power of two alignment for functions. */
839 const char *ix86_align_funcs_string;
840
841 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
842 char internal_label_prefix[16];
843 int internal_label_prefix_len;
844 \f
845 static void output_pic_addr_const (FILE *, rtx, int);
846 static void put_condition_code (enum rtx_code, enum machine_mode,
847 int, int, FILE *);
848 static const char *get_some_local_dynamic_name (void);
849 static int get_some_local_dynamic_name_1 (rtx *, void *);
850 static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
851 static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
852 rtx *);
853 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
854 static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
855 enum machine_mode);
856 static rtx get_thread_pointer (int);
857 static rtx legitimize_tls_address (rtx, enum tls_model, int);
858 static void get_pc_thunk_name (char [32], unsigned int);
859 static rtx gen_push (rtx);
860 static int ix86_flags_dependant (rtx, rtx, enum attr_type);
861 static int ix86_agi_dependant (rtx, rtx, enum attr_type);
862 static struct machine_function * ix86_init_machine_status (void);
863 static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
864 static int ix86_nsaved_regs (void);
865 static void ix86_emit_save_regs (void);
866 static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
867 static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
868 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
869 static HOST_WIDE_INT ix86_GOT_alias_set (void);
870 static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
871 static rtx ix86_expand_aligntest (rtx, int);
872 static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
873 static int ix86_issue_rate (void);
874 static int ix86_adjust_cost (rtx, rtx, rtx, int);
875 static int ia32_multipass_dfa_lookahead (void);
876 static void ix86_init_mmx_sse_builtins (void);
877 static rtx x86_this_parameter (tree);
878 static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
879 HOST_WIDE_INT, tree);
880 static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
881 static void x86_file_start (void);
882 static void ix86_reorg (void);
883 static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
884 static tree ix86_build_builtin_va_list (void);
885 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
886 tree, int *, int);
887 static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
888 static bool ix86_vector_mode_supported_p (enum machine_mode);
889
890 static int ix86_address_cost (rtx);
891 static bool ix86_cannot_force_const_mem (rtx);
892 static rtx ix86_delegitimize_address (rtx);
893
894 struct builtin_description;
895 static rtx ix86_expand_sse_comi (const struct builtin_description *,
896 tree, rtx);
897 static rtx ix86_expand_sse_compare (const struct builtin_description *,
898 tree, rtx);
899 static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
900 static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
901 static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
902 static rtx ix86_expand_store_builtin (enum insn_code, tree);
903 static rtx safe_vector_operand (rtx, enum machine_mode);
904 static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
905 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
906 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
907 static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
908 static int ix86_fp_comparison_cost (enum rtx_code code);
909 static unsigned int ix86_select_alt_pic_regnum (void);
910 static int ix86_save_reg (unsigned int, int);
911 static void ix86_compute_frame_layout (struct ix86_frame *);
912 static int ix86_comp_type_attributes (tree, tree);
913 static int ix86_function_regparm (tree, tree);
914 const struct attribute_spec ix86_attribute_table[];
915 static bool ix86_function_ok_for_sibcall (tree, tree);
916 static tree ix86_handle_cdecl_attribute (tree *, tree, tree, int, bool *);
917 static tree ix86_handle_regparm_attribute (tree *, tree, tree, int, bool *);
918 static int ix86_value_regno (enum machine_mode, tree);
919 static bool contains_128bit_aligned_vector_p (tree);
920 static rtx ix86_struct_value_rtx (tree, int);
921 static bool ix86_ms_bitfield_layout_p (tree);
922 static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
923 static int extended_reg_mentioned_1 (rtx *, void *);
924 static bool ix86_rtx_costs (rtx, int, int, int *);
925 static int min_insn_size (rtx);
926 static tree ix86_md_asm_clobbers (tree outputs, tree inputs, tree clobbers);
927 static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type);
928 static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
929 tree, bool);
930 static void ix86_init_builtins (void);
931 static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
932
933 /* This function is only used on Solaris. */
934 static void i386_solaris_elf_named_section (const char *, unsigned int, tree)
935 ATTRIBUTE_UNUSED;
936
937 /* Register class used for passing a given 64-bit part of the argument.
938    These represent classes as documented by the psABI, with the exception
939    of the SSESF and SSEDF classes, which are basically the SSE class; gcc
940    just uses SFmode or DFmode moves instead of DImode to avoid reformatting penalties.
941
942    Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
943    whenever possible (the upper half does contain padding).
944 */
945 enum x86_64_reg_class
946 {
947 X86_64_NO_CLASS,
948 X86_64_INTEGER_CLASS,
949 X86_64_INTEGERSI_CLASS,
950 X86_64_SSE_CLASS,
951 X86_64_SSESF_CLASS,
952 X86_64_SSEDF_CLASS,
953 X86_64_SSEUP_CLASS,
954 X86_64_X87_CLASS,
955 X86_64_X87UP_CLASS,
956 X86_64_COMPLEX_X87_CLASS,
957 X86_64_MEMORY_CLASS
958 };
959 static const char * const x86_64_reg_class_name[] = {
960 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
961 "sseup", "x87", "x87up", "cplx87", "no"
962 };
963
964 #define MAX_CLASSES 4
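/* Illustrative example of the classification above (editorial sketch, not
   taken from this file): a structure such as
       struct { double d; int i; };
   occupies two eightbytes; the first classifies as X86_64_SSEDF_CLASS (the
   double) and the second as X86_64_INTEGERSI_CLASS (the int plus padding),
   so it is passed in one SSE register and one integer register.  */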
965
966 /* Table of constants used by fldpi, fldln2, etc.... */
967 static REAL_VALUE_TYPE ext_80387_constants_table [5];
968 static bool ext_80387_constants_init = 0;
969 static void init_ext_80387_constants (void);
970 \f
971 /* Initialize the GCC target structure. */
972 #undef TARGET_ATTRIBUTE_TABLE
973 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
974 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
975 # undef TARGET_MERGE_DECL_ATTRIBUTES
976 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
977 #endif
978
979 #undef TARGET_COMP_TYPE_ATTRIBUTES
980 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
981
982 #undef TARGET_INIT_BUILTINS
983 #define TARGET_INIT_BUILTINS ix86_init_builtins
984 #undef TARGET_EXPAND_BUILTIN
985 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
986
987 #undef TARGET_ASM_FUNCTION_EPILOGUE
988 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
989
990 #undef TARGET_ASM_OPEN_PAREN
991 #define TARGET_ASM_OPEN_PAREN ""
992 #undef TARGET_ASM_CLOSE_PAREN
993 #define TARGET_ASM_CLOSE_PAREN ""
994
995 #undef TARGET_ASM_ALIGNED_HI_OP
996 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
997 #undef TARGET_ASM_ALIGNED_SI_OP
998 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
999 #ifdef ASM_QUAD
1000 #undef TARGET_ASM_ALIGNED_DI_OP
1001 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
1002 #endif
1003
1004 #undef TARGET_ASM_UNALIGNED_HI_OP
1005 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
1006 #undef TARGET_ASM_UNALIGNED_SI_OP
1007 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1008 #undef TARGET_ASM_UNALIGNED_DI_OP
1009 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1010
1011 #undef TARGET_SCHED_ADJUST_COST
1012 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1013 #undef TARGET_SCHED_ISSUE_RATE
1014 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1015 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1016 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1017 ia32_multipass_dfa_lookahead
1018
1019 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1020 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1021
1022 #ifdef HAVE_AS_TLS
1023 #undef TARGET_HAVE_TLS
1024 #define TARGET_HAVE_TLS true
1025 #endif
1026 #undef TARGET_CANNOT_FORCE_CONST_MEM
1027 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1028
1029 #undef TARGET_DELEGITIMIZE_ADDRESS
1030 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1031
1032 #undef TARGET_MS_BITFIELD_LAYOUT_P
1033 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1034
1035 #undef TARGET_ASM_OUTPUT_MI_THUNK
1036 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1037 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1038 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1039
1040 #undef TARGET_ASM_FILE_START
1041 #define TARGET_ASM_FILE_START x86_file_start
1042
1043 #undef TARGET_RTX_COSTS
1044 #define TARGET_RTX_COSTS ix86_rtx_costs
1045 #undef TARGET_ADDRESS_COST
1046 #define TARGET_ADDRESS_COST ix86_address_cost
1047
1048 #undef TARGET_FIXED_CONDITION_CODE_REGS
1049 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1050 #undef TARGET_CC_MODES_COMPATIBLE
1051 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1052
1053 #undef TARGET_MACHINE_DEPENDENT_REORG
1054 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1055
1056 #undef TARGET_BUILD_BUILTIN_VA_LIST
1057 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1058
1059 #undef TARGET_MD_ASM_CLOBBERS
1060 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1061
1062 #undef TARGET_PROMOTE_PROTOTYPES
1063 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1064 #undef TARGET_STRUCT_VALUE_RTX
1065 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1066 #undef TARGET_SETUP_INCOMING_VARARGS
1067 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1068 #undef TARGET_MUST_PASS_IN_STACK
1069 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
1070 #undef TARGET_PASS_BY_REFERENCE
1071 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
1072
1073 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1074 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1075
1076 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1077 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
1078
1079 #ifdef SUBTARGET_INSERT_ATTRIBUTES
1080 #undef TARGET_INSERT_ATTRIBUTES
1081 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
1082 #endif
1083
1084 struct gcc_target targetm = TARGET_INITIALIZER;
1085
1086 \f
1087 /* The svr4 ABI for the i386 says that records and unions are returned
1088 in memory. */
1089 #ifndef DEFAULT_PCC_STRUCT_RETURN
1090 #define DEFAULT_PCC_STRUCT_RETURN 1
1091 #endif
1092
1093 /* Sometimes certain combinations of command options do not make
1094 sense on a particular target machine. You can define a macro
1095 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1096 defined, is executed once just after all the command options have
1097 been parsed.
1098
1099 Don't use this macro to turn on various extra optimizations for
1100 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1101
1102 void
1103 override_options (void)
1104 {
1105 int i;
1106 int ix86_tune_defaulted = 0;
1107
1108 /* Comes from final.c -- no real reason to change it. */
1109 #define MAX_CODE_ALIGN 16
1110
1111 static struct ptt
1112 {
1113 const struct processor_costs *cost; /* Processor costs */
1114 const int target_enable; /* Target flags to enable. */
1115 const int target_disable; /* Target flags to disable. */
1116 const int align_loop; /* Default alignments. */
1117 const int align_loop_max_skip;
1118 const int align_jump;
1119 const int align_jump_max_skip;
1120 const int align_func;
1121 }
1122 const processor_target_table[PROCESSOR_max] =
1123 {
1124 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1125 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1126 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1127 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1128 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1129 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1130 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1131 {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
1132 {&nocona_cost, 0, 0, 0, 0, 0, 0, 0}
1133 };
1134
1135 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1136 static struct pta
1137 {
1138 const char *const name; /* processor name or nickname. */
1139 const enum processor_type processor;
1140 const enum pta_flags
1141 {
1142 PTA_SSE = 1,
1143 PTA_SSE2 = 2,
1144 PTA_SSE3 = 4,
1145 PTA_MMX = 8,
1146 PTA_PREFETCH_SSE = 16,
1147 PTA_3DNOW = 32,
1148 PTA_3DNOW_A = 64,
1149 PTA_64BIT = 128
1150 } flags;
1151 }
1152 const processor_alias_table[] =
1153 {
1154 {"i386", PROCESSOR_I386, 0},
1155 {"i486", PROCESSOR_I486, 0},
1156 {"i586", PROCESSOR_PENTIUM, 0},
1157 {"pentium", PROCESSOR_PENTIUM, 0},
1158 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1159 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1160 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1161 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1162 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1163 {"i686", PROCESSOR_PENTIUMPRO, 0},
1164 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1165 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1166 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1167 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1168 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1169 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1170 | PTA_MMX | PTA_PREFETCH_SSE},
1171 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1172 | PTA_MMX | PTA_PREFETCH_SSE},
1173 {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
1174 | PTA_MMX | PTA_PREFETCH_SSE},
1175 {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1176 | PTA_MMX | PTA_PREFETCH_SSE},
1177 {"k6", PROCESSOR_K6, PTA_MMX},
1178 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1179 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1180 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1181 | PTA_3DNOW_A},
1182 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1183 | PTA_3DNOW | PTA_3DNOW_A},
1184 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1185 | PTA_3DNOW_A | PTA_SSE},
1186 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1187 | PTA_3DNOW_A | PTA_SSE},
1188 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1189 | PTA_3DNOW_A | PTA_SSE},
1190 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1191 | PTA_SSE | PTA_SSE2 },
1192 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1193 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1194 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1195 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1196 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1197 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1198 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1199 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1200 };
1201
1202 int const pta_size = ARRAY_SIZE (processor_alias_table);
1203
1204 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1205 SUBTARGET_OVERRIDE_OPTIONS;
1206 #endif
1207
1208 /* Set the default values for switches whose default depends on TARGET_64BIT
1209 in case they weren't overwritten by command line options. */
1210 if (TARGET_64BIT)
1211 {
1212 if (flag_omit_frame_pointer == 2)
1213 flag_omit_frame_pointer = 1;
1214 if (flag_asynchronous_unwind_tables == 2)
1215 flag_asynchronous_unwind_tables = 1;
1216 if (flag_pcc_struct_return == 2)
1217 flag_pcc_struct_return = 0;
1218 }
1219 else
1220 {
1221 if (flag_omit_frame_pointer == 2)
1222 flag_omit_frame_pointer = 0;
1223 if (flag_asynchronous_unwind_tables == 2)
1224 flag_asynchronous_unwind_tables = 0;
1225 if (flag_pcc_struct_return == 2)
1226 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1227 }
1228
1229 if (!ix86_tune_string && ix86_arch_string)
1230 ix86_tune_string = ix86_arch_string;
1231 if (!ix86_tune_string)
1232 {
1233 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1234 ix86_tune_defaulted = 1;
1235 }
1236 if (!ix86_arch_string)
1237 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
1238
1239 if (ix86_cmodel_string != 0)
1240 {
1241 if (!strcmp (ix86_cmodel_string, "small"))
1242 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1243 else if (flag_pic)
1244 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1245 else if (!strcmp (ix86_cmodel_string, "32"))
1246 ix86_cmodel = CM_32;
1247 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1248 ix86_cmodel = CM_KERNEL;
1249 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
1250 ix86_cmodel = CM_MEDIUM;
1251 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1252 ix86_cmodel = CM_LARGE;
1253 else
1254 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1255 }
1256 else
1257 {
1258 ix86_cmodel = CM_32;
1259 if (TARGET_64BIT)
1260 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1261 }
1262 if (ix86_asm_string != 0)
1263 {
1264 if (!strcmp (ix86_asm_string, "intel"))
1265 ix86_asm_dialect = ASM_INTEL;
1266 else if (!strcmp (ix86_asm_string, "att"))
1267 ix86_asm_dialect = ASM_ATT;
1268 else
1269 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1270 }
1271 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1272 error ("code model %qs not supported in the %s bit mode",
1273 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1274 if (ix86_cmodel == CM_LARGE)
1275 sorry ("code model %<large%> not supported yet");
1276 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1277 sorry ("%i-bit mode not compiled in",
1278 (target_flags & MASK_64BIT) ? 64 : 32);
1279
1280 for (i = 0; i < pta_size; i++)
1281 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1282 {
1283 ix86_arch = processor_alias_table[i].processor;
1284 /* Default cpu tuning to the architecture. */
1285 ix86_tune = ix86_arch;
1286 if (processor_alias_table[i].flags & PTA_MMX
1287 && !(target_flags_explicit & MASK_MMX))
1288 target_flags |= MASK_MMX;
1289 if (processor_alias_table[i].flags & PTA_3DNOW
1290 && !(target_flags_explicit & MASK_3DNOW))
1291 target_flags |= MASK_3DNOW;
1292 if (processor_alias_table[i].flags & PTA_3DNOW_A
1293 && !(target_flags_explicit & MASK_3DNOW_A))
1294 target_flags |= MASK_3DNOW_A;
1295 if (processor_alias_table[i].flags & PTA_SSE
1296 && !(target_flags_explicit & MASK_SSE))
1297 target_flags |= MASK_SSE;
1298 if (processor_alias_table[i].flags & PTA_SSE2
1299 && !(target_flags_explicit & MASK_SSE2))
1300 target_flags |= MASK_SSE2;
1301 if (processor_alias_table[i].flags & PTA_SSE3
1302 && !(target_flags_explicit & MASK_SSE3))
1303 target_flags |= MASK_SSE3;
1304 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1305 x86_prefetch_sse = true;
1306 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1307 error ("CPU you selected does not support x86-64 "
1308 "instruction set");
1309 break;
1310 }
1311
1312 if (i == pta_size)
1313 error ("bad value (%s) for -march= switch", ix86_arch_string);
1314
1315 for (i = 0; i < pta_size; i++)
1316 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1317 {
1318 ix86_tune = processor_alias_table[i].processor;
1319 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1320 {
1321 if (ix86_tune_defaulted)
1322 {
1323 ix86_tune_string = "x86-64";
1324 for (i = 0; i < pta_size; i++)
1325 if (! strcmp (ix86_tune_string,
1326 processor_alias_table[i].name))
1327 break;
1328 ix86_tune = processor_alias_table[i].processor;
1329 }
1330 else
1331 error ("CPU you selected does not support x86-64 "
1332 "instruction set");
1333 }
1334 /* Intel CPUs have always interpreted SSE prefetch instructions as
1335 NOPs; so, we can enable SSE prefetch instructions even when
1336 -mtune (rather than -march) points us to a processor that has them.
1337 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
1338 higher processors. */
1339 if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
1340 x86_prefetch_sse = true;
1341 break;
1342 }
1343 if (i == pta_size)
1344 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1345
1346 if (optimize_size)
1347 ix86_cost = &size_cost;
1348 else
1349 ix86_cost = processor_target_table[ix86_tune].cost;
1350 target_flags |= processor_target_table[ix86_tune].target_enable;
1351 target_flags &= ~processor_target_table[ix86_tune].target_disable;
1352
1353 /* Arrange to set up i386_stack_locals for all functions. */
1354 init_machine_status = ix86_init_machine_status;
1355
1356 /* Validate -mregparm= value. */
1357 if (ix86_regparm_string)
1358 {
1359 i = atoi (ix86_regparm_string);
1360 if (i < 0 || i > REGPARM_MAX)
1361 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1362 else
1363 ix86_regparm = i;
1364 }
1365 else
1366 if (TARGET_64BIT)
1367 ix86_regparm = REGPARM_MAX;
1368
1369 /* If the user has provided any of the -malign-* options,
1370 warn and use that value only if -falign-* is not set.
1371 Remove this code in GCC 3.2 or later. */
1372 if (ix86_align_loops_string)
1373 {
1374 warning ("-malign-loops is obsolete, use -falign-loops");
1375 if (align_loops == 0)
1376 {
1377 i = atoi (ix86_align_loops_string);
1378 if (i < 0 || i > MAX_CODE_ALIGN)
1379 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1380 else
1381 align_loops = 1 << i;
1382 }
1383 }
1384
1385 if (ix86_align_jumps_string)
1386 {
1387 warning ("-malign-jumps is obsolete, use -falign-jumps");
1388 if (align_jumps == 0)
1389 {
1390 i = atoi (ix86_align_jumps_string);
1391 if (i < 0 || i > MAX_CODE_ALIGN)
1392 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1393 else
1394 align_jumps = 1 << i;
1395 }
1396 }
1397
1398 if (ix86_align_funcs_string)
1399 {
1400 warning ("-malign-functions is obsolete, use -falign-functions");
1401 if (align_functions == 0)
1402 {
1403 i = atoi (ix86_align_funcs_string);
1404 if (i < 0 || i > MAX_CODE_ALIGN)
1405 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1406 else
1407 align_functions = 1 << i;
1408 }
1409 }
1410
1411 /* Default align_* from the processor table. */
1412 if (align_loops == 0)
1413 {
1414 align_loops = processor_target_table[ix86_tune].align_loop;
1415 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
1416 }
1417 if (align_jumps == 0)
1418 {
1419 align_jumps = processor_target_table[ix86_tune].align_jump;
1420 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
1421 }
1422 if (align_functions == 0)
1423 {
1424 align_functions = processor_target_table[ix86_tune].align_func;
1425 }
1426
1427 /* Validate -mpreferred-stack-boundary= value, or provide default.
1428 The default of 128 bits is for Pentium III's SSE __m128, but we
1429 don't want additional code to keep the stack aligned when
1430 optimizing for code size. */
1431 ix86_preferred_stack_boundary = (optimize_size
1432 ? TARGET_64BIT ? 128 : 32
1433 : 128);
1434 if (ix86_preferred_stack_boundary_string)
1435 {
1436 i = atoi (ix86_preferred_stack_boundary_string);
1437 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1438 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1439 TARGET_64BIT ? 4 : 2);
1440 else
1441 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1442 }
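  /* Worked example (editorial): -mpreferred-stack-boundary=4 yields
     (1 << 4) * BITS_PER_UNIT = 16 * 8 = 128 bits, i.e. a 16-byte aligned
     stack, which matches the non-optimize_size default chosen above.  */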
1443
1444 /* Validate -mbranch-cost= value, or provide default. */
1445 ix86_branch_cost = processor_target_table[ix86_tune].cost->branch_cost;
1446 if (ix86_branch_cost_string)
1447 {
1448 i = atoi (ix86_branch_cost_string);
1449 if (i < 0 || i > 5)
1450 error ("-mbranch-cost=%d is not between 0 and 5", i);
1451 else
1452 ix86_branch_cost = i;
1453 }
1454
1455 if (ix86_tls_dialect_string)
1456 {
1457 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1458 ix86_tls_dialect = TLS_DIALECT_GNU;
1459 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1460 ix86_tls_dialect = TLS_DIALECT_SUN;
1461 else
1462 error ("bad value (%s) for -mtls-dialect= switch",
1463 ix86_tls_dialect_string);
1464 }
1465
1466 /* Keep nonleaf frame pointers. */
1467 if (flag_omit_frame_pointer)
1468 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
1469 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
1470 flag_omit_frame_pointer = 1;
1471
1472 /* If we're doing fast math, we don't care about comparison order
1473 wrt NaNs. This lets us use a shorter comparison sequence. */
1474 if (flag_unsafe_math_optimizations)
1475 target_flags &= ~MASK_IEEE_FP;
1476
1477 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1478 since the insns won't need emulation. */
1479 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1480 target_flags &= ~MASK_NO_FANCY_MATH_387;
1481
1482 /* Likewise, if the target doesn't have a 387, or we've specified
1483 software floating point, don't use 387 inline intrinsics. */
1484 if (!TARGET_80387)
1485 target_flags |= MASK_NO_FANCY_MATH_387;
1486
1487 /* Turn on SSE2 builtins for -msse3. */
1488 if (TARGET_SSE3)
1489 target_flags |= MASK_SSE2;
1490
1491 /* Turn on SSE builtins for -msse2. */
1492 if (TARGET_SSE2)
1493 target_flags |= MASK_SSE;
1494
1495 /* Turn on MMX builtins for -msse. */
1496 if (TARGET_SSE)
1497 {
1498 target_flags |= MASK_MMX & ~target_flags_explicit;
1499 x86_prefetch_sse = true;
1500 }
1501
1502 /* Turn on MMX builtins for 3Dnow. */
1503 if (TARGET_3DNOW)
1504 target_flags |= MASK_MMX;
1505
1506 if (TARGET_64BIT)
1507 {
1508 if (TARGET_ALIGN_DOUBLE)
1509 error ("-malign-double makes no sense in the 64bit mode");
1510 if (TARGET_RTD)
1511 error ("-mrtd calling convention not supported in the 64bit mode");
1512
1513 /* Enable by default the SSE and MMX builtins. Do allow the user to
1514 explicitly disable any of these. In particular, disabling SSE and
1515 MMX for kernel code is extremely useful. */
1516 target_flags
1517 |= ((MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE)
1518 & ~target_flags_explicit);
1519
1520 if (TARGET_SSE)
1521 ix86_fpmath = FPMATH_SSE;
1522 }
1523 else
1524 {
1525 ix86_fpmath = FPMATH_387;
1526 /* The i386 ABI does not specify a red zone. It still makes sense to use
1527 one when the programmer takes care to keep the stack from being clobbered. */
1528 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
1529 target_flags |= MASK_NO_RED_ZONE;
1530 }
1531
1532 if (ix86_fpmath_string != 0)
1533 {
1534 if (! strcmp (ix86_fpmath_string, "387"))
1535 ix86_fpmath = FPMATH_387;
1536 else if (! strcmp (ix86_fpmath_string, "sse"))
1537 {
1538 if (!TARGET_SSE)
1539 {
1540 warning ("SSE instruction set disabled, using 387 arithmetics");
1541 ix86_fpmath = FPMATH_387;
1542 }
1543 else
1544 ix86_fpmath = FPMATH_SSE;
1545 }
1546 else if (! strcmp (ix86_fpmath_string, "387,sse")
1547 || ! strcmp (ix86_fpmath_string, "sse,387"))
1548 {
1549 if (!TARGET_SSE)
1550 {
1551 warning ("SSE instruction set disabled, using 387 arithmetics");
1552 ix86_fpmath = FPMATH_387;
1553 }
1554 else if (!TARGET_80387)
1555 {
1556 warning ("387 instruction set disabled, using SSE arithmetics");
1557 ix86_fpmath = FPMATH_SSE;
1558 }
1559 else
1560 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1561 }
1562 else
1563 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1564 }
1565
1566 /* If the i387 is disabled, then do not return values in it. */
1567 if (!TARGET_80387)
1568 target_flags &= ~MASK_FLOAT_RETURNS;
1569
1570 if ((x86_accumulate_outgoing_args & TUNEMASK)
1571 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1572 && !optimize_size)
1573 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1574
1575 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1576 {
1577 char *p;
1578 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1579 p = strchr (internal_label_prefix, 'X');
1580 internal_label_prefix_len = p - internal_label_prefix;
1581 *p = '\0';
1582 }
1583
1584 /* When no scheduling description is available, disable the scheduler pass
1585 so that it won't slow down compilation or make x87 code slower. */
1586 if (!TARGET_SCHEDULE)
1587 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
1588 }
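/* Illustrative summary of the implications set up above: -msse3 turns on
   SSE2, -msse2 turns on SSE, and -msse (or -m3dnow) turns on MMX unless
   MMX was explicitly disabled; on x86-64, SSE, SSE2, MMX and 128-bit long
   double default to on unless the user explicitly disables them.  */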
1589 \f
1590 void
1591 optimization_options (int level, int size ATTRIBUTE_UNUSED)
1592 {
1593 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1594 make the problem with not enough registers even worse. */
1595 #ifdef INSN_SCHEDULING
1596 if (level > 1)
1597 flag_schedule_insns = 0;
1598 #endif
1599
1600 /* The default values of these switches depend on TARGET_64BIT,
1601 which is not known at this point. Mark these values with 2 and
1602 let the user override them. If no command line option
1603 specifies them, we will set the defaults in override_options. */
1604 if (optimize >= 1)
1605 flag_omit_frame_pointer = 2;
1606 flag_pcc_struct_return = 2;
1607 flag_asynchronous_unwind_tables = 2;
1608 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
1609 SUBTARGET_OPTIMIZATION_OPTIONS;
1610 #endif
1611 }
1612 \f
1613 /* Table of valid machine attributes. */
1614 const struct attribute_spec ix86_attribute_table[] =
1615 {
1616 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1617 /* Stdcall attribute says callee is responsible for popping arguments
1618 if they are not variable. */
1619 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1620 /* Fastcall attribute says callee is responsible for popping arguments
1621 if they are not variable. */
1622 { "fastcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1623 /* Cdecl attribute says the callee is a normal C declaration */
1624 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1625 /* Regparm attribute specifies how many integer arguments are to be
1626 passed in registers. */
1627 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1628 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1629 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
1630 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
1631 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
1632 #endif
1633 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1634 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1635 #ifdef SUBTARGET_ATTRIBUTE_TABLE
1636 SUBTARGET_ATTRIBUTE_TABLE,
1637 #endif
1638 { NULL, 0, 0, false, false, false, NULL }
1639 };
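/* Usage sketch (illustrative): these attributes appear on function types
   in user code, e.g.

     int __attribute__((stdcall)) win_cb (int a, int b);
     int __attribute__((fastcall)) fast_cb (int a, int b);
     int __attribute__((regparm (3))) reg_fn (int a, int b, int c);

   The handlers below validate each use and reject incompatible
   combinations such as fastcall together with stdcall or regparm.  */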
1640
1641 /* Decide whether we can make a sibling call to a function. DECL is the
1642 declaration of the function being targeted by the call and EXP is the
1643 CALL_EXPR representing the call. */
1644
1645 static bool
1646 ix86_function_ok_for_sibcall (tree decl, tree exp)
1647 {
1648 tree func;
1649
1650 /* If we are generating position-independent code, we cannot sibcall
1651 optimize any indirect call, or a direct call to a global function,
1652 as the PLT requires %ebx be live. */
1653 if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
1654 return false;
1655
1656 if (decl)
1657 func = decl;
1658 else
1659 func = NULL;
1660
1661 /* If we are returning floats on the 80387 register stack, we cannot
1662 make a sibcall from a function that doesn't return a float to a
1663 function that does or, conversely, from a function that does return
1664 a float to a function that doesn't; the necessary stack adjustment
1665 would not be executed. */
1666 if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp), func))
1667 != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
1668 cfun->decl)))
1669 return false;
1670
1671 /* If this call is indirect, we'll need to be able to use a call-clobbered
1672 register for the address of the target function. Make sure that all
1673 such registers are not used for passing parameters. */
1674 if (!decl && !TARGET_64BIT)
1675 {
1676 tree type;
1677
1678 /* We're looking at the CALL_EXPR, we need the type of the function. */
1679 type = TREE_OPERAND (exp, 0); /* pointer expression */
1680 type = TREE_TYPE (type); /* pointer type */
1681 type = TREE_TYPE (type); /* function type */
1682
1683 if (ix86_function_regparm (type, NULL) >= 3)
1684 {
1685 /* ??? Need to count the actual number of registers to be used,
1686 not the possible number of registers. Fix later. */
1687 return false;
1688 }
1689 }
1690
1691 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1692 /* Dllimport'd functions are also called indirectly. */
1693 if (decl && lookup_attribute ("dllimport", DECL_ATTRIBUTES (decl))
1694 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
1695 return false;
1696 #endif
1697
1698 /* Otherwise okay. That also includes certain types of indirect calls. */
1699 return true;
1700 }
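/* Illustrative example: when compiling 32-bit PIC code, a sibcall to a
   global (TREE_PUBLIC) function, or any indirect call, is rejected above
   because the call would go through the PLT, which requires %ebx to be
   live.  */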
1701
1702 /* Handle a "cdecl", "stdcall", or "fastcall" attribute;
1703 arguments as in struct attribute_spec.handler. */
1704 static tree
1705 ix86_handle_cdecl_attribute (tree *node, tree name,
1706 tree args ATTRIBUTE_UNUSED,
1707 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1708 {
1709 if (TREE_CODE (*node) != FUNCTION_TYPE
1710 && TREE_CODE (*node) != METHOD_TYPE
1711 && TREE_CODE (*node) != FIELD_DECL
1712 && TREE_CODE (*node) != TYPE_DECL)
1713 {
1714 warning ("%qs attribute only applies to functions",
1715 IDENTIFIER_POINTER (name));
1716 *no_add_attrs = true;
1717 }
1718 else
1719 {
1720 if (is_attribute_p ("fastcall", name))
1721 {
1722 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
1723 {
1724 error ("fastcall and stdcall attributes are not compatible");
1725 }
1726 else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
1727 {
1728 error ("fastcall and regparm attributes are not compatible");
1729 }
1730 }
1731 else if (is_attribute_p ("stdcall", name))
1732 {
1733 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1734 {
1735 error ("fastcall and stdcall attributes are not compatible");
1736 }
1737 }
1738 }
1739
1740 if (TARGET_64BIT)
1741 {
1742 warning ("%qs attribute ignored", IDENTIFIER_POINTER (name));
1743 *no_add_attrs = true;
1744 }
1745
1746 return NULL_TREE;
1747 }
1748
1749 /* Handle a "regparm" attribute;
1750 arguments as in struct attribute_spec.handler. */
1751 static tree
1752 ix86_handle_regparm_attribute (tree *node, tree name, tree args,
1753 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1754 {
1755 if (TREE_CODE (*node) != FUNCTION_TYPE
1756 && TREE_CODE (*node) != METHOD_TYPE
1757 && TREE_CODE (*node) != FIELD_DECL
1758 && TREE_CODE (*node) != TYPE_DECL)
1759 {
1760 warning ("%qs attribute only applies to functions",
1761 IDENTIFIER_POINTER (name));
1762 *no_add_attrs = true;
1763 }
1764 else
1765 {
1766 tree cst;
1767
1768 cst = TREE_VALUE (args);
1769 if (TREE_CODE (cst) != INTEGER_CST)
1770 {
1771 warning ("%qs attribute requires an integer constant argument",
1772 IDENTIFIER_POINTER (name));
1773 *no_add_attrs = true;
1774 }
1775 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1776 {
1777 warning ("argument to %qs attribute larger than %d",
1778 IDENTIFIER_POINTER (name), REGPARM_MAX);
1779 *no_add_attrs = true;
1780 }
1781
1782 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1783 {
1784 error ("fastcall and regparm attributes are not compatible");
1785 }
1786 }
1787
1788 return NULL_TREE;
1789 }
1790
1791 /* Return 0 if the attributes for two types are incompatible, 1 if they
1792 are compatible, and 2 if they are nearly compatible (which causes a
1793 warning to be generated). */
1794
1795 static int
1796 ix86_comp_type_attributes (tree type1, tree type2)
1797 {
1798 /* Check for mismatch of non-default calling convention. */
1799 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1800
1801 if (TREE_CODE (type1) != FUNCTION_TYPE)
1802 return 1;
1803
1804 /* Check for mismatched fastcall types */
1805 if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
1806 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
1807 return 0;
1808
1809 /* Check for mismatched return types (cdecl vs stdcall). */
1810 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1811 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1812 return 0;
1813 if (ix86_function_regparm (type1, NULL)
1814 != ix86_function_regparm (type2, NULL))
1815 return 0;
1816 return 1;
1817 }
1818 \f
1819 /* Return the regparm value for a function with the indicated TYPE and DECL.
1820 DECL may be NULL when calling function indirectly
1821 or considering a libcall. */
1822
1823 static int
1824 ix86_function_regparm (tree type, tree decl)
1825 {
1826 tree attr;
1827 int regparm = ix86_regparm;
1828 bool user_convention = false;
1829
1830 if (!TARGET_64BIT)
1831 {
1832 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1833 if (attr)
1834 {
1835 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1836 user_convention = true;
1837 }
1838
1839 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
1840 {
1841 regparm = 2;
1842 user_convention = true;
1843 }
1844
1845 /* Use register calling convention for local functions when possible. */
1846 if (!TARGET_64BIT && !user_convention && decl
1847 && flag_unit_at_a_time && !profile_flag)
1848 {
1849 struct cgraph_local_info *i = cgraph_local_info (decl);
1850 if (i && i->local)
1851 {
1852 /* We can't use regparm(3) for nested functions as these use
1853 static chain pointer in third argument. */
1854 if (DECL_CONTEXT (decl) && !DECL_NO_STATIC_CHAIN (decl))
1855 regparm = 2;
1856 else
1857 regparm = 3;
1858 }
1859 }
1860 }
1861 return regparm;
1862 }
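/* Illustrative example: a file-local (static) function compiled with
   -funit-at-a-time and no explicit convention is marked "local" by cgraph
   and gets regparm 3 above, or only 2 if it needs a static chain; an
   explicit regparm or fastcall attribute always takes precedence.  */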
1863
1864 /* Return true if EAX is live at the start of the function. Used by
1865 ix86_expand_prologue to determine if we need special help before
1866 calling allocate_stack_worker. */
1867
1868 static bool
1869 ix86_eax_live_at_start_p (void)
1870 {
1871 /* Cheat. Don't bother working forward from ix86_function_regparm
1872 to the function type to whether an actual argument is located in
1873 eax. Instead just look at cfg info, which is still close enough
1874 to correct at this point. This gives false positives for broken
1875 functions that might use uninitialized data that happens to be
1876 allocated in eax, but who cares? */
1877 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->global_live_at_end, 0);
1878 }
1879
1880 /* Value is the number of bytes of arguments automatically
1881 popped when returning from a subroutine call.
1882 FUNDECL is the declaration node of the function (as a tree),
1883 FUNTYPE is the data type of the function (as a tree),
1884 or for a library call it is an identifier node for the subroutine name.
1885 SIZE is the number of bytes of arguments passed on the stack.
1886
1887 On the 80386, the RTD insn may be used to pop them if the number
1888 of args is fixed, but if the number is variable then the caller
1889 must pop them all. RTD can't be used for library calls now
1890 because the library is compiled with the Unix compiler.
1891 Use of RTD is a selectable option, since it is incompatible with
1892 standard Unix calling sequences. If the option is not selected,
1893 the caller must always pop the args.
1894
1895 The attribute stdcall is equivalent to RTD on a per module basis. */
1896
1897 int
1898 ix86_return_pops_args (tree fundecl, tree funtype, int size)
1899 {
1900 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1901
1902 /* Cdecl functions override -mrtd, and never pop the stack. */
1903 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1904
1905 /* Stdcall and fastcall functions will pop the stack if not
1906 variable args. */
1907 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
1908 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
1909 rtd = 1;
1910
1911 if (rtd
1912 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1913 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1914 == void_type_node)))
1915 return size;
1916 }
1917
1918 /* Lose any fake structure return argument if it is passed on the stack. */
1919 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
1920 && !TARGET_64BIT
1921 && !KEEP_AGGREGATE_RETURN_POINTER)
1922 {
1923 int nregs = ix86_function_regparm (funtype, fundecl);
1924
1925 if (!nregs)
1926 return GET_MODE_SIZE (Pmode);
1927 }
1928
1929 return 0;
1930 }
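/* Worked example (illustrative): for
     void __attribute__((stdcall)) f (int a, int b);
   the argument list ends in void_type_node and the stdcall attribute is
   present, so this function returns SIZE (8 here) and the callee pops its
   own arguments with "ret $8".  Without such an attribute (and without
   -mrtd) it returns 0 and the caller pops the arguments.  */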
1931 \f
1932 /* Argument support functions. */
1933
1934 /* Return true when register may be used to pass function parameters. */
1935 bool
1936 ix86_function_arg_regno_p (int regno)
1937 {
1938 int i;
1939 if (!TARGET_64BIT)
1940 return (regno < REGPARM_MAX
1941 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1942 if (SSE_REGNO_P (regno) && TARGET_SSE)
1943 return true;
1944 /* RAX is used as hidden argument to va_arg functions. */
1945 if (!regno)
1946 return true;
1947 for (i = 0; i < REGPARM_MAX; i++)
1948 if (regno == x86_64_int_parameter_registers[i])
1949 return true;
1950 return false;
1951 }
1952
1953 /* Return true if we do not know how to pass TYPE solely in registers. */
1954
1955 static bool
1956 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
1957 {
1958 if (must_pass_in_stack_var_size_or_pad (mode, type))
1959 return true;
1960
1961 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
1962 The layout_type routine is crafty and tries to trick us into passing
1963 currently unsupported vector types on the stack by using TImode. */
1964 return (!TARGET_64BIT && mode == TImode
1965 && type && TREE_CODE (type) != VECTOR_TYPE);
1966 }
1967
1968 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1969 for a call to a function whose data type is FNTYPE.
1970 For a library call, FNTYPE is 0. */
1971
1972 void
1973 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
1974 tree fntype, /* tree ptr for function decl */
1975 rtx libname, /* SYMBOL_REF of library name or 0 */
1976 tree fndecl)
1977 {
1978 static CUMULATIVE_ARGS zero_cum;
1979 tree param, next_param;
1980
1981 if (TARGET_DEBUG_ARG)
1982 {
1983 fprintf (stderr, "\ninit_cumulative_args (");
1984 if (fntype)
1985 fprintf (stderr, "fntype code = %s, ret code = %s",
1986 tree_code_name[(int) TREE_CODE (fntype)],
1987 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1988 else
1989 fprintf (stderr, "no fntype");
1990
1991 if (libname)
1992 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1993 }
1994
1995 *cum = zero_cum;
1996
1997 /* Set up the number of registers to use for passing arguments. */
1998 if (fntype)
1999 cum->nregs = ix86_function_regparm (fntype, fndecl);
2000 else
2001 cum->nregs = ix86_regparm;
2002 if (TARGET_SSE)
2003 cum->sse_nregs = SSE_REGPARM_MAX;
2004 if (TARGET_MMX)
2005 cum->mmx_nregs = MMX_REGPARM_MAX;
2006 cum->warn_sse = true;
2007 cum->warn_mmx = true;
2008 cum->maybe_vaarg = false;
2009
2010 /* Use ecx and edx registers if the function has the fastcall attribute. */
2011 if (fntype && !TARGET_64BIT)
2012 {
2013 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
2014 {
2015 cum->nregs = 2;
2016 cum->fastcall = 1;
2017 }
2018 }
2019
2020 /* Determine if this function has variable arguments. This is
2021 indicated by the last argument being 'void_type_node' if there
2022 are no variable arguments. If there are variable arguments, then
2023 we won't pass anything in registers in 32-bit mode. */
2024
2025 if (cum->nregs || cum->mmx_nregs || cum->sse_nregs)
2026 {
2027 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
2028 param != 0; param = next_param)
2029 {
2030 next_param = TREE_CHAIN (param);
2031 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
2032 {
2033 if (!TARGET_64BIT)
2034 {
2035 cum->nregs = 0;
2036 cum->sse_nregs = 0;
2037 cum->mmx_nregs = 0;
2038 cum->warn_sse = 0;
2039 cum->warn_mmx = 0;
2040 cum->fastcall = 0;
2041 }
2042 cum->maybe_vaarg = true;
2043 }
2044 }
2045 }
2046 if ((!fntype && !libname)
2047 || (fntype && !TYPE_ARG_TYPES (fntype)))
2048 cum->maybe_vaarg = true;
2049
2050 /* For local functions, pass SFmode (and DFmode for SSE2) arguments
2051 in SSE registers even in 32-bit mode, and allow up to 8 SSE arguments
2052 in registers instead of just 3. */
2053 if (!TARGET_64BIT && !cum->maybe_vaarg && !cum->fastcall
2054 && cum->sse_nregs == SSE_REGPARM_MAX && fndecl
2055 && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
2056 {
2057 struct cgraph_local_info *i = cgraph_local_info (fndecl);
2058 if (i && i->local)
2059 {
2060 cum->sse_nregs = 8;
2061 cum->float_in_sse = true;
2062 }
2063 }
2064
2065 if (TARGET_DEBUG_ARG)
2066 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
2067
2068 return;
2069 }
2070
2071 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
2072 But in the case of vector types, it is some vector mode.
2073
2074 When we have only some of our vector isa extensions enabled, then there
2075 are some modes for which vector_mode_supported_p is false. For these
2076 modes, the generic vector support in gcc will choose some non-vector mode
2077 in order to implement the type. By computing the natural mode, we'll
2078 select the proper ABI location for the operand and not depend on whatever
2079 the middle-end decides to do with these vector types. */
2080
2081 static enum machine_mode
2082 type_natural_mode (tree type)
2083 {
2084 enum machine_mode mode = TYPE_MODE (type);
2085
2086 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
2087 {
2088 HOST_WIDE_INT size = int_size_in_bytes (type);
2089 if ((size == 8 || size == 16)
2090 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
2091 && TYPE_VECTOR_SUBPARTS (type) > 1)
2092 {
2093 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
2094
2095 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
2096 mode = MIN_MODE_VECTOR_FLOAT;
2097 else
2098 mode = MIN_MODE_VECTOR_INT;
2099
2100 /* Get the mode which has this inner mode and number of units. */
2101 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
2102 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
2103 && GET_MODE_INNER (mode) == innermode)
2104 return mode;
2105
2106 abort ();
2107 }
2108 }
2109
2110 return mode;
2111 }
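/* Illustrative example: for a GCC vector type such as
     typedef int v2si __attribute__ ((vector_size (8)));
   compiled without MMX enabled, TYPE_MODE may be a non-vector mode; the
   loop above still maps the type to V2SImode, so the ABI location stays
   the same regardless of which vector extensions are enabled.  */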
2112
2113 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
2114 this may not agree with the mode that the type system has chosen for the
2115 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
2116 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
2117
2118 static rtx
2119 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
2120 unsigned int regno)
2121 {
2122 rtx tmp;
2123
2124 if (orig_mode != BLKmode)
2125 tmp = gen_rtx_REG (orig_mode, regno);
2126 else
2127 {
2128 tmp = gen_rtx_REG (mode, regno);
2129 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
2130 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
2131 }
2132
2133 return tmp;
2134 }
2135
2136 /* x86-64 register passing implementation. See the x86-64 ABI for details. The
2137 goal of this code is to classify each eightbyte of an incoming argument by
2138 register class and assign registers accordingly. */
2139
2140 /* Return the union class of CLASS1 and CLASS2.
2141 See the x86-64 PS ABI for details. */
2142
2143 static enum x86_64_reg_class
2144 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
2145 {
2146 /* Rule #1: If both classes are equal, this is the resulting class. */
2147 if (class1 == class2)
2148 return class1;
2149
2150 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
2151 the other class. */
2152 if (class1 == X86_64_NO_CLASS)
2153 return class2;
2154 if (class2 == X86_64_NO_CLASS)
2155 return class1;
2156
2157 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
2158 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
2159 return X86_64_MEMORY_CLASS;
2160
2161 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
2162 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
2163 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
2164 return X86_64_INTEGERSI_CLASS;
2165 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
2166 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
2167 return X86_64_INTEGER_CLASS;
2168
2169 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
2170 MEMORY is used. */
2171 if (class1 == X86_64_X87_CLASS
2172 || class1 == X86_64_X87UP_CLASS
2173 || class1 == X86_64_COMPLEX_X87_CLASS
2174 || class2 == X86_64_X87_CLASS
2175 || class2 == X86_64_X87UP_CLASS
2176 || class2 == X86_64_COMPLEX_X87_CLASS)
2177 return X86_64_MEMORY_CLASS;
2178
2179 /* Rule #6: Otherwise class SSE is used. */
2180 return X86_64_SSE_CLASS;
2181 }
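/* Illustrative applications of the rules above:
     merge_classes (X86_64_NO_CLASS, X86_64_SSE_CLASS)
       == X86_64_SSE_CLASS                       (rule #2)
     merge_classes (X86_64_INTEGERSI_CLASS, X86_64_SSESF_CLASS)
       == X86_64_INTEGERSI_CLASS                 (rule #4)
     merge_classes (X86_64_SSE_CLASS, X86_64_X87_CLASS)
       == X86_64_MEMORY_CLASS                    (rule #5)  */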
2182
2183 /* Classify the argument of type TYPE and mode MODE.
2184 CLASSES will be filled by the register class used to pass each word
2185 of the operand. The number of words is returned. In case the parameter
2186 should be passed in memory, 0 is returned. As a special case for zero
2187 sized containers, classes[0] will be NO_CLASS and 1 is returned.
2188
2189 BIT_OFFSET is used internally for handling records; it specifies the
2190 offset in bits modulo 256 to avoid overflow cases.
2191
2192 See the x86-64 PS ABI for details.
2193 */
2194
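/* Worked example (illustrative): for the 16-byte structure
     struct s { double d; int a; int b; };
   classify_argument returns 2 with
     classes[0] = X86_64_SSEDF_CLASS    (the double in the first eightbyte)
     classes[1] = X86_64_INTEGER_CLASS  (the two ints merged in the second)
   so the structure is passed in one SSE register and one integer
   register.  */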
2195 static int
2196 classify_argument (enum machine_mode mode, tree type,
2197 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
2198 {
2199 HOST_WIDE_INT bytes =
2200 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2201 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2202
2203 /* Variable sized entities are always passed/returned in memory. */
2204 if (bytes < 0)
2205 return 0;
2206
2207 if (mode != VOIDmode
2208 && targetm.calls.must_pass_in_stack (mode, type))
2209 return 0;
2210
2211 if (type && AGGREGATE_TYPE_P (type))
2212 {
2213 int i;
2214 tree field;
2215 enum x86_64_reg_class subclasses[MAX_CLASSES];
2216
2217 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
2218 if (bytes > 16)
2219 return 0;
2220
2221 for (i = 0; i < words; i++)
2222 classes[i] = X86_64_NO_CLASS;
2223
2224 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
2225 signal the memory class, so handle this as a special case. */
2226 if (!words)
2227 {
2228 classes[0] = X86_64_NO_CLASS;
2229 return 1;
2230 }
2231
2232 /* Classify each field of record and merge classes. */
2233 if (TREE_CODE (type) == RECORD_TYPE)
2234 {
2235 /* For C++ classes, first merge in the fields of the base classes. */
2236 if (TYPE_BINFO (type))
2237 {
2238 tree binfo, base_binfo;
2239 int basenum;
2240
2241 for (binfo = TYPE_BINFO (type), basenum = 0;
2242 BINFO_BASE_ITERATE (binfo, basenum, base_binfo); basenum++)
2243 {
2244 int num;
2245 int offset = tree_low_cst (BINFO_OFFSET (base_binfo), 0) * 8;
2246 tree type = BINFO_TYPE (base_binfo);
2247
2248 num = classify_argument (TYPE_MODE (type),
2249 type, subclasses,
2250 (offset + bit_offset) % 256);
2251 if (!num)
2252 return 0;
2253 for (i = 0; i < num; i++)
2254 {
2255 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2256 classes[i + pos] =
2257 merge_classes (subclasses[i], classes[i + pos]);
2258 }
2259 }
2260 }
2261 /* And now merge the fields of structure. */
2262 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2263 {
2264 if (TREE_CODE (field) == FIELD_DECL)
2265 {
2266 int num;
2267
2268 /* Bitfields are always classified as integer. Handle them
2269 early, since later code would consider them to be
2270 misaligned integers. */
2271 if (DECL_BIT_FIELD (field))
2272 {
2273 for (i = int_bit_position (field) / 8 / 8;
2274 i < (int_bit_position (field)
2275 + tree_low_cst (DECL_SIZE (field), 0)
2276 + 63) / 8 / 8; i++)
2277 classes[i] =
2278 merge_classes (X86_64_INTEGER_CLASS,
2279 classes[i]);
2280 }
2281 else
2282 {
2283 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2284 TREE_TYPE (field), subclasses,
2285 (int_bit_position (field)
2286 + bit_offset) % 256);
2287 if (!num)
2288 return 0;
2289 for (i = 0; i < num; i++)
2290 {
2291 int pos =
2292 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
2293 classes[i + pos] =
2294 merge_classes (subclasses[i], classes[i + pos]);
2295 }
2296 }
2297 }
2298 }
2299 }
2300 /* Arrays are handled as small records. */
2301 else if (TREE_CODE (type) == ARRAY_TYPE)
2302 {
2303 int num;
2304 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2305 TREE_TYPE (type), subclasses, bit_offset);
2306 if (!num)
2307 return 0;
2308
2309 /* The partial classes are now full classes. */
2310 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2311 subclasses[0] = X86_64_SSE_CLASS;
2312 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
2313 subclasses[0] = X86_64_INTEGER_CLASS;
2314
2315 for (i = 0; i < words; i++)
2316 classes[i] = subclasses[i % num];
2317 }
2318 /* Unions are similar to RECORD_TYPE but offset is always 0. */
2319 else if (TREE_CODE (type) == UNION_TYPE
2320 || TREE_CODE (type) == QUAL_UNION_TYPE)
2321 {
2322 /* For C++ classes, first merge in the fields of the base classes. */
2323 if (TYPE_BINFO (type))
2324 {
2325 tree binfo, base_binfo;
2326 int basenum;
2327
2328 for (binfo = TYPE_BINFO (type), basenum = 0;
2329 BINFO_BASE_ITERATE (binfo, basenum, base_binfo); basenum++)
2330 {
2331 int num;
2332 int offset = tree_low_cst (BINFO_OFFSET (base_binfo), 0) * 8;
2333 tree type = BINFO_TYPE (base_binfo);
2334
2335 num = classify_argument (TYPE_MODE (type),
2336 type, subclasses,
2337 (offset + (bit_offset % 64)) % 256);
2338 if (!num)
2339 return 0;
2340 for (i = 0; i < num; i++)
2341 {
2342 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2343 classes[i + pos] =
2344 merge_classes (subclasses[i], classes[i + pos]);
2345 }
2346 }
2347 }
2348 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2349 {
2350 if (TREE_CODE (field) == FIELD_DECL)
2351 {
2352 int num;
2353 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2354 TREE_TYPE (field), subclasses,
2355 bit_offset);
2356 if (!num)
2357 return 0;
2358 for (i = 0; i < num; i++)
2359 classes[i] = merge_classes (subclasses[i], classes[i]);
2360 }
2361 }
2362 }
2363 else
2364 abort ();
2365
2366 /* Final merger cleanup. */
2367 for (i = 0; i < words; i++)
2368 {
2369 /* If one class is MEMORY, everything should be passed in
2370 memory. */
2371 if (classes[i] == X86_64_MEMORY_CLASS)
2372 return 0;
2373
2374 /* The X86_64_SSEUP_CLASS should always be preceded by
2375 X86_64_SSE_CLASS. */
2376 if (classes[i] == X86_64_SSEUP_CLASS
2377 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
2378 classes[i] = X86_64_SSE_CLASS;
2379
2380 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
2381 if (classes[i] == X86_64_X87UP_CLASS
2382 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
2383 classes[i] = X86_64_SSE_CLASS;
2384 }
2385 return words;
2386 }
2387
2388 /* Compute the alignment needed. We align all types to their natural
2389 boundaries, with the exception of XFmode, which is aligned to 128 bits. */
2390 if (mode != VOIDmode && mode != BLKmode)
2391 {
2392 int mode_alignment = GET_MODE_BITSIZE (mode);
2393
2394 if (mode == XFmode)
2395 mode_alignment = 128;
2396 else if (mode == XCmode)
2397 mode_alignment = 256;
2398 if (COMPLEX_MODE_P (mode))
2399 mode_alignment /= 2;
2400 /* Misaligned fields are always returned in memory. */
2401 if (bit_offset % mode_alignment)
2402 return 0;
2403 }
2404
2405 /* For V1xx modes, just use the base mode. */
2406 if (VECTOR_MODE_P (mode)
2407 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
2408 mode = GET_MODE_INNER (mode);
2409
2410 /* Classification of atomic types. */
2411 switch (mode)
2412 {
2413 case DImode:
2414 case SImode:
2415 case HImode:
2416 case QImode:
2417 case CSImode:
2418 case CHImode:
2419 case CQImode:
2420 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2421 classes[0] = X86_64_INTEGERSI_CLASS;
2422 else
2423 classes[0] = X86_64_INTEGER_CLASS;
2424 return 1;
2425 case CDImode:
2426 case TImode:
2427 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2428 return 2;
2429 case CTImode:
2430 return 0;
2431 case SFmode:
2432 if (!(bit_offset % 64))
2433 classes[0] = X86_64_SSESF_CLASS;
2434 else
2435 classes[0] = X86_64_SSE_CLASS;
2436 return 1;
2437 case DFmode:
2438 classes[0] = X86_64_SSEDF_CLASS;
2439 return 1;
2440 case XFmode:
2441 classes[0] = X86_64_X87_CLASS;
2442 classes[1] = X86_64_X87UP_CLASS;
2443 return 2;
2444 case TFmode:
2445 classes[0] = X86_64_SSE_CLASS;
2446 classes[1] = X86_64_SSEUP_CLASS;
2447 return 2;
2448 case SCmode:
2449 classes[0] = X86_64_SSE_CLASS;
2450 return 1;
2451 case DCmode:
2452 classes[0] = X86_64_SSEDF_CLASS;
2453 classes[1] = X86_64_SSEDF_CLASS;
2454 return 2;
2455 case XCmode:
2456 classes[0] = X86_64_COMPLEX_X87_CLASS;
2457 return 1;
2458 case TCmode:
2459 /* This mode is larger than 16 bytes. */
2460 return 0;
2461 case V4SFmode:
2462 case V4SImode:
2463 case V16QImode:
2464 case V8HImode:
2465 case V2DFmode:
2466 case V2DImode:
2467 classes[0] = X86_64_SSE_CLASS;
2468 classes[1] = X86_64_SSEUP_CLASS;
2469 return 2;
2470 case V2SFmode:
2471 case V2SImode:
2472 case V4HImode:
2473 case V8QImode:
2474 classes[0] = X86_64_SSE_CLASS;
2475 return 1;
2476 case BLKmode:
2477 case VOIDmode:
2478 return 0;
2479 default:
2480 if (VECTOR_MODE_P (mode))
2481 {
2482 if (bytes > 16)
2483 return 0;
2484 if (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT)
2485 {
2486 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2487 classes[0] = X86_64_INTEGERSI_CLASS;
2488 else
2489 classes[0] = X86_64_INTEGER_CLASS;
2490 classes[1] = X86_64_INTEGER_CLASS;
2491 return 1 + (bytes > 8);
2492 }
2493 }
2494 abort ();
2495 }
2496 }
2497
2498 /* Examine the argument and set the number of registers required in each
2499 class. Return 0 iff the parameter should be passed in memory. */
2500 static int
2501 examine_argument (enum machine_mode mode, tree type, int in_return,
2502 int *int_nregs, int *sse_nregs)
2503 {
2504 enum x86_64_reg_class class[MAX_CLASSES];
2505 int n = classify_argument (mode, type, class, 0);
2506
2507 *int_nregs = 0;
2508 *sse_nregs = 0;
2509 if (!n)
2510 return 0;
2511 for (n--; n >= 0; n--)
2512 switch (class[n])
2513 {
2514 case X86_64_INTEGER_CLASS:
2515 case X86_64_INTEGERSI_CLASS:
2516 (*int_nregs)++;
2517 break;
2518 case X86_64_SSE_CLASS:
2519 case X86_64_SSESF_CLASS:
2520 case X86_64_SSEDF_CLASS:
2521 (*sse_nregs)++;
2522 break;
2523 case X86_64_NO_CLASS:
2524 case X86_64_SSEUP_CLASS:
2525 break;
2526 case X86_64_X87_CLASS:
2527 case X86_64_X87UP_CLASS:
2528 if (!in_return)
2529 return 0;
2530 break;
2531 case X86_64_COMPLEX_X87_CLASS:
2532 return in_return ? 2 : 0;
2533 case X86_64_MEMORY_CLASS:
2534 abort ();
2535 }
2536 return 1;
2537 }
2538
2539 /* Construct container for the argument used by GCC interface. See
2540 FUNCTION_ARG for the detailed description. */
2541
2542 static rtx
2543 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
2544 tree type, int in_return, int nintregs, int nsseregs,
2545 const int *intreg, int sse_regno)
2546 {
2547 enum machine_mode tmpmode;
2548 int bytes =
2549 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2550 enum x86_64_reg_class class[MAX_CLASSES];
2551 int n;
2552 int i;
2553 int nexps = 0;
2554 int needed_sseregs, needed_intregs;
2555 rtx exp[MAX_CLASSES];
2556 rtx ret;
2557
2558 n = classify_argument (mode, type, class, 0);
2559 if (TARGET_DEBUG_ARG)
2560 {
2561 if (!n)
2562 fprintf (stderr, "Memory class\n");
2563 else
2564 {
2565 fprintf (stderr, "Classes:");
2566 for (i = 0; i < n; i++)
2567 {
2568 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2569 }
2570 fprintf (stderr, "\n");
2571 }
2572 }
2573 if (!n)
2574 return NULL;
2575 if (!examine_argument (mode, type, in_return, &needed_intregs,
2576 &needed_sseregs))
2577 return NULL;
2578 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2579 return NULL;
2580
2581 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
2582 some less clueful developer tries to use floating-point anyway. */
2583 if (needed_sseregs && !TARGET_SSE)
2584 {
2585 static bool issued_error;
2586 if (!issued_error)
2587 {
2588 issued_error = true;
2589 if (in_return)
2590 error ("SSE register return with SSE disabled");
2591 else
2592 error ("SSE register argument with SSE disabled");
2593 }
2594 return NULL;
2595 }
2596
2597 /* First construct simple cases. Avoid SCmode, since we want to use
2598 a single register to pass this type. */
2599 if (n == 1 && mode != SCmode)
2600 switch (class[0])
2601 {
2602 case X86_64_INTEGER_CLASS:
2603 case X86_64_INTEGERSI_CLASS:
2604 return gen_rtx_REG (mode, intreg[0]);
2605 case X86_64_SSE_CLASS:
2606 case X86_64_SSESF_CLASS:
2607 case X86_64_SSEDF_CLASS:
2608 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
2609 case X86_64_X87_CLASS:
2610 case X86_64_COMPLEX_X87_CLASS:
2611 return gen_rtx_REG (mode, FIRST_STACK_REG);
2612 case X86_64_NO_CLASS:
2613 /* Zero sized array, struct or class. */
2614 return NULL;
2615 default:
2616 abort ();
2617 }
2618 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
2619 && mode != BLKmode)
2620 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2621 if (n == 2
2622 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2623 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
2624 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2625 && class[1] == X86_64_INTEGER_CLASS
2626 && (mode == CDImode || mode == TImode || mode == TFmode)
2627 && intreg[0] + 1 == intreg[1])
2628 return gen_rtx_REG (mode, intreg[0]);
2629
2630 /* Otherwise figure out the entries of the PARALLEL. */
2631 for (i = 0; i < n; i++)
2632 {
2633 switch (class[i])
2634 {
2635 case X86_64_NO_CLASS:
2636 break;
2637 case X86_64_INTEGER_CLASS:
2638 case X86_64_INTEGERSI_CLASS:
2639 /* Merge TImodes on aligned occasions here too. */
2640 if (i * 8 + 8 > bytes)
2641 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2642 else if (class[i] == X86_64_INTEGERSI_CLASS)
2643 tmpmode = SImode;
2644 else
2645 tmpmode = DImode;
2646 /* We've requested 24 bytes for which we don't have a mode. Use DImode. */
2647 if (tmpmode == BLKmode)
2648 tmpmode = DImode;
2649 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2650 gen_rtx_REG (tmpmode, *intreg),
2651 GEN_INT (i*8));
2652 intreg++;
2653 break;
2654 case X86_64_SSESF_CLASS:
2655 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2656 gen_rtx_REG (SFmode,
2657 SSE_REGNO (sse_regno)),
2658 GEN_INT (i*8));
2659 sse_regno++;
2660 break;
2661 case X86_64_SSEDF_CLASS:
2662 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2663 gen_rtx_REG (DFmode,
2664 SSE_REGNO (sse_regno)),
2665 GEN_INT (i*8));
2666 sse_regno++;
2667 break;
2668 case X86_64_SSE_CLASS:
2669 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2670 tmpmode = TImode;
2671 else
2672 tmpmode = DImode;
2673 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2674 gen_rtx_REG (tmpmode,
2675 SSE_REGNO (sse_regno)),
2676 GEN_INT (i*8));
2677 if (tmpmode == TImode)
2678 i++;
2679 sse_regno++;
2680 break;
2681 default:
2682 abort ();
2683 }
2684 }
2685
2686 /* Empty aligned struct, union or class. */
2687 if (nexps == 0)
2688 return NULL;
2689
2690 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2691 for (i = 0; i < nexps; i++)
2692 XVECEXP (ret, 0, i) = exp [i];
2693 return ret;
2694 }
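/* Illustrative sketch of the result: for the 16-byte
     struct s { double d; int a; int b; }
   classified as SSEDF followed by INTEGER, the loop above builds a
   PARALLEL of two EXPR_LISTs, roughly
     (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                (expr_list (reg:DI di)  (const_int 8))])
   i.e. the first eightbyte in an SSE register and the second in an
   integer register (di being the first integer argument register).  */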
2695
2696 /* Update the data in CUM to advance over an argument
2697 of mode MODE and data type TYPE.
2698 (TYPE is null for libcalls where that information may not be available.) */
2699
2700 void
2701 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
2702 tree type, int named)
2703 {
2704 int bytes =
2705 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2706 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2707
2708 if (type)
2709 mode = type_natural_mode (type);
2710
2711 if (TARGET_DEBUG_ARG)
2712 fprintf (stderr, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
2713 "mode=%s, named=%d)\n\n",
2714 words, cum->words, cum->nregs, cum->sse_nregs,
2715 GET_MODE_NAME (mode), named);
2716
2717 if (TARGET_64BIT)
2718 {
2719 int int_nregs, sse_nregs;
2720 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2721 cum->words += words;
2722 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2723 {
2724 cum->nregs -= int_nregs;
2725 cum->sse_nregs -= sse_nregs;
2726 cum->regno += int_nregs;
2727 cum->sse_regno += sse_nregs;
2728 }
2729 else
2730 cum->words += words;
2731 }
2732 else
2733 {
2734 switch (mode)
2735 {
2736 default:
2737 break;
2738
2739 case BLKmode:
2740 if (bytes < 0)
2741 break;
2742 /* FALLTHRU */
2743
2744 case DImode:
2745 case SImode:
2746 case HImode:
2747 case QImode:
2748 cum->words += words;
2749 cum->nregs -= words;
2750 cum->regno += words;
2751
2752 if (cum->nregs <= 0)
2753 {
2754 cum->nregs = 0;
2755 cum->regno = 0;
2756 }
2757 break;
2758
2759 case DFmode:
2760 if (!TARGET_SSE2)
2761 break;
2762 case SFmode:
2763 if (!cum->float_in_sse)
2764 break;
2765 /* FALLTHRU */
2766
2767 case TImode:
2768 case V16QImode:
2769 case V8HImode:
2770 case V4SImode:
2771 case V2DImode:
2772 case V4SFmode:
2773 case V2DFmode:
2774 if (!type || !AGGREGATE_TYPE_P (type))
2775 {
2776 cum->sse_words += words;
2777 cum->sse_nregs -= 1;
2778 cum->sse_regno += 1;
2779 if (cum->sse_nregs <= 0)
2780 {
2781 cum->sse_nregs = 0;
2782 cum->sse_regno = 0;
2783 }
2784 }
2785 break;
2786
2787 case V8QImode:
2788 case V4HImode:
2789 case V2SImode:
2790 case V2SFmode:
2791 if (!type || !AGGREGATE_TYPE_P (type))
2792 {
2793 cum->mmx_words += words;
2794 cum->mmx_nregs -= 1;
2795 cum->mmx_regno += 1;
2796 if (cum->mmx_nregs <= 0)
2797 {
2798 cum->mmx_nregs = 0;
2799 cum->mmx_regno = 0;
2800 }
2801 }
2802 break;
2803 }
2804 }
2805 }
2806
2807 /* Define where to put the arguments to a function.
2808 Value is zero to push the argument on the stack,
2809 or a hard register in which to store the argument.
2810
2811 MODE is the argument's machine mode.
2812 TYPE is the data type of the argument (as a tree).
2813 This is null for libcalls where that information may
2814 not be available.
2815 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2816 the preceding args and about the function being called.
2817 NAMED is nonzero if this argument is a named parameter
2818 (otherwise it is an extra parameter matching an ellipsis). */
2819
2820 rtx
2821 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode orig_mode,
2822 tree type, int named)
2823 {
2824 enum machine_mode mode = orig_mode;
2825 rtx ret = NULL_RTX;
2826 int bytes =
2827 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2828 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2829 static bool warnedsse, warnedmmx;
2830
2831 /* To simplify the code below, represent vector types with a vector mode
2832 even if MMX/SSE are not active. */
2833 if (type && TREE_CODE (type) == VECTOR_TYPE)
2834 mode = type_natural_mode (type);
2835
2836 /* Handle a hidden AL argument containing the number of registers for varargs
2837 x86-64 functions. For the i386 ABI just return constm1_rtx to avoid
2838 any AL settings. */
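  /* Illustrative note: for a varargs call such as printf ("%f", x) with a
     double X on x86-64, one SSE register is used, so the value computed
     here is 1; it is placed in AL before the call and tells the callee how
     many vector registers hold arguments.  */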
2839 if (mode == VOIDmode)
2840 {
2841 if (TARGET_64BIT)
2842 return GEN_INT (cum->maybe_vaarg
2843 ? (cum->sse_nregs < 0
2844 ? SSE_REGPARM_MAX
2845 : cum->sse_regno)
2846 : -1);
2847 else
2848 return constm1_rtx;
2849 }
2850 if (TARGET_64BIT)
2851 ret = construct_container (mode, orig_mode, type, 0, cum->nregs,
2852 cum->sse_nregs,
2853 &x86_64_int_parameter_registers [cum->regno],
2854 cum->sse_regno);
2855 else
2856 switch (mode)
2857 {
2858 /* For now, pass fp/complex values on the stack. */
2859 default:
2860 break;
2861
2862 case BLKmode:
2863 if (bytes < 0)
2864 break;
2865 /* FALLTHRU */
2866 case DImode:
2867 case SImode:
2868 case HImode:
2869 case QImode:
2870 if (words <= cum->nregs)
2871 {
2872 int regno = cum->regno;
2873
2874 /* Fastcall allocates the first two DWORD (SImode) or
2875 smaller arguments to ECX and EDX. */
2876 if (cum->fastcall)
2877 {
2878 if (mode == BLKmode || mode == DImode)
2879 break;
2880
2881 /* ECX not EAX is the first allocated register. */
2882 if (regno == 0)
2883 regno = 2;
2884 }
2885 ret = gen_rtx_REG (mode, regno);
2886 }
2887 break;
2888 case DFmode:
2889 if (!TARGET_SSE2)
2890 break;
2891 case SFmode:
2892 if (!cum->float_in_sse)
2893 break;
2894 /* FALLTHRU */
2895 case TImode:
2896 case V16QImode:
2897 case V8HImode:
2898 case V4SImode:
2899 case V2DImode:
2900 case V4SFmode:
2901 case V2DFmode:
2902 if (!type || !AGGREGATE_TYPE_P (type))
2903 {
2904 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
2905 {
2906 warnedsse = true;
2907 warning ("SSE vector argument without SSE enabled "
2908 "changes the ABI");
2909 }
2910 if (cum->sse_nregs)
2911 ret = gen_reg_or_parallel (mode, orig_mode,
2912 cum->sse_regno + FIRST_SSE_REG);
2913 }
2914 break;
2915 case V8QImode:
2916 case V4HImode:
2917 case V2SImode:
2918 case V2SFmode:
2919 if (!type || !AGGREGATE_TYPE_P (type))
2920 {
2921 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
2922 {
2923 warnedmmx = true;
2924 warning ("MMX vector argument without MMX enabled "
2925 "changes the ABI");
2926 }
2927 if (cum->mmx_nregs)
2928 ret = gen_reg_or_parallel (mode, orig_mode,
2929 cum->mmx_regno + FIRST_MMX_REG);
2930 }
2931 break;
2932 }
2933
2934 if (TARGET_DEBUG_ARG)
2935 {
2936 fprintf (stderr,
2937 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2938 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2939
2940 if (ret)
2941 print_simple_rtl (stderr, ret);
2942 else
2943 fprintf (stderr, ", stack");
2944
2945 fprintf (stderr, " )\n");
2946 }
2947
2948 return ret;
2949 }
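/* Illustrative example: for
     void __attribute__((fastcall)) f (int a, int b);
   the code above places A in %ecx (regno 0 is remapped to 2) and B in
   %edx, while DImode or BLKmode fastcall arguments fall back to the
   stack.  */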
2950
2951 /* A C expression that indicates when an argument must be passed by
2952 reference. If nonzero for an argument, a copy of that argument is
2953 made in memory and a pointer to the argument is passed instead of
2954 the argument itself. The pointer is passed in whatever way is
2955 appropriate for passing a pointer to that type. */
2956
2957 static bool
2958 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
2959 enum machine_mode mode ATTRIBUTE_UNUSED,
2960 tree type, bool named ATTRIBUTE_UNUSED)
2961 {
2962 if (!TARGET_64BIT)
2963 return 0;
2964
2965 if (type && int_size_in_bytes (type) == -1)
2966 {
2967 if (TARGET_DEBUG_ARG)
2968 fprintf (stderr, "function_arg_pass_by_reference\n");
2969 return 1;
2970 }
2971
2972 return 0;
2973 }
2974
2975 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
2976 ABI. Only called if TARGET_SSE. */
2977 static bool
2978 contains_128bit_aligned_vector_p (tree type)
2979 {
2980 enum machine_mode mode = TYPE_MODE (type);
2981 if (SSE_REG_MODE_P (mode)
2982 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
2983 return true;
2984 if (TYPE_ALIGN (type) < 128)
2985 return false;
2986
2987 if (AGGREGATE_TYPE_P (type))
2988 {
2989 /* Walk the aggregates recursively. */
2990 if (TREE_CODE (type) == RECORD_TYPE
2991 || TREE_CODE (type) == UNION_TYPE
2992 || TREE_CODE (type) == QUAL_UNION_TYPE)
2993 {
2994 tree field;
2995
2996 if (TYPE_BINFO (type))
2997 {
2998 tree binfo, base_binfo;
2999 int i;
3000
3001 for (binfo = TYPE_BINFO (type), i = 0;
3002 BINFO_BASE_ITERATE (binfo, i, base_binfo); i++)
3003 if (contains_128bit_aligned_vector_p (BINFO_TYPE (base_binfo)))
3004 return true;
3005 }
3006 /* And now merge the fields of structure. */
3007 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3008 {
3009 if (TREE_CODE (field) == FIELD_DECL
3010 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
3011 return true;
3012 }
3013 }
3014 /* Just for use if some language passes arrays by value. */
3015 else if (TREE_CODE (type) == ARRAY_TYPE)
3016 {
3017 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
3018 return true;
3019 }
3020 else
3021 abort ();
3022 }
3023 return false;
3024 }
3025
3026 /* Gives the alignment boundary, in bits, of an argument with the
3027 specified mode and type. */
3028
3029 int
3030 ix86_function_arg_boundary (enum machine_mode mode, tree type)
3031 {
3032 int align;
3033 if (type)
3034 align = TYPE_ALIGN (type);
3035 else
3036 align = GET_MODE_ALIGNMENT (mode);
3037 if (align < PARM_BOUNDARY)
3038 align = PARM_BOUNDARY;
3039 if (!TARGET_64BIT)
3040 {
3041 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
3042 make an exception for SSE modes since these require 128bit
3043 alignment.
3044
3045 The handling here differs from field_alignment. ICC aligns MMX
3046 arguments to 4 byte boundaries, while structure fields are aligned
3047 to 8 byte boundaries. */
3048 if (!TARGET_SSE)
3049 align = PARM_BOUNDARY;
3050 else if (!type)
3051 {
3052 if (!SSE_REG_MODE_P (mode))
3053 align = PARM_BOUNDARY;
3054 }
3055 else
3056 {
3057 if (!contains_128bit_aligned_vector_p (type))
3058 align = PARM_BOUNDARY;
3059 }
3060 }
3061 if (align > 128)
3062 align = 128;
3063 return align;
3064 }
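/* Illustrative examples of the 32-bit rules above: a plain int argument
   gets PARM_BOUNDARY (32-bit) alignment; an __m128 argument, or a
   structure containing one, gets 128-bit alignment when SSE is
   enabled.  */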
3065
3066 /* Return true if REGNO is a possible register number for a function value. */
3067 bool
3068 ix86_function_value_regno_p (int regno)
3069 {
3070 if (!TARGET_64BIT)
3071 {
3072 return ((regno) == 0
3073 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
3074 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
3075 }
3076 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
3077 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
3078 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
3079 }
3080
3081 /* Define how to find the value returned by a function.
3082 VALTYPE is the data type of the value (as a tree).
3083 If the precise function being called is known, FUNC is its FUNCTION_DECL;
3084 otherwise, FUNC is 0. */
3085 rtx
3086 ix86_function_value (tree valtype, tree func)
3087 {
3088 enum machine_mode natmode = type_natural_mode (valtype);
3089
3090 if (TARGET_64BIT)
3091 {
3092 rtx ret = construct_container (natmode, TYPE_MODE (valtype), valtype,
3093 1, REGPARM_MAX, SSE_REGPARM_MAX,
3094 x86_64_int_return_registers, 0);
3095 /* For zero sized structures, construct_container returns NULL, but we need
3096 to keep the rest of the compiler happy by returning a meaningful value. */
3097 if (!ret)
3098 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
3099 return ret;
3100 }
3101 else
3102 return gen_rtx_REG (TYPE_MODE (valtype), ix86_value_regno (natmode, func));
3103 }
3104
3105 /* Return nonzero iff TYPE is returned in memory. */
3106 int
3107 ix86_return_in_memory (tree type)
3108 {
3109 int needed_intregs, needed_sseregs, size;
3110 enum machine_mode mode = type_natural_mode (type);
3111
3112 if (TARGET_64BIT)
3113 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
3114
3115 if (mode == BLKmode)
3116 return 1;
3117
3118 size = int_size_in_bytes (type);
3119
3120 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
3121 return 0;
3122
3123 if (VECTOR_MODE_P (mode) || mode == TImode)
3124 {
3125 /* User-created vectors small enough to fit in EAX. */
3126 if (size < 8)
3127 return 0;
3128
3129 /* MMX/3dNow values are returned on the stack, since we've
3130 got to EMMS/FEMMS before returning. */
3131 if (size == 8)
3132 return 1;
3133
3134 /* SSE values are returned in XMM0, except when it doesn't exist. */
3135 if (size == 16)
3136 return (TARGET_SSE ? 0 : 1);
3137 }
3138
3139 if (mode == XFmode)
3140 return 0;
3141
3142 if (size > 12)
3143 return 1;
3144 return 0;
3145 }
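/* Illustrative 32-bit examples of the rules above: an 8-byte MMX vector is
   returned in memory (we must EMMS before returning); a 16-byte SSE vector
   is returned in %xmm0 when SSE is enabled and in memory otherwise;
   aggregates larger than 12 bytes that match none of the earlier cases go
   to memory.  */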
3146
3147 /* When returning SSE vector types, we have a choice of either
3148 (1) being ABI incompatible with a -march switch, or
3149 (2) generating an error.
3150 Given no good solution, I think the safest thing is one warning.
3151 The user won't be able to use -Werror, but....
3152
3153 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
3154 called in response to actually generating a caller or callee that
3155 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
3156 via aggregate_value_p for general type probing from tree-ssa. */
3157
3158 static rtx
3159 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
3160 {
3161 static bool warned;
3162
3163 if (!TARGET_SSE && type && !warned)
3164 {
3165 /* Look at the return type of the function, not the function type. */
3166 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
3167
3168 if (mode == TImode
3169 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3170 {
3171 warned = true;
3172 warning ("SSE vector return without SSE enabled changes the ABI");
3173 }
3174 }
3175
3176 return NULL;
3177 }
3178
3179 /* Define how to find the value returned by a library function
3180 assuming the value has mode MODE. */
3181 rtx
3182 ix86_libcall_value (enum machine_mode mode)
3183 {
3184 if (TARGET_64BIT)
3185 {
3186 switch (mode)
3187 {
3188 case SFmode:
3189 case SCmode:
3190 case DFmode:
3191 case DCmode:
3192 case TFmode:
3193 return gen_rtx_REG (mode, FIRST_SSE_REG);
3194 case XFmode:
3195 case XCmode:
3196 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
3197 case TCmode:
3198 return NULL;
3199 default:
3200 return gen_rtx_REG (mode, 0);
3201 }
3202 }
3203 else
3204 return gen_rtx_REG (mode, ix86_value_regno (mode, NULL));
3205 }
3206
3207 /* Given a mode, return the register to use for a return value. */
3208
3209 static int
3210 ix86_value_regno (enum machine_mode mode, tree func)
3211 {
3212 gcc_assert (!TARGET_64BIT);
3213
3214 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
3215 we prevent this case when sse is not available. */
3216 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3217 return FIRST_SSE_REG;
3218
3219 /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values. */
3220 if (GET_MODE_CLASS (mode) != MODE_FLOAT || !TARGET_FLOAT_RETURNS_IN_80387)
3221 return 0;
3222
3223 /* Floating point return values in %st(0), except for local functions when
3224 SSE math is enabled. */
3225 if (func && SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH
3226 && flag_unit_at_a_time)
3227 {
3228 struct cgraph_local_info *i = cgraph_local_info (func);
3229 if (i && i->local)
3230 return FIRST_SSE_REG;
3231 }
3232
3233 return FIRST_FLOAT_REG;
3234 }
3235 \f
3236 /* Create the va_list data type. */
3237
3238 static tree
3239 ix86_build_builtin_va_list (void)
3240 {
3241 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
3242
3243 /* For i386 we use plain pointer to argument area. */
3244 if (!TARGET_64BIT)
3245 return build_pointer_type (char_type_node);
3246
3247 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
3248 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
3249
3250 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
3251 unsigned_type_node);
3252 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
3253 unsigned_type_node);
3254 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
3255 ptr_type_node);
3256 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
3257 ptr_type_node);
3258
3259 va_list_gpr_counter_field = f_gpr;
3260 va_list_fpr_counter_field = f_fpr;
3261
3262 DECL_FIELD_CONTEXT (f_gpr) = record;
3263 DECL_FIELD_CONTEXT (f_fpr) = record;
3264 DECL_FIELD_CONTEXT (f_ovf) = record;
3265 DECL_FIELD_CONTEXT (f_sav) = record;
3266
3267 TREE_CHAIN (record) = type_decl;
3268 TYPE_NAME (record) = type_decl;
3269 TYPE_FIELDS (record) = f_gpr;
3270 TREE_CHAIN (f_gpr) = f_fpr;
3271 TREE_CHAIN (f_fpr) = f_ovf;
3272 TREE_CHAIN (f_ovf) = f_sav;
3273
3274 layout_type (record);
3275
3276 /* The correct type is an array type of one element. */
3277 return build_array_type (record, build_index_type (size_zero_node));
3278 }
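/* For reference (illustrative): the record built above corresponds to the
   C-level va_list used by the x86-64 ABI:

     typedef struct __va_list_tag {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } __va_list_tag;
     typedef __va_list_tag va_list[1];
   */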
3279
3280 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
3281
3282 static void
3283 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3284 tree type, int *pretend_size ATTRIBUTE_UNUSED,
3285 int no_rtl)
3286 {
3287 CUMULATIVE_ARGS next_cum;
3288 rtx save_area = NULL_RTX, mem;
3289 rtx label;
3290 rtx label_ref;
3291 rtx tmp_reg;
3292 rtx nsse_reg;
3293 int set;
3294 tree fntype;
3295 int stdarg_p;
3296 int i;
3297
3298 if (!TARGET_64BIT)
3299 return;
3300
3301 if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
3302 return;
3303
3304 /* Indicate that we need to allocate stack space for the varargs save area. */
3305 ix86_save_varrargs_registers = 1;
3306
3307 cfun->stack_alignment_needed = 128;
3308
3309 fntype = TREE_TYPE (current_function_decl);
3310 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
3311 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
3312 != void_type_node));
3313
3314 /* For varargs, we do not want to skip the dummy va_dcl argument.
3315 For stdargs, we do want to skip the last named argument. */
3316 next_cum = *cum;
3317 if (stdarg_p)
3318 function_arg_advance (&next_cum, mode, type, 1);
3319
3320 if (!no_rtl)
3321 save_area = frame_pointer_rtx;
3322
3323 set = get_varargs_alias_set ();
3324
3325 for (i = next_cum.regno;
3326 i < ix86_regparm
3327 && i < next_cum.regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
3328 i++)
3329 {
3330 mem = gen_rtx_MEM (Pmode,
3331 plus_constant (save_area, i * UNITS_PER_WORD));
3332 set_mem_alias_set (mem, set);
3333 emit_move_insn (mem, gen_rtx_REG (Pmode,
3334 x86_64_int_parameter_registers[i]));
3335 }
3336
3337 if (next_cum.sse_nregs && cfun->va_list_fpr_size)
3338 {
3339 /* Now emit code to save SSE registers. The AX parameter contains the number
3340 of SSE parameter registers used to call this function. We use the
3341 sse_prologue_save insn template, which produces a computed jump across
3342 the SSE saves. We need some preparation work to get this working. */
3343
3344 label = gen_label_rtx ();
3345 label_ref = gen_rtx_LABEL_REF (Pmode, label);
3346
3347 /* Compute the address to jump to:
3348 label - eax*4 + nnamed_sse_arguments*4. */
3349 tmp_reg = gen_reg_rtx (Pmode);
3350 nsse_reg = gen_reg_rtx (Pmode);
3351 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
3352 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3353 gen_rtx_MULT (Pmode, nsse_reg,
3354 GEN_INT (4))));
3355 if (next_cum.sse_regno)
3356 emit_move_insn
3357 (nsse_reg,
3358 gen_rtx_CONST (DImode,
3359 gen_rtx_PLUS (DImode,
3360 label_ref,
3361 GEN_INT (next_cum.sse_regno * 4))));
3362 else
3363 emit_move_insn (nsse_reg, label_ref);
3364 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
3365
3366 /* Compute the address of the memory block we save into. We always use a
3367 pointer pointing 127 bytes after the first byte to store, which keeps
3368 the instruction size limited to 4 bytes (offsets then fit in a signed byte). */
3369 tmp_reg = gen_reg_rtx (Pmode);
3370 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3371 plus_constant (save_area,
3372 8 * REGPARM_MAX + 127)));
3373 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
3374 set_mem_alias_set (mem, set);
3375 set_mem_align (mem, BITS_PER_WORD);
3376
3377 /* And finally do the dirty job! */
3378 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
3379 GEN_INT (next_cum.sse_regno), label));
3380 }
3381
3382 }
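/* A sketch of the register save area laid out by the code above, assuming
   the usual 64-bit values REGPARM_MAX == 6 and SSE_REGPARM_MAX == 8:

       bytes   0 ..  47   %rdi, %rsi, %rdx, %rcx, %r8, %r9  (8 bytes each)
       bytes  48 .. 175   %xmm0 .. %xmm7                    (16 bytes each)

   Only registers not already consumed by named arguments need to be
   stored, which is what the loop bounds above implement.  */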
3383
3384 /* Implement va_start. */
3385
3386 void
3387 ix86_va_start (tree valist, rtx nextarg)
3388 {
3389 HOST_WIDE_INT words, n_gpr, n_fpr;
3390 tree f_gpr, f_fpr, f_ovf, f_sav;
3391 tree gpr, fpr, ovf, sav, t;
3392
3393 /* Only the 64-bit target needs something special. */
3394 if (!TARGET_64BIT)
3395 {
3396 std_expand_builtin_va_start (valist, nextarg);
3397 return;
3398 }
3399
3400 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3401 f_fpr = TREE_CHAIN (f_gpr);
3402 f_ovf = TREE_CHAIN (f_fpr);
3403 f_sav = TREE_CHAIN (f_ovf);
3404
3405 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3406 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
3407 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
3408 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
3409 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
3410
3411 /* Count number of gp and fp argument registers used. */
3412 words = current_function_args_info.words;
3413 n_gpr = current_function_args_info.regno;
3414 n_fpr = current_function_args_info.sse_regno;
3415
3416 if (TARGET_DEBUG_ARG)
3417 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
3418 (int) words, (int) n_gpr, (int) n_fpr);
3419
3420 if (cfun->va_list_gpr_size)
3421 {
3422 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
3423 build_int_cst (NULL_TREE, n_gpr * 8));
3424 TREE_SIDE_EFFECTS (t) = 1;
3425 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3426 }
3427
3428 if (cfun->va_list_fpr_size)
3429 {
3430 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
3431 build_int_cst (NULL_TREE, n_fpr * 16 + 8*REGPARM_MAX));
3432 TREE_SIDE_EFFECTS (t) = 1;
3433 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3434 }
3435
3436 /* Find the overflow area. */
3437 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
3438 if (words != 0)
3439 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
3440 build_int_cst (NULL_TREE, words * UNITS_PER_WORD));
3441 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3442 TREE_SIDE_EFFECTS (t) = 1;
3443 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3444
3445 if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
3446 {
3447 /* Find the register save area.
3448 The prologue of the function saves it right above the stack frame. */
3449 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
3450 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
3451 TREE_SIDE_EFFECTS (t) = 1;
3452 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3453 }
3454 }
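/* As an example, for  int f (int a, ...)  the expansion above initializes
   the va_list fields roughly as follows (one named integer argument has
   already been counted by this point):

       gp_offset         = 1 * 8;
       fp_offset         = 0 * 16 + 8 * REGPARM_MAX;
       overflow_arg_area = incoming argument pointer + words * UNITS_PER_WORD;
       reg_save_area     = start of the block saved by the prologue.  */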
3455
3456 /* Implement va_arg. */
3457
3458 tree
3459 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
3460 {
3461 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
3462 tree f_gpr, f_fpr, f_ovf, f_sav;
3463 tree gpr, fpr, ovf, sav, t;
3464 int size, rsize;
3465 tree lab_false, lab_over = NULL_TREE;
3466 tree addr, t2;
3467 rtx container;
3468 int indirect_p = 0;
3469 tree ptrtype;
3470 enum machine_mode nat_mode;
3471
3472 /* Only the 64-bit target needs something special. */
3473 if (!TARGET_64BIT)
3474 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
3475
3476 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3477 f_fpr = TREE_CHAIN (f_gpr);
3478 f_ovf = TREE_CHAIN (f_fpr);
3479 f_sav = TREE_CHAIN (f_ovf);
3480
3481 valist = build_va_arg_indirect_ref (valist);
3482 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
3483 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
3484 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
3485 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
3486
3487 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
3488 if (indirect_p)
3489 type = build_pointer_type (type);
3490 size = int_size_in_bytes (type);
3491 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3492
3493 nat_mode = type_natural_mode (type);
3494 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
3495 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
3496
3497 /* Pull the value out of the saved registers. */
3498
3499 addr = create_tmp_var (ptr_type_node, "addr");
3500 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
3501
3502 if (container)
3503 {
3504 int needed_intregs, needed_sseregs;
3505 bool need_temp;
3506 tree int_addr, sse_addr;
3507
3508 lab_false = create_artificial_label ();
3509 lab_over = create_artificial_label ();
3510
3511 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
3512
3513 need_temp = (!REG_P (container)
3514 && ((needed_intregs && TYPE_ALIGN (type) > 64)
3515 || TYPE_ALIGN (type) > 128));
3516
3517 /* In case we are passing a structure, verify that it is a consecutive block
3518 in the register save area. If not, we need to do moves. */
3519 if (!need_temp && !REG_P (container))
3520 {
3521 /* Verify that all registers are strictly consecutive */
3522 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
3523 {
3524 int i;
3525
3526 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3527 {
3528 rtx slot = XVECEXP (container, 0, i);
3529 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
3530 || INTVAL (XEXP (slot, 1)) != i * 16)
3531 need_temp = 1;
3532 }
3533 }
3534 else
3535 {
3536 int i;
3537
3538 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3539 {
3540 rtx slot = XVECEXP (container, 0, i);
3541 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
3542 || INTVAL (XEXP (slot, 1)) != i * 8)
3543 need_temp = 1;
3544 }
3545 }
3546 }
3547 if (!need_temp)
3548 {
3549 int_addr = addr;
3550 sse_addr = addr;
3551 }
3552 else
3553 {
3554 int_addr = create_tmp_var (ptr_type_node, "int_addr");
3555 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
3556 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
3557 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
3558 }
3559
3560 /* First ensure that we fit completely in registers. */
3561 if (needed_intregs)
3562 {
3563 t = build_int_cst (TREE_TYPE (gpr),
3564 (REGPARM_MAX - needed_intregs + 1) * 8);
3565 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
3566 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
3567 t = build (COND_EXPR, void_type_node, t, t2, NULL_TREE);
3568 gimplify_and_add (t, pre_p);
3569 }
3570 if (needed_sseregs)
3571 {
3572 t = build_int_cst (TREE_TYPE (fpr),
3573 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
3574 + REGPARM_MAX * 8);
3575 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
3576 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
3577 t = build (COND_EXPR, void_type_node, t, t2, NULL_TREE);
3578 gimplify_and_add (t, pre_p);
3579 }
3580
3581 /* Compute index to start of area used for integer regs. */
3582 if (needed_intregs)
3583 {
3584 /* int_addr = gpr + sav; */
3585 t = fold_convert (ptr_type_node, gpr);
3586 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
3587 t = build2 (MODIFY_EXPR, void_type_node, int_addr, t);
3588 gimplify_and_add (t, pre_p);
3589 }
3590 if (needed_sseregs)
3591 {
3592 /* sse_addr = fpr + sav; */
3593 t = fold_convert (ptr_type_node, fpr);
3594 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
3595 t = build2 (MODIFY_EXPR, void_type_node, sse_addr, t);
3596 gimplify_and_add (t, pre_p);
3597 }
3598 if (need_temp)
3599 {
3600 int i;
3601 tree temp = create_tmp_var (type, "va_arg_tmp");
3602
3603 /* addr = &temp; */
3604 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
3605 t = build2 (MODIFY_EXPR, void_type_node, addr, t);
3606 gimplify_and_add (t, pre_p);
3607
3608 for (i = 0; i < XVECLEN (container, 0); i++)
3609 {
3610 rtx slot = XVECEXP (container, 0, i);
3611 rtx reg = XEXP (slot, 0);
3612 enum machine_mode mode = GET_MODE (reg);
3613 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
3614 tree addr_type = build_pointer_type (piece_type);
3615 tree src_addr, src;
3616 int src_offset;
3617 tree dest_addr, dest;
3618
3619 if (SSE_REGNO_P (REGNO (reg)))
3620 {
3621 src_addr = sse_addr;
3622 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
3623 }
3624 else
3625 {
3626 src_addr = int_addr;
3627 src_offset = REGNO (reg) * 8;
3628 }
3629 src_addr = fold_convert (addr_type, src_addr);
3630 src_addr = fold (build2 (PLUS_EXPR, addr_type, src_addr,
3631 size_int (src_offset)));
3632 src = build_va_arg_indirect_ref (src_addr);
3633
3634 dest_addr = fold_convert (addr_type, addr);
3635 dest_addr = fold (build2 (PLUS_EXPR, addr_type, dest_addr,
3636 size_int (INTVAL (XEXP (slot, 1)))));
3637 dest = build_va_arg_indirect_ref (dest_addr);
3638
3639 t = build2 (MODIFY_EXPR, void_type_node, dest, src);
3640 gimplify_and_add (t, pre_p);
3641 }
3642 }
3643
3644 if (needed_intregs)
3645 {
3646 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
3647 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
3648 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
3649 gimplify_and_add (t, pre_p);
3650 }
3651 if (needed_sseregs)
3652 {
3653 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
3654 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
3655 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
3656 gimplify_and_add (t, pre_p);
3657 }
3658
3659 t = build1 (GOTO_EXPR, void_type_node, lab_over);
3660 gimplify_and_add (t, pre_p);
3661
3662 t = build1 (LABEL_EXPR, void_type_node, lab_false);
3663 append_to_statement_list (t, pre_p);
3664 }
3665
3666 /* ... otherwise out of the overflow area. */
3667
3668 /* Care for on-stack alignment if needed. */
3669 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
3670 t = ovf;
3671 else
3672 {
3673 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
3674 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf,
3675 build_int_cst (TREE_TYPE (ovf), align - 1));
3676 t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
3677 build_int_cst (TREE_TYPE (t), -align));
3678 }
3679 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
3680
3681 t2 = build2 (MODIFY_EXPR, void_type_node, addr, t);
3682 gimplify_and_add (t2, pre_p);
3683
3684 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
3685 build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD));
3686 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3687 gimplify_and_add (t, pre_p);
3688
3689 if (container)
3690 {
3691 t = build1 (LABEL_EXPR, void_type_node, lab_over);
3692 append_to_statement_list (t, pre_p);
3693 }
3694
3695 ptrtype = build_pointer_type (type);
3696 addr = fold_convert (ptrtype, addr);
3697
3698 if (indirect_p)
3699 addr = build_va_arg_indirect_ref (addr);
3700 return build_va_arg_indirect_ref (addr);
3701 }
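/* The register/stack split generated above follows the usual x86-64 va_arg
   scheme; for a type that needs a single integer register it behaves
   roughly like this C pseudo-code (SSE registers, multi-register
   aggregates and pass-by-reference arguments are handled analogously):

       if (ap->gp_offset >= REGPARM_MAX * 8)
         goto overflow;
       addr = ap->reg_save_area + ap->gp_offset;
       ap->gp_offset += 8;
       goto done;
     overflow:
       addr = ap->overflow_arg_area;
       ap->overflow_arg_area += rsize * UNITS_PER_WORD;
     done:
       result = *(type *) addr;  */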
3702 \f
3703 /* Return nonzero if OPNUM's MEM should be matched
3704 in movabs* patterns. */
3705
3706 int
3707 ix86_check_movabs (rtx insn, int opnum)
3708 {
3709 rtx set, mem;
3710
3711 set = PATTERN (insn);
3712 if (GET_CODE (set) == PARALLEL)
3713 set = XVECEXP (set, 0, 0);
3714 if (GET_CODE (set) != SET)
3715 abort ();
3716 mem = XEXP (set, opnum);
3717 while (GET_CODE (mem) == SUBREG)
3718 mem = SUBREG_REG (mem);
3719 if (GET_CODE (mem) != MEM)
3720 abort ();
3721 return (volatile_ok || !MEM_VOLATILE_P (mem));
3722 }
3723 \f
3724 /* Initialize the table of extra 80387 mathematical constants. */
3725
3726 static void
3727 init_ext_80387_constants (void)
3728 {
3729 static const char * cst[5] =
3730 {
3731 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
3732 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
3733 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
3734 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
3735 "3.1415926535897932385128089594061862044", /* 4: fldpi */
3736 };
3737 int i;
3738
3739 for (i = 0; i < 5; i++)
3740 {
3741 real_from_string (&ext_80387_constants_table[i], cst[i]);
3742 /* Ensure each constant is rounded to XFmode precision. */
3743 real_convert (&ext_80387_constants_table[i],
3744 XFmode, &ext_80387_constants_table[i]);
3745 }
3746
3747 ext_80387_constants_init = 1;
3748 }
3749
3750 /* Return true if the constant is something that can be loaded with
3751 a special instruction. */
3752
3753 int
3754 standard_80387_constant_p (rtx x)
3755 {
3756 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
3757 return -1;
3758
3759 if (x == CONST0_RTX (GET_MODE (x)))
3760 return 1;
3761 if (x == CONST1_RTX (GET_MODE (x)))
3762 return 2;
3763
3764 /* For XFmode constants, try to find a special 80387 instruction when
3765 optimizing for size or on those CPUs that benefit from them. */
3766 if (GET_MODE (x) == XFmode
3767 && (optimize_size || x86_ext_80387_constants & TUNEMASK))
3768 {
3769 REAL_VALUE_TYPE r;
3770 int i;
3771
3772 if (! ext_80387_constants_init)
3773 init_ext_80387_constants ();
3774
3775 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3776 for (i = 0; i < 5; i++)
3777 if (real_identical (&r, &ext_80387_constants_table[i]))
3778 return i + 3;
3779 }
3780
3781 return 0;
3782 }
3783
3784 /* Return the opcode of the special instruction to be used to load
3785 the constant X. */
3786
3787 const char *
3788 standard_80387_constant_opcode (rtx x)
3789 {
3790 switch (standard_80387_constant_p (x))
3791 {
3792 case 1:
3793 return "fldz";
3794 case 2:
3795 return "fld1";
3796 case 3:
3797 return "fldlg2";
3798 case 4:
3799 return "fldln2";
3800 case 5:
3801 return "fldl2e";
3802 case 6:
3803 return "fldl2t";
3804 case 7:
3805 return "fldpi";
3806 }
3807 abort ();
3808 }
3809
3810 /* Return the CONST_DOUBLE representing the 80387 constant that is
3811 loaded by the specified special instruction. The argument IDX
3812 matches the return value from standard_80387_constant_p. */
3813
3814 rtx
3815 standard_80387_constant_rtx (int idx)
3816 {
3817 int i;
3818
3819 if (! ext_80387_constants_init)
3820 init_ext_80387_constants ();
3821
3822 switch (idx)
3823 {
3824 case 3:
3825 case 4:
3826 case 5:
3827 case 6:
3828 case 7:
3829 i = idx - 3;
3830 break;
3831
3832 default:
3833 abort ();
3834 }
3835
3836 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
3837 XFmode);
3838 }
3839
3840 /* Return 1 if X is an FP constant we can load into an SSE register without
3841 using memory. */
3842 int
3843 standard_sse_constant_p (rtx x)
3844 {
3845 if (x == const0_rtx)
3846 return 1;
3847 return (x == CONST0_RTX (GET_MODE (x)));
3848 }
3849
3850 /* Return 1 if OP contains a symbol reference. */
3851
3852 int
3853 symbolic_reference_mentioned_p (rtx op)
3854 {
3855 const char *fmt;
3856 int i;
3857
3858 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
3859 return 1;
3860
3861 fmt = GET_RTX_FORMAT (GET_CODE (op));
3862 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3863 {
3864 if (fmt[i] == 'E')
3865 {
3866 int j;
3867
3868 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3869 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3870 return 1;
3871 }
3872
3873 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3874 return 1;
3875 }
3876
3877 return 0;
3878 }
3879
3880 /* Return 1 if it is appropriate to emit `ret' instructions in the
3881 body of a function. Do this only if the epilogue is simple, needing a
3882 couple of insns. Prior to reloading, we can't tell how many registers
3883 must be saved, so return 0 then. Return 0 if there is no frame
3884 marker to de-allocate. */
3885
3886 int
3887 ix86_can_use_return_insn_p (void)
3888 {
3889 struct ix86_frame frame;
3890
3891 if (! reload_completed || frame_pointer_needed)
3892 return 0;
3893
3894 /* Don't allow more than 32k bytes of pops, since that's all we can do
3895 with one instruction. */
3896 if (current_function_pops_args
3897 && current_function_args_size >= 32768)
3898 return 0;
3899
3900 ix86_compute_frame_layout (&frame);
3901 return frame.to_allocate == 0 && frame.nregs == 0;
3902 }
3903 \f
3904 /* Value should be nonzero if functions must have frame pointers.
3905 Zero means the frame pointer need not be set up (and parms may
3906 be accessed via the stack pointer) in functions that seem suitable. */
3907
3908 int
3909 ix86_frame_pointer_required (void)
3910 {
3911 /* If we accessed previous frames, then the generated code expects
3912 to be able to access the saved ebp value in our frame. */
3913 if (cfun->machine->accesses_prev_frame)
3914 return 1;
3915
3916 /* Several x86 OSes need a frame pointer for other reasons,
3917 usually pertaining to setjmp. */
3918 if (SUBTARGET_FRAME_POINTER_REQUIRED)
3919 return 1;
3920
3921 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
3922 the frame pointer by default. Turn it back on now if we've not
3923 got a leaf function. */
3924 if (TARGET_OMIT_LEAF_FRAME_POINTER
3925 && (!current_function_is_leaf))
3926 return 1;
3927
3928 if (current_function_profile)
3929 return 1;
3930
3931 return 0;
3932 }
3933
3934 /* Record that the current function accesses previous call frames. */
3935
3936 void
3937 ix86_setup_frame_addresses (void)
3938 {
3939 cfun->machine->accesses_prev_frame = 1;
3940 }
3941 \f
3942 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
3943 # define USE_HIDDEN_LINKONCE 1
3944 #else
3945 # define USE_HIDDEN_LINKONCE 0
3946 #endif
3947
3948 static int pic_labels_used;
3949
3950 /* Fills in the label name that should be used for a pc thunk for
3951 the given register. */
3952
3953 static void
3954 get_pc_thunk_name (char name[32], unsigned int regno)
3955 {
3956 if (USE_HIDDEN_LINKONCE)
3957 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
3958 else
3959 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
3960 }
3961
3962
3963 /* This function generates, for -fpic, the pc thunks that load the chosen
3964 register with the return address of the caller and then return. */
3965
3966 void
3967 ix86_file_end (void)
3968 {
3969 rtx xops[2];
3970 int regno;
3971
3972 for (regno = 0; regno < 8; ++regno)
3973 {
3974 char name[32];
3975
3976 if (! ((pic_labels_used >> regno) & 1))
3977 continue;
3978
3979 get_pc_thunk_name (name, regno);
3980
3981 if (USE_HIDDEN_LINKONCE)
3982 {
3983 tree decl;
3984
3985 decl = build_decl (FUNCTION_DECL, get_identifier (name),
3986 error_mark_node);
3987 TREE_PUBLIC (decl) = 1;
3988 TREE_STATIC (decl) = 1;
3989 DECL_ONE_ONLY (decl) = 1;
3990
3991 (*targetm.asm_out.unique_section) (decl, 0);
3992 named_section (decl, NULL, 0);
3993
3994 (*targetm.asm_out.globalize_label) (asm_out_file, name);
3995 fputs ("\t.hidden\t", asm_out_file);
3996 assemble_name (asm_out_file, name);
3997 fputc ('\n', asm_out_file);
3998 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
3999 }
4000 else
4001 {
4002 text_section ();
4003 ASM_OUTPUT_LABEL (asm_out_file, name);
4004 }
4005
4006 xops[0] = gen_rtx_REG (SImode, regno);
4007 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4008 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4009 output_asm_insn ("ret", xops);
4010 }
4011
4012 if (NEED_INDICATE_EXEC_STACK)
4013 file_end_indicate_exec_stack ();
4014 }
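/* Each thunk emitted above looks roughly like (here for %ebx):

       __i686.get_pc_thunk.bx:
           movl (%esp), %ebx
           ret

   i.e. it copies its own return address into the requested register.  */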
4015
4016 /* Emit code for the SET_GOT patterns. */
4017
4018 const char *
4019 output_set_got (rtx dest)
4020 {
4021 rtx xops[3];
4022
4023 xops[0] = dest;
4024 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
4025
4026 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
4027 {
4028 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
4029
4030 if (!flag_pic)
4031 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4032 else
4033 output_asm_insn ("call\t%a2", xops);
4034
4035 #if TARGET_MACHO
4036 /* Output the "canonical" label name ("Lxx$pb") here too. This
4037 is what will be referred to by the Mach-O PIC subsystem. */
4038 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4039 #endif
4040 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4041 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4042
4043 if (flag_pic)
4044 output_asm_insn ("pop{l}\t%0", xops);
4045 }
4046 else
4047 {
4048 char name[32];
4049 get_pc_thunk_name (name, REGNO (dest));
4050 pic_labels_used |= 1 << REGNO (dest);
4051
4052 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4053 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4054 output_asm_insn ("call\t%X2", xops);
4055 }
4056
4057 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4058 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4059 else if (!TARGET_MACHO)
4060 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
4061
4062 return "";
4063 }
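/* For -fpic without TARGET_DEEP_BRANCH_PREDICTION, the sequence printed
   above comes out roughly as:

       call  .L2
   .L2: popl  %ebx
       addl  $_GLOBAL_OFFSET_TABLE_+[.-.L2], %ebx

   With TARGET_DEEP_BRANCH_PREDICTION the call/pop pair is replaced by a
   call to the matching pc thunk, which keeps the return-address
   predictor stack balanced.  */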
4064
4065 /* Generate a "push" pattern for input ARG. */
4066
4067 static rtx
4068 gen_push (rtx arg)
4069 {
4070 return gen_rtx_SET (VOIDmode,
4071 gen_rtx_MEM (Pmode,
4072 gen_rtx_PRE_DEC (Pmode,
4073 stack_pointer_rtx)),
4074 arg);
4075 }
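/* In 32-bit mode the pattern above is simply

       (set (mem:SI (pre_dec:SI (reg:SI esp))) arg)

   which matches the push instruction; in 64-bit mode the same pattern is
   generated in DImode.  */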
4076
4077 /* Return >= 0 if there is an unused call-clobbered register available
4078 for the entire function. */
4079
4080 static unsigned int
4081 ix86_select_alt_pic_regnum (void)
4082 {
4083 if (current_function_is_leaf && !current_function_profile)
4084 {
4085 int i;
4086 for (i = 2; i >= 0; --i)
4087 if (!regs_ever_live[i])
4088 return i;
4089 }
4090
4091 return INVALID_REGNUM;
4092 }
4093
4094 /* Return 1 if we need to save REGNO. */
4095 static int
4096 ix86_save_reg (unsigned int regno, int maybe_eh_return)
4097 {
4098 if (pic_offset_table_rtx
4099 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4100 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4101 || current_function_profile
4102 || current_function_calls_eh_return
4103 || current_function_uses_const_pool))
4104 {
4105 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4106 return 0;
4107 return 1;
4108 }
4109
4110 if (current_function_calls_eh_return && maybe_eh_return)
4111 {
4112 unsigned i;
4113 for (i = 0; ; i++)
4114 {
4115 unsigned test = EH_RETURN_DATA_REGNO (i);
4116 if (test == INVALID_REGNUM)
4117 break;
4118 if (test == regno)
4119 return 1;
4120 }
4121 }
4122
4123 return (regs_ever_live[regno]
4124 && !call_used_regs[regno]
4125 && !fixed_regs[regno]
4126 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4127 }
4128
4129 /* Return number of registers to be saved on the stack. */
4130
4131 static int
4132 ix86_nsaved_regs (void)
4133 {
4134 int nregs = 0;
4135 int regno;
4136
4137 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4138 if (ix86_save_reg (regno, true))
4139 nregs++;
4140 return nregs;
4141 }
4142
4143 /* Return the offset between two registers, one to be eliminated, and the other
4144 its replacement, at the start of a routine. */
4145
4146 HOST_WIDE_INT
4147 ix86_initial_elimination_offset (int from, int to)
4148 {
4149 struct ix86_frame frame;
4150 ix86_compute_frame_layout (&frame);
4151
4152 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4153 return frame.hard_frame_pointer_offset;
4154 else if (from == FRAME_POINTER_REGNUM
4155 && to == HARD_FRAME_POINTER_REGNUM)
4156 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
4157 else
4158 {
4159 if (to != STACK_POINTER_REGNUM)
4160 abort ();
4161 else if (from == ARG_POINTER_REGNUM)
4162 return frame.stack_pointer_offset;
4163 else if (from != FRAME_POINTER_REGNUM)
4164 abort ();
4165 else
4166 return frame.stack_pointer_offset - frame.frame_pointer_offset;
4167 }
4168 }
4169
4170 /* Fill the structure ix86_frame describing the frame of the currently compiled function. */
4171
4172 static void
4173 ix86_compute_frame_layout (struct ix86_frame *frame)
4174 {
4175 HOST_WIDE_INT total_size;
4176 unsigned int stack_alignment_needed;
4177 HOST_WIDE_INT offset;
4178 unsigned int preferred_alignment;
4179 HOST_WIDE_INT size = get_frame_size ();
4180
4181 frame->nregs = ix86_nsaved_regs ();
4182 total_size = size;
4183
4184 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
4185 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4186
4187 /* During reload iteration the number of registers saved can change.
4188 Recompute the value as needed. Do not recompute when the number of registers
4189 didn't change, as reload does multiple calls to the function and does not
4190 expect the decision to change within a single iteration. */
4191 if (!optimize_size
4192 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
4193 {
4194 int count = frame->nregs;
4195
4196 cfun->machine->use_fast_prologue_epilogue_nregs = count;
4197 /* The fast prologue uses move instead of push to save registers. This
4198 is significantly longer, but also executes faster as modern hardware
4199 can execute the moves in parallel, but can't do that for push/pop.
4200
4201 Be careful about choosing what prologue to emit: when the function takes
4202 many instructions to execute, we may use the slow version, as well as
4203 when the function is known to be outside a hot spot (this is known with
4204 feedback only). Weight the size of the function by the number of registers
4205 to save, as it is cheap to use one or two push instructions but very
4206 slow to use many of them. */
4207 if (count)
4208 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
4209 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
4210 || (flag_branch_probabilities
4211 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
4212 cfun->machine->use_fast_prologue_epilogue = false;
4213 else
4214 cfun->machine->use_fast_prologue_epilogue
4215 = !expensive_function_p (count);
4216 }
4217 if (TARGET_PROLOGUE_USING_MOVE
4218 && cfun->machine->use_fast_prologue_epilogue)
4219 frame->save_regs_using_mov = true;
4220 else
4221 frame->save_regs_using_mov = false;
4222
4223
4224 /* Skip return address and saved base pointer. */
4225 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4226
4227 frame->hard_frame_pointer_offset = offset;
4228
4229 /* Do some sanity checking of stack_alignment_needed and
4230 preferred_alignment, since the i386 port is the only one using those
4231 features, which may break easily. */
4232
4233 if (size && !stack_alignment_needed)
4234 abort ();
4235 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
4236 abort ();
4237 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4238 abort ();
4239 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4240 abort ();
4241
4242 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
4243 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
4244
4245 /* Register save area */
4246 offset += frame->nregs * UNITS_PER_WORD;
4247
4248 /* Va-arg area */
4249 if (ix86_save_varrargs_registers)
4250 {
4251 offset += X86_64_VARARGS_SIZE;
4252 frame->va_arg_size = X86_64_VARARGS_SIZE;
4253 }
4254 else
4255 frame->va_arg_size = 0;
4256
4257 /* Align start of frame for local function. */
4258 frame->padding1 = ((offset + stack_alignment_needed - 1)
4259 & -stack_alignment_needed) - offset;
4260
4261 offset += frame->padding1;
4262
4263 /* Frame pointer points here. */
4264 frame->frame_pointer_offset = offset;
4265
4266 offset += size;
4267
4268 /* Add outgoing arguments area. Can be skipped if we eliminated
4269 all the function calls as dead code.
4270 Skipping is, however, impossible when the function calls alloca, as the
4271 alloca expander assumes that the last current_function_outgoing_args_size
4272 bytes of the stack frame are unused. */
4273 if (ACCUMULATE_OUTGOING_ARGS
4274 && (!current_function_is_leaf || current_function_calls_alloca))
4275 {
4276 offset += current_function_outgoing_args_size;
4277 frame->outgoing_arguments_size = current_function_outgoing_args_size;
4278 }
4279 else
4280 frame->outgoing_arguments_size = 0;
4281
4282 /* Align stack boundary. Only needed if we're calling another function
4283 or using alloca. */
4284 if (!current_function_is_leaf || current_function_calls_alloca)
4285 frame->padding2 = ((offset + preferred_alignment - 1)
4286 & -preferred_alignment) - offset;
4287 else
4288 frame->padding2 = 0;
4289
4290 offset += frame->padding2;
4291
4292 /* We've reached end of stack frame. */
4293 frame->stack_pointer_offset = offset;
4294
4295 /* Size prologue needs to allocate. */
4296 frame->to_allocate =
4297 (size + frame->padding1 + frame->padding2
4298 + frame->outgoing_arguments_size + frame->va_arg_size);
4299
4300 if ((!frame->to_allocate && frame->nregs <= 1)
4301 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
4302 frame->save_regs_using_mov = false;
4303
4304 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
4305 && current_function_is_leaf)
4306 {
4307 frame->red_zone_size = frame->to_allocate;
4308 if (frame->save_regs_using_mov)
4309 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
4310 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
4311 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
4312 }
4313 else
4314 frame->red_zone_size = 0;
4315 frame->to_allocate -= frame->red_zone_size;
4316 frame->stack_pointer_offset -= frame->red_zone_size;
4317 #if 0
4318 fprintf (stderr, "nregs: %i\n", frame->nregs);
4319 fprintf (stderr, "size: %i\n", size);
4320 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
4321 fprintf (stderr, "padding1: %i\n", frame->padding1);
4322 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
4323 fprintf (stderr, "padding2: %i\n", frame->padding2);
4324 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
4325 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
4326 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
4327 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
4328 frame->hard_frame_pointer_offset);
4329 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
4330 #endif
4331 }
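/* A sketch of the frame layout computed above, from higher to lower
   addresses:

       return address
       saved frame pointer           (if frame_pointer_needed)
         <- hard_frame_pointer_offset
       saved registers               (nregs * UNITS_PER_WORD)
       va-arg register save area     (if ix86_save_varrargs_registers)
       padding1                      (to stack_alignment_needed)
         <- frame_pointer_offset
       local variables               (get_frame_size ())
       outgoing arguments
       padding2                      (to preferred_alignment)
         <- stack_pointer_offset

   When the red zone is usable, the bottom of this area is left
   unallocated and addressed below the stack pointer instead.  */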
4332
4333 /* Emit code to save registers in the prologue. */
4334
4335 static void
4336 ix86_emit_save_regs (void)
4337 {
4338 int regno;
4339 rtx insn;
4340
4341 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4342 if (ix86_save_reg (regno, true))
4343 {
4344 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
4345 RTX_FRAME_RELATED_P (insn) = 1;
4346 }
4347 }
4348
4349 /* Emit code to save registers using MOV insns. The first register
4350 is saved at POINTER + OFFSET. */
4351 static void
4352 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
4353 {
4354 int regno;
4355 rtx insn;
4356
4357 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4358 if (ix86_save_reg (regno, true))
4359 {
4360 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
4361 Pmode, offset),
4362 gen_rtx_REG (Pmode, regno));
4363 RTX_FRAME_RELATED_P (insn) = 1;
4364 offset += UNITS_PER_WORD;
4365 }
4366 }
4367
4368 /* Expand prologue or epilogue stack adjustment.
4369 The pattern exists to put a dependency on all ebp-based memory accesses.
4370 STYLE should be negative if the instructions should be marked as frame related,
4371 zero if the %r11 register is live and cannot be freely used, and positive
4372 otherwise. */
4373
4374 static void
4375 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
4376 {
4377 rtx insn;
4378
4379 if (! TARGET_64BIT)
4380 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
4381 else if (x86_64_immediate_operand (offset, DImode))
4382 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
4383 else
4384 {
4385 rtx r11;
4386 /* r11 is used by indirect sibcall return as well, set before the
4387 epilogue and used after the epilogue. ATM indirect sibcall
4388 shouldn't be used together with huge frame sizes in one
4389 function because of the frame_size check in sibcall.c. */
4390 if (style == 0)
4391 abort ();
4392 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
4393 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
4394 if (style < 0)
4395 RTX_FRAME_RELATED_P (insn) = 1;
4396 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
4397 offset));
4398 }
4399 if (style < 0)
4400 RTX_FRAME_RELATED_P (insn) = 1;
4401 }
4402
4403 /* Expand the prologue into a bunch of separate insns. */
4404
4405 void
4406 ix86_expand_prologue (void)
4407 {
4408 rtx insn;
4409 bool pic_reg_used;
4410 struct ix86_frame frame;
4411 HOST_WIDE_INT allocate;
4412
4413 ix86_compute_frame_layout (&frame);
4414
4415 /* Note: AT&T enter does NOT have reversed args. Enter is probably
4416 slower on all targets. Also sdb doesn't like it. */
4417
4418 if (frame_pointer_needed)
4419 {
4420 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
4421 RTX_FRAME_RELATED_P (insn) = 1;
4422
4423 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
4424 RTX_FRAME_RELATED_P (insn) = 1;
4425 }
4426
4427 allocate = frame.to_allocate;
4428
4429 if (!frame.save_regs_using_mov)
4430 ix86_emit_save_regs ();
4431 else
4432 allocate += frame.nregs * UNITS_PER_WORD;
4433
4434 /* When using the red zone we may start register saving before allocating
4435 the stack frame, saving one cycle of the prologue. */
4436 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
4437 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
4438 : stack_pointer_rtx,
4439 -frame.nregs * UNITS_PER_WORD);
4440
4441 if (allocate == 0)
4442 ;
4443 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
4444 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
4445 GEN_INT (-allocate), -1);
4446 else
4447 {
4448 /* Only valid for Win32. */
4449 rtx eax = gen_rtx_REG (SImode, 0);
4450 bool eax_live = ix86_eax_live_at_start_p ();
4451 rtx t;
4452
4453 if (TARGET_64BIT)
4454 abort ();
4455
4456 if (eax_live)
4457 {
4458 emit_insn (gen_push (eax));
4459 allocate -= 4;
4460 }
4461
4462 emit_move_insn (eax, GEN_INT (allocate));
4463
4464 insn = emit_insn (gen_allocate_stack_worker (eax));
4465 RTX_FRAME_RELATED_P (insn) = 1;
4466 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
4467 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
4468 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
4469 t, REG_NOTES (insn));
4470
4471 if (eax_live)
4472 {
4473 if (frame_pointer_needed)
4474 t = plus_constant (hard_frame_pointer_rtx,
4475 allocate
4476 - frame.to_allocate
4477 - frame.nregs * UNITS_PER_WORD);
4478 else
4479 t = plus_constant (stack_pointer_rtx, allocate);
4480 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
4481 }
4482 }
4483
4484 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
4485 {
4486 if (!frame_pointer_needed || !frame.to_allocate)
4487 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
4488 else
4489 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
4490 -frame.nregs * UNITS_PER_WORD);
4491 }
4492
4493 pic_reg_used = false;
4494 if (pic_offset_table_rtx
4495 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4496 || current_function_profile))
4497 {
4498 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
4499
4500 if (alt_pic_reg_used != INVALID_REGNUM)
4501 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
4502
4503 pic_reg_used = true;
4504 }
4505
4506 if (pic_reg_used)
4507 {
4508 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
4509
4510 /* Even with accurate pre-reload life analysis, we can wind up
4511 deleting all references to the pic register after reload.
4512 Consider if cross-jumping unifies two sides of a branch
4513 controlled by a comparison vs the only read from a global.
4514 In which case, allow the set_got to be deleted, though we're
4515 too late to do anything about the ebx save in the prologue. */
4516 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
4517 }
4518
4519 /* Prevent function calls from being scheduled before the call to mcount.
4520 In the pic_reg_used case, make sure that the got load isn't deleted. */
4521 if (current_function_profile)
4522 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
4523 }
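/* Put together, a typical 32-bit prologue produced by the code above looks
   roughly like:

       pushl %ebp
       movl  %esp, %ebp
       pushl %ebx              # or mov-based saves for the fast prologue
       subl  $N, %esp

   The allocate_stack_worker path is taken only when TARGET_STACK_PROBE is
   set and the allocation exceeds CHECK_STACK_LIMIT.  */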
4524
4525 /* Emit code to restore saved registers using MOV insns. First register
4526 is restored from POINTER + OFFSET. */
4527 static void
4528 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
4529 int maybe_eh_return)
4530 {
4531 int regno;
4532 rtx base_address = gen_rtx_MEM (Pmode, pointer);
4533
4534 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4535 if (ix86_save_reg (regno, maybe_eh_return))
4536 {
4537 /* Ensure that adjust_address won't be forced to produce a pointer
4538 out of the range allowed by the x86-64 instruction set. */
4539 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
4540 {
4541 rtx r11;
4542
4543 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
4544 emit_move_insn (r11, GEN_INT (offset));
4545 emit_insn (gen_adddi3 (r11, r11, pointer));
4546 base_address = gen_rtx_MEM (Pmode, r11);
4547 offset = 0;
4548 }
4549 emit_move_insn (gen_rtx_REG (Pmode, regno),
4550 adjust_address (base_address, Pmode, offset));
4551 offset += UNITS_PER_WORD;
4552 }
4553 }
4554
4555 /* Restore function stack, frame, and registers. */
4556
4557 void
4558 ix86_expand_epilogue (int style)
4559 {
4560 int regno;
4561 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
4562 struct ix86_frame frame;
4563 HOST_WIDE_INT offset;
4564
4565 ix86_compute_frame_layout (&frame);
4566
4567 /* Calculate start of saved registers relative to ebp. Special care
4568 must be taken for the normal return case of a function using
4569 eh_return: the eax and edx registers are marked as saved, but not
4570 restored along this path. */
4571 offset = frame.nregs;
4572 if (current_function_calls_eh_return && style != 2)
4573 offset -= 2;
4574 offset *= -UNITS_PER_WORD;
4575
4576 /* If we're only restoring one register and sp is not valid, then
4577 use a move instruction to restore the register, since it's
4578 less work than reloading sp and popping the register.
4579
4580 The default code results in a stack adjustment using an add/lea instruction,
4581 while this code results in a LEAVE instruction (or discrete equivalent),
4582 so it is profitable in some other cases as well, especially when there
4583 are no registers to restore. We also use this code when TARGET_USE_LEAVE
4584 and there is exactly one register to pop. This heuristic may need some
4585 tuning in the future. */
4586 if ((!sp_valid && frame.nregs <= 1)
4587 || (TARGET_EPILOGUE_USING_MOVE
4588 && cfun->machine->use_fast_prologue_epilogue
4589 && (frame.nregs > 1 || frame.to_allocate))
4590 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
4591 || (frame_pointer_needed && TARGET_USE_LEAVE
4592 && cfun->machine->use_fast_prologue_epilogue
4593 && frame.nregs == 1)
4594 || current_function_calls_eh_return)
4595 {
4596 /* Restore registers. We can use ebp or esp to address the memory
4597 locations. If both are available, default to ebp, since offsets
4598 are known to be small. The only exception is esp pointing directly to the
4599 end of the block of saved registers, where we may simplify the addressing
4600 mode. */
4601
4602 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
4603 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
4604 frame.to_allocate, style == 2);
4605 else
4606 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
4607 offset, style == 2);
4608
4609 /* eh_return epilogues need %ecx added to the stack pointer. */
4610 if (style == 2)
4611 {
4612 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
4613
4614 if (frame_pointer_needed)
4615 {
4616 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
4617 tmp = plus_constant (tmp, UNITS_PER_WORD);
4618 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
4619
4620 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
4621 emit_move_insn (hard_frame_pointer_rtx, tmp);
4622
4623 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
4624 const0_rtx, style);
4625 }
4626 else
4627 {
4628 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
4629 tmp = plus_constant (tmp, (frame.to_allocate
4630 + frame.nregs * UNITS_PER_WORD));
4631 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
4632 }
4633 }
4634 else if (!frame_pointer_needed)
4635 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
4636 GEN_INT (frame.to_allocate
4637 + frame.nregs * UNITS_PER_WORD),
4638 style);
4639 /* If not an i386, mov & pop is faster than "leave". */
4640 else if (TARGET_USE_LEAVE || optimize_size
4641 || !cfun->machine->use_fast_prologue_epilogue)
4642 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4643 else
4644 {
4645 pro_epilogue_adjust_stack (stack_pointer_rtx,
4646 hard_frame_pointer_rtx,
4647 const0_rtx, style);
4648 if (TARGET_64BIT)
4649 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4650 else
4651 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4652 }
4653 }
4654 else
4655 {
4656 /* First step is to deallocate the stack frame so that we can
4657 pop the registers. */
4658 if (!sp_valid)
4659 {
4660 if (!frame_pointer_needed)
4661 abort ();
4662 pro_epilogue_adjust_stack (stack_pointer_rtx,
4663 hard_frame_pointer_rtx,
4664 GEN_INT (offset), style);
4665 }
4666 else if (frame.to_allocate)
4667 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
4668 GEN_INT (frame.to_allocate), style);
4669
4670 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4671 if (ix86_save_reg (regno, false))
4672 {
4673 if (TARGET_64BIT)
4674 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
4675 else
4676 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
4677 }
4678 if (frame_pointer_needed)
4679 {
4680 /* Leave results in shorter dependency chains on CPUs that are
4681 able to grok it fast. */
4682 if (TARGET_USE_LEAVE)
4683 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4684 else if (TARGET_64BIT)
4685 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4686 else
4687 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4688 }
4689 }
4690
4691 /* Sibcall epilogues don't want a return instruction. */
4692 if (style == 0)
4693 return;
4694
4695 if (current_function_pops_args && current_function_args_size)
4696 {
4697 rtx popc = GEN_INT (current_function_pops_args);
4698
4699 /* i386 can only pop 64K bytes. If asked to pop more, pop
4700 return address, do explicit add, and jump indirectly to the
4701 caller. */
4702
4703 if (current_function_pops_args >= 65536)
4704 {
4705 rtx ecx = gen_rtx_REG (SImode, 2);
4706
4707 /* There is no "pascal" calling convention in 64bit ABI. */
4708 if (TARGET_64BIT)
4709 abort ();
4710
4711 emit_insn (gen_popsi1 (ecx));
4712 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
4713 emit_jump_insn (gen_return_indirect_internal (ecx));
4714 }
4715 else
4716 emit_jump_insn (gen_return_pop_internal (popc));
4717 }
4718 else
4719 emit_jump_insn (gen_return_internal ());
4720 }
4721
4722 /* Reset from the function's potential modifications. */
4723
4724 static void
4725 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
4726 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
4727 {
4728 if (pic_offset_table_rtx)
4729 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
4730 }
4731 \f
4732 /* Extract the parts of an RTL expression that is a valid memory address
4733 for an instruction. Return 0 if the structure of the address is
4734 grossly off. Return -1 if the address contains ASHIFT, so it is not
4735 strictly valid, but is still used for computing the length of a lea instruction. */
4736
4737 int
4738 ix86_decompose_address (rtx addr, struct ix86_address *out)
4739 {
4740 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
4741 rtx base_reg, index_reg;
4742 HOST_WIDE_INT scale = 1;
4743 rtx scale_rtx = NULL_RTX;
4744 int retval = 1;
4745 enum ix86_address_seg seg = SEG_DEFAULT;
4746
4747 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
4748 base = addr;
4749 else if (GET_CODE (addr) == PLUS)
4750 {
4751 rtx addends[4], op;
4752 int n = 0, i;
4753
4754 op = addr;
4755 do
4756 {
4757 if (n >= 4)
4758 return 0;
4759 addends[n++] = XEXP (op, 1);
4760 op = XEXP (op, 0);
4761 }
4762 while (GET_CODE (op) == PLUS);
4763 if (n >= 4)
4764 return 0;
4765 addends[n] = op;
4766
4767 for (i = n; i >= 0; --i)
4768 {
4769 op = addends[i];
4770 switch (GET_CODE (op))
4771 {
4772 case MULT:
4773 if (index)
4774 return 0;
4775 index = XEXP (op, 0);
4776 scale_rtx = XEXP (op, 1);
4777 break;
4778
4779 case UNSPEC:
4780 if (XINT (op, 1) == UNSPEC_TP
4781 && TARGET_TLS_DIRECT_SEG_REFS
4782 && seg == SEG_DEFAULT)
4783 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
4784 else
4785 return 0;
4786 break;
4787
4788 case REG:
4789 case SUBREG:
4790 if (!base)
4791 base = op;
4792 else if (!index)
4793 index = op;
4794 else
4795 return 0;
4796 break;
4797
4798 case CONST:
4799 case CONST_INT:
4800 case SYMBOL_REF:
4801 case LABEL_REF:
4802 if (disp)
4803 return 0;
4804 disp = op;
4805 break;
4806
4807 default:
4808 return 0;
4809 }
4810 }
4811 }
4812 else if (GET_CODE (addr) == MULT)
4813 {
4814 index = XEXP (addr, 0); /* index*scale */
4815 scale_rtx = XEXP (addr, 1);
4816 }
4817 else if (GET_CODE (addr) == ASHIFT)
4818 {
4819 rtx tmp;
4820
4821 /* We're called for lea too, which implements ashift on occasion. */
4822 index = XEXP (addr, 0);
4823 tmp = XEXP (addr, 1);
4824 if (GET_CODE (tmp) != CONST_INT)
4825 return 0;
4826 scale = INTVAL (tmp);
4827 if ((unsigned HOST_WIDE_INT) scale > 3)
4828 return 0;
4829 scale = 1 << scale;
4830 retval = -1;
4831 }
4832 else
4833 disp = addr; /* displacement */
4834
4835 /* Extract the integral value of scale. */
4836 if (scale_rtx)
4837 {
4838 if (GET_CODE (scale_rtx) != CONST_INT)
4839 return 0;
4840 scale = INTVAL (scale_rtx);
4841 }
4842
4843 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
4844 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
4845
4846 /* Allow the arg pointer and stack pointer as index if there is no scaling. */
4847 if (base_reg && index_reg && scale == 1
4848 && (index_reg == arg_pointer_rtx
4849 || index_reg == frame_pointer_rtx
4850 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
4851 {
4852 rtx tmp;
4853 tmp = base, base = index, index = tmp;
4854 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
4855 }
4856
4857 /* Special case: %ebp cannot be encoded as a base without a displacement. */
4858 if ((base_reg == hard_frame_pointer_rtx
4859 || base_reg == frame_pointer_rtx
4860 || base_reg == arg_pointer_rtx) && !disp)
4861 disp = const0_rtx;
4862
4863 /* Special case: on K6, [%esi] makes the instruction vector decoded.
4864 Avoid this by transforming to [%esi+0]. */
4865 if (ix86_tune == PROCESSOR_K6 && !optimize_size
4866 && base_reg && !index_reg && !disp
4867 && REG_P (base_reg)
4868 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
4869 disp = const0_rtx;
4870
4871 /* Special case: encode reg+reg instead of reg*2. */
4872 if (!base && index && scale && scale == 2)
4873 base = index, base_reg = index_reg, scale = 1;
4874
4875 /* Special case: scaling cannot be encoded without base or displacement. */
4876 if (!base && !disp && index && scale != 1)
4877 disp = const0_rtx;
4878
4879 out->base = base;
4880 out->index = index;
4881 out->disp = disp;
4882 out->scale = scale;
4883 out->seg = seg;
4884
4885 return retval;
4886 }
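/* As an example of the decomposition above, the address

       (plus:SI (plus:SI (mult:SI (reg:SI %esi) (const_int 4))
                         (reg:SI %ebx))
                (const_int 12))

   yields base = %ebx, index = %esi, scale = 4, disp = 12 and
   seg = SEG_DEFAULT, i.e. the operand 12(%ebx,%esi,4) in AT&T syntax.  */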
4887 \f
4888 /* Return cost of the memory address x.
4889 For i386, it is better to use a complex address than let gcc copy
4890 the address into a reg and make a new pseudo. But not if the address
4891 requires two regs - that would mean more pseudos with longer
4892 lifetimes. */
4893 static int
4894 ix86_address_cost (rtx x)
4895 {
4896 struct ix86_address parts;
4897 int cost = 1;
4898
4899 if (!ix86_decompose_address (x, &parts))
4900 abort ();
4901
4902 if (parts.base && GET_CODE (parts.base) == SUBREG)
4903 parts.base = SUBREG_REG (parts.base);
4904 if (parts.index && GET_CODE (parts.index) == SUBREG)
4905 parts.index = SUBREG_REG (parts.index);
4906
4907 /* More complex memory references are better. */
4908 if (parts.disp && parts.disp != const0_rtx)
4909 cost--;
4910 if (parts.seg != SEG_DEFAULT)
4911 cost--;
4912
4913 /* Attempt to minimize number of registers in the address. */
4914 if ((parts.base
4915 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
4916 || (parts.index
4917 && (!REG_P (parts.index)
4918 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
4919 cost++;
4920
4921 if (parts.base
4922 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
4923 && parts.index
4924 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
4925 && parts.base != parts.index)
4926 cost++;
4927
4928 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
4929 since its predecode logic can't detect the length of instructions
4930 and it degenerates to vector decoding. Increase the cost of such
4931 addresses here. The penalty is at least 2 cycles. It may be worthwhile
4932 to split such addresses or even refuse such addresses at all.
4933
4934 The following addressing modes are affected:
4935 [base+scale*index]
4936 [scale*index+disp]
4937 [base+index]
4938
4939 The first and last cases may be avoidable by explicitly coding the zero in
4940 the memory address, but I don't have an AMD-K6 machine handy to check this
4941 theory. */
4942
4943 if (TARGET_K6
4944 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
4945 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
4946 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
4947 cost += 10;
4948
4949 return cost;
4950 }
4951 \f
4952 /* If X is a machine specific address (i.e. a symbol or label being
4953 referenced as a displacement from the GOT implemented using an
4954 UNSPEC), then return the base term. Otherwise return X. */
4955
4956 rtx
4957 ix86_find_base_term (rtx x)
4958 {
4959 rtx term;
4960
4961 if (TARGET_64BIT)
4962 {
4963 if (GET_CODE (x) != CONST)
4964 return x;
4965 term = XEXP (x, 0);
4966 if (GET_CODE (term) == PLUS
4967 && (GET_CODE (XEXP (term, 1)) == CONST_INT
4968 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
4969 term = XEXP (term, 0);
4970 if (GET_CODE (term) != UNSPEC
4971 || XINT (term, 1) != UNSPEC_GOTPCREL)
4972 return x;
4973
4974 term = XVECEXP (term, 0, 0);
4975
4976 if (GET_CODE (term) != SYMBOL_REF
4977 && GET_CODE (term) != LABEL_REF)
4978 return x;
4979
4980 return term;
4981 }
4982
4983 term = ix86_delegitimize_address (x);
4984
4985 if (GET_CODE (term) != SYMBOL_REF
4986 && GET_CODE (term) != LABEL_REF)
4987 return x;
4988
4989 return term;
4990 }
4991
4992 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
4993 this is used to form addresses to local data when -fPIC is in
4994 use. */
4995
4996 static bool
4997 darwin_local_data_pic (rtx disp)
4998 {
4999 if (GET_CODE (disp) == MINUS)
5000 {
5001 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
5002 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
5003 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
5004 {
5005 const char *sym_name = XSTR (XEXP (disp, 1), 0);
5006 if (! strcmp (sym_name, "<pic base>"))
5007 return true;
5008 }
5009 }
5010
5011 return false;
5012 }
5013 \f
5014 /* Determine if a given RTX is a valid constant. We already know this
5015 satisfies CONSTANT_P. */
5016
5017 bool
5018 legitimate_constant_p (rtx x)
5019 {
5020 switch (GET_CODE (x))
5021 {
5022 case CONST:
5023 x = XEXP (x, 0);
5024
5025 if (GET_CODE (x) == PLUS)
5026 {
5027 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
5028 return false;
5029 x = XEXP (x, 0);
5030 }
5031
5032 if (TARGET_MACHO && darwin_local_data_pic (x))
5033 return true;
5034
5035 /* Only some unspecs are valid as "constants". */
5036 if (GET_CODE (x) == UNSPEC)
5037 switch (XINT (x, 1))
5038 {
5039 case UNSPEC_TPOFF:
5040 case UNSPEC_NTPOFF:
5041 return local_exec_symbolic_operand (XVECEXP (x, 0, 0), Pmode);
5042 case UNSPEC_DTPOFF:
5043 return local_dynamic_symbolic_operand (XVECEXP (x, 0, 0), Pmode);
5044 default:
5045 return false;
5046 }
5047
5048 /* We must have drilled down to a symbol. */
5049 if (!symbolic_operand (x, Pmode))
5050 return false;
5051 /* FALLTHRU */
5052
5053 case SYMBOL_REF:
5054 /* TLS symbols are never valid. */
5055 if (tls_symbolic_operand (x, Pmode))
5056 return false;
5057 break;
5058
5059 default:
5060 break;
5061 }
5062
5063 /* Otherwise we handle everything else in the move patterns. */
5064 return true;
5065 }
5066
5067 /* Determine if it's legal to put X into the constant pool. This
5068 is not possible for the address of thread-local symbols, which
5069 is checked above. */
5070
5071 static bool
5072 ix86_cannot_force_const_mem (rtx x)
5073 {
5074 return !legitimate_constant_p (x);
5075 }
5076
5077 /* Determine if a given RTX is a valid constant address. */
5078
5079 bool
5080 constant_address_p (rtx x)
5081 {
5082 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
5083 }
5084
5085 /* Nonzero if the constant value X is a legitimate general operand
5086 when generating PIC code. It is given that flag_pic is on and
5087 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
5088
5089 bool
5090 legitimate_pic_operand_p (rtx x)
5091 {
5092 rtx inner;
5093
5094 switch (GET_CODE (x))
5095 {
5096 case CONST:
5097 inner = XEXP (x, 0);
5098
5099 /* Only some unspecs are valid as "constants". */
5100 if (GET_CODE (inner) == UNSPEC)
5101 switch (XINT (inner, 1))
5102 {
5103 case UNSPEC_TPOFF:
5104 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5105 default:
5106 return false;
5107 }
5108 /* FALLTHRU */
5109
5110 case SYMBOL_REF:
5111 case LABEL_REF:
5112 return legitimate_pic_address_disp_p (x);
5113
5114 default:
5115 return true;
5116 }
5117 }
5118
5119 /* Determine if a given CONST RTX is a valid memory displacement
5120 in PIC mode. */
5121
5122 int
5123 legitimate_pic_address_disp_p (rtx disp)
5124 {
5125 bool saw_plus;
5126
5127 /* In 64bit mode we can allow direct addresses of symbols and labels
5128 when they are not dynamic symbols. */
5129 if (TARGET_64BIT)
5130 {
5131 /* TLS references should always be enclosed in UNSPEC. */
5132 if (tls_symbolic_operand (disp, GET_MODE (disp)))
5133 return 0;
5134 if (GET_CODE (disp) == SYMBOL_REF
5135 && ix86_cmodel == CM_SMALL_PIC
5136 && SYMBOL_REF_LOCAL_P (disp))
5137 return 1;
5138 if (GET_CODE (disp) == LABEL_REF)
5139 return 1;
5140 if (GET_CODE (disp) == CONST
5141 && GET_CODE (XEXP (disp, 0)) == PLUS)
5142 {
5143 rtx op0 = XEXP (XEXP (disp, 0), 0);
5144 rtx op1 = XEXP (XEXP (disp, 0), 1);
5145
5146 /* TLS references should always be enclosed in UNSPEC. */
5147 if (tls_symbolic_operand (op0, GET_MODE (op0)))
5148 return 0;
5149 if (((GET_CODE (op0) == SYMBOL_REF
5150 && ix86_cmodel == CM_SMALL_PIC
5151 && SYMBOL_REF_LOCAL_P (op0))
5152 || GET_CODE (op0) == LABEL_REF)
5153 && GET_CODE (op1) == CONST_INT
5154 && INTVAL (op1) < 16*1024*1024
5155 && INTVAL (op1) >= -16*1024*1024)
5156 return 1;
5157 }
5158 }
5159 if (GET_CODE (disp) != CONST)
5160 return 0;
5161 disp = XEXP (disp, 0);
5162
5163 if (TARGET_64BIT)
5164 {
5165 /* It is unsafe to allow PLUS expressions here; this limits the allowed distance
5166 of GOT tables. We should not need these anyway. */
5167 if (GET_CODE (disp) != UNSPEC
5168 || XINT (disp, 1) != UNSPEC_GOTPCREL)
5169 return 0;
5170
5171 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
5172 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
5173 return 0;
5174 return 1;
5175 }
5176
5177 saw_plus = false;
5178 if (GET_CODE (disp) == PLUS)
5179 {
5180 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
5181 return 0;
5182 disp = XEXP (disp, 0);
5183 saw_plus = true;
5184 }
5185
5186 if (TARGET_MACHO && darwin_local_data_pic (disp))
5187 return 1;
5188
5189 if (GET_CODE (disp) != UNSPEC)
5190 return 0;
5191
5192 switch (XINT (disp, 1))
5193 {
5194 case UNSPEC_GOT:
5195 if (saw_plus)
5196 return false;
5197 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
5198 case UNSPEC_GOTOFF:
5199 if (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
5200 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
5201 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5202 return false;
5203 case UNSPEC_GOTTPOFF:
5204 case UNSPEC_GOTNTPOFF:
5205 case UNSPEC_INDNTPOFF:
5206 if (saw_plus)
5207 return false;
5208 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5209 case UNSPEC_NTPOFF:
5210 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5211 case UNSPEC_DTPOFF:
5212 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5213 }
5214
5215 return 0;
5216 }
5217
5218 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5219 memory address for an instruction. The MODE argument is the machine mode
5220 for the MEM expression that wants to use this address.
5221
5222 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
5223 convert common non-canonical forms to canonical form so that they will
5224 be recognized. */
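/* A sketch of an address in the canonical form checked below, using the
   usual x86 base + index*scale + displacement shape with scale 1, 2, 4
   or 8, would be
     (plus (plus (mult (reg) (const_int 4)) (reg)) (const_int 12))
   as produced by the canonicalizations in legitimize_address.  */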
5225
5226 int
5227 legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
5228 {
5229 struct ix86_address parts;
5230 rtx base, index, disp;
5231 HOST_WIDE_INT scale;
5232 const char *reason = NULL;
5233 rtx reason_rtx = NULL_RTX;
5234
5235 if (TARGET_DEBUG_ADDR)
5236 {
5237 fprintf (stderr,
5238 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
5239 GET_MODE_NAME (mode), strict);
5240 debug_rtx (addr);
5241 }
5242
5243 if (ix86_decompose_address (addr, &parts) <= 0)
5244 {
5245 reason = "decomposition failed";
5246 goto report_error;
5247 }
5248
5249 base = parts.base;
5250 index = parts.index;
5251 disp = parts.disp;
5252 scale = parts.scale;
5253
5254 /* Validate base register.
5255
5256 Don't allow SUBREGs that span more than a word here. It can lead to spill
5257 failures when the base is one word out of a two-word structure, which is
5258 represented internally as a DImode int. */
5259
5260 if (base)
5261 {
5262 rtx reg;
5263 reason_rtx = base;
5264
5265 if (REG_P (base))
5266 reg = base;
5267 else if (GET_CODE (base) == SUBREG
5268 && REG_P (SUBREG_REG (base))
5269 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
5270 <= UNITS_PER_WORD)
5271 reg = SUBREG_REG (base);
5272 else
5273 {
5274 reason = "base is not a register";
5275 goto report_error;
5276 }
5277
5278 if (GET_MODE (base) != Pmode)
5279 {
5280 reason = "base is not in Pmode";
5281 goto report_error;
5282 }
5283
5284 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
5285 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
5286 {
5287 reason = "base is not valid";
5288 goto report_error;
5289 }
5290 }
5291
5292 /* Validate index register.
5293
5294 Don't allow SUBREGs that span more than a word here -- same as above. */
5295
5296 if (index)
5297 {
5298 rtx reg;
5299 reason_rtx = index;
5300
5301 if (REG_P (index))
5302 reg = index;
5303 else if (GET_CODE (index) == SUBREG
5304 && REG_P (SUBREG_REG (index))
5305 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
5306 <= UNITS_PER_WORD)
5307 reg = SUBREG_REG (index);
5308 else
5309 {
5310 reason = "index is not a register";
5311 goto report_error;
5312 }
5313
5314 if (GET_MODE (index) != Pmode)
5315 {
5316 reason = "index is not in Pmode";
5317 goto report_error;
5318 }
5319
5320 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
5321 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
5322 {
5323 reason = "index is not valid";
5324 goto report_error;
5325 }
5326 }
5327
5328 /* Validate scale factor. */
5329 if (scale != 1)
5330 {
5331 reason_rtx = GEN_INT (scale);
5332 if (!index)
5333 {
5334 reason = "scale without index";
5335 goto report_error;
5336 }
5337
5338 if (scale != 2 && scale != 4 && scale != 8)
5339 {
5340 reason = "scale is not a valid multiplier";
5341 goto report_error;
5342 }
5343 }
5344
5345 /* Validate displacement. */
5346 if (disp)
5347 {
5348 reason_rtx = disp;
5349
5350 if (GET_CODE (disp) == CONST
5351 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
5352 switch (XINT (XEXP (disp, 0), 1))
5353 {
5354 case UNSPEC_GOT:
5355 case UNSPEC_GOTOFF:
5356 case UNSPEC_GOTPCREL:
5357 if (!flag_pic)
5358 abort ();
5359 goto is_legitimate_pic;
5360
5361 case UNSPEC_GOTTPOFF:
5362 case UNSPEC_GOTNTPOFF:
5363 case UNSPEC_INDNTPOFF:
5364 case UNSPEC_NTPOFF:
5365 case UNSPEC_DTPOFF:
5366 break;
5367
5368 default:
5369 reason = "invalid address unspec";
5370 goto report_error;
5371 }
5372
5373 else if (flag_pic && (SYMBOLIC_CONST (disp)
5374 #if TARGET_MACHO
5375 && !machopic_operand_p (disp)
5376 #endif
5377 ))
5378 {
5379 is_legitimate_pic:
5380 if (TARGET_64BIT && (index || base))
5381 {
5382 /* foo@dtpoff(%rX) is ok. */
5383 if (GET_CODE (disp) != CONST
5384 || GET_CODE (XEXP (disp, 0)) != PLUS
5385 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
5386 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
5387 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
5388 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
5389 {
5390 reason = "non-constant pic memory reference";
5391 goto report_error;
5392 }
5393 }
5394 else if (! legitimate_pic_address_disp_p (disp))
5395 {
5396 reason = "displacement is an invalid pic construct";
5397 goto report_error;
5398 }
5399
5400 /* This code used to verify that a symbolic pic displacement
5401 includes the pic_offset_table_rtx register.
5402
5403 While that is a good idea, unfortunately these constructs may
5404 be created by the "adds using lea" optimization for incorrect
5405 code like:
5406
5407 int a;
5408 int foo(int i)
5409 {
5410 return *(&a+i);
5411 }
5412
5413 This code is nonsensical, but results in addressing the
5414 GOT table with pic_offset_table_rtx as the base. We can't
5415 easily refuse it, since it gets matched by the "addsi3"
5416 pattern, which later gets split into an lea when the output
5417 register differs from the input. While this could be handled
5418 by a separate addsi pattern for this case that never results
5419 in an lea, disabling this test seems to be the easier and
5420 correct fix for the crash. */
5421 }
5422 else if (GET_CODE (disp) != LABEL_REF
5423 && GET_CODE (disp) != CONST_INT
5424 && (GET_CODE (disp) != CONST
5425 || !legitimate_constant_p (disp))
5426 && (GET_CODE (disp) != SYMBOL_REF
5427 || !legitimate_constant_p (disp)))
5428 {
5429 reason = "displacement is not constant";
5430 goto report_error;
5431 }
5432 else if (TARGET_64BIT
5433 && !x86_64_immediate_operand (disp, VOIDmode))
5434 {
5435 reason = "displacement is out of range";
5436 goto report_error;
5437 }
5438 }
5439
5440 /* Everything looks valid. */
5441 if (TARGET_DEBUG_ADDR)
5442 fprintf (stderr, "Success.\n");
5443 return TRUE;
5444
5445 report_error:
5446 if (TARGET_DEBUG_ADDR)
5447 {
5448 fprintf (stderr, "Error: %s\n", reason);
5449 debug_rtx (reason_rtx);
5450 }
5451 return FALSE;
5452 }
5453 \f
5454 /* Return a unique alias set for the GOT. */
5455
5456 static HOST_WIDE_INT
5457 ix86_GOT_alias_set (void)
5458 {
5459 static HOST_WIDE_INT set = -1;
5460 if (set == -1)
5461 set = new_alias_set ();
5462 return set;
5463 }
5464
5465 /* Return a legitimate reference for ORIG (an address) using the
5466 register REG. If REG is 0, a new pseudo is generated.
5467
5468 There are two types of references that must be handled:
5469
5470 1. Global data references must load the address from the GOT, via
5471 the PIC reg. An insn is emitted to do this load, and the reg is
5472 returned.
5473
5474 2. Static data references, constant pool addresses, and code labels
5475 compute the address as an offset from the GOT, whose base is in
5476 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
5477 differentiate them from global data objects. The returned
5478 address is the PIC reg + an unspec constant.
5479
5480 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
5481 reg also appears in the address. */
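/* For illustration only, mirroring the 32-bit cases below: a global
   symbol becomes a load such as
     (mem (plus pic_offset_table_rtx
                (const (unspec [(symbol_ref "x")] UNSPEC_GOT))))
   while a local symbol becomes the direct sum
     (plus pic_offset_table_rtx
           (const (unspec [(symbol_ref "x")] UNSPEC_GOTOFF))).  */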
5482
5483 static rtx
5484 legitimize_pic_address (rtx orig, rtx reg)
5485 {
5486 rtx addr = orig;
5487 rtx new = orig;
5488 rtx base;
5489
5490 #if TARGET_MACHO
5491 if (reg == 0)
5492 reg = gen_reg_rtx (Pmode);
5493 /* Use the generic Mach-O PIC machinery. */
5494 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
5495 #endif
5496
5497 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
5498 new = addr;
5499 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
5500 {
5501 /* This symbol may be referenced via a displacement from the PIC
5502 base address (@GOTOFF). */
5503
5504 if (reload_in_progress)
5505 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5506 if (GET_CODE (addr) == CONST)
5507 addr = XEXP (addr, 0);
5508 if (GET_CODE (addr) == PLUS)
5509 {
5510 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
5511 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
5512 }
5513 else
5514 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
5515 new = gen_rtx_CONST (Pmode, new);
5516 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5517
5518 if (reg != 0)
5519 {
5520 emit_move_insn (reg, new);
5521 new = reg;
5522 }
5523 }
5524 else if (GET_CODE (addr) == SYMBOL_REF)
5525 {
5526 if (TARGET_64BIT)
5527 {
5528 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
5529 new = gen_rtx_CONST (Pmode, new);
5530 new = gen_const_mem (Pmode, new);
5531 set_mem_alias_set (new, ix86_GOT_alias_set ());
5532
5533 if (reg == 0)
5534 reg = gen_reg_rtx (Pmode);
5535 /* Use gen_movsi directly; otherwise the address is loaded
5536 into a register for CSE. We don't want to CSE this address;
5537 instead we CSE addresses from the GOT table, so skip this. */
5538 emit_insn (gen_movsi (reg, new));
5539 new = reg;
5540 }
5541 else
5542 {
5543 /* This symbol must be referenced via a load from the
5544 Global Offset Table (@GOT). */
5545
5546 if (reload_in_progress)
5547 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5548 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
5549 new = gen_rtx_CONST (Pmode, new);
5550 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5551 new = gen_const_mem (Pmode, new);
5552 set_mem_alias_set (new, ix86_GOT_alias_set ());
5553
5554 if (reg == 0)
5555 reg = gen_reg_rtx (Pmode);
5556 emit_move_insn (reg, new);
5557 new = reg;
5558 }
5559 }
5560 else
5561 {
5562 if (GET_CODE (addr) == CONST)
5563 {
5564 addr = XEXP (addr, 0);
5565
5566 /* We must match stuff we generated before. Assume the only
5567 unspecs that can get here are ours. Not that we could do
5568 anything with them anyway.... */
5569 if (GET_CODE (addr) == UNSPEC
5570 || (GET_CODE (addr) == PLUS
5571 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
5572 return orig;
5573 if (GET_CODE (addr) != PLUS)
5574 abort ();
5575 }
5576 if (GET_CODE (addr) == PLUS)
5577 {
5578 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
5579
5580 /* Check first to see if this is a constant offset from a @GOTOFF
5581 symbol reference. */
5582 if (local_symbolic_operand (op0, Pmode)
5583 && GET_CODE (op1) == CONST_INT)
5584 {
5585 if (!TARGET_64BIT)
5586 {
5587 if (reload_in_progress)
5588 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5589 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
5590 UNSPEC_GOTOFF);
5591 new = gen_rtx_PLUS (Pmode, new, op1);
5592 new = gen_rtx_CONST (Pmode, new);
5593 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5594
5595 if (reg != 0)
5596 {
5597 emit_move_insn (reg, new);
5598 new = reg;
5599 }
5600 }
5601 else
5602 {
5603 if (INTVAL (op1) < -16*1024*1024
5604 || INTVAL (op1) >= 16*1024*1024)
5605 new = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
5606 }
5607 }
5608 else
5609 {
5610 base = legitimize_pic_address (XEXP (addr, 0), reg);
5611 new = legitimize_pic_address (XEXP (addr, 1),
5612 base == reg ? NULL_RTX : reg);
5613
5614 if (GET_CODE (new) == CONST_INT)
5615 new = plus_constant (base, INTVAL (new));
5616 else
5617 {
5618 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
5619 {
5620 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
5621 new = XEXP (new, 1);
5622 }
5623 new = gen_rtx_PLUS (Pmode, base, new);
5624 }
5625 }
5626 }
5627 }
5628 return new;
5629 }
5630 \f
5631 /* Load the thread pointer. If TO_REG is true, force it into a register. */
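/* Descriptive note only: UNSPEC_TP is expanded by the i386.md patterns
   into a segment-relative access to the thread block (on GNU/Linux this
   is %gs:0 in 32-bit mode and %fs:0 in 64-bit mode).  */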
5632
5633 static rtx
5634 get_thread_pointer (int to_reg)
5635 {
5636 rtx tp, reg, insn;
5637
5638 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
5639 if (!to_reg)
5640 return tp;
5641
5642 reg = gen_reg_rtx (Pmode);
5643 insn = gen_rtx_SET (VOIDmode, reg, tp);
5644 insn = emit_insn (insn);
5645
5646 return reg;
5647 }
5648
5649 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
5650 false if we expect this to be used for a memory address and true if
5651 we expect to load the address into a register. */
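/* Roughly, the four TLS models handled below are: global-dynamic (call
   the tls_get_addr helper with a GOT entry for the symbol),
   local-dynamic (one call for the module base, then DTPOFF offsets per
   symbol), initial-exec (load the TP-relative offset from the GOT), and
   local-exec (the TP-relative offset is a link-time constant).  */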
5652
5653 static rtx
5654 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
5655 {
5656 rtx dest, base, off, pic;
5657 int type;
5658
5659 switch (model)
5660 {
5661 case TLS_MODEL_GLOBAL_DYNAMIC:
5662 dest = gen_reg_rtx (Pmode);
5663 if (TARGET_64BIT)
5664 {
5665 rtx rax = gen_rtx_REG (Pmode, 0), insns;
5666
5667 start_sequence ();
5668 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
5669 insns = get_insns ();
5670 end_sequence ();
5671
5672 emit_libcall_block (insns, dest, rax, x);
5673 }
5674 else
5675 emit_insn (gen_tls_global_dynamic_32 (dest, x));
5676 break;
5677
5678 case TLS_MODEL_LOCAL_DYNAMIC:
5679 base = gen_reg_rtx (Pmode);
5680 if (TARGET_64BIT)
5681 {
5682 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
5683
5684 start_sequence ();
5685 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
5686 insns = get_insns ();
5687 end_sequence ();
5688
5689 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
5690 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
5691 emit_libcall_block (insns, base, rax, note);
5692 }
5693 else
5694 emit_insn (gen_tls_local_dynamic_base_32 (base));
5695
5696 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
5697 off = gen_rtx_CONST (Pmode, off);
5698
5699 return gen_rtx_PLUS (Pmode, base, off);
5700
5701 case TLS_MODEL_INITIAL_EXEC:
5702 if (TARGET_64BIT)
5703 {
5704 pic = NULL;
5705 type = UNSPEC_GOTNTPOFF;
5706 }
5707 else if (flag_pic)
5708 {
5709 if (reload_in_progress)
5710 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5711 pic = pic_offset_table_rtx;
5712 type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
5713 }
5714 else if (!TARGET_GNU_TLS)
5715 {
5716 pic = gen_reg_rtx (Pmode);
5717 emit_insn (gen_set_got (pic));
5718 type = UNSPEC_GOTTPOFF;
5719 }
5720 else
5721 {
5722 pic = NULL;
5723 type = UNSPEC_INDNTPOFF;
5724 }
5725
5726 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
5727 off = gen_rtx_CONST (Pmode, off);
5728 if (pic)
5729 off = gen_rtx_PLUS (Pmode, pic, off);
5730 off = gen_const_mem (Pmode, off);
5731 set_mem_alias_set (off, ix86_GOT_alias_set ());
5732
5733 if (TARGET_64BIT || TARGET_GNU_TLS)
5734 {
5735 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
5736 off = force_reg (Pmode, off);
5737 return gen_rtx_PLUS (Pmode, base, off);
5738 }
5739 else
5740 {
5741 base = get_thread_pointer (true);
5742 dest = gen_reg_rtx (Pmode);
5743 emit_insn (gen_subsi3 (dest, base, off));
5744 }
5745 break;
5746
5747 case TLS_MODEL_LOCAL_EXEC:
5748 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
5749 (TARGET_64BIT || TARGET_GNU_TLS)
5750 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
5751 off = gen_rtx_CONST (Pmode, off);
5752
5753 if (TARGET_64BIT || TARGET_GNU_TLS)
5754 {
5755 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
5756 return gen_rtx_PLUS (Pmode, base, off);
5757 }
5758 else
5759 {
5760 base = get_thread_pointer (true);
5761 dest = gen_reg_rtx (Pmode);
5762 emit_insn (gen_subsi3 (dest, base, off));
5763 }
5764 break;
5765
5766 default:
5767 abort ();
5768 }
5769
5770 return dest;
5771 }
5772
5773 /* Try machine-dependent ways of modifying an illegitimate address
5774 to be legitimate. If we find one, return the new, valid address.
5775 This macro is used in only one place: `memory_address' in explow.c.
5776
5777 OLDX is the address as it was before break_out_memory_refs was called.
5778 In some cases it is useful to look at this to decide what needs to be done.
5779
5780 MODE and WIN are passed so that this macro can use
5781 GO_IF_LEGITIMATE_ADDRESS.
5782
5783 It is always safe for this macro to do nothing. It exists to recognize
5784 opportunities to optimize the output.
5785
5786 For the 80386, we handle X+REG by loading X into a register R and
5787 using R+REG. R will go in a general reg and indexing will be used.
5788 However, if REG is a broken-out memory address or multiplication,
5789 nothing needs to be done because REG can certainly go in a general reg.
5790
5791 When -fpic is used, special handling is needed for symbolic references.
5792 See comments by legitimize_pic_address in i386.c for details. */
5793
5794 rtx
5795 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
5796 {
5797 int changed = 0;
5798 unsigned log;
5799
5800 if (TARGET_DEBUG_ADDR)
5801 {
5802 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
5803 GET_MODE_NAME (mode));
5804 debug_rtx (x);
5805 }
5806
5807 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
5808 if (log)
5809 return legitimize_tls_address (x, log, false);
5810 if (GET_CODE (x) == CONST
5811 && GET_CODE (XEXP (x, 0)) == PLUS
5812 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
5813 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
5814 {
5815 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
5816 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
5817 }
5818
5819 if (flag_pic && SYMBOLIC_CONST (x))
5820 return legitimize_pic_address (x, 0);
5821
5822 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
5823 if (GET_CODE (x) == ASHIFT
5824 && GET_CODE (XEXP (x, 1)) == CONST_INT
5825 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
5826 {
5827 changed = 1;
5828 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
5829 GEN_INT (1 << log));
5830 }
5831
5832 if (GET_CODE (x) == PLUS)
5833 {
5834 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
5835
5836 if (GET_CODE (XEXP (x, 0)) == ASHIFT
5837 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
5838 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
5839 {
5840 changed = 1;
5841 XEXP (x, 0) = gen_rtx_MULT (Pmode,
5842 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
5843 GEN_INT (1 << log));
5844 }
5845
5846 if (GET_CODE (XEXP (x, 1)) == ASHIFT
5847 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
5848 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
5849 {
5850 changed = 1;
5851 XEXP (x, 1) = gen_rtx_MULT (Pmode,
5852 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
5853 GEN_INT (1 << log));
5854 }
5855
5856 /* Put multiply first if it isn't already. */
5857 if (GET_CODE (XEXP (x, 1)) == MULT)
5858 {
5859 rtx tmp = XEXP (x, 0);
5860 XEXP (x, 0) = XEXP (x, 1);
5861 XEXP (x, 1) = tmp;
5862 changed = 1;
5863 }
5864
5865 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
5866 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
5867 created by virtual register instantiation, register elimination, and
5868 similar optimizations. */
5869 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
5870 {
5871 changed = 1;
5872 x = gen_rtx_PLUS (Pmode,
5873 gen_rtx_PLUS (Pmode, XEXP (x, 0),
5874 XEXP (XEXP (x, 1), 0)),
5875 XEXP (XEXP (x, 1), 1));
5876 }
5877
5878 /* Canonicalize
5879 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
5880 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
5881 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
5882 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5883 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
5884 && CONSTANT_P (XEXP (x, 1)))
5885 {
5886 rtx constant;
5887 rtx other = NULL_RTX;
5888
5889 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5890 {
5891 constant = XEXP (x, 1);
5892 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
5893 }
5894 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
5895 {
5896 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
5897 other = XEXP (x, 1);
5898 }
5899 else
5900 constant = 0;
5901
5902 if (constant)
5903 {
5904 changed = 1;
5905 x = gen_rtx_PLUS (Pmode,
5906 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
5907 XEXP (XEXP (XEXP (x, 0), 1), 0)),
5908 plus_constant (other, INTVAL (constant)));
5909 }
5910 }
5911
5912 if (changed && legitimate_address_p (mode, x, FALSE))
5913 return x;
5914
5915 if (GET_CODE (XEXP (x, 0)) == MULT)
5916 {
5917 changed = 1;
5918 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
5919 }
5920
5921 if (GET_CODE (XEXP (x, 1)) == MULT)
5922 {
5923 changed = 1;
5924 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
5925 }
5926
5927 if (changed
5928 && GET_CODE (XEXP (x, 1)) == REG
5929 && GET_CODE (XEXP (x, 0)) == REG)
5930 return x;
5931
5932 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
5933 {
5934 changed = 1;
5935 x = legitimize_pic_address (x, 0);
5936 }
5937
5938 if (changed && legitimate_address_p (mode, x, FALSE))
5939 return x;
5940
5941 if (GET_CODE (XEXP (x, 0)) == REG)
5942 {
5943 rtx temp = gen_reg_rtx (Pmode);
5944 rtx val = force_operand (XEXP (x, 1), temp);
5945 if (val != temp)
5946 emit_move_insn (temp, val);
5947
5948 XEXP (x, 1) = temp;
5949 return x;
5950 }
5951
5952 else if (GET_CODE (XEXP (x, 1)) == REG)
5953 {
5954 rtx temp = gen_reg_rtx (Pmode);
5955 rtx val = force_operand (XEXP (x, 0), temp);
5956 if (val != temp)
5957 emit_move_insn (temp, val);
5958
5959 XEXP (x, 0) = temp;
5960 return x;
5961 }
5962 }
5963
5964 return x;
5965 }
5966 \f
5967 /* Print an integer constant expression in assembler syntax. Addition
5968 and subtraction are the only arithmetic that may appear in these
5969 expressions. FILE is the stdio stream to write to, X is the rtx, and
5970 CODE is the operand print code from the output string. */
5971
5972 static void
5973 output_pic_addr_const (FILE *file, rtx x, int code)
5974 {
5975 char buf[256];
5976
5977 switch (GET_CODE (x))
5978 {
5979 case PC:
5980 if (flag_pic)
5981 putc ('.', file);
5982 else
5983 abort ();
5984 break;
5985
5986 case SYMBOL_REF:
5987 assemble_name (file, XSTR (x, 0));
5988 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
5989 fputs ("@PLT", file);
5990 break;
5991
5992 case LABEL_REF:
5993 x = XEXP (x, 0);
5994 /* FALLTHRU */
5995 case CODE_LABEL:
5996 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
5997 assemble_name (asm_out_file, buf);
5998 break;
5999
6000 case CONST_INT:
6001 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6002 break;
6003
6004 case CONST:
6005 /* This used to output parentheses around the expression,
6006 but that does not work on the 386 (either ATT or BSD assembler). */
6007 output_pic_addr_const (file, XEXP (x, 0), code);
6008 break;
6009
6010 case CONST_DOUBLE:
6011 if (GET_MODE (x) == VOIDmode)
6012 {
6013 /* We can use %d if the number is <32 bits and positive. */
6014 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
6015 fprintf (file, "0x%lx%08lx",
6016 (unsigned long) CONST_DOUBLE_HIGH (x),
6017 (unsigned long) CONST_DOUBLE_LOW (x));
6018 else
6019 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
6020 }
6021 else
6022 /* We can't handle floating point constants;
6023 PRINT_OPERAND must handle them. */
6024 output_operand_lossage ("floating constant misused");
6025 break;
6026
6027 case PLUS:
6028 /* Some assemblers need integer constants to appear first. */
6029 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
6030 {
6031 output_pic_addr_const (file, XEXP (x, 0), code);
6032 putc ('+', file);
6033 output_pic_addr_const (file, XEXP (x, 1), code);
6034 }
6035 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6036 {
6037 output_pic_addr_const (file, XEXP (x, 1), code);
6038 putc ('+', file);
6039 output_pic_addr_const (file, XEXP (x, 0), code);
6040 }
6041 else
6042 abort ();
6043 break;
6044
6045 case MINUS:
6046 if (!TARGET_MACHO)
6047 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
6048 output_pic_addr_const (file, XEXP (x, 0), code);
6049 putc ('-', file);
6050 output_pic_addr_const (file, XEXP (x, 1), code);
6051 if (!TARGET_MACHO)
6052 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
6053 break;
6054
6055 case UNSPEC:
6056 if (XVECLEN (x, 0) != 1)
6057 abort ();
6058 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
6059 switch (XINT (x, 1))
6060 {
6061 case UNSPEC_GOT:
6062 fputs ("@GOT", file);
6063 break;
6064 case UNSPEC_GOTOFF:
6065 fputs ("@GOTOFF", file);
6066 break;
6067 case UNSPEC_GOTPCREL:
6068 fputs ("@GOTPCREL(%rip)", file);
6069 break;
6070 case UNSPEC_GOTTPOFF:
6071 /* FIXME: This might be @TPOFF in Sun ld too. */
6072 fputs ("@GOTTPOFF", file);
6073 break;
6074 case UNSPEC_TPOFF:
6075 fputs ("@TPOFF", file);
6076 break;
6077 case UNSPEC_NTPOFF:
6078 if (TARGET_64BIT)
6079 fputs ("@TPOFF", file);
6080 else
6081 fputs ("@NTPOFF", file);
6082 break;
6083 case UNSPEC_DTPOFF:
6084 fputs ("@DTPOFF", file);
6085 break;
6086 case UNSPEC_GOTNTPOFF:
6087 if (TARGET_64BIT)
6088 fputs ("@GOTTPOFF(%rip)", file);
6089 else
6090 fputs ("@GOTNTPOFF", file);
6091 break;
6092 case UNSPEC_INDNTPOFF:
6093 fputs ("@INDNTPOFF", file);
6094 break;
6095 default:
6096 output_operand_lossage ("invalid UNSPEC as operand");
6097 break;
6098 }
6099 break;
6100
6101 default:
6102 output_operand_lossage ("invalid expression as operand");
6103 }
6104 }
6105
6106 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
6107 We need to emit DTP-relative relocations. */
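/* For example, a 4-byte entry comes out as something like
     .long   foo@DTPOFF
   and an 8-byte entry as
     .long   foo@DTPOFF, 0
   (the exact directive is whatever ASM_LONG expands to).  */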
6108
6109 void
6110 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
6111 {
6112 fputs (ASM_LONG, file);
6113 output_addr_const (file, x);
6114 fputs ("@DTPOFF", file);
6115 switch (size)
6116 {
6117 case 4:
6118 break;
6119 case 8:
6120 fputs (", 0", file);
6121 break;
6122 default:
6123 abort ();
6124 }
6125 }
6126
6127 /* In the name of slightly smaller debug output, and to cater to
6128 general assembler lossage, recognize PIC+GOTOFF and turn it back
6129 into a direct symbol reference. */
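/* For example (a sketch of the 32-bit case below), an address of the form
     (plus pic_register (const (unspec [(symbol_ref "x")] UNSPEC_GOTOFF)))
   is turned back into plain (symbol_ref "x").  */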
6130
6131 static rtx
6132 ix86_delegitimize_address (rtx orig_x)
6133 {
6134 rtx x = orig_x, y;
6135
6136 if (GET_CODE (x) == MEM)
6137 x = XEXP (x, 0);
6138
6139 if (TARGET_64BIT)
6140 {
6141 if (GET_CODE (x) != CONST
6142 || GET_CODE (XEXP (x, 0)) != UNSPEC
6143 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
6144 || GET_CODE (orig_x) != MEM)
6145 return orig_x;
6146 return XVECEXP (XEXP (x, 0), 0, 0);
6147 }
6148
6149 if (GET_CODE (x) != PLUS
6150 || GET_CODE (XEXP (x, 1)) != CONST)
6151 return orig_x;
6152
6153 if (GET_CODE (XEXP (x, 0)) == REG
6154 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6155 /* %ebx + GOT/GOTOFF */
6156 y = NULL;
6157 else if (GET_CODE (XEXP (x, 0)) == PLUS)
6158 {
6159 /* %ebx + %reg * scale + GOT/GOTOFF */
6160 y = XEXP (x, 0);
6161 if (GET_CODE (XEXP (y, 0)) == REG
6162 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
6163 y = XEXP (y, 1);
6164 else if (GET_CODE (XEXP (y, 1)) == REG
6165 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
6166 y = XEXP (y, 0);
6167 else
6168 return orig_x;
6169 if (GET_CODE (y) != REG
6170 && GET_CODE (y) != MULT
6171 && GET_CODE (y) != ASHIFT)
6172 return orig_x;
6173 }
6174 else
6175 return orig_x;
6176
6177 x = XEXP (XEXP (x, 1), 0);
6178 if (GET_CODE (x) == UNSPEC
6179 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6180 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
6181 {
6182 if (y)
6183 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
6184 return XVECEXP (x, 0, 0);
6185 }
6186
6187 if (GET_CODE (x) == PLUS
6188 && GET_CODE (XEXP (x, 0)) == UNSPEC
6189 && GET_CODE (XEXP (x, 1)) == CONST_INT
6190 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6191 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
6192 && GET_CODE (orig_x) != MEM)))
6193 {
6194 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
6195 if (y)
6196 return gen_rtx_PLUS (Pmode, y, x);
6197 return x;
6198 }
6199
6200 return orig_x;
6201 }
6202 \f
6203 static void
6204 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
6205 int fp, FILE *file)
6206 {
6207 const char *suffix;
6208
6209 if (mode == CCFPmode || mode == CCFPUmode)
6210 {
6211 enum rtx_code second_code, bypass_code;
6212 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
6213 if (bypass_code != UNKNOWN || second_code != UNKNOWN)
6214 abort ();
6215 code = ix86_fp_compare_code_to_integer (code);
6216 mode = CCmode;
6217 }
6218 if (reverse)
6219 code = reverse_condition (code);
6220
6221 switch (code)
6222 {
6223 case EQ:
6224 suffix = "e";
6225 break;
6226 case NE:
6227 suffix = "ne";
6228 break;
6229 case GT:
6230 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
6231 abort ();
6232 suffix = "g";
6233 break;
6234 case GTU:
6235 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
6236 Those same assemblers have the same but opposite lossage on cmov. */
6237 if (mode != CCmode)
6238 abort ();
6239 suffix = fp ? "nbe" : "a";
6240 break;
6241 case LT:
6242 if (mode == CCNOmode || mode == CCGOCmode)
6243 suffix = "s";
6244 else if (mode == CCmode || mode == CCGCmode)
6245 suffix = "l";
6246 else
6247 abort ();
6248 break;
6249 case LTU:
6250 if (mode != CCmode)
6251 abort ();
6252 suffix = "b";
6253 break;
6254 case GE:
6255 if (mode == CCNOmode || mode == CCGOCmode)
6256 suffix = "ns";
6257 else if (mode == CCmode || mode == CCGCmode)
6258 suffix = "ge";
6259 else
6260 abort ();
6261 break;
6262 case GEU:
6263 /* ??? As above. */
6264 if (mode != CCmode)
6265 abort ();
6266 suffix = fp ? "nb" : "ae";
6267 break;
6268 case LE:
6269 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
6270 abort ();
6271 suffix = "le";
6272 break;
6273 case LEU:
6274 if (mode != CCmode)
6275 abort ();
6276 suffix = "be";
6277 break;
6278 case UNORDERED:
6279 suffix = fp ? "u" : "p";
6280 break;
6281 case ORDERED:
6282 suffix = fp ? "nu" : "np";
6283 break;
6284 default:
6285 abort ();
6286 }
6287 fputs (suffix, file);
6288 }
6289
6290 /* Print the name of register X to FILE based on its machine mode and number.
6291 If CODE is 'w', pretend the mode is HImode.
6292 If CODE is 'b', pretend the mode is QImode.
6293 If CODE is 'k', pretend the mode is SImode.
6294 If CODE is 'q', pretend the mode is DImode.
6295 If CODE is 'h', pretend the reg is the `high' byte register.
6296 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
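/* For instance, under TARGET_64BIT the extended register r10 prints as
   r10b, r10w, r10d or r10 for the 'b', 'w', 'k' and 'q' codes
   respectively; see the REX_INT_REG_P branch below.  */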
6297
6298 void
6299 print_reg (rtx x, int code, FILE *file)
6300 {
6301 if (REGNO (x) == ARG_POINTER_REGNUM
6302 || REGNO (x) == FRAME_POINTER_REGNUM
6303 || REGNO (x) == FLAGS_REG
6304 || REGNO (x) == FPSR_REG)
6305 abort ();
6306
6307 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
6308 putc ('%', file);
6309
6310 if (code == 'w' || MMX_REG_P (x))
6311 code = 2;
6312 else if (code == 'b')
6313 code = 1;
6314 else if (code == 'k')
6315 code = 4;
6316 else if (code == 'q')
6317 code = 8;
6318 else if (code == 'y')
6319 code = 3;
6320 else if (code == 'h')
6321 code = 0;
6322 else
6323 code = GET_MODE_SIZE (GET_MODE (x));
6324
6325 /* Irritatingly, AMD extended registers use a different naming convention
6326 from the normal registers. */
6327 if (REX_INT_REG_P (x))
6328 {
6329 if (!TARGET_64BIT)
6330 abort ();
6331 switch (code)
6332 {
6333 case 0:
6334 error ("extended registers have no high halves");
6335 break;
6336 case 1:
6337 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
6338 break;
6339 case 2:
6340 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
6341 break;
6342 case 4:
6343 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
6344 break;
6345 case 8:
6346 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
6347 break;
6348 default:
6349 error ("unsupported operand size for extended register");
6350 break;
6351 }
6352 return;
6353 }
6354 switch (code)
6355 {
6356 case 3:
6357 if (STACK_TOP_P (x))
6358 {
6359 fputs ("st(0)", file);
6360 break;
6361 }
6362 /* FALLTHRU */
6363 case 8:
6364 case 4:
6365 case 12:
6366 if (! ANY_FP_REG_P (x))
6367 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
6368 /* FALLTHRU */
6369 case 16:
6370 case 2:
6371 normal:
6372 fputs (hi_reg_name[REGNO (x)], file);
6373 break;
6374 case 1:
6375 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
6376 goto normal;
6377 fputs (qi_reg_name[REGNO (x)], file);
6378 break;
6379 case 0:
6380 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
6381 goto normal;
6382 fputs (qi_high_reg_name[REGNO (x)], file);
6383 break;
6384 default:
6385 abort ();
6386 }
6387 }
6388
6389 /* Locate some local-dynamic symbol still in use by this function
6390 so that we can print its name in some tls_local_dynamic_base
6391 pattern. */
6392
6393 static const char *
6394 get_some_local_dynamic_name (void)
6395 {
6396 rtx insn;
6397
6398 if (cfun->machine->some_ld_name)
6399 return cfun->machine->some_ld_name;
6400
6401 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
6402 if (INSN_P (insn)
6403 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
6404 return cfun->machine->some_ld_name;
6405
6406 abort ();
6407 }
6408
6409 static int
6410 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
6411 {
6412 rtx x = *px;
6413
6414 if (GET_CODE (x) == SYMBOL_REF
6415 && local_dynamic_symbolic_operand (x, Pmode))
6416 {
6417 cfun->machine->some_ld_name = XSTR (x, 0);
6418 return 1;
6419 }
6420
6421 return 0;
6422 }
6423
6424 /* Meaning of CODE:
6425 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
6426 C -- print opcode suffix for set/cmov insn.
6427 c -- like C, but print reversed condition
6428 F,f -- likewise, but for floating-point.
6429 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
6430 otherwise nothing
6431 R -- print the prefix for register names.
6432 z -- print the opcode suffix for the size of the current operand.
6433 * -- print a star (in certain assembler syntax)
6434 A -- print an absolute memory reference.
6435 w -- print the operand as if it's a "word" (HImode) even if it isn't.
6436 s -- print a shift double count, followed by the assembler's argument
6437 delimiter.
6438 b -- print the QImode name of the register for the indicated operand.
6439 %b0 would print %al if operands[0] is reg 0.
6440 w -- likewise, print the HImode name of the register.
6441 k -- likewise, print the SImode name of the register.
6442 q -- likewise, print the DImode name of the register.
6443 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
6444 y -- print "st(0)" instead of "st" as a register.
6445 D -- print condition for SSE cmp instruction.
6446 P -- if PIC, print an @PLT suffix.
6447 X -- don't print any sort of PIC '@' suffix for a symbol.
6448 & -- print some in-use local-dynamic symbol name.
6449 H -- print a memory address offset by 8; used for sse high-parts
6450 */
6451
6452 void
6453 print_operand (FILE *file, rtx x, int code)
6454 {
6455 if (code)
6456 {
6457 switch (code)
6458 {
6459 case '*':
6460 if (ASSEMBLER_DIALECT == ASM_ATT)
6461 putc ('*', file);
6462 return;
6463
6464 case '&':
6465 assemble_name (file, get_some_local_dynamic_name ());
6466 return;
6467
6468 case 'A':
6469 if (ASSEMBLER_DIALECT == ASM_ATT)
6470 putc ('*', file);
6471 else if (ASSEMBLER_DIALECT == ASM_INTEL)
6472 {
6473 /* Intel syntax. For absolute addresses, registers should not
6474 be surrounded by brackets. */
6475 if (GET_CODE (x) != REG)
6476 {
6477 putc ('[', file);
6478 PRINT_OPERAND (file, x, 0);
6479 putc (']', file);
6480 return;
6481 }
6482 }
6483 else
6484 abort ();
6485
6486 PRINT_OPERAND (file, x, 0);
6487 return;
6488
6489
6490 case 'L':
6491 if (ASSEMBLER_DIALECT == ASM_ATT)
6492 putc ('l', file);
6493 return;
6494
6495 case 'W':
6496 if (ASSEMBLER_DIALECT == ASM_ATT)
6497 putc ('w', file);
6498 return;
6499
6500 case 'B':
6501 if (ASSEMBLER_DIALECT == ASM_ATT)
6502 putc ('b', file);
6503 return;
6504
6505 case 'Q':
6506 if (ASSEMBLER_DIALECT == ASM_ATT)
6507 putc ('l', file);
6508 return;
6509
6510 case 'S':
6511 if (ASSEMBLER_DIALECT == ASM_ATT)
6512 putc ('s', file);
6513 return;
6514
6515 case 'T':
6516 if (ASSEMBLER_DIALECT == ASM_ATT)
6517 putc ('t', file);
6518 return;
6519
6520 case 'z':
6521 /* 387 opcodes don't get size suffixes if the operands are
6522 registers. */
6523 if (STACK_REG_P (x))
6524 return;
6525
6526 /* Likewise if using Intel opcodes. */
6527 if (ASSEMBLER_DIALECT == ASM_INTEL)
6528 return;
6529
6530 /* Derive the opcode size suffix from the size of the operand. */
6531 switch (GET_MODE_SIZE (GET_MODE (x)))
6532 {
6533 case 2:
6534 #ifdef HAVE_GAS_FILDS_FISTS
6535 putc ('s', file);
6536 #endif
6537 return;
6538
6539 case 4:
6540 if (GET_MODE (x) == SFmode)
6541 {
6542 putc ('s', file);
6543 return;
6544 }
6545 else
6546 putc ('l', file);
6547 return;
6548
6549 case 12:
6550 case 16:
6551 putc ('t', file);
6552 return;
6553
6554 case 8:
6555 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
6556 {
6557 #ifdef GAS_MNEMONICS
6558 putc ('q', file);
6559 #else
6560 putc ('l', file);
6561 putc ('l', file);
6562 #endif
6563 }
6564 else
6565 putc ('l', file);
6566 return;
6567
6568 default:
6569 abort ();
6570 }
6571
6572 case 'b':
6573 case 'w':
6574 case 'k':
6575 case 'q':
6576 case 'h':
6577 case 'y':
6578 case 'X':
6579 case 'P':
6580 break;
6581
6582 case 's':
6583 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
6584 {
6585 PRINT_OPERAND (file, x, 0);
6586 putc (',', file);
6587 }
6588 return;
6589
6590 case 'D':
6591 /* Little bit of braindamage here. The SSE compare instructions
6592 use completely different names for the comparisons than the
6593 fp conditional moves do. */
6594 switch (GET_CODE (x))
6595 {
6596 case EQ:
6597 case UNEQ:
6598 fputs ("eq", file);
6599 break;
6600 case LT:
6601 case UNLT:
6602 fputs ("lt", file);
6603 break;
6604 case LE:
6605 case UNLE:
6606 fputs ("le", file);
6607 break;
6608 case UNORDERED:
6609 fputs ("unord", file);
6610 break;
6611 case NE:
6612 case LTGT:
6613 fputs ("neq", file);
6614 break;
6615 case UNGE:
6616 case GE:
6617 fputs ("nlt", file);
6618 break;
6619 case UNGT:
6620 case GT:
6621 fputs ("nle", file);
6622 break;
6623 case ORDERED:
6624 fputs ("ord", file);
6625 break;
6626 default:
6627 abort ();
6628 break;
6629 }
6630 return;
6631 case 'O':
6632 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
6633 if (ASSEMBLER_DIALECT == ASM_ATT)
6634 {
6635 switch (GET_MODE (x))
6636 {
6637 case HImode: putc ('w', file); break;
6638 case SImode:
6639 case SFmode: putc ('l', file); break;
6640 case DImode:
6641 case DFmode: putc ('q', file); break;
6642 default: abort ();
6643 }
6644 putc ('.', file);
6645 }
6646 #endif
6647 return;
6648 case 'C':
6649 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
6650 return;
6651 case 'F':
6652 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
6653 if (ASSEMBLER_DIALECT == ASM_ATT)
6654 putc ('.', file);
6655 #endif
6656 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
6657 return;
6658
6659 /* Like above, but reverse condition */
6660 case 'c':
6661 /* Check to see if argument to %c is really a constant
6662 and not a condition code which needs to be reversed. */
6663 if (!COMPARISON_P (x))
6664 {
6665 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
6666 return;
6667 }
6668 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
6669 return;
6670 case 'f':
6671 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
6672 if (ASSEMBLER_DIALECT == ASM_ATT)
6673 putc ('.', file);
6674 #endif
6675 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
6676 return;
6677
6678 case 'H':
6679 /* It doesn't actually matter what mode we use here, as we're
6680 only going to use this for printing. */
6681 x = adjust_address_nv (x, DImode, 8);
6682 break;
6683
6684 case '+':
6685 {
6686 rtx x;
6687
6688 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
6689 return;
6690
6691 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
6692 if (x)
6693 {
6694 int pred_val = INTVAL (XEXP (x, 0));
6695
6696 if (pred_val < REG_BR_PROB_BASE * 45 / 100
6697 || pred_val > REG_BR_PROB_BASE * 55 / 100)
6698 {
6699 int taken = pred_val > REG_BR_PROB_BASE / 2;
6700 int cputaken = final_forward_branch_p (current_output_insn) == 0;
6701
6702 /* Emit hints only in the case where the default branch prediction
6703 heuristics would fail. */
6704 if (taken != cputaken)
6705 {
6706 /* We use 3e (DS) prefix for taken branches and
6707 2e (CS) prefix for not taken branches. */
6708 if (taken)
6709 fputs ("ds ; ", file);
6710 else
6711 fputs ("cs ; ", file);
6712 }
6713 }
6714 }
6715 return;
6716 }
6717 default:
6718 output_operand_lossage ("invalid operand code '%c'", code);
6719 }
6720 }
6721
6722 if (GET_CODE (x) == REG)
6723 print_reg (x, code, file);
6724
6725 else if (GET_CODE (x) == MEM)
6726 {
6727 /* No `byte ptr' prefix for call instructions. */
6728 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
6729 {
6730 const char * size;
6731 switch (GET_MODE_SIZE (GET_MODE (x)))
6732 {
6733 case 1: size = "BYTE"; break;
6734 case 2: size = "WORD"; break;
6735 case 4: size = "DWORD"; break;
6736 case 8: size = "QWORD"; break;
6737 case 12: size = "XWORD"; break;
6738 case 16: size = "XMMWORD"; break;
6739 default:
6740 abort ();
6741 }
6742
6743 /* Check for explicit size override (codes 'b', 'w' and 'k') */
6744 if (code == 'b')
6745 size = "BYTE";
6746 else if (code == 'w')
6747 size = "WORD";
6748 else if (code == 'k')
6749 size = "DWORD";
6750
6751 fputs (size, file);
6752 fputs (" PTR ", file);
6753 }
6754
6755 x = XEXP (x, 0);
6756 /* Avoid (%rip) for call operands. */
6757 if (CONSTANT_ADDRESS_P (x) && code == 'P'
6758 && GET_CODE (x) != CONST_INT)
6759 output_addr_const (file, x);
6760 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
6761 output_operand_lossage ("invalid constraints for operand");
6762 else
6763 output_address (x);
6764 }
6765
6766 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
6767 {
6768 REAL_VALUE_TYPE r;
6769 long l;
6770
6771 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6772 REAL_VALUE_TO_TARGET_SINGLE (r, l);
6773
6774 if (ASSEMBLER_DIALECT == ASM_ATT)
6775 putc ('$', file);
6776 fprintf (file, "0x%08lx", l);
6777 }
6778
6779 /* These float cases don't actually occur as immediate operands. */
6780 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
6781 {
6782 char dstr[30];
6783
6784 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
6785 fprintf (file, "%s", dstr);
6786 }
6787
6788 else if (GET_CODE (x) == CONST_DOUBLE
6789 && GET_MODE (x) == XFmode)
6790 {
6791 char dstr[30];
6792
6793 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
6794 fprintf (file, "%s", dstr);
6795 }
6796
6797 else
6798 {
6799 /* We have patterns that allow zero sets of memory, for instance.
6800 In 64-bit mode, we should probably support all 8-byte vectors,
6801 since we can in fact encode that into an immediate. */
6802 if (GET_CODE (x) == CONST_VECTOR)
6803 {
6804 if (x == CONST0_RTX (GET_MODE (x)))
6805 x = const0_rtx;
6806 else
6807 abort ();
6808 }
6809
6810 if (code != 'P')
6811 {
6812 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
6813 {
6814 if (ASSEMBLER_DIALECT == ASM_ATT)
6815 putc ('$', file);
6816 }
6817 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
6818 || GET_CODE (x) == LABEL_REF)
6819 {
6820 if (ASSEMBLER_DIALECT == ASM_ATT)
6821 putc ('$', file);
6822 else
6823 fputs ("OFFSET FLAT:", file);
6824 }
6825 }
6826 if (GET_CODE (x) == CONST_INT)
6827 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6828 else if (flag_pic)
6829 output_pic_addr_const (file, x, code);
6830 else
6831 output_addr_const (file, x);
6832 }
6833 }
6834 \f
6835 /* Print a memory operand whose address is ADDR. */
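/* For example, a base + index*scale + disp address is printed as
   "disp(%base,%index,scale)" in AT&T syntax and as
   "[base+index*scale+disp]" in Intel syntax; see the two branches
   below.  */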
6836
6837 void
6838 print_operand_address (FILE *file, rtx addr)
6839 {
6840 struct ix86_address parts;
6841 rtx base, index, disp;
6842 int scale;
6843
6844 if (! ix86_decompose_address (addr, &parts))
6845 abort ();
6846
6847 base = parts.base;
6848 index = parts.index;
6849 disp = parts.disp;
6850 scale = parts.scale;
6851
6852 switch (parts.seg)
6853 {
6854 case SEG_DEFAULT:
6855 break;
6856 case SEG_FS:
6857 case SEG_GS:
6858 if (USER_LABEL_PREFIX[0] == 0)
6859 putc ('%', file);
6860 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
6861 break;
6862 default:
6863 abort ();
6864 }
6865
6866 if (!base && !index)
6867 {
6868 /* A displacement-only address requires special attention. */
6869
6870 if (GET_CODE (disp) == CONST_INT)
6871 {
6872 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
6873 {
6874 if (USER_LABEL_PREFIX[0] == 0)
6875 putc ('%', file);
6876 fputs ("ds:", file);
6877 }
6878 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
6879 }
6880 else if (flag_pic)
6881 output_pic_addr_const (file, disp, 0);
6882 else
6883 output_addr_const (file, disp);
6884
6885 /* Use the one byte shorter RIP-relative addressing in 64-bit mode. */
6886 if (TARGET_64BIT
6887 && ((GET_CODE (disp) == SYMBOL_REF
6888 && ! tls_symbolic_operand (disp, GET_MODE (disp)))
6889 || GET_CODE (disp) == LABEL_REF
6890 || (GET_CODE (disp) == CONST
6891 && GET_CODE (XEXP (disp, 0)) == PLUS
6892 && (GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
6893 || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
6894 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)))
6895 fputs ("(%rip)", file);
6896 }
6897 else
6898 {
6899 if (ASSEMBLER_DIALECT == ASM_ATT)
6900 {
6901 if (disp)
6902 {
6903 if (flag_pic)
6904 output_pic_addr_const (file, disp, 0);
6905 else if (GET_CODE (disp) == LABEL_REF)
6906 output_asm_label (disp);
6907 else
6908 output_addr_const (file, disp);
6909 }
6910
6911 putc ('(', file);
6912 if (base)
6913 print_reg (base, 0, file);
6914 if (index)
6915 {
6916 putc (',', file);
6917 print_reg (index, 0, file);
6918 if (scale != 1)
6919 fprintf (file, ",%d", scale);
6920 }
6921 putc (')', file);
6922 }
6923 else
6924 {
6925 rtx offset = NULL_RTX;
6926
6927 if (disp)
6928 {
6929 /* Pull out the offset of a symbol; print any symbol itself. */
6930 if (GET_CODE (disp) == CONST
6931 && GET_CODE (XEXP (disp, 0)) == PLUS
6932 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
6933 {
6934 offset = XEXP (XEXP (disp, 0), 1);
6935 disp = gen_rtx_CONST (VOIDmode,
6936 XEXP (XEXP (disp, 0), 0));
6937 }
6938
6939 if (flag_pic)
6940 output_pic_addr_const (file, disp, 0);
6941 else if (GET_CODE (disp) == LABEL_REF)
6942 output_asm_label (disp);
6943 else if (GET_CODE (disp) == CONST_INT)
6944 offset = disp;
6945 else
6946 output_addr_const (file, disp);
6947 }
6948
6949 putc ('[', file);
6950 if (base)
6951 {
6952 print_reg (base, 0, file);
6953 if (offset)
6954 {
6955 if (INTVAL (offset) >= 0)
6956 putc ('+', file);
6957 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6958 }
6959 }
6960 else if (offset)
6961 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6962 else
6963 putc ('0', file);
6964
6965 if (index)
6966 {
6967 putc ('+', file);
6968 print_reg (index, 0, file);
6969 if (scale != 1)
6970 fprintf (file, "*%d", scale);
6971 }
6972 putc (']', file);
6973 }
6974 }
6975 }
6976
6977 bool
6978 output_addr_const_extra (FILE *file, rtx x)
6979 {
6980 rtx op;
6981
6982 if (GET_CODE (x) != UNSPEC)
6983 return false;
6984
6985 op = XVECEXP (x, 0, 0);
6986 switch (XINT (x, 1))
6987 {
6988 case UNSPEC_GOTTPOFF:
6989 output_addr_const (file, op);
6990 /* FIXME: This might be @TPOFF in Sun ld. */
6991 fputs ("@GOTTPOFF", file);
6992 break;
6993 case UNSPEC_TPOFF:
6994 output_addr_const (file, op);
6995 fputs ("@TPOFF", file);
6996 break;
6997 case UNSPEC_NTPOFF:
6998 output_addr_const (file, op);
6999 if (TARGET_64BIT)
7000 fputs ("@TPOFF", file);
7001 else
7002 fputs ("@NTPOFF", file);
7003 break;
7004 case UNSPEC_DTPOFF:
7005 output_addr_const (file, op);
7006 fputs ("@DTPOFF", file);
7007 break;
7008 case UNSPEC_GOTNTPOFF:
7009 output_addr_const (file, op);
7010 if (TARGET_64BIT)
7011 fputs ("@GOTTPOFF(%rip)", file);
7012 else
7013 fputs ("@GOTNTPOFF", file);
7014 break;
7015 case UNSPEC_INDNTPOFF:
7016 output_addr_const (file, op);
7017 fputs ("@INDNTPOFF", file);
7018 break;
7019
7020 default:
7021 return false;
7022 }
7023
7024 return true;
7025 }
7026 \f
7027 /* Split one or more DImode RTL references into pairs of SImode
7028 references. The RTL can be REG, offsettable MEM, integer constant, or
7029 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7030 split and "num" is its length. lo_half and hi_half are output arrays
7031 that parallel "operands". */
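/* For a DImode MEM, for instance, the low half is the SImode word at
   byte offset 0 and the high half the SImode word at offset 4, matching
   the little-endian layout assumed by the adjust_address calls below.  */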
7032
7033 void
7034 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
7035 {
7036 while (num--)
7037 {
7038 rtx op = operands[num];
7039
7040 /* simplify_subreg refuses to split volatile memory addresses,
7041 but we still have to handle them. */
7042 if (GET_CODE (op) == MEM)
7043 {
7044 lo_half[num] = adjust_address (op, SImode, 0);
7045 hi_half[num] = adjust_address (op, SImode, 4);
7046 }
7047 else
7048 {
7049 lo_half[num] = simplify_gen_subreg (SImode, op,
7050 GET_MODE (op) == VOIDmode
7051 ? DImode : GET_MODE (op), 0);
7052 hi_half[num] = simplify_gen_subreg (SImode, op,
7053 GET_MODE (op) == VOIDmode
7054 ? DImode : GET_MODE (op), 4);
7055 }
7056 }
7057 }
7058 /* Split one or more TImode RTL references into pairs of DImode
7059 references. The RTL can be REG, offsettable MEM, integer constant, or
7060 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
7061 split and "num" is its length. lo_half and hi_half are output arrays
7062 that parallel "operands". */
7063
7064 void
7065 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
7066 {
7067 while (num--)
7068 {
7069 rtx op = operands[num];
7070
7071 /* simplify_subreg refuses to split volatile memory addresses, but we
7072 still have to handle them. */
7073 if (GET_CODE (op) == MEM)
7074 {
7075 lo_half[num] = adjust_address (op, DImode, 0);
7076 hi_half[num] = adjust_address (op, DImode, 8);
7077 }
7078 else
7079 {
7080 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
7081 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
7082 }
7083 }
7084 }
7085 \f
7086 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
7087 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
7088 is the expression of the binary operation. The output may either be
7089 emitted here, or returned to the caller, like all output_* functions.
7090
7091 There is no guarantee that the operands are the same mode, as they
7092 might be within FLOAT or FLOAT_EXTEND expressions. */
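/* For instance, a DFmode add comes out as "addsd" when the operands are
   in SSE registers, and as a form of "fadd" (or "fiadd" when one operand
   is an integer in memory) on the 387 stack; the suffix and operand
   order are assembled into BUF below.  */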
7093
7094 #ifndef SYSV386_COMPAT
7095 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
7096 wants to fix the assemblers because that causes incompatibility
7097 with gcc. No-one wants to fix gcc because that causes
7098 incompatibility with assemblers... You can use the option of
7099 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
7100 #define SYSV386_COMPAT 1
7101 #endif
7102
7103 const char *
7104 output_387_binary_op (rtx insn, rtx *operands)
7105 {
7106 static char buf[30];
7107 const char *p;
7108 const char *ssep;
7109 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
7110
7111 #ifdef ENABLE_CHECKING
7112 /* Even if we do not want to check the inputs, this documents the input
7113 constraints, which helps in understanding the following code. */
7114 if (STACK_REG_P (operands[0])
7115 && ((REG_P (operands[1])
7116 && REGNO (operands[0]) == REGNO (operands[1])
7117 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
7118 || (REG_P (operands[2])
7119 && REGNO (operands[0]) == REGNO (operands[2])
7120 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
7121 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
7122 ; /* ok */
7123 else if (!is_sse)
7124 abort ();
7125 #endif
7126
7127 switch (GET_CODE (operands[3]))
7128 {
7129 case PLUS:
7130 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7131 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7132 p = "fiadd";
7133 else
7134 p = "fadd";
7135 ssep = "add";
7136 break;
7137
7138 case MINUS:
7139 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7140 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7141 p = "fisub";
7142 else
7143 p = "fsub";
7144 ssep = "sub";
7145 break;
7146
7147 case MULT:
7148 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7149 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7150 p = "fimul";
7151 else
7152 p = "fmul";
7153 ssep = "mul";
7154 break;
7155
7156 case DIV:
7157 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7158 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7159 p = "fidiv";
7160 else
7161 p = "fdiv";
7162 ssep = "div";
7163 break;
7164
7165 default:
7166 abort ();
7167 }
7168
7169 if (is_sse)
7170 {
7171 strcpy (buf, ssep);
7172 if (GET_MODE (operands[0]) == SFmode)
7173 strcat (buf, "ss\t{%2, %0|%0, %2}");
7174 else
7175 strcat (buf, "sd\t{%2, %0|%0, %2}");
7176 return buf;
7177 }
7178 strcpy (buf, p);
7179
7180 switch (GET_CODE (operands[3]))
7181 {
7182 case MULT:
7183 case PLUS:
7184 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
7185 {
7186 rtx temp = operands[2];
7187 operands[2] = operands[1];
7188 operands[1] = temp;
7189 }
7190
7191 /* We now know operands[0] == operands[1]. */
7192
7193 if (GET_CODE (operands[2]) == MEM)
7194 {
7195 p = "%z2\t%2";
7196 break;
7197 }
7198
7199 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7200 {
7201 if (STACK_TOP_P (operands[0]))
7202 /* How is it that we are storing to a dead operand[2]?
7203 Well, presumably operands[1] is dead too. We can't
7204 store the result to st(0) as st(0) gets popped on this
7205 instruction. Instead store to operands[2] (which I
7206 think has to be st(1)). st(1) will be popped later.
7207 gcc <= 2.8.1 didn't have this check and generated
7208 assembly code that the Unixware assembler rejected. */
7209 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7210 else
7211 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7212 break;
7213 }
7214
7215 if (STACK_TOP_P (operands[0]))
7216 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7217 else
7218 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7219 break;
7220
7221 case MINUS:
7222 case DIV:
7223 if (GET_CODE (operands[1]) == MEM)
7224 {
7225 p = "r%z1\t%1";
7226 break;
7227 }
7228
7229 if (GET_CODE (operands[2]) == MEM)
7230 {
7231 p = "%z2\t%2";
7232 break;
7233 }
7234
7235 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7236 {
7237 #if SYSV386_COMPAT
7238 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7239 derived assemblers, confusingly reverse the direction of
7240 the operation for fsub{r} and fdiv{r} when the
7241 destination register is not st(0). The Intel assembler
7242 doesn't have this brain damage. Read !SYSV386_COMPAT to
7243 figure out what the hardware really does. */
7244 if (STACK_TOP_P (operands[0]))
7245 p = "{p\t%0, %2|rp\t%2, %0}";
7246 else
7247 p = "{rp\t%2, %0|p\t%0, %2}";
7248 #else
7249 if (STACK_TOP_P (operands[0]))
7250 /* As above for fmul/fadd, we can't store to st(0). */
7251 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7252 else
7253 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7254 #endif
7255 break;
7256 }
7257
7258 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
7259 {
7260 #if SYSV386_COMPAT
7261 if (STACK_TOP_P (operands[0]))
7262 p = "{rp\t%0, %1|p\t%1, %0}";
7263 else
7264 p = "{p\t%1, %0|rp\t%0, %1}";
7265 #else
7266 if (STACK_TOP_P (operands[0]))
7267 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7268 else
7269 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7270 #endif
7271 break;
7272 }
7273
7274 if (STACK_TOP_P (operands[0]))
7275 {
7276 if (STACK_TOP_P (operands[1]))
7277 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7278 else
7279 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
7280 break;
7281 }
7282 else if (STACK_TOP_P (operands[1]))
7283 {
7284 #if SYSV386_COMPAT
7285 p = "{\t%1, %0|r\t%0, %1}";
7286 #else
7287 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
7288 #endif
7289 }
7290 else
7291 {
7292 #if SYSV386_COMPAT
7293 p = "{r\t%2, %0|\t%0, %2}";
7294 #else
7295 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7296 #endif
7297 }
7298 break;
7299
7300 default:
7301 abort ();
7302 }
7303
7304 strcat (buf, p);
7305 return buf;
7306 }
7307
7308 /* Output code to initialize control word copies used by trunc?f?i and
7309 rounding patterns. CURRENT_MODE is set to the current control word,
7310 while NEW_MODE is set to the new control word. */
7311
7312 void
7313 emit_i387_cw_initialization (rtx current_mode, rtx new_mode, int mode)
7314 {
7315 rtx reg = gen_reg_rtx (HImode);
7316
7317 emit_insn (gen_x86_fnstcw_1 (current_mode));
7318 emit_move_insn (reg, current_mode);
7319
7320 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
7321 && !TARGET_64BIT)
7322 {
7323 switch (mode)
7324 {
7325 case I387_CW_FLOOR:
7326 /* round down toward -oo */
7327 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
7328 break;
7329
7330 case I387_CW_CEIL:
7331 /* round up toward +oo */
7332 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
7333 break;
7334
7335 case I387_CW_TRUNC:
7336 /* round toward zero (truncate) */
7337 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
7338 break;
7339
7340 case I387_CW_MASK_PM:
7341 /* mask precision exception for nearbyint() */
7342 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
7343 break;
7344
7345 default:
7346 abort ();
7347 }
7348 }
7349 else
7350 {
7351 switch (mode)
7352 {
7353 case I387_CW_FLOOR:
7354 /* round down toward -oo */
7355 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
7356 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
7357 break;
7358
7359 case I387_CW_CEIL:
7360 /* round up toward +oo */
7361 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
7362 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
7363 break;
7364
7365 case I387_CW_TRUNC:
7366 /* round toward zero (truncate) */
7367 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
7368 break;
7369
7370 case I387_CW_MASK_PM:
7371 /* mask precision exception for nearbyint() */
7372 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
7373 break;
7374
7375 default:
7376 abort ();
7377 }
7378 }
7379
7380 emit_move_insn (new_mode, reg);
7381 }
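
/* Illustrative note (added; not from the original sources): the x87
   rounding-control field occupies bits 10 and 11 of the control word,
   so for a control word holding the power-up value 0x037f the masks
   used above work out to:

     floor   (cw & ~0x0c00) | 0x0400  ->  0x077f   round toward -inf
     ceil    (cw & ~0x0c00) | 0x0800  ->  0x0b7f   round toward +inf
     trunc    cw | 0x0c00             ->  0x0f7f   round toward zero
     nearbyint cw | 0x0020            keeps the precision exception masked

   The movsi_insv_1 path taken when partial register stalls are cheap
   appears to write the same bits through the high byte of the register,
   hence the 0x4, 0x8 and 0xc immediates.  */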
7382
7383 /* Output code for INSN to convert a float to a signed int. OPERANDS
7384 are the insn operands. The output may be [HSD]Imode and the input
7385 operand may be [SDX]Fmode. */
7386
7387 const char *
7388 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
7389 {
7390 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7391 int dimode_p = GET_MODE (operands[0]) == DImode;
7392 int round_mode = get_attr_i387_cw (insn);
7393
7394 /* Jump through a hoop or two for DImode, since the hardware has no
7395 non-popping instruction. We used to do this a different way, but
7396 that was somewhat fragile and broke with post-reload splitters. */
7397 if ((dimode_p || fisttp) && !stack_top_dies)
7398 output_asm_insn ("fld\t%y1", operands);
7399
7400 if (!STACK_TOP_P (operands[1]))
7401 abort ();
7402
7403 if (GET_CODE (operands[0]) != MEM)
7404 abort ();
7405
7406 if (fisttp)
7407 output_asm_insn ("fisttp%z0\t%0", operands);
7408 else
7409 {
7410 if (round_mode != I387_CW_ANY)
7411 output_asm_insn ("fldcw\t%3", operands);
7412 if (stack_top_dies || dimode_p)
7413 output_asm_insn ("fistp%z0\t%0", operands);
7414 else
7415 output_asm_insn ("fist%z0\t%0", operands);
7416 if (round_mode != I387_CW_ANY)
7417 output_asm_insn ("fldcw\t%2", operands);
7418 }
7419
7420 return "";
7421 }
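
/* Example (added for illustration): for an SImode truncation of st(0)
   where the stack top dies and no fisttp is available, the code above
   emits a sequence along the lines of

       fldcw   new_cw          load the truncating control word (%3)
       fistpl  dest            store the integer and pop
       fldcw   saved_cw        restore the previous control word (%2)

   while DImode always uses the popping store, reloading the value with
   an extra fld when the stack top does not die.  */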
7422
7423 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
7424 should be used. UNORDERED_P is true when fucom should be used. */
7425
7426 const char *
7427 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
7428 {
7429 int stack_top_dies;
7430 rtx cmp_op0, cmp_op1;
7431 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
7432
7433 if (eflags_p)
7434 {
7435 cmp_op0 = operands[0];
7436 cmp_op1 = operands[1];
7437 }
7438 else
7439 {
7440 cmp_op0 = operands[1];
7441 cmp_op1 = operands[2];
7442 }
7443
7444 if (is_sse)
7445 {
7446 if (GET_MODE (operands[0]) == SFmode)
7447 if (unordered_p)
7448 return "ucomiss\t{%1, %0|%0, %1}";
7449 else
7450 return "comiss\t{%1, %0|%0, %1}";
7451 else
7452 if (unordered_p)
7453 return "ucomisd\t{%1, %0|%0, %1}";
7454 else
7455 return "comisd\t{%1, %0|%0, %1}";
7456 }
7457
7458 if (! STACK_TOP_P (cmp_op0))
7459 abort ();
7460
7461 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7462
7463 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
7464 {
7465 if (stack_top_dies)
7466 {
7467 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
7468 return TARGET_USE_FFREEP ? "ffreep\t%y1" : "fstp\t%y1";
7469 }
7470 else
7471 return "ftst\n\tfnstsw\t%0";
7472 }
7473
7474 if (STACK_REG_P (cmp_op1)
7475 && stack_top_dies
7476 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
7477 && REGNO (cmp_op1) != FIRST_STACK_REG)
7478 {
7479 /* If the top of the 387 stack dies, and the other operand is
7480 also a stack register that dies, then this must be a
7481 `fcompp' float compare.  */
7482
7483 if (eflags_p)
7484 {
7485 /* There is no double popping fcomi variant. Fortunately,
7486 eflags is immune from the fstp's cc clobbering. */
7487 if (unordered_p)
7488 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
7489 else
7490 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
7491 return TARGET_USE_FFREEP ? "ffreep\t%y0" : "fstp\t%y0";
7492 }
7493 else
7494 {
7495 if (unordered_p)
7496 return "fucompp\n\tfnstsw\t%0";
7497 else
7498 return "fcompp\n\tfnstsw\t%0";
7499 }
7500 }
7501 else
7502 {
7503 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
7504
7505 static const char * const alt[16] =
7506 {
7507 "fcom%z2\t%y2\n\tfnstsw\t%0",
7508 "fcomp%z2\t%y2\n\tfnstsw\t%0",
7509 "fucom%z2\t%y2\n\tfnstsw\t%0",
7510 "fucomp%z2\t%y2\n\tfnstsw\t%0",
7511
7512 "ficom%z2\t%y2\n\tfnstsw\t%0",
7513 "ficomp%z2\t%y2\n\tfnstsw\t%0",
7514 NULL,
7515 NULL,
7516
7517 "fcomi\t{%y1, %0|%0, %y1}",
7518 "fcomip\t{%y1, %0|%0, %y1}",
7519 "fucomi\t{%y1, %0|%0, %y1}",
7520 "fucomip\t{%y1, %0|%0, %y1}",
7521
7522 NULL,
7523 NULL,
7524 NULL,
7525 NULL
7526 };
7527
7528 int mask;
7529 const char *ret;
7530
7531 mask = eflags_p << 3;
7532 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
7533 mask |= unordered_p << 1;
7534 mask |= stack_top_dies;
7535
7536 if (mask >= 16)
7537 abort ();
7538 ret = alt[mask];
7539 if (ret == NULL)
7540 abort ();
7541
7542 return ret;
7543 }
7544 }
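
/* Worked example (added): the alt[] index above packs the four flags as
   (eflags_p << 3) | (intmode << 2) | (unordered_p << 1) | stack_top_dies.
   An unordered eflags-setting compare whose stack top dies therefore
   selects entry 8 + 2 + 1 = 11, "fucomip", while an ordered non-eflags
   compare with a live stack top selects entry 0, "fcom" followed by
   "fnstsw".  */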
7545
7546 void
7547 ix86_output_addr_vec_elt (FILE *file, int value)
7548 {
7549 const char *directive = ASM_LONG;
7550
7551 if (TARGET_64BIT)
7552 {
7553 #ifdef ASM_QUAD
7554 directive = ASM_QUAD;
7555 #else
7556 abort ();
7557 #endif
7558 }
7559
7560 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
7561 }
7562
7563 void
7564 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
7565 {
7566 if (TARGET_64BIT)
7567 fprintf (file, "%s%s%d-%s%d\n",
7568 ASM_LONG, LPREFIX, value, LPREFIX, rel);
7569 else if (HAVE_AS_GOTOFF_IN_DATA)
7570 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
7571 #if TARGET_MACHO
7572 else if (TARGET_MACHO)
7573 {
7574 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
7575 machopic_output_function_base_name (file);
7576 fprintf(file, "\n");
7577 }
7578 #endif
7579 else
7580 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
7581 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
7582 }
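
/* For reference (added): on a typical 32-bit ELF PIC target whose
   assembler accepts @GOTOFF in data, the routine above emits jump table
   entries such as ".long .L5@GOTOFF"; without that support it falls back
   to the "_GLOBAL_OFFSET_TABLE_+[.-.L5]" form, and 64-bit code simply
   emits the label difference.  The ".L5" label name is hypothetical.  */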
7583 \f
7584 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
7585 for the target. */
7586
7587 void
7588 ix86_expand_clear (rtx dest)
7589 {
7590 rtx tmp;
7591
7592 /* We play register width games, which are only valid after reload. */
7593 if (!reload_completed)
7594 abort ();
7595
7596 /* Avoid HImode and its attendant prefix byte. */
7597 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
7598 dest = gen_rtx_REG (SImode, REGNO (dest));
7599
7600 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
7601
7602 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
7603 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
7604 {
7605 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
7606 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
7607 }
7608
7609 emit_insn (tmp);
7610 }
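
/* Illustrative sketch (added): after reload, clearing a register with
   this helper normally produces "xorl %eax, %eax" together with a flags
   clobber; only when TARGET_USE_MOV0 is set and we are not optimizing
   for size does it fall back to "movl $0, %eax".  A hypothetical caller,
   assuming hard register 0 is %eax:

       rtx ax = gen_rtx_REG (SImode, 0);
       ix86_expand_clear (ax);
*/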
7611
7612 /* X is an unchanging MEM. If it is a constant pool reference, return
7613 the constant pool rtx, else NULL. */
7614
7615 rtx
7616 maybe_get_pool_constant (rtx x)
7617 {
7618 x = ix86_delegitimize_address (XEXP (x, 0));
7619
7620 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7621 return get_pool_constant (x);
7622
7623 return NULL_RTX;
7624 }
7625
7626 void
7627 ix86_expand_move (enum machine_mode mode, rtx operands[])
7628 {
7629 int strict = (reload_in_progress || reload_completed);
7630 rtx op0, op1;
7631 enum tls_model model;
7632
7633 op0 = operands[0];
7634 op1 = operands[1];
7635
7636 if (GET_CODE (op1) == SYMBOL_REF)
7637 {
7638 model = SYMBOL_REF_TLS_MODEL (op1);
7639 if (model)
7640 {
7641 op1 = legitimize_tls_address (op1, model, true);
7642 op1 = force_operand (op1, op0);
7643 if (op1 == op0)
7644 return;
7645 }
7646 }
7647 else if (GET_CODE (op1) == CONST
7648 && GET_CODE (XEXP (op1, 0)) == PLUS
7649 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
7650 {
7651 model = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1, 0), 0));
7652 if (model)
7653 {
7654 rtx addend = XEXP (XEXP (op1, 0), 1);
7655 op1 = legitimize_tls_address (XEXP (XEXP (op1, 0), 0), model, true);
7656 op1 = force_operand (op1, NULL);
7657 op1 = expand_simple_binop (Pmode, PLUS, op1, addend,
7658 op0, 1, OPTAB_DIRECT);
7659 if (op1 == op0)
7660 return;
7661 }
7662 }
7663
7664 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
7665 {
7666 #if TARGET_MACHO
7667 if (MACHOPIC_PURE)
7668 {
7669 rtx temp = ((reload_in_progress
7670 || ((op0 && GET_CODE (op0) == REG)
7671 && mode == Pmode))
7672 ? op0 : gen_reg_rtx (Pmode));
7673 op1 = machopic_indirect_data_reference (op1, temp);
7674 op1 = machopic_legitimize_pic_address (op1, mode,
7675 temp == op1 ? 0 : temp);
7676 }
7677 else if (MACHOPIC_INDIRECT)
7678 op1 = machopic_indirect_data_reference (op1, 0);
7679 if (op0 == op1)
7680 return;
7681 #else
7682 if (GET_CODE (op0) == MEM)
7683 op1 = force_reg (Pmode, op1);
7684 else
7685 op1 = legitimize_address (op1, op1, Pmode);
7686 #endif /* TARGET_MACHO */
7687 }
7688 else
7689 {
7690 if (GET_CODE (op0) == MEM
7691 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
7692 || !push_operand (op0, mode))
7693 && GET_CODE (op1) == MEM)
7694 op1 = force_reg (mode, op1);
7695
7696 if (push_operand (op0, mode)
7697 && ! general_no_elim_operand (op1, mode))
7698 op1 = copy_to_mode_reg (mode, op1);
7699
7700 /* Force large constants in 64bit compilation into register
7701 to get them CSEed. */
7702 if (TARGET_64BIT && mode == DImode
7703 && immediate_operand (op1, mode)
7704 && !x86_64_zext_immediate_operand (op1, VOIDmode)
7705 && !register_operand (op0, mode)
7706 && optimize && !reload_completed && !reload_in_progress)
7707 op1 = copy_to_mode_reg (mode, op1);
7708
7709 if (FLOAT_MODE_P (mode))
7710 {
7711 /* If we are loading a floating point constant to a register,
7712 force the value to memory now, since we'll get better code
7713 out the back end. */
7714
7715 if (strict)
7716 ;
7717 else if (GET_CODE (op1) == CONST_DOUBLE)
7718 {
7719 op1 = validize_mem (force_const_mem (mode, op1));
7720 if (!register_operand (op0, mode))
7721 {
7722 rtx temp = gen_reg_rtx (mode);
7723 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
7724 emit_move_insn (op0, temp);
7725 return;
7726 }
7727 }
7728 }
7729 }
7730
7731 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
7732 }
7733
7734 void
7735 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
7736 {
7737 rtx op0 = operands[0], op1 = operands[1];
7738
7739 /* Force constants other than zero into memory. We do not know how
7740 the instructions used to build constants modify the upper 64 bits
7741 of the register; once we have that information we may be able
7742 to handle some of them more efficiently. */
7743 if ((reload_in_progress | reload_completed) == 0
7744 && register_operand (op0, mode)
7745 && CONSTANT_P (op1) && op1 != CONST0_RTX (mode))
7746 op1 = validize_mem (force_const_mem (mode, op1));
7747
7748 /* Make operand1 a register if it isn't already. */
7749 if (!no_new_pseudos
7750 && !register_operand (op0, mode)
7751 && !register_operand (op1, mode))
7752 {
7753 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
7754 return;
7755 }
7756
7757 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
7758 }
7759
7760 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
7761 straight to ix86_expand_vector_move. */
7762
7763 void
7764 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
7765 {
7766 rtx op0, op1, m;
7767
7768 op0 = operands[0];
7769 op1 = operands[1];
7770
7771 if (MEM_P (op1))
7772 {
7773 /* If we're optimizing for size, movups is the smallest. */
7774 if (optimize_size)
7775 {
7776 op0 = gen_lowpart (V4SFmode, op0);
7777 op1 = gen_lowpart (V4SFmode, op1);
7778 emit_insn (gen_sse_movups (op0, op1));
7779 return;
7780 }
7781
7782 /* ??? If we have typed data, then it would appear that using
7783 movdqu is the only way to get unaligned data loaded with
7784 integer type. */
7785 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
7786 {
7787 op0 = gen_lowpart (V16QImode, op0);
7788 op1 = gen_lowpart (V16QImode, op1);
7789 emit_insn (gen_sse2_movdqu (op0, op1));
7790 return;
7791 }
7792
7793 if (TARGET_SSE2 && mode == V2DFmode)
7794 {
7795 rtx zero;
7796
7797 /* When SSE registers are split into halves, we can avoid
7798 writing to the top half twice. */
7799 if (TARGET_SSE_SPLIT_REGS)
7800 {
7801 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
7802 zero = op0;
7803 }
7804 else
7805 {
7806 /* ??? Not sure about the best option for the Intel chips.
7807 The following would seem to satisfy; the register is
7808 entirely cleared, breaking the dependency chain. We
7809 then store to the upper half, with a dependency depth
7810 of one. A rumor has it that Intel recommends two movsd
7811 followed by an unpacklpd, but this is unconfirmed. And
7812 given that the dependency depth of the unpacklpd would
7813 still be one, I'm not sure why this would be better. */
7814 zero = CONST0_RTX (V2DFmode);
7815 }
7816
7817 m = adjust_address (op1, DFmode, 0);
7818 emit_insn (gen_sse2_loadlpd (op0, zero, m));
7819 m = adjust_address (op1, DFmode, 8);
7820 emit_insn (gen_sse2_loadhpd (op0, op0, m));
7821 }
7822 else
7823 {
7824 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
7825 emit_move_insn (op0, CONST0_RTX (mode));
7826 else
7827 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
7828
7829 if (mode != V4SFmode)
7830 op0 = gen_lowpart (V4SFmode, op0);
7831 m = adjust_address (op1, V2SFmode, 0);
7832 emit_insn (gen_sse_loadlps (op0, op0, m));
7833 m = adjust_address (op1, V2SFmode, 8);
7834 emit_insn (gen_sse_loadhps (op0, op0, m));
7835 }
7836 }
7837 else if (MEM_P (op0))
7838 {
7839 /* If we're optimizing for size, movups is the smallest. */
7840 if (optimize_size)
7841 {
7842 op0 = gen_lowpart (V4SFmode, op0);
7843 op1 = gen_lowpart (V4SFmode, op1);
7844 emit_insn (gen_sse_movups (op0, op1));
7845 return;
7846 }
7847
7848 /* ??? Similar to above, only less clear because of quote
7849 typeless stores unquote. */
7850 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
7851 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
7852 {
7853 op0 = gen_lowpart (V16QImode, op0);
7854 op1 = gen_lowpart (V16QImode, op1);
7855 emit_insn (gen_sse2_movdqu (op0, op1));
7856 return;
7857 }
7858
7859 if (TARGET_SSE2 && mode == V2DFmode)
7860 {
7861 m = adjust_address (op0, DFmode, 0);
7862 emit_insn (gen_sse2_storelpd (m, op1));
7863 m = adjust_address (op0, DFmode, 8);
7864 emit_insn (gen_sse2_storehpd (m, op1));
7865 }
7866 else
7867 {
7868 if (mode != V4SFmode)
7869 op1 = gen_lowpart (V4SFmode, op1);
7870 m = adjust_address (op0, V2SFmode, 0);
7871 emit_insn (gen_sse_storelps (m, op1));
7872 m = adjust_address (op0, V2SFmode, 8);
7873 emit_insn (gen_sse_storehps (m, op1));
7874 }
7875 }
7876 else
7877 gcc_unreachable ();
7878 }
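
/* Example (added): an unaligned V2DF load on an SSE2 target that is not
   optimizing for size is typically expanded by the code above into two
   halves, roughly

       movsd   (%eax), %xmm0         possibly after an xorpd, per tuning
       movhpd  8(%eax), %xmm0

   whereas -Os keeps a single "movups" of the whole vector.  The register
   names are only illustrative.  */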
7879
7880 /* Expand a push in MODE. This is some mode for which we do not support
7881 proper push instructions, at least from the registers that we expect
7882 the value to live in. */
7883
7884 void
7885 ix86_expand_push (enum machine_mode mode, rtx x)
7886 {
7887 rtx tmp;
7888
7889 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
7890 GEN_INT (-GET_MODE_SIZE (mode)),
7891 stack_pointer_rtx, 1, OPTAB_DIRECT);
7892 if (tmp != stack_pointer_rtx)
7893 emit_move_insn (stack_pointer_rtx, tmp);
7894
7895 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
7896 emit_move_insn (tmp, x);
7897 }
7898
7899 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
7900 destination to use for the operation. If different from the true
7901 destination in operands[0], a copy operation will be required. */
7902
7903 rtx
7904 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
7905 rtx operands[])
7906 {
7907 int matching_memory;
7908 rtx src1, src2, dst;
7909
7910 dst = operands[0];
7911 src1 = operands[1];
7912 src2 = operands[2];
7913
7914 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
7915 if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
7916 && (rtx_equal_p (dst, src2)
7917 || immediate_operand (src1, mode)))
7918 {
7919 rtx temp = src1;
7920 src1 = src2;
7921 src2 = temp;
7922 }
7923
7924 /* If the destination is memory, and we do not have matching source
7925 operands, do things in registers. */
7926 matching_memory = 0;
7927 if (GET_CODE (dst) == MEM)
7928 {
7929 if (rtx_equal_p (dst, src1))
7930 matching_memory = 1;
7931 else if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
7932 && rtx_equal_p (dst, src2))
7933 matching_memory = 2;
7934 else
7935 dst = gen_reg_rtx (mode);
7936 }
7937
7938 /* Both source operands cannot be in memory. */
7939 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
7940 {
7941 if (matching_memory != 2)
7942 src2 = force_reg (mode, src2);
7943 else
7944 src1 = force_reg (mode, src1);
7945 }
7946
7947 /* If the operation is not commutative, source 1 cannot be a constant
7948 or non-matching memory. */
7949 if ((CONSTANT_P (src1)
7950 || (!matching_memory && GET_CODE (src1) == MEM))
7951 && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
7952 src1 = force_reg (mode, src1);
7953
7954 /* If optimizing, copy to regs to improve CSE */
7955 if (optimize && ! no_new_pseudos)
7956 {
7957 if (GET_CODE (dst) == MEM)
7958 dst = gen_reg_rtx (mode);
7959 if (GET_CODE (src1) == MEM)
7960 src1 = force_reg (mode, src1);
7961 if (GET_CODE (src2) == MEM)
7962 src2 = force_reg (mode, src2);
7963 }
7964
7965 operands[1] = src1;
7966 operands[2] = src2;
7967 return dst;
7968 }
7969
7970 /* Similarly, but assume that the destination has already been
7971 set up properly. */
7972
7973 void
7974 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
7975 enum machine_mode mode, rtx operands[])
7976 {
7977 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
7978 gcc_assert (dst == operands[0]);
7979 }
7980
7981 /* Attempt to expand a binary operator. Make the expansion closer to the
7982 actual machine, than just general_operand, which will allow 3 separate
7983 memory references (one output, two input) in a single insn. */
7984
7985 void
7986 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
7987 rtx operands[])
7988 {
7989 rtx src1, src2, dst, op, clob;
7990
7991 dst = ix86_fixup_binary_operands (code, mode, operands);
7992 src1 = operands[1];
7993 src2 = operands[2];
7994
7995 /* Emit the instruction. */
7996
7997 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
7998 if (reload_in_progress)
7999 {
8000 /* Reload doesn't know about the flags register, and doesn't know that
8001 it doesn't want to clobber it. We can only do this with PLUS. */
8002 if (code != PLUS)
8003 abort ();
8004 emit_insn (op);
8005 }
8006 else
8007 {
8008 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8009 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8010 }
8011
8012 /* Fix up the destination if needed. */
8013 if (dst != operands[0])
8014 emit_move_insn (operands[0], dst);
8015 }
8016
8017 /* Return TRUE or FALSE depending on whether the binary operator meets the
8018 appropriate constraints. */
8019
8020 int
8021 ix86_binary_operator_ok (enum rtx_code code,
8022 enum machine_mode mode ATTRIBUTE_UNUSED,
8023 rtx operands[3])
8024 {
8025 /* Both source operands cannot be in memory. */
8026 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
8027 return 0;
8028 /* If the operation is not commutative, source 1 cannot be a constant. */
8029 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
8030 return 0;
8031 /* If the destination is memory, we must have a matching source operand. */
8032 if (GET_CODE (operands[0]) == MEM
8033 && ! (rtx_equal_p (operands[0], operands[1])
8034 || (GET_RTX_CLASS (code) == RTX_COMM_ARITH
8035 && rtx_equal_p (operands[0], operands[2]))))
8036 return 0;
8037 /* If the operation is not commutative and source 1 is memory, we must
8038 have a matching destination. */
8039 if (GET_CODE (operands[1]) == MEM
8040 && GET_RTX_CLASS (code) != RTX_COMM_ARITH
8041 && ! rtx_equal_p (operands[0], operands[1]))
8042 return 0;
8043 return 1;
8044 }
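
/* Example (added): these checks mirror what the hardware can encode.
   "op0 = op1 + op2" is rejected when both sources are in memory, since
   x86 has no mem,mem arithmetic, and a memory destination is accepted
   only when it matches one of the sources, as in the two-operand form
   "addl %eax, 4(%esp)" which computes mem += reg.  */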
8045
8046 /* Attempt to expand a unary operator. Make the expansion closer to the
8047 actual machine, than just general_operand, which will allow 2 separate
8048 memory references (one output, one input) in a single insn. */
8049
8050 void
8051 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
8052 rtx operands[])
8053 {
8054 int matching_memory;
8055 rtx src, dst, op, clob;
8056
8057 dst = operands[0];
8058 src = operands[1];
8059
8060 /* If the destination is memory, and we do not have matching source
8061 operands, do things in registers. */
8062 matching_memory = 0;
8063 if (MEM_P (dst))
8064 {
8065 if (rtx_equal_p (dst, src))
8066 matching_memory = 1;
8067 else
8068 dst = gen_reg_rtx (mode);
8069 }
8070
8071 /* When source operand is memory, destination must match. */
8072 if (MEM_P (src) && !matching_memory)
8073 src = force_reg (mode, src);
8074
8075 /* If optimizing, copy to regs to improve CSE. */
8076 if (optimize && ! no_new_pseudos)
8077 {
8078 if (GET_CODE (dst) == MEM)
8079 dst = gen_reg_rtx (mode);
8080 if (GET_CODE (src) == MEM)
8081 src = force_reg (mode, src);
8082 }
8083
8084 /* Emit the instruction. */
8085
8086 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
8087 if (reload_in_progress || code == NOT)
8088 {
8089 /* Reload doesn't know about the flags register, and doesn't know that
8090 it doesn't want to clobber it. */
8091 if (code != NOT)
8092 abort ();
8093 emit_insn (op);
8094 }
8095 else
8096 {
8097 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8098 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8099 }
8100
8101 /* Fix up the destination if needed. */
8102 if (dst != operands[0])
8103 emit_move_insn (operands[0], dst);
8104 }
8105
8106 /* Return TRUE or FALSE depending on whether the unary operator meets the
8107 appropriate constraints. */
8108
8109 int
8110 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
8111 enum machine_mode mode ATTRIBUTE_UNUSED,
8112 rtx operands[2] ATTRIBUTE_UNUSED)
8113 {
8114 /* If one of operands is memory, source and destination must match. */
8115 if ((GET_CODE (operands[0]) == MEM
8116 || GET_CODE (operands[1]) == MEM)
8117 && ! rtx_equal_p (operands[0], operands[1]))
8118 return FALSE;
8119 return TRUE;
8120 }
8121
8122 /* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
8123 Create a mask for the sign bit in MODE for an SSE register. If VECT is
8124 true, then replicate the mask for all elements of the vector register.
8125 If INVERT is true, then create a mask excluding the sign bit. */
8126
8127 rtx
8128 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
8129 {
8130 enum machine_mode vec_mode;
8131 HOST_WIDE_INT hi, lo;
8132 int shift = 63;
8133 rtvec v;
8134 rtx mask;
8135
8136 /* Find the sign bit, sign extended to 2*HWI. */
8137 if (mode == SFmode)
8138 lo = 0x80000000, hi = lo < 0;
8139 else if (HOST_BITS_PER_WIDE_INT >= 64)
8140 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
8141 else
8142 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
8143
8144 if (invert)
8145 lo = ~lo, hi = ~hi;
8146
8147 /* Force this value into the low part of a fp vector constant. */
8148 mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
8149 mask = gen_lowpart (mode, mask);
8150
8151 if (mode == SFmode)
8152 {
8153 if (vect)
8154 v = gen_rtvec (4, mask, mask, mask, mask);
8155 else
8156 v = gen_rtvec (4, mask, CONST0_RTX (SFmode),
8157 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
8158 vec_mode = V4SFmode;
8159 }
8160 else
8161 {
8162 if (vect)
8163 v = gen_rtvec (2, mask, mask);
8164 else
8165 v = gen_rtvec (2, mask, CONST0_RTX (DFmode));
8166 vec_mode = V2DFmode;
8167 }
8168
8169 return force_reg (vec_mode, gen_rtx_CONST_VECTOR (vec_mode, v));
8170 }
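
/* Worked values (added): for SFmode the constant built above is
   0x80000000 in the low element (replicated to all four elements when
   VECT is true), or 0x7fffffff when INVERT is true; for DFmode the
   corresponding 64-bit values are 0x8000000000000000 and
   0x7fffffffffffffff.  These are exactly the masks needed to flip or
   clear the IEEE sign bit with xorps/andps or xorpd/andpd.  */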
8171
8172 /* Generate code for floating point ABS or NEG. */
8173
8174 void
8175 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
8176 rtx operands[])
8177 {
8178 rtx mask, set, use, clob, dst, src;
8179 bool matching_memory;
8180 bool use_sse = false;
8181 bool vector_mode = VECTOR_MODE_P (mode);
8182 enum machine_mode elt_mode = mode;
8183
8184 if (vector_mode)
8185 {
8186 elt_mode = GET_MODE_INNER (mode);
8187 use_sse = true;
8188 }
8189 else if (TARGET_SSE_MATH)
8190 use_sse = SSE_FLOAT_MODE_P (mode);
8191
8192 /* NEG and ABS performed with SSE use bitwise mask operations.
8193 Create the appropriate mask now. */
8194 if (use_sse)
8195 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
8196 else
8197 {
8198 /* When not using SSE, we don't use the mask, but prefer to keep the
8199 same general form of the insn pattern to reduce duplication when
8200 it comes time to split. */
8201 mask = const0_rtx;
8202 }
8203
8204 dst = operands[0];
8205 src = operands[1];
8206
8207 /* If the destination is memory, and we don't have matching source
8208 operands, do things in registers. */
8209 matching_memory = false;
8210 if (MEM_P (dst))
8211 {
8212 if (rtx_equal_p (dst, src) && (!optimize || no_new_pseudos))
8213 matching_memory = true;
8214 else
8215 dst = gen_reg_rtx (mode);
8216 }
8217 if (MEM_P (src) && !matching_memory)
8218 src = force_reg (mode, src);
8219
8220 if (vector_mode)
8221 {
8222 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
8223 set = gen_rtx_SET (VOIDmode, dst, set);
8224 emit_insn (set);
8225 }
8226 else
8227 {
8228 set = gen_rtx_fmt_e (code, mode, src);
8229 set = gen_rtx_SET (VOIDmode, dst, set);
8230 use = gen_rtx_USE (VOIDmode, mask);
8231 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8232 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (3, set, use, clob)));
8233 }
8234
8235 if (dst != operands[0])
8236 emit_move_insn (operands[0], dst);
8237 }
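
/* Example (added): with SSE math, "x = -x" in DFmode becomes an xorpd
   with the 0x8000000000000000 mask built above and "x = fabs (x)"
   becomes an andpd with the inverted mask; the x87 path keeps the plain
   fchs/fabs form and merely carries the unused mask in a USE so that the
   insn pattern has the same shape either way.  */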
8238
8239 /* Expand a copysign operation. Special case operand 0 being a constant. */
8240
8241 void
8242 ix86_expand_copysign (rtx operands[])
8243 {
8244 enum machine_mode mode, vmode;
8245 rtx dest, op0, op1, mask, nmask;
8246
8247 dest = operands[0];
8248 op0 = operands[1];
8249 op1 = operands[2];
8250
8251 mode = GET_MODE (dest);
8252 vmode = mode == SFmode ? V4SFmode : V2DFmode;
8253
8254 if (GET_CODE (op0) == CONST_DOUBLE)
8255 {
8256 rtvec v;
8257
8258 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
8259 op0 = simplify_unary_operation (ABS, mode, op0, mode);
8260
8261 if (op0 == CONST0_RTX (mode))
8262 op0 = CONST0_RTX (vmode);
8263 else
8264 {
8265 if (mode == SFmode)
8266 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
8267 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
8268 else
8269 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
8270 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
8271 }
8272
8273 mask = ix86_build_signbit_mask (mode, 0, 0);
8274
8275 if (mode == SFmode)
8276 emit_insn (gen_copysignsf3_const (dest, op0, op1, mask));
8277 else
8278 emit_insn (gen_copysigndf3_const (dest, op0, op1, mask));
8279 }
8280 else
8281 {
8282 nmask = ix86_build_signbit_mask (mode, 0, 1);
8283 mask = ix86_build_signbit_mask (mode, 0, 0);
8284
8285 if (mode == SFmode)
8286 emit_insn (gen_copysignsf3_var (dest, NULL, op0, op1, nmask, mask));
8287 else
8288 emit_insn (gen_copysigndf3_var (dest, NULL, op0, op1, nmask, mask));
8289 }
8290 }
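
/* Example (added): copysign reduces to bit arithmetic.  With SIGN being
   the sign-bit mask from ix86_build_signbit_mask, the variable case
   computes

       result = (x & ~SIGN) | (y & SIGN)

   taking the magnitude from X and the sign from Y; the constant case
   pre-clears the sign of X above, so only the final AND/OR pair remains
   to be split out.  */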
8291
8292 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
8293 be a constant, and so has already been expanded into a vector constant. */
8294
8295 void
8296 ix86_split_copysign_const (rtx operands[])
8297 {
8298 enum machine_mode mode, vmode;
8299 rtx dest, op0, op1, mask, x;
8300
8301 dest = operands[0];
8302 op0 = operands[1];
8303 op1 = operands[2];
8304 mask = operands[3];
8305
8306 mode = GET_MODE (dest);
8307 vmode = GET_MODE (mask);
8308
8309 dest = simplify_gen_subreg (vmode, dest, mode, 0);
8310 x = gen_rtx_AND (vmode, dest, mask);
8311 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
8312
8313 if (op0 != CONST0_RTX (vmode))
8314 {
8315 x = gen_rtx_IOR (vmode, dest, op0);
8316 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
8317 }
8318 }
8319
8320 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
8321 so we have to do two masks. */
8322
8323 void
8324 ix86_split_copysign_var (rtx operands[])
8325 {
8326 enum machine_mode mode, vmode;
8327 rtx dest, scratch, op0, op1, mask, nmask, x;
8328
8329 dest = operands[0];
8330 scratch = operands[1];
8331 op0 = operands[2];
8332 op1 = operands[3];
8333 nmask = operands[4];
8334 mask = operands[5];
8335
8336 mode = GET_MODE (dest);
8337 vmode = GET_MODE (mask);
8338
8339 if (rtx_equal_p (op0, op1))
8340 {
8341 /* Shouldn't happen often (it's useless, obviously), but when it does
8342 we'd generate incorrect code if we continue below. */
8343 emit_move_insn (dest, op0);
8344 return;
8345 }
8346
8347 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
8348 {
8349 gcc_assert (REGNO (op1) == REGNO (scratch));
8350
8351 x = gen_rtx_AND (vmode, scratch, mask);
8352 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
8353
8354 dest = mask;
8355 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
8356 x = gen_rtx_NOT (vmode, dest);
8357 x = gen_rtx_AND (vmode, x, op0);
8358 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
8359 }
8360 else
8361 {
8362 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
8363 {
8364 x = gen_rtx_AND (vmode, scratch, mask);
8365 }
8366 else /* alternative 2,4 */
8367 {
8368 gcc_assert (REGNO (mask) == REGNO (scratch));
8369 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
8370 x = gen_rtx_AND (vmode, scratch, op1);
8371 }
8372 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
8373
8374 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
8375 {
8376 dest = simplify_gen_subreg (vmode, op0, mode, 0);
8377 x = gen_rtx_AND (vmode, dest, nmask);
8378 }
8379 else /* alternative 3,4 */
8380 {
8381 gcc_assert (REGNO (nmask) == REGNO (dest));
8382 dest = nmask;
8383 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
8384 x = gen_rtx_AND (vmode, dest, op0);
8385 }
8386 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
8387 }
8388
8389 x = gen_rtx_IOR (vmode, dest, scratch);
8390 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
8391 }
8392
8393 /* Return TRUE or FALSE depending on whether the first SET in INSN
8394 has source and destination with matching CC modes, and that the
8395 CC mode is at least as constrained as REQ_MODE. */
8396
8397 int
8398 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
8399 {
8400 rtx set;
8401 enum machine_mode set_mode;
8402
8403 set = PATTERN (insn);
8404 if (GET_CODE (set) == PARALLEL)
8405 set = XVECEXP (set, 0, 0);
8406 if (GET_CODE (set) != SET)
8407 abort ();
8408 if (GET_CODE (SET_SRC (set)) != COMPARE)
8409 abort ();
8410
8411 set_mode = GET_MODE (SET_DEST (set));
8412 switch (set_mode)
8413 {
8414 case CCNOmode:
8415 if (req_mode != CCNOmode
8416 && (req_mode != CCmode
8417 || XEXP (SET_SRC (set), 1) != const0_rtx))
8418 return 0;
8419 break;
8420 case CCmode:
8421 if (req_mode == CCGCmode)
8422 return 0;
8423 /* FALLTHRU */
8424 case CCGCmode:
8425 if (req_mode == CCGOCmode || req_mode == CCNOmode)
8426 return 0;
8427 /* FALLTHRU */
8428 case CCGOCmode:
8429 if (req_mode == CCZmode)
8430 return 0;
8431 /* FALLTHRU */
8432 case CCZmode:
8433 break;
8434
8435 default:
8436 abort ();
8437 }
8438
8439 return (GET_MODE (SET_SRC (set)) == set_mode);
8440 }
8441
8442 /* Generate insn patterns to do an integer compare of OPERANDS. */
8443
8444 static rtx
8445 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
8446 {
8447 enum machine_mode cmpmode;
8448 rtx tmp, flags;
8449
8450 cmpmode = SELECT_CC_MODE (code, op0, op1);
8451 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
8452
8453 /* This is very simple, but making the interface the same as in the
8454 FP case makes the rest of the code easier. */
8455 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
8456 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
8457
8458 /* Return the test that should be put into the flags user, i.e.
8459 the bcc, scc, or cmov instruction. */
8460 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
8461 }
8462
8463 /* Figure out whether to use ordered or unordered fp comparisons.
8464 Return the appropriate mode to use. */
8465
8466 enum machine_mode
8467 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
8468 {
8469 /* ??? In order to make all comparisons reversible, we do all comparisons
8470 non-trapping when compiling for IEEE. Once gcc is able to distinguish
8471 all forms of trapping and nontrapping comparisons, we can make inequality
8472 comparisons trapping again, since it results in better code when using
8473 FCOM based compares. */
8474 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
8475 }
8476
8477 enum machine_mode
8478 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
8479 {
8480 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8481 return ix86_fp_compare_mode (code);
8482 switch (code)
8483 {
8484 /* Only zero flag is needed. */
8485 case EQ: /* ZF=0 */
8486 case NE: /* ZF!=0 */
8487 return CCZmode;
8488 /* Codes needing carry flag. */
8489 case GEU: /* CF=0 */
8490 case GTU: /* CF=0 & ZF=0 */
8491 case LTU: /* CF=1 */
8492 case LEU: /* CF=1 | ZF=1 */
8493 return CCmode;
8494 /* Codes possibly doable only with sign flag when
8495 comparing against zero. */
8496 case GE: /* SF=OF or SF=0 */
8497 case LT: /* SF<>OF or SF=1 */
8498 if (op1 == const0_rtx)
8499 return CCGOCmode;
8500 else
8501 /* For other cases Carry flag is not required. */
8502 return CCGCmode;
8503 /* Codes doable only with the sign flag when comparing
8504 against zero, but we lack a jump instruction for it,
8505 so we need to use relational tests against the overflow
8506 flag, which thus needs to be zero. */
8507 case GT: /* ZF=0 & SF=OF */
8508 case LE: /* ZF=1 | SF<>OF */
8509 if (op1 == const0_rtx)
8510 return CCNOmode;
8511 else
8512 return CCGCmode;
8513 /* The strcmp pattern does (use flags) and combine may ask us for the
8514 proper mode. */
8515 case USE:
8516 return CCmode;
8517 default:
8518 abort ();
8519 }
8520 }
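
/* Example (added): "x > y" on unsigned integers yields CCmode because
   GTU needs the carry flag, while signed "x > 0" yields CCNOmode, which
   is what allows the comparison to be carried out by a flag-setting
   "testl %eax, %eax" rather than an explicit "cmpl".  */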
8521
8522 /* Return the fixed registers used for condition codes. */
8523
8524 static bool
8525 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
8526 {
8527 *p1 = FLAGS_REG;
8528 *p2 = FPSR_REG;
8529 return true;
8530 }
8531
8532 /* If two condition code modes are compatible, return a condition code
8533 mode which is compatible with both. Otherwise, return
8534 VOIDmode. */
8535
8536 static enum machine_mode
8537 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
8538 {
8539 if (m1 == m2)
8540 return m1;
8541
8542 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
8543 return VOIDmode;
8544
8545 if ((m1 == CCGCmode && m2 == CCGOCmode)
8546 || (m1 == CCGOCmode && m2 == CCGCmode))
8547 return CCGCmode;
8548
8549 switch (m1)
8550 {
8551 default:
8552 abort ();
8553
8554 case CCmode:
8555 case CCGCmode:
8556 case CCGOCmode:
8557 case CCNOmode:
8558 case CCZmode:
8559 switch (m2)
8560 {
8561 default:
8562 return VOIDmode;
8563
8564 case CCmode:
8565 case CCGCmode:
8566 case CCGOCmode:
8567 case CCNOmode:
8568 case CCZmode:
8569 return CCmode;
8570 }
8571
8572 case CCFPmode:
8573 case CCFPUmode:
8574 /* These are only compatible with themselves, which we already
8575 checked above. */
8576 return VOIDmode;
8577 }
8578 }
8579
8580 /* Return true if we should use an FCOMI instruction for this fp comparison. */
8581
8582 int
8583 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
8584 {
8585 enum rtx_code swapped_code = swap_condition (code);
8586 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
8587 || (ix86_fp_comparison_cost (swapped_code)
8588 == ix86_fp_comparison_fcomi_cost (swapped_code)));
8589 }
8590
8591 /* Swap, force into registers, or otherwise massage the two operands
8592 to an fp comparison. The operands are updated in place; the new
8593 comparison code is returned. */
8594
8595 static enum rtx_code
8596 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
8597 {
8598 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
8599 rtx op0 = *pop0, op1 = *pop1;
8600 enum machine_mode op_mode = GET_MODE (op0);
8601 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
8602
8603 /* All of the unordered compare instructions only work on registers.
8604 The same is true of the fcomi compare instructions. The same is
8605 true of the XFmode compare instructions if not comparing with
8606 zero (ftst insn is used in this case). */
8607
8608 if (!is_sse
8609 && (fpcmp_mode == CCFPUmode
8610 || (op_mode == XFmode
8611 && ! (standard_80387_constant_p (op0) == 1
8612 || standard_80387_constant_p (op1) == 1))
8613 || ix86_use_fcomi_compare (code)))
8614 {
8615 op0 = force_reg (op_mode, op0);
8616 op1 = force_reg (op_mode, op1);
8617 }
8618 else
8619 {
8620 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
8621 things around if they appear profitable, otherwise force op0
8622 into a register. */
8623
8624 if (standard_80387_constant_p (op0) == 0
8625 || (GET_CODE (op0) == MEM
8626 && ! (standard_80387_constant_p (op1) == 0
8627 || GET_CODE (op1) == MEM)))
8628 {
8629 rtx tmp;
8630 tmp = op0, op0 = op1, op1 = tmp;
8631 code = swap_condition (code);
8632 }
8633
8634 if (GET_CODE (op0) != REG)
8635 op0 = force_reg (op_mode, op0);
8636
8637 if (CONSTANT_P (op1))
8638 {
8639 int tmp = standard_80387_constant_p (op1);
8640 if (tmp == 0)
8641 op1 = validize_mem (force_const_mem (op_mode, op1));
8642 else if (tmp == 1)
8643 {
8644 if (TARGET_CMOVE)
8645 op1 = force_reg (op_mode, op1);
8646 }
8647 else
8648 op1 = force_reg (op_mode, op1);
8649 }
8650 }
8651
8652 /* Try to rearrange the comparison to make it cheaper. */
8653 if (ix86_fp_comparison_cost (code)
8654 > ix86_fp_comparison_cost (swap_condition (code))
8655 && (GET_CODE (op1) == REG || !no_new_pseudos))
8656 {
8657 rtx tmp;
8658 tmp = op0, op0 = op1, op1 = tmp;
8659 code = swap_condition (code);
8660 if (GET_CODE (op0) != REG)
8661 op0 = force_reg (op_mode, op0);
8662 }
8663
8664 *pop0 = op0;
8665 *pop1 = op1;
8666 return code;
8667 }
8668
8669 /* Convert the comparison codes we use to represent FP comparisons into the
8670 integer code that will result in a proper branch. Return UNKNOWN if no
8671 such code is available. */
8672
8673 enum rtx_code
8674 ix86_fp_compare_code_to_integer (enum rtx_code code)
8675 {
8676 switch (code)
8677 {
8678 case GT:
8679 return GTU;
8680 case GE:
8681 return GEU;
8682 case ORDERED:
8683 case UNORDERED:
8684 return code;
8685 break;
8686 case UNEQ:
8687 return EQ;
8688 break;
8689 case UNLT:
8690 return LTU;
8691 break;
8692 case UNLE:
8693 return LEU;
8694 break;
8695 case LTGT:
8696 return NE;
8697 break;
8698 default:
8699 return UNKNOWN;
8700 }
8701 }
8702
8703 /* Split comparison code CODE into comparisons we can do using branch
8704 instructions. BYPASS_CODE is the comparison code for the branch that will
8705 branch around FIRST_CODE and SECOND_CODE. If one of the branches is not
8706 required, its value is set to UNKNOWN.
8707 We never require more than two branches. */
8708
8709 void
8710 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
8711 enum rtx_code *first_code,
8712 enum rtx_code *second_code)
8713 {
8714 *first_code = code;
8715 *bypass_code = UNKNOWN;
8716 *second_code = UNKNOWN;
8717
8718 /* The fcomi comparison sets flags as follows:
8719
8720 cmp ZF PF CF
8721 > 0 0 0
8722 < 0 0 1
8723 = 1 0 0
8724 un 1 1 1 */
8725
8726 switch (code)
8727 {
8728 case GT: /* GTU - CF=0 & ZF=0 */
8729 case GE: /* GEU - CF=0 */
8730 case ORDERED: /* PF=0 */
8731 case UNORDERED: /* PF=1 */
8732 case UNEQ: /* EQ - ZF=1 */
8733 case UNLT: /* LTU - CF=1 */
8734 case UNLE: /* LEU - CF=1 | ZF=1 */
8735 case LTGT: /* EQ - ZF=0 */
8736 break;
8737 case LT: /* LTU - CF=1 - fails on unordered */
8738 *first_code = UNLT;
8739 *bypass_code = UNORDERED;
8740 break;
8741 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
8742 *first_code = UNLE;
8743 *bypass_code = UNORDERED;
8744 break;
8745 case EQ: /* EQ - ZF=1 - fails on unordered */
8746 *first_code = UNEQ;
8747 *bypass_code = UNORDERED;
8748 break;
8749 case NE: /* NE - ZF=0 - fails on unordered */
8750 *first_code = LTGT;
8751 *second_code = UNORDERED;
8752 break;
8753 case UNGE: /* GEU - CF=0 - fails on unordered */
8754 *first_code = GE;
8755 *second_code = UNORDERED;
8756 break;
8757 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
8758 *first_code = GT;
8759 *second_code = UNORDERED;
8760 break;
8761 default:
8762 abort ();
8763 }
8764 if (!TARGET_IEEE_FP)
8765 {
8766 *second_code = UNKNOWN;
8767 *bypass_code = UNKNOWN;
8768 }
8769 }
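
/* Worked example (added): under -mieee-fp a floating point "!=" cannot
   be decided by a single branch, so the table above splits it into
   FIRST_CODE = LTGT plus SECOND_CODE = UNORDERED, giving a sequence of
   the form

       fucomip %st(1), %st
       jne     .Ltrue          ZF = 0, the operands differ
       jp      .Ltrue          PF = 1, unordered also counts as "!="

   while "==" instead uses UNEQ with an UNORDERED bypass branch that
   jumps around the equality test.  The label name is illustrative.  */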
8770
8771 /* Return the cost of a comparison done with fcom + arithmetic operations on AX.
8772 All following functions use the number of instructions as the cost metric.
8773 In the future this should be tweaked to compute bytes for optimize_size and
8774 to take into account the performance of various instructions on various CPUs. */
8775 static int
8776 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
8777 {
8778 if (!TARGET_IEEE_FP)
8779 return 4;
8780 /* The cost of code output by ix86_expand_fp_compare. */
8781 switch (code)
8782 {
8783 case UNLE:
8784 case UNLT:
8785 case LTGT:
8786 case GT:
8787 case GE:
8788 case UNORDERED:
8789 case ORDERED:
8790 case UNEQ:
8791 return 4;
8792 break;
8793 case LT:
8794 case NE:
8795 case EQ:
8796 case UNGE:
8797 return 5;
8798 break;
8799 case LE:
8800 case UNGT:
8801 return 6;
8802 break;
8803 default:
8804 abort ();
8805 }
8806 }
8807
8808 /* Return cost of comparison done using fcomi operation.
8809 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8810 static int
8811 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
8812 {
8813 enum rtx_code bypass_code, first_code, second_code;
8814 /* Return arbitrarily high cost when instruction is not supported - this
8815 prevents gcc from using it. */
8816 if (!TARGET_CMOVE)
8817 return 1024;
8818 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8819 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
8820 }
8821
8822 /* Return cost of comparison done using sahf operation.
8823 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8824 static int
8825 ix86_fp_comparison_sahf_cost (enum rtx_code code)
8826 {
8827 enum rtx_code bypass_code, first_code, second_code;
8828 /* Return arbitrarily high cost when instruction is not preferred - this
8829 prevents gcc from using it. */
8830 if (!TARGET_USE_SAHF && !optimize_size)
8831 return 1024;
8832 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8833 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
8834 }
8835
8836 /* Compute cost of the comparison done using any method.
8837 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8838 static int
8839 ix86_fp_comparison_cost (enum rtx_code code)
8840 {
8841 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
8842 int min;
8843
8844 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
8845 sahf_cost = ix86_fp_comparison_sahf_cost (code);
8846
8847 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
8848 if (min > sahf_cost)
8849 min = sahf_cost;
8850 if (min > fcomi_cost)
8851 min = fcomi_cost;
8852 return min;
8853 }
8854
8855 /* Generate insn patterns to do a floating point compare of OPERANDS. */
8856
8857 static rtx
8858 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
8859 rtx *second_test, rtx *bypass_test)
8860 {
8861 enum machine_mode fpcmp_mode, intcmp_mode;
8862 rtx tmp, tmp2;
8863 int cost = ix86_fp_comparison_cost (code);
8864 enum rtx_code bypass_code, first_code, second_code;
8865
8866 fpcmp_mode = ix86_fp_compare_mode (code);
8867 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
8868
8869 if (second_test)
8870 *second_test = NULL_RTX;
8871 if (bypass_test)
8872 *bypass_test = NULL_RTX;
8873
8874 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8875
8876 /* Do fcomi/sahf based test when profitable. */
8877 if ((bypass_code == UNKNOWN || bypass_test)
8878 && (second_code == UNKNOWN || second_test)
8879 && ix86_fp_comparison_arithmetics_cost (code) > cost)
8880 {
8881 if (TARGET_CMOVE)
8882 {
8883 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8884 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
8885 tmp);
8886 emit_insn (tmp);
8887 }
8888 else
8889 {
8890 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8891 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8892 if (!scratch)
8893 scratch = gen_reg_rtx (HImode);
8894 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8895 emit_insn (gen_x86_sahf_1 (scratch));
8896 }
8897
8898 /* The FP codes work out to act like unsigned. */
8899 intcmp_mode = fpcmp_mode;
8900 code = first_code;
8901 if (bypass_code != UNKNOWN)
8902 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
8903 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8904 const0_rtx);
8905 if (second_code != UNKNOWN)
8906 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
8907 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8908 const0_rtx);
8909 }
8910 else
8911 {
8912 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
8913 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8914 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8915 if (!scratch)
8916 scratch = gen_reg_rtx (HImode);
8917 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8918
8919 /* In the unordered case, we have to check C2 for NaN's, which
8920 doesn't happen to work out to anything nice combination-wise.
8921 So do some bit twiddling on the value we've got in AH to come
8922 up with an appropriate set of condition codes. */
8923
8924 intcmp_mode = CCNOmode;
8925 switch (code)
8926 {
8927 case GT:
8928 case UNGT:
8929 if (code == GT || !TARGET_IEEE_FP)
8930 {
8931 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8932 code = EQ;
8933 }
8934 else
8935 {
8936 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8937 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8938 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
8939 intcmp_mode = CCmode;
8940 code = GEU;
8941 }
8942 break;
8943 case LT:
8944 case UNLT:
8945 if (code == LT && TARGET_IEEE_FP)
8946 {
8947 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8948 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
8949 intcmp_mode = CCmode;
8950 code = EQ;
8951 }
8952 else
8953 {
8954 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
8955 code = NE;
8956 }
8957 break;
8958 case GE:
8959 case UNGE:
8960 if (code == GE || !TARGET_IEEE_FP)
8961 {
8962 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
8963 code = EQ;
8964 }
8965 else
8966 {
8967 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8968 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8969 GEN_INT (0x01)));
8970 code = NE;
8971 }
8972 break;
8973 case LE:
8974 case UNLE:
8975 if (code == LE && TARGET_IEEE_FP)
8976 {
8977 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8978 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8979 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8980 intcmp_mode = CCmode;
8981 code = LTU;
8982 }
8983 else
8984 {
8985 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8986 code = NE;
8987 }
8988 break;
8989 case EQ:
8990 case UNEQ:
8991 if (code == EQ && TARGET_IEEE_FP)
8992 {
8993 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8994 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8995 intcmp_mode = CCmode;
8996 code = EQ;
8997 }
8998 else
8999 {
9000 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9001 code = NE;
9002 break;
9003 }
9004 break;
9005 case NE:
9006 case LTGT:
9007 if (code == NE && TARGET_IEEE_FP)
9008 {
9009 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9010 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9011 GEN_INT (0x40)));
9012 code = NE;
9013 }
9014 else
9015 {
9016 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9017 code = EQ;
9018 }
9019 break;
9020
9021 case UNORDERED:
9022 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9023 code = NE;
9024 break;
9025 case ORDERED:
9026 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9027 code = EQ;
9028 break;
9029
9030 default:
9031 abort ();
9032 }
9033 }
9034
9035 /* Return the test that should be put into the flags user, i.e.
9036 the bcc, scc, or cmov instruction. */
9037 return gen_rtx_fmt_ee (code, VOIDmode,
9038 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9039 const0_rtx);
9040 }
9041
9042 rtx
9043 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
9044 {
9045 rtx op0, op1, ret;
9046 op0 = ix86_compare_op0;
9047 op1 = ix86_compare_op1;
9048
9049 if (second_test)
9050 *second_test = NULL_RTX;
9051 if (bypass_test)
9052 *bypass_test = NULL_RTX;
9053
9054 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
9055 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9056 second_test, bypass_test);
9057 else
9058 ret = ix86_expand_int_compare (code, op0, op1);
9059
9060 return ret;
9061 }
9062
9063 /* Return true if the CODE will result in a nontrivial jump sequence. */
9064 bool
9065 ix86_fp_jump_nontrivial_p (enum rtx_code code)
9066 {
9067 enum rtx_code bypass_code, first_code, second_code;
9068 if (!TARGET_CMOVE)
9069 return true;
9070 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9071 return bypass_code != UNKNOWN || second_code != UNKNOWN;
9072 }
9073
9074 void
9075 ix86_expand_branch (enum rtx_code code, rtx label)
9076 {
9077 rtx tmp;
9078
9079 switch (GET_MODE (ix86_compare_op0))
9080 {
9081 case QImode:
9082 case HImode:
9083 case SImode:
9084 simple:
9085 tmp = ix86_expand_compare (code, NULL, NULL);
9086 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9087 gen_rtx_LABEL_REF (VOIDmode, label),
9088 pc_rtx);
9089 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
9090 return;
9091
9092 case SFmode:
9093 case DFmode:
9094 case XFmode:
9095 {
9096 rtvec vec;
9097 int use_fcomi;
9098 enum rtx_code bypass_code, first_code, second_code;
9099
9100 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
9101 &ix86_compare_op1);
9102
9103 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9104
9105 /* Check whether we will use the natural sequence with one jump. If
9106 so, we can expand the jump early. Otherwise delay expansion by
9107 creating a compound insn so as not to confuse the optimizers. */
9108 if (bypass_code == UNKNOWN && second_code == UNKNOWN
9109 && TARGET_CMOVE)
9110 {
9111 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
9112 gen_rtx_LABEL_REF (VOIDmode, label),
9113 pc_rtx, NULL_RTX, NULL_RTX);
9114 }
9115 else
9116 {
9117 tmp = gen_rtx_fmt_ee (code, VOIDmode,
9118 ix86_compare_op0, ix86_compare_op1);
9119 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9120 gen_rtx_LABEL_REF (VOIDmode, label),
9121 pc_rtx);
9122 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
9123
9124 use_fcomi = ix86_use_fcomi_compare (code);
9125 vec = rtvec_alloc (3 + !use_fcomi);
9126 RTVEC_ELT (vec, 0) = tmp;
9127 RTVEC_ELT (vec, 1)
9128 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
9129 RTVEC_ELT (vec, 2)
9130 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
9131 if (! use_fcomi)
9132 RTVEC_ELT (vec, 3)
9133 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
9134
9135 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
9136 }
9137 return;
9138 }
9139
9140 case DImode:
9141 if (TARGET_64BIT)
9142 goto simple;
9143 /* Expand DImode branch into multiple compare+branch. */
9144 {
9145 rtx lo[2], hi[2], label2;
9146 enum rtx_code code1, code2, code3;
9147
9148 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
9149 {
9150 tmp = ix86_compare_op0;
9151 ix86_compare_op0 = ix86_compare_op1;
9152 ix86_compare_op1 = tmp;
9153 code = swap_condition (code);
9154 }
9155 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
9156 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
9157
9158 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
9159 avoid two branches. This costs one extra insn, so disable when
9160 optimizing for size. */
9161
9162 if ((code == EQ || code == NE)
9163 && (!optimize_size
9164 || hi[1] == const0_rtx || lo[1] == const0_rtx))
9165 {
9166 rtx xor0, xor1;
9167
9168 xor1 = hi[0];
9169 if (hi[1] != const0_rtx)
9170 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
9171 NULL_RTX, 0, OPTAB_WIDEN);
9172
9173 xor0 = lo[0];
9174 if (lo[1] != const0_rtx)
9175 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
9176 NULL_RTX, 0, OPTAB_WIDEN);
9177
9178 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
9179 NULL_RTX, 0, OPTAB_WIDEN);
9180
9181 ix86_compare_op0 = tmp;
9182 ix86_compare_op1 = const0_rtx;
9183 ix86_expand_branch (code, label);
9184 return;
9185 }
9186
9187 /* Otherwise, if we are doing less-than or greater-or-equal-than,
9188 and op1 is a constant whose low word is zero, then we can just
9189 examine the high word. */
9190
9191 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
9192 switch (code)
9193 {
9194 case LT: case LTU: case GE: case GEU:
9195 ix86_compare_op0 = hi[0];
9196 ix86_compare_op1 = hi[1];
9197 ix86_expand_branch (code, label);
9198 return;
9199 default:
9200 break;
9201 }
9202
9203 /* Otherwise, we need two or three jumps. */
9204
9205 label2 = gen_label_rtx ();
9206
9207 code1 = code;
9208 code2 = swap_condition (code);
9209 code3 = unsigned_condition (code);
9210
9211 switch (code)
9212 {
9213 case LT: case GT: case LTU: case GTU:
9214 break;
9215
9216 case LE: code1 = LT; code2 = GT; break;
9217 case GE: code1 = GT; code2 = LT; break;
9218 case LEU: code1 = LTU; code2 = GTU; break;
9219 case GEU: code1 = GTU; code2 = LTU; break;
9220
9221 case EQ: code1 = UNKNOWN; code2 = NE; break;
9222 case NE: code2 = UNKNOWN; break;
9223
9224 default:
9225 abort ();
9226 }
9227
9228 /*
9229 * a < b =>
9230 * if (hi(a) < hi(b)) goto true;
9231 * if (hi(a) > hi(b)) goto false;
9232 * if (lo(a) < lo(b)) goto true;
9233 * false:
9234 */
9235
9236 ix86_compare_op0 = hi[0];
9237 ix86_compare_op1 = hi[1];
9238
9239 if (code1 != UNKNOWN)
9240 ix86_expand_branch (code1, label);
9241 if (code2 != UNKNOWN)
9242 ix86_expand_branch (code2, label2);
9243
9244 ix86_compare_op0 = lo[0];
9245 ix86_compare_op1 = lo[1];
9246 ix86_expand_branch (code3, label);
9247
9248 if (code2 != UNKNOWN)
9249 emit_label (label2);
9250 return;
9251 }
9252
9253 default:
9254 abort ();
9255 }
9256 }
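
/* Example (added): on 32-bit targets a signed DImode "a < b" branch is
   split by the code above into, in outline,

       cmpl  hi(b), hi(a)
       jl    .Ltrue
       jg    .Lfalse
       cmpl  lo(b), lo(a)
       jb    .Ltrue
     .Lfalse:

   a signed test on the high words followed by an unsigned test (code3 =
   unsigned_condition) on the low words, while equality tests collapse
   into a single compare of (hi0 ^ hi1) | (lo0 ^ lo1) against zero when
   that is cheaper.  */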
9257
9258 /* Split branch based on floating point condition. */
9259 void
9260 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
9261 rtx target1, rtx target2, rtx tmp, rtx pushed)
9262 {
9263 rtx second, bypass;
9264 rtx label = NULL_RTX;
9265 rtx condition;
9266 int bypass_probability = -1, second_probability = -1, probability = -1;
9267 rtx i;
9268
9269 if (target2 != pc_rtx)
9270 {
9271 rtx tmp = target2;
9272 code = reverse_condition_maybe_unordered (code);
9273 target2 = target1;
9274 target1 = tmp;
9275 }
9276
9277 condition = ix86_expand_fp_compare (code, op1, op2,
9278 tmp, &second, &bypass);
9279
9280 /* Remove pushed operand from stack. */
9281 if (pushed)
9282 ix86_free_from_memory (GET_MODE (pushed));
9283
9284 if (split_branch_probability >= 0)
9285 {
9286 /* Distribute the probabilities across the jumps.
9287 Assume that BYPASS and SECOND always test
9288 for UNORDERED. */
9289 probability = split_branch_probability;
9290
9291 /* A value of 1 is low enough that the probability need not
9292 be updated. Later we may run some experiments and see
9293 if unordered values are more frequent in practice. */
9294 if (bypass)
9295 bypass_probability = 1;
9296 if (second)
9297 second_probability = 1;
9298 }
9299 if (bypass != NULL_RTX)
9300 {
9301 label = gen_label_rtx ();
9302 i = emit_jump_insn (gen_rtx_SET
9303 (VOIDmode, pc_rtx,
9304 gen_rtx_IF_THEN_ELSE (VOIDmode,
9305 bypass,
9306 gen_rtx_LABEL_REF (VOIDmode,
9307 label),
9308 pc_rtx)));
9309 if (bypass_probability >= 0)
9310 REG_NOTES (i)
9311 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9312 GEN_INT (bypass_probability),
9313 REG_NOTES (i));
9314 }
9315 i = emit_jump_insn (gen_rtx_SET
9316 (VOIDmode, pc_rtx,
9317 gen_rtx_IF_THEN_ELSE (VOIDmode,
9318 condition, target1, target2)));
9319 if (probability >= 0)
9320 REG_NOTES (i)
9321 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9322 GEN_INT (probability),
9323 REG_NOTES (i));
9324 if (second != NULL_RTX)
9325 {
9326 i = emit_jump_insn (gen_rtx_SET
9327 (VOIDmode, pc_rtx,
9328 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
9329 target2)));
9330 if (second_probability >= 0)
9331 REG_NOTES (i)
9332 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9333 GEN_INT (second_probability),
9334 REG_NOTES (i));
9335 }
9336 if (label != NULL_RTX)
9337 emit_label (label);
9338 }
9339
9340 int
9341 ix86_expand_setcc (enum rtx_code code, rtx dest)
9342 {
9343 rtx ret, tmp, tmpreg, equiv;
9344 rtx second_test, bypass_test;
9345
9346 if (GET_MODE (ix86_compare_op0) == DImode
9347 && !TARGET_64BIT)
9348 return 0; /* FAIL */
9349
9350 if (GET_MODE (dest) != QImode)
9351 abort ();
9352
9353 ret = ix86_expand_compare (code, &second_test, &bypass_test);
9354 PUT_MODE (ret, QImode);
9355
9356 tmp = dest;
9357 tmpreg = dest;
9358
9359 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
9360 if (bypass_test || second_test)
9361 {
9362 rtx test = second_test;
9363 int bypass = 0;
9364 rtx tmp2 = gen_reg_rtx (QImode);
9365 if (bypass_test)
9366 {
9367 if (second_test)
9368 abort ();
9369 test = bypass_test;
9370 bypass = 1;
9371 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
9372 }
9373 PUT_MODE (test, QImode);
9374 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
9375
9376 if (bypass)
9377 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
9378 else
9379 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
9380 }
9381
9382 /* Attach a REG_EQUAL note describing the comparison result. */
9383 equiv = simplify_gen_relational (code, QImode,
9384 GET_MODE (ix86_compare_op0),
9385 ix86_compare_op0, ix86_compare_op1);
9386 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
9387
9388 return 1; /* DONE */
9389 }
9390
9391 /* Expand a comparison setting or clearing the carry flag. Return true
9392 when successful and set *POP to the comparison operation. */
9393 static bool
9394 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
9395 {
9396 enum machine_mode mode =
9397 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
9398
9399 /* Do not handle DImode compares that go through the special path. Also we
9400 can't deal with FP compares yet. It would be possible to add them. */
9401 if ((mode == DImode && !TARGET_64BIT))
9402 return false;
9403 if (FLOAT_MODE_P (mode))
9404 {
9405 rtx second_test = NULL, bypass_test = NULL;
9406 rtx compare_op, compare_seq;
9407
9408 /* Shortcut: following common codes never translate into carry flag compares. */
9409 if (code == EQ || code == NE || code == UNEQ || code == LTGT
9410 || code == ORDERED || code == UNORDERED)
9411 return false;
9412
9413 /* These comparisons require the zero flag; swap operands so they don't. */
9414 if ((code == GT || code == UNLE || code == LE || code == UNGT)
9415 && !TARGET_IEEE_FP)
9416 {
9417 rtx tmp = op0;
9418 op0 = op1;
9419 op1 = tmp;
9420 code = swap_condition (code);
9421 }
9422
9423 /* Try to expand the comparison and verify that we end up with a carry-flag
9424 based comparison. This fails to be true only when we decide to expand the
9425 comparison using arithmetic, which is not a common scenario. */
9426 start_sequence ();
9427 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9428 &second_test, &bypass_test);
9429 compare_seq = get_insns ();
9430 end_sequence ();
9431
9432 if (second_test || bypass_test)
9433 return false;
9434 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9435 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9436 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
9437 else
9438 code = GET_CODE (compare_op);
9439 if (code != LTU && code != GEU)
9440 return false;
9441 emit_insn (compare_seq);
9442 *pop = compare_op;
9443 return true;
9444 }
9445 if (!INTEGRAL_MODE_P (mode))
9446 return false;
9447 switch (code)
9448 {
9449 case LTU:
9450 case GEU:
9451 break;
9452
9453 /* Convert a==0 into (unsigned)a<1. */
9454 case EQ:
9455 case NE:
9456 if (op1 != const0_rtx)
9457 return false;
9458 op1 = const1_rtx;
9459 code = (code == EQ ? LTU : GEU);
9460 break;
9461
9462 /* Convert a>b into b<a or a>=b+1. */
9463 case GTU:
9464 case LEU:
9465 if (GET_CODE (op1) == CONST_INT)
9466 {
9467 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
9468 /* Bail out on overflow. We still can swap operands but that
9469 would force loading of the constant into register. */
9470 if (op1 == const0_rtx
9471 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
9472 return false;
9473 code = (code == GTU ? GEU : LTU);
9474 }
9475 else
9476 {
9477 rtx tmp = op1;
9478 op1 = op0;
9479 op0 = tmp;
9480 code = (code == GTU ? LTU : GEU);
9481 }
9482 break;
9483
9484 /* Convert a>=0 into (unsigned)a<0x80000000. */
9485 case LT:
9486 case GE:
9487 if (mode == DImode || op1 != const0_rtx)
9488 return false;
9489 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
9490 code = (code == LT ? GEU : LTU);
9491 break;
9492 case LE:
9493 case GT:
9494 if (mode == DImode || op1 != constm1_rtx)
9495 return false;
9496 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
9497 code = (code == LE ? GEU : LTU);
9498 break;
9499
9500 default:
9501 return false;
9502 }
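/* Illustrative summary of the rewrites above (not generated code; SImode
   examples):
       a == 0            becomes  (unsigned) a < 1
       (unsigned) a > 7  becomes  (unsigned) a >= 8   (constant op1)
       a >= 0            becomes  (unsigned) a < 0x80000000
   Each right-hand side is an LTU/GEU test, which the compare instruction
   reports directly in the carry flag. */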
9503 /* Swapping operands may cause constant to appear as first operand. */
9504 if (!nonimmediate_operand (op0, VOIDmode))
9505 {
9506 if (no_new_pseudos)
9507 return false;
9508 op0 = force_reg (mode, op0);
9509 }
9510 ix86_compare_op0 = op0;
9511 ix86_compare_op1 = op1;
9512 *pop = ix86_expand_compare (code, NULL, NULL);
9513 if (GET_CODE (*pop) != LTU && GET_CODE (*pop) != GEU)
9514 abort ();
9515 return true;
9516 }
9517
9518 int
9519 ix86_expand_int_movcc (rtx operands[])
9520 {
9521 enum rtx_code code = GET_CODE (operands[1]), compare_code;
9522 rtx compare_seq, compare_op;
9523 rtx second_test, bypass_test;
9524 enum machine_mode mode = GET_MODE (operands[0]);
9525 bool sign_bit_compare_p = false;
9526
9527 start_sequence ();
9528 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9529 compare_seq = get_insns ();
9530 end_sequence ();
9531
9532 compare_code = GET_CODE (compare_op);
9533
9534 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
9535 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
9536 sign_bit_compare_p = true;
9537
9538 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
9539 HImode insns, we'd be swallowed in word prefix ops. */
9540
9541 if ((mode != HImode || TARGET_FAST_PREFIX)
9542 && (mode != DImode || TARGET_64BIT)
9543 && GET_CODE (operands[2]) == CONST_INT
9544 && GET_CODE (operands[3]) == CONST_INT)
9545 {
9546 rtx out = operands[0];
9547 HOST_WIDE_INT ct = INTVAL (operands[2]);
9548 HOST_WIDE_INT cf = INTVAL (operands[3]);
9549 HOST_WIDE_INT diff;
9550
9551 diff = ct - cf;
9552 /* Sign bit compares are better done using shifts than by using
9553 sbb. */
9554 if (sign_bit_compare_p
9555 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
9556 ix86_compare_op1, &compare_op))
9557 {
9558 /* Detect overlap between destination and compare sources. */
9559 rtx tmp = out;
9560
9561 if (!sign_bit_compare_p)
9562 {
9563 bool fpcmp = false;
9564
9565 compare_code = GET_CODE (compare_op);
9566
9567 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9568 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9569 {
9570 fpcmp = true;
9571 compare_code = ix86_fp_compare_code_to_integer (compare_code);
9572 }
9573
9574 /* To simplify rest of code, restrict to the GEU case. */
9575 if (compare_code == LTU)
9576 {
9577 HOST_WIDE_INT tmp = ct;
9578 ct = cf;
9579 cf = tmp;
9580 compare_code = reverse_condition (compare_code);
9581 code = reverse_condition (code);
9582 }
9583 else
9584 {
9585 if (fpcmp)
9586 PUT_CODE (compare_op,
9587 reverse_condition_maybe_unordered
9588 (GET_CODE (compare_op)));
9589 else
9590 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
9591 }
9592 diff = ct - cf;
9593
9594 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
9595 || reg_overlap_mentioned_p (out, ix86_compare_op1))
9596 tmp = gen_reg_rtx (mode);
9597
9598 if (mode == DImode)
9599 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
9600 else
9601 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
9602 }
9603 else
9604 {
9605 if (code == GT || code == GE)
9606 code = reverse_condition (code);
9607 else
9608 {
9609 HOST_WIDE_INT tmp = ct;
9610 ct = cf;
9611 cf = tmp;
9612 diff = ct - cf;
9613 }
9614 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
9615 ix86_compare_op1, VOIDmode, 0, -1);
9616 }
9617
9618 if (diff == 1)
9619 {
9620 /*
9621 * cmpl op0,op1
9622 * sbbl dest,dest
9623 * [addl dest, ct]
9624 *
9625 * Size 5 - 8.
9626 */
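/* Example (illustrative): with ct = 3 and cf = 2 at this point
   (diff == 1), tmp currently holds -1 or 0; adding ct = 3 turns
   that into 2 (= cf) or 3 (= ct) respectively. */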
9627 if (ct)
9628 tmp = expand_simple_binop (mode, PLUS,
9629 tmp, GEN_INT (ct),
9630 copy_rtx (tmp), 1, OPTAB_DIRECT);
9631 }
9632 else if (cf == -1)
9633 {
9634 /*
9635 * cmpl op0,op1
9636 * sbbl dest,dest
9637 * orl $ct, dest
9638 *
9639 * Size 8.
9640 */
9641 tmp = expand_simple_binop (mode, IOR,
9642 tmp, GEN_INT (ct),
9643 copy_rtx (tmp), 1, OPTAB_DIRECT);
9644 }
9645 else if (diff == -1 && ct)
9646 {
9647 /*
9648 * cmpl op0,op1
9649 * sbbl dest,dest
9650 * notl dest
9651 * [addl dest, cf]
9652 *
9653 * Size 8 - 11.
9654 */
9655 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9656 if (cf)
9657 tmp = expand_simple_binop (mode, PLUS,
9658 copy_rtx (tmp), GEN_INT (cf),
9659 copy_rtx (tmp), 1, OPTAB_DIRECT);
9660 }
9661 else
9662 {
9663 /*
9664 * cmpl op0,op1
9665 * sbbl dest,dest
9666 * [notl dest]
9667 * andl cf - ct, dest
9668 * [addl dest, ct]
9669 *
9670 * Size 8 - 11.
9671 */
9672
9673 if (cf == 0)
9674 {
9675 cf = ct;
9676 ct = 0;
9677 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9678 }
9679
9680 tmp = expand_simple_binop (mode, AND,
9681 copy_rtx (tmp),
9682 gen_int_mode (cf - ct, mode),
9683 copy_rtx (tmp), 1, OPTAB_DIRECT);
9684 if (ct)
9685 tmp = expand_simple_binop (mode, PLUS,
9686 copy_rtx (tmp), GEN_INT (ct),
9687 copy_rtx (tmp), 1, OPTAB_DIRECT);
9688 }
9689
9690 if (!rtx_equal_p (tmp, out))
9691 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
9692
9693 return 1; /* DONE */
9694 }
9695
9696 if (diff < 0)
9697 {
9698 HOST_WIDE_INT tmp;
9699 tmp = ct, ct = cf, cf = tmp;
9700 diff = -diff;
9701 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9702 {
9703 /* We may be reversing an unordered compare to a normal compare, which
9704 is not valid in general (we may convert a non-trapping condition
9705 to a trapping one); however, on i386 we currently emit all
9706 comparisons unordered. */
9707 compare_code = reverse_condition_maybe_unordered (compare_code);
9708 code = reverse_condition_maybe_unordered (code);
9709 }
9710 else
9711 {
9712 compare_code = reverse_condition (compare_code);
9713 code = reverse_condition (code);
9714 }
9715 }
9716
9717 compare_code = UNKNOWN;
9718 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
9719 && GET_CODE (ix86_compare_op1) == CONST_INT)
9720 {
9721 if (ix86_compare_op1 == const0_rtx
9722 && (code == LT || code == GE))
9723 compare_code = code;
9724 else if (ix86_compare_op1 == constm1_rtx)
9725 {
9726 if (code == LE)
9727 compare_code = LT;
9728 else if (code == GT)
9729 compare_code = GE;
9730 }
9731 }
9732
9733 /* Optimize dest = (op0 < 0) ? -1 : cf. */
9734 if (compare_code != UNKNOWN
9735 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
9736 && (cf == -1 || ct == -1))
9737 {
9738 /* If lea code below could be used, only optimize
9739 if it results in a 2 insn sequence. */
9740
9741 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
9742 || diff == 3 || diff == 5 || diff == 9)
9743 || (compare_code == LT && ct == -1)
9744 || (compare_code == GE && cf == -1))
9745 {
9746 /*
9747 * notl op1 (if necessary)
9748 * sarl $31, op1
9749 * orl cf, op1
9750 */
9751 if (ct != -1)
9752 {
9753 cf = ct;
9754 ct = -1;
9755 code = reverse_condition (code);
9756 }
9757
9758 out = emit_store_flag (out, code, ix86_compare_op0,
9759 ix86_compare_op1, VOIDmode, 0, -1);
9760
9761 out = expand_simple_binop (mode, IOR,
9762 out, GEN_INT (cf),
9763 out, 1, OPTAB_DIRECT);
9764 if (out != operands[0])
9765 emit_move_insn (operands[0], out);
9766
9767 return 1; /* DONE */
9768 }
9769 }
9770
9771
9772 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
9773 || diff == 3 || diff == 5 || diff == 9)
9774 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
9775 && (mode != DImode
9776 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
9777 {
9778 /*
9779 * xorl dest,dest
9780 * cmpl op1,op2
9781 * setcc dest
9782 * lea cf(dest*(ct-cf)),dest
9783 *
9784 * Size 14.
9785 *
9786 * This also catches the degenerate setcc-only case.
9787 */
9788
9789 rtx tmp;
9790 int nops;
9791
9792 out = emit_store_flag (out, code, ix86_compare_op0,
9793 ix86_compare_op1, VOIDmode, 0, 1);
9794
9795 nops = 0;
9796 /* On x86_64 the lea instruction operates on Pmode, so we need
9797 to do the arithmetic in the proper mode to match. */
9798 if (diff == 1)
9799 tmp = copy_rtx (out);
9800 else
9801 {
9802 rtx out1;
9803 out1 = copy_rtx (out);
9804 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
9805 nops++;
9806 if (diff & 1)
9807 {
9808 tmp = gen_rtx_PLUS (mode, tmp, out1);
9809 nops++;
9810 }
9811 }
9812 if (cf != 0)
9813 {
9814 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
9815 nops++;
9816 }
9817 if (!rtx_equal_p (tmp, out))
9818 {
9819 if (nops == 1)
9820 out = force_operand (tmp, copy_rtx (out));
9821 else
9822 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
9823 }
9824 if (!rtx_equal_p (out, operands[0]))
9825 emit_move_insn (operands[0], copy_rtx (out));
9826
9827 return 1; /* DONE */
9828 }
9829
9830 /*
9831 * General case: Jumpful:
9832 * xorl dest,dest cmpl op1, op2
9833 * cmpl op1, op2 movl ct, dest
9834 * setcc dest jcc 1f
9835 * decl dest movl cf, dest
9836 * andl (cf-ct),dest 1:
9837 * addl ct,dest
9838 *
9839 * Size 20. Size 14.
9840 *
9841 * This is reasonably steep, but branch mispredict costs are
9842 * high on modern cpus, so consider failing only if optimizing
9843 * for space.
9844 */
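/*
 * Illustrative instance of the branchless column for
 * dest = (a == b) ? 10 : 20 (ct = 10, cf = 20):
 *
 * xorl dest,dest
 * cmpl op1,op2
 * sete dest ; dest = 1 if equal, else 0
 * decl dest ; dest = 0 if equal, else -1
 * andl $10,dest ; cf - ct = 10, so dest = 0 or 10
 * addl $10,dest ; dest = 10 (ct) or 20 (cf)
 */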
9845
9846 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
9847 && BRANCH_COST >= 2)
9848 {
9849 if (cf == 0)
9850 {
9851 cf = ct;
9852 ct = 0;
9853 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9854 /* We may be reversing an unordered compare to a normal compare,
9855 which is not valid in general (we may convert a non-trapping
9856 condition to a trapping one); however, on i386 we currently
9857 emit all comparisons unordered. */
9858 code = reverse_condition_maybe_unordered (code);
9859 else
9860 {
9861 code = reverse_condition (code);
9862 if (compare_code != UNKNOWN)
9863 compare_code = reverse_condition (compare_code);
9864 }
9865 }
9866
9867 if (compare_code != UNKNOWN)
9868 {
9869 /* notl op1 (if needed)
9870 sarl $31, op1
9871 andl (cf-ct), op1
9872 addl ct, op1
9873
9874 For x < 0 (resp. x <= -1) there will be no notl,
9875 so if possible swap the constants to get rid of the
9876 complement.
9877 True/false will be -1/0 while code below (store flag
9878 followed by decrement) is 0/-1, so the constants need
9879 to be exchanged once more. */
9880
9881 if (compare_code == GE || !cf)
9882 {
9883 code = reverse_condition (code);
9884 compare_code = LT;
9885 }
9886 else
9887 {
9888 HOST_WIDE_INT tmp = cf;
9889 cf = ct;
9890 ct = tmp;
9891 }
9892
9893 out = emit_store_flag (out, code, ix86_compare_op0,
9894 ix86_compare_op1, VOIDmode, 0, -1);
9895 }
9896 else
9897 {
9898 out = emit_store_flag (out, code, ix86_compare_op0,
9899 ix86_compare_op1, VOIDmode, 0, 1);
9900
9901 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
9902 copy_rtx (out), 1, OPTAB_DIRECT);
9903 }
9904
9905 out = expand_simple_binop (mode, AND, copy_rtx (out),
9906 gen_int_mode (cf - ct, mode),
9907 copy_rtx (out), 1, OPTAB_DIRECT);
9908 if (ct)
9909 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
9910 copy_rtx (out), 1, OPTAB_DIRECT);
9911 if (!rtx_equal_p (out, operands[0]))
9912 emit_move_insn (operands[0], copy_rtx (out));
9913
9914 return 1; /* DONE */
9915 }
9916 }
9917
9918 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
9919 {
9920 /* Try a few things more with specific constants and a variable. */
9921
9922 optab op;
9923 rtx var, orig_out, out, tmp;
9924
9925 if (BRANCH_COST <= 2)
9926 return 0; /* FAIL */
9927
9928 /* If one of the two operands is an interesting constant, load 0 or -1
9929 conditionally via the recursion below and mask in the variable with a logical operation. */
9930
9931 if (GET_CODE (operands[2]) == CONST_INT)
9932 {
9933 var = operands[3];
9934 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
9935 operands[3] = constm1_rtx, op = and_optab;
9936 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
9937 operands[3] = const0_rtx, op = ior_optab;
9938 else
9939 return 0; /* FAIL */
9940 }
9941 else if (GET_CODE (operands[3]) == CONST_INT)
9942 {
9943 var = operands[2];
9944 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
9945 operands[2] = constm1_rtx, op = and_optab;
9946 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
9947 operands[2] = const0_rtx, op = ior_optab;
9948 else
9949 return 0; /* FAIL */
9950 }
9951 else
9952 return 0; /* FAIL */
9953
9954 orig_out = operands[0];
9955 tmp = gen_reg_rtx (mode);
9956 operands[0] = tmp;
9957
9958 /* Recurse to get the constant loaded. */
9959 if (ix86_expand_int_movcc (operands) == 0)
9960 return 0; /* FAIL */
9961
9962 /* Mask in the interesting variable. */
9963 out = expand_binop (mode, op, var, tmp, orig_out, 0,
9964 OPTAB_WIDEN);
9965 if (!rtx_equal_p (out, orig_out))
9966 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
9967
9968 return 1; /* DONE */
9969 }
9970
9971 /*
9972 * For comparison with above,
9973 *
9974 * movl cf,dest
9975 * movl ct,tmp
9976 * cmpl op1,op2
9977 * cmovcc tmp,dest
9978 *
9979 * Size 15.
9980 */
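/*
 * Concretely (illustrative), dest = (a < b) ? 10 : 20 becomes
 *
 * movl $20,dest
 * movl $10,tmp
 * cmpl op1,op2
 * cmovcc tmp,dest ; cc = the condition for a < b
 */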
9981
9982 if (! nonimmediate_operand (operands[2], mode))
9983 operands[2] = force_reg (mode, operands[2]);
9984 if (! nonimmediate_operand (operands[3], mode))
9985 operands[3] = force_reg (mode, operands[3]);
9986
9987 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9988 {
9989 rtx tmp = gen_reg_rtx (mode);
9990 emit_move_insn (tmp, operands[3]);
9991 operands[3] = tmp;
9992 }
9993 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9994 {
9995 rtx tmp = gen_reg_rtx (mode);
9996 emit_move_insn (tmp, operands[2]);
9997 operands[2] = tmp;
9998 }
9999
10000 if (! register_operand (operands[2], VOIDmode)
10001 && (mode == QImode
10002 || ! register_operand (operands[3], VOIDmode)))
10003 operands[2] = force_reg (mode, operands[2]);
10004
10005 if (mode == QImode
10006 && ! register_operand (operands[3], VOIDmode))
10007 operands[3] = force_reg (mode, operands[3]);
10008
10009 emit_insn (compare_seq);
10010 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10011 gen_rtx_IF_THEN_ELSE (mode,
10012 compare_op, operands[2],
10013 operands[3])));
10014 if (bypass_test)
10015 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
10016 gen_rtx_IF_THEN_ELSE (mode,
10017 bypass_test,
10018 copy_rtx (operands[3]),
10019 copy_rtx (operands[0]))));
10020 if (second_test)
10021 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
10022 gen_rtx_IF_THEN_ELSE (mode,
10023 second_test,
10024 copy_rtx (operands[2]),
10025 copy_rtx (operands[0]))));
10026
10027 return 1; /* DONE */
10028 }
10029
10030 int
10031 ix86_expand_fp_movcc (rtx operands[])
10032 {
10033 enum machine_mode mode = GET_MODE (operands[0]);
10034 enum rtx_code code = GET_CODE (operands[1]);
10035 rtx tmp, compare_op, second_test, bypass_test;
10036
10037 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
10038 {
10039 rtx cmp_op0, cmp_op1, if_true, if_false;
10040 rtx clob;
10041 enum machine_mode vmode, cmode;
10042 bool is_minmax = false;
10043
10044 cmp_op0 = ix86_compare_op0;
10045 cmp_op1 = ix86_compare_op1;
10046 if_true = operands[2];
10047 if_false = operands[3];
10048
10049 /* Since we've no cmove for sse registers, don't force bad register
10050 allocation just to gain access to it. Deny movcc when the
10051 comparison mode doesn't match the move mode. */
10052 cmode = GET_MODE (cmp_op0);
10053 if (cmode == VOIDmode)
10054 cmode = GET_MODE (cmp_op1);
10055 if (cmode != mode)
10056 return 0;
10057
10058 /* We have no LTGT as an operator. We could implement it with
10059 NE & ORDERED, but this requires an extra temporary. It's
10060 not clear that it's worth it. */
10061 if (code == LTGT || code == UNEQ)
10062 return 0;
10063
10064 /* Massage condition to satisfy sse_comparison_operator. Try
10065 to canonicalize the destination operand to be first in the
10066 comparison - this helps reload to avoid extra moves. */
10067 if (!sse_comparison_operator (operands[1], VOIDmode)
10068 || (COMMUTATIVE_P (operands[1])
10069 && rtx_equal_p (operands[0], cmp_op1)))
10070 {
10071 tmp = cmp_op0;
10072 cmp_op0 = cmp_op1;
10073 cmp_op1 = tmp;
10074 code = swap_condition (code);
10075 }
10076
10077 /* Detect conditional moves that exactly match min/max operational
10078 semantics. Note that this is IEEE safe, as long as we don't
10079 interchange the operands, which is why we keep this in the form
10080 of an IF_THEN_ELSE instead of reducing to SMIN/SMAX. */
10081 if ((code == LT || code == UNGE) && REG_P (cmp_op0) && REG_P (cmp_op1))
10082 {
10083 if (((cmp_op0 == if_true && cmp_op1 == if_false)
10084 || (cmp_op0 == if_false && cmp_op1 == if_true)))
10085 {
10086 is_minmax = true;
10087 if (code == UNGE)
10088 {
10089 code = LT;
10090 tmp = if_true;
10091 if_true = if_false;
10092 if_false = tmp;
10093 }
10094 }
10095 }
10096
10097 if (mode == SFmode)
10098 vmode = V4SFmode;
10099 else if (mode == DFmode)
10100 vmode = V2DFmode;
10101 else
10102 gcc_unreachable ();
10103
10104 cmp_op0 = force_reg (mode, cmp_op0);
10105 if (!nonimmediate_operand (cmp_op1, mode))
10106 cmp_op1 = force_reg (mode, cmp_op1);
10107
10108 tmp = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
10109 gcc_assert (sse_comparison_operator (tmp, VOIDmode));
10110
10111 tmp = gen_rtx_IF_THEN_ELSE (mode, tmp, if_true, if_false);
10112 tmp = gen_rtx_SET (VOIDmode, operands[0], tmp);
10113
10114 if (!is_minmax)
10115 {
10116 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (vmode));
10117 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
10118 }
10119
10120 emit_insn (tmp);
10121 return 1;
10122 }
10123
10124 /* The floating point conditional move instructions don't directly
10125 support conditions resulting from a signed integer comparison. */
10126
10127 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10128
10129 /* If the comparison is not supported by fcmov, synthesize the result
10130 with setcc and test that against zero instead. */
10131
10132 if (!fcmov_comparison_operator (compare_op, VOIDmode))
10133 {
10134 if (second_test != NULL || bypass_test != NULL)
10135 abort ();
10136 tmp = gen_reg_rtx (QImode);
10137 ix86_expand_setcc (code, tmp);
10138 code = NE;
10139 ix86_compare_op0 = tmp;
10140 ix86_compare_op1 = const0_rtx;
10141 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10142 }
10143 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10144 {
10145 tmp = gen_reg_rtx (mode);
10146 emit_move_insn (tmp, operands[3]);
10147 operands[3] = tmp;
10148 }
10149 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10150 {
10151 tmp = gen_reg_rtx (mode);
10152 emit_move_insn (tmp, operands[2]);
10153 operands[2] = tmp;
10154 }
10155
10156 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10157 gen_rtx_IF_THEN_ELSE (mode, compare_op,
10158 operands[2], operands[3])));
10159 if (bypass_test)
10160 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10161 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
10162 operands[3], operands[0])));
10163 if (second_test)
10164 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10165 gen_rtx_IF_THEN_ELSE (mode, second_test,
10166 operands[2], operands[0])));
10167
10168 return 1;
10169 }
10170
10171 void
10172 ix86_split_sse_movcc (rtx operands[])
10173 {
10174 rtx dest, scratch, cmp, op_true, op_false, x;
10175 enum machine_mode mode, vmode;
10176
10177 /* Note that the operator CMP has been set up with matching constraints
10178 such that dest is valid for the comparison. Unless one of the true
10179 or false operands is zero, the true operand has already been placed
10180 in SCRATCH. */
10181 dest = operands[0];
10182 scratch = operands[1];
10183 op_true = operands[2];
10184 op_false = operands[3];
10185 cmp = operands[4];
10186
10187 mode = GET_MODE (dest);
10188 vmode = GET_MODE (scratch);
10189
10190 /* We need to make sure that the TRUE and FALSE operands are out of the
10191 way of the destination. Marking the destination earlyclobber doesn't
10192 work, since we want matching constraints for the actual comparison, so
10193 at some point we always wind up having to do a copy ourselves here.
10194 We very much prefer the TRUE value to be in SCRATCH. If it turns out
10195 that FALSE overlaps DEST, then we invert the comparison so that we
10196 still only have to do one move. */
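/* The masking sequences emitted below implement the classic branchless
   select; in C terms (illustrative, assuming the comparison yields an
   all-ones or all-zeros mask):
       dest = (mask & op_true) | (~mask & op_false);  */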
10197 if (rtx_equal_p (op_false, dest))
10198 {
10199 enum rtx_code code;
10200
10201 if (rtx_equal_p (op_true, dest))
10202 {
10203 /* ??? Really ought not happen. It means some optimizer managed
10204 to prove the operands were identical, but failed to fold the
10205 conditional move to a straight move. Do so here, because
10206 otherwise we'll generate incorrect code. And since they're
10207 both already in the destination register, nothing to do. */
10208 return;
10209 }
10210
10211 x = gen_rtx_REG (mode, REGNO (scratch));
10212 emit_move_insn (x, op_false);
10213 op_false = op_true;
10214 op_true = x;
10215
10216 code = GET_CODE (cmp);
10217 code = reverse_condition_maybe_unordered (code);
10218 cmp = gen_rtx_fmt_ee (code, mode, XEXP (cmp, 0), XEXP (cmp, 1));
10219 }
10220 else if (op_true == CONST0_RTX (mode))
10221 ;
10222 else if (op_false == CONST0_RTX (mode) && !rtx_equal_p (op_true, dest))
10223 ;
10224 else
10225 {
10226 x = gen_rtx_REG (mode, REGNO (scratch));
10227 emit_move_insn (x, op_true);
10228 op_true = x;
10229 }
10230
10231 emit_insn (gen_rtx_SET (VOIDmode, dest, cmp));
10232 dest = simplify_gen_subreg (vmode, dest, mode, 0);
10233
10234 if (op_false == CONST0_RTX (mode))
10235 {
10236 op_true = simplify_gen_subreg (vmode, op_true, mode, 0);
10237 x = gen_rtx_AND (vmode, dest, op_true);
10238 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10239 }
10240 else
10241 {
10242 op_false = simplify_gen_subreg (vmode, op_false, mode, 0);
10243
10244 if (op_true == CONST0_RTX (mode))
10245 {
10246 x = gen_rtx_NOT (vmode, dest);
10247 x = gen_rtx_AND (vmode, x, op_false);
10248 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10249 }
10250 else
10251 {
10252 x = gen_rtx_AND (vmode, scratch, dest);
10253 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
10254
10255 x = gen_rtx_NOT (vmode, dest);
10256 x = gen_rtx_AND (vmode, x, op_false);
10257 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10258
10259 x = gen_rtx_IOR (vmode, dest, scratch);
10260 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10261 }
10262 }
10263 }
10264
10265 /* Expand conditional increment or decrement using adc/sbb instructions.
10266 The default case using setcc followed by the conditional move can be
10267 done by generic code. */
10268 int
10269 ix86_expand_int_addcc (rtx operands[])
10270 {
10271 enum rtx_code code = GET_CODE (operands[1]);
10272 rtx compare_op;
10273 rtx val = const0_rtx;
10274 bool fpcmp = false;
10275 enum machine_mode mode = GET_MODE (operands[0]);
10276
10277 if (operands[3] != const1_rtx
10278 && operands[3] != constm1_rtx)
10279 return 0;
10280 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
10281 ix86_compare_op1, &compare_op))
10282 return 0;
10283 code = GET_CODE (compare_op);
10284
10285 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10286 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10287 {
10288 fpcmp = true;
10289 code = ix86_fp_compare_code_to_integer (code);
10290 }
10291
10292 if (code != LTU)
10293 {
10294 val = constm1_rtx;
10295 if (fpcmp)
10296 PUT_CODE (compare_op,
10297 reverse_condition_maybe_unordered
10298 (GET_CODE (compare_op)));
10299 else
10300 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
10301 }
10302 PUT_MODE (compare_op, mode);
10303
10304 /* Construct either adc or sbb insn. */
10305 if ((code == LTU) == (operands[3] == constm1_rtx))
10306 {
10307 switch (GET_MODE (operands[0]))
10308 {
10309 case QImode:
10310 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
10311 break;
10312 case HImode:
10313 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
10314 break;
10315 case SImode:
10316 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
10317 break;
10318 case DImode:
10319 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10320 break;
10321 default:
10322 abort ();
10323 }
10324 }
10325 else
10326 {
10327 switch (GET_MODE (operands[0]))
10328 {
10329 case QImode:
10330 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
10331 break;
10332 case HImode:
10333 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
10334 break;
10335 case SImode:
10336 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
10337 break;
10338 case DImode:
10339 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10340 break;
10341 default:
10342 abort ();
10343 }
10344 }
10345 return 1; /* DONE */
10346 }
10347
10348
10349 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
10350 works for floating point parameters and non-offsettable memories.
10351 For pushes, it returns just stack offsets; the values will be saved
10352 in the right order. At most three parts are generated. */
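/* Examples (illustrative, taken from the cases handled below): on a
   32-bit target a DFmode value splits into two SImode parts and an
   XFmode value into three, while on a 64-bit target a TImode value
   splits into two DImode parts. */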
10353
10354 static int
10355 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
10356 {
10357 int size;
10358
10359 if (!TARGET_64BIT)
10360 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
10361 else
10362 size = (GET_MODE_SIZE (mode) + 4) / 8;
10363
10364 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
10365 abort ();
10366 if (size < 2 || size > 3)
10367 abort ();
10368
10369 /* Optimize constant pool references into immediates. This is used by fp
10370 moves, which force all constants to memory to allow combining. */
10371 if (GET_CODE (operand) == MEM && MEM_READONLY_P (operand))
10372 {
10373 rtx tmp = maybe_get_pool_constant (operand);
10374 if (tmp)
10375 operand = tmp;
10376 }
10377
10378 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
10379 {
10380 /* The only non-offsettable memories we handle are pushes. */
10381 if (! push_operand (operand, VOIDmode))
10382 abort ();
10383
10384 operand = copy_rtx (operand);
10385 PUT_MODE (operand, Pmode);
10386 parts[0] = parts[1] = parts[2] = operand;
10387 return size;
10388 }
10389
10390 if (GET_CODE (operand) == CONST_VECTOR)
10391 {
10392 enum machine_mode imode = int_mode_for_mode (mode);
10393 operand = simplify_subreg (imode, operand, mode, 0);
10394 gcc_assert (operand != NULL);
10395 mode = imode;
10396 }
10397
10398 if (!TARGET_64BIT)
10399 {
10400 if (mode == DImode)
10401 split_di (&operand, 1, &parts[0], &parts[1]);
10402 else
10403 {
10404 if (REG_P (operand))
10405 {
10406 if (!reload_completed)
10407 abort ();
10408 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
10409 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
10410 if (size == 3)
10411 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
10412 }
10413 else if (offsettable_memref_p (operand))
10414 {
10415 operand = adjust_address (operand, SImode, 0);
10416 parts[0] = operand;
10417 parts[1] = adjust_address (operand, SImode, 4);
10418 if (size == 3)
10419 parts[2] = adjust_address (operand, SImode, 8);
10420 }
10421 else if (GET_CODE (operand) == CONST_DOUBLE)
10422 {
10423 REAL_VALUE_TYPE r;
10424 long l[4];
10425
10426 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10427 switch (mode)
10428 {
10429 case XFmode:
10430 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10431 parts[2] = gen_int_mode (l[2], SImode);
10432 break;
10433 case DFmode:
10434 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
10435 break;
10436 default:
10437 abort ();
10438 }
10439 parts[1] = gen_int_mode (l[1], SImode);
10440 parts[0] = gen_int_mode (l[0], SImode);
10441 }
10442 else
10443 abort ();
10444 }
10445 }
10446 else
10447 {
10448 if (mode == TImode)
10449 split_ti (&operand, 1, &parts[0], &parts[1]);
10450 if (mode == XFmode || mode == TFmode)
10451 {
10452 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
10453 if (REG_P (operand))
10454 {
10455 if (!reload_completed)
10456 abort ();
10457 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
10458 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
10459 }
10460 else if (offsettable_memref_p (operand))
10461 {
10462 operand = adjust_address (operand, DImode, 0);
10463 parts[0] = operand;
10464 parts[1] = adjust_address (operand, upper_mode, 8);
10465 }
10466 else if (GET_CODE (operand) == CONST_DOUBLE)
10467 {
10468 REAL_VALUE_TYPE r;
10469 long l[4];
10470
10471 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10472 real_to_target (l, &r, mode);
10473
10474 /* Do not use shift by 32 to avoid warning on 32bit systems. */
10475 if (HOST_BITS_PER_WIDE_INT >= 64)
10476 parts[0]
10477 = gen_int_mode
10478 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
10479 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
10480 DImode);
10481 else
10482 parts[0] = immed_double_const (l[0], l[1], DImode);
10483
10484 if (upper_mode == SImode)
10485 parts[1] = gen_int_mode (l[2], SImode);
10486 else if (HOST_BITS_PER_WIDE_INT >= 64)
10487 parts[1]
10488 = gen_int_mode
10489 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
10490 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
10491 DImode);
10492 else
10493 parts[1] = immed_double_const (l[2], l[3], DImode);
10494 }
10495 else
10496 abort ();
10497 }
10498 }
10499
10500 return size;
10501 }
10502
10503 /* Emit insns to perform a move or push of DI, DF, and XF values.
10504 The value is split into up to three parts; operands 2-4 receive the
10505 destination parts and operands 5-7 the corresponding source values,
10506 in the correct order. */
10507
10508 void
10509 ix86_split_long_move (rtx operands[])
10510 {
10511 rtx part[2][3];
10512 int nparts;
10513 int push = 0;
10514 int collisions = 0;
10515 enum machine_mode mode = GET_MODE (operands[0]);
10516
10517 /* The DFmode expanders may ask us to move a double.
10518 For a 64-bit target this is a single move. By hiding that fact
10519 here we simplify the i386.md splitters. */
10520 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
10521 {
10522 /* Optimize constant pool references into immediates. This is used by
10523 fp moves, which force all constants to memory to allow combining. */
10524
10525 if (GET_CODE (operands[1]) == MEM
10526 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
10527 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
10528 operands[1] = get_pool_constant (XEXP (operands[1], 0));
10529 if (push_operand (operands[0], VOIDmode))
10530 {
10531 operands[0] = copy_rtx (operands[0]);
10532 PUT_MODE (operands[0], Pmode);
10533 }
10534 else
10535 operands[0] = gen_lowpart (DImode, operands[0]);
10536 operands[1] = gen_lowpart (DImode, operands[1]);
10537 emit_move_insn (operands[0], operands[1]);
10538 return;
10539 }
10540
10541 /* The only non-offsettable memory we handle is push. */
10542 if (push_operand (operands[0], VOIDmode))
10543 push = 1;
10544 else if (GET_CODE (operands[0]) == MEM
10545 && ! offsettable_memref_p (operands[0]))
10546 abort ();
10547
10548 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
10549 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
10550
10551 /* When emitting push, take care for source operands on the stack. */
10552 if (push && GET_CODE (operands[1]) == MEM
10553 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
10554 {
10555 if (nparts == 3)
10556 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
10557 XEXP (part[1][2], 0));
10558 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
10559 XEXP (part[1][1], 0));
10560 }
10561
10562 /* We need to do copy in the right order in case an address register
10563 of the source overlaps the destination. */
10564 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
10565 {
10566 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
10567 collisions++;
10568 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10569 collisions++;
10570 if (nparts == 3
10571 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
10572 collisions++;
10573
10574 /* Collision in the middle part can be handled by reordering. */
10575 if (collisions == 1 && nparts == 3
10576 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10577 {
10578 rtx tmp;
10579 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
10580 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
10581 }
10582
10583 /* If there are more collisions, we can't handle it by reordering.
10584 Do an lea to the last part and use only one colliding move. */
10585 else if (collisions > 1)
10586 {
10587 rtx base;
10588
10589 collisions = 1;
10590
10591 base = part[0][nparts - 1];
10592
10593 /* Handle the case when the last part isn't valid for lea.
10594 Happens in 64-bit mode storing the 12-byte XFmode. */
10595 if (GET_MODE (base) != Pmode)
10596 base = gen_rtx_REG (Pmode, REGNO (base));
10597
10598 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
10599 part[1][0] = replace_equiv_address (part[1][0], base);
10600 part[1][1] = replace_equiv_address (part[1][1],
10601 plus_constant (base, UNITS_PER_WORD));
10602 if (nparts == 3)
10603 part[1][2] = replace_equiv_address (part[1][2],
10604 plus_constant (base, 8));
10605 }
10606 }
10607
10608 if (push)
10609 {
10610 if (!TARGET_64BIT)
10611 {
10612 if (nparts == 3)
10613 {
10614 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
10615 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
10616 emit_move_insn (part[0][2], part[1][2]);
10617 }
10618 }
10619 else
10620 {
10621 /* In 64-bit mode we don't have a 32-bit push available. If this is a
10622 register, that is OK - we will just use the larger counterpart. We also
10623 retype memory - these come from an attempt to avoid the REX prefix on
10624 moving the second half of a TFmode value. */
10625 if (GET_MODE (part[1][1]) == SImode)
10626 {
10627 if (GET_CODE (part[1][1]) == MEM)
10628 part[1][1] = adjust_address (part[1][1], DImode, 0);
10629 else if (REG_P (part[1][1]))
10630 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
10631 else
10632 abort ();
10633 if (GET_MODE (part[1][0]) == SImode)
10634 part[1][0] = part[1][1];
10635 }
10636 }
10637 emit_move_insn (part[0][1], part[1][1]);
10638 emit_move_insn (part[0][0], part[1][0]);
10639 return;
10640 }
10641
10642 /* Choose correct order to not overwrite the source before it is copied. */
10643 if ((REG_P (part[0][0])
10644 && REG_P (part[1][1])
10645 && (REGNO (part[0][0]) == REGNO (part[1][1])
10646 || (nparts == 3
10647 && REGNO (part[0][0]) == REGNO (part[1][2]))))
10648 || (collisions > 0
10649 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
10650 {
10651 if (nparts == 3)
10652 {
10653 operands[2] = part[0][2];
10654 operands[3] = part[0][1];
10655 operands[4] = part[0][0];
10656 operands[5] = part[1][2];
10657 operands[6] = part[1][1];
10658 operands[7] = part[1][0];
10659 }
10660 else
10661 {
10662 operands[2] = part[0][1];
10663 operands[3] = part[0][0];
10664 operands[5] = part[1][1];
10665 operands[6] = part[1][0];
10666 }
10667 }
10668 else
10669 {
10670 if (nparts == 3)
10671 {
10672 operands[2] = part[0][0];
10673 operands[3] = part[0][1];
10674 operands[4] = part[0][2];
10675 operands[5] = part[1][0];
10676 operands[6] = part[1][1];
10677 operands[7] = part[1][2];
10678 }
10679 else
10680 {
10681 operands[2] = part[0][0];
10682 operands[3] = part[0][1];
10683 operands[5] = part[1][0];
10684 operands[6] = part[1][1];
10685 }
10686 }
10687
10688 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
10689 if (optimize_size)
10690 {
10691 if (GET_CODE (operands[5]) == CONST_INT
10692 && operands[5] != const0_rtx
10693 && REG_P (operands[2]))
10694 {
10695 if (GET_CODE (operands[6]) == CONST_INT
10696 && INTVAL (operands[6]) == INTVAL (operands[5]))
10697 operands[6] = operands[2];
10698
10699 if (nparts == 3
10700 && GET_CODE (operands[7]) == CONST_INT
10701 && INTVAL (operands[7]) == INTVAL (operands[5]))
10702 operands[7] = operands[2];
10703 }
10704
10705 if (nparts == 3
10706 && GET_CODE (operands[6]) == CONST_INT
10707 && operands[6] != const0_rtx
10708 && REG_P (operands[3])
10709 && GET_CODE (operands[7]) == CONST_INT
10710 && INTVAL (operands[7]) == INTVAL (operands[6]))
10711 operands[7] = operands[3];
10712 }
10713
10714 emit_move_insn (operands[2], operands[5]);
10715 emit_move_insn (operands[3], operands[6]);
10716 if (nparts == 3)
10717 emit_move_insn (operands[4], operands[7]);
10718
10719 return;
10720 }
10721
10722 /* Helper function of ix86_split_ashldi used to generate an SImode
10723 left shift by a constant, either using a single shift or
10724 a sequence of add instructions. */
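/* For example (illustrative): on a target where two adds are cheaper
   than a constant shift, "x <<= 2" is emitted as two addl instructions
   (x += x; x += x) rather than a single shll $2. */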
10725
10726 static void
10727 ix86_expand_ashlsi3_const (rtx operand, int count)
10728 {
10729 if (count == 1)
10730 emit_insn (gen_addsi3 (operand, operand, operand));
10731 else if (!optimize_size
10732 && count * ix86_cost->add <= ix86_cost->shift_const)
10733 {
10734 int i;
10735 for (i=0; i<count; i++)
10736 emit_insn (gen_addsi3 (operand, operand, operand));
10737 }
10738 else
10739 emit_insn (gen_ashlsi3 (operand, operand, GEN_INT (count)));
10740 }
10741
10742 void
10743 ix86_split_ashldi (rtx *operands, rtx scratch)
10744 {
10745 rtx low[2], high[2];
10746 int count;
10747
10748 if (GET_CODE (operands[2]) == CONST_INT)
10749 {
10750 split_di (operands, 2, low, high);
10751 count = INTVAL (operands[2]) & 63;
10752
10753 if (count >= 32)
10754 {
10755 emit_move_insn (high[0], low[1]);
10756 emit_move_insn (low[0], const0_rtx);
10757
10758 if (count > 32)
10759 ix86_expand_ashlsi3_const (high[0], count - 32);
10760 }
10761 else
10762 {
10763 if (!rtx_equal_p (operands[0], operands[1]))
10764 emit_move_insn (operands[0], operands[1]);
10765 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
10766 ix86_expand_ashlsi3_const (low[0], count);
10767 }
10768 return;
10769 }
10770
10771 split_di (operands, 1, low, high);
10772
10773 if (operands[1] == const1_rtx)
10774 {
10775 /* Assuming we've chosen QImode-capable registers, 1LL << N
10776 can be done with two 32-bit shifts, no branches, no cmoves. */
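/* Illustrative shape of the sequence built here and completed by the
   shifts below (count is operands[2]):
       xor lo,lo ; xor hi,hi
       test $32, count
       sete lo  ; lo = 1 if (count & 32) == 0
       setne hi ; hi = 1 if (count & 32) != 0
       shl count,lo ; shl count,hi
   Only the low five bits of the count reach the shifts, so exactly one
   of the two words ends up holding 1 << (count & 31). */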
10777 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
10778 {
10779 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
10780
10781 ix86_expand_clear (low[0]);
10782 ix86_expand_clear (high[0]);
10783 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (32)));
10784
10785 d = gen_lowpart (QImode, low[0]);
10786 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
10787 s = gen_rtx_EQ (QImode, flags, const0_rtx);
10788 emit_insn (gen_rtx_SET (VOIDmode, d, s));
10789
10790 d = gen_lowpart (QImode, high[0]);
10791 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
10792 s = gen_rtx_NE (QImode, flags, const0_rtx);
10793 emit_insn (gen_rtx_SET (VOIDmode, d, s));
10794 }
10795
10796 /* Otherwise, we can get the same results by manually performing
10797 a bit extract operation on bit 5, and then performing the two
10798 shifts. The two methods of getting 0/1 into low/high are exactly
10799 the same size. Avoiding the shift in the bit extract case helps
10800 pentium4 a bit; no one else seems to care much either way. */
10801 else
10802 {
10803 rtx x;
10804
10805 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
10806 x = gen_rtx_ZERO_EXTEND (SImode, operands[2]);
10807 else
10808 x = gen_lowpart (SImode, operands[2]);
10809 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
10810
10811 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (5)));
10812 emit_insn (gen_andsi3 (high[0], high[0], GEN_INT (1)));
10813 emit_move_insn (low[0], high[0]);
10814 emit_insn (gen_xorsi3 (low[0], low[0], GEN_INT (1)));
10815 }
10816
10817 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
10818 emit_insn (gen_ashlsi3 (high[0], high[0], operands[2]));
10819 return;
10820 }
10821
10822 if (operands[1] == constm1_rtx)
10823 {
10824 /* For -1LL << N, we can avoid the shld instruction, because we
10825 know that we're shifting 0...31 ones into a -1. */
10826 emit_move_insn (low[0], constm1_rtx);
10827 if (optimize_size)
10828 emit_move_insn (high[0], low[0]);
10829 else
10830 emit_move_insn (high[0], constm1_rtx);
10831 }
10832 else
10833 {
10834 if (!rtx_equal_p (operands[0], operands[1]))
10835 emit_move_insn (operands[0], operands[1]);
10836
10837 split_di (operands, 1, low, high);
10838 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
10839 }
10840
10841 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
10842
10843 if (TARGET_CMOVE && scratch)
10844 {
10845 ix86_expand_clear (scratch);
10846 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
10847 }
10848 else
10849 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
10850 }
10851
10852 void
10853 ix86_split_ashrdi (rtx *operands, rtx scratch)
10854 {
10855 rtx low[2], high[2];
10856 int count;
10857
10858 if (GET_CODE (operands[2]) == CONST_INT)
10859 {
10860 split_di (operands, 2, low, high);
10861 count = INTVAL (operands[2]) & 63;
10862
10863 if (count == 63)
10864 {
10865 emit_move_insn (high[0], high[1]);
10866 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10867 emit_move_insn (low[0], high[0]);
10868
10869 }
10870 else if (count >= 32)
10871 {
10872 emit_move_insn (low[0], high[1]);
10873 emit_move_insn (high[0], low[0]);
10874 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10875 if (count > 32)
10876 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
10877 }
10878 else
10879 {
10880 if (!rtx_equal_p (operands[0], operands[1]))
10881 emit_move_insn (operands[0], operands[1]);
10882 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10883 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
10884 }
10885 }
10886 else
10887 {
10888 if (!rtx_equal_p (operands[0], operands[1]))
10889 emit_move_insn (operands[0], operands[1]);
10890
10891 split_di (operands, 1, low, high);
10892
10893 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10894 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
10895
10896 if (TARGET_CMOVE && scratch)
10897 {
10898 emit_move_insn (scratch, high[0]);
10899 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
10900 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10901 scratch));
10902 }
10903 else
10904 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
10905 }
10906 }
10907
10908 void
10909 ix86_split_lshrdi (rtx *operands, rtx scratch)
10910 {
10911 rtx low[2], high[2];
10912 int count;
10913
10914 if (GET_CODE (operands[2]) == CONST_INT)
10915 {
10916 split_di (operands, 2, low, high);
10917 count = INTVAL (operands[2]) & 63;
10918
10919 if (count >= 32)
10920 {
10921 emit_move_insn (low[0], high[1]);
10922 ix86_expand_clear (high[0]);
10923
10924 if (count > 32)
10925 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
10926 }
10927 else
10928 {
10929 if (!rtx_equal_p (operands[0], operands[1]))
10930 emit_move_insn (operands[0], operands[1]);
10931 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10932 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
10933 }
10934 }
10935 else
10936 {
10937 if (!rtx_equal_p (operands[0], operands[1]))
10938 emit_move_insn (operands[0], operands[1]);
10939
10940 split_di (operands, 1, low, high);
10941
10942 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10943 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
10944
10945 /* Heh. By reversing the arguments, we can reuse this pattern. */
10946 if (TARGET_CMOVE && scratch)
10947 {
10948 ix86_expand_clear (scratch);
10949 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10950 scratch));
10951 }
10952 else
10953 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
10954 }
10955 }
10956
10957 /* Helper function for the string operations below. Test whether the low
10958 bits of VARIABLE given by the mask VALUE are zero; if so, jump to the returned label. */
10959 static rtx
10960 ix86_expand_aligntest (rtx variable, int value)
10961 {
10962 rtx label = gen_label_rtx ();
10963 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
10964 if (GET_MODE (variable) == DImode)
10965 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
10966 else
10967 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
10968 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
10969 1, label);
10970 return label;
10971 }
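/* Typical use (illustrative) in the copy expanders below:
       rtx label = ix86_expand_aligntest (destreg, 1);
       ... emit a one-byte copy and ix86_adjust_counter (countreg, 1) ...
       emit_label (label);
   so the fixup is skipped when the destination is already aligned. */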
10972
10973 /* Decrease COUNTREG by VALUE. */
10974 static void
10975 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
10976 {
10977 if (GET_MODE (countreg) == DImode)
10978 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
10979 else
10980 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
10981 }
10982
10983 /* Zero extend possibly SImode EXP to Pmode register. */
10984 rtx
10985 ix86_zero_extend_to_Pmode (rtx exp)
10986 {
10987 rtx r;
10988 if (GET_MODE (exp) == VOIDmode)
10989 return force_reg (Pmode, exp);
10990 if (GET_MODE (exp) == Pmode)
10991 return copy_to_mode_reg (Pmode, exp);
10992 r = gen_reg_rtx (Pmode);
10993 emit_insn (gen_zero_extendsidi2 (r, exp));
10994 return r;
10995 }
10996
10997 /* Expand string move (memcpy) operation. Use i386 string operations when
10998 profitable. expand_clrmem contains similar code. */
10999 int
11000 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp)
11001 {
11002 rtx srcreg, destreg, countreg, srcexp, destexp;
11003 enum machine_mode counter_mode;
11004 HOST_WIDE_INT align = 0;
11005 unsigned HOST_WIDE_INT count = 0;
11006
11007 if (GET_CODE (align_exp) == CONST_INT)
11008 align = INTVAL (align_exp);
11009
11010 /* Can't use any of this if the user has appropriated esi or edi. */
11011 if (global_regs[4] || global_regs[5])
11012 return 0;
11013
11014 /* This simple hack avoids all inlining code and simplifies code below. */
11015 if (!TARGET_ALIGN_STRINGOPS)
11016 align = 64;
11017
11018 if (GET_CODE (count_exp) == CONST_INT)
11019 {
11020 count = INTVAL (count_exp);
11021 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
11022 return 0;
11023 }
11024
11025 /* Figure out proper mode for counter. For 32bits it is always SImode,
11026 for 64bits use SImode when possible, otherwise DImode.
11027 Set count to number of bytes copied when known at compile time. */
11028 if (!TARGET_64BIT
11029 || GET_MODE (count_exp) == SImode
11030 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
11031 counter_mode = SImode;
11032 else
11033 counter_mode = DImode;
11034
11035 if (counter_mode != SImode && counter_mode != DImode)
11036 abort ();
11037
11038 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
11039 if (destreg != XEXP (dst, 0))
11040 dst = replace_equiv_address_nv (dst, destreg);
11041 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
11042 if (srcreg != XEXP (src, 0))
11043 src = replace_equiv_address_nv (src, srcreg);
11044
11045 /* When optimizing for size emit simple rep ; movsb instruction for
11046 counts not divisible by 4. */
11047
11048 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
11049 {
11050 emit_insn (gen_cld ());
11051 countreg = ix86_zero_extend_to_Pmode (count_exp);
11052 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
11053 srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
11054 emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
11055 destexp, srcexp));
11056 }
11057
11058 /* For constant aligned (or small unaligned) copies use rep movsl
11059 followed by code copying the rest. For PentiumPro ensure 8 byte
11060 alignment to allow rep movsl acceleration. */
11061
11062 else if (count != 0
11063 && (align >= 8
11064 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
11065 || optimize_size || count < (unsigned int) 64))
11066 {
11067 unsigned HOST_WIDE_INT offset = 0;
11068 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
11069 rtx srcmem, dstmem;
11070
11071 emit_insn (gen_cld ());
11072 if (count & ~(size - 1))
11073 {
11074 countreg = copy_to_mode_reg (counter_mode,
11075 GEN_INT ((count >> (size == 4 ? 2 : 3))
11076 & (TARGET_64BIT ? -1 : 0x3fffffff)));
11077 countreg = ix86_zero_extend_to_Pmode (countreg);
11078
11079 destexp = gen_rtx_ASHIFT (Pmode, countreg,
11080 GEN_INT (size == 4 ? 2 : 3));
11081 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
11082 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11083
11084 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
11085 countreg, destexp, srcexp));
11086 offset = count & ~(size - 1);
11087 }
11088 if (size == 8 && (count & 0x04))
11089 {
11090 srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
11091 offset);
11092 dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
11093 offset);
11094 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11095 offset += 4;
11096 }
11097 if (count & 0x02)
11098 {
11099 srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
11100 offset);
11101 dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
11102 offset);
11103 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11104 offset += 2;
11105 }
11106 if (count & 0x01)
11107 {
11108 srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
11109 offset);
11110 dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
11111 offset);
11112 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11113 }
11114 }
11115 /* The generic code based on the glibc implementation:
11116 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
11117 allowing accelerated copying there)
11118 - copy the data using rep movsl
11119 - copy the rest. */
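/* Illustrative shape of that expansion for memcpy (dst, src, n) on ia32
   (not emitted verbatim):
       ; copy single bytes/words until dst is 4-byte aligned, adjusting n
       movl n, %ecx
       shrl $2, %ecx
       rep movsl        ; copy %ecx dwords
       ; then copy the remaining n & 3 bytes  */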
11120 else
11121 {
11122 rtx countreg2;
11123 rtx label = NULL;
11124 rtx srcmem, dstmem;
11125 int desired_alignment = (TARGET_PENTIUMPRO
11126 && (count == 0 || count >= (unsigned int) 260)
11127 ? 8 : UNITS_PER_WORD);
11128 /* Get rid of MEM_OFFSETs, they won't be accurate. */
11129 dst = change_address (dst, BLKmode, destreg);
11130 src = change_address (src, BLKmode, srcreg);
11131
11132 /* In case we don't know anything about the alignment, default to the
11133 library version, since it is usually equally fast and results in
11134 shorter code.
11135
11136 Also emit the call when we know that the count is large and the call
11137 overhead will not be important. */
11138 if (!TARGET_INLINE_ALL_STRINGOPS
11139 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11140 return 0;
11141
11142 if (TARGET_SINGLE_STRINGOP)
11143 emit_insn (gen_cld ());
11144
11145 countreg2 = gen_reg_rtx (Pmode);
11146 countreg = copy_to_mode_reg (counter_mode, count_exp);
11147
11148 /* We don't use loops to align destination and to copy parts smaller
11149 than 4 bytes, because gcc is able to optimize such code better (in
11150 the case the destination or the count really is aligned, gcc is often
11151 able to predict the branches) and also it is friendlier to the
11152 hardware branch prediction.
11153
11154 Using loops is beneficial for the generic case, because we can
11155 handle small counts using the loops. Many CPUs (such as Athlon)
11156 have large REP prefix setup costs.
11157
11158 This is quite costly. Maybe we can revisit this decision later or
11159 add some customizability to this code. */
11160
11161 if (count == 0 && align < desired_alignment)
11162 {
11163 label = gen_label_rtx ();
11164 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11165 LEU, 0, counter_mode, 1, label);
11166 }
11167 if (align <= 1)
11168 {
11169 rtx label = ix86_expand_aligntest (destreg, 1);
11170 srcmem = change_address (src, QImode, srcreg);
11171 dstmem = change_address (dst, QImode, destreg);
11172 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11173 ix86_adjust_counter (countreg, 1);
11174 emit_label (label);
11175 LABEL_NUSES (label) = 1;
11176 }
11177 if (align <= 2)
11178 {
11179 rtx label = ix86_expand_aligntest (destreg, 2);
11180 srcmem = change_address (src, HImode, srcreg);
11181 dstmem = change_address (dst, HImode, destreg);
11182 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11183 ix86_adjust_counter (countreg, 2);
11184 emit_label (label);
11185 LABEL_NUSES (label) = 1;
11186 }
11187 if (align <= 4 && desired_alignment > 4)
11188 {
11189 rtx label = ix86_expand_aligntest (destreg, 4);
11190 srcmem = change_address (src, SImode, srcreg);
11191 dstmem = change_address (dst, SImode, destreg);
11192 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11193 ix86_adjust_counter (countreg, 4);
11194 emit_label (label);
11195 LABEL_NUSES (label) = 1;
11196 }
11197
11198 if (label && desired_alignment > 4 && !TARGET_64BIT)
11199 {
11200 emit_label (label);
11201 LABEL_NUSES (label) = 1;
11202 label = NULL_RTX;
11203 }
11204 if (!TARGET_SINGLE_STRINGOP)
11205 emit_insn (gen_cld ());
11206 if (TARGET_64BIT)
11207 {
11208 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11209 GEN_INT (3)));
11210 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
11211 }
11212 else
11213 {
11214 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
11215 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
11216 }
11217 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
11218 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11219 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
11220 countreg2, destexp, srcexp));
11221
11222 if (label)
11223 {
11224 emit_label (label);
11225 LABEL_NUSES (label) = 1;
11226 }
11227 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11228 {
11229 srcmem = change_address (src, SImode, srcreg);
11230 dstmem = change_address (dst, SImode, destreg);
11231 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11232 }
11233 if ((align <= 4 || count == 0) && TARGET_64BIT)
11234 {
11235 rtx label = ix86_expand_aligntest (countreg, 4);
11236 srcmem = change_address (src, SImode, srcreg);
11237 dstmem = change_address (dst, SImode, destreg);
11238 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11239 emit_label (label);
11240 LABEL_NUSES (label) = 1;
11241 }
11242 if (align > 2 && count != 0 && (count & 2))
11243 {
11244 srcmem = change_address (src, HImode, srcreg);
11245 dstmem = change_address (dst, HImode, destreg);
11246 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11247 }
11248 if (align <= 2 || count == 0)
11249 {
11250 rtx label = ix86_expand_aligntest (countreg, 2);
11251 srcmem = change_address (src, HImode, srcreg);
11252 dstmem = change_address (dst, HImode, destreg);
11253 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11254 emit_label (label);
11255 LABEL_NUSES (label) = 1;
11256 }
11257 if (align > 1 && count != 0 && (count & 1))
11258 {
11259 srcmem = change_address (src, QImode, srcreg);
11260 dstmem = change_address (dst, QImode, destreg);
11261 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11262 }
11263 if (align <= 1 || count == 0)
11264 {
11265 rtx label = ix86_expand_aligntest (countreg, 1);
11266 srcmem = change_address (src, QImode, srcreg);
11267 dstmem = change_address (dst, QImode, destreg);
11268 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11269 emit_label (label);
11270 LABEL_NUSES (label) = 1;
11271 }
11272 }
11273
11274 return 1;
11275 }
11276
11277 /* Expand string clear operation (bzero). Use i386 string operations when
11278 profitable. expand_movmem contains similar code. */
11279 int
11280 ix86_expand_clrmem (rtx dst, rtx count_exp, rtx align_exp)
11281 {
11282 rtx destreg, zeroreg, countreg, destexp;
11283 enum machine_mode counter_mode;
11284 HOST_WIDE_INT align = 0;
11285 unsigned HOST_WIDE_INT count = 0;
11286
11287 if (GET_CODE (align_exp) == CONST_INT)
11288 align = INTVAL (align_exp);
11289
11290 /* Can't use any of this if the user has appropriated esi. */
11291 if (global_regs[4])
11292 return 0;
11293
11294 /* This simple hack avoids all inlining code and simplifies code below. */
11295 if (!TARGET_ALIGN_STRINGOPS)
11296 align = 32;
11297
11298 if (GET_CODE (count_exp) == CONST_INT)
11299 {
11300 count = INTVAL (count_exp);
11301 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
11302 return 0;
11303 }
11304 /* Figure out the proper mode for the counter. For 32-bit it is always
11305 SImode; for 64-bit use SImode when possible, otherwise DImode.
11306 Set count to the number of bytes to clear when known at compile time. */
11307 if (!TARGET_64BIT
11308 || GET_MODE (count_exp) == SImode
11309 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
11310 counter_mode = SImode;
11311 else
11312 counter_mode = DImode;
11313
11314 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
11315 if (destreg != XEXP (dst, 0))
11316 dst = replace_equiv_address_nv (dst, destreg);
11317
11318
11319 /* When optimizing for size emit a simple rep ; stosb instruction for
11320 counts not divisible by 4. The movl $N, %ecx; rep; stosb
11321 sequence is 7 bytes long, so if optimizing for size and count is
11322 small enough that some stosl, stosw and stosb instructions without
11323 rep are shorter, fall back into the next if. */
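/* For example, with -Os a count of 6 gives (count & 0x03) + (count >> 2)
   == 3 <= 7, so we fall through to the discrete stosl/stosw/stosb code
   below, while a count of 35 gives 3 + 8 == 11 > 7 and the rep ; stosb
   form is used instead.  */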
11324
11325 if ((!optimize || optimize_size)
11326 && (count == 0
11327 || ((count & 0x03)
11328 && (!optimize_size || (count & 0x03) + (count >> 2) > 7))))
11329 {
11330 emit_insn (gen_cld ());
11331
11332 countreg = ix86_zero_extend_to_Pmode (count_exp);
11333 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
11334 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
11335 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
11336 }
11337 else if (count != 0
11338 && (align >= 8
11339 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
11340 || optimize_size || count < (unsigned int) 64))
11341 {
11342 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
11343 unsigned HOST_WIDE_INT offset = 0;
11344
11345 emit_insn (gen_cld ());
11346
11347 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
11348 if (count & ~(size - 1))
11349 {
11350 unsigned HOST_WIDE_INT repcount;
11351 unsigned int max_nonrep;
11352
11353 repcount = count >> (size == 4 ? 2 : 3);
11354 if (!TARGET_64BIT)
11355 repcount &= 0x3fffffff;
11356
11357 /* movl $N, %ecx; rep; stosl is 7 bytes, while N x stosl is N bytes.
11358 movl $N, %ecx; rep; stosq is 8 bytes, while N x stosq is 2xN
11359 bytes. In both cases the latter seems to be faster for small
11360 values of N. */
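/* E.g. clearing 20 aligned bytes with size == 4 gives repcount == 5,
   which is below the default limit of 7, so five unrolled stores are
   emitted instead of the movl/rep/stosl sequence.  */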
11361 max_nonrep = size == 4 ? 7 : 4;
11362 if (!optimize_size)
11363 switch (ix86_tune)
11364 {
11365 case PROCESSOR_PENTIUM4:
11366 case PROCESSOR_NOCONA:
11367 max_nonrep = 3;
11368 break;
11369 default:
11370 break;
11371 }
11372
11373 if (repcount <= max_nonrep)
11374 while (repcount-- > 0)
11375 {
11376 rtx mem = adjust_automodify_address_nv (dst,
11377 GET_MODE (zeroreg),
11378 destreg, offset);
11379 emit_insn (gen_strset (destreg, mem, zeroreg));
11380 offset += size;
11381 }
11382 else
11383 {
11384 countreg = copy_to_mode_reg (counter_mode, GEN_INT (repcount));
11385 countreg = ix86_zero_extend_to_Pmode (countreg);
11386 destexp = gen_rtx_ASHIFT (Pmode, countreg,
11387 GEN_INT (size == 4 ? 2 : 3));
11388 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11389 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg,
11390 destexp));
11391 offset = count & ~(size - 1);
11392 }
11393 }
11394 if (size == 8 && (count & 0x04))
11395 {
11396 rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
11397 offset);
11398 emit_insn (gen_strset (destreg, mem,
11399 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11400 offset += 4;
11401 }
11402 if (count & 0x02)
11403 {
11404 rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
11405 offset);
11406 emit_insn (gen_strset (destreg, mem,
11407 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11408 offset += 2;
11409 }
11410 if (count & 0x01)
11411 {
11412 rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
11413 offset);
11414 emit_insn (gen_strset (destreg, mem,
11415 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11416 }
11417 }
11418 else
11419 {
11420 rtx countreg2;
11421 rtx label = NULL;
11422 /* Compute desired alignment of the string operation. */
11423 int desired_alignment = (TARGET_PENTIUMPRO
11424 && (count == 0 || count >= (unsigned int) 260)
11425 ? 8 : UNITS_PER_WORD);
11426
11427 /* In case we don't know anything about the alignment, default to
11428 the library version, since it is usually equally fast and results in
11429 shorter code.
11430
11431 Also emit call when we know that the count is large and call overhead
11432 will not be important. */
11433 if (!TARGET_INLINE_ALL_STRINGOPS
11434 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11435 return 0;
11436
11437 if (TARGET_SINGLE_STRINGOP)
11438 emit_insn (gen_cld ());
11439
11440 countreg2 = gen_reg_rtx (Pmode);
11441 countreg = copy_to_mode_reg (counter_mode, count_exp);
11442 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
11443 /* Get rid of MEM_OFFSET, it won't be accurate. */
11444 dst = change_address (dst, BLKmode, destreg);
11445
11446 if (count == 0 && align < desired_alignment)
11447 {
11448 label = gen_label_rtx ();
11449 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11450 LEU, 0, counter_mode, 1, label);
11451 }
11452 if (align <= 1)
11453 {
11454 rtx label = ix86_expand_aligntest (destreg, 1);
11455 emit_insn (gen_strset (destreg, dst,
11456 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11457 ix86_adjust_counter (countreg, 1);
11458 emit_label (label);
11459 LABEL_NUSES (label) = 1;
11460 }
11461 if (align <= 2)
11462 {
11463 rtx label = ix86_expand_aligntest (destreg, 2);
11464 emit_insn (gen_strset (destreg, dst,
11465 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11466 ix86_adjust_counter (countreg, 2);
11467 emit_label (label);
11468 LABEL_NUSES (label) = 1;
11469 }
11470 if (align <= 4 && desired_alignment > 4)
11471 {
11472 rtx label = ix86_expand_aligntest (destreg, 4);
11473 emit_insn (gen_strset (destreg, dst,
11474 (TARGET_64BIT
11475 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
11476 : zeroreg)));
11477 ix86_adjust_counter (countreg, 4);
11478 emit_label (label);
11479 LABEL_NUSES (label) = 1;
11480 }
11481
11482 if (label && desired_alignment > 4 && !TARGET_64BIT)
11483 {
11484 emit_label (label);
11485 LABEL_NUSES (label) = 1;
11486 label = NULL_RTX;
11487 }
11488
11489 if (!TARGET_SINGLE_STRINGOP)
11490 emit_insn (gen_cld ());
11491 if (TARGET_64BIT)
11492 {
11493 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11494 GEN_INT (3)));
11495 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
11496 }
11497 else
11498 {
11499 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
11500 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
11501 }
11502 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11503 emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));
11504
11505 if (label)
11506 {
11507 emit_label (label);
11508 LABEL_NUSES (label) = 1;
11509 }
11510
11511 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11512 emit_insn (gen_strset (destreg, dst,
11513 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11514 if (TARGET_64BIT && (align <= 4 || count == 0))
11515 {
11516 rtx label = ix86_expand_aligntest (countreg, 4);
11517 emit_insn (gen_strset (destreg, dst,
11518 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11519 emit_label (label);
11520 LABEL_NUSES (label) = 1;
11521 }
11522 if (align > 2 && count != 0 && (count & 2))
11523 emit_insn (gen_strset (destreg, dst,
11524 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11525 if (align <= 2 || count == 0)
11526 {
11527 rtx label = ix86_expand_aligntest (countreg, 2);
11528 emit_insn (gen_strset (destreg, dst,
11529 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11530 emit_label (label);
11531 LABEL_NUSES (label) = 1;
11532 }
11533 if (align > 1 && count != 0 && (count & 1))
11534 emit_insn (gen_strset (destreg, dst,
11535 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11536 if (align <= 1 || count == 0)
11537 {
11538 rtx label = ix86_expand_aligntest (countreg, 1);
11539 emit_insn (gen_strset (destreg, dst,
11540 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11541 emit_label (label);
11542 LABEL_NUSES (label) = 1;
11543 }
11544 }
11545 return 1;
11546 }
11547
11548 /* Expand strlen. */
11549 int
11550 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
11551 {
11552 rtx addr, scratch1, scratch2, scratch3, scratch4;
11553
11554 /* The generic case of the strlen expander is long. Avoid
11555 expanding it unless TARGET_INLINE_ALL_STRINGOPS. */
11556
11557 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11558 && !TARGET_INLINE_ALL_STRINGOPS
11559 && !optimize_size
11560 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
11561 return 0;
11562
11563 addr = force_reg (Pmode, XEXP (src, 0));
11564 scratch1 = gen_reg_rtx (Pmode);
11565
11566 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11567 && !optimize_size)
11568 {
11569 /* Well it seems that some optimizer does not combine a call like
11570 foo(strlen(bar), strlen(bar));
11571 when the move and the subtraction are done here. It does calculate
11572 the length just once when these instructions are done inside of
11573 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
11574 often used and I use one fewer register for the lifetime of
11575 output_strlen_unroll() this is better. */
11576
11577 emit_move_insn (out, addr);
11578
11579 ix86_expand_strlensi_unroll_1 (out, src, align);
11580
11581 /* strlensi_unroll_1 returns the address of the zero at the end of
11582 the string, like memchr(), so compute the length by subtracting
11583 the start address. */
11584 if (TARGET_64BIT)
11585 emit_insn (gen_subdi3 (out, out, addr));
11586 else
11587 emit_insn (gen_subsi3 (out, out, addr));
11588 }
11589 else
11590 {
11591 rtx unspec;
11592 scratch2 = gen_reg_rtx (Pmode);
11593 scratch3 = gen_reg_rtx (Pmode);
11594 scratch4 = force_reg (Pmode, constm1_rtx);
11595
11596 emit_move_insn (scratch3, addr);
11597 eoschar = force_reg (QImode, eoschar);
11598
11599 emit_insn (gen_cld ());
11600 src = replace_equiv_address_nv (src, scratch3);
11601
11602 /* If .md starts supporting :P, this can be done in .md. */
11603 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
11604 scratch4), UNSPEC_SCAS);
11605 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
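/* scratch1 now holds the residual count left by the repnz ; scasb scan:
   it started at -1 and is decremented once per byte examined, terminator
   included, so the length is ~scratch1 - 1, which the code below
   computes as (~scratch1) + (-1).  */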
11606 if (TARGET_64BIT)
11607 {
11608 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
11609 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
11610 }
11611 else
11612 {
11613 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
11614 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
11615 }
11616 }
11617 return 1;
11618 }
11619
11620 /* Expand the appropriate insns for doing strlen if not just doing
11621 repnz; scasb
11622
11623 out = result, initialized with the start address
11624 align_rtx = alignment of the address.
11625 scratch = scratch register, initialized with the start address when
11626 not aligned, otherwise undefined
11627
11628 This is just the body. It needs the initializations mentioned above and
11629 some address computing at the end. These things are done in i386.md. */
11630
11631 static void
11632 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
11633 {
11634 int align;
11635 rtx tmp;
11636 rtx align_2_label = NULL_RTX;
11637 rtx align_3_label = NULL_RTX;
11638 rtx align_4_label = gen_label_rtx ();
11639 rtx end_0_label = gen_label_rtx ();
11640 rtx mem;
11641 rtx tmpreg = gen_reg_rtx (SImode);
11642 rtx scratch = gen_reg_rtx (SImode);
11643 rtx cmp;
11644
11645 align = 0;
11646 if (GET_CODE (align_rtx) == CONST_INT)
11647 align = INTVAL (align_rtx);
11648
11649 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
11650
11651 /* Is there a known alignment and is it less than 4? */
11652 if (align < 4)
11653 {
11654 rtx scratch1 = gen_reg_rtx (Pmode);
11655 emit_move_insn (scratch1, out);
11656 /* Is there a known alignment and is it not 2? */
11657 if (align != 2)
11658 {
11659 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
11660 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
11661
11662 /* Leave just the 3 lower bits. */
11663 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
11664 NULL_RTX, 0, OPTAB_WIDEN);
11665
11666 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11667 Pmode, 1, align_4_label);
11668 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
11669 Pmode, 1, align_2_label);
11670 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
11671 Pmode, 1, align_3_label);
11672 }
11673 else
11674 {
11675 /* Since the alignment is 2, we have to check 2 or 0 bytes;
11676 check whether it is aligned to a 4-byte boundary. */
11677
11678 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
11679 NULL_RTX, 0, OPTAB_WIDEN);
11680
11681 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11682 Pmode, 1, align_4_label);
11683 }
11684
11685 mem = change_address (src, QImode, out);
11686
11687 /* Now compare the bytes. */
11688
11689 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
11690 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
11691 QImode, 1, end_0_label);
11692
11693 /* Increment the address. */
11694 if (TARGET_64BIT)
11695 emit_insn (gen_adddi3 (out, out, const1_rtx));
11696 else
11697 emit_insn (gen_addsi3 (out, out, const1_rtx));
11698
11699 /* Not needed with an alignment of 2. */
11700 if (align != 2)
11701 {
11702 emit_label (align_2_label);
11703
11704 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11705 end_0_label);
11706
11707 if (TARGET_64BIT)
11708 emit_insn (gen_adddi3 (out, out, const1_rtx));
11709 else
11710 emit_insn (gen_addsi3 (out, out, const1_rtx));
11711
11712 emit_label (align_3_label);
11713 }
11714
11715 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11716 end_0_label);
11717
11718 if (TARGET_64BIT)
11719 emit_insn (gen_adddi3 (out, out, const1_rtx));
11720 else
11721 emit_insn (gen_addsi3 (out, out, const1_rtx));
11722 }
11723
11724 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
11725 align this loop: doing so only makes the program larger and does not
11726 make it any faster. */
11727 emit_label (align_4_label);
11728
11729 mem = change_address (src, SImode, out);
11730 emit_move_insn (scratch, mem);
11731 if (TARGET_64BIT)
11732 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
11733 else
11734 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
11735
11736 /* This formula yields a nonzero result iff one of the bytes is zero.
11737 This saves three branches inside the loop and many cycles. */
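/* Illustration: for scratch == 0x12003456 (a zero in byte 2),
   scratch - 0x01010101 == 0x10ff3355 and ~scratch == 0xedffcba9, so
   (0x10ff3355 & 0xedffcba9) & 0x80808080 == 0x00800000 != 0 and the
   zero byte is detected; a word with no zero byte always yields 0.  */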
11738
11739 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
11740 emit_insn (gen_one_cmplsi2 (scratch, scratch));
11741 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
11742 emit_insn (gen_andsi3 (tmpreg, tmpreg,
11743 gen_int_mode (0x80808080, SImode)));
11744 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
11745 align_4_label);
11746
11747 if (TARGET_CMOVE)
11748 {
11749 rtx reg = gen_reg_rtx (SImode);
11750 rtx reg2 = gen_reg_rtx (Pmode);
11751 emit_move_insn (reg, tmpreg);
11752 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
11753
11754 /* If zero is not in the first two bytes, move two bytes forward. */
11755 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11756 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11757 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11758 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
11759 gen_rtx_IF_THEN_ELSE (SImode, tmp,
11760 reg,
11761 tmpreg)));
11762 /* Emit lea manually to avoid clobbering of flags. */
11763 emit_insn (gen_rtx_SET (SImode, reg2,
11764 gen_rtx_PLUS (Pmode, out, const2_rtx)));
11765
11766 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11767 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11768 emit_insn (gen_rtx_SET (VOIDmode, out,
11769 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
11770 reg2,
11771 out)));
11772
11773 }
11774 else
11775 {
11776 rtx end_2_label = gen_label_rtx ();
11777 /* Is zero in the first two bytes? */
11778
11779 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11780 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11781 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
11782 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11783 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
11784 pc_rtx);
11785 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11786 JUMP_LABEL (tmp) = end_2_label;
11787
11788 /* Not in the first two. Move two bytes forward. */
11789 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
11790 if (TARGET_64BIT)
11791 emit_insn (gen_adddi3 (out, out, const2_rtx));
11792 else
11793 emit_insn (gen_addsi3 (out, out, const2_rtx));
11794
11795 emit_label (end_2_label);
11796
11797 }
11798
11799 /* Avoid branch in fixing the byte. */
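/* At this point OUT is 3 or 4 bytes past the zero byte, and bit 7 of the
   low byte of TMPREG is set exactly when the zero byte is the earlier of
   the two remaining candidates.  Doubling TMPREG moves that bit into the
   carry flag, and the following subtract-with-borrow of 3 backs OUT up
   by 3 or 4 so that it points at the zero byte itself.  */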
11800 tmpreg = gen_lowpart (QImode, tmpreg);
11801 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
11802 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
11803 if (TARGET_64BIT)
11804 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
11805 else
11806 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
11807
11808 emit_label (end_0_label);
11809 }
11810
11811 void
11812 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
11813 rtx callarg2 ATTRIBUTE_UNUSED,
11814 rtx pop, int sibcall)
11815 {
11816 rtx use = NULL, call;
11817
11818 if (pop == const0_rtx)
11819 pop = NULL;
11820 if (TARGET_64BIT && pop)
11821 abort ();
11822
11823 #if TARGET_MACHO
11824 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
11825 fnaddr = machopic_indirect_call_target (fnaddr);
11826 #else
11827 /* Static functions and indirect calls don't need the pic register. */
11828 if (! TARGET_64BIT && flag_pic
11829 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
11830 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
11831 use_reg (&use, pic_offset_table_rtx);
11832
11833 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
11834 {
11835 rtx al = gen_rtx_REG (QImode, 0);
11836 emit_move_insn (al, callarg2);
11837 use_reg (&use, al);
11838 }
11839 #endif /* TARGET_MACHO */
11840
11841 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
11842 {
11843 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11844 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11845 }
11846 if (sibcall && TARGET_64BIT
11847 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
11848 {
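/* Load the target into R11: it is a caller-saved register that the
   x86-64 ABI does not use for argument passing, so it is still free
   at the point of a sibling call.  */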
11849 rtx addr;
11850 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11851 fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
11852 emit_move_insn (fnaddr, addr);
11853 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11854 }
11855
11856 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
11857 if (retval)
11858 call = gen_rtx_SET (VOIDmode, retval, call);
11859 if (pop)
11860 {
11861 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
11862 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
11863 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
11864 }
11865
11866 call = emit_call_insn (call);
11867 if (use)
11868 CALL_INSN_FUNCTION_USAGE (call) = use;
11869 }
11870
11871 \f
11872 /* Clear stack slot assignments remembered from previous functions.
11873 This is called from INIT_EXPANDERS once before RTL is emitted for each
11874 function. */
11875
11876 static struct machine_function *
11877 ix86_init_machine_status (void)
11878 {
11879 struct machine_function *f;
11880
11881 f = ggc_alloc_cleared (sizeof (struct machine_function));
11882 f->use_fast_prologue_epilogue_nregs = -1;
11883
11884 return f;
11885 }
11886
11887 /* Return a MEM corresponding to a stack slot with mode MODE.
11888 Allocate a new slot if necessary.
11889
11890 The RTL for a function can have several slots available: N is
11891 which slot to use. */
11892
11893 rtx
11894 assign_386_stack_local (enum machine_mode mode, int n)
11895 {
11896 struct stack_local_entry *s;
11897
11898 if (n < 0 || n >= MAX_386_STACK_LOCALS)
11899 abort ();
11900
11901 for (s = ix86_stack_locals; s; s = s->next)
11902 if (s->mode == mode && s->n == n)
11903 return s->rtl;
11904
11905 s = (struct stack_local_entry *)
11906 ggc_alloc (sizeof (struct stack_local_entry));
11907 s->n = n;
11908 s->mode = mode;
11909 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
11910
11911 s->next = ix86_stack_locals;
11912 ix86_stack_locals = s;
11913 return s->rtl;
11914 }
11915
11916 /* Construct the SYMBOL_REF for the tls_get_addr function. */
11917
11918 static GTY(()) rtx ix86_tls_symbol;
11919 rtx
11920 ix86_tls_get_addr (void)
11921 {
11922
11923 if (!ix86_tls_symbol)
11924 {
11925 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
11926 (TARGET_GNU_TLS && !TARGET_64BIT)
11927 ? "___tls_get_addr"
11928 : "__tls_get_addr");
11929 }
11930
11931 return ix86_tls_symbol;
11932 }
11933 \f
11934 /* Calculate the length of the memory address in the instruction
11935 encoding. Does not include the one-byte modrm, opcode, or prefix. */
11936
11937 int
11938 memory_address_length (rtx addr)
11939 {
11940 struct ix86_address parts;
11941 rtx base, index, disp;
11942 int len;
11943
11944 if (GET_CODE (addr) == PRE_DEC
11945 || GET_CODE (addr) == POST_INC
11946 || GET_CODE (addr) == PRE_MODIFY
11947 || GET_CODE (addr) == POST_MODIFY)
11948 return 0;
11949
11950 if (! ix86_decompose_address (addr, &parts))
11951 abort ();
11952
11953 if (parts.base && GET_CODE (parts.base) == SUBREG)
11954 parts.base = SUBREG_REG (parts.base);
11955 if (parts.index && GET_CODE (parts.index) == SUBREG)
11956 parts.index = SUBREG_REG (parts.index);
11957
11958 base = parts.base;
11959 index = parts.index;
11960 disp = parts.disp;
11961 len = 0;
11962
11963 /* Rule of thumb:
11964 - esp as the base always wants an index,
11965 - ebp as the base always wants a displacement. */
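/* For example, "movl (%ecx), %eax" needs only the one-byte modrm, while
   "movl (%esp), %eax" needs an extra SIB byte and "movl (%ebp), %eax"
   must be encoded with a zero 8-bit displacement.  */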
11966
11967 /* Register Indirect. */
11968 if (base && !index && !disp)
11969 {
11970 /* esp (for its index) and ebp (for its displacement) need
11971 the two-byte modrm form. */
11972 if (addr == stack_pointer_rtx
11973 || addr == arg_pointer_rtx
11974 || addr == frame_pointer_rtx
11975 || addr == hard_frame_pointer_rtx)
11976 len = 1;
11977 }
11978
11979 /* Direct Addressing. */
11980 else if (disp && !base && !index)
11981 len = 4;
11982
11983 else
11984 {
11985 /* Find the length of the displacement constant. */
11986 if (disp)
11987 {
11988 if (GET_CODE (disp) == CONST_INT
11989 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K')
11990 && base)
11991 len = 1;
11992 else
11993 len = 4;
11994 }
11995 /* ebp always wants a displacement. */
11996 else if (base == hard_frame_pointer_rtx)
11997 len = 1;
11998
11999 /* An index requires the two-byte modrm form.... */
12000 if (index
12001 /* ...like esp, which always wants an index. */
12002 || base == stack_pointer_rtx
12003 || base == arg_pointer_rtx
12004 || base == frame_pointer_rtx)
12005 len += 1;
12006 }
12007
12008 return len;
12009 }
12010
12011 /* Compute default value for "length_immediate" attribute. When SHORTFORM
12012 is set, expect that the insn has an 8-bit immediate alternative. */
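/* For example, "addl $3, %eax" can use the sign-extended 8-bit immediate
   form (1 byte of immediate), while "addl $1000, %eax" needs a full
   32-bit immediate (4 bytes).  */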
12013 int
12014 ix86_attr_length_immediate_default (rtx insn, int shortform)
12015 {
12016 int len = 0;
12017 int i;
12018 extract_insn_cached (insn);
12019 for (i = recog_data.n_operands - 1; i >= 0; --i)
12020 if (CONSTANT_P (recog_data.operand[i]))
12021 {
12022 if (len)
12023 abort ();
12024 if (shortform
12025 && GET_CODE (recog_data.operand[i]) == CONST_INT
12026 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
12027 len = 1;
12028 else
12029 {
12030 switch (get_attr_mode (insn))
12031 {
12032 case MODE_QI:
12033 len += 1;
12034 break;
12035 case MODE_HI:
12036 len += 2;
12037 break;
12038 case MODE_SI:
12039 len += 4;
12040 break;
12041 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
12042 case MODE_DI:
12043 len += 4;
12044 break;
12045 default:
12046 fatal_insn ("unknown insn mode", insn);
12047 }
12048 }
12049 }
12050 return len;
12051 }
12052 /* Compute default value for "length_address" attribute. */
12053 int
12054 ix86_attr_length_address_default (rtx insn)
12055 {
12056 int i;
12057
12058 if (get_attr_type (insn) == TYPE_LEA)
12059 {
12060 rtx set = PATTERN (insn);
12061 if (GET_CODE (set) == SET)
12062 ;
12063 else if (GET_CODE (set) == PARALLEL
12064 && GET_CODE (XVECEXP (set, 0, 0)) == SET)
12065 set = XVECEXP (set, 0, 0);
12066 else
12067 {
12068 #ifdef ENABLE_CHECKING
12069 abort ();
12070 #endif
12071 return 0;
12072 }
12073
12074 return memory_address_length (SET_SRC (set));
12075 }
12076
12077 extract_insn_cached (insn);
12078 for (i = recog_data.n_operands - 1; i >= 0; --i)
12079 if (GET_CODE (recog_data.operand[i]) == MEM)
12081 return memory_address_length (XEXP (recog_data.operand[i], 0));
12084 return 0;
12085 }
12086 \f
12087 /* Return the maximum number of instructions a cpu can issue. */
12088
12089 static int
12090 ix86_issue_rate (void)
12091 {
12092 switch (ix86_tune)
12093 {
12094 case PROCESSOR_PENTIUM:
12095 case PROCESSOR_K6:
12096 return 2;
12097
12098 case PROCESSOR_PENTIUMPRO:
12099 case PROCESSOR_PENTIUM4:
12100 case PROCESSOR_ATHLON:
12101 case PROCESSOR_K8:
12102 case PROCESSOR_NOCONA:
12103 return 3;
12104
12105 default:
12106 return 1;
12107 }
12108 }
12109
12110 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads the flags
12111 set by DEP_INSN and nothing else that DEP_INSN sets. */
12112
12113 static int
12114 ix86_flags_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
12115 {
12116 rtx set, set2;
12117
12118 /* Simplify the test for uninteresting insns. */
12119 if (insn_type != TYPE_SETCC
12120 && insn_type != TYPE_ICMOV
12121 && insn_type != TYPE_FCMOV
12122 && insn_type != TYPE_IBR)
12123 return 0;
12124
12125 if ((set = single_set (dep_insn)) != 0)
12126 {
12127 set = SET_DEST (set);
12128 set2 = NULL_RTX;
12129 }
12130 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
12131 && XVECLEN (PATTERN (dep_insn), 0) == 2
12132 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
12133 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
12134 {
12135 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
12136 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
12137 }
12138 else
12139 return 0;
12140
12141 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
12142 return 0;
12143
12144 /* This test is true if the dependent insn reads the flags but
12145 not any other potentially set register. */
12146 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
12147 return 0;
12148
12149 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
12150 return 0;
12151
12152 return 1;
12153 }
12154
12155 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
12156 address with operands set by DEP_INSN. */
12157
12158 static int
12159 ix86_agi_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
12160 {
12161 rtx addr;
12162
12163 if (insn_type == TYPE_LEA
12164 && TARGET_PENTIUM)
12165 {
12166 addr = PATTERN (insn);
12167 if (GET_CODE (addr) == SET)
12168 ;
12169 else if (GET_CODE (addr) == PARALLEL
12170 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
12171 addr = XVECEXP (addr, 0, 0);
12172 else
12173 abort ();
12174 addr = SET_SRC (addr);
12175 }
12176 else
12177 {
12178 int i;
12179 extract_insn_cached (insn);
12180 for (i = recog_data.n_operands - 1; i >= 0; --i)
12181 if (GET_CODE (recog_data.operand[i]) == MEM)
12182 {
12183 addr = XEXP (recog_data.operand[i], 0);
12184 goto found;
12185 }
12186 return 0;
12187 found:;
12188 }
12189
12190 return modified_in_p (addr, dep_insn);
12191 }
12192
12193 static int
12194 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
12195 {
12196 enum attr_type insn_type, dep_insn_type;
12197 enum attr_memory memory;
12198 rtx set, set2;
12199 int dep_insn_code_number;
12200
12201 /* Anti and output dependencies have zero cost on all CPUs. */
12202 if (REG_NOTE_KIND (link) != 0)
12203 return 0;
12204
12205 dep_insn_code_number = recog_memoized (dep_insn);
12206
12207 /* If we can't recognize the insns, we can't really do anything. */
12208 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
12209 return cost;
12210
12211 insn_type = get_attr_type (insn);
12212 dep_insn_type = get_attr_type (dep_insn);
12213
12214 switch (ix86_tune)
12215 {
12216 case PROCESSOR_PENTIUM:
12217 /* Address Generation Interlock adds a cycle of latency. */
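/* E.g. on the Pentium a load such as "movl (%esi), %eax" issued right
   after an instruction that modifies %esi stalls for an extra cycle
   because the address is not ready in time (an AGI stall).  */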
12218 if (ix86_agi_dependant (insn, dep_insn, insn_type))
12219 cost += 1;
12220
12221 /* ??? Compares pair with jump/setcc. */
12222 if (ix86_flags_dependant (insn, dep_insn, insn_type))
12223 cost = 0;
12224
12225 /* Floating point stores require value to be ready one cycle earlier. */
12226 if (insn_type == TYPE_FMOV
12227 && get_attr_memory (insn) == MEMORY_STORE
12228 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12229 cost += 1;
12230 break;
12231
12232 case PROCESSOR_PENTIUMPRO:
12233 memory = get_attr_memory (insn);
12234
12235 /* INT->FP conversion is expensive. */
12236 if (get_attr_fp_int_src (dep_insn))
12237 cost += 5;
12238
12239 /* There is one cycle extra latency between an FP op and a store. */
12240 if (insn_type == TYPE_FMOV
12241 && (set = single_set (dep_insn)) != NULL_RTX
12242 && (set2 = single_set (insn)) != NULL_RTX
12243 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
12244 && GET_CODE (SET_DEST (set2)) == MEM)
12245 cost += 1;
12246
12247 /* Model the ability of the reorder buffer to hide the latency of a load
12248 by executing it in parallel with the previous instruction when the
12249 previous instruction is not needed to compute the address. */
12250 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12251 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12252 {
12253 /* Claim that moves take one cycle, as the core can issue one load
12254 at a time and the next load can start a cycle later. */
12255 if (dep_insn_type == TYPE_IMOV
12256 || dep_insn_type == TYPE_FMOV)
12257 cost = 1;
12258 else if (cost > 1)
12259 cost--;
12260 }
12261 break;
12262
12263 case PROCESSOR_K6:
12264 memory = get_attr_memory (insn);
12265
12266 /* The esp dependency is resolved before the instruction is really
12267 finished. */
12268 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
12269 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
12270 return 1;
12271
12272 /* INT->FP conversion is expensive. */
12273 if (get_attr_fp_int_src (dep_insn))
12274 cost += 5;
12275
12276 /* Model the ability of the reorder buffer to hide the latency of a load
12277 by executing it in parallel with the previous instruction when the
12278 previous instruction is not needed to compute the address. */
12279 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12280 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12281 {
12282 /* Claim that moves take one cycle, as the core can issue one load
12283 at a time and the next load can start a cycle later. */
12284 if (dep_insn_type == TYPE_IMOV
12285 || dep_insn_type == TYPE_FMOV)
12286 cost = 1;
12287 else if (cost > 2)
12288 cost -= 2;
12289 else
12290 cost = 1;
12291 }
12292 break;
12293
12294 case PROCESSOR_ATHLON:
12295 case PROCESSOR_K8:
12296 memory = get_attr_memory (insn);
12297
12298 /* Model the ability of the reorder buffer to hide the latency of a load
12299 by executing it in parallel with the previous instruction when the
12300 previous instruction is not needed to compute the address. */
12301 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12302 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12303 {
12304 enum attr_unit unit = get_attr_unit (insn);
12305 int loadcost = 3;
12306
12307 /* Because of the difference between the length of integer and
12308 floating unit pipeline preparation stages, the memory operands
12309 for floating point are cheaper.
12310
12311 ??? For Athlon the difference is most probably 2. */
12312 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
12313 loadcost = 3;
12314 else
12315 loadcost = TARGET_ATHLON ? 2 : 0;
12316
12317 if (cost >= loadcost)
12318 cost -= loadcost;
12319 else
12320 cost = 0;
12321 }
12322
12323 default:
12324 break;
12325 }
12326
12327 return cost;
12328 }
12329
12330 /* How many alternative schedules to try. This should be as wide as the
12331 scheduling freedom in the DFA, but no wider. Making this value too
12332 large results in extra work for the scheduler. */
12333
12334 static int
12335 ia32_multipass_dfa_lookahead (void)
12336 {
12337 if (ix86_tune == PROCESSOR_PENTIUM)
12338 return 2;
12339
12340 if (ix86_tune == PROCESSOR_PENTIUMPRO
12341 || ix86_tune == PROCESSOR_K6)
12342 return 1;
12343
12344 else
12345 return 0;
12346 }
12347
12348 \f
12349 /* Compute the alignment given to a constant that is being placed in memory.
12350 EXP is the constant and ALIGN is the alignment that the object would
12351 ordinarily have.
12352 The value of this function is used instead of that alignment to align
12353 the object. */
12354
12355 int
12356 ix86_constant_alignment (tree exp, int align)
12357 {
12358 if (TREE_CODE (exp) == REAL_CST)
12359 {
12360 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
12361 return 64;
12362 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
12363 return 128;
12364 }
12365 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
12366 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
12367 return BITS_PER_WORD;
12368
12369 return align;
12370 }
12371
12372 /* Compute the alignment for a static variable.
12373 TYPE is the data type, and ALIGN is the alignment that
12374 the object would ordinarily have. The value of this function is used
12375 instead of that alignment to align the object. */
12376
12377 int
12378 ix86_data_alignment (tree type, int align)
12379 {
12380 if (AGGREGATE_TYPE_P (type)
12381 && TYPE_SIZE (type)
12382 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12383 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
12384 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
12385 return 256;
12386
12387 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12388 to a 16-byte boundary. */
12389 if (TARGET_64BIT)
12390 {
12391 if (AGGREGATE_TYPE_P (type)
12392 && TYPE_SIZE (type)
12393 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12394 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
12395 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12396 return 128;
12397 }
12398
12399 if (TREE_CODE (type) == ARRAY_TYPE)
12400 {
12401 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12402 return 64;
12403 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12404 return 128;
12405 }
12406 else if (TREE_CODE (type) == COMPLEX_TYPE)
12407 {
12408
12409 if (TYPE_MODE (type) == DCmode && align < 64)
12410 return 64;
12411 if (TYPE_MODE (type) == XCmode && align < 128)
12412 return 128;
12413 }
12414 else if ((TREE_CODE (type) == RECORD_TYPE
12415 || TREE_CODE (type) == UNION_TYPE
12416 || TREE_CODE (type) == QUAL_UNION_TYPE)
12417 && TYPE_FIELDS (type))
12418 {
12419 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12420 return 64;
12421 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12422 return 128;
12423 }
12424 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12425 || TREE_CODE (type) == INTEGER_TYPE)
12426 {
12427 if (TYPE_MODE (type) == DFmode && align < 64)
12428 return 64;
12429 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12430 return 128;
12431 }
12432
12433 return align;
12434 }
12435
12436 /* Compute the alignment for a local variable.
12437 TYPE is the data type, and ALIGN is the alignment that
12438 the object would ordinarily have. The value of this macro is used
12439 instead of that alignment to align the object. */
12440
12441 int
12442 ix86_local_alignment (tree type, int align)
12443 {
12444 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12445 to a 16-byte boundary. */
12446 if (TARGET_64BIT)
12447 {
12448 if (AGGREGATE_TYPE_P (type)
12449 && TYPE_SIZE (type)
12450 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12451 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
12452 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12453 return 128;
12454 }
12455 if (TREE_CODE (type) == ARRAY_TYPE)
12456 {
12457 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12458 return 64;
12459 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12460 return 128;
12461 }
12462 else if (TREE_CODE (type) == COMPLEX_TYPE)
12463 {
12464 if (TYPE_MODE (type) == DCmode && align < 64)
12465 return 64;
12466 if (TYPE_MODE (type) == XCmode && align < 128)
12467 return 128;
12468 }
12469 else if ((TREE_CODE (type) == RECORD_TYPE
12470 || TREE_CODE (type) == UNION_TYPE
12471 || TREE_CODE (type) == QUAL_UNION_TYPE)
12472 && TYPE_FIELDS (type))
12473 {
12474 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12475 return 64;
12476 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12477 return 128;
12478 }
12479 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12480 || TREE_CODE (type) == INTEGER_TYPE)
12481 {
12482
12483 if (TYPE_MODE (type) == DFmode && align < 64)
12484 return 64;
12485 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12486 return 128;
12487 }
12488 return align;
12489 }
12490 \f
12491 /* Emit RTL insns to initialize the variable parts of a trampoline.
12492 FNADDR is an RTX for the address of the function's pure code.
12493 CXT is an RTX for the static chain value for the function. */
12494 void
12495 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
12496 {
12497 if (!TARGET_64BIT)
12498 {
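/* The 32-bit trampoline is, roughly:
       b9 <cxt32>    movl  $CXT, %ecx
       e9 <rel32>    jmp   FNADDR
   where <rel32> is measured from the end of the jmp instruction.  */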
12499 /* Compute offset from the end of the jmp to the target function. */
12500 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
12501 plus_constant (tramp, 10),
12502 NULL_RTX, 1, OPTAB_DIRECT);
12503 emit_move_insn (gen_rtx_MEM (QImode, tramp),
12504 gen_int_mode (0xb9, QImode));
12505 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
12506 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
12507 gen_int_mode (0xe9, QImode));
12508 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
12509 }
12510 else
12511 {
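/* The 64-bit trampoline is, roughly:
       41 bb <imm32>   movl   $FNADDR, %r11d   (or 49 bb <imm64>, movabs)
       49 ba <imm64>   movabs $CXT, %r10
       49 ff e3        jmpq   *%r11  */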
12512 int offset = 0;
12513 /* Try to load the address using the shorter movl instead of movabs.
12514 We may want to support movq for kernel mode, but the kernel does not
12515 use trampolines at the moment. */
12516 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
12517 {
12518 fnaddr = copy_to_mode_reg (DImode, fnaddr);
12519 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12520 gen_int_mode (0xbb41, HImode));
12521 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
12522 gen_lowpart (SImode, fnaddr));
12523 offset += 6;
12524 }
12525 else
12526 {
12527 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12528 gen_int_mode (0xbb49, HImode));
12529 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12530 fnaddr);
12531 offset += 10;
12532 }
12533 /* Load static chain using movabs to r10. */
12534 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12535 gen_int_mode (0xba49, HImode));
12536 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12537 cxt);
12538 offset += 10;
12539 /* Jump to r11. */
12540 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12541 gen_int_mode (0xff49, HImode));
12542 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
12543 gen_int_mode (0xe3, QImode));
12544 offset += 3;
12545 if (offset > TRAMPOLINE_SIZE)
12546 abort ();
12547 }
12548
12549 #ifdef ENABLE_EXECUTE_STACK
12550 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
12551 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
12552 #endif
12553 }
12554 \f
12555 /* Codes for all the SSE/MMX builtins. */
12556 enum ix86_builtins
12557 {
12558 IX86_BUILTIN_ADDPS,
12559 IX86_BUILTIN_ADDSS,
12560 IX86_BUILTIN_DIVPS,
12561 IX86_BUILTIN_DIVSS,
12562 IX86_BUILTIN_MULPS,
12563 IX86_BUILTIN_MULSS,
12564 IX86_BUILTIN_SUBPS,
12565 IX86_BUILTIN_SUBSS,
12566
12567 IX86_BUILTIN_CMPEQPS,
12568 IX86_BUILTIN_CMPLTPS,
12569 IX86_BUILTIN_CMPLEPS,
12570 IX86_BUILTIN_CMPGTPS,
12571 IX86_BUILTIN_CMPGEPS,
12572 IX86_BUILTIN_CMPNEQPS,
12573 IX86_BUILTIN_CMPNLTPS,
12574 IX86_BUILTIN_CMPNLEPS,
12575 IX86_BUILTIN_CMPNGTPS,
12576 IX86_BUILTIN_CMPNGEPS,
12577 IX86_BUILTIN_CMPORDPS,
12578 IX86_BUILTIN_CMPUNORDPS,
12579 IX86_BUILTIN_CMPNEPS,
12580 IX86_BUILTIN_CMPEQSS,
12581 IX86_BUILTIN_CMPLTSS,
12582 IX86_BUILTIN_CMPLESS,
12583 IX86_BUILTIN_CMPNEQSS,
12584 IX86_BUILTIN_CMPNLTSS,
12585 IX86_BUILTIN_CMPNLESS,
12586 IX86_BUILTIN_CMPNGTSS,
12587 IX86_BUILTIN_CMPNGESS,
12588 IX86_BUILTIN_CMPORDSS,
12589 IX86_BUILTIN_CMPUNORDSS,
12590 IX86_BUILTIN_CMPNESS,
12591
12592 IX86_BUILTIN_COMIEQSS,
12593 IX86_BUILTIN_COMILTSS,
12594 IX86_BUILTIN_COMILESS,
12595 IX86_BUILTIN_COMIGTSS,
12596 IX86_BUILTIN_COMIGESS,
12597 IX86_BUILTIN_COMINEQSS,
12598 IX86_BUILTIN_UCOMIEQSS,
12599 IX86_BUILTIN_UCOMILTSS,
12600 IX86_BUILTIN_UCOMILESS,
12601 IX86_BUILTIN_UCOMIGTSS,
12602 IX86_BUILTIN_UCOMIGESS,
12603 IX86_BUILTIN_UCOMINEQSS,
12604
12605 IX86_BUILTIN_CVTPI2PS,
12606 IX86_BUILTIN_CVTPS2PI,
12607 IX86_BUILTIN_CVTSI2SS,
12608 IX86_BUILTIN_CVTSI642SS,
12609 IX86_BUILTIN_CVTSS2SI,
12610 IX86_BUILTIN_CVTSS2SI64,
12611 IX86_BUILTIN_CVTTPS2PI,
12612 IX86_BUILTIN_CVTTSS2SI,
12613 IX86_BUILTIN_CVTTSS2SI64,
12614
12615 IX86_BUILTIN_MAXPS,
12616 IX86_BUILTIN_MAXSS,
12617 IX86_BUILTIN_MINPS,
12618 IX86_BUILTIN_MINSS,
12619
12620 IX86_BUILTIN_LOADUPS,
12621 IX86_BUILTIN_STOREUPS,
12622 IX86_BUILTIN_MOVSS,
12623
12624 IX86_BUILTIN_MOVHLPS,
12625 IX86_BUILTIN_MOVLHPS,
12626 IX86_BUILTIN_LOADHPS,
12627 IX86_BUILTIN_LOADLPS,
12628 IX86_BUILTIN_STOREHPS,
12629 IX86_BUILTIN_STORELPS,
12630
12631 IX86_BUILTIN_MASKMOVQ,
12632 IX86_BUILTIN_MOVMSKPS,
12633 IX86_BUILTIN_PMOVMSKB,
12634
12635 IX86_BUILTIN_MOVNTPS,
12636 IX86_BUILTIN_MOVNTQ,
12637
12638 IX86_BUILTIN_LOADDQU,
12639 IX86_BUILTIN_STOREDQU,
12640
12641 IX86_BUILTIN_PACKSSWB,
12642 IX86_BUILTIN_PACKSSDW,
12643 IX86_BUILTIN_PACKUSWB,
12644
12645 IX86_BUILTIN_PADDB,
12646 IX86_BUILTIN_PADDW,
12647 IX86_BUILTIN_PADDD,
12648 IX86_BUILTIN_PADDQ,
12649 IX86_BUILTIN_PADDSB,
12650 IX86_BUILTIN_PADDSW,
12651 IX86_BUILTIN_PADDUSB,
12652 IX86_BUILTIN_PADDUSW,
12653 IX86_BUILTIN_PSUBB,
12654 IX86_BUILTIN_PSUBW,
12655 IX86_BUILTIN_PSUBD,
12656 IX86_BUILTIN_PSUBQ,
12657 IX86_BUILTIN_PSUBSB,
12658 IX86_BUILTIN_PSUBSW,
12659 IX86_BUILTIN_PSUBUSB,
12660 IX86_BUILTIN_PSUBUSW,
12661
12662 IX86_BUILTIN_PAND,
12663 IX86_BUILTIN_PANDN,
12664 IX86_BUILTIN_POR,
12665 IX86_BUILTIN_PXOR,
12666
12667 IX86_BUILTIN_PAVGB,
12668 IX86_BUILTIN_PAVGW,
12669
12670 IX86_BUILTIN_PCMPEQB,
12671 IX86_BUILTIN_PCMPEQW,
12672 IX86_BUILTIN_PCMPEQD,
12673 IX86_BUILTIN_PCMPGTB,
12674 IX86_BUILTIN_PCMPGTW,
12675 IX86_BUILTIN_PCMPGTD,
12676
12677 IX86_BUILTIN_PMADDWD,
12678
12679 IX86_BUILTIN_PMAXSW,
12680 IX86_BUILTIN_PMAXUB,
12681 IX86_BUILTIN_PMINSW,
12682 IX86_BUILTIN_PMINUB,
12683
12684 IX86_BUILTIN_PMULHUW,
12685 IX86_BUILTIN_PMULHW,
12686 IX86_BUILTIN_PMULLW,
12687
12688 IX86_BUILTIN_PSADBW,
12689 IX86_BUILTIN_PSHUFW,
12690
12691 IX86_BUILTIN_PSLLW,
12692 IX86_BUILTIN_PSLLD,
12693 IX86_BUILTIN_PSLLQ,
12694 IX86_BUILTIN_PSRAW,
12695 IX86_BUILTIN_PSRAD,
12696 IX86_BUILTIN_PSRLW,
12697 IX86_BUILTIN_PSRLD,
12698 IX86_BUILTIN_PSRLQ,
12699 IX86_BUILTIN_PSLLWI,
12700 IX86_BUILTIN_PSLLDI,
12701 IX86_BUILTIN_PSLLQI,
12702 IX86_BUILTIN_PSRAWI,
12703 IX86_BUILTIN_PSRADI,
12704 IX86_BUILTIN_PSRLWI,
12705 IX86_BUILTIN_PSRLDI,
12706 IX86_BUILTIN_PSRLQI,
12707
12708 IX86_BUILTIN_PUNPCKHBW,
12709 IX86_BUILTIN_PUNPCKHWD,
12710 IX86_BUILTIN_PUNPCKHDQ,
12711 IX86_BUILTIN_PUNPCKLBW,
12712 IX86_BUILTIN_PUNPCKLWD,
12713 IX86_BUILTIN_PUNPCKLDQ,
12714
12715 IX86_BUILTIN_SHUFPS,
12716
12717 IX86_BUILTIN_RCPPS,
12718 IX86_BUILTIN_RCPSS,
12719 IX86_BUILTIN_RSQRTPS,
12720 IX86_BUILTIN_RSQRTSS,
12721 IX86_BUILTIN_SQRTPS,
12722 IX86_BUILTIN_SQRTSS,
12723
12724 IX86_BUILTIN_UNPCKHPS,
12725 IX86_BUILTIN_UNPCKLPS,
12726
12727 IX86_BUILTIN_ANDPS,
12728 IX86_BUILTIN_ANDNPS,
12729 IX86_BUILTIN_ORPS,
12730 IX86_BUILTIN_XORPS,
12731
12732 IX86_BUILTIN_EMMS,
12733 IX86_BUILTIN_LDMXCSR,
12734 IX86_BUILTIN_STMXCSR,
12735 IX86_BUILTIN_SFENCE,
12736
12737 /* 3DNow! Original */
12738 IX86_BUILTIN_FEMMS,
12739 IX86_BUILTIN_PAVGUSB,
12740 IX86_BUILTIN_PF2ID,
12741 IX86_BUILTIN_PFACC,
12742 IX86_BUILTIN_PFADD,
12743 IX86_BUILTIN_PFCMPEQ,
12744 IX86_BUILTIN_PFCMPGE,
12745 IX86_BUILTIN_PFCMPGT,
12746 IX86_BUILTIN_PFMAX,
12747 IX86_BUILTIN_PFMIN,
12748 IX86_BUILTIN_PFMUL,
12749 IX86_BUILTIN_PFRCP,
12750 IX86_BUILTIN_PFRCPIT1,
12751 IX86_BUILTIN_PFRCPIT2,
12752 IX86_BUILTIN_PFRSQIT1,
12753 IX86_BUILTIN_PFRSQRT,
12754 IX86_BUILTIN_PFSUB,
12755 IX86_BUILTIN_PFSUBR,
12756 IX86_BUILTIN_PI2FD,
12757 IX86_BUILTIN_PMULHRW,
12758
12759 /* 3DNow! Athlon Extensions */
12760 IX86_BUILTIN_PF2IW,
12761 IX86_BUILTIN_PFNACC,
12762 IX86_BUILTIN_PFPNACC,
12763 IX86_BUILTIN_PI2FW,
12764 IX86_BUILTIN_PSWAPDSI,
12765 IX86_BUILTIN_PSWAPDSF,
12766
12767 /* SSE2 */
12768 IX86_BUILTIN_ADDPD,
12769 IX86_BUILTIN_ADDSD,
12770 IX86_BUILTIN_DIVPD,
12771 IX86_BUILTIN_DIVSD,
12772 IX86_BUILTIN_MULPD,
12773 IX86_BUILTIN_MULSD,
12774 IX86_BUILTIN_SUBPD,
12775 IX86_BUILTIN_SUBSD,
12776
12777 IX86_BUILTIN_CMPEQPD,
12778 IX86_BUILTIN_CMPLTPD,
12779 IX86_BUILTIN_CMPLEPD,
12780 IX86_BUILTIN_CMPGTPD,
12781 IX86_BUILTIN_CMPGEPD,
12782 IX86_BUILTIN_CMPNEQPD,
12783 IX86_BUILTIN_CMPNLTPD,
12784 IX86_BUILTIN_CMPNLEPD,
12785 IX86_BUILTIN_CMPNGTPD,
12786 IX86_BUILTIN_CMPNGEPD,
12787 IX86_BUILTIN_CMPORDPD,
12788 IX86_BUILTIN_CMPUNORDPD,
12789 IX86_BUILTIN_CMPNEPD,
12790 IX86_BUILTIN_CMPEQSD,
12791 IX86_BUILTIN_CMPLTSD,
12792 IX86_BUILTIN_CMPLESD,
12793 IX86_BUILTIN_CMPNEQSD,
12794 IX86_BUILTIN_CMPNLTSD,
12795 IX86_BUILTIN_CMPNLESD,
12796 IX86_BUILTIN_CMPORDSD,
12797 IX86_BUILTIN_CMPUNORDSD,
12798 IX86_BUILTIN_CMPNESD,
12799
12800 IX86_BUILTIN_COMIEQSD,
12801 IX86_BUILTIN_COMILTSD,
12802 IX86_BUILTIN_COMILESD,
12803 IX86_BUILTIN_COMIGTSD,
12804 IX86_BUILTIN_COMIGESD,
12805 IX86_BUILTIN_COMINEQSD,
12806 IX86_BUILTIN_UCOMIEQSD,
12807 IX86_BUILTIN_UCOMILTSD,
12808 IX86_BUILTIN_UCOMILESD,
12809 IX86_BUILTIN_UCOMIGTSD,
12810 IX86_BUILTIN_UCOMIGESD,
12811 IX86_BUILTIN_UCOMINEQSD,
12812
12813 IX86_BUILTIN_MAXPD,
12814 IX86_BUILTIN_MAXSD,
12815 IX86_BUILTIN_MINPD,
12816 IX86_BUILTIN_MINSD,
12817
12818 IX86_BUILTIN_ANDPD,
12819 IX86_BUILTIN_ANDNPD,
12820 IX86_BUILTIN_ORPD,
12821 IX86_BUILTIN_XORPD,
12822
12823 IX86_BUILTIN_SQRTPD,
12824 IX86_BUILTIN_SQRTSD,
12825
12826 IX86_BUILTIN_UNPCKHPD,
12827 IX86_BUILTIN_UNPCKLPD,
12828
12829 IX86_BUILTIN_SHUFPD,
12830
12831 IX86_BUILTIN_LOADUPD,
12832 IX86_BUILTIN_STOREUPD,
12833 IX86_BUILTIN_MOVSD,
12834
12835 IX86_BUILTIN_LOADHPD,
12836 IX86_BUILTIN_LOADLPD,
12837
12838 IX86_BUILTIN_CVTDQ2PD,
12839 IX86_BUILTIN_CVTDQ2PS,
12840
12841 IX86_BUILTIN_CVTPD2DQ,
12842 IX86_BUILTIN_CVTPD2PI,
12843 IX86_BUILTIN_CVTPD2PS,
12844 IX86_BUILTIN_CVTTPD2DQ,
12845 IX86_BUILTIN_CVTTPD2PI,
12846
12847 IX86_BUILTIN_CVTPI2PD,
12848 IX86_BUILTIN_CVTSI2SD,
12849 IX86_BUILTIN_CVTSI642SD,
12850
12851 IX86_BUILTIN_CVTSD2SI,
12852 IX86_BUILTIN_CVTSD2SI64,
12853 IX86_BUILTIN_CVTSD2SS,
12854 IX86_BUILTIN_CVTSS2SD,
12855 IX86_BUILTIN_CVTTSD2SI,
12856 IX86_BUILTIN_CVTTSD2SI64,
12857
12858 IX86_BUILTIN_CVTPS2DQ,
12859 IX86_BUILTIN_CVTPS2PD,
12860 IX86_BUILTIN_CVTTPS2DQ,
12861
12862 IX86_BUILTIN_MOVNTI,
12863 IX86_BUILTIN_MOVNTPD,
12864 IX86_BUILTIN_MOVNTDQ,
12865
12866 /* SSE2 MMX */
12867 IX86_BUILTIN_MASKMOVDQU,
12868 IX86_BUILTIN_MOVMSKPD,
12869 IX86_BUILTIN_PMOVMSKB128,
12870
12871 IX86_BUILTIN_PACKSSWB128,
12872 IX86_BUILTIN_PACKSSDW128,
12873 IX86_BUILTIN_PACKUSWB128,
12874
12875 IX86_BUILTIN_PADDB128,
12876 IX86_BUILTIN_PADDW128,
12877 IX86_BUILTIN_PADDD128,
12878 IX86_BUILTIN_PADDQ128,
12879 IX86_BUILTIN_PADDSB128,
12880 IX86_BUILTIN_PADDSW128,
12881 IX86_BUILTIN_PADDUSB128,
12882 IX86_BUILTIN_PADDUSW128,
12883 IX86_BUILTIN_PSUBB128,
12884 IX86_BUILTIN_PSUBW128,
12885 IX86_BUILTIN_PSUBD128,
12886 IX86_BUILTIN_PSUBQ128,
12887 IX86_BUILTIN_PSUBSB128,
12888 IX86_BUILTIN_PSUBSW128,
12889 IX86_BUILTIN_PSUBUSB128,
12890 IX86_BUILTIN_PSUBUSW128,
12891
12892 IX86_BUILTIN_PAND128,
12893 IX86_BUILTIN_PANDN128,
12894 IX86_BUILTIN_POR128,
12895 IX86_BUILTIN_PXOR128,
12896
12897 IX86_BUILTIN_PAVGB128,
12898 IX86_BUILTIN_PAVGW128,
12899
12900 IX86_BUILTIN_PCMPEQB128,
12901 IX86_BUILTIN_PCMPEQW128,
12902 IX86_BUILTIN_PCMPEQD128,
12903 IX86_BUILTIN_PCMPGTB128,
12904 IX86_BUILTIN_PCMPGTW128,
12905 IX86_BUILTIN_PCMPGTD128,
12906
12907 IX86_BUILTIN_PMADDWD128,
12908
12909 IX86_BUILTIN_PMAXSW128,
12910 IX86_BUILTIN_PMAXUB128,
12911 IX86_BUILTIN_PMINSW128,
12912 IX86_BUILTIN_PMINUB128,
12913
12914 IX86_BUILTIN_PMULUDQ,
12915 IX86_BUILTIN_PMULUDQ128,
12916 IX86_BUILTIN_PMULHUW128,
12917 IX86_BUILTIN_PMULHW128,
12918 IX86_BUILTIN_PMULLW128,
12919
12920 IX86_BUILTIN_PSADBW128,
12921 IX86_BUILTIN_PSHUFHW,
12922 IX86_BUILTIN_PSHUFLW,
12923 IX86_BUILTIN_PSHUFD,
12924
12925 IX86_BUILTIN_PSLLW128,
12926 IX86_BUILTIN_PSLLD128,
12927 IX86_BUILTIN_PSLLQ128,
12928 IX86_BUILTIN_PSRAW128,
12929 IX86_BUILTIN_PSRAD128,
12930 IX86_BUILTIN_PSRLW128,
12931 IX86_BUILTIN_PSRLD128,
12932 IX86_BUILTIN_PSRLQ128,
12933 IX86_BUILTIN_PSLLDQI128,
12934 IX86_BUILTIN_PSLLWI128,
12935 IX86_BUILTIN_PSLLDI128,
12936 IX86_BUILTIN_PSLLQI128,
12937 IX86_BUILTIN_PSRAWI128,
12938 IX86_BUILTIN_PSRADI128,
12939 IX86_BUILTIN_PSRLDQI128,
12940 IX86_BUILTIN_PSRLWI128,
12941 IX86_BUILTIN_PSRLDI128,
12942 IX86_BUILTIN_PSRLQI128,
12943
12944 IX86_BUILTIN_PUNPCKHBW128,
12945 IX86_BUILTIN_PUNPCKHWD128,
12946 IX86_BUILTIN_PUNPCKHDQ128,
12947 IX86_BUILTIN_PUNPCKHQDQ128,
12948 IX86_BUILTIN_PUNPCKLBW128,
12949 IX86_BUILTIN_PUNPCKLWD128,
12950 IX86_BUILTIN_PUNPCKLDQ128,
12951 IX86_BUILTIN_PUNPCKLQDQ128,
12952
12953 IX86_BUILTIN_CLFLUSH,
12954 IX86_BUILTIN_MFENCE,
12955 IX86_BUILTIN_LFENCE,
12956
12957 /* Prescott New Instructions. */
12958 IX86_BUILTIN_ADDSUBPS,
12959 IX86_BUILTIN_HADDPS,
12960 IX86_BUILTIN_HSUBPS,
12961 IX86_BUILTIN_MOVSHDUP,
12962 IX86_BUILTIN_MOVSLDUP,
12963 IX86_BUILTIN_ADDSUBPD,
12964 IX86_BUILTIN_HADDPD,
12965 IX86_BUILTIN_HSUBPD,
12966 IX86_BUILTIN_LDDQU,
12967
12968 IX86_BUILTIN_MONITOR,
12969 IX86_BUILTIN_MWAIT,
12970
12971 IX86_BUILTIN_VEC_INIT_V2SI,
12972 IX86_BUILTIN_VEC_INIT_V4HI,
12973 IX86_BUILTIN_VEC_INIT_V8QI,
12974 IX86_BUILTIN_VEC_EXT_V2DF,
12975 IX86_BUILTIN_VEC_EXT_V2DI,
12976 IX86_BUILTIN_VEC_EXT_V4SF,
12977 IX86_BUILTIN_VEC_EXT_V4SI,
12978 IX86_BUILTIN_VEC_EXT_V8HI,
12979 IX86_BUILTIN_VEC_EXT_V2SI,
12980 IX86_BUILTIN_VEC_EXT_V4HI,
12981 IX86_BUILTIN_VEC_SET_V8HI,
12982 IX86_BUILTIN_VEC_SET_V4HI,
12983
12984 IX86_BUILTIN_MAX
12985 };
12986
12987 #define def_builtin(MASK, NAME, TYPE, CODE) \
12988 do { \
12989 if ((MASK) & target_flags \
12990 && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
12991 lang_hooks.builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
12992 NULL, NULL_TREE); \
12993 } while (0)
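/* Typical use (the type node name below is hypothetical, for illustration
   only):
     def_builtin (MASK_SSE, "__builtin_ia32_addps",
                  v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ADDPS);
   registers the builtin only when the SSE bit is enabled in target_flags
   (and, for 64-bit-only builtins, only when TARGET_64BIT).  */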
12994
12995 /* Bits for builtin_description.flag. */
12996
12997 /* Set when we don't support the comparison natively, and should
12998 swap_comparison in order to support it. */
12999 #define BUILTIN_DESC_SWAP_OPERANDS 1
13000
13001 struct builtin_description
13002 {
13003 const unsigned int mask;
13004 const enum insn_code icode;
13005 const char *const name;
13006 const enum ix86_builtins code;
13007 const enum rtx_code comparison;
13008 const unsigned int flag;
13009 };
13010
13011 static const struct builtin_description bdesc_comi[] =
13012 {
13013 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
13014 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
13015 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
13016 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
13017 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
13018 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
13019 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
13020 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
13021 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
13022 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
13023 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
13024 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
13025 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
13026 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
13027 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
13028 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
13029 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
13030 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
13031 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
13032 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
13033 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
13034 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
13035 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
13036 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
13037 };
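/* Every comi/ucomi entry takes two vectors and returns an int; the MASK_SSE
   variants compare V4SFmode operands and the MASK_SSE2 variants V2DFmode
   operands, which is how they are registered in ix86_init_mmx_sse_builtins
   below.  */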
13038
13039 static const struct builtin_description bdesc_2arg[] =
13040 {
13041 /* SSE */
13042 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
13043 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
13044 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
13045 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
13046 { MASK_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
13047 { MASK_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
13048 { MASK_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
13049 { MASK_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
13050
13051 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
13052 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
13053 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
13054 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT,
13055 BUILTIN_DESC_SWAP_OPERANDS },
13056 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE,
13057 BUILTIN_DESC_SWAP_OPERANDS },
13058 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
13059 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
13060 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
13061 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
13062 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE,
13063 BUILTIN_DESC_SWAP_OPERANDS },
13064 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT,
13065 BUILTIN_DESC_SWAP_OPERANDS },
13066 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
13067 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
13068 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
13069 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
13070 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
13071 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
13072 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
13073 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
13074 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE,
13075 BUILTIN_DESC_SWAP_OPERANDS },
13076 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT,
13077 BUILTIN_DESC_SWAP_OPERANDS },
13078 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
13079
13080 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
13081 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
13082 { MASK_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
13083 { MASK_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
13084
13085 { MASK_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
13086 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
13087 { MASK_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
13088 { MASK_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
13089
13090 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
13091 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
13092 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
13093 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
13094 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
13095
13096 /* MMX */
13097 { MASK_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
13098 { MASK_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
13099 { MASK_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
13100 { MASK_MMX, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
13101 { MASK_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
13102 { MASK_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
13103 { MASK_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
13104 { MASK_MMX, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
13105
13106 { MASK_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
13107 { MASK_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
13108 { MASK_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
13109 { MASK_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
13110 { MASK_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
13111 { MASK_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
13112 { MASK_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
13113 { MASK_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
13114
13115 { MASK_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
13116 { MASK_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
13117 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
13118
13119 { MASK_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
13120 { MASK_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
13121 { MASK_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
13122 { MASK_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
13123
13124 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
13125 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
13126
13127 { MASK_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
13128 { MASK_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
13129 { MASK_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
13130 { MASK_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
13131 { MASK_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
13132 { MASK_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
13133
13134 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
13135 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
13136 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
13137 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
13138
13139 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
13140 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
13141 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
13142 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
13143 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
13144 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
13145
13146 /* Special. */
13147 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
13148 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
13149 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
13150
13151 { MASK_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
13152 { MASK_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
13153 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
13154
13155 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
13156 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
13157 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
13158 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
13159 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
13160 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
13161
13162 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
13163 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
13164 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
13165 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
13166 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
13167 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
13168
13169 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
13170 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
13171 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
13172 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
13173
13174 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
13175 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
13176
13177 /* SSE2 */
13178 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
13179 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
13180 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
13181 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
13182 { MASK_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
13183 { MASK_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
13184 { MASK_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
13185 { MASK_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
13186
13187 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
13188 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
13189 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
13190 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT,
13191 BUILTIN_DESC_SWAP_OPERANDS },
13192 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE,
13193 BUILTIN_DESC_SWAP_OPERANDS },
13194 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
13195 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
13196 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
13197 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
13198 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE,
13199 BUILTIN_DESC_SWAP_OPERANDS },
13200 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT,
13201 BUILTIN_DESC_SWAP_OPERANDS },
13202 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
13203 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
13204 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
13205 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
13206 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
13207 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
13208 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
13209 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
13210 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },
13211
13212 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
13213 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
13214 { MASK_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
13215 { MASK_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
13216
13217 { MASK_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
13218 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
13219 { MASK_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
13220 { MASK_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
13221
13222 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
13223 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
13224 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
13225
13226 /* SSE2 MMX */
13227 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
13228 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
13229 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
13230 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
13231 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
13232 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
13233 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
13234 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
13235
13236 { MASK_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
13237 { MASK_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
13238 { MASK_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
13239 { MASK_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
13240 { MASK_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
13241 { MASK_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
13242 { MASK_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
13243 { MASK_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
13244
13245 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
13246 { MASK_SSE2, CODE_FOR_sse2_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
13247
13248 { MASK_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
13249 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
13250 { MASK_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
13251 { MASK_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
13252
13253 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
13254 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
13255
13256 { MASK_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
13257 { MASK_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
13258 { MASK_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
13259 { MASK_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
13260 { MASK_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
13261 { MASK_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
13262
13263 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
13264 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
13265 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
13266 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
13267
13268 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
13269 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
13270 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
13271 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
13272 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
13273 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
13274 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
13275 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
13276
13277 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
13278 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
13279 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
13280
13281 { MASK_SSE2, CODE_FOR_sse2_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
13282 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
13283
13284 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
13285 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },
13286
13287 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
13288 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
13289 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
13290
13291 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
13292 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
13293 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
13294
13295 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
13296 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
13297
13298 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
13299
13300 { MASK_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
13301 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
13302 { MASK_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
13303 { MASK_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
13304
13305 /* SSE3 */
13306 { MASK_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
13307 { MASK_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
13308 { MASK_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
13309 { MASK_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
13310 { MASK_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
13311 { MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
13312 };
13313
13314 static const struct builtin_description bdesc_1arg[] =
13315 {
13316 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
13317 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
13318
13319 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
13320 { MASK_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
13321 { MASK_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
13322
13323 { MASK_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
13324 { MASK_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
13325 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
13326 { MASK_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
13327 { MASK_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
13328 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
13329
13330 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
13331 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
13332
13333 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
13334
13335 { MASK_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
13336 { MASK_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
13337
13338 { MASK_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
13339 { MASK_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
13340 { MASK_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
13341 { MASK_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
13342 { MASK_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
13343
13344 { MASK_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
13345
13346 { MASK_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
13347 { MASK_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
13348 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
13349 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
13350
13351 { MASK_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
13352 { MASK_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
13353 { MASK_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
13354
13355 /* SSE3 */
13356 { MASK_SSE3, CODE_FOR_sse3_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
13357 { MASK_SSE3, CODE_FOR_sse3_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
13358 };
13359
13360 static void
13361 ix86_init_builtins (void)
13362 {
13363 if (TARGET_MMX)
13364 ix86_init_mmx_sse_builtins ();
13365 }
13366
13367 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
13368 is zero. Otherwise, if TARGET_SSE is not set, only the MMX
13369 builtins are defined. */
13370 static void
13371 ix86_init_mmx_sse_builtins (void)
13372 {
13373 const struct builtin_description * d;
13374 size_t i;
13375
13376 tree V16QI_type_node = build_vector_type_for_mode (intQI_type_node, V16QImode);
13377 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
13378 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
13379 tree V2DI_type_node
13380 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
13381 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
13382 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
13383 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
13384 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
13385 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
13386 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
13387
13388 tree pchar_type_node = build_pointer_type (char_type_node);
13389 tree pcchar_type_node = build_pointer_type (
13390 build_type_variant (char_type_node, 1, 0));
13391 tree pfloat_type_node = build_pointer_type (float_type_node);
13392 tree pcfloat_type_node = build_pointer_type (
13393 build_type_variant (float_type_node, 1, 0));
13394 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
13395 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
13396 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
13397
13398 /* Comparisons. */
13399 tree int_ftype_v4sf_v4sf
13400 = build_function_type_list (integer_type_node,
13401 V4SF_type_node, V4SF_type_node, NULL_TREE);
13402 tree v4si_ftype_v4sf_v4sf
13403 = build_function_type_list (V4SI_type_node,
13404 V4SF_type_node, V4SF_type_node, NULL_TREE);
13405 /* MMX/SSE/integer conversions. */
13406 tree int_ftype_v4sf
13407 = build_function_type_list (integer_type_node,
13408 V4SF_type_node, NULL_TREE);
13409 tree int64_ftype_v4sf
13410 = build_function_type_list (long_long_integer_type_node,
13411 V4SF_type_node, NULL_TREE);
13412 tree int_ftype_v8qi
13413 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
13414 tree v4sf_ftype_v4sf_int
13415 = build_function_type_list (V4SF_type_node,
13416 V4SF_type_node, integer_type_node, NULL_TREE);
13417 tree v4sf_ftype_v4sf_int64
13418 = build_function_type_list (V4SF_type_node,
13419 V4SF_type_node, long_long_integer_type_node,
13420 NULL_TREE);
13421 tree v4sf_ftype_v4sf_v2si
13422 = build_function_type_list (V4SF_type_node,
13423 V4SF_type_node, V2SI_type_node, NULL_TREE);
13424
13425 /* Miscellaneous. */
13426 tree v8qi_ftype_v4hi_v4hi
13427 = build_function_type_list (V8QI_type_node,
13428 V4HI_type_node, V4HI_type_node, NULL_TREE);
13429 tree v4hi_ftype_v2si_v2si
13430 = build_function_type_list (V4HI_type_node,
13431 V2SI_type_node, V2SI_type_node, NULL_TREE);
13432 tree v4sf_ftype_v4sf_v4sf_int
13433 = build_function_type_list (V4SF_type_node,
13434 V4SF_type_node, V4SF_type_node,
13435 integer_type_node, NULL_TREE);
13436 tree v2si_ftype_v4hi_v4hi
13437 = build_function_type_list (V2SI_type_node,
13438 V4HI_type_node, V4HI_type_node, NULL_TREE);
13439 tree v4hi_ftype_v4hi_int
13440 = build_function_type_list (V4HI_type_node,
13441 V4HI_type_node, integer_type_node, NULL_TREE);
13442 tree v4hi_ftype_v4hi_di
13443 = build_function_type_list (V4HI_type_node,
13444 V4HI_type_node, long_long_unsigned_type_node,
13445 NULL_TREE);
13446 tree v2si_ftype_v2si_di
13447 = build_function_type_list (V2SI_type_node,
13448 V2SI_type_node, long_long_unsigned_type_node,
13449 NULL_TREE);
13450 tree void_ftype_void
13451 = build_function_type (void_type_node, void_list_node);
13452 tree void_ftype_unsigned
13453 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
13454 tree void_ftype_unsigned_unsigned
13455 = build_function_type_list (void_type_node, unsigned_type_node,
13456 unsigned_type_node, NULL_TREE);
13457 tree void_ftype_pcvoid_unsigned_unsigned
13458 = build_function_type_list (void_type_node, const_ptr_type_node,
13459 unsigned_type_node, unsigned_type_node,
13460 NULL_TREE);
13461 tree unsigned_ftype_void
13462 = build_function_type (unsigned_type_node, void_list_node);
13463 tree v2si_ftype_v4sf
13464 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
13465 /* Loads/stores. */
13466 tree void_ftype_v8qi_v8qi_pchar
13467 = build_function_type_list (void_type_node,
13468 V8QI_type_node, V8QI_type_node,
13469 pchar_type_node, NULL_TREE);
13470 tree v4sf_ftype_pcfloat
13471 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
13472 /* @@@ the type is bogus */
13473 tree v4sf_ftype_v4sf_pv2si
13474 = build_function_type_list (V4SF_type_node,
13475 V4SF_type_node, pv2si_type_node, NULL_TREE);
13476 tree void_ftype_pv2si_v4sf
13477 = build_function_type_list (void_type_node,
13478 pv2si_type_node, V4SF_type_node, NULL_TREE);
13479 tree void_ftype_pfloat_v4sf
13480 = build_function_type_list (void_type_node,
13481 pfloat_type_node, V4SF_type_node, NULL_TREE);
13482 tree void_ftype_pdi_di
13483 = build_function_type_list (void_type_node,
13484 pdi_type_node, long_long_unsigned_type_node,
13485 NULL_TREE);
13486 tree void_ftype_pv2di_v2di
13487 = build_function_type_list (void_type_node,
13488 pv2di_type_node, V2DI_type_node, NULL_TREE);
13489 /* Normal vector unops. */
13490 tree v4sf_ftype_v4sf
13491 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
13492
13493 /* Normal vector binops. */
13494 tree v4sf_ftype_v4sf_v4sf
13495 = build_function_type_list (V4SF_type_node,
13496 V4SF_type_node, V4SF_type_node, NULL_TREE);
13497 tree v8qi_ftype_v8qi_v8qi
13498 = build_function_type_list (V8QI_type_node,
13499 V8QI_type_node, V8QI_type_node, NULL_TREE);
13500 tree v4hi_ftype_v4hi_v4hi
13501 = build_function_type_list (V4HI_type_node,
13502 V4HI_type_node, V4HI_type_node, NULL_TREE);
13503 tree v2si_ftype_v2si_v2si
13504 = build_function_type_list (V2SI_type_node,
13505 V2SI_type_node, V2SI_type_node, NULL_TREE);
13506 tree di_ftype_di_di
13507 = build_function_type_list (long_long_unsigned_type_node,
13508 long_long_unsigned_type_node,
13509 long_long_unsigned_type_node, NULL_TREE);
13510
13511 tree v2si_ftype_v2sf
13512 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
13513 tree v2sf_ftype_v2si
13514 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
13515 tree v2si_ftype_v2si
13516 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
13517 tree v2sf_ftype_v2sf
13518 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
13519 tree v2sf_ftype_v2sf_v2sf
13520 = build_function_type_list (V2SF_type_node,
13521 V2SF_type_node, V2SF_type_node, NULL_TREE);
13522 tree v2si_ftype_v2sf_v2sf
13523 = build_function_type_list (V2SI_type_node,
13524 V2SF_type_node, V2SF_type_node, NULL_TREE);
13525 tree pint_type_node = build_pointer_type (integer_type_node);
13526 tree pdouble_type_node = build_pointer_type (double_type_node);
13527 tree pcdouble_type_node = build_pointer_type (
13528 build_type_variant (double_type_node, 1, 0));
13529 tree int_ftype_v2df_v2df
13530 = build_function_type_list (integer_type_node,
13531 V2DF_type_node, V2DF_type_node, NULL_TREE);
13532
13533 tree ti_ftype_ti_ti
13534 = build_function_type_list (intTI_type_node,
13535 intTI_type_node, intTI_type_node, NULL_TREE);
13536 tree void_ftype_pcvoid
13537 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
13538 tree v4sf_ftype_v4si
13539 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
13540 tree v4si_ftype_v4sf
13541 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
13542 tree v2df_ftype_v4si
13543 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
13544 tree v4si_ftype_v2df
13545 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
13546 tree v2si_ftype_v2df
13547 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
13548 tree v4sf_ftype_v2df
13549 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
13550 tree v2df_ftype_v2si
13551 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
13552 tree v2df_ftype_v4sf
13553 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
13554 tree int_ftype_v2df
13555 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
13556 tree int64_ftype_v2df
13557 = build_function_type_list (long_long_integer_type_node,
13558 V2DF_type_node, NULL_TREE);
13559 tree v2df_ftype_v2df_int
13560 = build_function_type_list (V2DF_type_node,
13561 V2DF_type_node, integer_type_node, NULL_TREE);
13562 tree v2df_ftype_v2df_int64
13563 = build_function_type_list (V2DF_type_node,
13564 V2DF_type_node, long_long_integer_type_node,
13565 NULL_TREE);
13566 tree v4sf_ftype_v4sf_v2df
13567 = build_function_type_list (V4SF_type_node,
13568 V4SF_type_node, V2DF_type_node, NULL_TREE);
13569 tree v2df_ftype_v2df_v4sf
13570 = build_function_type_list (V2DF_type_node,
13571 V2DF_type_node, V4SF_type_node, NULL_TREE);
13572 tree v2df_ftype_v2df_v2df_int
13573 = build_function_type_list (V2DF_type_node,
13574 V2DF_type_node, V2DF_type_node,
13575 integer_type_node,
13576 NULL_TREE);
13577 tree v2df_ftype_v2df_pcdouble
13578 = build_function_type_list (V2DF_type_node,
13579 V2DF_type_node, pcdouble_type_node, NULL_TREE);
13580 tree void_ftype_pdouble_v2df
13581 = build_function_type_list (void_type_node,
13582 pdouble_type_node, V2DF_type_node, NULL_TREE);
13583 tree void_ftype_pint_int
13584 = build_function_type_list (void_type_node,
13585 pint_type_node, integer_type_node, NULL_TREE);
13586 tree void_ftype_v16qi_v16qi_pchar
13587 = build_function_type_list (void_type_node,
13588 V16QI_type_node, V16QI_type_node,
13589 pchar_type_node, NULL_TREE);
13590 tree v2df_ftype_pcdouble
13591 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
13592 tree v2df_ftype_v2df_v2df
13593 = build_function_type_list (V2DF_type_node,
13594 V2DF_type_node, V2DF_type_node, NULL_TREE);
13595 tree v16qi_ftype_v16qi_v16qi
13596 = build_function_type_list (V16QI_type_node,
13597 V16QI_type_node, V16QI_type_node, NULL_TREE);
13598 tree v8hi_ftype_v8hi_v8hi
13599 = build_function_type_list (V8HI_type_node,
13600 V8HI_type_node, V8HI_type_node, NULL_TREE);
13601 tree v4si_ftype_v4si_v4si
13602 = build_function_type_list (V4SI_type_node,
13603 V4SI_type_node, V4SI_type_node, NULL_TREE);
13604 tree v2di_ftype_v2di_v2di
13605 = build_function_type_list (V2DI_type_node,
13606 V2DI_type_node, V2DI_type_node, NULL_TREE);
13607 tree v2di_ftype_v2df_v2df
13608 = build_function_type_list (V2DI_type_node,
13609 V2DF_type_node, V2DF_type_node, NULL_TREE);
13610 tree v2df_ftype_v2df
13611 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
13612 tree v2di_ftype_v2di_int
13613 = build_function_type_list (V2DI_type_node,
13614 V2DI_type_node, integer_type_node, NULL_TREE);
13615 tree v4si_ftype_v4si_int
13616 = build_function_type_list (V4SI_type_node,
13617 V4SI_type_node, integer_type_node, NULL_TREE);
13618 tree v8hi_ftype_v8hi_int
13619 = build_function_type_list (V8HI_type_node,
13620 V8HI_type_node, integer_type_node, NULL_TREE);
13621 tree v8hi_ftype_v8hi_v2di
13622 = build_function_type_list (V8HI_type_node,
13623 V8HI_type_node, V2DI_type_node, NULL_TREE);
13624 tree v4si_ftype_v4si_v2di
13625 = build_function_type_list (V4SI_type_node,
13626 V4SI_type_node, V2DI_type_node, NULL_TREE);
13627 tree v4si_ftype_v8hi_v8hi
13628 = build_function_type_list (V4SI_type_node,
13629 V8HI_type_node, V8HI_type_node, NULL_TREE);
13630 tree di_ftype_v8qi_v8qi
13631 = build_function_type_list (long_long_unsigned_type_node,
13632 V8QI_type_node, V8QI_type_node, NULL_TREE);
13633 tree di_ftype_v2si_v2si
13634 = build_function_type_list (long_long_unsigned_type_node,
13635 V2SI_type_node, V2SI_type_node, NULL_TREE);
13636 tree v2di_ftype_v16qi_v16qi
13637 = build_function_type_list (V2DI_type_node,
13638 V16QI_type_node, V16QI_type_node, NULL_TREE);
13639 tree v2di_ftype_v4si_v4si
13640 = build_function_type_list (V2DI_type_node,
13641 V4SI_type_node, V4SI_type_node, NULL_TREE);
13642 tree int_ftype_v16qi
13643 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
13644 tree v16qi_ftype_pcchar
13645 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
13646 tree void_ftype_pchar_v16qi
13647 = build_function_type_list (void_type_node,
13648 pchar_type_node, V16QI_type_node, NULL_TREE);
13649
13650 tree float80_type;
13651 tree float128_type;
13652 tree ftype;
13653
13654 /* The __float80 type. */
13655 if (TYPE_MODE (long_double_type_node) == XFmode)
13656 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
13657 "__float80");
13658 else
13659 {
13660 /* The __float80 type. */
13661 float80_type = make_node (REAL_TYPE);
13662 TYPE_PRECISION (float80_type) = 80;
13663 layout_type (float80_type);
13664 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
13665 }
13666
13667 float128_type = make_node (REAL_TYPE);
13668 TYPE_PRECISION (float128_type) = 128;
13669 layout_type (float128_type);
13670 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
13671
13672 /* Add all builtins that are more or less simple operations on two
13673 operands. */
13674 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
13675 {
13676 /* Use one of the operands; the target can have a different mode for
13677 mask-generating compares. */
13678 enum machine_mode mode;
13679 tree type;
13680
13681 if (d->name == 0)
13682 continue;
13683 mode = insn_data[d->icode].operand[1].mode;
13684
13685 switch (mode)
13686 {
13687 case V16QImode:
13688 type = v16qi_ftype_v16qi_v16qi;
13689 break;
13690 case V8HImode:
13691 type = v8hi_ftype_v8hi_v8hi;
13692 break;
13693 case V4SImode:
13694 type = v4si_ftype_v4si_v4si;
13695 break;
13696 case V2DImode:
13697 type = v2di_ftype_v2di_v2di;
13698 break;
13699 case V2DFmode:
13700 type = v2df_ftype_v2df_v2df;
13701 break;
13702 case TImode:
13703 type = ti_ftype_ti_ti;
13704 break;
13705 case V4SFmode:
13706 type = v4sf_ftype_v4sf_v4sf;
13707 break;
13708 case V8QImode:
13709 type = v8qi_ftype_v8qi_v8qi;
13710 break;
13711 case V4HImode:
13712 type = v4hi_ftype_v4hi_v4hi;
13713 break;
13714 case V2SImode:
13715 type = v2si_ftype_v2si_v2si;
13716 break;
13717 case DImode:
13718 type = di_ftype_di_di;
13719 break;
13720
13721 default:
13722 abort ();
13723 }
13724
13725 /* Override for comparisons. */
13726 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
13727 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
13728 type = v4si_ftype_v4sf_v4sf;
13729
13730 if (d->icode == CODE_FOR_sse2_maskcmpv2df3
13731 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
13732 type = v2di_ftype_v2df_v2df;
13733
13734 def_builtin (d->mask, d->name, type, d->code);
13735 }
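/* Entries whose name field is 0 (the "Special" ones above) are skipped by
   the loop and registered by hand below with more specific argument types,
   e.g. __builtin_ia32_psllw taking a DImode shift count.  */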
13736
13737 /* Add the remaining MMX insns with somewhat more complicated types. */
13738 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
13739 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
13740 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
13741 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
13742
13743 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
13744 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
13745 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
13746
13747 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
13748 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
13749
13750 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
13751 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
13752
13753 /* comi/ucomi insns. */
13754 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
13755 if (d->mask == MASK_SSE2)
13756 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
13757 else
13758 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
13759
13760 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
13761 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
13762 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
13763
13764 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
13765 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
13766 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
13767 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
13768 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
13769 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
13770 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
13771 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
13772 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
13773 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
13774 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
13775
13776 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
13777
13778 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
13779 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
13780
13781 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
13782 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
13783 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
13784 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
13785
13786 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
13787 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
13788 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
13789 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
13790
13791 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
13792
13793 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
13794
13795 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
13796 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
13797 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
13798 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
13799 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
13800 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
13801
13802 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
13803
13804 /* Original 3DNow! */
13805 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
13806 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
13807 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
13808 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
13809 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
13810 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
13811 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
13812 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
13813 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
13814 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
13815 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
13816 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
13817 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
13818 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
13819 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
13820 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
13821 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
13822 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
13823 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
13824 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
13825
13826 /* 3DNow! extension as used in the Athlon CPU. */
13827 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
13828 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
13829 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
13830 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
13831 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
13832 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
13833
13834 /* SSE2 */
13835 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
13836
13837 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
13838 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
13839
13840 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
13841 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
13842
13843 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
13844 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
13845 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
13846 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
13847 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
13848
13849 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
13850 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
13851 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
13852 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
13853
13854 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
13855 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
13856
13857 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
13858
13859 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
13860 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
13861
13862 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
13863 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
13864 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
13865 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
13866 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
13867
13868 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
13869
13870 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
13871 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
13872 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
13873 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
13874
13875 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
13876 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
13877 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
13878
13879 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
13880 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
13881 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
13882 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
13883
13884 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
13885 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
13886 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
13887
13888 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
13889 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
13890
13891 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
13892 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
13893
13894 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
13895 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
13896 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
13897
13898 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
13899 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
13900 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
13901
13902 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
13903 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
13904
13905 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
13906 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
13907 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
13908 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
13909
13910 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
13911 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
13912 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
13913 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
13914
13915 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
13916 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
13917
13918 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
13919
13920 /* Prescott New Instructions. */
13921 def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
13922 void_ftype_pcvoid_unsigned_unsigned,
13923 IX86_BUILTIN_MONITOR);
13924 def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
13925 void_ftype_unsigned_unsigned,
13926 IX86_BUILTIN_MWAIT);
13927 def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
13928 v4sf_ftype_v4sf,
13929 IX86_BUILTIN_MOVSHDUP);
13930 def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
13931 v4sf_ftype_v4sf,
13932 IX86_BUILTIN_MOVSLDUP);
13933 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
13934 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
13935
13936 /* Access to the vec_init patterns. */
13937 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
13938 integer_type_node, NULL_TREE);
13939 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v2si",
13940 ftype, IX86_BUILTIN_VEC_INIT_V2SI);
13941
13942 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
13943 short_integer_type_node,
13944 short_integer_type_node,
13945 short_integer_type_node, NULL_TREE);
13946 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v4hi",
13947 ftype, IX86_BUILTIN_VEC_INIT_V4HI);
13948
13949 ftype = build_function_type_list (V8QI_type_node, char_type_node,
13950 char_type_node, char_type_node,
13951 char_type_node, char_type_node,
13952 char_type_node, char_type_node,
13953 char_type_node, NULL_TREE);
13954 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v8qi",
13955 ftype, IX86_BUILTIN_VEC_INIT_V8QI);
13956
13957 /* Access to the vec_extract patterns. */
13958 ftype = build_function_type_list (double_type_node, V2DF_type_node,
13959 integer_type_node, NULL_TREE);
13960 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2df",
13961 ftype, IX86_BUILTIN_VEC_EXT_V2DF);
13962
13963 ftype = build_function_type_list (long_long_integer_type_node,
13964 V2DI_type_node, integer_type_node,
13965 NULL_TREE);
13966 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2di",
13967 ftype, IX86_BUILTIN_VEC_EXT_V2DI);
13968
13969 ftype = build_function_type_list (float_type_node, V4SF_type_node,
13970 integer_type_node, NULL_TREE);
13971 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4sf",
13972 ftype, IX86_BUILTIN_VEC_EXT_V4SF);
13973
13974 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
13975 integer_type_node, NULL_TREE);
13976 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4si",
13977 ftype, IX86_BUILTIN_VEC_EXT_V4SI);
13978
13979 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
13980 integer_type_node, NULL_TREE);
13981 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v8hi",
13982 ftype, IX86_BUILTIN_VEC_EXT_V8HI);
13983
13984 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
13985 integer_type_node, NULL_TREE);
13986 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_ext_v4hi",
13987 ftype, IX86_BUILTIN_VEC_EXT_V4HI);
13988
13989 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
13990 integer_type_node, NULL_TREE);
13991 def_builtin (MASK_MMX, "__builtin_ia32_vec_ext_v2si",
13992 ftype, IX86_BUILTIN_VEC_EXT_V2SI);
13993
13994 /* Access to the vec_set patterns. */
13995 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
13996 intHI_type_node,
13997 integer_type_node, NULL_TREE);
13998 def_builtin (MASK_SSE, "__builtin_ia32_vec_set_v8hi",
13999 ftype, IX86_BUILTIN_VEC_SET_V8HI);
14000
14001 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
14002 intHI_type_node,
14003 integer_type_node, NULL_TREE);
14004 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_set_v4hi",
14005 ftype, IX86_BUILTIN_VEC_SET_V4HI);
14006 }
14007
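/* Editorial sketch, not part of i386.c: how the vec_init and vec_ext
   builtins registered above are meant to be reached from user code.  Only
   the __builtin_ia32_* names and their argument/return types come from the
   def_builtin calls above; the typedefs and wrapper names are hypothetical.
   Compile the user code with -mmmx -msse so the builtins are defined.  */

typedef int   editor_v2si __attribute__ ((vector_size (8)));
typedef float editor_v4sf __attribute__ ((vector_size (16)));

static __inline__ editor_v2si
editor_init_v2si (int e0, int e1)
{
  /* The first argument becomes element 0 of the result; see the loop in
     ix86_expand_vec_init_builtin below.  */
  return __builtin_ia32_vec_init_v2si (e0, e1);
}

static __inline__ float
editor_extract_v4sf (editor_v4sf v)
{
  /* The selector must be a compile-time constant in 0..3, enforced by
     get_element_number below.  */
  return __builtin_ia32_vec_ext_v4sf (v, 2);
}
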
14008 /* Errors in the source file can cause expand_expr to return const0_rtx
14009 where we expect a vector. To avoid crashing, use one of the vector
14010 clear instructions. */
14011 static rtx
14012 safe_vector_operand (rtx x, enum machine_mode mode)
14013 {
14014 if (x == const0_rtx)
14015 x = CONST0_RTX (mode);
14016 return x;
14017 }
14018
14019 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
14020
14021 static rtx
14022 ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
14023 {
14024 rtx pat, xops[3];
14025 tree arg0 = TREE_VALUE (arglist);
14026 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14027 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14028 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14029 enum machine_mode tmode = insn_data[icode].operand[0].mode;
14030 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
14031 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
14032
14033 if (VECTOR_MODE_P (mode0))
14034 op0 = safe_vector_operand (op0, mode0);
14035 if (VECTOR_MODE_P (mode1))
14036 op1 = safe_vector_operand (op1, mode1);
14037
14038 if (optimize || !target
14039 || GET_MODE (target) != tmode
14040 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14041 target = gen_reg_rtx (tmode);
14042
14043 if (GET_MODE (op1) == SImode && mode1 == TImode)
14044 {
14045 rtx x = gen_reg_rtx (V4SImode);
14046 emit_insn (gen_sse2_loadd (x, op1));
14047 op1 = gen_lowpart (TImode, x);
14048 }
14049
14050 /* In case the insn wants input operands in modes different from
14051 the result, abort. */
14052 if ((GET_MODE (op0) != mode0 && GET_MODE (op0) != VOIDmode)
14053 || (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode))
14054 abort ();
14055
14056 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
14057 op0 = copy_to_mode_reg (mode0, op0);
14058 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
14059 op1 = copy_to_mode_reg (mode1, op1);
14060
14061 /* ??? Using ix86_fixup_binary_operands is problematic when
14062 we've got mismatched modes. Fake it. */
14063
14064 xops[0] = target;
14065 xops[1] = op0;
14066 xops[2] = op1;
14067
14068 if (tmode == mode0 && tmode == mode1)
14069 {
14070 target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
14071 op0 = xops[1];
14072 op1 = xops[2];
14073 }
14074 else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
14075 {
14076 op0 = force_reg (mode0, op0);
14077 op1 = force_reg (mode1, op1);
14078 target = gen_reg_rtx (tmode);
14079 }
14080
14081 pat = GEN_FCN (icode) (target, op0, op1);
14082 if (! pat)
14083 return 0;
14084 emit_insn (pat);
14085 return target;
14086 }
14087
14088 /* Subroutine of ix86_expand_builtin to take care of stores. */
14089
14090 static rtx
14091 ix86_expand_store_builtin (enum insn_code icode, tree arglist)
14092 {
14093 rtx pat;
14094 tree arg0 = TREE_VALUE (arglist);
14095 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14096 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14097 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14098 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
14099 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
14100
14101 if (VECTOR_MODE_P (mode1))
14102 op1 = safe_vector_operand (op1, mode1);
14103
14104 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
14105 op1 = copy_to_mode_reg (mode1, op1);
14106
14107 pat = GEN_FCN (icode) (op0, op1);
14108 if (pat)
14109 emit_insn (pat);
14110 return 0;
14111 }
14112
14113 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
14114
14115 static rtx
14116 ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
14117 rtx target, int do_load)
14118 {
14119 rtx pat;
14120 tree arg0 = TREE_VALUE (arglist);
14121 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14122 enum machine_mode tmode = insn_data[icode].operand[0].mode;
14123 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
14124
14125 if (optimize || !target
14126 || GET_MODE (target) != tmode
14127 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14128 target = gen_reg_rtx (tmode);
14129 if (do_load)
14130 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
14131 else
14132 {
14133 if (VECTOR_MODE_P (mode0))
14134 op0 = safe_vector_operand (op0, mode0);
14135
14136 if ((optimize && !register_operand (op0, mode0))
14137 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14138 op0 = copy_to_mode_reg (mode0, op0);
14139 }
14140
14141 pat = GEN_FCN (icode) (target, op0);
14142 if (! pat)
14143 return 0;
14144 emit_insn (pat);
14145 return target;
14146 }
14147
14148 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
14149 sqrtss, rsqrtss, rcpss. */
14150
14151 static rtx
14152 ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
14153 {
14154 rtx pat;
14155 tree arg0 = TREE_VALUE (arglist);
14156 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14157 enum machine_mode tmode = insn_data[icode].operand[0].mode;
14158 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
14159
14160 if (optimize || !target
14161 || GET_MODE (target) != tmode
14162 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14163 target = gen_reg_rtx (tmode);
14164
14165 if (VECTOR_MODE_P (mode0))
14166 op0 = safe_vector_operand (op0, mode0);
14167
14168 if ((optimize && !register_operand (op0, mode0))
14169 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14170 op0 = copy_to_mode_reg (mode0, op0);
14171
14172 op1 = op0;
14173 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
14174 op1 = copy_to_mode_reg (mode0, op1);
14175
14176 pat = GEN_FCN (icode) (target, op0, op1);
14177 if (! pat)
14178 return 0;
14179 emit_insn (pat);
14180 return target;
14181 }
14182
14183 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
14184
14185 static rtx
14186 ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
14187 rtx target)
14188 {
14189 rtx pat;
14190 tree arg0 = TREE_VALUE (arglist);
14191 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14192 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14193 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14194 rtx op2;
14195 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
14196 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
14197 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
14198 enum rtx_code comparison = d->comparison;
14199
14200 if (VECTOR_MODE_P (mode0))
14201 op0 = safe_vector_operand (op0, mode0);
14202 if (VECTOR_MODE_P (mode1))
14203 op1 = safe_vector_operand (op1, mode1);
14204
14205 /* Swap operands if we have a comparison that isn't available in
14206 hardware. */
14207 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
14208 {
14209 rtx tmp = gen_reg_rtx (mode1);
14210 emit_move_insn (tmp, op1);
14211 op1 = op0;
14212 op0 = tmp;
14213 }
14214
14215 if (optimize || !target
14216 || GET_MODE (target) != tmode
14217 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
14218 target = gen_reg_rtx (tmode);
14219
14220 if ((optimize && !register_operand (op0, mode0))
14221 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
14222 op0 = copy_to_mode_reg (mode0, op0);
14223 if ((optimize && !register_operand (op1, mode1))
14224 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
14225 op1 = copy_to_mode_reg (mode1, op1);
14226
14227 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
14228 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
14229 if (! pat)
14230 return 0;
14231 emit_insn (pat);
14232 return target;
14233 }
14234
14235 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
14236
14237 static rtx
14238 ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
14239 rtx target)
14240 {
14241 rtx pat;
14242 tree arg0 = TREE_VALUE (arglist);
14243 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14244 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14245 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14246 rtx op2;
14247 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
14248 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
14249 enum rtx_code comparison = d->comparison;
14250
14251 if (VECTOR_MODE_P (mode0))
14252 op0 = safe_vector_operand (op0, mode0);
14253 if (VECTOR_MODE_P (mode1))
14254 op1 = safe_vector_operand (op1, mode1);
14255
14256 /* Swap operands if we have a comparison that isn't available in
14257 hardware. */
14258 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
14259 {
14260 rtx tmp = op1;
14261 op1 = op0;
14262 op0 = tmp;
14263 }
14264
14265 target = gen_reg_rtx (SImode);
14266 emit_move_insn (target, const0_rtx);
14267 target = gen_rtx_SUBREG (QImode, target, 0);
14268
14269 if ((optimize && !register_operand (op0, mode0))
14270 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
14271 op0 = copy_to_mode_reg (mode0, op0);
14272 if ((optimize && !register_operand (op1, mode1))
14273 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
14274 op1 = copy_to_mode_reg (mode1, op1);
14275
14276 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
14277 pat = GEN_FCN (d->icode) (op0, op1);
14278 if (! pat)
14279 return 0;
14280 emit_insn (pat);
14281 emit_insn (gen_rtx_SET (VOIDmode,
14282 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
14283 gen_rtx_fmt_ee (comparison, QImode,
14284 SET_DEST (pat),
14285 const0_rtx)));
14286
14287 return SUBREG_REG (target);
14288 }
14289
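/* Editorial note, not part of i386.c: for a comi builtin such as
   __builtin_ia32_comieq the expansion above emits, roughly,

     1. the comiss/comisd pattern comparing OP0 and OP1 and setting the
        flags register, and
     2. a setcc into the low byte (STRICT_LOW_PART of a QImode subreg) of
        an SImode register that was pre-cleared with const0_rtx,

   so the builtin evaluates to the 0/1 outcome of the comparison widened
   to SImode.  */
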
14290 /* Return the integer constant in ARG. Constrain it to be in the range
14291 of the subparts of VEC_TYPE; issue an error if not. */
14292
14293 static int
14294 get_element_number (tree vec_type, tree arg)
14295 {
14296 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
14297
14298 if (!host_integerp (arg, 1)
14299 || (elt = tree_low_cst (arg, 1), elt > max))
14300 {
14301 error ("selector must be an integer constant in the range 0..%i", max);
14302 return 0;
14303 }
14304
14305 return elt;
14306 }
14307
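/* Editorial note, not part of i386.c: a worked instance of the check above.
   For a V8HI operand TYPE_VECTOR_SUBPARTS is 8, so MAX is 7 and only
   selectors 0..7 are accepted:

     __builtin_ia32_vec_ext_v8hi (v, 9);   rejected with "selector must be
                                           an integer constant in the range 0..7"
     __builtin_ia32_vec_ext_v8hi (v, 5);   accepted

   On error the function returns 0, so expansion falls back to element 0
   instead of crashing.  */
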
14308 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
14309 ix86_expand_vector_init. We DO have language-level syntax for this, in
14310 the form of (type){ init-list }. Except that since we can't place emms
14311 instructions from inside the compiler, we can't allow the use of MMX
14312 registers unless the user explicitly asks for it. So we do *not* define
14313 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
14314 we have builtins invoked by mmintrin.h that give us license to emit
14315 these sorts of instructions. */
14316
14317 static rtx
14318 ix86_expand_vec_init_builtin (tree type, tree arglist, rtx target)
14319 {
14320 enum machine_mode tmode = TYPE_MODE (type);
14321 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
14322 int i, n_elt = GET_MODE_NUNITS (tmode);
14323 rtvec v = rtvec_alloc (n_elt);
14324
14325 gcc_assert (VECTOR_MODE_P (tmode));
14326
14327 for (i = 0; i < n_elt; ++i, arglist = TREE_CHAIN (arglist))
14328 {
14329 rtx x = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
14330 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
14331 }
14332
14333 gcc_assert (arglist == NULL);
14334
14335 if (!target || !register_operand (target, tmode))
14336 target = gen_reg_rtx (tmode);
14337
14338 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
14339 return target;
14340 }
14341
14342 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
14343 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
14344 had a language-level syntax for referencing vector elements. */
14345
14346 static rtx
14347 ix86_expand_vec_ext_builtin (tree arglist, rtx target)
14348 {
14349 enum machine_mode tmode, mode0;
14350 tree arg0, arg1;
14351 int elt;
14352 rtx op0;
14353
14354 arg0 = TREE_VALUE (arglist);
14355 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14356
14357 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14358 elt = get_element_number (TREE_TYPE (arg0), arg1);
14359
14360 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
14361 mode0 = TYPE_MODE (TREE_TYPE (arg0));
14362 gcc_assert (VECTOR_MODE_P (mode0));
14363
14364 op0 = force_reg (mode0, op0);
14365
14366 if (optimize || !target || !register_operand (target, tmode))
14367 target = gen_reg_rtx (tmode);
14368
14369 ix86_expand_vector_extract (true, target, op0, elt);
14370
14371 return target;
14372 }
14373
14374 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
14375 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
14376 a language-level syntax for referencing vector elements. */
14377
14378 static rtx
14379 ix86_expand_vec_set_builtin (tree arglist)
14380 {
14381 enum machine_mode tmode, mode1;
14382 tree arg0, arg1, arg2;
14383 int elt;
14384 rtx op0, op1;
14385
14386 arg0 = TREE_VALUE (arglist);
14387 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14388 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14389
14390 tmode = TYPE_MODE (TREE_TYPE (arg0));
14391 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
14392 gcc_assert (VECTOR_MODE_P (tmode));
14393
14394 op0 = expand_expr (arg0, NULL_RTX, tmode, 0);
14395 op1 = expand_expr (arg1, NULL_RTX, mode1, 0);
14396 elt = get_element_number (TREE_TYPE (arg0), arg2);
14397
14398 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
14399 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
14400
14401 op0 = force_reg (tmode, op0);
14402 op1 = force_reg (mode1, op1);
14403
14404 ix86_expand_vector_set (true, op0, op1, elt);
14405
14406 return op0;
14407 }
14408
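/* Editorial sketch, not part of i386.c: user-level shape of the vec_set
   builtins expanded above.  The builtin name and its V8HI (V8HI, short,
   int) signature come from the def_builtin call earlier in this file,
   which registers it under MASK_SSE; the typedef and wrapper name are
   hypothetical.  */

typedef short editor_v8hi __attribute__ ((vector_size (16)));

static __inline__ editor_v8hi
editor_insert_v8hi (editor_v8hi v, short x)
{
  /* Replace element 3; the element number must be a compile-time constant
     in 0..7, checked by get_element_number above.  */
  return __builtin_ia32_vec_set_v8hi (v, x, 3);
}
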
14409 /* Expand an expression EXP that calls a built-in function,
14410 with result going to TARGET if that's convenient
14411 (and in mode MODE if that's convenient).
14412 SUBTARGET may be used as the target for computing one of EXP's operands.
14413 IGNORE is nonzero if the value is to be ignored. */
14414
14415 static rtx
14416 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
14417 enum machine_mode mode ATTRIBUTE_UNUSED,
14418 int ignore ATTRIBUTE_UNUSED)
14419 {
14420 const struct builtin_description *d;
14421 size_t i;
14422 enum insn_code icode;
14423 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
14424 tree arglist = TREE_OPERAND (exp, 1);
14425 tree arg0, arg1, arg2;
14426 rtx op0, op1, op2, pat;
14427 enum machine_mode tmode, mode0, mode1, mode2;
14428 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
14429
14430 switch (fcode)
14431 {
14432 case IX86_BUILTIN_EMMS:
14433 emit_insn (gen_mmx_emms ());
14434 return 0;
14435
14436 case IX86_BUILTIN_SFENCE:
14437 emit_insn (gen_sse_sfence ());
14438 return 0;
14439
14440 case IX86_BUILTIN_MASKMOVQ:
14441 case IX86_BUILTIN_MASKMOVDQU:
14442 icode = (fcode == IX86_BUILTIN_MASKMOVQ
14443 ? CODE_FOR_mmx_maskmovq
14444 : CODE_FOR_sse2_maskmovdqu);
14445 /* Note the arg order is different from the operand order. */
14446 arg1 = TREE_VALUE (arglist);
14447 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
14448 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14449 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14450 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14451 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14452 mode0 = insn_data[icode].operand[0].mode;
14453 mode1 = insn_data[icode].operand[1].mode;
14454 mode2 = insn_data[icode].operand[2].mode;
14455
14456 op0 = force_reg (Pmode, op0);
14457 op0 = gen_rtx_MEM (mode1, op0);
14458
14459 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
14460 op0 = copy_to_mode_reg (mode0, op0);
14461 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
14462 op1 = copy_to_mode_reg (mode1, op1);
14463 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
14464 op2 = copy_to_mode_reg (mode2, op2);
14465 pat = GEN_FCN (icode) (op0, op1, op2);
14466 if (! pat)
14467 return 0;
14468 emit_insn (pat);
14469 return 0;
14470
14471 case IX86_BUILTIN_SQRTSS:
14472 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, arglist, target);
14473 case IX86_BUILTIN_RSQRTSS:
14474 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, arglist, target);
14475 case IX86_BUILTIN_RCPSS:
14476 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, arglist, target);
14477
14478 case IX86_BUILTIN_LOADUPS:
14479 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
14480
14481 case IX86_BUILTIN_STOREUPS:
14482 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
14483
14484 case IX86_BUILTIN_LOADHPS:
14485 case IX86_BUILTIN_LOADLPS:
14486 case IX86_BUILTIN_LOADHPD:
14487 case IX86_BUILTIN_LOADLPD:
14488 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
14489 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
14490 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
14491 : CODE_FOR_sse2_loadlpd);
14492 arg0 = TREE_VALUE (arglist);
14493 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14494 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14495 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14496 tmode = insn_data[icode].operand[0].mode;
14497 mode0 = insn_data[icode].operand[1].mode;
14498 mode1 = insn_data[icode].operand[2].mode;
14499
14500 op0 = force_reg (mode0, op0);
14501 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
14502 if (optimize || target == 0
14503 || GET_MODE (target) != tmode
14504 || !register_operand (target, tmode))
14505 target = gen_reg_rtx (tmode);
14506 pat = GEN_FCN (icode) (target, op0, op1);
14507 if (! pat)
14508 return 0;
14509 emit_insn (pat);
14510 return target;
14511
14512 case IX86_BUILTIN_STOREHPS:
14513 case IX86_BUILTIN_STORELPS:
14514 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
14515 : CODE_FOR_sse_storelps);
14516 arg0 = TREE_VALUE (arglist);
14517 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14518 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14519 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14520 mode0 = insn_data[icode].operand[0].mode;
14521 mode1 = insn_data[icode].operand[1].mode;
14522
14523 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
14524 op1 = force_reg (mode1, op1);
14525
14526 pat = GEN_FCN (icode) (op0, op1);
14527 if (! pat)
14528 return 0;
14529 emit_insn (pat);
14530 return const0_rtx;
14531
14532 case IX86_BUILTIN_MOVNTPS:
14533 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
14534 case IX86_BUILTIN_MOVNTQ:
14535 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
14536
14537 case IX86_BUILTIN_LDMXCSR:
14538 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
14539 target = assign_386_stack_local (SImode, 0);
14540 emit_move_insn (target, op0);
14541 emit_insn (gen_sse_ldmxcsr (target));
14542 return 0;
14543
14544 case IX86_BUILTIN_STMXCSR:
14545 target = assign_386_stack_local (SImode, 0);
14546 emit_insn (gen_sse_stmxcsr (target));
14547 return copy_to_mode_reg (SImode, target);
14548
14549 case IX86_BUILTIN_SHUFPS:
14550 case IX86_BUILTIN_SHUFPD:
14551 icode = (fcode == IX86_BUILTIN_SHUFPS
14552 ? CODE_FOR_sse_shufps
14553 : CODE_FOR_sse2_shufpd);
14554 arg0 = TREE_VALUE (arglist);
14555 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14556 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14557 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14558 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14559 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14560 tmode = insn_data[icode].operand[0].mode;
14561 mode0 = insn_data[icode].operand[1].mode;
14562 mode1 = insn_data[icode].operand[2].mode;
14563 mode2 = insn_data[icode].operand[3].mode;
14564
14565 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14566 op0 = copy_to_mode_reg (mode0, op0);
14567 if ((optimize && !register_operand (op1, mode1))
14568 || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
14569 op1 = copy_to_mode_reg (mode1, op1);
14570 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14571 {
14572 /* @@@ better error message */
14573 error ("mask must be an immediate");
14574 return gen_reg_rtx (tmode);
14575 }
14576 if (optimize || target == 0
14577 || GET_MODE (target) != tmode
14578 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14579 target = gen_reg_rtx (tmode);
14580 pat = GEN_FCN (icode) (target, op0, op1, op2);
14581 if (! pat)
14582 return 0;
14583 emit_insn (pat);
14584 return target;
14585
14586 case IX86_BUILTIN_PSHUFW:
14587 case IX86_BUILTIN_PSHUFD:
14588 case IX86_BUILTIN_PSHUFHW:
14589 case IX86_BUILTIN_PSHUFLW:
14590 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
14591 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
14592 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
14593 : CODE_FOR_mmx_pshufw);
14594 arg0 = TREE_VALUE (arglist);
14595 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14596 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14597 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14598 tmode = insn_data[icode].operand[0].mode;
14599 mode1 = insn_data[icode].operand[1].mode;
14600 mode2 = insn_data[icode].operand[2].mode;
14601
14602 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14603 op0 = copy_to_mode_reg (mode1, op0);
14604 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14605 {
14606 /* @@@ better error message */
14607 error ("mask must be an immediate");
14608 return const0_rtx;
14609 }
14610 if (target == 0
14611 || GET_MODE (target) != tmode
14612 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14613 target = gen_reg_rtx (tmode);
14614 pat = GEN_FCN (icode) (target, op0, op1);
14615 if (! pat)
14616 return 0;
14617 emit_insn (pat);
14618 return target;
14619
14620 case IX86_BUILTIN_PSLLDQI128:
14621 case IX86_BUILTIN_PSRLDQI128:
14622 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
14623 : CODE_FOR_sse2_lshrti3);
14624 arg0 = TREE_VALUE (arglist);
14625 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14626 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14627 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14628 tmode = insn_data[icode].operand[0].mode;
14629 mode1 = insn_data[icode].operand[1].mode;
14630 mode2 = insn_data[icode].operand[2].mode;
14631
14632 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14633 {
14634 op0 = copy_to_reg (op0);
14635 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
14636 }
14637 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14638 {
14639 error ("shift must be an immediate");
14640 return const0_rtx;
14641 }
14642 target = gen_reg_rtx (V2DImode);
14643 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
14644 if (! pat)
14645 return 0;
14646 emit_insn (pat);
14647 return target;
14648
14649 case IX86_BUILTIN_FEMMS:
14650 emit_insn (gen_mmx_femms ());
14651 return NULL_RTX;
14652
14653 case IX86_BUILTIN_PAVGUSB:
14654 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, arglist, target);
14655
14656 case IX86_BUILTIN_PF2ID:
14657 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, arglist, target, 0);
14658
14659 case IX86_BUILTIN_PFACC:
14660 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, arglist, target);
14661
14662 case IX86_BUILTIN_PFADD:
14663 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, arglist, target);
14664
14665 case IX86_BUILTIN_PFCMPEQ:
14666 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, arglist, target);
14667
14668 case IX86_BUILTIN_PFCMPGE:
14669 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, arglist, target);
14670
14671 case IX86_BUILTIN_PFCMPGT:
14672 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, arglist, target);
14673
14674 case IX86_BUILTIN_PFMAX:
14675 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, arglist, target);
14676
14677 case IX86_BUILTIN_PFMIN:
14678 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, arglist, target);
14679
14680 case IX86_BUILTIN_PFMUL:
14681 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, arglist, target);
14682
14683 case IX86_BUILTIN_PFRCP:
14684 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, arglist, target, 0);
14685
14686 case IX86_BUILTIN_PFRCPIT1:
14687 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, arglist, target);
14688
14689 case IX86_BUILTIN_PFRCPIT2:
14690 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, arglist, target);
14691
14692 case IX86_BUILTIN_PFRSQIT1:
14693 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, arglist, target);
14694
14695 case IX86_BUILTIN_PFRSQRT:
14696 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, arglist, target, 0);
14697
14698 case IX86_BUILTIN_PFSUB:
14699 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, arglist, target);
14700
14701 case IX86_BUILTIN_PFSUBR:
14702 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, arglist, target);
14703
14704 case IX86_BUILTIN_PI2FD:
14705 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, arglist, target, 0);
14706
14707 case IX86_BUILTIN_PMULHRW:
14708 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, arglist, target);
14709
14710 case IX86_BUILTIN_PF2IW:
14711 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, arglist, target, 0);
14712
14713 case IX86_BUILTIN_PFNACC:
14714 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, arglist, target);
14715
14716 case IX86_BUILTIN_PFPNACC:
14717 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, arglist, target);
14718
14719 case IX86_BUILTIN_PI2FW:
14720 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, arglist, target, 0);
14721
14722 case IX86_BUILTIN_PSWAPDSI:
14723 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, arglist, target, 0);
14724
14725 case IX86_BUILTIN_PSWAPDSF:
14726 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, arglist, target, 0);
14727
14728 case IX86_BUILTIN_SQRTSD:
14729 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, arglist, target);
14730 case IX86_BUILTIN_LOADUPD:
14731 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
14732 case IX86_BUILTIN_STOREUPD:
14733 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
14734
14735 case IX86_BUILTIN_MFENCE:
14736 emit_insn (gen_sse2_mfence ());
14737 return 0;
14738 case IX86_BUILTIN_LFENCE:
14739 emit_insn (gen_sse2_lfence ());
14740 return 0;
14741
14742 case IX86_BUILTIN_CLFLUSH:
14743 arg0 = TREE_VALUE (arglist);
14744 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14745 icode = CODE_FOR_sse2_clflush;
14746 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
14747 op0 = copy_to_mode_reg (Pmode, op0);
14748
14749 emit_insn (gen_sse2_clflush (op0));
14750 return 0;
14751
14752 case IX86_BUILTIN_MOVNTPD:
14753 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
14754 case IX86_BUILTIN_MOVNTDQ:
14755 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
14756 case IX86_BUILTIN_MOVNTI:
14757 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
14758
14759 case IX86_BUILTIN_LOADDQU:
14760 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
14761 case IX86_BUILTIN_STOREDQU:
14762 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
14763
14764 case IX86_BUILTIN_MONITOR:
14765 arg0 = TREE_VALUE (arglist);
14766 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14767 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14768 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14769 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14770 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14771 if (!REG_P (op0))
14772 op0 = copy_to_mode_reg (SImode, op0);
14773 if (!REG_P (op1))
14774 op1 = copy_to_mode_reg (SImode, op1);
14775 if (!REG_P (op2))
14776 op2 = copy_to_mode_reg (SImode, op2);
14777 emit_insn (gen_sse3_monitor (op0, op1, op2));
14778 return 0;
14779
14780 case IX86_BUILTIN_MWAIT:
14781 arg0 = TREE_VALUE (arglist);
14782 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14783 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14784 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14785 if (!REG_P (op0))
14786 op0 = copy_to_mode_reg (SImode, op0);
14787 if (!REG_P (op1))
14788 op1 = copy_to_mode_reg (SImode, op1);
14789 emit_insn (gen_sse3_mwait (op0, op1));
14790 return 0;
14791
14792 case IX86_BUILTIN_LDDQU:
14793 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, arglist,
14794 target, 1);
14795
14796 case IX86_BUILTIN_VEC_INIT_V2SI:
14797 case IX86_BUILTIN_VEC_INIT_V4HI:
14798 case IX86_BUILTIN_VEC_INIT_V8QI:
14799 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), arglist, target);
14800
14801 case IX86_BUILTIN_VEC_EXT_V2DF:
14802 case IX86_BUILTIN_VEC_EXT_V2DI:
14803 case IX86_BUILTIN_VEC_EXT_V4SF:
14804 case IX86_BUILTIN_VEC_EXT_V4SI:
14805 case IX86_BUILTIN_VEC_EXT_V8HI:
14806 case IX86_BUILTIN_VEC_EXT_V2SI:
14807 case IX86_BUILTIN_VEC_EXT_V4HI:
14808 return ix86_expand_vec_ext_builtin (arglist, target);
14809
14810 case IX86_BUILTIN_VEC_SET_V8HI:
14811 case IX86_BUILTIN_VEC_SET_V4HI:
14812 return ix86_expand_vec_set_builtin (arglist);
14813
14814 default:
14815 break;
14816 }
14817
14818 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
14819 if (d->code == fcode)
14820 {
14821 /* Compares are treated specially. */
14822 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
14823 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
14824 || d->icode == CODE_FOR_sse2_maskcmpv2df3
14825 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
14826 return ix86_expand_sse_compare (d, arglist, target);
14827
14828 return ix86_expand_binop_builtin (d->icode, arglist, target);
14829 }
14830
14831 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
14832 if (d->code == fcode)
14833 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
14834
14835 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
14836 if (d->code == fcode)
14837 return ix86_expand_sse_comi (d, arglist, target);
14838
14839 gcc_unreachable ();
14840 }
14841
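/* Editorial note, not part of i386.c: builtins without a dedicated case in
   the switch above fall through to the table scans at its end.  A plain
   two-operand builtin such as __builtin_ia32_addps is found in bdesc_2arg
   and handed to ix86_expand_binop_builtin; the four mask-compare icodes are
   special-cased into ix86_expand_sse_compare; one-operand builtins go
   through bdesc_1arg, and the comi builtins through bdesc_comi.  */
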
14842 /* Store OPERAND to memory after reload is completed. This means
14843 that we can't easily use assign_stack_local. */
14844 rtx
14845 ix86_force_to_memory (enum machine_mode mode, rtx operand)
14846 {
14847 rtx result;
14848 if (!reload_completed)
14849 abort ();
14850 if (TARGET_RED_ZONE)
14851 {
14852 result = gen_rtx_MEM (mode,
14853 gen_rtx_PLUS (Pmode,
14854 stack_pointer_rtx,
14855 GEN_INT (-RED_ZONE_SIZE)));
14856 emit_move_insn (result, operand);
14857 }
14858 else if (!TARGET_RED_ZONE && TARGET_64BIT)
14859 {
14860 switch (mode)
14861 {
14862 case HImode:
14863 case SImode:
14864 operand = gen_lowpart (DImode, operand);
14865 /* FALLTHRU */
14866 case DImode:
14867 emit_insn (
14868 gen_rtx_SET (VOIDmode,
14869 gen_rtx_MEM (DImode,
14870 gen_rtx_PRE_DEC (DImode,
14871 stack_pointer_rtx)),
14872 operand));
14873 break;
14874 default:
14875 abort ();
14876 }
14877 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14878 }
14879 else
14880 {
14881 switch (mode)
14882 {
14883 case DImode:
14884 {
14885 rtx operands[2];
14886 split_di (&operand, 1, operands, operands + 1);
14887 emit_insn (
14888 gen_rtx_SET (VOIDmode,
14889 gen_rtx_MEM (SImode,
14890 gen_rtx_PRE_DEC (Pmode,
14891 stack_pointer_rtx)),
14892 operands[1]));
14893 emit_insn (
14894 gen_rtx_SET (VOIDmode,
14895 gen_rtx_MEM (SImode,
14896 gen_rtx_PRE_DEC (Pmode,
14897 stack_pointer_rtx)),
14898 operands[0]));
14899 }
14900 break;
14901 case HImode:
14902 /* It is better to store HImodes as SImodes. */
14903 if (!TARGET_PARTIAL_REG_STALL)
14904 operand = gen_lowpart (SImode, operand);
14905 /* FALLTHRU */
14906 case SImode:
14907 emit_insn (
14908 gen_rtx_SET (VOIDmode,
14909 gen_rtx_MEM (GET_MODE (operand),
14910 gen_rtx_PRE_DEC (SImode,
14911 stack_pointer_rtx)),
14912 operand));
14913 break;
14914 default:
14915 abort ();
14916 }
14917 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14918 }
14919 return result;
14920 }
14921
14922 /* Free the operand from memory. */
14923 void
14924 ix86_free_from_memory (enum machine_mode mode)
14925 {
14926 if (!TARGET_RED_ZONE)
14927 {
14928 int size;
14929
14930 if (mode == DImode || TARGET_64BIT)
14931 size = 8;
14932 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
14933 size = 2;
14934 else
14935 size = 4;
14936 /* Use LEA to deallocate stack space. In peephole2 it will be converted
14937 to a pop or add instruction if registers are available. */
14938 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
14939 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14940 GEN_INT (size))));
14941 }
14942 }
14943
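/* Editorial note, not part of i386.c: ix86_force_to_memory and
   ix86_free_from_memory are used as a pair around instructions that need a
   memory operand after reload.  On a 32-bit target, spilling an SImode
   value and releasing it corresponds roughly to

       pushl  %eax              ; ix86_force_to_memory, PRE_DEC of %esp
       ...                      ; use 0(%esp) as the memory operand
       leal   4(%esp), %esp     ; ix86_free_from_memory; peephole2 may turn
                                ; this into a pop or an add

   while a 64-bit target with a red zone just stores the value at
   -RED_ZONE_SIZE(%rsp) and needs no stack adjustment at all.  */
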
14944 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
14945 QImode must go into class Q_REGS.
14946 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
14947 movdf to do mem-to-mem moves through integer regs. */
14948 enum reg_class
14949 ix86_preferred_reload_class (rtx x, enum reg_class class)
14950 {
14951 /* We're only allowed to return a subclass of CLASS. Many of the
14952 following checks fail for NO_REGS, so eliminate that early. */
14953 if (class == NO_REGS)
14954 return NO_REGS;
14955
14956 /* All classes can load zeros. */
14957 if (x == CONST0_RTX (GET_MODE (x)))
14958 return class;
14959
14960 /* Floating-point constants need more complex checks. */
14961 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
14962 {
14963 /* General regs can load everything. */
14964 if (reg_class_subset_p (class, GENERAL_REGS))
14965 return class;
14966
14967 /* Floats can load 0 and 1 plus some others. Note that we eliminated
14968 zero above. We only want to wind up preferring 80387 registers if
14969 we plan on doing computation with them. */
14970 if (TARGET_80387
14971 && (TARGET_MIX_SSE_I387
14972 || !(TARGET_SSE_MATH && SSE_FLOAT_MODE_P (GET_MODE (x))))
14973 && standard_80387_constant_p (x))
14974 {
14975 /* Limit class to non-sse. */
14976 if (class == FLOAT_SSE_REGS)
14977 return FLOAT_REGS;
14978 if (class == FP_TOP_SSE_REGS)
14979 return FP_TOP_REG;
14980 if (class == FP_SECOND_SSE_REGS)
14981 return FP_SECOND_REG;
14982 if (class == FLOAT_INT_REGS || class == FLOAT_REGS)
14983 return class;
14984 }
14985
14986 return NO_REGS;
14987 }
14988 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
14989 return NO_REGS;
14990 if (MAYBE_SSE_CLASS_P (class) && CONSTANT_P (x))
14991 return NO_REGS;
14992
14993 /* Generally when we see PLUS here, it's the function invariant
14994 (plus soft-fp const_int), which can only be computed into general
14995 regs. */
14996 if (GET_CODE (x) == PLUS)
14997 return reg_class_subset_p (class, GENERAL_REGS) ? class : NO_REGS;
14998
14999 /* QImode constants are easy to load, but non-constant QImode data
15000 must go into Q_REGS. */
15001 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
15002 {
15003 if (reg_class_subset_p (class, Q_REGS))
15004 return class;
15005 if (reg_class_subset_p (Q_REGS, class))
15006 return Q_REGS;
15007 return NO_REGS;
15008 }
15009
15010 return class;
15011 }
15012
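/* Editorial note, not part of i386.c: a few concrete outcomes of the
   function above, assuming TARGET_80387 and the default -mfpmath=387:

     x = (const_double:DF 1.0), class = FLOAT_SSE_REGS  -> FLOAT_REGS
         (1.0 passes standard_80387_constant_p, so prefer fld1 to SSE);
     x = (const_double:DF 3.5), class = SSE_REGS        -> NO_REGS
         (force the constant to memory rather than into an SSE register);
     x = non-constant QImode pseudo, class = GENERAL_REGS -> Q_REGS
         (QImode data must end up in a byte-addressable register).  */
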
15013 /* If we are copying between general and FP registers, we need a memory
15014 location. The same is true for SSE and MMX registers.
15015
15016 The macro can't work reliably when one of the CLASSES is a class containing
15017 registers from multiple units (SSE, MMX, integer). We avoid this by never
15018 combining those units in a single alternative in the machine description.
15019 Ensure that this constraint holds to avoid unexpected surprises.
15020
15021 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
15022 enforce these sanity checks. */
15023
15024 int
15025 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
15026 enum machine_mode mode, int strict)
15027 {
15028 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
15029 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
15030 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
15031 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
15032 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
15033 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
15034 {
15035 if (strict)
15036 abort ();
15037 return true;
15038 }
15039
15040 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
15041 return true;
15042
15043 /* ??? This is a lie. We do have moves between mmx/general, and between
15044 mmx/sse2. But by saying we need secondary memory we discourage the
15045 register allocator from using the mmx registers unless needed. */
15046 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
15047 return true;
15048
15049 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
15050 {
15051 /* SSE1 doesn't have any direct moves from other classes. */
15052 if (!TARGET_SSE2)
15053 return true;
15054
15055 /* If the target says that inter-unit moves are more expensive
15056 than moving through memory, then don't generate them. */
15057 if (!TARGET_INTER_UNIT_MOVES && !optimize_size)
15058 return true;
15059
15060 /* Between SSE and general, we have moves no larger than word size. */
15061 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
15062 return true;
15063
15064 /* ??? For the cost of one register reformat penalty, we could use
15065 the same instructions to move SFmode and DFmode data, but the
15066 relevant move patterns don't support those alternatives. */
15067 if (mode == SFmode || mode == DFmode)
15068 return true;
15069 }
15070
15071 return false;
15072 }
15073
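/* Editorial note, not part of i386.c: concrete outcomes of the test above
   on IA-32 with SSE2 enabled:

     GENERAL_REGS <-> SSE_REGS, DImode  -> true (8 bytes > UNITS_PER_WORD,
                                           so the copy must go via memory);
     GENERAL_REGS <-> SSE_REGS, SImode  -> false when TARGET_INTER_UNIT_MOVES
                                           (a direct movd-style move is used);
     GENERAL_REGS <-> FLOAT_REGS, any mode -> true (no direct x87/integer
                                           moves exist).  */
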
15074 /* Return the cost of moving data from a register in class CLASS1 to
15075 one in class CLASS2.
15076
15077 It is not required that the cost always equal 2 when FROM is the same as TO;
15078 on some machines it is expensive to move between registers if they are not
15079 general registers. */
15080
15081 int
15082 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
15083 enum reg_class class2)
15084 {
15085 /* In case we require secondary memory, compute cost of the store followed
15086 by load. In order to avoid bad register allocation choices, we need
15087 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
15088
15089 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
15090 {
15091 int cost = 1;
15092
15093 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
15094 MEMORY_MOVE_COST (mode, class1, 1));
15095 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
15096 MEMORY_MOVE_COST (mode, class2, 1));
15097
15098 /* In case of copying from a general purpose register we may emit multiple
15099 stores followed by a single load, causing a memory size mismatch stall.
15100 Count this as an arbitrarily high cost of 20. */
15101 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
15102 cost += 20;
15103
15104 /* In the case of FP/MMX moves, the registers actually overlap, and we
15105 have to switch modes in order to treat them differently. */
15106 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
15107 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
15108 cost += 20;
15109
15110 return cost;
15111 }
15112
15113 /* Moves between SSE/MMX and integer unit are expensive. */
15114 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
15115 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
15116 return ix86_cost->mmxsse_to_integer;
15117 if (MAYBE_FLOAT_CLASS_P (class1))
15118 return ix86_cost->fp_move;
15119 if (MAYBE_SSE_CLASS_P (class1))
15120 return ix86_cost->sse_move;
15121 if (MAYBE_MMX_CLASS_P (class1))
15122 return ix86_cost->mmx_move;
15123 return 2;
15124 }
15125
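/* Editorial note, not part of i386.c: when secondary memory is required,
   the cost computed above is, schematically,

     1 + MAX (store, load cost of CLASS1) + MAX (store, load cost of CLASS2)
       + 20 if CLASS1 needs more hard registers for MODE than CLASS2
            (multiple stores followed by one load)
       + 20 if the copy crosses the overlapping FP/MMX register file,

   which keeps it at least as high as the symmetric MEMORY_MOVE_COST, as the
   comment above requires.  */
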
15126 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
15127
15128 bool
15129 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
15130 {
15131 /* Flags and only flags can only hold CCmode values. */
15132 if (CC_REGNO_P (regno))
15133 return GET_MODE_CLASS (mode) == MODE_CC;
15134 if (GET_MODE_CLASS (mode) == MODE_CC
15135 || GET_MODE_CLASS (mode) == MODE_RANDOM
15136 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
15137 return 0;
15138 if (FP_REGNO_P (regno))
15139 return VALID_FP_MODE_P (mode);
15140 if (SSE_REGNO_P (regno))
15141 {
15142 /* We implement the move patterns for all vector modes into and
15143 out of SSE registers, even when no operation instructions
15144 are available. */
15145 return (VALID_SSE_REG_MODE (mode)
15146 || VALID_SSE2_REG_MODE (mode)
15147 || VALID_MMX_REG_MODE (mode)
15148 || VALID_MMX_REG_MODE_3DNOW (mode));
15149 }
15150 if (MMX_REGNO_P (regno))
15151 {
15152 /* We implement the move patterns for 3DNOW modes even in MMX mode,
15153 so if the register is available at all, then we can move data of
15154 the given mode into or out of it. */
15155 return (VALID_MMX_REG_MODE (mode)
15156 || VALID_MMX_REG_MODE_3DNOW (mode));
15157 }
15158
15159 if (mode == QImode)
15160 {
15161 /* Take care with QImode values - they can be in non-QI regs,
15162 but then they do cause partial register stalls. */
15163 if (regno < 4 || TARGET_64BIT)
15164 return 1;
15165 if (!TARGET_PARTIAL_REG_STALL)
15166 return 1;
15167 return reload_in_progress || reload_completed;
15168 }
15169 /* We handle both integer and floats in the general purpose registers. */
15170 else if (VALID_INT_MODE_P (mode))
15171 return 1;
15172 else if (VALID_FP_MODE_P (mode))
15173 return 1;
15174 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
15175 on to use that value in smaller contexts, this can easily force a
15176 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
15177 supporting DImode, allow it. */
15178 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
15179 return 1;
15180
15181 return 0;
15182 }
15183
15184 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
15185 tieable integer mode. */
15186
15187 static bool
15188 ix86_tieable_integer_mode_p (enum machine_mode mode)
15189 {
15190 switch (mode)
15191 {
15192 case HImode:
15193 case SImode:
15194 return true;
15195
15196 case QImode:
15197 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
15198
15199 case DImode:
15200 return TARGET_64BIT;
15201
15202 default:
15203 return false;
15204 }
15205 }
15206
15207 /* Return true if MODE1 is accessible in a register that can hold MODE2
15208 without copying. That is, all register classes that can hold MODE2
15209 can also hold MODE1. */
15210
15211 bool
15212 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
15213 {
15214 if (mode1 == mode2)
15215 return true;
15216
15217 if (ix86_tieable_integer_mode_p (mode1)
15218 && ix86_tieable_integer_mode_p (mode2))
15219 return true;
15220
15221 /* MODE2 being XFmode implies fp stack or general regs, which means we
15222 can tie any smaller floating point modes to it. Note that we do not
15223 tie this with TFmode. */
15224 if (mode2 == XFmode)
15225 return mode1 == SFmode || mode1 == DFmode;
15226
15227 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
15228 that we can tie it with SFmode. */
15229 if (mode2 == DFmode)
15230 return mode1 == SFmode;
15231
15232 /* If MODE2 is only appropriate for an SSE register, then tie with
15233 any other mode acceptable to SSE registers. */
15234 if (GET_MODE_SIZE (mode2) >= 8
15235 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
15236 return ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1);
15237
15238 /* If MODE2 is appropriate for an MMX (or SSE) register, then tie
15239 with any other mode acceptable to MMX registers. */
15240 if (GET_MODE_SIZE (mode2) == 8
15241 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
15242 return ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1);
15243
15244 return false;
15245 }
15246
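/* Editorial note, not part of i386.c: some concrete answers given by the
   rules above:

     (mode1 = SFmode, mode2 = XFmode)     -> true
     (mode1 = SFmode, mode2 = DFmode)     -> true
     (mode1 = DFmode, mode2 = SFmode)     -> false (the relation is
         asymmetric: every class that can hold MODE2 must also hold MODE1,
         not the other way around)
     (mode1 = V4SFmode, mode2 = V2DImode) -> true (both are 16-byte modes
         acceptable to SSE registers).  */
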
15247 /* Return the cost of moving data of mode M between a
15248 register and memory. A value of 2 is the default; this cost is
15249 relative to those in `REGISTER_MOVE_COST'.
15250
15251 If moving between registers and memory is more expensive than
15252 between two registers, you should define this macro to express the
15253 relative cost.
15254
15255 Also model the increased cost of moving QImode registers in
15256 non-Q_REGS classes.
15257 */
15258 int
15259 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
15260 {
15261 if (FLOAT_CLASS_P (class))
15262 {
15263 int index;
15264 switch (mode)
15265 {
15266 case SFmode:
15267 index = 0;
15268 break;
15269 case DFmode:
15270 index = 1;
15271 break;
15272 case XFmode:
15273 index = 2;
15274 break;
15275 default:
15276 return 100;
15277 }
15278 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
15279 }
15280 if (SSE_CLASS_P (class))
15281 {
15282 int index;
15283 switch (GET_MODE_SIZE (mode))
15284 {
15285 case 4:
15286 index = 0;
15287 break;
15288 case 8:
15289 index = 1;
15290 break;
15291 case 16:
15292 index = 2;
15293 break;
15294 default:
15295 return 100;
15296 }
15297 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
15298 }
15299 if (MMX_CLASS_P (class))
15300 {
15301 int index;
15302 switch (GET_MODE_SIZE (mode))
15303 {
15304 case 4:
15305 index = 0;
15306 break;
15307 case 8:
15308 index = 1;
15309 break;
15310 default:
15311 return 100;
15312 }
15313 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
15314 }
15315 switch (GET_MODE_SIZE (mode))
15316 {
15317 case 1:
15318 if (in)
15319 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
15320 : ix86_cost->movzbl_load);
15321 else
15322 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
15323 : ix86_cost->int_store[0] + 4);
15324 break;
15325 case 2:
15326 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
15327 default:
15328 /* Compute the number of 32-bit moves needed. TFmode is moved as XFmode. */
15329 if (mode == TFmode)
15330 mode = XFmode;
15331 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
15332 * (((int) GET_MODE_SIZE (mode)
15333 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
15334 }
15335 }
15336
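/* Editorial note, not part of i386.c: two worked instances of the integer
   fall-through above on IA-32 (UNITS_PER_WORD == 4):

     QImode load into a non-Q_REGS class -> ix86_cost->movzbl_load;
     TImode load into GENERAL_REGS       -> ix86_cost->int_load[2] * 4
                                            (16 bytes, four 32-bit moves).

   FLOAT, SSE and MMX classes use the per-size fp/sse/mmx cost tables
   instead, and unrecognized sizes fall back to the arbitrary cost 100.  */
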
15337 /* Compute a (partial) cost for rtx X. Return true if the complete
15338 cost has been computed, and false if subexpressions should be
15339 scanned. In either case, *TOTAL contains the cost result. */
15340
15341 static bool
15342 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
15343 {
15344 enum machine_mode mode = GET_MODE (x);
15345
15346 switch (code)
15347 {
15348 case CONST_INT:
15349 case CONST:
15350 case LABEL_REF:
15351 case SYMBOL_REF:
15352 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
15353 *total = 3;
15354 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
15355 *total = 2;
15356 else if (flag_pic && SYMBOLIC_CONST (x)
15357 && (!TARGET_64BIT
15358 || (GET_CODE (x) != LABEL_REF
15359 && (GET_CODE (x) != SYMBOL_REF
15360 || !SYMBOL_REF_LOCAL_P (x)))))
15361 *total = 1;
15362 else
15363 *total = 0;
15364 return true;
15365
15366 case CONST_DOUBLE:
15367 if (mode == VOIDmode)
15368 *total = 0;
15369 else
15370 switch (standard_80387_constant_p (x))
15371 {
15372 case 1: /* 0.0 */
15373 *total = 1;
15374 break;
15375 default: /* Other constants */
15376 *total = 2;
15377 break;
15378 case 0:
15379 case -1:
15380 /* Start with (MEM (SYMBOL_REF)), since that's where
15381 it'll probably end up. Add a penalty for size. */
15382 *total = (COSTS_N_INSNS (1)
15383 + (flag_pic != 0 && !TARGET_64BIT)
15384 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
15385 break;
15386 }
15387 return true;
15388
15389 case ZERO_EXTEND:
15390 /* Zero extension is often completely free on x86_64, so make
15391 it as cheap as possible. */
15392 if (TARGET_64BIT && mode == DImode
15393 && GET_MODE (XEXP (x, 0)) == SImode)
15394 *total = 1;
15395 else if (TARGET_ZERO_EXTEND_WITH_AND)
15396 *total = COSTS_N_INSNS (ix86_cost->add);
15397 else
15398 *total = COSTS_N_INSNS (ix86_cost->movzx);
15399 return false;
15400
15401 case SIGN_EXTEND:
15402 *total = COSTS_N_INSNS (ix86_cost->movsx);
15403 return false;
15404
15405 case ASHIFT:
15406 if (GET_CODE (XEXP (x, 1)) == CONST_INT
15407 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
15408 {
15409 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
15410 if (value == 1)
15411 {
15412 *total = COSTS_N_INSNS (ix86_cost->add);
15413 return false;
15414 }
15415 if ((value == 2 || value == 3)
15416 && ix86_cost->lea <= ix86_cost->shift_const)
15417 {
15418 *total = COSTS_N_INSNS (ix86_cost->lea);
15419 return false;
15420 }
15421 }
15422 /* FALLTHRU */
15423
15424 case ROTATE:
15425 case ASHIFTRT:
15426 case LSHIFTRT:
15427 case ROTATERT:
15428 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
15429 {
15430 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15431 {
15432 if (INTVAL (XEXP (x, 1)) > 32)
15433 *total = COSTS_N_INSNS(ix86_cost->shift_const + 2);
15434 else
15435 *total = COSTS_N_INSNS(ix86_cost->shift_const * 2);
15436 }
15437 else
15438 {
15439 if (GET_CODE (XEXP (x, 1)) == AND)
15440 *total = COSTS_N_INSNS(ix86_cost->shift_var * 2);
15441 else
15442 *total = COSTS_N_INSNS(ix86_cost->shift_var * 6 + 2);
15443 }
15444 }
15445 else
15446 {
15447 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15448 *total = COSTS_N_INSNS (ix86_cost->shift_const);
15449 else
15450 *total = COSTS_N_INSNS (ix86_cost->shift_var);
15451 }
15452 return false;
15453
15454 case MULT:
15455 if (FLOAT_MODE_P (mode))
15456 {
15457 *total = COSTS_N_INSNS (ix86_cost->fmul);
15458 return false;
15459 }
15460 else
15461 {
15462 rtx op0 = XEXP (x, 0);
15463 rtx op1 = XEXP (x, 1);
15464 int nbits;
15465 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15466 {
15467 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
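/* Editorial comment: VALUE &= VALUE - 1 clears the lowest set bit, so the
   loop below computes the population count of the constant multiplier; the
   multiply cost then grows with the number of one bits.  */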
15468 for (nbits = 0; value != 0; value &= value - 1)
15469 nbits++;
15470 }
15471 else
15472 /* This is arbitrary. */
15473 nbits = 7;
15474
15475 /* Compute costs correctly for widening multiplication. */
15476 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
15477 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
15478 == GET_MODE_SIZE (mode))
15479 {
15480 int is_mulwiden = 0;
15481 enum machine_mode inner_mode = GET_MODE (op0);
15482
15483 if (GET_CODE (op0) == GET_CODE (op1))
15484 is_mulwiden = 1, op1 = XEXP (op1, 0);
15485 else if (GET_CODE (op1) == CONST_INT)
15486 {
15487 if (GET_CODE (op0) == SIGN_EXTEND)
15488 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
15489 == INTVAL (op1);
15490 else
15491 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
15492 }
15493
15494 if (is_mulwiden)
15495 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
15496 }
15497
15498 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
15499 + nbits * ix86_cost->mult_bit)
15500 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code);
15501
15502 return true;
15503 }
15504
15505 case DIV:
15506 case UDIV:
15507 case MOD:
15508 case UMOD:
15509 if (FLOAT_MODE_P (mode))
15510 *total = COSTS_N_INSNS (ix86_cost->fdiv);
15511 else
15512 *total = COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)]);
15513 return false;
15514
15515 case PLUS:
15516 if (FLOAT_MODE_P (mode))
15517 *total = COSTS_N_INSNS (ix86_cost->fadd);
15518 else if (GET_MODE_CLASS (mode) == MODE_INT
15519 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
15520 {
15521 if (GET_CODE (XEXP (x, 0)) == PLUS
15522 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
15523 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
15524 && CONSTANT_P (XEXP (x, 1)))
15525 {
15526 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
15527 if (val == 2 || val == 4 || val == 8)
15528 {
15529 *total = COSTS_N_INSNS (ix86_cost->lea);
15530 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
15531 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
15532 outer_code);
15533 *total += rtx_cost (XEXP (x, 1), outer_code);
15534 return true;
15535 }
15536 }
15537 else if (GET_CODE (XEXP (x, 0)) == MULT
15538 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
15539 {
15540 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
15541 if (val == 2 || val == 4 || val == 8)
15542 {
15543 *total = COSTS_N_INSNS (ix86_cost->lea);
15544 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
15545 *total += rtx_cost (XEXP (x, 1), outer_code);
15546 return true;
15547 }
15548 }
15549 else if (GET_CODE (XEXP (x, 0)) == PLUS)
15550 {
15551 *total = COSTS_N_INSNS (ix86_cost->lea);
15552 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
15553 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
15554 *total += rtx_cost (XEXP (x, 1), outer_code);
15555 return true;
15556 }
15557 }
15558 /* FALLTHRU */
15559
15560 case MINUS:
15561 if (FLOAT_MODE_P (mode))
15562 {
15563 *total = COSTS_N_INSNS (ix86_cost->fadd);
15564 return false;
15565 }
15566 /* FALLTHRU */
15567
15568 case AND:
15569 case IOR:
15570 case XOR:
15571 if (!TARGET_64BIT && mode == DImode)
15572 {
15573 *total = (COSTS_N_INSNS (ix86_cost->add) * 2
15574 + (rtx_cost (XEXP (x, 0), outer_code)
15575 << (GET_MODE (XEXP (x, 0)) != DImode))
15576 + (rtx_cost (XEXP (x, 1), outer_code)
15577 << (GET_MODE (XEXP (x, 1)) != DImode)));
15578 return true;
15579 }
15580 /* FALLTHRU */
15581
15582 case NEG:
15583 if (FLOAT_MODE_P (mode))
15584 {
15585 *total = COSTS_N_INSNS (ix86_cost->fchs);
15586 return false;
15587 }
15588 /* FALLTHRU */
15589
15590 case NOT:
15591 if (!TARGET_64BIT && mode == DImode)
15592 *total = COSTS_N_INSNS (ix86_cost->add * 2);
15593 else
15594 *total = COSTS_N_INSNS (ix86_cost->add);
15595 return false;
15596
15597 case COMPARE:
15598 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
15599 && XEXP (XEXP (x, 0), 1) == const1_rtx
15600 && GET_CODE (XEXP (XEXP (x, 0), 2)) == CONST_INT
15601 && XEXP (x, 1) == const0_rtx)
15602 {
15603 /* This kind of construct is implemented using test[bwl].
15604 Treat it as if we had an AND. */
15605 *total = (COSTS_N_INSNS (ix86_cost->add)
15606 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
15607 + rtx_cost (const1_rtx, outer_code));
15608 return true;
15609 }
15610 return false;
15611
15612 case FLOAT_EXTEND:
15613 if (!TARGET_SSE_MATH
15614 || mode == XFmode
15615 || (mode == DFmode && !TARGET_SSE2))
15616 *total = 0;
15617 return false;
15618
15619 case ABS:
15620 if (FLOAT_MODE_P (mode))
15621 *total = COSTS_N_INSNS (ix86_cost->fabs);
15622 return false;
15623
15624 case SQRT:
15625 if (FLOAT_MODE_P (mode))
15626 *total = COSTS_N_INSNS (ix86_cost->fsqrt);
15627 return false;
15628
15629 case UNSPEC:
15630 if (XINT (x, 1) == UNSPEC_TP)
15631 *total = 0;
15632 return false;
15633
15634 default:
15635 return false;
15636 }
15637 }
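/* Illustrative example of the PLUS cases above: an address-like expression
   such as
	(plus (plus (mult (reg) (const_int 4)) (reg)) (const_int 42))
   has a scale of 4, one of the lea-encodable factors {2, 4, 8}, so it is
   costed as a single lea (ix86_cost->lea) plus the recursive costs of the
   index register, the base register and the displacement, rather than as a
   separate multiply/shift followed by adds.  */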
15638
15639 #if TARGET_MACHO
15640
15641 static int current_machopic_label_num;
15642
15643 /* Given a symbol name and its associated stub, write out the
15644 definition of the stub. */
15645
15646 void
15647 machopic_output_stub (FILE *file, const char *symb, const char *stub)
15648 {
15649 unsigned int length;
15650 char *binder_name, *symbol_name, lazy_ptr_name[32];
15651 int label = ++current_machopic_label_num;
15652
15653 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
15654 symb = (*targetm.strip_name_encoding) (symb);
15655
15656 length = strlen (stub);
15657 binder_name = alloca (length + 32);
15658 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
15659
15660 length = strlen (symb);
15661 symbol_name = alloca (length + 32);
15662 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
15663
15664 sprintf (lazy_ptr_name, "L%d$lz", label);
15665
15666 if (MACHOPIC_PURE)
15667 machopic_picsymbol_stub_section ();
15668 else
15669 machopic_symbol_stub_section ();
15670
15671 fprintf (file, "%s:\n", stub);
15672 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
15673
15674 if (MACHOPIC_PURE)
15675 {
15676 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
15677 fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
15678 fprintf (file, "\tjmp %%edx\n");
15679 }
15680 else
15681 fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
15682
15683 fprintf (file, "%s:\n", binder_name);
15684
15685 if (MACHOPIC_PURE)
15686 {
15687 fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
15688 fprintf (file, "\tpushl %%eax\n");
15689 }
15690 else
15691 fprintf (file, "\t pushl $%s\n", lazy_ptr_name);
15692
15693 fprintf (file, "\tjmp dyld_stub_binding_helper\n");
15694
15695 machopic_lazy_symbol_ptr_section ();
15696 fprintf (file, "%s:\n", lazy_ptr_name);
15697 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
15698 fprintf (file, "\t.long %s\n", binder_name);
15699 }
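/* For illustration only: with MACHOPIC_PURE and label number 1, the fprintf
   calls above emit a stub of roughly this shape, where <stub>, <binder> and
   <symbol> stand for the names produced by GEN_BINDER_NAME_FOR_STUB /
   GEN_SYMBOL_NAME_FOR_SYMBOL and are shown only schematically:

	<stub>:
		.indirect_symbol <symbol>
		call LPC$1
	LPC$1:	popl %eax
		movl L1$lz-LPC$1(%eax),%edx
		jmp %edx
	<binder>:
		lea L1$lz-LPC$1(%eax),%eax
		pushl %eax
		jmp dyld_stub_binding_helper
	L1$lz:
		.indirect_symbol <symbol>
		.long <binder>

   The non-PURE variant instead jumps indirectly through the lazy pointer
   and pushes its address as an immediate.  */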
15700 #endif /* TARGET_MACHO */
15701
15702 /* Order the registers for register allocator. */
15703
15704 void
15705 x86_order_regs_for_local_alloc (void)
15706 {
15707 int pos = 0;
15708 int i;
15709
15710 /* First allocate the local general purpose registers. */
15711 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15712 if (GENERAL_REGNO_P (i) && call_used_regs[i])
15713 reg_alloc_order [pos++] = i;
15714
15715 /* Global general purpose registers. */
15716 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15717 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
15718 reg_alloc_order [pos++] = i;
15719
15720 /* x87 registers come first in case we are doing FP math
15721 using them. */
15722 if (!TARGET_SSE_MATH)
15723 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15724 reg_alloc_order [pos++] = i;
15725
15726 /* SSE registers. */
15727 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
15728 reg_alloc_order [pos++] = i;
15729 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
15730 reg_alloc_order [pos++] = i;
15731
15732 /* x87 registers. */
15733 if (TARGET_SSE_MATH)
15734 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15735 reg_alloc_order [pos++] = i;
15736
15737 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
15738 reg_alloc_order [pos++] = i;
15739
15740 /* Initialize the rest of the array, as we do not allocate some
15741 registers at all. */
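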
15742 while (pos < FIRST_PSEUDO_REGISTER)
15743 reg_alloc_order [pos++] = 0;
15744 }
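/* Illustrative summary of the resulting order: on ia32 with SSE math this
   yields roughly the call-clobbered general registers (eax, edx, ecx, ...)
   first, then the call-saved ones (ebx, esi, edi, ebp, ...), then the SSE
   registers, then the x87 stack registers, and finally the MMX registers;
   with x87 math the x87 stack registers are instead placed ahead of the SSE
   registers so FP values preferentially land in st(0)-st(7).  */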
15745
15746 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
15747 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
15748 #endif
15749
15750 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
15751 struct attribute_spec.handler. */
15752 static tree
15753 ix86_handle_struct_attribute (tree *node, tree name,
15754 tree args ATTRIBUTE_UNUSED,
15755 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
15756 {
15757 tree *type = NULL;
15758 if (DECL_P (*node))
15759 {
15760 if (TREE_CODE (*node) == TYPE_DECL)
15761 type = &TREE_TYPE (*node);
15762 }
15763 else
15764 type = node;
15765
15766 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
15767 || TREE_CODE (*type) == UNION_TYPE)))
15768 {
15769 warning ("%qs attribute ignored", IDENTIFIER_POINTER (name));
15770 *no_add_attrs = true;
15771 }
15772
15773 else if ((is_attribute_p ("ms_struct", name)
15774 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
15775 || ((is_attribute_p ("gcc_struct", name)
15776 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
15777 {
15778 warning ("%qs incompatible attribute ignored",
15779 IDENTIFIER_POINTER (name));
15780 *no_add_attrs = true;
15781 }
15782
15783 return NULL_TREE;
15784 }
15785
15786 static bool
15787 ix86_ms_bitfield_layout_p (tree record_type)
15788 {
15789 return (TARGET_USE_MS_BITFIELD_LAYOUT &&
15790 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
15791 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
15792 }
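/* Illustrative usage: a user can opt a single aggregate into or out of the
   MSVC bit-field layout with the attributes handled above, e.g.

	struct __attribute__ ((ms_struct)) s1 { char c : 1; int i : 4; };
	struct __attribute__ ((gcc_struct)) s2 { char c : 1; int i : 4; };

   ix86_ms_bitfield_layout_p then returns true for s1 and, even under
   TARGET_USE_MS_BITFIELD_LAYOUT, false for s2.  */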
15793
15794 /* Returns an expression indicating where the this parameter is
15795 located on entry to the FUNCTION. */
15796
15797 static rtx
15798 x86_this_parameter (tree function)
15799 {
15800 tree type = TREE_TYPE (function);
15801
15802 if (TARGET_64BIT)
15803 {
15804 int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
15805 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
15806 }
15807
15808 if (ix86_function_regparm (type, function) > 0)
15809 {
15810 tree parm;
15811
15812 parm = TYPE_ARG_TYPES (type);
15813 /* Figure out whether or not the function has a variable number of
15814 arguments. */
15815 for (; parm; parm = TREE_CHAIN (parm))
15816 if (TREE_VALUE (parm) == void_type_node)
15817 break;
15818 /* If not, the this parameter is in the first argument. */
15819 if (parm)
15820 {
15821 int regno = 0;
15822 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
15823 regno = 2;
15824 return gen_rtx_REG (SImode, regno);
15825 }
15826 }
15827
15828 if (aggregate_value_p (TREE_TYPE (type), type))
15829 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
15830 else
15831 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
15832 }
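/* Illustrative cases of the above:
   - 64-bit: THIS arrives in %rdi, or in %rsi when the function returns an
     aggregate in memory (the hidden return pointer then takes %rdi).
   - 32-bit with a prototyped regparm function: THIS is in %eax, or in %ecx
     when the type carries the fastcall attribute.
   - otherwise THIS is on the stack: at 4(%esp) on entry, or at 8(%esp) when
     a hidden aggregate-return pointer occupies 4(%esp).  */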
15833
15834 /* Determine whether x86_output_mi_thunk can succeed. */
15835
15836 static bool
15837 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
15838 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
15839 HOST_WIDE_INT vcall_offset, tree function)
15840 {
15841 /* 64-bit can handle anything. */
15842 if (TARGET_64BIT)
15843 return true;
15844
15845 /* For 32-bit, everything's fine if we have one free register. */
15846 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
15847 return true;
15848
15849 /* Need a free register for vcall_offset. */
15850 if (vcall_offset)
15851 return false;
15852
15853 /* Need a free register for GOT references. */
15854 if (flag_pic && !(*targetm.binds_local_p) (function))
15855 return false;
15856
15857 /* Otherwise ok. */
15858 return true;
15859 }
15860
15861 /* Output the assembler code for a thunk function. THUNK_DECL is the
15862 declaration for the thunk function itself, FUNCTION is the decl for
15863 the target function. DELTA is an immediate constant offset to be
15864 added to THIS. If VCALL_OFFSET is nonzero, the word at
15865 *(*this + vcall_offset) should be added to THIS. */
15866
15867 static void
15868 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
15869 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
15870 HOST_WIDE_INT vcall_offset, tree function)
15871 {
15872 rtx xops[3];
15873 rtx this = x86_this_parameter (function);
15874 rtx this_reg, tmp;
15875
15876 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
15877 pull it in now and let DELTA benefit. */
15878 if (REG_P (this))
15879 this_reg = this;
15880 else if (vcall_offset)
15881 {
15882 /* Put the this parameter into %eax. */
15883 xops[0] = this;
15884 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
15885 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15886 }
15887 else
15888 this_reg = NULL_RTX;
15889
15890 /* Adjust the this parameter by a fixed constant. */
15891 if (delta)
15892 {
15893 xops[0] = GEN_INT (delta);
15894 xops[1] = this_reg ? this_reg : this;
15895 if (TARGET_64BIT)
15896 {
15897 if (!x86_64_general_operand (xops[0], DImode))
15898 {
15899 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15900 xops[1] = tmp;
15901 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
15902 xops[0] = tmp;
15903 xops[1] = this;
15904 }
15905 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15906 }
15907 else
15908 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15909 }
15910
15911 /* Adjust the this parameter by a value stored in the vtable. */
15912 if (vcall_offset)
15913 {
15914 if (TARGET_64BIT)
15915 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15916 else
15917 {
15918 int tmp_regno = 2 /* ECX */;
15919 if (lookup_attribute ("fastcall",
15920 TYPE_ATTRIBUTES (TREE_TYPE (function))))
15921 tmp_regno = 0 /* EAX */;
15922 tmp = gen_rtx_REG (SImode, tmp_regno);
15923 }
15924
15925 xops[0] = gen_rtx_MEM (Pmode, this_reg);
15926 xops[1] = tmp;
15927 if (TARGET_64BIT)
15928 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15929 else
15930 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15931
15932 /* Adjust the this parameter. */
15933 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
15934 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
15935 {
15936 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
15937 xops[0] = GEN_INT (vcall_offset);
15938 xops[1] = tmp2;
15939 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15940 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
15941 }
15942 xops[1] = this_reg;
15943 if (TARGET_64BIT)
15944 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15945 else
15946 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15947 }
15948
15949 /* If necessary, drop THIS back to its stack slot. */
15950 if (this_reg && this_reg != this)
15951 {
15952 xops[0] = this_reg;
15953 xops[1] = this;
15954 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15955 }
15956
15957 xops[0] = XEXP (DECL_RTL (function), 0);
15958 if (TARGET_64BIT)
15959 {
15960 if (!flag_pic || (*targetm.binds_local_p) (function))
15961 output_asm_insn ("jmp\t%P0", xops);
15962 else
15963 {
15964 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
15965 tmp = gen_rtx_CONST (Pmode, tmp);
15966 tmp = gen_rtx_MEM (QImode, tmp);
15967 xops[0] = tmp;
15968 output_asm_insn ("jmp\t%A0", xops);
15969 }
15970 }
15971 else
15972 {
15973 if (!flag_pic || (*targetm.binds_local_p) (function))
15974 output_asm_insn ("jmp\t%P0", xops);
15975 else
15976 #if TARGET_MACHO
15977 if (TARGET_MACHO)
15978 {
15979 rtx sym_ref = XEXP (DECL_RTL (function), 0);
15980 tmp = (gen_rtx_SYMBOL_REF
15981 (Pmode,
15982 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
15983 tmp = gen_rtx_MEM (QImode, tmp);
15984 xops[0] = tmp;
15985 output_asm_insn ("jmp\t%0", xops);
15986 }
15987 else
15988 #endif /* TARGET_MACHO */
15989 {
15990 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
15991 output_set_got (tmp);
15992
15993 xops[1] = tmp;
15994 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
15995 output_asm_insn ("jmp\t{*}%1", xops);
15996 }
15997 }
15998 }
15999
16000 static void
16001 x86_file_start (void)
16002 {
16003 default_file_start ();
16004 if (X86_FILE_START_VERSION_DIRECTIVE)
16005 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
16006 if (X86_FILE_START_FLTUSED)
16007 fputs ("\t.global\t__fltused\n", asm_out_file);
16008 if (ix86_asm_dialect == ASM_INTEL)
16009 fputs ("\t.intel_syntax\n", asm_out_file);
16010 }
16011
16012 int
16013 x86_field_alignment (tree field, int computed)
16014 {
16015 enum machine_mode mode;
16016 tree type = TREE_TYPE (field);
16017
16018 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
16019 return computed;
16020 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
16021 ? get_inner_array_type (type) : type);
16022 if (mode == DFmode || mode == DCmode
16023 || GET_MODE_CLASS (mode) == MODE_INT
16024 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
16025 return MIN (32, computed);
16026 return computed;
16027 }
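/* Illustrative consequence: on ia32 without -malign-double, a "double"
   (DFmode) or "long long" (DImode, caught by the MODE_INT class check)
   structure field has its natural 64-bit alignment capped to 32 bits by the
   MIN above, matching the traditional System V i386 layout; on x86-64 the
   natural alignment is kept unchanged.  */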
16028
16029 /* Output assembler code to FILE to increment profiler label # LABELNO
16030 for profiling a function entry. */
16031 void
16032 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
16033 {
16034 if (TARGET_64BIT)
16035 if (flag_pic)
16036 {
16037 #ifndef NO_PROFILE_COUNTERS
16038 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
16039 #endif
16040 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
16041 }
16042 else
16043 {
16044 #ifndef NO_PROFILE_COUNTERS
16045 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
16046 #endif
16047 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
16048 }
16049 else if (flag_pic)
16050 {
16051 #ifndef NO_PROFILE_COUNTERS
16052 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
16053 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
16054 #endif
16055 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
16056 }
16057 else
16058 {
16059 #ifndef NO_PROFILE_COUNTERS
16060 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
16061 PROFILE_COUNT_REGISTER);
16062 #endif
16063 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
16064 }
16065 }
16066
16067 /* We don't have exact information about the insn sizes, but we may assume
16068 quite safely that we are informed about all 1-byte insns and about memory
16069 address sizes. This is enough to eliminate unnecessary padding in
16070 99% of cases. */
16071
16072 static int
16073 min_insn_size (rtx insn)
16074 {
16075 int l = 0;
16076
16077 if (!INSN_P (insn) || !active_insn_p (insn))
16078 return 0;
16079
16080 /* Discard alignments we've emitted, and jump table instructions. */
16081 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
16082 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
16083 return 0;
16084 if (GET_CODE (insn) == JUMP_INSN
16085 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
16086 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
16087 return 0;
16088
16089 /* Important case - calls are always 5 bytes.
16090 It is common to have many calls in a row. */
16091 if (GET_CODE (insn) == CALL_INSN
16092 && symbolic_reference_mentioned_p (PATTERN (insn))
16093 && !SIBLING_CALL_P (insn))
16094 return 5;
16095 if (get_attr_length (insn) <= 1)
16096 return 1;
16097
16098 /* For normal instructions we may rely on the sizes of addresses
16099 and the presence of a symbol to require 4 bytes of encoding.
16100 This is not the case for jumps, where references are PC-relative. */
16101 if (GET_CODE (insn) != JUMP_INSN)
16102 {
16103 l = get_attr_length_address (insn);
16104 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
16105 l = 4;
16106 }
16107 if (l)
16108 return 1+l;
16109 else
16110 return 2;
16111 }
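/* Illustrative estimates produced by the function above: a direct call to a
   named function counts as exactly 5 bytes; an insn whose length attribute
   is 1 counts as 1; a non-jump insn with a 32-bit address or a symbolic
   operand counts as 1 + 4 = 5; anything else is conservatively assumed to
   take at least 2 bytes.  */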
16112
16113 /* The AMD K8 core mispredicts jumps when there are more than 3 jumps in a
16114 16-byte window. */
16115
16116 static void
16117 ix86_avoid_jump_misspredicts (void)
16118 {
16119 rtx insn, start = get_insns ();
16120 int nbytes = 0, njumps = 0;
16121 int isjump = 0;
16122
16123 /* Look for all minimal intervals of instructions containing 4 jumps.
16124 The intervals are bounded by START and INSN. NBYTES is the total
16125 size of the instructions in the interval, including INSN and not
16126 including START. When NBYTES is smaller than 16 bytes, it is possible
16127 that the end of START and INSN end up in the same 16-byte page.
16128
16129 The smallest offset in the page at which INSN can start is the case
16130 where START ends at offset 0. The offset of INSN is then
16131 NBYTES - sizeof (INSN). We add a p2align to the 16-byte window with
16132 maxskip 17 - NBYTES + sizeof (INSN). */
16133 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
16134 {
16135
16136 nbytes += min_insn_size (insn);
16137 if (dump_file)
16138 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
16139 INSN_UID (insn), min_insn_size (insn));
16140 if ((GET_CODE (insn) == JUMP_INSN
16141 && GET_CODE (PATTERN (insn)) != ADDR_VEC
16142 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
16143 || GET_CODE (insn) == CALL_INSN)
16144 njumps++;
16145 else
16146 continue;
16147
16148 while (njumps > 3)
16149 {
16150 start = NEXT_INSN (start);
16151 if ((GET_CODE (start) == JUMP_INSN
16152 && GET_CODE (PATTERN (start)) != ADDR_VEC
16153 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
16154 || GET_CODE (start) == CALL_INSN)
16155 njumps--, isjump = 1;
16156 else
16157 isjump = 0;
16158 nbytes -= min_insn_size (start);
16159 }
16160 if (njumps < 0)
16161 abort ();
16162 if (dump_file)
16163 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
16164 INSN_UID (start), INSN_UID (insn), nbytes);
16165
16166 if (njumps == 3 && isjump && nbytes < 16)
16167 {
16168 int padsize = 15 - nbytes + min_insn_size (insn);
16169
16170 if (dump_file)
16171 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
16172 INSN_UID (insn), padsize);
16173 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
16174 }
16175 }
16176 }
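/* Worked example of the padding arithmetic above: suppose INSN is the
   fourth jump/call in the current interval, NBYTES is 12 and
   min_insn_size (INSN) is 2.  Since 12 < 16 all four jumps could share one
   16-byte window, so the pass emits a p2align with maximum skip
   padsize = 15 - NBYTES + min_insn_size (INSN) = 5 in front of INSN.  */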
16177
16178 /* The AMD Athlon works faster
16179 when RET is not the destination of a conditional jump or directly preceded
16180 by another jump instruction. We avoid the penalty by inserting a NOP just
16181 before the RET instruction in such cases. */
16182 static void
16183 ix86_pad_returns (void)
16184 {
16185 edge e;
16186 edge_iterator ei;
16187
16188 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
16189 {
16190 basic_block bb = e->src;
16191 rtx ret = BB_END (bb);
16192 rtx prev;
16193 bool replace = false;
16194
16195 if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
16196 || !maybe_hot_bb_p (bb))
16197 continue;
16198 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
16199 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
16200 break;
16201 if (prev && GET_CODE (prev) == CODE_LABEL)
16202 {
16203 edge e;
16204 edge_iterator ei;
16205
16206 FOR_EACH_EDGE (e, ei, bb->preds)
16207 if (EDGE_FREQUENCY (e) && e->src->index >= 0
16208 && !(e->flags & EDGE_FALLTHRU))
16209 replace = true;
16210 }
16211 if (!replace)
16212 {
16213 prev = prev_active_insn (ret);
16214 if (prev
16215 && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
16216 || GET_CODE (prev) == CALL_INSN))
16217 replace = true;
16218 /* Empty functions get a branch mispredict even when the jump destination
16219 is not visible to us. */
16220 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
16221 replace = true;
16222 }
16223 if (replace)
16224 {
16225 emit_insn_before (gen_return_internal_long (), ret);
16226 delete_insn (ret);
16227 }
16228 }
16229 }
16230
16231 /* Implement machine-specific optimizations. We implement padding of returns
16232 for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window. */
16233 static void
16234 ix86_reorg (void)
16235 {
16236 if (TARGET_ATHLON_K8 && optimize && !optimize_size)
16237 ix86_pad_returns ();
16238 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
16239 ix86_avoid_jump_misspredicts ();
16240 }
16241
16242 /* Return nonzero when a QImode register that must be represented via a REX
16243 prefix is used. */
16244 bool
16245 x86_extended_QIreg_mentioned_p (rtx insn)
16246 {
16247 int i;
16248 extract_insn_cached (insn);
16249 for (i = 0; i < recog_data.n_operands; i++)
16250 if (REG_P (recog_data.operand[i])
16251 && REGNO (recog_data.operand[i]) >= 4)
16252 return true;
16253 return false;
16254 }
16255
16256 /* Return nonzero when P points to a register encoded via a REX prefix.
16257 Called via for_each_rtx. */
16258 static int
16259 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
16260 {
16261 unsigned int regno;
16262 if (!REG_P (*p))
16263 return 0;
16264 regno = REGNO (*p);
16265 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
16266 }
16267
16268 /* Return true when INSN mentions a register that must be encoded using a
16269 REX prefix. */
16270 bool
16271 x86_extended_reg_mentioned_p (rtx insn)
16272 {
16273 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
16274 }
16275
16276 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
16277 optabs would emit if we didn't have TFmode patterns. */
16278
16279 void
16280 x86_emit_floatuns (rtx operands[2])
16281 {
16282 rtx neglab, donelab, i0, i1, f0, in, out;
16283 enum machine_mode mode, inmode;
16284
16285 inmode = GET_MODE (operands[1]);
16286 if (inmode != SImode
16287 && inmode != DImode)
16288 abort ();
16289
16290 out = operands[0];
16291 in = force_reg (inmode, operands[1]);
16292 mode = GET_MODE (out);
16293 neglab = gen_label_rtx ();
16294 donelab = gen_label_rtx ();
16295 i1 = gen_reg_rtx (Pmode);
16296 f0 = gen_reg_rtx (mode);
16297
16298 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
16299
16300 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
16301 emit_jump_insn (gen_jump (donelab));
16302 emit_barrier ();
16303
16304 emit_label (neglab);
16305
16306 i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
16307 i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
16308 i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
16309 expand_float (f0, i0, 0);
16310 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
16311
16312 emit_label (donelab);
16313 }
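/* Worked example of the unsigned-to-float fallback above: for an input with
   the sign bit set, say x = 2^63 + 3, a plain signed FLOAT would yield a
   negative value, so the code instead forms
	i0 = (x >> 1) | (x & 1) = 2^62 + 1
   i.e. it halves the value while folding the discarded low bit back in as a
   sticky bit, converts i0 with a signed FLOAT, and doubles the result with
   f0 + f0.  In DFmode both the direct (correctly rounded) conversion of x
   and this two-step route yield 2^63.  */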
16314 \f
16315 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
16316 with all elements equal to VAR. Return true if successful. */
16317
16318 static bool
16319 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
16320 rtx target, rtx val)
16321 {
16322 enum machine_mode smode, wsmode, wvmode;
16323 rtx x;
16324
16325 switch (mode)
16326 {
16327 case V2SImode:
16328 case V2SFmode:
16329 if (!mmx_ok && !TARGET_SSE)
16330 return false;
16331 /* FALLTHRU */
16332
16333 case V2DFmode:
16334 case V2DImode:
16335 case V4SFmode:
16336 case V4SImode:
16337 val = force_reg (GET_MODE_INNER (mode), val);
16338 x = gen_rtx_VEC_DUPLICATE (mode, val);
16339 emit_insn (gen_rtx_SET (VOIDmode, target, x));
16340 return true;
16341
16342 case V4HImode:
16343 if (!mmx_ok)
16344 return false;
16345 if (TARGET_SSE || TARGET_3DNOW_A)
16346 {
16347 val = gen_lowpart (SImode, val);
16348 x = gen_rtx_TRUNCATE (HImode, val);
16349 x = gen_rtx_VEC_DUPLICATE (mode, x);
16350 emit_insn (gen_rtx_SET (VOIDmode, target, x));
16351 return true;
16352 }
16353 else
16354 {
16355 smode = HImode;
16356 wsmode = SImode;
16357 wvmode = V2SImode;
16358 goto widen;
16359 }
16360
16361 case V8QImode:
16362 if (!mmx_ok)
16363 return false;
16364 smode = QImode;
16365 wsmode = HImode;
16366 wvmode = V4HImode;
16367 goto widen;
16368 case V8HImode:
16369 smode = HImode;
16370 wsmode = SImode;
16371 wvmode = V4SImode;
16372 goto widen;
16373 case V16QImode:
16374 smode = QImode;
16375 wsmode = HImode;
16376 wvmode = V8HImode;
16377 goto widen;
16378 widen:
16379 /* Replicate the value once into the next wider mode and recurse. */
16380 val = convert_modes (wsmode, smode, val, true);
16381 x = expand_simple_binop (wsmode, ASHIFT, val,
16382 GEN_INT (GET_MODE_BITSIZE (smode)),
16383 NULL_RTX, 1, OPTAB_LIB_WIDEN);
16384 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
16385
16386 x = gen_reg_rtx (wvmode);
16387 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
16388 gcc_unreachable ();
16389 emit_move_insn (target, gen_lowpart (mode, x));
16390 return true;
16391
16392 default:
16393 return false;
16394 }
16395 }
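/* Worked example of the widening recursion above: broadcasting the QImode
   value 0xAB into V16QImode first builds the HImode value 0xABAB
   (val | (val << 8)) and recurses for V8HImode; that level in turn builds
   the SImode value 0xABABABAB and recurses for V4SImode, where the plain
   VEC_DUPLICATE case finally applies, and each level hands the result back
   through gen_lowpart of the wider vector.  */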
16396
16397 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
16398 whose low element is VAR, and other elements are zero. Return true
16399 if successful. */
16400
16401 static bool
16402 ix86_expand_vector_init_low_nonzero (bool mmx_ok, enum machine_mode mode,
16403 rtx target, rtx var)
16404 {
16405 enum machine_mode vsimode;
16406 rtx x;
16407
16408 switch (mode)
16409 {
16410 case V2SFmode:
16411 case V2SImode:
16412 if (!mmx_ok && !TARGET_SSE)
16413 return false;
16414 /* FALLTHRU */
16415
16416 case V2DFmode:
16417 case V2DImode:
16418 var = force_reg (GET_MODE_INNER (mode), var);
16419 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
16420 emit_insn (gen_rtx_SET (VOIDmode, target, x));
16421 return true;
16422
16423 case V4SFmode:
16424 case V4SImode:
16425 var = force_reg (GET_MODE_INNER (mode), var);
16426 x = gen_rtx_VEC_DUPLICATE (mode, var);
16427 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
16428 emit_insn (gen_rtx_SET (VOIDmode, target, x));
16429 return true;
16430
16431 case V8HImode:
16432 case V16QImode:
16433 vsimode = V4SImode;
16434 goto widen;
16435 case V4HImode:
16436 case V8QImode:
16437 if (!mmx_ok)
16438 return false;
16439 vsimode = V2SImode;
16440 goto widen;
16441 widen:
16442 /* Zero extend the variable element to SImode and recurse. */
16443 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
16444
16445 x = gen_reg_rtx (vsimode);
16446 if (!ix86_expand_vector_init_low_nonzero (mmx_ok, vsimode, x, var))
16447 gcc_unreachable ();
16448
16449 emit_move_insn (target, gen_lowpart (mode, x));
16450 return true;
16451
16452 default:
16453 return false;
16454 }
16455 }
16456
16457 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
16458 consisting of the values in VALS. It is known that all elements
16459 except ONE_VAR are constants. Return true if successful. */
16460
16461 static bool
16462 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
16463 rtx target, rtx vals, int one_var)
16464 {
16465 rtx var = XVECEXP (vals, 0, one_var);
16466 enum machine_mode wmode;
16467 rtx const_vec, x;
16468
16469 XVECEXP (vals, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
16470 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
16471
16472 switch (mode)
16473 {
16474 case V2DFmode:
16475 case V2DImode:
16476 case V2SFmode:
16477 case V2SImode:
16478 /* For the two element vectors, it's just as easy to use
16479 the general case. */
16480 return false;
16481
16482 case V4SFmode:
16483 case V4SImode:
16484 case V8HImode:
16485 case V4HImode:
16486 break;
16487
16488 case V16QImode:
16489 wmode = V8HImode;
16490 goto widen;
16491 case V8QImode:
16492 wmode = V4HImode;
16493 goto widen;
16494 widen:
16495 /* There's no way to set one QImode entry easily. Combine
16496 the variable value with its adjacent constant value, and
16497 promote to an HImode set. */
16498 x = XVECEXP (vals, 0, one_var ^ 1);
16499 if (one_var & 1)
16500 {
16501 var = convert_modes (HImode, QImode, var, true);
16502 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
16503 NULL_RTX, 1, OPTAB_LIB_WIDEN);
16504 x = GEN_INT (INTVAL (x) & 0xff);
16505 }
16506 else
16507 {
16508 var = convert_modes (HImode, QImode, var, true);
16509 x = gen_int_mode (INTVAL (x) << 8, HImode);
16510 }
16511 if (x != const0_rtx)
16512 var = expand_simple_binop (HImode, IOR, var, x, var,
16513 1, OPTAB_LIB_WIDEN);
16514
16515 x = gen_reg_rtx (wmode);
16516 emit_move_insn (x, gen_lowpart (wmode, const_vec));
16517 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
16518
16519 emit_move_insn (target, gen_lowpart (mode, x));
16520 return true;
16521
16522 default:
16523 return false;
16524 }
16525
16526 emit_move_insn (target, const_vec);
16527 ix86_expand_vector_set (mmx_ok, target, var, one_var);
16528 return true;
16529 }
16530
16531 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
16532 all values variable, and none identical. */
16533
16534 static void
16535 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
16536 rtx target, rtx vals)
16537 {
16538 enum machine_mode half_mode = GET_MODE_INNER (mode);
16539 rtx op0 = NULL, op1 = NULL;
16540 bool use_vec_concat = false;
16541
16542 switch (mode)
16543 {
16544 case V2SFmode:
16545 case V2SImode:
16546 if (!mmx_ok && !TARGET_SSE)
16547 break;
16548 /* FALLTHRU */
16549
16550 case V2DFmode:
16551 case V2DImode:
16552 /* For the two element vectors, we always implement VEC_CONCAT. */
16553 op0 = XVECEXP (vals, 0, 0);
16554 op1 = XVECEXP (vals, 0, 1);
16555 use_vec_concat = true;
16556 break;
16557
16558 case V4SFmode:
16559 half_mode = V2SFmode;
16560 goto half;
16561 case V4SImode:
16562 half_mode = V2SImode;
16563 goto half;
16564 half:
16565 {
16566 rtvec v;
16567
16568 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
16569 Recurse to load the two halves. */
16570
16571 op0 = gen_reg_rtx (half_mode);
16572 v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
16573 ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
16574
16575 op1 = gen_reg_rtx (half_mode);
16576 v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
16577 ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
16578
16579 use_vec_concat = true;
16580 }
16581 break;
16582
16583 case V8HImode:
16584 case V16QImode:
16585 case V4HImode:
16586 case V8QImode:
16587 break;
16588
16589 default:
16590 gcc_unreachable ();
16591 }
16592
16593 if (use_vec_concat)
16594 {
16595 if (!register_operand (op0, half_mode))
16596 op0 = force_reg (half_mode, op0);
16597 if (!register_operand (op1, half_mode))
16598 op1 = force_reg (half_mode, op1);
16599
16600 emit_insn (gen_rtx_SET (VOIDmode, target,
16601 gen_rtx_VEC_CONCAT (mode, op0, op1)));
16602 }
16603 else
16604 {
16605 int i, j, n_elts, n_words, n_elt_per_word;
16606 enum machine_mode inner_mode;
16607 rtx words[4], shift;
16608
16609 inner_mode = GET_MODE_INNER (mode);
16610 n_elts = GET_MODE_NUNITS (mode);
16611 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
16612 n_elt_per_word = n_elts / n_words;
16613 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
16614
16615 for (i = 0; i < n_words; ++i)
16616 {
16617 rtx word = NULL_RTX;
16618
16619 for (j = 0; j < n_elt_per_word; ++j)
16620 {
16621 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
16622 elt = convert_modes (word_mode, inner_mode, elt, true);
16623
16624 if (j == 0)
16625 word = elt;
16626 else
16627 {
16628 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
16629 word, 1, OPTAB_LIB_WIDEN);
16630 word = expand_simple_binop (word_mode, IOR, word, elt,
16631 word, 1, OPTAB_LIB_WIDEN);
16632 }
16633 }
16634
16635 words[i] = word;
16636 }
16637
16638 if (n_words == 1)
16639 emit_move_insn (target, gen_lowpart (mode, words[0]));
16640 else if (n_words == 2)
16641 {
16642 rtx tmp = gen_reg_rtx (mode);
16643 emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
16644 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
16645 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
16646 emit_move_insn (target, tmp);
16647 }
16648 else if (n_words == 4)
16649 {
16650 rtx tmp = gen_reg_rtx (V4SImode);
16651 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
16652 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
16653 emit_move_insn (target, gen_lowpart (mode, tmp));
16654 }
16655 else
16656 gcc_unreachable ();
16657 }
16658 }
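/* Worked example of the word-packing fallback above: initializing a
   V8HImode vector from eight variable elements e0..e7 on a 32-bit target
   builds four SImode words
	(e1 << 16) | e0,  (e3 << 16) | e2,  (e5 << 16) | e4,  (e7 << 16) | e6
   and then combines them through the n_words == 4 path, which recurses to
   build a V4SImode vector from the words and takes its lowpart.  */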
16659
16660 /* Initialize vector TARGET via VALS. Suppress the use of MMX
16661 instructions unless MMX_OK is true. */
16662
16663 void
16664 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
16665 {
16666 enum machine_mode mode = GET_MODE (target);
16667 enum machine_mode inner_mode = GET_MODE_INNER (mode);
16668 int n_elts = GET_MODE_NUNITS (mode);
16669 int n_var = 0, one_var = -1;
16670 bool all_same = true, all_const_zero = true;
16671 int i;
16672 rtx x;
16673
16674 for (i = 0; i < n_elts; ++i)
16675 {
16676 x = XVECEXP (vals, 0, i);
16677 if (!CONSTANT_P (x))
16678 n_var++, one_var = i;
16679 else if (x != CONST0_RTX (inner_mode))
16680 all_const_zero = false;
16681 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
16682 all_same = false;
16683 }
16684
16685 /* Constants are best loaded from the constant pool. */
16686 if (n_var == 0)
16687 {
16688 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
16689 return;
16690 }
16691
16692 /* If all values are identical, broadcast the value. */
16693 if (all_same
16694 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
16695 XVECEXP (vals, 0, 0)))
16696 return;
16697
16698 /* Values where only one field is non-constant are best loaded from
16699 the pool and overwritten via move later. */
16700 if (n_var == 1)
16701 {
16702 if (all_const_zero && one_var == 0
16703 && ix86_expand_vector_init_low_nonzero (mmx_ok, mode, target,
16704 XVECEXP (vals, 0, 0)))
16705 return;
16706
16707 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
16708 return;
16709 }
16710
16711 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
16712 }
16713
16714 void
16715 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
16716 {
16717 enum machine_mode mode = GET_MODE (target);
16718 enum machine_mode inner_mode = GET_MODE_INNER (mode);
16719 bool use_vec_merge = false;
16720 rtx tmp;
16721
16722 switch (mode)
16723 {
16724 case V2SFmode:
16725 case V2SImode:
16726 if (mmx_ok)
16727 {
16728 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
16729 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
16730 if (elt == 0)
16731 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
16732 else
16733 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
16734 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
16735 return;
16736 }
16737 break;
16738
16739 case V2DFmode:
16740 case V2DImode:
16741 {
16742 rtx op0, op1;
16743
16744 /* For the two element vectors, we implement a VEC_CONCAT with
16745 the extraction of the other element. */
16746
16747 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
16748 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
16749
16750 if (elt == 0)
16751 op0 = val, op1 = tmp;
16752 else
16753 op0 = tmp, op1 = val;
16754
16755 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
16756 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
16757 }
16758 return;
16759
16760 case V4SFmode:
16761 switch (elt)
16762 {
16763 case 0:
16764 use_vec_merge = true;
16765 break;
16766
16767 case 1:
16768 /* tmp = op0 = A B C D */
16769 tmp = copy_to_reg (target);
16770
16771 /* op0 = C C D D */
16772 emit_insn (gen_sse_unpcklps (target, target, target));
16773
16774 /* op0 = C C D X */
16775 ix86_expand_vector_set (false, target, val, 0);
16776
16777 /* op0 = A B X D */
16778 emit_insn (gen_sse_shufps_1 (target, target, tmp,
16779 GEN_INT (1), GEN_INT (0),
16780 GEN_INT (2+4), GEN_INT (3+4)));
16781 return;
16782
16783 case 2:
16784 tmp = copy_to_reg (target);
16785 ix86_expand_vector_set (false, target, val, 0);
16786 emit_insn (gen_sse_shufps_1 (target, target, tmp,
16787 GEN_INT (0), GEN_INT (1),
16788 GEN_INT (0+4), GEN_INT (3+4)));
16789 return;
16790
16791 case 3:
16792 tmp = copy_to_reg (target);
16793 ix86_expand_vector_set (false, target, val, 0);
16794 emit_insn (gen_sse_shufps_1 (target, target, tmp,
16795 GEN_INT (0), GEN_INT (1),
16796 GEN_INT (2+4), GEN_INT (0+4)));
16797 return;
16798
16799 default:
16800 gcc_unreachable ();
16801 }
16802 break;
16803
16804 case V4SImode:
16805 /* Element 0 handled by vec_merge below. */
16806 if (elt == 0)
16807 {
16808 use_vec_merge = true;
16809 break;
16810 }
16811
16812 if (TARGET_SSE2)
16813 {
16814 /* With SSE2, use integer shuffles to swap element 0 and ELT,
16815 store into element 0, then shuffle them back. */
16816
16817 rtx order[4];
16818
16819 order[0] = GEN_INT (elt);
16820 order[1] = const1_rtx;
16821 order[2] = const2_rtx;
16822 order[3] = GEN_INT (3);
16823 order[elt] = const0_rtx;
16824
16825 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
16826 order[1], order[2], order[3]));
16827
16828 ix86_expand_vector_set (false, target, val, 0);
16829
16830 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
16831 order[1], order[2], order[3]));
16832 }
16833 else
16834 {
16835 /* For SSE1, we have to reuse the V4SF code. */
16836 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
16837 gen_lowpart (SFmode, val), elt);
16838 }
16839 return;
16840
16841 case V8HImode:
16842 use_vec_merge = TARGET_SSE2;
16843 break;
16844 case V4HImode:
16845 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
16846 break;
16847
16848 case V16QImode:
16849 case V8QImode:
16850 default:
16851 break;
16852 }
16853
16854 if (use_vec_merge)
16855 {
16856 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
16857 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
16858 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
16859 }
16860 else
16861 {
16862 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
16863
16864 emit_move_insn (mem, target);
16865
16866 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
16867 emit_move_insn (tmp, val);
16868
16869 emit_move_insn (target, mem);
16870 }
16871 }
16872
16873 void
16874 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
16875 {
16876 enum machine_mode mode = GET_MODE (vec);
16877 enum machine_mode inner_mode = GET_MODE_INNER (mode);
16878 bool use_vec_extr = false;
16879 rtx tmp;
16880
16881 switch (mode)
16882 {
16883 case V2SImode:
16884 case V2SFmode:
16885 if (!mmx_ok)
16886 break;
16887 /* FALLTHRU */
16888
16889 case V2DFmode:
16890 case V2DImode:
16891 use_vec_extr = true;
16892 break;
16893
16894 case V4SFmode:
16895 switch (elt)
16896 {
16897 case 0:
16898 tmp = vec;
16899 break;
16900
16901 case 1:
16902 case 3:
16903 tmp = gen_reg_rtx (mode);
16904 emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
16905 GEN_INT (elt), GEN_INT (elt),
16906 GEN_INT (elt+4), GEN_INT (elt+4)));
16907 break;
16908
16909 case 2:
16910 tmp = gen_reg_rtx (mode);
16911 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
16912 break;
16913
16914 default:
16915 gcc_unreachable ();
16916 }
16917 vec = tmp;
16918 use_vec_extr = true;
16919 elt = 0;
16920 break;
16921
16922 case V4SImode:
16923 if (TARGET_SSE2)
16924 {
16925 switch (elt)
16926 {
16927 case 0:
16928 tmp = vec;
16929 break;
16930
16931 case 1:
16932 case 3:
16933 tmp = gen_reg_rtx (mode);
16934 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
16935 GEN_INT (elt), GEN_INT (elt),
16936 GEN_INT (elt), GEN_INT (elt)));
16937 break;
16938
16939 case 2:
16940 tmp = gen_reg_rtx (mode);
16941 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
16942 break;
16943
16944 default:
16945 gcc_unreachable ();
16946 }
16947 vec = tmp;
16948 use_vec_extr = true;
16949 elt = 0;
16950 }
16951 else
16952 {
16953 /* For SSE1, we have to reuse the V4SF code. */
16954 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
16955 gen_lowpart (V4SFmode, vec), elt);
16956 return;
16957 }
16958 break;
16959
16960 case V8HImode:
16961 use_vec_extr = TARGET_SSE2;
16962 break;
16963 case V4HImode:
16964 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
16965 break;
16966
16967 case V16QImode:
16968 case V8QImode:
16969 /* ??? Could extract the appropriate HImode element and shift. */
16970 default:
16971 break;
16972 }
16973
16974 if (use_vec_extr)
16975 {
16976 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
16977 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
16978
16979 /* Let the rtl optimizers know about the zero extension performed. */
16980 if (inner_mode == HImode)
16981 {
16982 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
16983 target = gen_lowpart (SImode, target);
16984 }
16985
16986 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
16987 }
16988 else
16989 {
16990 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
16991
16992 emit_move_insn (mem, vec);
16993
16994 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
16995 emit_move_insn (target, tmp);
16996 }
16997 }
16998 \f
16999 /* Implements target hook vector_mode_supported_p. */
17000 static bool
17001 ix86_vector_mode_supported_p (enum machine_mode mode)
17002 {
17003 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
17004 return true;
17005 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
17006 return true;
17007 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
17008 return true;
17009 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
17010 return true;
17011 return false;
17012 }
17013
17014 /* Worker function for TARGET_MD_ASM_CLOBBERS.
17015
17016 We do this in the new i386 backend to maintain source compatibility
17017 with the old cc0-based compiler. */
17018
17019 static tree
17020 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
17021 tree inputs ATTRIBUTE_UNUSED,
17022 tree clobbers)
17023 {
17024 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
17025 clobbers);
17026 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
17027 clobbers);
17028 clobbers = tree_cons (NULL_TREE, build_string (7, "dirflag"),
17029 clobbers);
17030 return clobbers;
17031 }
17032
17033 /* Worker function for REVERSE_CONDITION. */
17034
17035 enum rtx_code
17036 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
17037 {
17038 return (mode != CCFPmode && mode != CCFPUmode
17039 ? reverse_condition (code)
17040 : reverse_condition_maybe_unordered (code));
17041 }
17042
17043 /* Output code to perform an x87 FP register move, from OPERANDS[1]
17044 to OPERANDS[0]. */
17045
17046 const char *
17047 output_387_reg_move (rtx insn, rtx *operands)
17048 {
17049 if (REG_P (operands[1])
17050 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
17051 {
17052 if (REGNO (operands[0]) == FIRST_STACK_REG
17053 && TARGET_USE_FFREEP)
17054 return "ffreep\t%y0";
17055 return "fstp\t%y0";
17056 }
17057 if (STACK_TOP_P (operands[0]))
17058 return "fld%z1\t%y1";
17059 return "fst\t%y0";
17060 }
17061
17062 /* Output code to perform a conditional jump to LABEL if the C2 flag in
17063 the FP status register is set. */
17064
17065 void
17066 ix86_emit_fp_unordered_jump (rtx label)
17067 {
17068 rtx reg = gen_reg_rtx (HImode);
17069 rtx temp;
17070
17071 emit_insn (gen_x86_fnstsw_1 (reg));
17072
17073 if (TARGET_USE_SAHF)
17074 {
17075 emit_insn (gen_x86_sahf_1 (reg));
17076
17077 temp = gen_rtx_REG (CCmode, FLAGS_REG);
17078 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
17079 }
17080 else
17081 {
17082 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
17083
17084 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
17085 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
17086 }
17087
17088 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
17089 gen_rtx_LABEL_REF (VOIDmode, label),
17090 pc_rtx);
17091 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
17092 emit_jump_insn (temp);
17093 }
17094
17095 /* Output code to perform a log1p XFmode calculation. */
17096
17097 void ix86_emit_i387_log1p (rtx op0, rtx op1)
17098 {
17099 rtx label1 = gen_label_rtx ();
17100 rtx label2 = gen_label_rtx ();
17101
17102 rtx tmp = gen_reg_rtx (XFmode);
17103 rtx tmp2 = gen_reg_rtx (XFmode);
17104
17105 emit_insn (gen_absxf2 (tmp, op1));
17106 emit_insn (gen_cmpxf (tmp,
17107 CONST_DOUBLE_FROM_REAL_VALUE (
17108 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
17109 XFmode)));
17110 emit_jump_insn (gen_bge (label1));
17111
17112 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
17113 emit_insn (gen_fyl2xp1_xf3 (op0, tmp2, op1));
17114 emit_jump (label2);
17115
17116 emit_label (label1);
17117 emit_move_insn (tmp, CONST1_RTX (XFmode));
17118 emit_insn (gen_addxf3 (tmp, op1, tmp));
17119 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
17120 emit_insn (gen_fyl2x_xf3 (op0, tmp2, tmp));
17121
17122 emit_label (label2);
17123 }
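/* Note on the magic constant above: 0.29289321881... is 1 - sqrt(2)/2, the
   limit up to which the x87 fyl2xp1 instruction is defined.  Below that
   threshold log1p (x) is computed as ln(2) * log2 (1 + x) directly via
   fyl2xp1, preserving precision for tiny x; above it the code falls back to
   fyl2x on the explicitly formed sum 1 + x.  */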
17124
17125 /* Solaris named-section hook. Parameters are as for
17126 named_section_real. */
17127
17128 static void
17129 i386_solaris_elf_named_section (const char *name, unsigned int flags,
17130 tree decl)
17131 {
17132 /* With Binutils 2.15, the "@unwind" marker must be specified on
17133 every occurrence of the ".eh_frame" section, not just the first
17134 one. */
17135 if (TARGET_64BIT
17136 && strcmp (name, ".eh_frame") == 0)
17137 {
17138 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
17139 flags & SECTION_WRITE ? "aw" : "a");
17140 return;
17141 }
17142 default_elf_asm_named_section (name, flags, decl);
17143 }
17144
17145 #include "gt-i386.h"