1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004 Free Software Foundation, Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "tm_p.h"
29 #include "regs.h"
30 #include "hard-reg-set.h"
31 #include "real.h"
32 #include "insn-config.h"
33 #include "conditions.h"
34 #include "output.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
37 #include "flags.h"
38 #include "except.h"
39 #include "function.h"
40 #include "recog.h"
41 #include "expr.h"
42 #include "optabs.h"
43 #include "toplev.h"
44 #include "basic-block.h"
45 #include "ggc.h"
46 #include "target.h"
47 #include "target-def.h"
48 #include "langhooks.h"
49 #include "cgraph.h"
50 #include "tree-gimple.h"
51
52 #ifndef CHECK_STACK_LIMIT
53 #define CHECK_STACK_LIMIT (-1)
54 #endif
55
56 /* Return index of given mode in mult and division cost tables. */
57 #define MODE_INDEX(mode) \
58 ((mode) == QImode ? 0 \
59 : (mode) == HImode ? 1 \
60 : (mode) == SImode ? 2 \
61 : (mode) == DImode ? 3 \
62 : 4)
63
64 /* Processor costs (relative to an add) */
65 static const
66 struct processor_costs size_cost = {	/* costs for tuning for size */
67 2, /* cost of an add instruction */
68 3, /* cost of a lea instruction */
69 2, /* variable shift costs */
70 3, /* constant shift costs */
71 {3, 3, 3, 3, 5}, /* cost of starting a multiply */
72 0, /* cost of multiply per each bit set */
73 {3, 3, 3, 3, 5}, /* cost of a divide/mod */
74 3, /* cost of movsx */
75 3, /* cost of movzx */
76 0, /* "large" insn */
77 2, /* MOVE_RATIO */
78 2, /* cost for loading QImode using movzbl */
79 {2, 2, 2}, /* cost of loading integer registers
80 in QImode, HImode and SImode.
81 Relative to reg-reg move (2). */
82 {2, 2, 2}, /* cost of storing integer registers */
83 2, /* cost of reg,reg fld/fst */
84 {2, 2, 2}, /* cost of loading fp registers
85 in SFmode, DFmode and XFmode */
86   {2, 2, 2},				/* cost of storing fp registers in SFmode, DFmode and XFmode */
87 3, /* cost of moving MMX register */
88 {3, 3}, /* cost of loading MMX registers
89 in SImode and DImode */
90 {3, 3}, /* cost of storing MMX registers
91 in SImode and DImode */
92 3, /* cost of moving SSE register */
93 {3, 3, 3}, /* cost of loading SSE registers
94 in SImode, DImode and TImode */
95 {3, 3, 3}, /* cost of storing SSE registers
96 in SImode, DImode and TImode */
97 3, /* MMX or SSE register to integer */
98 0, /* size of prefetch block */
99 0, /* number of parallel prefetches */
100 1, /* Branch cost */
101 2, /* cost of FADD and FSUB insns. */
102 2, /* cost of FMUL instruction. */
103 2, /* cost of FDIV instruction. */
104 2, /* cost of FABS instruction. */
105 2, /* cost of FCHS instruction. */
106 2, /* cost of FSQRT instruction. */
107 };
108
109 /* Processor costs (relative to an add) */
110 static const
111 struct processor_costs i386_cost = { /* 386 specific costs */
112 1, /* cost of an add instruction */
113 1, /* cost of a lea instruction */
114 3, /* variable shift costs */
115 2, /* constant shift costs */
116 {6, 6, 6, 6, 6}, /* cost of starting a multiply */
117 1, /* cost of multiply per each bit set */
118 {23, 23, 23, 23, 23}, /* cost of a divide/mod */
119 3, /* cost of movsx */
120 2, /* cost of movzx */
121 15, /* "large" insn */
122 3, /* MOVE_RATIO */
123 4, /* cost for loading QImode using movzbl */
124 {2, 4, 2}, /* cost of loading integer registers
125 in QImode, HImode and SImode.
126 Relative to reg-reg move (2). */
127 {2, 4, 2}, /* cost of storing integer registers */
128 2, /* cost of reg,reg fld/fst */
129 {8, 8, 8}, /* cost of loading fp registers
130 in SFmode, DFmode and XFmode */
131   {8, 8, 8},				/* cost of storing fp registers in SFmode, DFmode and XFmode */
132 2, /* cost of moving MMX register */
133 {4, 8}, /* cost of loading MMX registers
134 in SImode and DImode */
135 {4, 8}, /* cost of storing MMX registers
136 in SImode and DImode */
137 2, /* cost of moving SSE register */
138 {4, 8, 16}, /* cost of loading SSE registers
139 in SImode, DImode and TImode */
140 {4, 8, 16}, /* cost of storing SSE registers
141 in SImode, DImode and TImode */
142 3, /* MMX or SSE register to integer */
143 0, /* size of prefetch block */
144 0, /* number of parallel prefetches */
145 1, /* Branch cost */
146 23, /* cost of FADD and FSUB insns. */
147 27, /* cost of FMUL instruction. */
148 88, /* cost of FDIV instruction. */
149 22, /* cost of FABS instruction. */
150 24, /* cost of FCHS instruction. */
151 122, /* cost of FSQRT instruction. */
152 };
153
154 static const
155 struct processor_costs i486_cost = { /* 486 specific costs */
156 1, /* cost of an add instruction */
157 1, /* cost of a lea instruction */
158 3, /* variable shift costs */
159 2, /* constant shift costs */
160 {12, 12, 12, 12, 12}, /* cost of starting a multiply */
161 1, /* cost of multiply per each bit set */
162 {40, 40, 40, 40, 40}, /* cost of a divide/mod */
163 3, /* cost of movsx */
164 2, /* cost of movzx */
165 15, /* "large" insn */
166 3, /* MOVE_RATIO */
167 4, /* cost for loading QImode using movzbl */
168 {2, 4, 2}, /* cost of loading integer registers
169 in QImode, HImode and SImode.
170 Relative to reg-reg move (2). */
171 {2, 4, 2}, /* cost of storing integer registers */
172 2, /* cost of reg,reg fld/fst */
173 {8, 8, 8}, /* cost of loading fp registers
174 in SFmode, DFmode and XFmode */
175   {8, 8, 8},				/* cost of storing fp registers in SFmode, DFmode and XFmode */
176 2, /* cost of moving MMX register */
177 {4, 8}, /* cost of loading MMX registers
178 in SImode and DImode */
179 {4, 8}, /* cost of storing MMX registers
180 in SImode and DImode */
181 2, /* cost of moving SSE register */
182 {4, 8, 16}, /* cost of loading SSE registers
183 in SImode, DImode and TImode */
184 {4, 8, 16}, /* cost of storing SSE registers
185 in SImode, DImode and TImode */
186 3, /* MMX or SSE register to integer */
187 0, /* size of prefetch block */
188 0, /* number of parallel prefetches */
189 1, /* Branch cost */
190 8, /* cost of FADD and FSUB insns. */
191 16, /* cost of FMUL instruction. */
192 73, /* cost of FDIV instruction. */
193 3, /* cost of FABS instruction. */
194 3, /* cost of FCHS instruction. */
195 83, /* cost of FSQRT instruction. */
196 };
197
198 static const
199 struct processor_costs pentium_cost = {
200 1, /* cost of an add instruction */
201 1, /* cost of a lea instruction */
202 4, /* variable shift costs */
203 1, /* constant shift costs */
204 {11, 11, 11, 11, 11}, /* cost of starting a multiply */
205 0, /* cost of multiply per each bit set */
206 {25, 25, 25, 25, 25}, /* cost of a divide/mod */
207 3, /* cost of movsx */
208 2, /* cost of movzx */
209 8, /* "large" insn */
210 6, /* MOVE_RATIO */
211 6, /* cost for loading QImode using movzbl */
212 {2, 4, 2}, /* cost of loading integer registers
213 in QImode, HImode and SImode.
214 Relative to reg-reg move (2). */
215 {2, 4, 2}, /* cost of storing integer registers */
216 2, /* cost of reg,reg fld/fst */
217 {2, 2, 6}, /* cost of loading fp registers
218 in SFmode, DFmode and XFmode */
219   {4, 4, 6},				/* cost of storing fp registers in SFmode, DFmode and XFmode */
220 8, /* cost of moving MMX register */
221 {8, 8}, /* cost of loading MMX registers
222 in SImode and DImode */
223 {8, 8}, /* cost of storing MMX registers
224 in SImode and DImode */
225 2, /* cost of moving SSE register */
226 {4, 8, 16}, /* cost of loading SSE registers
227 in SImode, DImode and TImode */
228 {4, 8, 16}, /* cost of storing SSE registers
229 in SImode, DImode and TImode */
230 3, /* MMX or SSE register to integer */
231 0, /* size of prefetch block */
232 0, /* number of parallel prefetches */
233 2, /* Branch cost */
234 3, /* cost of FADD and FSUB insns. */
235 3, /* cost of FMUL instruction. */
236 39, /* cost of FDIV instruction. */
237 1, /* cost of FABS instruction. */
238 1, /* cost of FCHS instruction. */
239 70, /* cost of FSQRT instruction. */
240 };
241
242 static const
243 struct processor_costs pentiumpro_cost = {
244 1, /* cost of an add instruction */
245 1, /* cost of a lea instruction */
246 1, /* variable shift costs */
247 1, /* constant shift costs */
248 {4, 4, 4, 4, 4}, /* cost of starting a multiply */
249 0, /* cost of multiply per each bit set */
250 {17, 17, 17, 17, 17}, /* cost of a divide/mod */
251 1, /* cost of movsx */
252 1, /* cost of movzx */
253 8, /* "large" insn */
254 6, /* MOVE_RATIO */
255 2, /* cost for loading QImode using movzbl */
256 {4, 4, 4}, /* cost of loading integer registers
257 in QImode, HImode and SImode.
258 Relative to reg-reg move (2). */
259 {2, 2, 2}, /* cost of storing integer registers */
260 2, /* cost of reg,reg fld/fst */
261 {2, 2, 6}, /* cost of loading fp registers
262 in SFmode, DFmode and XFmode */
263   {4, 4, 6},				/* cost of storing fp registers in SFmode, DFmode and XFmode */
264 2, /* cost of moving MMX register */
265 {2, 2}, /* cost of loading MMX registers
266 in SImode and DImode */
267 {2, 2}, /* cost of storing MMX registers
268 in SImode and DImode */
269 2, /* cost of moving SSE register */
270 {2, 2, 8}, /* cost of loading SSE registers
271 in SImode, DImode and TImode */
272 {2, 2, 8}, /* cost of storing SSE registers
273 in SImode, DImode and TImode */
274 3, /* MMX or SSE register to integer */
275 32, /* size of prefetch block */
276 6, /* number of parallel prefetches */
277 2, /* Branch cost */
278 3, /* cost of FADD and FSUB insns. */
279 5, /* cost of FMUL instruction. */
280 56, /* cost of FDIV instruction. */
281 2, /* cost of FABS instruction. */
282 2, /* cost of FCHS instruction. */
283 56, /* cost of FSQRT instruction. */
284 };
285
286 static const
287 struct processor_costs k6_cost = {
288 1, /* cost of an add instruction */
289 2, /* cost of a lea instruction */
290 1, /* variable shift costs */
291 1, /* constant shift costs */
292 {3, 3, 3, 3, 3}, /* cost of starting a multiply */
293 0, /* cost of multiply per each bit set */
294 {18, 18, 18, 18, 18}, /* cost of a divide/mod */
295 2, /* cost of movsx */
296 2, /* cost of movzx */
297 8, /* "large" insn */
298 4, /* MOVE_RATIO */
299 3, /* cost for loading QImode using movzbl */
300 {4, 5, 4}, /* cost of loading integer registers
301 in QImode, HImode and SImode.
302 Relative to reg-reg move (2). */
303 {2, 3, 2}, /* cost of storing integer registers */
304 4, /* cost of reg,reg fld/fst */
305 {6, 6, 6}, /* cost of loading fp registers
306 in SFmode, DFmode and XFmode */
307   {4, 4, 4},				/* cost of storing fp registers in SFmode, DFmode and XFmode */
308 2, /* cost of moving MMX register */
309 {2, 2}, /* cost of loading MMX registers
310 in SImode and DImode */
311 {2, 2}, /* cost of storing MMX registers
312 in SImode and DImode */
313 2, /* cost of moving SSE register */
314 {2, 2, 8}, /* cost of loading SSE registers
315 in SImode, DImode and TImode */
316 {2, 2, 8}, /* cost of storing SSE registers
317 in SImode, DImode and TImode */
318 6, /* MMX or SSE register to integer */
319 32, /* size of prefetch block */
320 1, /* number of parallel prefetches */
321 1, /* Branch cost */
322 2, /* cost of FADD and FSUB insns. */
323 2, /* cost of FMUL instruction. */
324 56, /* cost of FDIV instruction. */
325 2, /* cost of FABS instruction. */
326 2, /* cost of FCHS instruction. */
327 56, /* cost of FSQRT instruction. */
328 };
329
330 static const
331 struct processor_costs athlon_cost = {
332 1, /* cost of an add instruction */
333 2, /* cost of a lea instruction */
334 1, /* variable shift costs */
335 1, /* constant shift costs */
336 {5, 5, 5, 5, 5}, /* cost of starting a multiply */
337 0, /* cost of multiply per each bit set */
338 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
339 1, /* cost of movsx */
340 1, /* cost of movzx */
341 8, /* "large" insn */
342 9, /* MOVE_RATIO */
343 4, /* cost for loading QImode using movzbl */
344 {3, 4, 3}, /* cost of loading integer registers
345 in QImode, HImode and SImode.
346 Relative to reg-reg move (2). */
347 {3, 4, 3}, /* cost of storing integer registers */
348 4, /* cost of reg,reg fld/fst */
349 {4, 4, 12}, /* cost of loading fp registers
350 in SFmode, DFmode and XFmode */
351   {6, 6, 8},				/* cost of storing fp registers in SFmode, DFmode and XFmode */
352 2, /* cost of moving MMX register */
353 {4, 4}, /* cost of loading MMX registers
354 in SImode and DImode */
355 {4, 4}, /* cost of storing MMX registers
356 in SImode and DImode */
357 2, /* cost of moving SSE register */
358 {4, 4, 6}, /* cost of loading SSE registers
359 in SImode, DImode and TImode */
360 {4, 4, 5}, /* cost of storing SSE registers
361 in SImode, DImode and TImode */
362 5, /* MMX or SSE register to integer */
363 64, /* size of prefetch block */
364 6, /* number of parallel prefetches */
365 2, /* Branch cost */
366 4, /* cost of FADD and FSUB insns. */
367 4, /* cost of FMUL instruction. */
368 24, /* cost of FDIV instruction. */
369 2, /* cost of FABS instruction. */
370 2, /* cost of FCHS instruction. */
371 35, /* cost of FSQRT instruction. */
372 };
373
374 static const
375 struct processor_costs k8_cost = {
376 1, /* cost of an add instruction */
377 2, /* cost of a lea instruction */
378 1, /* variable shift costs */
379 1, /* constant shift costs */
380 {3, 4, 3, 4, 5}, /* cost of starting a multiply */
381 0, /* cost of multiply per each bit set */
382 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
383 1, /* cost of movsx */
384 1, /* cost of movzx */
385 8, /* "large" insn */
386 9, /* MOVE_RATIO */
387 4, /* cost for loading QImode using movzbl */
388 {3, 4, 3}, /* cost of loading integer registers
389 in QImode, HImode and SImode.
390 Relative to reg-reg move (2). */
391 {3, 4, 3}, /* cost of storing integer registers */
392 4, /* cost of reg,reg fld/fst */
393 {4, 4, 12}, /* cost of loading fp registers
394 in SFmode, DFmode and XFmode */
395   {6, 6, 8},				/* cost of storing fp registers in SFmode, DFmode and XFmode */
396 2, /* cost of moving MMX register */
397 {3, 3}, /* cost of loading MMX registers
398 in SImode and DImode */
399 {4, 4}, /* cost of storing MMX registers
400 in SImode and DImode */
401 2, /* cost of moving SSE register */
402 {4, 3, 6}, /* cost of loading SSE registers
403 in SImode, DImode and TImode */
404 {4, 4, 5}, /* cost of storing SSE registers
405 in SImode, DImode and TImode */
406 5, /* MMX or SSE register to integer */
407 64, /* size of prefetch block */
408 6, /* number of parallel prefetches */
409 2, /* Branch cost */
410 4, /* cost of FADD and FSUB insns. */
411 4, /* cost of FMUL instruction. */
412 19, /* cost of FDIV instruction. */
413 2, /* cost of FABS instruction. */
414 2, /* cost of FCHS instruction. */
415 35, /* cost of FSQRT instruction. */
416 };
417
418 static const
419 struct processor_costs pentium4_cost = {
420 1, /* cost of an add instruction */
421 3, /* cost of a lea instruction */
422 4, /* variable shift costs */
423 4, /* constant shift costs */
424 {15, 15, 15, 15, 15}, /* cost of starting a multiply */
425 0, /* cost of multiply per each bit set */
426 {56, 56, 56, 56, 56}, /* cost of a divide/mod */
427 1, /* cost of movsx */
428 1, /* cost of movzx */
429 16, /* "large" insn */
430 6, /* MOVE_RATIO */
431 2, /* cost for loading QImode using movzbl */
432 {4, 5, 4}, /* cost of loading integer registers
433 in QImode, HImode and SImode.
434 Relative to reg-reg move (2). */
435 {2, 3, 2}, /* cost of storing integer registers */
436 2, /* cost of reg,reg fld/fst */
437 {2, 2, 6}, /* cost of loading fp registers
438 in SFmode, DFmode and XFmode */
439   {4, 4, 6},				/* cost of storing fp registers in SFmode, DFmode and XFmode */
440 2, /* cost of moving MMX register */
441 {2, 2}, /* cost of loading MMX registers
442 in SImode and DImode */
443 {2, 2}, /* cost of storing MMX registers
444 in SImode and DImode */
445 12, /* cost of moving SSE register */
446 {12, 12, 12}, /* cost of loading SSE registers
447 in SImode, DImode and TImode */
448 {2, 2, 8}, /* cost of storing SSE registers
449 in SImode, DImode and TImode */
450 10, /* MMX or SSE register to integer */
451 64, /* size of prefetch block */
452 6, /* number of parallel prefetches */
453 2, /* Branch cost */
454 5, /* cost of FADD and FSUB insns. */
455 7, /* cost of FMUL instruction. */
456 43, /* cost of FDIV instruction. */
457 2, /* cost of FABS instruction. */
458 2, /* cost of FCHS instruction. */
459 43, /* cost of FSQRT instruction. */
460 };
461
462 static const
463 struct processor_costs nocona_cost = {
464 1, /* cost of an add instruction */
465 1, /* cost of a lea instruction */
466 1, /* variable shift costs */
467 1, /* constant shift costs */
468 {10, 10, 10, 10, 10}, /* cost of starting a multiply */
469 0, /* cost of multiply per each bit set */
470 {66, 66, 66, 66, 66}, /* cost of a divide/mod */
471 1, /* cost of movsx */
472 1, /* cost of movzx */
473 16, /* "large" insn */
474 9, /* MOVE_RATIO */
475 4, /* cost for loading QImode using movzbl */
476 {4, 4, 4}, /* cost of loading integer registers
477 in QImode, HImode and SImode.
478 Relative to reg-reg move (2). */
479 {4, 4, 4}, /* cost of storing integer registers */
480 3, /* cost of reg,reg fld/fst */
481 {12, 12, 12}, /* cost of loading fp registers
482 in SFmode, DFmode and XFmode */
483   {4, 4, 4},				/* cost of storing fp registers in SFmode, DFmode and XFmode */
484 6, /* cost of moving MMX register */
485 {12, 12}, /* cost of loading MMX registers
486 in SImode and DImode */
487 {12, 12}, /* cost of storing MMX registers
488 in SImode and DImode */
489 6, /* cost of moving SSE register */
490 {12, 12, 12}, /* cost of loading SSE registers
491 in SImode, DImode and TImode */
492 {12, 12, 12}, /* cost of storing SSE registers
493 in SImode, DImode and TImode */
494 8, /* MMX or SSE register to integer */
495 128, /* size of prefetch block */
496 8, /* number of parallel prefetches */
497 1, /* Branch cost */
498 6, /* cost of FADD and FSUB insns. */
499 8, /* cost of FMUL instruction. */
500 40, /* cost of FDIV instruction. */
501 3, /* cost of FABS instruction. */
502 3, /* cost of FCHS instruction. */
503 44, /* cost of FSQRT instruction. */
504 };
505
506 const struct processor_costs *ix86_cost = &pentium_cost;
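/* Illustrative sketch (not part of the original file): how a cost-table
   entry and MODE_INDEX are meant to combine.  The field names mult_init
   and mult_bit are assumed here from struct processor_costs in i386.h:

       cost = ix86_cost->mult_init[MODE_INDEX (mode)]      start-up cost
              + bits_set * ix86_cost->mult_bit;            per bit set

   With -mtune=pentium (ix86_cost == &pentium_cost above), an SImode
   multiply therefore starts at cost 11 relative to an add of cost 1.  */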
507
508 /* Processor feature/optimization bitmasks. */
509 #define m_386 (1<<PROCESSOR_I386)
510 #define m_486 (1<<PROCESSOR_I486)
511 #define m_PENT (1<<PROCESSOR_PENTIUM)
512 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
513 #define m_K6 (1<<PROCESSOR_K6)
514 #define m_ATHLON (1<<PROCESSOR_ATHLON)
515 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
516 #define m_K8 (1<<PROCESSOR_K8)
517 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
518 #define m_NOCONA (1<<PROCESSOR_NOCONA)
519
520 const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
521 const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
522 const int x86_zero_extend_with_and = m_486 | m_PENT;
523 const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA /* m_386 | m_K6 */;
524 const int x86_double_with_add = ~m_386;
525 const int x86_use_bit_test = m_386;
526 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
527 const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
528 const int x86_3dnow_a = m_ATHLON_K8;
529 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
530 /* Branch hints were put in P4 based on simulation results.  But
531    after P4 was made, no performance benefit was observed with branch
532    hints.  They also increase code size.  As a result, icc never
533    generates branch hints.  */
534 const int x86_branch_hints = 0;
535 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4 | m_NOCONA;
536 const int x86_partial_reg_stall = m_PPRO;
537 const int x86_use_loop = m_K6;
538 const int x86_use_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
539 const int x86_use_mov0 = m_K6;
540 const int x86_use_cltd = ~(m_PENT | m_K6);
541 const int x86_read_modify_write = ~m_PENT;
542 const int x86_read_modify = ~(m_PENT | m_PPRO);
543 const int x86_split_long_moves = m_PPRO;
544 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
545 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
546 const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
547 const int x86_qimode_math = ~(0);
548 const int x86_promote_qi_regs = 0;
549 const int x86_himode_math = ~(m_PPRO);
550 const int x86_promote_hi_regs = m_PPRO;
551 const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA;
552 const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA;
553 const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4 | m_NOCONA;
554 const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4 | m_NOCONA;
555 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO);
556 const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA;
557 const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA;
558 const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO;
559 const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO;
560 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO;
561 const int x86_decompose_lea = m_PENT4 | m_NOCONA;
562 const int x86_shift1 = ~m_486;
563 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
564 const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO;
565 /* Set for machines where the type and dependencies are resolved on SSE register
566    parts instead of whole registers, so we may maintain just the lower part of
567    scalar values in the proper format, leaving the upper part undefined.  */
568 const int x86_sse_partial_regs = m_ATHLON_K8;
569 /* Athlon optimizes the partial-register FP special case, thus avoiding the
570    need for extra instructions beforehand.  */
571 const int x86_sse_partial_regs_for_cvtsd2ss = 0;
572 const int x86_sse_typeless_stores = m_ATHLON_K8;
573 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
574 const int x86_use_ffreep = m_ATHLON_K8;
575 const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
576 const int x86_inter_unit_moves = ~(m_ATHLON_K8);
577 const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_NOCONA | m_PPRO;
578 /* Some CPU cores are not able to predict more than 4 branch instructions in
579    a 16-byte window.  */
580 const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
581 const int x86_schedule = m_PPRO | m_ATHLON_K8 | m_K8 | m_PENT;
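/* Illustrative sketch (an assumption about i386.h, which is not shown
   here): each of the masks above is normally consumed through a TARGET_*
   tuning macro of roughly this shape,

       #define TUNEMASK (1 << ix86_tune)
       #define TARGET_USE_LEAVE (x86_use_leave & TUNEMASK)

   so a feature applies whenever the bit for the CPU selected by -mtune
   is set in the corresponding mask.  */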
582
583 /* If the average insn count for a single function invocation is
584    lower than this constant, emit a fast (but longer) prologue and
585    epilogue.  */
586 #define FAST_PROLOGUE_INSN_COUNT 20
587
588 /* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively.  */
589 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
590 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
591 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
592
593 /* Array of the smallest class containing reg number REGNO, indexed by
594 REGNO. Used by REGNO_REG_CLASS in i386.h. */
595
596 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
597 {
598 /* ax, dx, cx, bx */
599 AREG, DREG, CREG, BREG,
600 /* si, di, bp, sp */
601 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
602 /* FP registers */
603 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
604 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
605 /* arg pointer */
606 NON_Q_REGS,
607 /* flags, fpsr, dirflag, frame */
608 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
609 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
610 SSE_REGS, SSE_REGS,
611 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
612 MMX_REGS, MMX_REGS,
613 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
614 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
615 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
616 SSE_REGS, SSE_REGS,
617 };
618
619 /* The "default" register map used in 32bit mode. */
620
621 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
622 {
623 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
624 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
625 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
626 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
627 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
628 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
629 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
630 };
631
632 static int const x86_64_int_parameter_registers[6] =
633 {
634 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
635 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
636 };
637
638 static int const x86_64_int_return_registers[4] =
639 {
640   0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
641 };
642
643 /* The "default" register map used in 64bit mode. */
644 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
645 {
646 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
647 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
648 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
649 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
650 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
651 8,9,10,11,12,13,14,15, /* extended integer registers */
652 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
653 };
654
655 /* Define the register numbers to be used in Dwarf debugging information.
656 The SVR4 reference port C compiler uses the following register numbers
657 in its Dwarf output code:
658 0 for %eax (gcc regno = 0)
659 1 for %ecx (gcc regno = 2)
660 2 for %edx (gcc regno = 1)
661 3 for %ebx (gcc regno = 3)
662 4 for %esp (gcc regno = 7)
663 5 for %ebp (gcc regno = 6)
664 6 for %esi (gcc regno = 4)
665 7 for %edi (gcc regno = 5)
666 The following three DWARF register numbers are never generated by
667 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
668 believes these numbers have these meanings.
669 8 for %eip (no gcc equivalent)
670 9 for %eflags (gcc regno = 17)
671 10 for %trapno (no gcc equivalent)
672 It is not at all clear how we should number the FP stack registers
673 for the x86 architecture. If the version of SDB on x86/svr4 were
674 a bit less brain dead with respect to floating-point then we would
675 have a precedent to follow with respect to DWARF register numbers
676 for x86 FP registers, but the SDB on x86/svr4 is so completely
677 broken with respect to FP registers that it is hardly worth thinking
678 of it as something to strive for compatibility with.
679 The version of x86/svr4 SDB I have at the moment does (partially)
680 seem to believe that DWARF register number 11 is associated with
681 the x86 register %st(0), but that's about all. Higher DWARF
682 register numbers don't seem to be associated with anything in
683 particular, and even for DWARF regno 11, SDB only seems to under-
684 stand that it should say that a variable lives in %st(0) (when
685 asked via an `=' command) if we said it was in DWARF regno 11,
686 but SDB still prints garbage when asked for the value of the
687 variable in question (via a `/' command).
688 (Also note that the labels SDB prints for various FP stack regs
689 when doing an `x' command are all wrong.)
690 Note that these problems generally don't affect the native SVR4
691 C compiler because it doesn't allow the use of -O with -g and
692 because when it is *not* optimizing, it allocates a memory
693 location for each floating-point variable, and the memory
694 location is what gets described in the DWARF AT_location
695 attribute for the variable in question.
696 Regardless of the severe mental illness of the x86/svr4 SDB, we
697 do something sensible here and we use the following DWARF
698 register numbers. Note that these are all stack-top-relative
699 numbers.
700 11 for %st(0) (gcc regno = 8)
701 12 for %st(1) (gcc regno = 9)
702 13 for %st(2) (gcc regno = 10)
703 14 for %st(3) (gcc regno = 11)
704 15 for %st(4) (gcc regno = 12)
705 16 for %st(5) (gcc regno = 13)
706 17 for %st(6) (gcc regno = 14)
707 18 for %st(7) (gcc regno = 15)
708 */
709 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
710 {
711 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
712 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
713 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
714 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
715 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
716 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
717 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
718 };
719
720 /* Test and compare insns in i386.md store the information needed to
721 generate branch and scc insns here. */
722
723 rtx ix86_compare_op0 = NULL_RTX;
724 rtx ix86_compare_op1 = NULL_RTX;
725
726 #define MAX_386_STACK_LOCALS 3
727 /* Size of the register save area. */
728 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
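/* Worked example (illustrative; assumes the usual 64-bit values of
   REGPARM_MAX == 6 integer registers of UNITS_PER_WORD == 8 bytes and
   SSE_REGPARM_MAX == 8 SSE registers of 16 bytes each):

       6 * 8 + 8 * 16 = 176 bytes

   which is the size of the x86-64 va_arg register save area.  */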
729
730 /* Define the structure for the machine field in struct function. */
731
732 struct stack_local_entry GTY(())
733 {
734 unsigned short mode;
735 unsigned short n;
736 rtx rtl;
737 struct stack_local_entry *next;
738 };
739
740 /* Structure describing stack frame layout.
741 Stack grows downward:
742
743 [arguments]
744 <- ARG_POINTER
745 saved pc
746
747 saved frame pointer if frame_pointer_needed
748 <- HARD_FRAME_POINTER
749 [saved regs]
750
751 [padding1] \
752 )
753 [va_arg registers] (
754 > to_allocate <- FRAME_POINTER
755 [frame] (
756 )
757 [padding2] /
758 */
759 struct ix86_frame
760 {
761 int nregs;
762 int padding1;
763 int va_arg_size;
764 HOST_WIDE_INT frame;
765 int padding2;
766 int outgoing_arguments_size;
767 int red_zone_size;
768
769 HOST_WIDE_INT to_allocate;
770 /* The offsets relative to ARG_POINTER. */
771 HOST_WIDE_INT frame_pointer_offset;
772 HOST_WIDE_INT hard_frame_pointer_offset;
773 HOST_WIDE_INT stack_pointer_offset;
774
775 /* When save_regs_using_mov is set, emit prologue using
776 move instead of push instructions. */
777 bool save_regs_using_mov;
778 };
779
780 /* Used to enable/disable debugging features. */
781 const char *ix86_debug_arg_string, *ix86_debug_addr_string;
782 /* Code model option as passed by user. */
783 const char *ix86_cmodel_string;
784 /* Parsed value. */
785 enum cmodel ix86_cmodel;
786 /* Asm dialect. */
787 const char *ix86_asm_string;
788 enum asm_dialect ix86_asm_dialect = ASM_ATT;
789 /* TLS dialect.  */
790 const char *ix86_tls_dialect_string;
791 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
792
793 /* Which unit we are generating floating point math for. */
794 enum fpmath_unit ix86_fpmath;
795
796 /* Which CPU we are scheduling for.  */
797 enum processor_type ix86_tune;
798 /* Which instruction set architecture to use. */
799 enum processor_type ix86_arch;
800
801 /* Strings to hold which cpu and instruction set architecture to use. */
802 const char *ix86_tune_string; /* for -mtune=<xxx> */
803 const char *ix86_arch_string; /* for -march=<xxx> */
804 const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
805
806 /* # of registers to use to pass arguments. */
807 const char *ix86_regparm_string;
808
809 /* True if the SSE prefetch instruction is not a NOP.  */
810 int x86_prefetch_sse;
811
812 /* ix86_regparm_string as a number */
813 int ix86_regparm;
814
815 /* Alignment to use for loops and jumps: */
816
817 /* Power of two alignment for loops. */
818 const char *ix86_align_loops_string;
819
820 /* Power of two alignment for non-loop jumps. */
821 const char *ix86_align_jumps_string;
822
823 /* Power of two alignment for stack boundary in bytes. */
824 const char *ix86_preferred_stack_boundary_string;
825
826 /* Preferred alignment for stack boundary in bits. */
827 unsigned int ix86_preferred_stack_boundary;
828
829 /* Values 1-5: see jump.c */
830 int ix86_branch_cost;
831 const char *ix86_branch_cost_string;
832
833 /* Power of two alignment for functions. */
834 const char *ix86_align_funcs_string;
835
836 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
837 char internal_label_prefix[16];
838 int internal_label_prefix_len;
839 \f
840 static void output_pic_addr_const (FILE *, rtx, int);
841 static void put_condition_code (enum rtx_code, enum machine_mode,
842 int, int, FILE *);
843 static const char *get_some_local_dynamic_name (void);
844 static int get_some_local_dynamic_name_1 (rtx *, void *);
845 static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
846 static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
847 rtx *);
848 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
849 static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
850 enum machine_mode);
851 static rtx get_thread_pointer (int);
852 static rtx legitimize_tls_address (rtx, enum tls_model, int);
853 static void get_pc_thunk_name (char [32], unsigned int);
854 static rtx gen_push (rtx);
855 static int ix86_flags_dependant (rtx, rtx, enum attr_type);
856 static int ix86_agi_dependant (rtx, rtx, enum attr_type);
857 static struct machine_function * ix86_init_machine_status (void);
858 static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
859 static int ix86_nsaved_regs (void);
860 static void ix86_emit_save_regs (void);
861 static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
862 static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
863 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
864 static HOST_WIDE_INT ix86_GOT_alias_set (void);
865 static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
866 static rtx ix86_expand_aligntest (rtx, int);
867 static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
868 static int ix86_issue_rate (void);
869 static int ix86_adjust_cost (rtx, rtx, rtx, int);
870 static int ia32_multipass_dfa_lookahead (void);
871 static void ix86_init_mmx_sse_builtins (void);
872 static rtx x86_this_parameter (tree);
873 static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
874 HOST_WIDE_INT, tree);
875 static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
876 static void x86_file_start (void);
877 static void ix86_reorg (void);
878 static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
879 static tree ix86_build_builtin_va_list (void);
880 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
881 tree, int *, int);
882 static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
883 static bool ix86_vector_mode_supported_p (enum machine_mode);
884
885 static int ix86_address_cost (rtx);
886 static bool ix86_cannot_force_const_mem (rtx);
887 static rtx ix86_delegitimize_address (rtx);
888
889 struct builtin_description;
890 static rtx ix86_expand_sse_comi (const struct builtin_description *,
891 tree, rtx);
892 static rtx ix86_expand_sse_compare (const struct builtin_description *,
893 tree, rtx);
894 static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
895 static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
896 static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
897 static rtx ix86_expand_store_builtin (enum insn_code, tree);
898 static rtx safe_vector_operand (rtx, enum machine_mode);
899 static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
900 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
901 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
902 static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
903 static int ix86_fp_comparison_cost (enum rtx_code code);
904 static unsigned int ix86_select_alt_pic_regnum (void);
905 static int ix86_save_reg (unsigned int, int);
906 static void ix86_compute_frame_layout (struct ix86_frame *);
907 static int ix86_comp_type_attributes (tree, tree);
908 static int ix86_function_regparm (tree, tree);
909 const struct attribute_spec ix86_attribute_table[];
910 static bool ix86_function_ok_for_sibcall (tree, tree);
911 static tree ix86_handle_cdecl_attribute (tree *, tree, tree, int, bool *);
912 static tree ix86_handle_regparm_attribute (tree *, tree, tree, int, bool *);
913 static int ix86_value_regno (enum machine_mode);
914 static bool contains_128bit_aligned_vector_p (tree);
915 static rtx ix86_struct_value_rtx (tree, int);
916 static bool ix86_ms_bitfield_layout_p (tree);
917 static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
918 static int extended_reg_mentioned_1 (rtx *, void *);
919 static bool ix86_rtx_costs (rtx, int, int, int *);
920 static int min_insn_size (rtx);
921 static tree ix86_md_asm_clobbers (tree clobbers);
922 static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type);
923 static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
924 tree, bool);
925
926 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
927 static void ix86_svr3_asm_out_constructor (rtx, int);
928 #endif
929
930 /* Register class used for passing a given 64-bit part of the argument.
931    These represent classes as documented by the psABI, with the exception
932    of the SSESF and SSEDF classes, which are basically the SSE class: GCC
933    just uses SFmode or DFmode moves instead of DImode to avoid reformatting
934    penalties.
935
936    Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
937    whenever possible (the upper half contains only padding).  */
938 enum x86_64_reg_class
939 {
940 X86_64_NO_CLASS,
941 X86_64_INTEGER_CLASS,
942 X86_64_INTEGERSI_CLASS,
943 X86_64_SSE_CLASS,
944 X86_64_SSESF_CLASS,
945 X86_64_SSEDF_CLASS,
946 X86_64_SSEUP_CLASS,
947 X86_64_X87_CLASS,
948 X86_64_X87UP_CLASS,
949 X86_64_MEMORY_CLASS
950 };
951 static const char * const x86_64_reg_class_name[] =
952 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
953
954 #define MAX_CLASSES 4
955 static int classify_argument (enum machine_mode, tree,
956 enum x86_64_reg_class [MAX_CLASSES], int);
957 static int examine_argument (enum machine_mode, tree, int, int *, int *);
958 static rtx construct_container (enum machine_mode, tree, int, int, int,
959 const int *, int);
960 static enum x86_64_reg_class merge_classes (enum x86_64_reg_class,
961 enum x86_64_reg_class);
962
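/* Classification sketch (illustrative only, following the psABI rules
   summarized above; the aggregate is hypothetical):

       struct s { double d; int i; };

   occupies two eightbytes.  classify_argument is expected to label the
   first one (the double) X86_64_SSEDF_CLASS, so a DFmode move can be used,
   and the second one (the int plus four bytes of tail padding)
   X86_64_INTEGERSI_CLASS, so the cheaper SImode move suffices.  */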
963 /* Table of constants used by fldpi, fldln2, etc.... */
964 static REAL_VALUE_TYPE ext_80387_constants_table [5];
965 static bool ext_80387_constants_init = 0;
966 static void init_ext_80387_constants (void);
967 \f
968 /* Initialize the GCC target structure. */
969 #undef TARGET_ATTRIBUTE_TABLE
970 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
971 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
972 # undef TARGET_MERGE_DECL_ATTRIBUTES
973 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
974 #endif
975
976 #undef TARGET_COMP_TYPE_ATTRIBUTES
977 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
978
979 #undef TARGET_INIT_BUILTINS
980 #define TARGET_INIT_BUILTINS ix86_init_builtins
981
982 #undef TARGET_EXPAND_BUILTIN
983 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
984
985 #undef TARGET_ASM_FUNCTION_EPILOGUE
986 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
987
988 #undef TARGET_ASM_OPEN_PAREN
989 #define TARGET_ASM_OPEN_PAREN ""
990 #undef TARGET_ASM_CLOSE_PAREN
991 #define TARGET_ASM_CLOSE_PAREN ""
992
993 #undef TARGET_ASM_ALIGNED_HI_OP
994 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
995 #undef TARGET_ASM_ALIGNED_SI_OP
996 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
997 #ifdef ASM_QUAD
998 #undef TARGET_ASM_ALIGNED_DI_OP
999 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
1000 #endif
1001
1002 #undef TARGET_ASM_UNALIGNED_HI_OP
1003 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
1004 #undef TARGET_ASM_UNALIGNED_SI_OP
1005 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1006 #undef TARGET_ASM_UNALIGNED_DI_OP
1007 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1008
1009 #undef TARGET_SCHED_ADJUST_COST
1010 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1011 #undef TARGET_SCHED_ISSUE_RATE
1012 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1013 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1014 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1015 ia32_multipass_dfa_lookahead
1016
1017 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1018 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1019
1020 #ifdef HAVE_AS_TLS
1021 #undef TARGET_HAVE_TLS
1022 #define TARGET_HAVE_TLS true
1023 #endif
1024 #undef TARGET_CANNOT_FORCE_CONST_MEM
1025 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1026
1027 #undef TARGET_DELEGITIMIZE_ADDRESS
1028 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1029
1030 #undef TARGET_MS_BITFIELD_LAYOUT_P
1031 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1032
1033 #undef TARGET_ASM_OUTPUT_MI_THUNK
1034 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1035 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1036 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1037
1038 #undef TARGET_ASM_FILE_START
1039 #define TARGET_ASM_FILE_START x86_file_start
1040
1041 #undef TARGET_RTX_COSTS
1042 #define TARGET_RTX_COSTS ix86_rtx_costs
1043 #undef TARGET_ADDRESS_COST
1044 #define TARGET_ADDRESS_COST ix86_address_cost
1045
1046 #undef TARGET_FIXED_CONDITION_CODE_REGS
1047 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1048 #undef TARGET_CC_MODES_COMPATIBLE
1049 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1050
1051 #undef TARGET_MACHINE_DEPENDENT_REORG
1052 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1053
1054 #undef TARGET_BUILD_BUILTIN_VA_LIST
1055 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1056
1057 #undef TARGET_MD_ASM_CLOBBERS
1058 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1059
1060 #undef TARGET_PROMOTE_PROTOTYPES
1061 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1062 #undef TARGET_STRUCT_VALUE_RTX
1063 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1064 #undef TARGET_SETUP_INCOMING_VARARGS
1065 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1066 #undef TARGET_MUST_PASS_IN_STACK
1067 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
1068 #undef TARGET_PASS_BY_REFERENCE
1069 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
1070
1071 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1072 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1073
1074 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1075 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
1076
1077 #ifdef SUBTARGET_INSERT_ATTRIBUTES
1078 #undef TARGET_INSERT_ATTRIBUTES
1079 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
1080 #endif
1081
1082 struct gcc_target targetm = TARGET_INITIALIZER;
1083
1084 \f
1085 /* The svr4 ABI for the i386 says that records and unions are returned
1086 in memory. */
1087 #ifndef DEFAULT_PCC_STRUCT_RETURN
1088 #define DEFAULT_PCC_STRUCT_RETURN 1
1089 #endif
1090
1091 /* Sometimes certain combinations of command options do not make
1092 sense on a particular target machine. You can define a macro
1093 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1094 defined, is executed once just after all the command options have
1095 been parsed.
1096
1097 Don't use this macro to turn on various extra optimizations for
1098 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1099
1100 void
1101 override_options (void)
1102 {
1103 int i;
1104 int ix86_tune_defaulted = 0;
1105
1106 /* Comes from final.c -- no real reason to change it. */
1107 #define MAX_CODE_ALIGN 16
1108
1109 static struct ptt
1110 {
1111 const struct processor_costs *cost; /* Processor costs */
1112 const int target_enable; /* Target flags to enable. */
1113 const int target_disable; /* Target flags to disable. */
1114 const int align_loop; /* Default alignments. */
1115 const int align_loop_max_skip;
1116 const int align_jump;
1117 const int align_jump_max_skip;
1118 const int align_func;
1119 }
1120 const processor_target_table[PROCESSOR_max] =
1121 {
1122 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1123 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1124 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1125 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1126 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1127 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1128 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1129 {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
1130 {&nocona_cost, 0, 0, 0, 0, 0, 0, 0}
1131 };
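  /* Reading one row against the field order above (illustrative): the
     k6_cost entry requests 32-byte loop and jump alignment with a 7-byte
     max skip and 32-byte function alignment, while the pentium4_cost and
     nocona_cost rows leave every alignment at 0, i.e. they request no
     extra alignment beyond the compiler default.  */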
1132
1133 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1134 static struct pta
1135 {
1136 const char *const name; /* processor name or nickname. */
1137 const enum processor_type processor;
1138 const enum pta_flags
1139 {
1140 PTA_SSE = 1,
1141 PTA_SSE2 = 2,
1142 PTA_SSE3 = 4,
1143 PTA_MMX = 8,
1144 PTA_PREFETCH_SSE = 16,
1145 PTA_3DNOW = 32,
1146 PTA_3DNOW_A = 64,
1147 PTA_64BIT = 128
1148 } flags;
1149 }
1150 const processor_alias_table[] =
1151 {
1152 {"i386", PROCESSOR_I386, 0},
1153 {"i486", PROCESSOR_I486, 0},
1154 {"i586", PROCESSOR_PENTIUM, 0},
1155 {"pentium", PROCESSOR_PENTIUM, 0},
1156 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1157 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1158 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1159 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1160 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1161 {"i686", PROCESSOR_PENTIUMPRO, 0},
1162 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1163 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1164 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1165 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1166 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1167 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1168 | PTA_MMX | PTA_PREFETCH_SSE},
1169 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1170 | PTA_MMX | PTA_PREFETCH_SSE},
1171 {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
1172 | PTA_MMX | PTA_PREFETCH_SSE},
1173 {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1174 | PTA_MMX | PTA_PREFETCH_SSE},
1175 {"k6", PROCESSOR_K6, PTA_MMX},
1176 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1177 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1178 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1179 | PTA_3DNOW_A},
1180 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1181 | PTA_3DNOW | PTA_3DNOW_A},
1182 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1183 | PTA_3DNOW_A | PTA_SSE},
1184 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1185 | PTA_3DNOW_A | PTA_SSE},
1186 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1187 | PTA_3DNOW_A | PTA_SSE},
1188 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1189 | PTA_SSE | PTA_SSE2 },
1190 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1191 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1192 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1193 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1194 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1195 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1196 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1197 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1198 };
1199
1200 int const pta_size = ARRAY_SIZE (processor_alias_table);
1201
1202   /* Set the default values for switches whose default depends on TARGET_64BIT
1203      in case they weren't overridden by command-line options.  */
1204 if (TARGET_64BIT)
1205 {
1206 if (flag_omit_frame_pointer == 2)
1207 flag_omit_frame_pointer = 1;
1208 if (flag_asynchronous_unwind_tables == 2)
1209 flag_asynchronous_unwind_tables = 1;
1210 if (flag_pcc_struct_return == 2)
1211 flag_pcc_struct_return = 0;
1212 }
1213 else
1214 {
1215 if (flag_omit_frame_pointer == 2)
1216 flag_omit_frame_pointer = 0;
1217 if (flag_asynchronous_unwind_tables == 2)
1218 flag_asynchronous_unwind_tables = 0;
1219 if (flag_pcc_struct_return == 2)
1220 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1221 }
1222
1223 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1224 SUBTARGET_OVERRIDE_OPTIONS;
1225 #endif
1226
1227 if (!ix86_tune_string && ix86_arch_string)
1228 ix86_tune_string = ix86_arch_string;
1229 if (!ix86_tune_string)
1230 {
1231 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1232 ix86_tune_defaulted = 1;
1233 }
1234 if (!ix86_arch_string)
1235 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
1236
1237 if (ix86_cmodel_string != 0)
1238 {
1239 if (!strcmp (ix86_cmodel_string, "small"))
1240 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1241 else if (flag_pic)
1242 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1243 else if (!strcmp (ix86_cmodel_string, "32"))
1244 ix86_cmodel = CM_32;
1245 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1246 ix86_cmodel = CM_KERNEL;
1247 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
1248 ix86_cmodel = CM_MEDIUM;
1249 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1250 ix86_cmodel = CM_LARGE;
1251 else
1252 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1253 }
1254 else
1255 {
1256 ix86_cmodel = CM_32;
1257 if (TARGET_64BIT)
1258 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1259 }
1260 if (ix86_asm_string != 0)
1261 {
1262 if (!strcmp (ix86_asm_string, "intel"))
1263 ix86_asm_dialect = ASM_INTEL;
1264 else if (!strcmp (ix86_asm_string, "att"))
1265 ix86_asm_dialect = ASM_ATT;
1266 else
1267 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1268 }
1269 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1270 error ("code model `%s' not supported in the %s bit mode",
1271 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1272 if (ix86_cmodel == CM_LARGE)
1273 sorry ("code model `large' not supported yet");
1274 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1275 sorry ("%i-bit mode not compiled in",
1276 (target_flags & MASK_64BIT) ? 64 : 32);
1277
1278 for (i = 0; i < pta_size; i++)
1279 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1280 {
1281 ix86_arch = processor_alias_table[i].processor;
1282 /* Default cpu tuning to the architecture. */
1283 ix86_tune = ix86_arch;
1284 if (processor_alias_table[i].flags & PTA_MMX
1285 && !(target_flags_explicit & MASK_MMX))
1286 target_flags |= MASK_MMX;
1287 if (processor_alias_table[i].flags & PTA_3DNOW
1288 && !(target_flags_explicit & MASK_3DNOW))
1289 target_flags |= MASK_3DNOW;
1290 if (processor_alias_table[i].flags & PTA_3DNOW_A
1291 && !(target_flags_explicit & MASK_3DNOW_A))
1292 target_flags |= MASK_3DNOW_A;
1293 if (processor_alias_table[i].flags & PTA_SSE
1294 && !(target_flags_explicit & MASK_SSE))
1295 target_flags |= MASK_SSE;
1296 if (processor_alias_table[i].flags & PTA_SSE2
1297 && !(target_flags_explicit & MASK_SSE2))
1298 target_flags |= MASK_SSE2;
1299 if (processor_alias_table[i].flags & PTA_SSE3
1300 && !(target_flags_explicit & MASK_SSE3))
1301 target_flags |= MASK_SSE3;
1302 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1303 x86_prefetch_sse = true;
1304 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1305 {
1306 if (ix86_tune_defaulted)
1307 {
1308 ix86_tune_string = "x86-64";
1309 for (i = 0; i < pta_size; i++)
1310 if (! strcmp (ix86_tune_string,
1311 processor_alias_table[i].name))
1312 break;
1313 ix86_tune = processor_alias_table[i].processor;
1314 }
1315 else
1316 error ("CPU you selected does not support x86-64 "
1317 "instruction set");
1318 }
1319 break;
1320 }
1321
1322 if (i == pta_size)
1323 error ("bad value (%s) for -march= switch", ix86_arch_string);
1324
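  /* Worked example (illustrative): with -march=athlon-xp the loop above
     sets ix86_arch (and, by default, ix86_tune) to PROCESSOR_ATHLON, turns
     on MASK_MMX, MASK_3DNOW, MASK_3DNOW_A and MASK_SSE unless the user
     explicitly overrode them, and enables x86_prefetch_sse.  */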
1325 for (i = 0; i < pta_size; i++)
1326 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1327 {
1328 ix86_tune = processor_alias_table[i].processor;
1329 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1330 error ("CPU you selected does not support x86-64 instruction set");
1331
1332 /* Intel CPUs have always interpreted SSE prefetch instructions as
1333 NOPs; so, we can enable SSE prefetch instructions even when
1334 -mtune (rather than -march) points us to a processor that has them.
1335 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
1336 higher processors. */
1337 if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
1338 x86_prefetch_sse = true;
1339 break;
1340 }
1341 if (i == pta_size)
1342 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1343
1344 if (optimize_size)
1345 ix86_cost = &size_cost;
1346 else
1347 ix86_cost = processor_target_table[ix86_tune].cost;
1348 target_flags |= processor_target_table[ix86_tune].target_enable;
1349 target_flags &= ~processor_target_table[ix86_tune].target_disable;
1350
1351 /* Arrange to set up i386_stack_locals for all functions. */
1352 init_machine_status = ix86_init_machine_status;
1353
1354 /* Validate -mregparm= value. */
1355 if (ix86_regparm_string)
1356 {
1357 i = atoi (ix86_regparm_string);
1358 if (i < 0 || i > REGPARM_MAX)
1359 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1360 else
1361 ix86_regparm = i;
1362 }
1363 else
1364 if (TARGET_64BIT)
1365 ix86_regparm = REGPARM_MAX;
1366
1367 /* If the user has provided any of the -malign-* options,
1368 warn and use that value only if -falign-* is not set.
1369 Remove this code in GCC 3.2 or later. */
1370 if (ix86_align_loops_string)
1371 {
1372 warning ("-malign-loops is obsolete, use -falign-loops");
1373 if (align_loops == 0)
1374 {
1375 i = atoi (ix86_align_loops_string);
1376 if (i < 0 || i > MAX_CODE_ALIGN)
1377 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1378 else
1379 align_loops = 1 << i;
1380 }
1381 }
1382
1383 if (ix86_align_jumps_string)
1384 {
1385 warning ("-malign-jumps is obsolete, use -falign-jumps");
1386 if (align_jumps == 0)
1387 {
1388 i = atoi (ix86_align_jumps_string);
1389 if (i < 0 || i > MAX_CODE_ALIGN)
1390 	    error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1391 else
1392 align_jumps = 1 << i;
1393 }
1394 }
1395
1396 if (ix86_align_funcs_string)
1397 {
1398 warning ("-malign-functions is obsolete, use -falign-functions");
1399 if (align_functions == 0)
1400 {
1401 i = atoi (ix86_align_funcs_string);
1402 if (i < 0 || i > MAX_CODE_ALIGN)
1403 	    error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1404 else
1405 align_functions = 1 << i;
1406 }
1407 }
1408
1409 /* Default align_* from the processor table. */
1410 if (align_loops == 0)
1411 {
1412 align_loops = processor_target_table[ix86_tune].align_loop;
1413 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
1414 }
1415 if (align_jumps == 0)
1416 {
1417 align_jumps = processor_target_table[ix86_tune].align_jump;
1418 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
1419 }
1420 if (align_functions == 0)
1421 {
1422 align_functions = processor_target_table[ix86_tune].align_func;
1423 }
1424
1425 /* Validate -mpreferred-stack-boundary= value, or provide default.
1426 The default of 128 bits is for Pentium III's SSE __m128, but we
1427 don't want additional code to keep the stack aligned when
1428 optimizing for code size. */
1429 ix86_preferred_stack_boundary = (optimize_size
1430 ? TARGET_64BIT ? 128 : 32
1431 : 128);
1432 if (ix86_preferred_stack_boundary_string)
1433 {
1434 i = atoi (ix86_preferred_stack_boundary_string);
1435 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1436 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1437 TARGET_64BIT ? 4 : 2);
1438 else
1439 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1440 }
1441
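  /* Worked example (illustrative): -mpreferred-stack-boundary=4 yields
     (1 << 4) * BITS_PER_UNIT = 16 * 8 = 128 bits, i.e. a 16-byte aligned
     stack, matching the 128-bit default chosen above.  */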
1442 /* Validate -mbranch-cost= value, or provide default. */
1443 ix86_branch_cost = processor_target_table[ix86_tune].cost->branch_cost;
1444 if (ix86_branch_cost_string)
1445 {
1446 i = atoi (ix86_branch_cost_string);
1447 if (i < 0 || i > 5)
1448 error ("-mbranch-cost=%d is not between 0 and 5", i);
1449 else
1450 ix86_branch_cost = i;
1451 }
1452
1453 if (ix86_tls_dialect_string)
1454 {
1455 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1456 ix86_tls_dialect = TLS_DIALECT_GNU;
1457 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1458 ix86_tls_dialect = TLS_DIALECT_SUN;
1459 else
1460 error ("bad value (%s) for -mtls-dialect= switch",
1461 ix86_tls_dialect_string);
1462 }
1463
1464 /* Keep nonleaf frame pointers. */
1465 if (TARGET_OMIT_LEAF_FRAME_POINTER)
1466 flag_omit_frame_pointer = 1;
1467
1468 /* If we're doing fast math, we don't care about comparison order
1469 wrt NaNs. This lets us use a shorter comparison sequence. */
1470 if (flag_unsafe_math_optimizations)
1471 target_flags &= ~MASK_IEEE_FP;
1472
1473 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1474 since the insns won't need emulation. */
1475 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1476 target_flags &= ~MASK_NO_FANCY_MATH_387;
1477
1478 /* Turn on SSE2 builtins for -msse3. */
1479 if (TARGET_SSE3)
1480 target_flags |= MASK_SSE2;
1481
1482 /* Turn on SSE builtins for -msse2. */
1483 if (TARGET_SSE2)
1484 target_flags |= MASK_SSE;
1485
1486 if (TARGET_64BIT)
1487 {
1488 if (TARGET_ALIGN_DOUBLE)
1489 error ("-malign-double makes no sense in the 64bit mode");
1490 if (TARGET_RTD)
1491 error ("-mrtd calling convention not supported in the 64bit mode");
1492 /* Enable by default the SSE and MMX builtins. */
1493 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1494 ix86_fpmath = FPMATH_SSE;
1495 }
1496 else
1497 {
1498 ix86_fpmath = FPMATH_387;
1499 /* The i386 ABI does not specify a red zone. It still makes sense to use it
1500 when the programmer takes care to keep the stack from being destroyed. */
1501 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
1502 target_flags |= MASK_NO_RED_ZONE;
1503 }
1504
1505 if (ix86_fpmath_string != 0)
1506 {
1507 if (! strcmp (ix86_fpmath_string, "387"))
1508 ix86_fpmath = FPMATH_387;
1509 else if (! strcmp (ix86_fpmath_string, "sse"))
1510 {
1511 if (!TARGET_SSE)
1512 {
1513 warning ("SSE instruction set disabled, using 387 arithmetics");
1514 ix86_fpmath = FPMATH_387;
1515 }
1516 else
1517 ix86_fpmath = FPMATH_SSE;
1518 }
1519 else if (! strcmp (ix86_fpmath_string, "387,sse")
1520 || ! strcmp (ix86_fpmath_string, "sse,387"))
1521 {
1522 if (!TARGET_SSE)
1523 {
1524 warning ("SSE instruction set disabled, using 387 arithmetics");
1525 ix86_fpmath = FPMATH_387;
1526 }
1527 else if (!TARGET_80387)
1528 {
1529 warning ("387 instruction set disabled, using SSE arithmetics");
1530 ix86_fpmath = FPMATH_SSE;
1531 }
1532 else
1533 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1534 }
1535 else
1536 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1537 }
1538
1539 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1540 on by -msse. */
1541 if (TARGET_SSE)
1542 {
1543 target_flags |= MASK_MMX;
1544 x86_prefetch_sse = true;
1545 }
1546
1547 /* If it has 3DNow! it also has MMX, so MMX is also turned on by -m3dnow. */
1548 if (TARGET_3DNOW)
1549 {
1550 target_flags |= MASK_MMX;
1551 /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
1552 extensions it adds. */
1553 if (x86_3dnow_a & (1 << ix86_arch))
1554 target_flags |= MASK_3DNOW_A;
1555 }
1556 if ((x86_accumulate_outgoing_args & TUNEMASK)
1557 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1558 && !optimize_size)
1559 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1560
1561 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1562 {
1563 char *p;
1564 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1565 p = strchr (internal_label_prefix, 'X');
1566 internal_label_prefix_len = p - internal_label_prefix;
1567 *p = '\0';
1568 }
1569 /* When no scheduling description is available, disable the scheduler pass so
1570 it won't slow down compilation and make x87 code slower. */
1571 if (!TARGET_SCHEDULE)
1572 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
1573 }
1574 \f
1575 void
1576 optimization_options (int level, int size ATTRIBUTE_UNUSED)
1577 {
1578 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1579 make the problem with not enough registers even worse. */
1580 #ifdef INSN_SCHEDULING
1581 if (level > 1)
1582 flag_schedule_insns = 0;
1583 #endif
1584
1585 /* The default values of these switches depend on TARGET_64BIT,
1586 which is not known at this point. Mark these values with 2 and
1587 let the user override them. If there is no command line option
1588 specifying them, we will set the defaults in override_options. */
1589 if (optimize >= 1)
1590 flag_omit_frame_pointer = 2;
1591 flag_pcc_struct_return = 2;
1592 flag_asynchronous_unwind_tables = 2;
1593 }
1594 \f
1595 /* Table of valid machine attributes. */
1596 const struct attribute_spec ix86_attribute_table[] =
1597 {
1598 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1599 /* Stdcall attribute says callee is responsible for popping arguments
1600 if they are not variable. */
1601 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1602 /* Fastcall attribute says callee is responsible for popping arguments
1603 if they are not variable. */
1604 { "fastcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1605 /* Cdecl attribute says the callee is a normal C declaration */
1606 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1607 /* Regparm attribute specifies how many integer arguments are to be
1608 passed in registers. */
1609 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1610 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1611 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
1612 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
1613 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
1614 #endif
1615 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1616 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1617 #ifdef SUBTARGET_ATTRIBUTE_TABLE
1618 SUBTARGET_ATTRIBUTE_TABLE,
1619 #endif
1620 { NULL, 0, 0, false, false, false, NULL }
1621 };
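
/* Illustrative sketch (not part of the original source): the attributes
   validated by the table above are attached to function types in user code
   roughly as in the hypothetical declarations below; they are examples only,
   not declarations used by the compiler itself.

     int __attribute__ ((cdecl))       plain_call (int a, int b);
     int __attribute__ ((stdcall))     callee_pops (int a, int b);
     int __attribute__ ((fastcall))    in_ecx_edx (int a, int b);
     int __attribute__ ((regparm (3))) in_regs (int a, int b, int c);

   The handlers named in the table (ix86_handle_cdecl_attribute and
   ix86_handle_regparm_attribute) reject incompatible combinations such as
   fastcall together with stdcall or regparm.  */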
1622
1623 /* Decide whether we can make a sibling call to a function. DECL is the
1624 declaration of the function being targeted by the call and EXP is the
1625 CALL_EXPR representing the call. */
1626
1627 static bool
1628 ix86_function_ok_for_sibcall (tree decl, tree exp)
1629 {
1630 /* If we are generating position-independent code, we cannot sibcall
1631 optimize any indirect call, or a direct call to a global function,
1632 as the PLT requires %ebx be live. */
1633 if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
1634 return false;
1635
1636 /* If we are returning floats on the 80387 register stack, we cannot
1637 make a sibcall from a function that doesn't return a float to a
1638 function that does or, conversely, from a function that does return
1639 a float to a function that doesn't; the necessary stack adjustment
1640 would not be executed. */
1641 if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp)))
1642 != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)))))
1643 return false;
1644
1645 /* If this call is indirect, we'll need to be able to use a call-clobbered
1646 register for the address of the target function. Make sure that all
1647 such registers are not used for passing parameters. */
1648 if (!decl && !TARGET_64BIT)
1649 {
1650 tree type;
1651
1652 /* We're looking at the CALL_EXPR, we need the type of the function. */
1653 type = TREE_OPERAND (exp, 0); /* pointer expression */
1654 type = TREE_TYPE (type); /* pointer type */
1655 type = TREE_TYPE (type); /* function type */
1656
1657 if (ix86_function_regparm (type, NULL) >= 3)
1658 {
1659 /* ??? Need to count the actual number of registers to be used,
1660 not the possible number of registers. Fix later. */
1661 return false;
1662 }
1663 }
1664
1665 /* Otherwise okay. That also includes certain types of indirect calls. */
1666 return true;
1667 }
1668
1669 /* Handle a "cdecl", "stdcall", or "fastcall" attribute;
1670 arguments as in struct attribute_spec.handler. */
1671 static tree
1672 ix86_handle_cdecl_attribute (tree *node, tree name,
1673 tree args ATTRIBUTE_UNUSED,
1674 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1675 {
1676 if (TREE_CODE (*node) != FUNCTION_TYPE
1677 && TREE_CODE (*node) != METHOD_TYPE
1678 && TREE_CODE (*node) != FIELD_DECL
1679 && TREE_CODE (*node) != TYPE_DECL)
1680 {
1681 warning ("`%s' attribute only applies to functions",
1682 IDENTIFIER_POINTER (name));
1683 *no_add_attrs = true;
1684 }
1685 else
1686 {
1687 if (is_attribute_p ("fastcall", name))
1688 {
1689 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
1690 {
1691 error ("fastcall and stdcall attributes are not compatible");
1692 }
1693 else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
1694 {
1695 error ("fastcall and regparm attributes are not compatible");
1696 }
1697 }
1698 else if (is_attribute_p ("stdcall", name))
1699 {
1700 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1701 {
1702 error ("fastcall and stdcall attributes are not compatible");
1703 }
1704 }
1705 }
1706
1707 if (TARGET_64BIT)
1708 {
1709 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1710 *no_add_attrs = true;
1711 }
1712
1713 return NULL_TREE;
1714 }
1715
1716 /* Handle a "regparm" attribute;
1717 arguments as in struct attribute_spec.handler. */
1718 static tree
1719 ix86_handle_regparm_attribute (tree *node, tree name, tree args,
1720 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1721 {
1722 if (TREE_CODE (*node) != FUNCTION_TYPE
1723 && TREE_CODE (*node) != METHOD_TYPE
1724 && TREE_CODE (*node) != FIELD_DECL
1725 && TREE_CODE (*node) != TYPE_DECL)
1726 {
1727 warning ("`%s' attribute only applies to functions",
1728 IDENTIFIER_POINTER (name));
1729 *no_add_attrs = true;
1730 }
1731 else
1732 {
1733 tree cst;
1734
1735 cst = TREE_VALUE (args);
1736 if (TREE_CODE (cst) != INTEGER_CST)
1737 {
1738 warning ("`%s' attribute requires an integer constant argument",
1739 IDENTIFIER_POINTER (name));
1740 *no_add_attrs = true;
1741 }
1742 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1743 {
1744 warning ("argument to `%s' attribute larger than %d",
1745 IDENTIFIER_POINTER (name), REGPARM_MAX);
1746 *no_add_attrs = true;
1747 }
1748
1749 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1750 {
1751 error ("fastcall and regparm attributes are not compatible");
1752 }
1753 }
1754
1755 return NULL_TREE;
1756 }
1757
1758 /* Return 0 if the attributes for two types are incompatible, 1 if they
1759 are compatible, and 2 if they are nearly compatible (which causes a
1760 warning to be generated). */
1761
1762 static int
1763 ix86_comp_type_attributes (tree type1, tree type2)
1764 {
1765 /* Check for mismatch of non-default calling convention. */
1766 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1767
1768 if (TREE_CODE (type1) != FUNCTION_TYPE)
1769 return 1;
1770
1771 /* Check for mismatched fastcall types */
1772 if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
1773 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
1774 return 0;
1775
1776 /* Check for mismatched return types (cdecl vs stdcall). */
1777 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1778 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1779 return 0;
1780 if (ix86_function_regparm (type1, NULL)
1781 != ix86_function_regparm (type2, NULL))
1782 return 0;
1783 return 1;
1784 }
1785 \f
1786 /* Return the regparm value for a function with the indicated TYPE and DECL.
1787 DECL may be NULL when calling function indirectly
1788 or considering a libcall. */
1789
1790 static int
1791 ix86_function_regparm (tree type, tree decl)
1792 {
1793 tree attr;
1794 int regparm = ix86_regparm;
1795 bool user_convention = false;
1796
1797 if (!TARGET_64BIT)
1798 {
1799 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1800 if (attr)
1801 {
1802 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1803 user_convention = true;
1804 }
1805
1806 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
1807 {
1808 regparm = 2;
1809 user_convention = true;
1810 }
1811
1812 /* Use register calling convention for local functions when possible. */
1813 if (!TARGET_64BIT && !user_convention && decl
1814 && flag_unit_at_a_time && !profile_flag)
1815 {
1816 struct cgraph_local_info *i = cgraph_local_info (decl);
1817 if (i && i->local)
1818 {
1819 /* We can't use regparm(3) for nested functions as these use
1820 static chain pointer in third argument. */
1821 if (DECL_CONTEXT (decl) && !DECL_NO_STATIC_CHAIN (decl))
1822 regparm = 2;
1823 else
1824 regparm = 3;
1825 }
1826 }
1827 }
1828 return regparm;
1829 }
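
/* Illustrative sketch (not part of the original source): for the hypothetical
   32-bit declarations below, and assuming the default ix86_regparm of 0,
   ix86_function_regparm would return:

     int f1 (int a, int b, int c);                          -> 0
     int __attribute__ ((regparm (2))) f2 (int a, int b);   -> 2
     int __attribute__ ((fastcall)) f3 (int a, int b);      -> 2

   A function that is local in the cgraph sense gets 3 (or 2 when it needs the
   static chain) when -funit-at-a-time is in effect, as handled above.  */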
1830
1831 /* Return true if EAX is live at the start of the function. Used by
1832 ix86_expand_prologue to determine if we need special help before
1833 calling allocate_stack_worker. */
1834
1835 static bool
1836 ix86_eax_live_at_start_p (void)
1837 {
1838 /* Cheat. Don't bother working forward from ix86_function_regparm
1839 to the function type to whether an actual argument is located in
1840 eax. Instead just look at cfg info, which is still close enough
1841 to correct at this point. This gives false positives for broken
1842 functions that might use uninitialized data that happens to be
1843 allocated in eax, but who cares? */
1844 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->global_live_at_end, 0);
1845 }
1846
1847 /* Value is the number of bytes of arguments automatically
1848 popped when returning from a subroutine call.
1849 FUNDECL is the declaration node of the function (as a tree),
1850 FUNTYPE is the data type of the function (as a tree),
1851 or for a library call it is an identifier node for the subroutine name.
1852 SIZE is the number of bytes of arguments passed on the stack.
1853
1854 On the 80386, the RTD insn may be used to pop them if the number
1855 of args is fixed, but if the number is variable then the caller
1856 must pop them all. RTD can't be used for library calls now
1857 because the library is compiled with the Unix compiler.
1858 Use of RTD is a selectable option, since it is incompatible with
1859 standard Unix calling sequences. If the option is not selected,
1860 the caller must always pop the args.
1861
1862 The attribute stdcall is equivalent to RTD on a per-module basis. */
1863
1864 int
1865 ix86_return_pops_args (tree fundecl, tree funtype, int size)
1866 {
1867 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1868
1869 /* Cdecl functions override -mrtd, and never pop the stack. */
1870 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1871
1872 /* Stdcall and fastcall functions will pop the stack if not
1873 variable args. */
1874 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
1875 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
1876 rtd = 1;
1877
1878 if (rtd
1879 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1880 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1881 == void_type_node)))
1882 return size;
1883 }
1884
1885 /* Lose any fake structure return argument if it is passed on the stack. */
1886 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
1887 && !TARGET_64BIT
1888 && !KEEP_AGGREGATE_RETURN_POINTER)
1889 {
1890 int nregs = ix86_function_regparm (funtype, fundecl);
1891
1892 if (!nregs)
1893 return GET_MODE_SIZE (Pmode);
1894 }
1895
1896 return 0;
1897 }
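
/* Illustrative sketch (not part of the original source): for the hypothetical
   32-bit prototypes below, with SIZE being the bytes pushed for the call,
   ix86_return_pops_args would return:

     int __attribute__ ((stdcall)) f (int a, int b);   -> 8  (callee pops)
     int __attribute__ ((stdcall)) g (int a, ...);     -> 0  (varargs, caller pops)
     int __attribute__ ((cdecl))   h (int a, int b);   -> 0  (caller pops, even with -mrtd)

   When an aggregate return pointer is passed on the stack and no argument
   registers are used, the callee also pops that pointer, i.e. the function
   returns GET_MODE_SIZE (Pmode).  */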
1898 \f
1899 /* Argument support functions. */
1900
1901 /* Return true when register may be used to pass function parameters. */
1902 bool
1903 ix86_function_arg_regno_p (int regno)
1904 {
1905 int i;
1906 if (!TARGET_64BIT)
1907 return (regno < REGPARM_MAX
1908 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1909 if (SSE_REGNO_P (regno) && TARGET_SSE)
1910 return true;
1911 /* RAX is used as hidden argument to va_arg functions. */
1912 if (!regno)
1913 return true;
1914 for (i = 0; i < REGPARM_MAX; i++)
1915 if (regno == x86_64_int_parameter_registers[i])
1916 return true;
1917 return false;
1918 }
1919
1920 /* Return true if we do not know how to pass TYPE solely in registers. */
1921
1922 static bool
1923 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
1924 {
1925 if (must_pass_in_stack_var_size_or_pad (mode, type))
1926 return true;
1927 return (!TARGET_64BIT && type && mode == TImode);
1928 }
1929
1930 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1931 for a call to a function whose data type is FNTYPE.
1932 For a library call, FNTYPE is 0. */
1933
1934 void
1935 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
1936 tree fntype, /* tree ptr for function decl */
1937 rtx libname, /* SYMBOL_REF of library name or 0 */
1938 tree fndecl)
1939 {
1940 static CUMULATIVE_ARGS zero_cum;
1941 tree param, next_param;
1942
1943 if (TARGET_DEBUG_ARG)
1944 {
1945 fprintf (stderr, "\ninit_cumulative_args (");
1946 if (fntype)
1947 fprintf (stderr, "fntype code = %s, ret code = %s",
1948 tree_code_name[(int) TREE_CODE (fntype)],
1949 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1950 else
1951 fprintf (stderr, "no fntype");
1952
1953 if (libname)
1954 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1955 }
1956
1957 *cum = zero_cum;
1958
1959 /* Set up the number of registers to use for passing arguments. */
1960 if (fntype)
1961 cum->nregs = ix86_function_regparm (fntype, fndecl);
1962 else
1963 cum->nregs = ix86_regparm;
1964 if (TARGET_SSE)
1965 cum->sse_nregs = SSE_REGPARM_MAX;
1966 if (TARGET_MMX)
1967 cum->mmx_nregs = MMX_REGPARM_MAX;
1968 cum->warn_sse = true;
1969 cum->warn_mmx = true;
1970 cum->maybe_vaarg = false;
1971
1972 /* Use ecx and edx registers if function has fastcall attribute */
1973 if (fntype && !TARGET_64BIT)
1974 {
1975 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
1976 {
1977 cum->nregs = 2;
1978 cum->fastcall = 1;
1979 }
1980 }
1981
1982 /* Determine if this function has variable arguments. This is
1983 indicated by the last argument being 'void_type_node' if there
1984 are no variable arguments. If there are variable arguments, then
1985 we won't pass anything in registers in 32-bit mode. */
1986
1987 if (cum->nregs || cum->mmx_nregs || cum->sse_nregs)
1988 {
1989 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1990 param != 0; param = next_param)
1991 {
1992 next_param = TREE_CHAIN (param);
1993 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1994 {
1995 if (!TARGET_64BIT)
1996 {
1997 cum->nregs = 0;
1998 cum->sse_nregs = 0;
1999 cum->mmx_nregs = 0;
2000 cum->warn_sse = 0;
2001 cum->warn_mmx = 0;
2002 cum->fastcall = 0;
2003 }
2004 cum->maybe_vaarg = true;
2005 }
2006 }
2007 }
2008 if ((!fntype && !libname)
2009 || (fntype && !TYPE_ARG_TYPES (fntype)))
2010 cum->maybe_vaarg = 1;
2011
2012 if (TARGET_DEBUG_ARG)
2013 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
2014
2015 return;
2016 }
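
/* Illustrative sketch (not part of the original source): for a hypothetical
   32-bit call to

     void __attribute__ ((fastcall)) f (int a, int b, int c);

   the code above sets cum->nregs to 2 and cum->fastcall, so function_arg
   below assigns the first two integer arguments to ECX and EDX and the third
   goes to the stack.  A prototype ending in an ellipsis clears all register
   counts in 32-bit mode, so every argument is passed on the stack.  */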
2017
2018 /* x86-64 register passing implementation. See the x86-64 ABI for details. The
2019 goal of this code is to classify each 8 bytes of an incoming argument by
2020 register class and assign registers accordingly. */
2021
2022 /* Return the union class of CLASS1 and CLASS2.
2023 See the x86-64 PS ABI for details. */
2024
2025 static enum x86_64_reg_class
2026 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
2027 {
2028 /* Rule #1: If both classes are equal, this is the resulting class. */
2029 if (class1 == class2)
2030 return class1;
2031
2032 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
2033 the other class. */
2034 if (class1 == X86_64_NO_CLASS)
2035 return class2;
2036 if (class2 == X86_64_NO_CLASS)
2037 return class1;
2038
2039 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
2040 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
2041 return X86_64_MEMORY_CLASS;
2042
2043 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
2044 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
2045 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
2046 return X86_64_INTEGERSI_CLASS;
2047 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
2048 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
2049 return X86_64_INTEGER_CLASS;
2050
2051 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
2052 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
2053 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
2054 return X86_64_MEMORY_CLASS;
2055
2056 /* Rule #6: Otherwise class SSE is used. */
2057 return X86_64_SSE_CLASS;
2058 }
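
/* Illustrative sketch (not part of the original source): a few example merges
   following the numbered rules above.

     merge_classes (X86_64_INTEGER_CLASS, X86_64_INTEGER_CLASS)
       -> X86_64_INTEGER_CLASS   (rule 1)
     merge_classes (X86_64_NO_CLASS, X86_64_SSE_CLASS)
       -> X86_64_SSE_CLASS       (rule 2)
     merge_classes (X86_64_INTEGER_CLASS, X86_64_SSE_CLASS)
       -> X86_64_INTEGER_CLASS   (rule 4)
     merge_classes (X86_64_X87_CLASS, X86_64_SSE_CLASS)
       -> X86_64_MEMORY_CLASS    (rule 5)
     merge_classes (X86_64_SSESF_CLASS, X86_64_SSEDF_CLASS)
       -> X86_64_SSE_CLASS       (rule 6)  */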
2059
2060 /* Classify the argument of type TYPE and mode MODE.
2061 CLASSES will be filled by the register class used to pass each word
2062 of the operand. The number of words is returned. In case the parameter
2063 should be passed in memory, 0 is returned. As a special case for zero
2064 sized containers, classes[0] will be NO_CLASS and 1 is returned.
2065
2066 BIT_OFFSET is used internally for handling records and specifies the
2067 offset in bits modulo 256 to avoid overflow cases.
2068
2069 See the x86-64 PS ABI for details.
2070 */
2071
2072 static int
2073 classify_argument (enum machine_mode mode, tree type,
2074 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
2075 {
2076 HOST_WIDE_INT bytes =
2077 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2078 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2079
2080 /* Variable sized entities are always passed/returned in memory. */
2081 if (bytes < 0)
2082 return 0;
2083
2084 if (mode != VOIDmode
2085 && targetm.calls.must_pass_in_stack (mode, type))
2086 return 0;
2087
2088 if (type && AGGREGATE_TYPE_P (type))
2089 {
2090 int i;
2091 tree field;
2092 enum x86_64_reg_class subclasses[MAX_CLASSES];
2093
2094 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
2095 if (bytes > 16)
2096 return 0;
2097
2098 for (i = 0; i < words; i++)
2099 classes[i] = X86_64_NO_CLASS;
2100
2101 /* Zero-sized arrays or structures are NO_CLASS. We return 0 to
2102 signal the memory class, so handle this as a special case. */
2103 if (!words)
2104 {
2105 classes[0] = X86_64_NO_CLASS;
2106 return 1;
2107 }
2108
2109 /* Classify each field of record and merge classes. */
2110 if (TREE_CODE (type) == RECORD_TYPE)
2111 {
2112 /* For classes first merge in the field of the subclasses. */
2113 if (TYPE_BINFO (type))
2114 {
2115 tree binfo, base_binfo;
2116 int i;
2117
2118 for (binfo = TYPE_BINFO (type), i = 0;
2119 BINFO_BASE_ITERATE (binfo, i, base_binfo); i++)
2120 {
2121 int num;
2122 int offset = tree_low_cst (BINFO_OFFSET (base_binfo), 0) * 8;
2123 tree type = BINFO_TYPE (base_binfo);
2124
2125 num = classify_argument (TYPE_MODE (type),
2126 type, subclasses,
2127 (offset + bit_offset) % 256);
2128 if (!num)
2129 return 0;
2130 for (i = 0; i < num; i++)
2131 {
2132 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2133 classes[i + pos] =
2134 merge_classes (subclasses[i], classes[i + pos]);
2135 }
2136 }
2137 }
2138 /* And now merge the fields of structure. */
2139 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2140 {
2141 if (TREE_CODE (field) == FIELD_DECL)
2142 {
2143 int num;
2144
2145 /* Bitfields are always classified as integer. Handle them
2146 early, since later code would consider them to be
2147 misaligned integers. */
2148 if (DECL_BIT_FIELD (field))
2149 {
2150 for (i = int_bit_position (field) / 8 / 8;
2151 i < (int_bit_position (field)
2152 + tree_low_cst (DECL_SIZE (field), 0)
2153 + 63) / 8 / 8; i++)
2154 classes[i] =
2155 merge_classes (X86_64_INTEGER_CLASS,
2156 classes[i]);
2157 }
2158 else
2159 {
2160 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2161 TREE_TYPE (field), subclasses,
2162 (int_bit_position (field)
2163 + bit_offset) % 256);
2164 if (!num)
2165 return 0;
2166 for (i = 0; i < num; i++)
2167 {
2168 int pos =
2169 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
2170 classes[i + pos] =
2171 merge_classes (subclasses[i], classes[i + pos]);
2172 }
2173 }
2174 }
2175 }
2176 }
2177 /* Arrays are handled as small records. */
2178 else if (TREE_CODE (type) == ARRAY_TYPE)
2179 {
2180 int num;
2181 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2182 TREE_TYPE (type), subclasses, bit_offset);
2183 if (!num)
2184 return 0;
2185
2186 /* The partial classes are now full classes. */
2187 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2188 subclasses[0] = X86_64_SSE_CLASS;
2189 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
2190 subclasses[0] = X86_64_INTEGER_CLASS;
2191
2192 for (i = 0; i < words; i++)
2193 classes[i] = subclasses[i % num];
2194 }
2195 /* Unions are similar to RECORD_TYPE but offset is always 0. */
2196 else if (TREE_CODE (type) == UNION_TYPE
2197 || TREE_CODE (type) == QUAL_UNION_TYPE)
2198 {
2199 /* For classes first merge in the field of the subclasses. */
2200 if (TYPE_BINFO (type))
2201 {
2202 tree binfo, base_binfo;
2203 int i;
2204
2205 for (binfo = TYPE_BINFO (type), i = 0;
2206 BINFO_BASE_ITERATE (binfo, i, base_binfo); i++)
2207 {
2208 int num;
2209 int offset = tree_low_cst (BINFO_OFFSET (base_binfo), 0) * 8;
2210 tree type = BINFO_TYPE (base_binfo);
2211
2212 num = classify_argument (TYPE_MODE (type),
2213 type, subclasses,
2214 (offset + (bit_offset % 64)) % 256);
2215 if (!num)
2216 return 0;
2217 for (i = 0; i < num; i++)
2218 {
2219 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2220 classes[i + pos] =
2221 merge_classes (subclasses[i], classes[i + pos]);
2222 }
2223 }
2224 }
2225 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2226 {
2227 if (TREE_CODE (field) == FIELD_DECL)
2228 {
2229 int num;
2230 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2231 TREE_TYPE (field), subclasses,
2232 bit_offset);
2233 if (!num)
2234 return 0;
2235 for (i = 0; i < num; i++)
2236 classes[i] = merge_classes (subclasses[i], classes[i]);
2237 }
2238 }
2239 }
2240 else if (TREE_CODE (type) == SET_TYPE)
2241 {
2242 if (bytes <= 4)
2243 {
2244 classes[0] = X86_64_INTEGERSI_CLASS;
2245 return 1;
2246 }
2247 else if (bytes <= 8)
2248 {
2249 classes[0] = X86_64_INTEGER_CLASS;
2250 return 1;
2251 }
2252 else if (bytes <= 12)
2253 {
2254 classes[0] = X86_64_INTEGER_CLASS;
2255 classes[1] = X86_64_INTEGERSI_CLASS;
2256 return 2;
2257 }
2258 else
2259 {
2260 classes[0] = X86_64_INTEGER_CLASS;
2261 classes[1] = X86_64_INTEGER_CLASS;
2262 return 2;
2263 }
2264 }
2265 else
2266 abort ();
2267
2268 /* Final merger cleanup. */
2269 for (i = 0; i < words; i++)
2270 {
2271 /* If one class is MEMORY, everything should be passed in
2272 memory. */
2273 if (classes[i] == X86_64_MEMORY_CLASS)
2274 return 0;
2275
2276 /* The X86_64_SSEUP_CLASS should be always preceded by
2277 X86_64_SSE_CLASS. */
2278 if (classes[i] == X86_64_SSEUP_CLASS
2279 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
2280 classes[i] = X86_64_SSE_CLASS;
2281
2282 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
2283 if (classes[i] == X86_64_X87UP_CLASS
2284 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
2285 classes[i] = X86_64_SSE_CLASS;
2286 }
2287 return words;
2288 }
2289
2290 /* Compute the alignment needed. We align all types to their natural
2291 boundaries, with the exception of XFmode, which is aligned to 64 bits. */
2292 if (mode != VOIDmode && mode != BLKmode)
2293 {
2294 int mode_alignment = GET_MODE_BITSIZE (mode);
2295
2296 if (mode == XFmode)
2297 mode_alignment = 128;
2298 else if (mode == XCmode)
2299 mode_alignment = 256;
2300 if (COMPLEX_MODE_P (mode))
2301 mode_alignment /= 2;
2302 /* Misaligned fields are always returned in memory. */
2303 if (bit_offset % mode_alignment)
2304 return 0;
2305 }
2306
2307 /* for V1xx modes, just use the base mode */
2308 if (VECTOR_MODE_P (mode)
2309 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
2310 mode = GET_MODE_INNER (mode);
2311
2312 /* Classification of atomic types. */
2313 switch (mode)
2314 {
2315 case DImode:
2316 case SImode:
2317 case HImode:
2318 case QImode:
2319 case CSImode:
2320 case CHImode:
2321 case CQImode:
2322 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2323 classes[0] = X86_64_INTEGERSI_CLASS;
2324 else
2325 classes[0] = X86_64_INTEGER_CLASS;
2326 return 1;
2327 case CDImode:
2328 case TImode:
2329 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2330 return 2;
2331 case CTImode:
2332 return 0;
2333 case SFmode:
2334 if (!(bit_offset % 64))
2335 classes[0] = X86_64_SSESF_CLASS;
2336 else
2337 classes[0] = X86_64_SSE_CLASS;
2338 return 1;
2339 case DFmode:
2340 classes[0] = X86_64_SSEDF_CLASS;
2341 return 1;
2342 case XFmode:
2343 classes[0] = X86_64_X87_CLASS;
2344 classes[1] = X86_64_X87UP_CLASS;
2345 return 2;
2346 case TFmode:
2347 classes[0] = X86_64_SSE_CLASS;
2348 classes[1] = X86_64_SSEUP_CLASS;
2349 return 2;
2350 case SCmode:
2351 classes[0] = X86_64_SSE_CLASS;
2352 return 1;
2353 case DCmode:
2354 classes[0] = X86_64_SSEDF_CLASS;
2355 classes[1] = X86_64_SSEDF_CLASS;
2356 return 2;
2357 case XCmode:
2358 case TCmode:
2359 /* These modes are larger than 16 bytes. */
2360 return 0;
2361 case V4SFmode:
2362 case V4SImode:
2363 case V16QImode:
2364 case V8HImode:
2365 case V2DFmode:
2366 case V2DImode:
2367 classes[0] = X86_64_SSE_CLASS;
2368 classes[1] = X86_64_SSEUP_CLASS;
2369 return 2;
2370 case V2SFmode:
2371 case V2SImode:
2372 case V4HImode:
2373 case V8QImode:
2374 classes[0] = X86_64_SSE_CLASS;
2375 return 1;
2376 case BLKmode:
2377 case VOIDmode:
2378 return 0;
2379 default:
2380 if (VECTOR_MODE_P (mode))
2381 {
2382 if (bytes > 16)
2383 return 0;
2384 if (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT)
2385 {
2386 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2387 classes[0] = X86_64_INTEGERSI_CLASS;
2388 else
2389 classes[0] = X86_64_INTEGER_CLASS;
2390 classes[1] = X86_64_INTEGER_CLASS;
2391 return 1 + (bytes > 8);
2392 }
2393 }
2394 abort ();
2395 }
2396 }
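
/* Illustrative sketch (not part of the original source): classifying the
   hypothetical 16-byte aggregate

     struct s { double d; long l; };

   yields two 8-byte words: the first is X86_64_SSEDF_CLASS (from the double)
   and the second X86_64_INTEGER_CLASS (from the long), so classify_argument
   returns 2 and the struct is passed in one SSE register and one integer
   register.  An aggregate larger than 16 bytes, or one whose classification
   hits a misaligned field, returns 0 and is passed in memory.  */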
2397
2398 /* Examine the argument and set the number of registers required in each
2399 class. Return 0 iff the parameter should be passed in memory. */
2400 static int
2401 examine_argument (enum machine_mode mode, tree type, int in_return,
2402 int *int_nregs, int *sse_nregs)
2403 {
2404 enum x86_64_reg_class class[MAX_CLASSES];
2405 int n = classify_argument (mode, type, class, 0);
2406
2407 *int_nregs = 0;
2408 *sse_nregs = 0;
2409 if (!n)
2410 return 0;
2411 for (n--; n >= 0; n--)
2412 switch (class[n])
2413 {
2414 case X86_64_INTEGER_CLASS:
2415 case X86_64_INTEGERSI_CLASS:
2416 (*int_nregs)++;
2417 break;
2418 case X86_64_SSE_CLASS:
2419 case X86_64_SSESF_CLASS:
2420 case X86_64_SSEDF_CLASS:
2421 (*sse_nregs)++;
2422 break;
2423 case X86_64_NO_CLASS:
2424 case X86_64_SSEUP_CLASS:
2425 break;
2426 case X86_64_X87_CLASS:
2427 case X86_64_X87UP_CLASS:
2428 if (!in_return)
2429 return 0;
2430 break;
2431 case X86_64_MEMORY_CLASS:
2432 abort ();
2433 }
2434 return 1;
2435 }
2436 /* Construct a container for the argument used by the GCC interface. See
2437 FUNCTION_ARG for the detailed description. */
2438 static rtx
2439 construct_container (enum machine_mode mode, tree type, int in_return,
2440 int nintregs, int nsseregs, const int * intreg,
2441 int sse_regno)
2442 {
2443 enum machine_mode tmpmode;
2444 int bytes =
2445 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2446 enum x86_64_reg_class class[MAX_CLASSES];
2447 int n;
2448 int i;
2449 int nexps = 0;
2450 int needed_sseregs, needed_intregs;
2451 rtx exp[MAX_CLASSES];
2452 rtx ret;
2453
2454 n = classify_argument (mode, type, class, 0);
2455 if (TARGET_DEBUG_ARG)
2456 {
2457 if (!n)
2458 fprintf (stderr, "Memory class\n");
2459 else
2460 {
2461 fprintf (stderr, "Classes:");
2462 for (i = 0; i < n; i++)
2463 {
2464 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2465 }
2466 fprintf (stderr, "\n");
2467 }
2468 }
2469 if (!n)
2470 return NULL;
2471 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
2472 return NULL;
2473 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2474 return NULL;
2475
2476 /* First construct simple cases. Avoid SCmode, since we want to use
2477 single register to pass this type. */
2478 if (n == 1 && mode != SCmode)
2479 switch (class[0])
2480 {
2481 case X86_64_INTEGER_CLASS:
2482 case X86_64_INTEGERSI_CLASS:
2483 return gen_rtx_REG (mode, intreg[0]);
2484 case X86_64_SSE_CLASS:
2485 case X86_64_SSESF_CLASS:
2486 case X86_64_SSEDF_CLASS:
2487 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2488 case X86_64_X87_CLASS:
2489 return gen_rtx_REG (mode, FIRST_STACK_REG);
2490 case X86_64_NO_CLASS:
2491 /* Zero sized array, struct or class. */
2492 return NULL;
2493 default:
2494 abort ();
2495 }
2496 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
2497 && mode != BLKmode)
2498 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2499 if (n == 2
2500 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2501 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
2502 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2503 && class[1] == X86_64_INTEGER_CLASS
2504 && (mode == CDImode || mode == TImode || mode == TFmode)
2505 && intreg[0] + 1 == intreg[1])
2506 return gen_rtx_REG (mode, intreg[0]);
2507 if (n == 4
2508 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
2509 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS
2510 && mode != BLKmode)
2511 return gen_rtx_REG (XCmode, FIRST_STACK_REG);
2512
2513 /* Otherwise figure out the entries of the PARALLEL. */
2514 for (i = 0; i < n; i++)
2515 {
2516 switch (class[i])
2517 {
2518 case X86_64_NO_CLASS:
2519 break;
2520 case X86_64_INTEGER_CLASS:
2521 case X86_64_INTEGERSI_CLASS:
2522 /* Merge TImodes on aligned occasions here too. */
2523 if (i * 8 + 8 > bytes)
2524 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2525 else if (class[i] == X86_64_INTEGERSI_CLASS)
2526 tmpmode = SImode;
2527 else
2528 tmpmode = DImode;
2529 /* We've requested 24 bytes for which we don't have a mode. Use DImode. */
2530 if (tmpmode == BLKmode)
2531 tmpmode = DImode;
2532 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2533 gen_rtx_REG (tmpmode, *intreg),
2534 GEN_INT (i*8));
2535 intreg++;
2536 break;
2537 case X86_64_SSESF_CLASS:
2538 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2539 gen_rtx_REG (SFmode,
2540 SSE_REGNO (sse_regno)),
2541 GEN_INT (i*8));
2542 sse_regno++;
2543 break;
2544 case X86_64_SSEDF_CLASS:
2545 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2546 gen_rtx_REG (DFmode,
2547 SSE_REGNO (sse_regno)),
2548 GEN_INT (i*8));
2549 sse_regno++;
2550 break;
2551 case X86_64_SSE_CLASS:
2552 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2553 tmpmode = TImode;
2554 else
2555 tmpmode = DImode;
2556 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2557 gen_rtx_REG (tmpmode,
2558 SSE_REGNO (sse_regno)),
2559 GEN_INT (i*8));
2560 if (tmpmode == TImode)
2561 i++;
2562 sse_regno++;
2563 break;
2564 default:
2565 abort ();
2566 }
2567 }
2568 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2569 for (i = 0; i < nexps; i++)
2570 XVECEXP (ret, 0, i) = exp [i];
2571 return ret;
2572 }
2573
2574 /* Update the data in CUM to advance over an argument
2575 of mode MODE and data type TYPE.
2576 (TYPE is null for libcalls where that information may not be available.) */
2577
2578 void
2579 function_arg_advance (CUMULATIVE_ARGS *cum, /* current arg information */
2580 enum machine_mode mode, /* current arg mode */
2581 tree type, /* type of the argument or 0 if lib support */
2582 int named) /* whether or not the argument was named */
2583 {
2584 int bytes =
2585 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2586 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2587
2588 if (TARGET_DEBUG_ARG)
2589 fprintf (stderr,
2590 "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, mode=%s, named=%d)\n\n",
2591 words, cum->words, cum->nregs, cum->sse_nregs, GET_MODE_NAME (mode), named);
2592 if (TARGET_64BIT)
2593 {
2594 int int_nregs, sse_nregs;
2595 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2596 cum->words += words;
2597 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2598 {
2599 cum->nregs -= int_nregs;
2600 cum->sse_nregs -= sse_nregs;
2601 cum->regno += int_nregs;
2602 cum->sse_regno += sse_nregs;
2603 }
2604 else
2605 cum->words += words;
2606 }
2607 else
2608 {
2609 if (TARGET_SSE && SSE_REG_MODE_P (mode)
2610 && (!type || !AGGREGATE_TYPE_P (type)))
2611 {
2612 cum->sse_words += words;
2613 cum->sse_nregs -= 1;
2614 cum->sse_regno += 1;
2615 if (cum->sse_nregs <= 0)
2616 {
2617 cum->sse_nregs = 0;
2618 cum->sse_regno = 0;
2619 }
2620 }
2621 else if (TARGET_MMX && MMX_REG_MODE_P (mode)
2622 && (!type || !AGGREGATE_TYPE_P (type)))
2623 {
2624 cum->mmx_words += words;
2625 cum->mmx_nregs -= 1;
2626 cum->mmx_regno += 1;
2627 if (cum->mmx_nregs <= 0)
2628 {
2629 cum->mmx_nregs = 0;
2630 cum->mmx_regno = 0;
2631 }
2632 }
2633 else
2634 {
2635 cum->words += words;
2636 cum->nregs -= words;
2637 cum->regno += words;
2638
2639 if (cum->nregs <= 0)
2640 {
2641 cum->nregs = 0;
2642 cum->regno = 0;
2643 }
2644 }
2645 }
2646 return;
2647 }
2648
2649 /* Define where to put the arguments to a function.
2650 Value is zero to push the argument on the stack,
2651 or a hard register in which to store the argument.
2652
2653 MODE is the argument's machine mode.
2654 TYPE is the data type of the argument (as a tree).
2655 This is null for libcalls where that information may
2656 not be available.
2657 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2658 the preceding args and about the function being called.
2659 NAMED is nonzero if this argument is a named parameter
2660 (otherwise it is an extra parameter matching an ellipsis). */
2661
2662 rtx
2663 function_arg (CUMULATIVE_ARGS *cum, /* current arg information */
2664 enum machine_mode mode, /* current arg mode */
2665 tree type, /* type of the argument or 0 if lib support */
2666 int named) /* != 0 for normal args, == 0 for ... args */
2667 {
2668 rtx ret = NULL_RTX;
2669 int bytes =
2670 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2671 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2672 static bool warnedsse, warnedmmx;
2673
2674 /* To simplify the code below, represent vector types with a vector mode
2675 even if MMX/SSE are not active. */
2676 if (type
2677 && TREE_CODE (type) == VECTOR_TYPE
2678 && (bytes == 8 || bytes == 16)
2679 && GET_MODE_CLASS (TYPE_MODE (type)) != MODE_VECTOR_INT
2680 && GET_MODE_CLASS (TYPE_MODE (type)) != MODE_VECTOR_FLOAT)
2681 {
2682 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
2683 enum machine_mode newmode
2684 = TREE_CODE (TREE_TYPE (type)) == REAL_TYPE
2685 ? MIN_MODE_VECTOR_FLOAT : MIN_MODE_VECTOR_INT;
2686
2687 /* Get the mode which has this inner mode and number of units. */
2688 for (; newmode != VOIDmode; newmode = GET_MODE_WIDER_MODE (newmode))
2689 if (GET_MODE_NUNITS (newmode) == TYPE_VECTOR_SUBPARTS (type)
2690 && GET_MODE_INNER (newmode) == innermode)
2691 {
2692 mode = newmode;
2693 break;
2694 }
2695 }
2696
2697 /* Handle a hidden AL argument containing the number of registers for varargs
2698 x86-64 functions. For the i386 ABI just return constm1_rtx to avoid
2699 any AL settings. */
2700 if (mode == VOIDmode)
2701 {
2702 if (TARGET_64BIT)
2703 return GEN_INT (cum->maybe_vaarg
2704 ? (cum->sse_nregs < 0
2705 ? SSE_REGPARM_MAX
2706 : cum->sse_regno)
2707 : -1);
2708 else
2709 return constm1_rtx;
2710 }
2711 if (TARGET_64BIT)
2712 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2713 &x86_64_int_parameter_registers [cum->regno],
2714 cum->sse_regno);
2715 else
2716 switch (mode)
2717 {
2718 /* For now, pass fp/complex values on the stack. */
2719 default:
2720 break;
2721
2722 case BLKmode:
2723 if (bytes < 0)
2724 break;
2725 /* FALLTHRU */
2726 case DImode:
2727 case SImode:
2728 case HImode:
2729 case QImode:
2730 if (words <= cum->nregs)
2731 {
2732 int regno = cum->regno;
2733
2734 /* Fastcall allocates the first two DWORD (SImode) or
2735 smaller arguments to ECX and EDX. */
2736 if (cum->fastcall)
2737 {
2738 if (mode == BLKmode || mode == DImode)
2739 break;
2740
2741 /* ECX, not EAX, is the first allocated register. */
2742 if (regno == 0)
2743 regno = 2;
2744 }
2745 ret = gen_rtx_REG (mode, regno);
2746 }
2747 break;
2748 case TImode:
2749 case V16QImode:
2750 case V8HImode:
2751 case V4SImode:
2752 case V2DImode:
2753 case V4SFmode:
2754 case V2DFmode:
2755 if (!type || !AGGREGATE_TYPE_P (type))
2756 {
2757 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
2758 {
2759 warnedsse = true;
2760 warning ("SSE vector argument without SSE enabled "
2761 "changes the ABI");
2762 }
2763 if (cum->sse_nregs)
2764 ret = gen_rtx_REG (mode, cum->sse_regno + FIRST_SSE_REG);
2765 }
2766 break;
2767 case V8QImode:
2768 case V4HImode:
2769 case V2SImode:
2770 case V2SFmode:
2771 if (!type || !AGGREGATE_TYPE_P (type))
2772 {
2773 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
2774 {
2775 warnedmmx = true;
2776 warning ("MMX vector argument without MMX enabled "
2777 "changes the ABI");
2778 }
2779 if (cum->mmx_nregs)
2780 ret = gen_rtx_REG (mode, cum->mmx_regno + FIRST_MMX_REG);
2781 }
2782 break;
2783 }
2784
2785 if (TARGET_DEBUG_ARG)
2786 {
2787 fprintf (stderr,
2788 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2789 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2790
2791 if (ret)
2792 print_simple_rtl (stderr, ret);
2793 else
2794 fprintf (stderr, ", stack");
2795
2796 fprintf (stderr, " )\n");
2797 }
2798
2799 return ret;
2800 }
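
/* Illustrative sketch (not part of the original source): in 64-bit mode a
   hypothetical call to

     double f (int a, double b);

   has function_arg hand back %edi (the low part of RDI) for A and %xmm0 for
   B, as chosen by construct_container above.  In 32-bit mode both arguments
   would be passed on the stack unless a regparm or fastcall convention is in
   effect.  */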
2801
2802 /* A C expression that indicates when an argument must be passed by
2803 reference. If nonzero for an argument, a copy of that argument is
2804 made in memory and a pointer to the argument is passed instead of
2805 the argument itself. The pointer is passed in whatever way is
2806 appropriate for passing a pointer to that type. */
2807
2808 static bool
2809 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
2810 enum machine_mode mode ATTRIBUTE_UNUSED,
2811 tree type, bool named ATTRIBUTE_UNUSED)
2812 {
2813 if (!TARGET_64BIT)
2814 return 0;
2815
2816 if (type && int_size_in_bytes (type) == -1)
2817 {
2818 if (TARGET_DEBUG_ARG)
2819 fprintf (stderr, "function_arg_pass_by_reference\n");
2820 return 1;
2821 }
2822
2823 return 0;
2824 }
2825
2826 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
2827 ABI. Only called if TARGET_SSE. */
2828 static bool
2829 contains_128bit_aligned_vector_p (tree type)
2830 {
2831 enum machine_mode mode = TYPE_MODE (type);
2832 if (SSE_REG_MODE_P (mode)
2833 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
2834 return true;
2835 if (TYPE_ALIGN (type) < 128)
2836 return false;
2837
2838 if (AGGREGATE_TYPE_P (type))
2839 {
2840 /* Walk the aggregates recursively. */
2841 if (TREE_CODE (type) == RECORD_TYPE
2842 || TREE_CODE (type) == UNION_TYPE
2843 || TREE_CODE (type) == QUAL_UNION_TYPE)
2844 {
2845 tree field;
2846
2847 if (TYPE_BINFO (type))
2848 {
2849 tree binfo, base_binfo;
2850 int i;
2851
2852 for (binfo = TYPE_BINFO (type), i = 0;
2853 BINFO_BASE_ITERATE (binfo, i, base_binfo); i++)
2854 if (contains_128bit_aligned_vector_p (BINFO_TYPE (base_binfo)))
2855 return true;
2856 }
2857 /* And now merge the fields of structure. */
2858 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2859 {
2860 if (TREE_CODE (field) == FIELD_DECL
2861 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
2862 return true;
2863 }
2864 }
2865 /* Just for use if some language passes arrays by value. */
2866 else if (TREE_CODE (type) == ARRAY_TYPE)
2867 {
2868 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
2869 return true;
2870 }
2871 else
2872 abort ();
2873 }
2874 return false;
2875 }
2876
2877 /* Gives the alignment boundary, in bits, of an argument with the
2878 specified mode and type. */
2879
2880 int
2881 ix86_function_arg_boundary (enum machine_mode mode, tree type)
2882 {
2883 int align;
2884 if (type)
2885 align = TYPE_ALIGN (type);
2886 else
2887 align = GET_MODE_ALIGNMENT (mode);
2888 if (align < PARM_BOUNDARY)
2889 align = PARM_BOUNDARY;
2890 if (!TARGET_64BIT)
2891 {
2892 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
2893 make an exception for SSE modes since these require 128bit
2894 alignment.
2895
2896 The handling here differs from field_alignment. ICC aligns MMX
2897 arguments to 4 byte boundaries, while structure fields are aligned
2898 to 8 byte boundaries. */
2899 if (!TARGET_SSE)
2900 align = PARM_BOUNDARY;
2901 else if (!type)
2902 {
2903 if (!SSE_REG_MODE_P (mode))
2904 align = PARM_BOUNDARY;
2905 }
2906 else
2907 {
2908 if (!contains_128bit_aligned_vector_p (type))
2909 align = PARM_BOUNDARY;
2910 }
2911 }
2912 if (align > 128)
2913 align = 128;
2914 return align;
2915 }
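
/* Illustrative sketch (not part of the original source): in 32-bit mode with
   -msse, a hypothetical __m128 argument (or a struct containing one) is
   aligned to 128 bits by the function above, while plain int or double
   arguments stay at PARM_BOUNDARY.  In 64-bit mode the natural alignment of
   the type is used, capped at 128 bits.  */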
2916
2917 /* Return true if N is a possible register number of function value. */
2918 bool
2919 ix86_function_value_regno_p (int regno)
2920 {
2921 if (!TARGET_64BIT)
2922 {
2923 return ((regno) == 0
2924 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2925 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2926 }
2927 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2928 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2929 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2930 }
2931
2932 /* Define how to find the value returned by a function.
2933 VALTYPE is the data type of the value (as a tree).
2934 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2935 otherwise, FUNC is 0. */
2936 rtx
2937 ix86_function_value (tree valtype)
2938 {
2939 if (TARGET_64BIT)
2940 {
2941 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2942 REGPARM_MAX, SSE_REGPARM_MAX,
2943 x86_64_int_return_registers, 0);
2944 /* For zero-sized structures, construct_container returns NULL, but we need
2945 to keep the rest of the compiler happy by returning a meaningful value. */
2946 if (!ret)
2947 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2948 return ret;
2949 }
2950 else
2951 return gen_rtx_REG (TYPE_MODE (valtype),
2952 ix86_value_regno (TYPE_MODE (valtype)));
2953 }
2954
2955 /* Return nonzero iff type is returned in memory. */
2956 int
2957 ix86_return_in_memory (tree type)
2958 {
2959 int needed_intregs, needed_sseregs, size;
2960 enum machine_mode mode = TYPE_MODE (type);
2961
2962 if (TARGET_64BIT)
2963 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
2964
2965 if (mode == BLKmode)
2966 return 1;
2967
2968 size = int_size_in_bytes (type);
2969
2970 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
2971 return 0;
2972
2973 if (VECTOR_MODE_P (mode) || mode == TImode)
2974 {
2975 /* User-created vectors small enough to fit in EAX. */
2976 if (size < 8)
2977 return 0;
2978
2979 /* MMX/3dNow values are returned on the stack, since we've
2980 got to EMMS/FEMMS before returning. */
2981 if (size == 8)
2982 return 1;
2983
2984 /* SSE values are returned in XMM0, except when it doesn't exist. */
2985 if (size == 16)
2986 return (TARGET_SSE ? 0 : 1);
2987 }
2988
2989 if (mode == XFmode)
2990 return 0;
2991
2992 if (size > 12)
2993 return 1;
2994 return 0;
2995 }
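
/* Illustrative sketch (not part of the original source): under the 32-bit
   rules above, a hypothetical 8-byte MMX vector is returned in memory (so
   callers need not worry about EMMS state), a 16-byte SSE vector is returned
   in XMM0 when SSE is enabled, long double (XFmode) is returned in a
   register, and anything larger than 12 bytes goes to memory.  */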
2996
2997 /* When returning SSE vector types, we have a choice of either
2998 (1) being abi incompatible with a -march switch, or
2999 (2) generating an error.
3000 Given no good solution, I think the safest thing is one warning.
3001 The user won't be able to use -Werror, but....
3002
3003 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
3004 called in response to actually generating a caller or callee that
3005 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
3006 via aggregate_value_p for general type probing from tree-ssa. */
3007
3008 static rtx
3009 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
3010 {
3011 static bool warned;
3012
3013 if (!TARGET_SSE && type && !warned)
3014 {
3015 /* Look at the return type of the function, not the function type. */
3016 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
3017
3018 if (mode == TImode
3019 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3020 {
3021 warned = true;
3022 warning ("SSE vector return without SSE enabled changes the ABI");
3023 }
3024 }
3025
3026 return NULL;
3027 }
3028
3029 /* Define how to find the value returned by a library function
3030 assuming the value has mode MODE. */
3031 rtx
3032 ix86_libcall_value (enum machine_mode mode)
3033 {
3034 if (TARGET_64BIT)
3035 {
3036 switch (mode)
3037 {
3038 case SFmode:
3039 case SCmode:
3040 case DFmode:
3041 case DCmode:
3042 case TFmode:
3043 return gen_rtx_REG (mode, FIRST_SSE_REG);
3044 case XFmode:
3045 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
3046 case XCmode:
3047 case TCmode:
3048 return NULL;
3049 default:
3050 return gen_rtx_REG (mode, 0);
3051 }
3052 }
3053 else
3054 return gen_rtx_REG (mode, ix86_value_regno (mode));
3055 }
3056
3057 /* Given a mode, return the register to use for a return value. */
3058
3059 static int
3060 ix86_value_regno (enum machine_mode mode)
3061 {
3062 /* Floating point return values in %st(0). */
3063 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
3064 return FIRST_FLOAT_REG;
3065 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
3066 we prevent this case when sse is not available. */
3067 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3068 return FIRST_SSE_REG;
3069 /* Everything else in %eax. */
3070 return 0;
3071 }
3072 \f
3073 /* Create the va_list data type. */
3074
3075 static tree
3076 ix86_build_builtin_va_list (void)
3077 {
3078 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
3079
3080 /* For i386 we use plain pointer to argument area. */
3081 if (!TARGET_64BIT)
3082 return build_pointer_type (char_type_node);
3083
3084 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
3085 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
3086
3087 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
3088 unsigned_type_node);
3089 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
3090 unsigned_type_node);
3091 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
3092 ptr_type_node);
3093 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
3094 ptr_type_node);
3095
3096 DECL_FIELD_CONTEXT (f_gpr) = record;
3097 DECL_FIELD_CONTEXT (f_fpr) = record;
3098 DECL_FIELD_CONTEXT (f_ovf) = record;
3099 DECL_FIELD_CONTEXT (f_sav) = record;
3100
3101 TREE_CHAIN (record) = type_decl;
3102 TYPE_NAME (record) = type_decl;
3103 TYPE_FIELDS (record) = f_gpr;
3104 TREE_CHAIN (f_gpr) = f_fpr;
3105 TREE_CHAIN (f_fpr) = f_ovf;
3106 TREE_CHAIN (f_ovf) = f_sav;
3107
3108 layout_type (record);
3109
3110 /* The correct type is an array type of one element. */
3111 return build_array_type (record, build_index_type (size_zero_node));
3112 }
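
/* Illustrative sketch (not part of the original source): the record built
   above corresponds roughly to the C declaration

     typedef struct {
       unsigned int gp_offset;    // byte offset of the next GPR in reg_save_area
       unsigned int fp_offset;    // byte offset of the next SSE reg in reg_save_area
       void *overflow_arg_area;   // next argument passed on the stack
       void *reg_save_area;       // start of the register save block
     } __va_list_tag;
     typedef __va_list_tag va_list[1];

   which matches the x86-64 psABI va_list layout; on i386 va_list is just a
   plain character pointer.  */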
3113
3114 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
3115
3116 static void
3117 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3118 tree type, int *pretend_size ATTRIBUTE_UNUSED,
3119 int no_rtl)
3120 {
3121 CUMULATIVE_ARGS next_cum;
3122 rtx save_area = NULL_RTX, mem;
3123 rtx label;
3124 rtx label_ref;
3125 rtx tmp_reg;
3126 rtx nsse_reg;
3127 int set;
3128 tree fntype;
3129 int stdarg_p;
3130 int i;
3131
3132 if (!TARGET_64BIT)
3133 return;
3134
3135 /* Indicate that space must be allocated on the stack for the varargs save area. */
3136 ix86_save_varrargs_registers = 1;
3137
3138 cfun->stack_alignment_needed = 128;
3139
3140 fntype = TREE_TYPE (current_function_decl);
3141 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
3142 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
3143 != void_type_node));
3144
3145 /* For varargs, we do not want to skip the dummy va_dcl argument.
3146 For stdargs, we do want to skip the last named argument. */
3147 next_cum = *cum;
3148 if (stdarg_p)
3149 function_arg_advance (&next_cum, mode, type, 1);
3150
3151 if (!no_rtl)
3152 save_area = frame_pointer_rtx;
3153
3154 set = get_varargs_alias_set ();
3155
3156 for (i = next_cum.regno; i < ix86_regparm; i++)
3157 {
3158 mem = gen_rtx_MEM (Pmode,
3159 plus_constant (save_area, i * UNITS_PER_WORD));
3160 set_mem_alias_set (mem, set);
3161 emit_move_insn (mem, gen_rtx_REG (Pmode,
3162 x86_64_int_parameter_registers[i]));
3163 }
3164
3165 if (next_cum.sse_nregs)
3166 {
3167 /* Now emit code to save SSE registers. The AX parameter contains the number
3168 of SSE parameter registers used to call this function. We use the
3169 sse_prologue_save insn template that produces a computed jump across
3170 the SSE saves. We need some preparation work to get this working. */
3171
3172 label = gen_label_rtx ();
3173 label_ref = gen_rtx_LABEL_REF (Pmode, label);
3174
3175 /* Compute address to jump to :
3176 label - 5*eax + nnamed_sse_arguments*5 */
3177 tmp_reg = gen_reg_rtx (Pmode);
3178 nsse_reg = gen_reg_rtx (Pmode);
3179 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
3180 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3181 gen_rtx_MULT (Pmode, nsse_reg,
3182 GEN_INT (4))));
3183 if (next_cum.sse_regno)
3184 emit_move_insn
3185 (nsse_reg,
3186 gen_rtx_CONST (DImode,
3187 gen_rtx_PLUS (DImode,
3188 label_ref,
3189 GEN_INT (next_cum.sse_regno * 4))));
3190 else
3191 emit_move_insn (nsse_reg, label_ref);
3192 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
3193
3194 /* Compute the address of the memory block we save into. We always use a
3195 pointer pointing 127 bytes after the first byte to store - this is needed
3196 to keep the instruction size limited to 4 bytes. */
3197 tmp_reg = gen_reg_rtx (Pmode);
3198 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3199 plus_constant (save_area,
3200 8 * REGPARM_MAX + 127)));
3201 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
3202 set_mem_alias_set (mem, set);
3203 set_mem_align (mem, BITS_PER_WORD);
3204
3205 /* And finally do the dirty job! */
3206 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
3207 GEN_INT (next_cum.sse_regno), label));
3208 }
3209
3210 }
3211
3212 /* Implement va_start. */
3213
3214 void
3215 ix86_va_start (tree valist, rtx nextarg)
3216 {
3217 HOST_WIDE_INT words, n_gpr, n_fpr;
3218 tree f_gpr, f_fpr, f_ovf, f_sav;
3219 tree gpr, fpr, ovf, sav, t;
3220
3221 /* Only 64bit target needs something special. */
3222 if (!TARGET_64BIT)
3223 {
3224 std_expand_builtin_va_start (valist, nextarg);
3225 return;
3226 }
3227
3228 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3229 f_fpr = TREE_CHAIN (f_gpr);
3230 f_ovf = TREE_CHAIN (f_fpr);
3231 f_sav = TREE_CHAIN (f_ovf);
3232
3233 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3234 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
3235 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
3236 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
3237 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
3238
3239 /* Count number of gp and fp argument registers used. */
3240 words = current_function_args_info.words;
3241 n_gpr = current_function_args_info.regno;
3242 n_fpr = current_function_args_info.sse_regno;
3243
3244 if (TARGET_DEBUG_ARG)
3245 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
3246 (int) words, (int) n_gpr, (int) n_fpr);
3247
3248 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
3249 build_int_cst (NULL_TREE, n_gpr * 8));
3250 TREE_SIDE_EFFECTS (t) = 1;
3251 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3252
3253 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
3254 build_int_cst (NULL_TREE, n_fpr * 16 + 8*REGPARM_MAX));
3255 TREE_SIDE_EFFECTS (t) = 1;
3256 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3257
3258 /* Find the overflow area. */
3259 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
3260 if (words != 0)
3261 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
3262 build_int_cst (NULL_TREE, words * UNITS_PER_WORD));
3263 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3264 TREE_SIDE_EFFECTS (t) = 1;
3265 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3266
3267 /* Find the register save area.
3268 The prologue of the function saves it right above the stack frame. */
3269 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
3270 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
3271 TREE_SIDE_EFFECTS (t) = 1;
3272 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3273 }
3274
3275 /* Implement va_arg. */
3276
3277 tree
3278 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
3279 {
3280 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
3281 tree f_gpr, f_fpr, f_ovf, f_sav;
3282 tree gpr, fpr, ovf, sav, t;
3283 int size, rsize;
3284 tree lab_false, lab_over = NULL_TREE;
3285 tree addr, t2;
3286 rtx container;
3287 int indirect_p = 0;
3288 tree ptrtype;
3289
3290 /* Only the 64-bit target needs something special. */
3291 if (!TARGET_64BIT)
3292 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
3293
3294 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3295 f_fpr = TREE_CHAIN (f_gpr);
3296 f_ovf = TREE_CHAIN (f_fpr);
3297 f_sav = TREE_CHAIN (f_ovf);
3298
3299 valist = build_va_arg_indirect_ref (valist);
3300 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
3301 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
3302 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
3303 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
3304
3305 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
3306 if (indirect_p)
3307 type = build_pointer_type (type);
3308 size = int_size_in_bytes (type);
3309 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3310
3311 container = construct_container (TYPE_MODE (type), type, 0,
3312 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
3313 /* Pull the value out of the saved registers ... */
3316
3317 addr = create_tmp_var (ptr_type_node, "addr");
3318 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
3319
3320 if (container)
3321 {
3322 int needed_intregs, needed_sseregs;
3323 bool need_temp;
3324 tree int_addr, sse_addr;
3325
3326 lab_false = create_artificial_label ();
3327 lab_over = create_artificial_label ();
3328
3329 examine_argument (TYPE_MODE (type), type, 0,
3330 &needed_intregs, &needed_sseregs);
3331
3332 need_temp = (!REG_P (container)
3333 && ((needed_intregs && TYPE_ALIGN (type) > 64)
3334 || TYPE_ALIGN (type) > 128));
3335
3336 /* In case we are passing a structure, verify that it forms a consecutive
3337 block in the register save area. If not, we need to do moves. */
3338 if (!need_temp && !REG_P (container))
3339 {
3340 /* Verify that all registers are strictly consecutive */
3341 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
3342 {
3343 int i;
3344
3345 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3346 {
3347 rtx slot = XVECEXP (container, 0, i);
3348 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
3349 || INTVAL (XEXP (slot, 1)) != i * 16)
3350 need_temp = 1;
3351 }
3352 }
3353 else
3354 {
3355 int i;
3356
3357 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3358 {
3359 rtx slot = XVECEXP (container, 0, i);
3360 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
3361 || INTVAL (XEXP (slot, 1)) != i * 8)
3362 need_temp = 1;
3363 }
3364 }
3365 }
3366 if (!need_temp)
3367 {
3368 int_addr = addr;
3369 sse_addr = addr;
3370 }
3371 else
3372 {
3373 int_addr = create_tmp_var (ptr_type_node, "int_addr");
3374 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
3375 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
3376 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
3377 }
3378 /* First ensure that we fit completely in registers. */
3379 if (needed_intregs)
3380 {
3381 t = build_int_cst (TREE_TYPE (gpr),
3382 (REGPARM_MAX - needed_intregs + 1) * 8);
3383 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
3384 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
3385 t = build (COND_EXPR, void_type_node, t, t2, NULL_TREE);
3386 gimplify_and_add (t, pre_p);
3387 }
3388 if (needed_sseregs)
3389 {
3390 t = build_int_cst (TREE_TYPE (fpr),
3391 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
3392 + REGPARM_MAX * 8);
3393 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
3394 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
3395 t = build (COND_EXPR, void_type_node, t, t2, NULL_TREE);
3396 gimplify_and_add (t, pre_p);
3397 }
3398
3399 /* Compute index to start of area used for integer regs. */
3400 if (needed_intregs)
3401 {
3402 /* int_addr = gpr + sav; */
3403 t = build2 (PLUS_EXPR, ptr_type_node, sav, gpr);
3404 t = build2 (MODIFY_EXPR, void_type_node, int_addr, t);
3405 gimplify_and_add (t, pre_p);
3406 }
3407 if (needed_sseregs)
3408 {
3409 /* sse_addr = fpr + sav; */
3410 t = build2 (PLUS_EXPR, ptr_type_node, sav, fpr);
3411 t = build2 (MODIFY_EXPR, void_type_node, sse_addr, t);
3412 gimplify_and_add (t, pre_p);
3413 }
3414 if (need_temp)
3415 {
3416 int i;
3417 tree temp = create_tmp_var (type, "va_arg_tmp");
3418
3419 /* addr = &temp; */
3420 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
3421 t = build2 (MODIFY_EXPR, void_type_node, addr, t);
3422 gimplify_and_add (t, pre_p);
3423
3424 for (i = 0; i < XVECLEN (container, 0); i++)
3425 {
3426 rtx slot = XVECEXP (container, 0, i);
3427 rtx reg = XEXP (slot, 0);
3428 enum machine_mode mode = GET_MODE (reg);
3429 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
3430 tree addr_type = build_pointer_type (piece_type);
3431 tree src_addr, src;
3432 int src_offset;
3433 tree dest_addr, dest;
3434
3435 if (SSE_REGNO_P (REGNO (reg)))
3436 {
3437 src_addr = sse_addr;
3438 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
3439 }
3440 else
3441 {
3442 src_addr = int_addr;
3443 src_offset = REGNO (reg) * 8;
3444 }
3445 src_addr = fold_convert (addr_type, src_addr);
3446 src_addr = fold (build2 (PLUS_EXPR, addr_type, src_addr,
3447 size_int (src_offset)));
3448 src = build_va_arg_indirect_ref (src_addr);
3449
3450 dest_addr = fold_convert (addr_type, addr);
3451 dest_addr = fold (build2 (PLUS_EXPR, addr_type, dest_addr,
3452 size_int (INTVAL (XEXP (slot, 1)))));
3453 dest = build_va_arg_indirect_ref (dest_addr);
3454
3455 t = build2 (MODIFY_EXPR, void_type_node, dest, src);
3456 gimplify_and_add (t, pre_p);
3457 }
3458 }
3459
3460 if (needed_intregs)
3461 {
3462 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
3463 build_int_cst (NULL_TREE, needed_intregs * 8));
3464 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
3465 gimplify_and_add (t, pre_p);
3466 }
3467 if (needed_sseregs)
3468 {
3469 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
3470 build_int_cst (NULL_TREE, needed_sseregs * 16));
3471 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
3472 gimplify_and_add (t, pre_p);
3473 }
3474
3475 t = build1 (GOTO_EXPR, void_type_node, lab_over);
3476 gimplify_and_add (t, pre_p);
3477
3478 t = build1 (LABEL_EXPR, void_type_node, lab_false);
3479 append_to_statement_list (t, pre_p);
3480 }
3481
3482 /* ... otherwise out of the overflow area. */
3483
3484 /* Care for on-stack alignment if needed. */
3485 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
3486 t = ovf;
3487 else
3488 {
3489 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
3490 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf,
3491 build_int_cst (NULL_TREE, align - 1));
3492 t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
3493 build_int_cst (NULL_TREE, -align));
3494 }
3495 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
3496
3497 t2 = build2 (MODIFY_EXPR, void_type_node, addr, t);
3498 gimplify_and_add (t2, pre_p);
3499
3500 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
3501 build_int_cst (NULL_TREE, rsize * UNITS_PER_WORD));
3502 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3503 gimplify_and_add (t, pre_p);
3504
3505 if (container)
3506 {
3507 t = build1 (LABEL_EXPR, void_type_node, lab_over);
3508 append_to_statement_list (t, pre_p);
3509 }
3510
3511 ptrtype = build_pointer_type (type);
3512 addr = fold_convert (ptrtype, addr);
3513
3514 if (indirect_p)
3515 addr = build_va_arg_indirect_ref (addr);
3516 return build_va_arg_indirect_ref (addr);
3517 }
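/* Roughly, for a plain `int' argument the trees gimplified above amount to
   the following pseudo C (an illustrative sketch, not the exact GIMPLE):

     if (ap->gp_offset >= 6 * 8)            -- all six GP argument regs used
       {
         addr = ap->overflow_arg_area;      -- no extra alignment for int
         ap->overflow_arg_area = addr + 8;
       }
     else
       {
         addr = ap->reg_save_area + ap->gp_offset;
         ap->gp_offset += 8;
       }
     result = *(int *) addr;
*/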
3518 \f
3519 /* Return nonzero if OPNUM's MEM should be matched
3520 in movabs* patterns. */
3521
3522 int
3523 ix86_check_movabs (rtx insn, int opnum)
3524 {
3525 rtx set, mem;
3526
3527 set = PATTERN (insn);
3528 if (GET_CODE (set) == PARALLEL)
3529 set = XVECEXP (set, 0, 0);
3530 if (GET_CODE (set) != SET)
3531 abort ();
3532 mem = XEXP (set, opnum);
3533 while (GET_CODE (mem) == SUBREG)
3534 mem = SUBREG_REG (mem);
3535 if (GET_CODE (mem) != MEM)
3536 abort ();
3537 return (volatile_ok || !MEM_VOLATILE_P (mem));
3538 }
3539 \f
3540 /* Initialize the table of extra 80387 mathematical constants. */
3541
3542 static void
3543 init_ext_80387_constants (void)
3544 {
3545 static const char * cst[5] =
3546 {
3547 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
3548 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
3549 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
3550 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
3551 "3.1415926535897932385128089594061862044", /* 4: fldpi */
3552 };
3553 int i;
3554
3555 for (i = 0; i < 5; i++)
3556 {
3557 real_from_string (&ext_80387_constants_table[i], cst[i]);
3558 /* Ensure each constant is rounded to XFmode precision. */
3559 real_convert (&ext_80387_constants_table[i],
3560 XFmode, &ext_80387_constants_table[i]);
3561 }
3562
3563 ext_80387_constants_init = 1;
3564 }
3565
3566 /* Return a nonzero index if the constant can be loaded with a special
3567 80387 instruction, 0 if it cannot, or -1 if it is not an FP constant. */
3568
3569 int
3570 standard_80387_constant_p (rtx x)
3571 {
3572 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
3573 return -1;
3574
3575 if (x == CONST0_RTX (GET_MODE (x)))
3576 return 1;
3577 if (x == CONST1_RTX (GET_MODE (x)))
3578 return 2;
3579
3580 /* For XFmode constants, try to find a special 80387 instruction when
3581 optimizing for size or on those CPUs that benefit from them. */
3582 if (GET_MODE (x) == XFmode
3583 && (optimize_size || x86_ext_80387_constants & TUNEMASK))
3584 {
3585 REAL_VALUE_TYPE r;
3586 int i;
3587
3588 if (! ext_80387_constants_init)
3589 init_ext_80387_constants ();
3590
3591 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3592 for (i = 0; i < 5; i++)
3593 if (real_identical (&r, &ext_80387_constants_table[i]))
3594 return i + 3;
3595 }
3596
3597 return 0;
3598 }
3599
3600 /* Return the opcode of the special instruction to be used to load
3601 the constant X. */
3602
3603 const char *
3604 standard_80387_constant_opcode (rtx x)
3605 {
3606 switch (standard_80387_constant_p (x))
3607 {
3608 case 1:
3609 return "fldz";
3610 case 2:
3611 return "fld1";
3612 case 3:
3613 return "fldlg2";
3614 case 4:
3615 return "fldln2";
3616 case 5:
3617 return "fldl2e";
3618 case 6:
3619 return "fldl2t";
3620 case 7:
3621 return "fldpi";
3622 }
3623 abort ();
3624 }
3625
3626 /* Return the CONST_DOUBLE representing the 80387 constant that is
3627 loaded by the specified special instruction. The argument IDX
3628 matches the return value from standard_80387_constant_p. */
3629
3630 rtx
3631 standard_80387_constant_rtx (int idx)
3632 {
3633 int i;
3634
3635 if (! ext_80387_constants_init)
3636 init_ext_80387_constants ();
3637
3638 switch (idx)
3639 {
3640 case 3:
3641 case 4:
3642 case 5:
3643 case 6:
3644 case 7:
3645 i = idx - 3;
3646 break;
3647
3648 default:
3649 abort ();
3650 }
3651
3652 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
3653 XFmode);
3654 }
3655
3656 /* Return 1 if X is an FP constant we can load into an SSE register
3657 without using memory. */
3658 int
3659 standard_sse_constant_p (rtx x)
3660 {
3661 if (x == const0_rtx)
3662 return 1;
3663 return (x == CONST0_RTX (GET_MODE (x)));
3664 }
3665
3666 /* Returns 1 if OP contains a symbol reference */
3667
3668 int
3669 symbolic_reference_mentioned_p (rtx op)
3670 {
3671 const char *fmt;
3672 int i;
3673
3674 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
3675 return 1;
3676
3677 fmt = GET_RTX_FORMAT (GET_CODE (op));
3678 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3679 {
3680 if (fmt[i] == 'E')
3681 {
3682 int j;
3683
3684 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3685 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3686 return 1;
3687 }
3688
3689 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3690 return 1;
3691 }
3692
3693 return 0;
3694 }
3695
3696 /* Return 1 if it is appropriate to emit `ret' instructions in the
3697 body of a function. Do this only if the epilogue is simple, needing a
3698 couple of insns. Prior to reloading, we can't tell how many registers
3699 must be saved, so return 0 then. Return 0 if there is no frame
3700 marker to de-allocate.
3701
3702 If NON_SAVING_SETJMP is defined and true, then it is not possible
3703 for the epilogue to be simple, so return 0. This is a special case
3704 since NON_SAVING_SETJMP will not cause regs_ever_live to change
3705 until final, but jump_optimize may need to know sooner if a
3706 `return' is OK. */
3707
3708 int
3709 ix86_can_use_return_insn_p (void)
3710 {
3711 struct ix86_frame frame;
3712
3713 #ifdef NON_SAVING_SETJMP
3714 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
3715 return 0;
3716 #endif
3717
3718 if (! reload_completed || frame_pointer_needed)
3719 return 0;
3720
3721 /* Don't allow more than 32768 bytes to be popped, since that's all
3722 we handle with one instruction. */
3723 if (current_function_pops_args
3724 && current_function_args_size >= 32768)
3725 return 0;
3726
3727 ix86_compute_frame_layout (&frame);
3728 return frame.to_allocate == 0 && frame.nregs == 0;
3729 }
3730 \f
3731 /* Value should be nonzero if functions must have frame pointers.
3732 Zero means the frame pointer need not be set up (and parms may
3733 be accessed via the stack pointer) in functions that seem suitable. */
3734
3735 int
3736 ix86_frame_pointer_required (void)
3737 {
3738 /* If we accessed previous frames, then the generated code expects
3739 to be able to access the saved ebp value in our frame. */
3740 if (cfun->machine->accesses_prev_frame)
3741 return 1;
3742
3743 /* Several x86 OSes need a frame pointer for other reasons,
3744 usually pertaining to setjmp. */
3745 if (SUBTARGET_FRAME_POINTER_REQUIRED)
3746 return 1;
3747
3748 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
3749 the frame pointer by default. Turn it back on now if we've not
3750 got a leaf function. */
3751 if (TARGET_OMIT_LEAF_FRAME_POINTER
3752 && (!current_function_is_leaf))
3753 return 1;
3754
3755 if (current_function_profile)
3756 return 1;
3757
3758 return 0;
3759 }
3760
3761 /* Record that the current function accesses previous call frames. */
3762
3763 void
3764 ix86_setup_frame_addresses (void)
3765 {
3766 cfun->machine->accesses_prev_frame = 1;
3767 }
3768 \f
3769 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
3770 # define USE_HIDDEN_LINKONCE 1
3771 #else
3772 # define USE_HIDDEN_LINKONCE 0
3773 #endif
3774
3775 static int pic_labels_used;
3776
3777 /* Fills in the label name that should be used for a pc thunk for
3778 the given register. */
3779
3780 static void
3781 get_pc_thunk_name (char name[32], unsigned int regno)
3782 {
3783 if (USE_HIDDEN_LINKONCE)
3784 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
3785 else
3786 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
3787 }
3788
3789
3790 /* This function emits the pc thunks used for -fpic: each thunk loads its
3791 register with the return address of the caller and then returns. */
3792
3793 void
3794 ix86_file_end (void)
3795 {
3796 rtx xops[2];
3797 int regno;
3798
3799 for (regno = 0; regno < 8; ++regno)
3800 {
3801 char name[32];
3802
3803 if (! ((pic_labels_used >> regno) & 1))
3804 continue;
3805
3806 get_pc_thunk_name (name, regno);
3807
3808 if (USE_HIDDEN_LINKONCE)
3809 {
3810 tree decl;
3811
3812 decl = build_decl (FUNCTION_DECL, get_identifier (name),
3813 error_mark_node);
3814 TREE_PUBLIC (decl) = 1;
3815 TREE_STATIC (decl) = 1;
3816 DECL_ONE_ONLY (decl) = 1;
3817
3818 (*targetm.asm_out.unique_section) (decl, 0);
3819 named_section (decl, NULL, 0);
3820
3821 (*targetm.asm_out.globalize_label) (asm_out_file, name);
3822 fputs ("\t.hidden\t", asm_out_file);
3823 assemble_name (asm_out_file, name);
3824 fputc ('\n', asm_out_file);
3825 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
3826 }
3827 else
3828 {
3829 text_section ();
3830 ASM_OUTPUT_LABEL (asm_out_file, name);
3831 }
3832
3833 xops[0] = gen_rtx_REG (SImode, regno);
3834 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
3835 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
3836 output_asm_insn ("ret", xops);
3837 }
3838
3839 if (NEED_INDICATE_EXEC_STACK)
3840 file_end_indicate_exec_stack ();
3841 }
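/* For instance, the thunk emitted above for %ebx looks roughly like this
   (illustrative; hidden/linkonce sectioning directives omitted):

     __i686.get_pc_thunk.bx:
             movl    (%esp), %ebx
             ret
*/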
3842
3843 /* Emit code for the SET_GOT patterns. */
3844
3845 const char *
3846 output_set_got (rtx dest)
3847 {
3848 rtx xops[3];
3849
3850 xops[0] = dest;
3851 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
3852
3853 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
3854 {
3855 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
3856
3857 if (!flag_pic)
3858 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
3859 else
3860 output_asm_insn ("call\t%a2", xops);
3861
3862 #if TARGET_MACHO
3863 /* Output the "canonical" label name ("Lxx$pb") here too. This
3864 is what will be referred to by the Mach-O PIC subsystem. */
3865 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
3866 #endif
3867 (*targetm.asm_out.internal_label) (asm_out_file, "L",
3868 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
3869
3870 if (flag_pic)
3871 output_asm_insn ("pop{l}\t%0", xops);
3872 }
3873 else
3874 {
3875 char name[32];
3876 get_pc_thunk_name (name, REGNO (dest));
3877 pic_labels_used |= 1 << REGNO (dest);
3878
3879 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
3880 xops[2] = gen_rtx_MEM (QImode, xops[2]);
3881 output_asm_insn ("call\t%X2", xops);
3882 }
3883
3884 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
3885 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
3886 else if (!TARGET_MACHO)
3887 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
3888
3889 return "";
3890 }
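/* E.g. with -fpic and TARGET_DEEP_BRANCH_PREDICTION the sequence emitted for
   %ebx is (illustrative):

             call    __i686.get_pc_thunk.bx
             addl    $_GLOBAL_OFFSET_TABLE_, %ebx
*/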
3891
3892 /* Generate a "push" pattern for input ARG. */
3893
3894 static rtx
3895 gen_push (rtx arg)
3896 {
3897 return gen_rtx_SET (VOIDmode,
3898 gen_rtx_MEM (Pmode,
3899 gen_rtx_PRE_DEC (Pmode,
3900 stack_pointer_rtx)),
3901 arg);
3902 }
3903
3904 /* Return the number of an unused call-clobbered register if one is
3905 available for the entire function, or INVALID_REGNUM if there is none. */
3906
3907 static unsigned int
3908 ix86_select_alt_pic_regnum (void)
3909 {
3910 if (current_function_is_leaf && !current_function_profile)
3911 {
3912 int i;
3913 for (i = 2; i >= 0; --i)
3914 if (!regs_ever_live[i])
3915 return i;
3916 }
3917
3918 return INVALID_REGNUM;
3919 }
3920
3921 /* Return 1 if we need to save REGNO. */
3922 static int
3923 ix86_save_reg (unsigned int regno, int maybe_eh_return)
3924 {
3925 if (pic_offset_table_rtx
3926 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
3927 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
3928 || current_function_profile
3929 || current_function_calls_eh_return
3930 || current_function_uses_const_pool))
3931 {
3932 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
3933 return 0;
3934 return 1;
3935 }
3936
3937 if (current_function_calls_eh_return && maybe_eh_return)
3938 {
3939 unsigned i;
3940 for (i = 0; ; i++)
3941 {
3942 unsigned test = EH_RETURN_DATA_REGNO (i);
3943 if (test == INVALID_REGNUM)
3944 break;
3945 if (test == regno)
3946 return 1;
3947 }
3948 }
3949
3950 return (regs_ever_live[regno]
3951 && !call_used_regs[regno]
3952 && !fixed_regs[regno]
3953 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
3954 }
3955
3956 /* Return number of registers to be saved on the stack. */
3957
3958 static int
3959 ix86_nsaved_regs (void)
3960 {
3961 int nregs = 0;
3962 int regno;
3963
3964 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
3965 if (ix86_save_reg (regno, true))
3966 nregs++;
3967 return nregs;
3968 }
3969
3970 /* Return the offset between two registers, one to be eliminated, and the other
3971 its replacement, at the start of a routine. */
3972
3973 HOST_WIDE_INT
3974 ix86_initial_elimination_offset (int from, int to)
3975 {
3976 struct ix86_frame frame;
3977 ix86_compute_frame_layout (&frame);
3978
3979 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3980 return frame.hard_frame_pointer_offset;
3981 else if (from == FRAME_POINTER_REGNUM
3982 && to == HARD_FRAME_POINTER_REGNUM)
3983 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
3984 else
3985 {
3986 if (to != STACK_POINTER_REGNUM)
3987 abort ();
3988 else if (from == ARG_POINTER_REGNUM)
3989 return frame.stack_pointer_offset;
3990 else if (from != FRAME_POINTER_REGNUM)
3991 abort ();
3992 else
3993 return frame.stack_pointer_offset - frame.frame_pointer_offset;
3994 }
3995 }
3996
3997 /* Fill the ix86_frame structure with information about the current function's frame. */
3998
3999 static void
4000 ix86_compute_frame_layout (struct ix86_frame *frame)
4001 {
4002 HOST_WIDE_INT total_size;
4003 unsigned int stack_alignment_needed;
4004 HOST_WIDE_INT offset;
4005 unsigned int preferred_alignment;
4006 HOST_WIDE_INT size = get_frame_size ();
4007
4008 frame->nregs = ix86_nsaved_regs ();
4009 total_size = size;
4010
4011 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
4012 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4013
4014 /* During reload iterations the number of registers saved can change.
4015 Recompute the value as needed. Do not recompute when the number of registers
4016 didn't change, as reload makes multiple calls to this function and does not
4017 expect the decision to change within a single iteration. */
4018 if (!optimize_size
4019 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
4020 {
4021 int count = frame->nregs;
4022
4023 cfun->machine->use_fast_prologue_epilogue_nregs = count;
4024 /* The fast prologue uses move instead of push to save registers. This
4025 is significantly longer, but also executes faster as modern hardware
4026 can execute the moves in parallel, but can't do that for push/pop.
4027
4028 Be careful about choosing which prologue to emit: when the function takes
4029 many instructions to execute, we may as well use the slow version, and the
4030 same applies when the function is known to be outside a hot spot (this is
4031 known only with profile feedback). Weight the size of the function by the
4032 number of registers to save, as it is cheap to use one or two push
4033 instructions but very slow to use many of them. */
4034 if (count)
4035 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
4036 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
4037 || (flag_branch_probabilities
4038 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
4039 cfun->machine->use_fast_prologue_epilogue = false;
4040 else
4041 cfun->machine->use_fast_prologue_epilogue
4042 = !expensive_function_p (count);
4043 }
4044 if (TARGET_PROLOGUE_USING_MOVE
4045 && cfun->machine->use_fast_prologue_epilogue)
4046 frame->save_regs_using_mov = true;
4047 else
4048 frame->save_regs_using_mov = false;
4049
4050
4051 /* Skip return address and saved base pointer. */
4052 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4053
4054 frame->hard_frame_pointer_offset = offset;
4055
4056 /* Do some sanity checking of stack_alignment_needed and
4057 preferred_alignment, since the i386 port is the only one using these
4058 features, and they may break easily. */
4059
4060 if (size && !stack_alignment_needed)
4061 abort ();
4062 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
4063 abort ();
4064 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4065 abort ();
4066 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4067 abort ();
4068
4069 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
4070 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
4071
4072 /* Register save area */
4073 offset += frame->nregs * UNITS_PER_WORD;
4074
4075 /* Va-arg area */
4076 if (ix86_save_varrargs_registers)
4077 {
4078 offset += X86_64_VARARGS_SIZE;
4079 frame->va_arg_size = X86_64_VARARGS_SIZE;
4080 }
4081 else
4082 frame->va_arg_size = 0;
4083
4084 /* Align start of frame for local function. */
4085 frame->padding1 = ((offset + stack_alignment_needed - 1)
4086 & -stack_alignment_needed) - offset;
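/* (E.g. offset == 20 and stack_alignment_needed == 16 gives
   ((20 + 15) & -16) - 20 == 32 - 20 == 12 bytes of padding.  Illustrative.) */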
4087
4088 offset += frame->padding1;
4089
4090 /* Frame pointer points here. */
4091 frame->frame_pointer_offset = offset;
4092
4093 offset += size;
4094
4095 /* Add the outgoing arguments area. It can be skipped if we eliminated
4096 all the function calls as dead code.
4097 Skipping is however impossible when the function calls alloca: the alloca
4098 expander assumes that the last current_function_outgoing_args_size bytes
4099 of the stack frame are unused. */
4100 if (ACCUMULATE_OUTGOING_ARGS
4101 && (!current_function_is_leaf || current_function_calls_alloca))
4102 {
4103 offset += current_function_outgoing_args_size;
4104 frame->outgoing_arguments_size = current_function_outgoing_args_size;
4105 }
4106 else
4107 frame->outgoing_arguments_size = 0;
4108
4109 /* Align stack boundary. Only needed if we're calling another function
4110 or using alloca. */
4111 if (!current_function_is_leaf || current_function_calls_alloca)
4112 frame->padding2 = ((offset + preferred_alignment - 1)
4113 & -preferred_alignment) - offset;
4114 else
4115 frame->padding2 = 0;
4116
4117 offset += frame->padding2;
4118
4119 /* We've reached end of stack frame. */
4120 frame->stack_pointer_offset = offset;
4121
4122 /* Size prologue needs to allocate. */
4123 frame->to_allocate =
4124 (size + frame->padding1 + frame->padding2
4125 + frame->outgoing_arguments_size + frame->va_arg_size);
4126
4127 if ((!frame->to_allocate && frame->nregs <= 1)
4128 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
4129 frame->save_regs_using_mov = false;
4130
4131 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
4132 && current_function_is_leaf)
4133 {
4134 frame->red_zone_size = frame->to_allocate;
4135 if (frame->save_regs_using_mov)
4136 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
4137 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
4138 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
4139 }
4140 else
4141 frame->red_zone_size = 0;
4142 frame->to_allocate -= frame->red_zone_size;
4143 frame->stack_pointer_offset -= frame->red_zone_size;
4144 #if 0
4145 fprintf (stderr, "nregs: %i\n", frame->nregs);
4146 fprintf (stderr, "size: %i\n", size);
4147 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
4148 fprintf (stderr, "padding1: %i\n", frame->padding1);
4149 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
4150 fprintf (stderr, "padding2: %i\n", frame->padding2);
4151 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
4152 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
4153 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
4154 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
4155 frame->hard_frame_pointer_offset);
4156 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
4157 #endif
4158 }
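/* Roughly, the layout computed above looks like this, from higher to lower
   addresses (an illustrative sketch; frame_pointer_needed assumed, red zone
   handling ignored):

     return address
     saved %ebp
     saved registers               (frame->nregs words)
     va-arg register save area     (frame->va_arg_size)
     padding1
     local variables               (get_frame_size () bytes)
     outgoing arguments            (frame->outgoing_arguments_size)
     padding2                      <- frame->stack_pointer_offset ends here
*/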
4159
4160 /* Emit code to save registers in the prologue. */
4161
4162 static void
4163 ix86_emit_save_regs (void)
4164 {
4165 int regno;
4166 rtx insn;
4167
4168 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4169 if (ix86_save_reg (regno, true))
4170 {
4171 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
4172 RTX_FRAME_RELATED_P (insn) = 1;
4173 }
4174 }
4175
4176 /* Emit code to save registers using MOV insns. The first register
4177 is saved at POINTER + OFFSET. */
4178 static void
4179 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
4180 {
4181 int regno;
4182 rtx insn;
4183
4184 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4185 if (ix86_save_reg (regno, true))
4186 {
4187 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
4188 Pmode, offset),
4189 gen_rtx_REG (Pmode, regno));
4190 RTX_FRAME_RELATED_P (insn) = 1;
4191 offset += UNITS_PER_WORD;
4192 }
4193 }
4194
4195 /* Expand a prologue or epilogue stack adjustment.
4196 The pattern exists to put a dependency on all ebp-based memory accesses.
4197 STYLE should be negative if instructions should be marked as frame related,
4198 zero if the %r11 register is live and cannot be freely used, and positive
4199 otherwise. */
4200
4201 static void
4202 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
4203 {
4204 rtx insn;
4205
4206 if (! TARGET_64BIT)
4207 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
4208 else if (x86_64_immediate_operand (offset, DImode))
4209 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
4210 else
4211 {
4212 rtx r11;
4213 /* r11 is used by indirect sibcall return as well, set before the
4214 epilogue and used after the epilogue. ATM indirect sibcall
4215 shouldn't be used together with huge frame sizes in one
4216 function because of the frame_size check in sibcall.c. */
4217 if (style == 0)
4218 abort ();
4219 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
4220 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
4221 if (style < 0)
4222 RTX_FRAME_RELATED_P (insn) = 1;
4223 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
4224 offset));
4225 }
4226 if (style < 0)
4227 RTX_FRAME_RELATED_P (insn) = 1;
4228 }
4229
4230 /* Expand the prologue into a bunch of separate insns. */
4231
4232 void
4233 ix86_expand_prologue (void)
4234 {
4235 rtx insn;
4236 bool pic_reg_used;
4237 struct ix86_frame frame;
4238 HOST_WIDE_INT allocate;
4239
4240 ix86_compute_frame_layout (&frame);
4241
4242 /* Note: AT&T enter does NOT have reversed args. Enter is probably
4243 slower on all targets. Also sdb doesn't like it. */
4244
4245 if (frame_pointer_needed)
4246 {
4247 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
4248 RTX_FRAME_RELATED_P (insn) = 1;
4249
4250 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
4251 RTX_FRAME_RELATED_P (insn) = 1;
4252 }
4253
4254 allocate = frame.to_allocate;
4255
4256 if (!frame.save_regs_using_mov)
4257 ix86_emit_save_regs ();
4258 else
4259 allocate += frame.nregs * UNITS_PER_WORD;
4260
4261 /* When using the red zone we may start register saving before allocating
4262 the stack frame, saving one cycle of the prologue. */
4263 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
4264 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
4265 : stack_pointer_rtx,
4266 -frame.nregs * UNITS_PER_WORD);
4267
4268 if (allocate == 0)
4269 ;
4270 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
4271 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
4272 GEN_INT (-allocate), -1);
4273 else
4274 {
4275 /* Only valid for Win32. */
4276 rtx eax = gen_rtx_REG (SImode, 0);
4277 bool eax_live = ix86_eax_live_at_start_p ();
4278
4279 if (TARGET_64BIT)
4280 abort ();
4281
4282 if (eax_live)
4283 {
4284 emit_insn (gen_push (eax));
4285 allocate -= 4;
4286 }
4287
4288 insn = emit_move_insn (eax, GEN_INT (allocate));
4289 RTX_FRAME_RELATED_P (insn) = 1;
4290
4291 insn = emit_insn (gen_allocate_stack_worker (eax));
4292 RTX_FRAME_RELATED_P (insn) = 1;
4293
4294 if (eax_live)
4295 {
4296 rtx t;
4297 if (frame_pointer_needed)
4298 t = plus_constant (hard_frame_pointer_rtx,
4299 allocate
4300 - frame.to_allocate
4301 - frame.nregs * UNITS_PER_WORD);
4302 else
4303 t = plus_constant (stack_pointer_rtx, allocate);
4304 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
4305 }
4306 }
4307
4308 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
4309 {
4310 if (!frame_pointer_needed || !frame.to_allocate)
4311 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
4312 else
4313 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
4314 -frame.nregs * UNITS_PER_WORD);
4315 }
4316
4317 pic_reg_used = false;
4318 if (pic_offset_table_rtx
4319 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4320 || current_function_profile))
4321 {
4322 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
4323
4324 if (alt_pic_reg_used != INVALID_REGNUM)
4325 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
4326
4327 pic_reg_used = true;
4328 }
4329
4330 if (pic_reg_used)
4331 {
4332 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
4333
4334 /* Even with accurate pre-reload life analysis, we can wind up
4335 deleting all references to the pic register after reload.
4336 Consider if cross-jumping unifies two sides of a branch
4337 controlled by a comparison vs the only read from a global.
4338 In which case, allow the set_got to be deleted, though we're
4339 too late to do anything about the ebx save in the prologue. */
4340 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
4341 }
4342
4343 /* Prevent function calls from being scheduled before the call to mcount.
4344 In the pic_reg_used case, make sure that the got load isn't deleted. */
4345 if (current_function_profile)
4346 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
4347 }
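/* For a typical 32-bit function with a frame pointer, one saved register and
   no red zone, the insns expanded above correspond to something like
   (illustrative only):

             pushl   %ebp
             movl    %esp, %ebp
             pushl   %ebx                   -- callee-saved register
             subl    $frame.to_allocate, %esp
*/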
4348
4349 /* Emit code to restore saved registers using MOV insns. First register
4350 is restored from POINTER + OFFSET. */
4351 static void
4352 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
4353 int maybe_eh_return)
4354 {
4355 int regno;
4356 rtx base_address = gen_rtx_MEM (Pmode, pointer);
4357
4358 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4359 if (ix86_save_reg (regno, maybe_eh_return))
4360 {
4361 /* Ensure that adjust_address won't be forced to produce a pointer
4362 outside the range allowed by the x86-64 instruction set. */
4363 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
4364 {
4365 rtx r11;
4366
4367 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
4368 emit_move_insn (r11, GEN_INT (offset));
4369 emit_insn (gen_adddi3 (r11, r11, pointer));
4370 base_address = gen_rtx_MEM (Pmode, r11);
4371 offset = 0;
4372 }
4373 emit_move_insn (gen_rtx_REG (Pmode, regno),
4374 adjust_address (base_address, Pmode, offset));
4375 offset += UNITS_PER_WORD;
4376 }
4377 }
4378
4379 /* Restore function stack, frame, and registers. */
4380
4381 void
4382 ix86_expand_epilogue (int style)
4383 {
4384 int regno;
4385 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
4386 struct ix86_frame frame;
4387 HOST_WIDE_INT offset;
4388
4389 ix86_compute_frame_layout (&frame);
4390
4391 /* Calculate start of saved registers relative to ebp. Special care
4392 must be taken for the normal return case of a function using
4393 eh_return: the eax and edx registers are marked as saved, but not
4394 restored along this path. */
4395 offset = frame.nregs;
4396 if (current_function_calls_eh_return && style != 2)
4397 offset -= 2;
4398 offset *= -UNITS_PER_WORD;
4399
4400 /* If we're only restoring one register and sp is not valid, then
4401 use a move instruction to restore the register, since it's
4402 less work than reloading sp and popping the register.
4403
4404 The default code results in a stack adjustment using an add/lea instruction,
4405 while this code results in a LEAVE instruction (or discrete equivalent),
4406 so it is profitable in some other cases as well, especially when there
4407 are no registers to restore. We also use this code when TARGET_USE_LEAVE
4408 and there is exactly one register to pop. This heuristic may need some
4409 tuning in the future. */
4410 if ((!sp_valid && frame.nregs <= 1)
4411 || (TARGET_EPILOGUE_USING_MOVE
4412 && cfun->machine->use_fast_prologue_epilogue
4413 && (frame.nregs > 1 || frame.to_allocate))
4414 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
4415 || (frame_pointer_needed && TARGET_USE_LEAVE
4416 && cfun->machine->use_fast_prologue_epilogue
4417 && frame.nregs == 1)
4418 || current_function_calls_eh_return)
4419 {
4420 /* Restore registers. We can use ebp or esp to address the memory
4421 locations. If both are available, default to ebp, since offsets
4422 are known to be small. The only exception is esp pointing directly to
4423 the end of the block of saved registers, where we may simplify the
4424 addressing mode. */
4425
4426 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
4427 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
4428 frame.to_allocate, style == 2);
4429 else
4430 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
4431 offset, style == 2);
4432
4433 /* eh_return epilogues need %ecx added to the stack pointer. */
4434 if (style == 2)
4435 {
4436 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
4437
4438 if (frame_pointer_needed)
4439 {
4440 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
4441 tmp = plus_constant (tmp, UNITS_PER_WORD);
4442 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
4443
4444 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
4445 emit_move_insn (hard_frame_pointer_rtx, tmp);
4446
4447 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
4448 const0_rtx, style);
4449 }
4450 else
4451 {
4452 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
4453 tmp = plus_constant (tmp, (frame.to_allocate
4454 + frame.nregs * UNITS_PER_WORD));
4455 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
4456 }
4457 }
4458 else if (!frame_pointer_needed)
4459 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
4460 GEN_INT (frame.to_allocate
4461 + frame.nregs * UNITS_PER_WORD),
4462 style);
4463 /* If not an i386, mov & pop is faster than "leave". */
4464 else if (TARGET_USE_LEAVE || optimize_size
4465 || !cfun->machine->use_fast_prologue_epilogue)
4466 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4467 else
4468 {
4469 pro_epilogue_adjust_stack (stack_pointer_rtx,
4470 hard_frame_pointer_rtx,
4471 const0_rtx, style);
4472 if (TARGET_64BIT)
4473 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4474 else
4475 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4476 }
4477 }
4478 else
4479 {
4480 /* First step is to deallocate the stack frame so that we can
4481 pop the registers. */
4482 if (!sp_valid)
4483 {
4484 if (!frame_pointer_needed)
4485 abort ();
4486 pro_epilogue_adjust_stack (stack_pointer_rtx,
4487 hard_frame_pointer_rtx,
4488 GEN_INT (offset), style);
4489 }
4490 else if (frame.to_allocate)
4491 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
4492 GEN_INT (frame.to_allocate), style);
4493
4494 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4495 if (ix86_save_reg (regno, false))
4496 {
4497 if (TARGET_64BIT)
4498 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
4499 else
4500 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
4501 }
4502 if (frame_pointer_needed)
4503 {
4504 /* Leave results in shorter dependency chains on CPUs that are
4505 able to grok it fast. */
4506 if (TARGET_USE_LEAVE)
4507 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4508 else if (TARGET_64BIT)
4509 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4510 else
4511 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4512 }
4513 }
4514
4515 /* Sibcall epilogues don't want a return instruction. */
4516 if (style == 0)
4517 return;
4518
4519 if (current_function_pops_args && current_function_args_size)
4520 {
4521 rtx popc = GEN_INT (current_function_pops_args);
4522
4523 /* i386 can only pop 64K bytes. If asked to pop more, pop the
4524 return address, do an explicit add, and jump indirectly to the
4525 caller. */
4526
4527 if (current_function_pops_args >= 65536)
4528 {
4529 rtx ecx = gen_rtx_REG (SImode, 2);
4530
4531 /* There is no "pascal" calling convention in 64bit ABI. */
4532 if (TARGET_64BIT)
4533 abort ();
4534
4535 emit_insn (gen_popsi1 (ecx));
4536 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
4537 emit_jump_insn (gen_return_indirect_internal (ecx));
4538 }
4539 else
4540 emit_jump_insn (gen_return_pop_internal (popc));
4541 }
4542 else
4543 emit_jump_insn (gen_return_internal ());
4544 }
4545
4546 /* Reset from the function's potential modifications. */
4547
4548 static void
4549 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
4550 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
4551 {
4552 if (pic_offset_table_rtx)
4553 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
4554 }
4555 \f
4556 /* Extract the parts of an RTL expression that is a valid memory address
4557 for an instruction. Return 0 if the structure of the address is
4558 grossly off. Return -1 if the address contains ASHIFT, so it is not
4559 strictly valid, but is still used for computing the length of the lea instruction. */
4560
4561 int
4562 ix86_decompose_address (rtx addr, struct ix86_address *out)
4563 {
4564 rtx base = NULL_RTX;
4565 rtx index = NULL_RTX;
4566 rtx disp = NULL_RTX;
4567 HOST_WIDE_INT scale = 1;
4568 rtx scale_rtx = NULL_RTX;
4569 int retval = 1;
4570 enum ix86_address_seg seg = SEG_DEFAULT;
4571
4572 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
4573 base = addr;
4574 else if (GET_CODE (addr) == PLUS)
4575 {
4576 rtx addends[4], op;
4577 int n = 0, i;
4578
4579 op = addr;
4580 do
4581 {
4582 if (n >= 4)
4583 return 0;
4584 addends[n++] = XEXP (op, 1);
4585 op = XEXP (op, 0);
4586 }
4587 while (GET_CODE (op) == PLUS);
4588 if (n >= 4)
4589 return 0;
4590 addends[n] = op;
4591
4592 for (i = n; i >= 0; --i)
4593 {
4594 op = addends[i];
4595 switch (GET_CODE (op))
4596 {
4597 case MULT:
4598 if (index)
4599 return 0;
4600 index = XEXP (op, 0);
4601 scale_rtx = XEXP (op, 1);
4602 break;
4603
4604 case UNSPEC:
4605 if (XINT (op, 1) == UNSPEC_TP
4606 && TARGET_TLS_DIRECT_SEG_REFS
4607 && seg == SEG_DEFAULT)
4608 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
4609 else
4610 return 0;
4611 break;
4612
4613 case REG:
4614 case SUBREG:
4615 if (!base)
4616 base = op;
4617 else if (!index)
4618 index = op;
4619 else
4620 return 0;
4621 break;
4622
4623 case CONST:
4624 case CONST_INT:
4625 case SYMBOL_REF:
4626 case LABEL_REF:
4627 if (disp)
4628 return 0;
4629 disp = op;
4630 break;
4631
4632 default:
4633 return 0;
4634 }
4635 }
4636 }
4637 else if (GET_CODE (addr) == MULT)
4638 {
4639 index = XEXP (addr, 0); /* index*scale */
4640 scale_rtx = XEXP (addr, 1);
4641 }
4642 else if (GET_CODE (addr) == ASHIFT)
4643 {
4644 rtx tmp;
4645
4646 /* We're called for lea too, which implements ashift on occasion. */
4647 index = XEXP (addr, 0);
4648 tmp = XEXP (addr, 1);
4649 if (GET_CODE (tmp) != CONST_INT)
4650 return 0;
4651 scale = INTVAL (tmp);
4652 if ((unsigned HOST_WIDE_INT) scale > 3)
4653 return 0;
4654 scale = 1 << scale;
4655 retval = -1;
4656 }
4657 else
4658 disp = addr; /* displacement */
4659
4660 /* Extract the integral value of scale. */
4661 if (scale_rtx)
4662 {
4663 if (GET_CODE (scale_rtx) != CONST_INT)
4664 return 0;
4665 scale = INTVAL (scale_rtx);
4666 }
4667
4668 /* Allow the arg pointer and stack pointer as an index if there is no scaling. */
4669 if (base && index && scale == 1
4670 && (index == arg_pointer_rtx
4671 || index == frame_pointer_rtx
4672 || (REG_P (index) && REGNO (index) == STACK_POINTER_REGNUM)))
4673 {
4674 rtx tmp = base;
4675 base = index;
4676 index = tmp;
4677 }
4678
4679 /* Special case: %ebp cannot be encoded as a base without a displacement. */
4680 if ((base == hard_frame_pointer_rtx
4681 || base == frame_pointer_rtx
4682 || base == arg_pointer_rtx) && !disp)
4683 disp = const0_rtx;
4684
4685 /* Special case: on K6, [%esi] makes the instruction vector decoded.
4686 Avoid this by transforming to [%esi+0]. */
4687 if (ix86_tune == PROCESSOR_K6 && !optimize_size
4688 && base && !index && !disp
4689 && REG_P (base)
4690 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
4691 disp = const0_rtx;
4692
4693 /* Special case: encode reg+reg instead of reg*2. */
4694 if (!base && index && scale && scale == 2)
4695 base = index, scale = 1;
4696
4697 /* Special case: scaling cannot be encoded without base or displacement. */
4698 if (!base && !disp && index && scale != 1)
4699 disp = const0_rtx;
4700
4701 out->base = base;
4702 out->index = index;
4703 out->disp = disp;
4704 out->scale = scale;
4705 out->seg = seg;
4706
4707 return retval;
4708 }
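/* Example (illustrative): for the Pmode address
     (plus (plus (reg %ebx) (mult (reg %ecx) (const_int 4))) (const_int 12))
   this routine returns 1 and fills OUT with base = %ebx, index = %ecx,
   scale = 4, disp = 12, seg = SEG_DEFAULT, i.e. the operand 12(%ebx,%ecx,4). */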
4709 \f
4710 /* Return the cost of the memory address X.
4711 For i386, it is better to use a complex address than let gcc copy
4712 the address into a reg and make a new pseudo. But not if the address
4713 requires two regs - that would mean more pseudos with longer
4714 lifetimes. */
4715 static int
4716 ix86_address_cost (rtx x)
4717 {
4718 struct ix86_address parts;
4719 int cost = 1;
4720
4721 if (!ix86_decompose_address (x, &parts))
4722 abort ();
4723
4724 /* More complex memory references are better. */
4725 if (parts.disp && parts.disp != const0_rtx)
4726 cost--;
4727 if (parts.seg != SEG_DEFAULT)
4728 cost--;
4729
4730 /* Attempt to minimize number of registers in the address. */
4731 if ((parts.base
4732 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
4733 || (parts.index
4734 && (!REG_P (parts.index)
4735 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
4736 cost++;
4737
4738 if (parts.base
4739 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
4740 && parts.index
4741 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
4742 && parts.base != parts.index)
4743 cost++;
4744
4745 /* The AMD K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
4746 since its predecode logic can't detect the length of instructions
4747 and decoding degenerates to the vector decoder. Increase the cost of such
4748 addresses here. The penalty is at least 2 cycles. It may be worthwhile
4749 to split such addresses or even refuse them altogether.
4750
4751 The following addressing modes are affected:
4752 [base+scale*index]
4753 [scale*index+disp]
4754 [base+index]
4755
4756 The first and last cases may be avoidable by explicitly coding a zero
4757 displacement in the memory address, but I don't have an AMD K6 machine
4758 handy to check this theory. */
4759
4760 if (TARGET_K6
4761 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
4762 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
4763 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
4764 cost += 10;
4765
4766 return cost;
4767 }
4768 \f
4769 /* If X is a machine specific address (i.e. a symbol or label being
4770 referenced as a displacement from the GOT implemented using an
4771 UNSPEC), then return the base term. Otherwise return X. */
4772
4773 rtx
4774 ix86_find_base_term (rtx x)
4775 {
4776 rtx term;
4777
4778 if (TARGET_64BIT)
4779 {
4780 if (GET_CODE (x) != CONST)
4781 return x;
4782 term = XEXP (x, 0);
4783 if (GET_CODE (term) == PLUS
4784 && (GET_CODE (XEXP (term, 1)) == CONST_INT
4785 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
4786 term = XEXP (term, 0);
4787 if (GET_CODE (term) != UNSPEC
4788 || XINT (term, 1) != UNSPEC_GOTPCREL)
4789 return x;
4790
4791 term = XVECEXP (term, 0, 0);
4792
4793 if (GET_CODE (term) != SYMBOL_REF
4794 && GET_CODE (term) != LABEL_REF)
4795 return x;
4796
4797 return term;
4798 }
4799
4800 term = ix86_delegitimize_address (x);
4801
4802 if (GET_CODE (term) != SYMBOL_REF
4803 && GET_CODE (term) != LABEL_REF)
4804 return x;
4805
4806 return term;
4807 }
4808
4809 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O, as
4810 this is used to form addresses to local data when -fPIC is in
4811 use. */
4812
4813 static bool
4814 darwin_local_data_pic (rtx disp)
4815 {
4816 if (GET_CODE (disp) == MINUS)
4817 {
4818 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
4819 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
4820 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
4821 {
4822 const char *sym_name = XSTR (XEXP (disp, 1), 0);
4823 if (! strcmp (sym_name, "<pic base>"))
4824 return true;
4825 }
4826 }
4827
4828 return false;
4829 }
4830 \f
4831 /* Determine if a given RTX is a valid constant. We already know this
4832 satisfies CONSTANT_P. */
4833
4834 bool
4835 legitimate_constant_p (rtx x)
4836 {
4837 switch (GET_CODE (x))
4838 {
4839 case CONST:
4840 x = XEXP (x, 0);
4841
4842 if (GET_CODE (x) == PLUS)
4843 {
4844 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
4845 return false;
4846 x = XEXP (x, 0);
4847 }
4848
4849 if (TARGET_MACHO && darwin_local_data_pic (x))
4850 return true;
4851
4852 /* Only some unspecs are valid as "constants". */
4853 if (GET_CODE (x) == UNSPEC)
4854 switch (XINT (x, 1))
4855 {
4856 case UNSPEC_TPOFF:
4857 case UNSPEC_NTPOFF:
4858 return local_exec_symbolic_operand (XVECEXP (x, 0, 0), Pmode);
4859 case UNSPEC_DTPOFF:
4860 return local_dynamic_symbolic_operand (XVECEXP (x, 0, 0), Pmode);
4861 default:
4862 return false;
4863 }
4864
4865 /* We must have drilled down to a symbol. */
4866 if (!symbolic_operand (x, Pmode))
4867 return false;
4868 /* FALLTHRU */
4869
4870 case SYMBOL_REF:
4871 /* TLS symbols are never valid. */
4872 if (tls_symbolic_operand (x, Pmode))
4873 return false;
4874 break;
4875
4876 default:
4877 break;
4878 }
4879
4880 /* Otherwise we handle everything else in the move patterns. */
4881 return true;
4882 }
4883
4884 /* Determine if it's legal to put X into the constant pool. This
4885 is not possible for the address of thread-local symbols, which
4886 is checked above. */
4887
4888 static bool
4889 ix86_cannot_force_const_mem (rtx x)
4890 {
4891 return !legitimate_constant_p (x);
4892 }
4893
4894 /* Determine if a given RTX is a valid constant address. */
4895
4896 bool
4897 constant_address_p (rtx x)
4898 {
4899 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
4900 }
4901
4902 /* Nonzero if the constant value X is a legitimate general operand
4903 when generating PIC code. It is given that flag_pic is on and
4904 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
4905
4906 bool
4907 legitimate_pic_operand_p (rtx x)
4908 {
4909 rtx inner;
4910
4911 switch (GET_CODE (x))
4912 {
4913 case CONST:
4914 inner = XEXP (x, 0);
4915
4916 /* Only some unspecs are valid as "constants". */
4917 if (GET_CODE (inner) == UNSPEC)
4918 switch (XINT (inner, 1))
4919 {
4920 case UNSPEC_TPOFF:
4921 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
4922 default:
4923 return false;
4924 }
4925 /* FALLTHRU */
4926
4927 case SYMBOL_REF:
4928 case LABEL_REF:
4929 return legitimate_pic_address_disp_p (x);
4930
4931 default:
4932 return true;
4933 }
4934 }
4935
4936 /* Determine if a given CONST RTX is a valid memory displacement
4937 in PIC mode. */
4938
4939 int
4940 legitimate_pic_address_disp_p (rtx disp)
4941 {
4942 bool saw_plus;
4943
4944 /* In 64bit mode we can allow direct addresses of symbols and labels
4945 when they are not dynamic symbols. */
4946 if (TARGET_64BIT)
4947 {
4948 /* TLS references should always be enclosed in UNSPEC. */
4949 if (tls_symbolic_operand (disp, GET_MODE (disp)))
4950 return 0;
4951 if (GET_CODE (disp) == SYMBOL_REF
4952 && ix86_cmodel == CM_SMALL_PIC
4953 && SYMBOL_REF_LOCAL_P (disp))
4954 return 1;
4955 if (GET_CODE (disp) == LABEL_REF)
4956 return 1;
4957 if (GET_CODE (disp) == CONST
4958 && GET_CODE (XEXP (disp, 0)) == PLUS)
4959 {
4960 rtx op0 = XEXP (XEXP (disp, 0), 0);
4961 rtx op1 = XEXP (XEXP (disp, 0), 1);
4962
4963 /* TLS references should always be enclosed in UNSPEC. */
4964 if (tls_symbolic_operand (op0, GET_MODE (op0)))
4965 return 0;
4966 if (((GET_CODE (op0) == SYMBOL_REF
4967 && ix86_cmodel == CM_SMALL_PIC
4968 && SYMBOL_REF_LOCAL_P (op0))
4969 || GET_CODE (op0) == LABEL_REF)
4970 && GET_CODE (op1) == CONST_INT
4971 && INTVAL (op1) < 16*1024*1024
4972 && INTVAL (op1) >= -16*1024*1024)
4973 return 1;
4974 }
4975 }
4976 if (GET_CODE (disp) != CONST)
4977 return 0;
4978 disp = XEXP (disp, 0);
4979
4980 if (TARGET_64BIT)
4981 {
4982 /* It is unsafe to allow PLUS expressions; this would limit the allowed
4983 distance of GOT tables. We should not need these anyway. */
4984 if (GET_CODE (disp) != UNSPEC
4985 || XINT (disp, 1) != UNSPEC_GOTPCREL)
4986 return 0;
4987
4988 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
4989 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
4990 return 0;
4991 return 1;
4992 }
4993
4994 saw_plus = false;
4995 if (GET_CODE (disp) == PLUS)
4996 {
4997 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
4998 return 0;
4999 disp = XEXP (disp, 0);
5000 saw_plus = true;
5001 }
5002
5003 if (TARGET_MACHO && darwin_local_data_pic (disp))
5004 return 1;
5005
5006 if (GET_CODE (disp) != UNSPEC)
5007 return 0;
5008
5009 switch (XINT (disp, 1))
5010 {
5011 case UNSPEC_GOT:
5012 if (saw_plus)
5013 return false;
5014 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
5015 case UNSPEC_GOTOFF:
5016 if (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
5017 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
5018 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5019 return false;
5020 case UNSPEC_GOTTPOFF:
5021 case UNSPEC_GOTNTPOFF:
5022 case UNSPEC_INDNTPOFF:
5023 if (saw_plus)
5024 return false;
5025 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5026 case UNSPEC_NTPOFF:
5027 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5028 case UNSPEC_DTPOFF:
5029 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5030 }
5031
5032 return 0;
5033 }
5034
5035 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5036 memory address for an instruction. The MODE argument is the machine mode
5037 for the MEM expression that wants to use this address.
5038
5039 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
5040 convert common non-canonical forms to canonical form so that they will
5041 be recognized. */
5042
5043 int
5044 legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
5045 {
5046 struct ix86_address parts;
5047 rtx base, index, disp;
5048 HOST_WIDE_INT scale;
5049 const char *reason = NULL;
5050 rtx reason_rtx = NULL_RTX;
5051
5052 if (TARGET_DEBUG_ADDR)
5053 {
5054 fprintf (stderr,
5055 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
5056 GET_MODE_NAME (mode), strict);
5057 debug_rtx (addr);
5058 }
5059
5060 if (ix86_decompose_address (addr, &parts) <= 0)
5061 {
5062 reason = "decomposition failed";
5063 goto report_error;
5064 }
5065
5066 base = parts.base;
5067 index = parts.index;
5068 disp = parts.disp;
5069 scale = parts.scale;
5070
5071 /* Validate base register.
5072
5073 Don't allow SUBREGs here; they can lead to spill failures when the base
5074 is one word out of a two word structure, which is represented internally
5075 as a DImode int. */
5076
5077 if (base)
5078 {
5079 reason_rtx = base;
5080
5081 if (GET_CODE (base) != REG)
5082 {
5083 reason = "base is not a register";
5084 goto report_error;
5085 }
5086
5087 if (GET_MODE (base) != Pmode)
5088 {
5089 reason = "base is not in Pmode";
5090 goto report_error;
5091 }
5092
5093 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
5094 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
5095 {
5096 reason = "base is not valid";
5097 goto report_error;
5098 }
5099 }
5100
5101 /* Validate index register.
5102
5103 Don't allow SUBREGs here; they can lead to spill failures when the index
5104 is one word out of a two word structure, which is represented internally
5105 as a DImode int. */
5106
5107 if (index)
5108 {
5109 reason_rtx = index;
5110
5111 if (GET_CODE (index) != REG)
5112 {
5113 reason = "index is not a register";
5114 goto report_error;
5115 }
5116
5117 if (GET_MODE (index) != Pmode)
5118 {
5119 reason = "index is not in Pmode";
5120 goto report_error;
5121 }
5122
5123 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
5124 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
5125 {
5126 reason = "index is not valid";
5127 goto report_error;
5128 }
5129 }
5130
5131 /* Validate scale factor. */
5132 if (scale != 1)
5133 {
5134 reason_rtx = GEN_INT (scale);
5135 if (!index)
5136 {
5137 reason = "scale without index";
5138 goto report_error;
5139 }
5140
5141 if (scale != 2 && scale != 4 && scale != 8)
5142 {
5143 reason = "scale is not a valid multiplier";
5144 goto report_error;
5145 }
5146 }
5147
5148 /* Validate displacement. */
5149 if (disp)
5150 {
5151 reason_rtx = disp;
5152
5153 if (GET_CODE (disp) == CONST
5154 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
5155 switch (XINT (XEXP (disp, 0), 1))
5156 {
5157 case UNSPEC_GOT:
5158 case UNSPEC_GOTOFF:
5159 case UNSPEC_GOTPCREL:
5160 if (!flag_pic)
5161 abort ();
5162 goto is_legitimate_pic;
5163
5164 case UNSPEC_GOTTPOFF:
5165 case UNSPEC_GOTNTPOFF:
5166 case UNSPEC_INDNTPOFF:
5167 case UNSPEC_NTPOFF:
5168 case UNSPEC_DTPOFF:
5169 break;
5170
5171 default:
5172 reason = "invalid address unspec";
5173 goto report_error;
5174 }
5175
5176 else if (flag_pic && (SYMBOLIC_CONST (disp)
5177 #if TARGET_MACHO
5178 && !machopic_operand_p (disp)
5179 #endif
5180 ))
5181 {
5182 is_legitimate_pic:
5183 if (TARGET_64BIT && (index || base))
5184 {
5185 /* foo@dtpoff(%rX) is ok. */
5186 if (GET_CODE (disp) != CONST
5187 || GET_CODE (XEXP (disp, 0)) != PLUS
5188 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
5189 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
5190 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
5191 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
5192 {
5193 reason = "non-constant pic memory reference";
5194 goto report_error;
5195 }
5196 }
5197 else if (! legitimate_pic_address_disp_p (disp))
5198 {
5199 reason = "displacement is an invalid pic construct";
5200 goto report_error;
5201 }
5202
5203 /* This code used to verify that a symbolic pic displacement
5204 includes the pic_offset_table_rtx register.
5205
5206 While this is good idea, unfortunately these constructs may
5207 be created by "adds using lea" optimization for incorrect
5208 code like:
5209
5210 int a;
5211 int foo(int i)
5212 {
5213 return *(&a+i);
5214 }
5215
5216 This code is nonsensical, but results in addressing the
5217 GOT table with a pic_offset_table_rtx base. We can't
5218 just refuse it easily, since it gets matched by the
5219 "addsi3" pattern, which later gets split to lea when the
5220 output register differs from the input. While this
5221 could be handled by a separate addsi pattern for this case
5222 that never results in lea, disabling this test seems to be
5223 the easier and correct fix for the crash. */
5224 }
5225 else if (GET_CODE (disp) != LABEL_REF
5226 && GET_CODE (disp) != CONST_INT
5227 && (GET_CODE (disp) != CONST
5228 || !legitimate_constant_p (disp))
5229 && (GET_CODE (disp) != SYMBOL_REF
5230 || !legitimate_constant_p (disp)))
5231 {
5232 reason = "displacement is not constant";
5233 goto report_error;
5234 }
5235 else if (TARGET_64BIT
5236 && !x86_64_immediate_operand (disp, VOIDmode))
5237 {
5238 reason = "displacement is out of range";
5239 goto report_error;
5240 }
5241 }
5242
5243 /* Everything looks valid. */
5244 if (TARGET_DEBUG_ADDR)
5245 fprintf (stderr, "Success.\n");
5246 return TRUE;
5247
5248 report_error:
5249 if (TARGET_DEBUG_ADDR)
5250 {
5251 fprintf (stderr, "Error: %s\n", reason);
5252 debug_rtx (reason_rtx);
5253 }
5254 return FALSE;
5255 }
5256 \f
5257 /* Return a unique alias set for the GOT. */
5258
5259 static HOST_WIDE_INT
5260 ix86_GOT_alias_set (void)
5261 {
5262 static HOST_WIDE_INT set = -1;
5263 if (set == -1)
5264 set = new_alias_set ();
5265 return set;
5266 }
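/* A brief note on why the GOT gets a dedicated alias set: the GOT loads
   built below (in legitimize_pic_address and legitimize_tls_address) are
   created with gen_const_mem and tagged with this set, telling the alias
   machinery that, roughly speaking, GOT slots do not alias ordinary user
   data, so those loads may be CSEd and moved across unrelated stores.  */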
5267
5268 /* Return a legitimate reference for ORIG (an address) using the
5269 register REG. If REG is 0, a new pseudo is generated.
5270
5271 There are two types of references that must be handled:
5272
5273 1. Global data references must load the address from the GOT, via
5274 the PIC reg. An insn is emitted to do this load, and the reg is
5275 returned.
5276
5277 2. Static data references, constant pool addresses, and code labels
5278 compute the address as an offset from the GOT, whose base is in
5279 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
5280 differentiate them from global data objects. The returned
5281 address is the PIC reg + an unspec constant.
5282
5283 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
5284 reg also appears in the address. */
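/* As a rough illustration of the 32-bit forms built below:

     global symbol FOO:  (mem (plus (reg pic)
                            (const (unspec [(symbol_ref FOO)] UNSPEC_GOT))))
     local symbol BAR:   (plus (reg pic)
                            (const (unspec [(symbol_ref BAR)] UNSPEC_GOTOFF)))

   On 64-bit targets a global symbol instead becomes a RIP-relative
   (mem (const (unspec [...] UNSPEC_GOTPCREL))) reference.  FOO and BAR are
   just placeholder names.  */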
5285
5286 static rtx
5287 legitimize_pic_address (rtx orig, rtx reg)
5288 {
5289 rtx addr = orig;
5290 rtx new = orig;
5291 rtx base;
5292
5293 #if TARGET_MACHO
5294 if (reg == 0)
5295 reg = gen_reg_rtx (Pmode);
5296 /* Use the generic Mach-O PIC machinery. */
5297 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
5298 #endif
5299
5300 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
5301 new = addr;
5302 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
5303 {
5304 /* This symbol may be referenced via a displacement from the PIC
5305 base address (@GOTOFF). */
5306
5307 if (reload_in_progress)
5308 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5309 if (GET_CODE (addr) == CONST)
5310 addr = XEXP (addr, 0);
5311 if (GET_CODE (addr) == PLUS)
5312 {
5313 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
5314 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
5315 }
5316 else
5317 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
5318 new = gen_rtx_CONST (Pmode, new);
5319 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5320
5321 if (reg != 0)
5322 {
5323 emit_move_insn (reg, new);
5324 new = reg;
5325 }
5326 }
5327 else if (GET_CODE (addr) == SYMBOL_REF)
5328 {
5329 if (TARGET_64BIT)
5330 {
5331 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
5332 new = gen_rtx_CONST (Pmode, new);
5333 new = gen_const_mem (Pmode, new);
5334 set_mem_alias_set (new, ix86_GOT_alias_set ());
5335
5336 if (reg == 0)
5337 reg = gen_reg_rtx (Pmode);
5338 /* Use gen_movsi directly; otherwise the address is loaded
5339 into a register for CSE. We don't want to CSE these addresses;
5340 instead we CSE addresses from the GOT table, so skip this. */
5341 emit_insn (gen_movsi (reg, new));
5342 new = reg;
5343 }
5344 else
5345 {
5346 /* This symbol must be referenced via a load from the
5347 Global Offset Table (@GOT). */
5348
5349 if (reload_in_progress)
5350 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5351 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
5352 new = gen_rtx_CONST (Pmode, new);
5353 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5354 new = gen_const_mem (Pmode, new);
5355 set_mem_alias_set (new, ix86_GOT_alias_set ());
5356
5357 if (reg == 0)
5358 reg = gen_reg_rtx (Pmode);
5359 emit_move_insn (reg, new);
5360 new = reg;
5361 }
5362 }
5363 else
5364 {
5365 if (GET_CODE (addr) == CONST)
5366 {
5367 addr = XEXP (addr, 0);
5368
5369 /* We must match stuff we generated before. Assume the only
5370 unspecs that can get here are ours; not that we could do
5371 anything with them anyway.... */
5372 if (GET_CODE (addr) == UNSPEC
5373 || (GET_CODE (addr) == PLUS
5374 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
5375 return orig;
5376 if (GET_CODE (addr) != PLUS)
5377 abort ();
5378 }
5379 if (GET_CODE (addr) == PLUS)
5380 {
5381 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
5382
5383 /* Check first to see if this is a constant offset from a @GOTOFF
5384 symbol reference. */
5385 if (local_symbolic_operand (op0, Pmode)
5386 && GET_CODE (op1) == CONST_INT)
5387 {
5388 if (!TARGET_64BIT)
5389 {
5390 if (reload_in_progress)
5391 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5392 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
5393 UNSPEC_GOTOFF);
5394 new = gen_rtx_PLUS (Pmode, new, op1);
5395 new = gen_rtx_CONST (Pmode, new);
5396 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5397
5398 if (reg != 0)
5399 {
5400 emit_move_insn (reg, new);
5401 new = reg;
5402 }
5403 }
5404 else
5405 {
5406 if (INTVAL (op1) < -16*1024*1024
5407 || INTVAL (op1) >= 16*1024*1024)
5408 new = gen_rtx_PLUS (Pmode, op0, force_reg (Pmode, op1));
5409 }
5410 }
5411 else
5412 {
5413 base = legitimize_pic_address (XEXP (addr, 0), reg);
5414 new = legitimize_pic_address (XEXP (addr, 1),
5415 base == reg ? NULL_RTX : reg);
5416
5417 if (GET_CODE (new) == CONST_INT)
5418 new = plus_constant (base, INTVAL (new));
5419 else
5420 {
5421 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
5422 {
5423 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
5424 new = XEXP (new, 1);
5425 }
5426 new = gen_rtx_PLUS (Pmode, base, new);
5427 }
5428 }
5429 }
5430 }
5431 return new;
5432 }
5433 \f
5434 /* Load the thread pointer. If TO_REG is true, force it into a register. */
5435
5436 static rtx
5437 get_thread_pointer (int to_reg)
5438 {
5439 rtx tp, reg, insn;
5440
5441 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
5442 if (!to_reg)
5443 return tp;
5444
5445 reg = gen_reg_rtx (Pmode);
5446 insn = gen_rtx_SET (VOIDmode, reg, tp);
5447 insn = emit_insn (insn);
5448
5449 return reg;
5450 }
5451
5452 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
5453 false if we expect this to be used for a memory address and true if
5454 we expect to load the address into a register. */
5455
5456 static rtx
5457 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
5458 {
5459 rtx dest, base, off, pic;
5460 int type;
5461
5462 switch (model)
5463 {
5464 case TLS_MODEL_GLOBAL_DYNAMIC:
5465 dest = gen_reg_rtx (Pmode);
5466 if (TARGET_64BIT)
5467 {
5468 rtx rax = gen_rtx_REG (Pmode, 0), insns;
5469
5470 start_sequence ();
5471 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
5472 insns = get_insns ();
5473 end_sequence ();
5474
5475 emit_libcall_block (insns, dest, rax, x);
5476 }
5477 else
5478 emit_insn (gen_tls_global_dynamic_32 (dest, x));
5479 break;
5480
5481 case TLS_MODEL_LOCAL_DYNAMIC:
5482 base = gen_reg_rtx (Pmode);
5483 if (TARGET_64BIT)
5484 {
5485 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
5486
5487 start_sequence ();
5488 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
5489 insns = get_insns ();
5490 end_sequence ();
5491
5492 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
5493 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
5494 emit_libcall_block (insns, base, rax, note);
5495 }
5496 else
5497 emit_insn (gen_tls_local_dynamic_base_32 (base));
5498
5499 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
5500 off = gen_rtx_CONST (Pmode, off);
5501
5502 return gen_rtx_PLUS (Pmode, base, off);
5503
5504 case TLS_MODEL_INITIAL_EXEC:
5505 if (TARGET_64BIT)
5506 {
5507 pic = NULL;
5508 type = UNSPEC_GOTNTPOFF;
5509 }
5510 else if (flag_pic)
5511 {
5512 if (reload_in_progress)
5513 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5514 pic = pic_offset_table_rtx;
5515 type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
5516 }
5517 else if (!TARGET_GNU_TLS)
5518 {
5519 pic = gen_reg_rtx (Pmode);
5520 emit_insn (gen_set_got (pic));
5521 type = UNSPEC_GOTTPOFF;
5522 }
5523 else
5524 {
5525 pic = NULL;
5526 type = UNSPEC_INDNTPOFF;
5527 }
5528
5529 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
5530 off = gen_rtx_CONST (Pmode, off);
5531 if (pic)
5532 off = gen_rtx_PLUS (Pmode, pic, off);
5533 off = gen_const_mem (Pmode, off);
5534 set_mem_alias_set (off, ix86_GOT_alias_set ());
5535
5536 if (TARGET_64BIT || TARGET_GNU_TLS)
5537 {
5538 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
5539 off = force_reg (Pmode, off);
5540 return gen_rtx_PLUS (Pmode, base, off);
5541 }
5542 else
5543 {
5544 base = get_thread_pointer (true);
5545 dest = gen_reg_rtx (Pmode);
5546 emit_insn (gen_subsi3 (dest, base, off));
5547 }
5548 break;
5549
5550 case TLS_MODEL_LOCAL_EXEC:
5551 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
5552 (TARGET_64BIT || TARGET_GNU_TLS)
5553 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
5554 off = gen_rtx_CONST (Pmode, off);
5555
5556 if (TARGET_64BIT || TARGET_GNU_TLS)
5557 {
5558 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
5559 return gen_rtx_PLUS (Pmode, base, off);
5560 }
5561 else
5562 {
5563 base = get_thread_pointer (true);
5564 dest = gen_reg_rtx (Pmode);
5565 emit_insn (gen_subsi3 (dest, base, off));
5566 }
5567 break;
5568
5569 default:
5570 abort ();
5571 }
5572
5573 return dest;
5574 }
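/* A rough summary of the addresses built above:

     global-dynamic: the result of a __tls_get_addr call
                     (the tls_global_dynamic_32/64 patterns)
     local-dynamic:  the module base from __tls_get_addr plus sym@DTPOFF
     initial-exec:   thread pointer plus a GOT load of the symbol's offset
                     (@GOTNTPOFF, @GOTTPOFF or @INDNTPOFF, as selected by
                     the flags above)
     local-exec:     thread pointer plus sym@NTPOFF

   For the non-GNU TLS dialect the offsets are instead subtracted from the
   thread pointer via the gen_subsi3 calls above.  */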
5575
5576 /* Try machine-dependent ways of modifying an illegitimate address
5577 to be legitimate. If we find one, return the new, valid address.
5578 This macro is used in only one place: `memory_address' in explow.c.
5579
5580 OLDX is the address as it was before break_out_memory_refs was called.
5581 In some cases it is useful to look at this to decide what needs to be done.
5582
5583 MODE and WIN are passed so that this macro can use
5584 GO_IF_LEGITIMATE_ADDRESS.
5585
5586 It is always safe for this macro to do nothing. It exists to recognize
5587 opportunities to optimize the output.
5588
5589 For the 80386, we handle X+REG by loading X into a register R and
5590 using R+REG. R will go in a general reg and indexing will be used.
5591 However, if REG is a broken-out memory address or multiplication,
5592 nothing needs to be done because REG can certainly go in a general reg.
5593
5594 When -fpic is used, special handling is needed for symbolic references.
5595 See comments by legitimize_pic_address in i386.c for details. */
5596
5597 rtx
5598 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
5599 {
5600 int changed = 0;
5601 unsigned log;
5602
5603 if (TARGET_DEBUG_ADDR)
5604 {
5605 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
5606 GET_MODE_NAME (mode));
5607 debug_rtx (x);
5608 }
5609
5610 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
5611 if (log)
5612 return legitimize_tls_address (x, log, false);
5613 if (GET_CODE (x) == CONST
5614 && GET_CODE (XEXP (x, 0)) == PLUS
5615 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
5616 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
5617 {
5618 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
5619 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
5620 }
5621
5622 if (flag_pic && SYMBOLIC_CONST (x))
5623 return legitimize_pic_address (x, 0);
5624
5625 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
5626 if (GET_CODE (x) == ASHIFT
5627 && GET_CODE (XEXP (x, 1)) == CONST_INT
5628 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
5629 {
5630 changed = 1;
5631 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
5632 GEN_INT (1 << log));
5633 }
5634
5635 if (GET_CODE (x) == PLUS)
5636 {
5637 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
5638
5639 if (GET_CODE (XEXP (x, 0)) == ASHIFT
5640 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
5641 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
5642 {
5643 changed = 1;
5644 XEXP (x, 0) = gen_rtx_MULT (Pmode,
5645 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
5646 GEN_INT (1 << log));
5647 }
5648
5649 if (GET_CODE (XEXP (x, 1)) == ASHIFT
5650 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
5651 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
5652 {
5653 changed = 1;
5654 XEXP (x, 1) = gen_rtx_MULT (Pmode,
5655 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
5656 GEN_INT (1 << log));
5657 }
5658
5659 /* Put multiply first if it isn't already. */
5660 if (GET_CODE (XEXP (x, 1)) == MULT)
5661 {
5662 rtx tmp = XEXP (x, 0);
5663 XEXP (x, 0) = XEXP (x, 1);
5664 XEXP (x, 1) = tmp;
5665 changed = 1;
5666 }
5667
5668 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
5669 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
5670 created by virtual register instantiation, register elimination, and
5671 similar optimizations. */
5672 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
5673 {
5674 changed = 1;
5675 x = gen_rtx_PLUS (Pmode,
5676 gen_rtx_PLUS (Pmode, XEXP (x, 0),
5677 XEXP (XEXP (x, 1), 0)),
5678 XEXP (XEXP (x, 1), 1));
5679 }
5680
5681 /* Canonicalize
5682 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
5683 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
5684 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
5685 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5686 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
5687 && CONSTANT_P (XEXP (x, 1)))
5688 {
5689 rtx constant;
5690 rtx other = NULL_RTX;
5691
5692 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5693 {
5694 constant = XEXP (x, 1);
5695 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
5696 }
5697 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
5698 {
5699 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
5700 other = XEXP (x, 1);
5701 }
5702 else
5703 constant = 0;
5704
5705 if (constant)
5706 {
5707 changed = 1;
5708 x = gen_rtx_PLUS (Pmode,
5709 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
5710 XEXP (XEXP (XEXP (x, 0), 1), 0)),
5711 plus_constant (other, INTVAL (constant)));
5712 }
5713 }
5714
5715 if (changed && legitimate_address_p (mode, x, FALSE))
5716 return x;
5717
5718 if (GET_CODE (XEXP (x, 0)) == MULT)
5719 {
5720 changed = 1;
5721 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
5722 }
5723
5724 if (GET_CODE (XEXP (x, 1)) == MULT)
5725 {
5726 changed = 1;
5727 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
5728 }
5729
5730 if (changed
5731 && GET_CODE (XEXP (x, 1)) == REG
5732 && GET_CODE (XEXP (x, 0)) == REG)
5733 return x;
5734
5735 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
5736 {
5737 changed = 1;
5738 x = legitimize_pic_address (x, 0);
5739 }
5740
5741 if (changed && legitimate_address_p (mode, x, FALSE))
5742 return x;
5743
5744 if (GET_CODE (XEXP (x, 0)) == REG)
5745 {
5746 rtx temp = gen_reg_rtx (Pmode);
5747 rtx val = force_operand (XEXP (x, 1), temp);
5748 if (val != temp)
5749 emit_move_insn (temp, val);
5750
5751 XEXP (x, 1) = temp;
5752 return x;
5753 }
5754
5755 else if (GET_CODE (XEXP (x, 1)) == REG)
5756 {
5757 rtx temp = gen_reg_rtx (Pmode);
5758 rtx val = force_operand (XEXP (x, 0), temp);
5759 if (val != temp)
5760 emit_move_insn (temp, val);
5761
5762 XEXP (x, 0) = temp;
5763 return x;
5764 }
5765 }
5766
5767 return x;
5768 }
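/* For example, an address such as (plus (ashift (reg A) (const_int 2))
   (reg B)) is rewritten above into (plus (mult (reg A) (const_int 4))
   (reg B)), which ix86_decompose_address accepts as the base + index*scale
   form of an x86 effective address (i.e. a single lea or memory operand).  */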
5769 \f
5770 /* Print an integer constant expression in assembler syntax. Addition
5771 and subtraction are the only arithmetic that may appear in these
5772 expressions. FILE is the stdio stream to write to, X is the rtx, and
5773 CODE is the operand print code from the output string. */
5774
5775 static void
5776 output_pic_addr_const (FILE *file, rtx x, int code)
5777 {
5778 char buf[256];
5779
5780 switch (GET_CODE (x))
5781 {
5782 case PC:
5783 if (flag_pic)
5784 putc ('.', file);
5785 else
5786 abort ();
5787 break;
5788
5789 case SYMBOL_REF:
5790 /* Mark the decl as referenced so that cgraph will output the function. */
5791 if (SYMBOL_REF_DECL (x))
5792 mark_decl_referenced (SYMBOL_REF_DECL (x));
5793
5794 assemble_name (file, XSTR (x, 0));
5795 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
5796 fputs ("@PLT", file);
5797 break;
5798
5799 case LABEL_REF:
5800 x = XEXP (x, 0);
5801 /* FALLTHRU */
5802 case CODE_LABEL:
5803 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
5804 assemble_name (asm_out_file, buf);
5805 break;
5806
5807 case CONST_INT:
5808 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
5809 break;
5810
5811 case CONST:
5812 /* This used to output parentheses around the expression,
5813 but that does not work on the 386 (either ATT or BSD assembler). */
5814 output_pic_addr_const (file, XEXP (x, 0), code);
5815 break;
5816
5817 case CONST_DOUBLE:
5818 if (GET_MODE (x) == VOIDmode)
5819 {
5820 /* We can use %d if the number is <32 bits and positive. */
5821 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
5822 fprintf (file, "0x%lx%08lx",
5823 (unsigned long) CONST_DOUBLE_HIGH (x),
5824 (unsigned long) CONST_DOUBLE_LOW (x));
5825 else
5826 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
5827 }
5828 else
5829 /* We can't handle floating point constants;
5830 PRINT_OPERAND must handle them. */
5831 output_operand_lossage ("floating constant misused");
5832 break;
5833
5834 case PLUS:
5835 /* Some assemblers need integer constants to appear first. */
5836 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
5837 {
5838 output_pic_addr_const (file, XEXP (x, 0), code);
5839 putc ('+', file);
5840 output_pic_addr_const (file, XEXP (x, 1), code);
5841 }
5842 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5843 {
5844 output_pic_addr_const (file, XEXP (x, 1), code);
5845 putc ('+', file);
5846 output_pic_addr_const (file, XEXP (x, 0), code);
5847 }
5848 else
5849 abort ();
5850 break;
5851
5852 case MINUS:
5853 if (!TARGET_MACHO)
5854 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
5855 output_pic_addr_const (file, XEXP (x, 0), code);
5856 putc ('-', file);
5857 output_pic_addr_const (file, XEXP (x, 1), code);
5858 if (!TARGET_MACHO)
5859 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
5860 break;
5861
5862 case UNSPEC:
5863 if (XVECLEN (x, 0) != 1)
5864 abort ();
5865 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
5866 switch (XINT (x, 1))
5867 {
5868 case UNSPEC_GOT:
5869 fputs ("@GOT", file);
5870 break;
5871 case UNSPEC_GOTOFF:
5872 fputs ("@GOTOFF", file);
5873 break;
5874 case UNSPEC_GOTPCREL:
5875 fputs ("@GOTPCREL(%rip)", file);
5876 break;
5877 case UNSPEC_GOTTPOFF:
5878 /* FIXME: This might be @TPOFF in Sun ld too. */
5879 fputs ("@GOTTPOFF", file);
5880 break;
5881 case UNSPEC_TPOFF:
5882 fputs ("@TPOFF", file);
5883 break;
5884 case UNSPEC_NTPOFF:
5885 if (TARGET_64BIT)
5886 fputs ("@TPOFF", file);
5887 else
5888 fputs ("@NTPOFF", file);
5889 break;
5890 case UNSPEC_DTPOFF:
5891 fputs ("@DTPOFF", file);
5892 break;
5893 case UNSPEC_GOTNTPOFF:
5894 if (TARGET_64BIT)
5895 fputs ("@GOTTPOFF(%rip)", file);
5896 else
5897 fputs ("@GOTNTPOFF", file);
5898 break;
5899 case UNSPEC_INDNTPOFF:
5900 fputs ("@INDNTPOFF", file);
5901 break;
5902 default:
5903 output_operand_lossage ("invalid UNSPEC as operand");
5904 break;
5905 }
5906 break;
5907
5908 default:
5909 output_operand_lossage ("invalid expression as operand");
5910 }
5911 }
5912
5913 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
5914 We need to handle our special PIC relocations. */
5915
5916 void
5917 i386_dwarf_output_addr_const (FILE *file, rtx x)
5918 {
5919 #ifdef ASM_QUAD
5920 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
5921 #else
5922 if (TARGET_64BIT)
5923 abort ();
5924 fprintf (file, "%s", ASM_LONG);
5925 #endif
5926 if (flag_pic)
5927 output_pic_addr_const (file, x, '\0');
5928 else
5929 output_addr_const (file, x);
5930 fputc ('\n', file);
5931 }
5932
5933 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
5934 We need to emit DTP-relative relocations. */
5935
5936 void
5937 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
5938 {
5939 fputs (ASM_LONG, file);
5940 output_addr_const (file, x);
5941 fputs ("@DTPOFF", file);
5942 switch (size)
5943 {
5944 case 4:
5945 break;
5946 case 8:
5947 fputs (", 0", file);
5948 break;
5949 default:
5950 abort ();
5951 }
5952 }
5953
5954 /* In the name of slightly smaller debug output, and to cater to
5955 general assembler lossage, recognize PIC+GOTOFF and turn it back
5956 into a direct symbol reference. */
5957
5958 static rtx
5959 ix86_delegitimize_address (rtx orig_x)
5960 {
5961 rtx x = orig_x, y;
5962
5963 if (GET_CODE (x) == MEM)
5964 x = XEXP (x, 0);
5965
5966 if (TARGET_64BIT)
5967 {
5968 if (GET_CODE (x) != CONST
5969 || GET_CODE (XEXP (x, 0)) != UNSPEC
5970 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
5971 || GET_CODE (orig_x) != MEM)
5972 return orig_x;
5973 return XVECEXP (XEXP (x, 0), 0, 0);
5974 }
5975
5976 if (GET_CODE (x) != PLUS
5977 || GET_CODE (XEXP (x, 1)) != CONST)
5978 return orig_x;
5979
5980 if (GET_CODE (XEXP (x, 0)) == REG
5981 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
5982 /* %ebx + GOT/GOTOFF */
5983 y = NULL;
5984 else if (GET_CODE (XEXP (x, 0)) == PLUS)
5985 {
5986 /* %ebx + %reg * scale + GOT/GOTOFF */
5987 y = XEXP (x, 0);
5988 if (GET_CODE (XEXP (y, 0)) == REG
5989 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
5990 y = XEXP (y, 1);
5991 else if (GET_CODE (XEXP (y, 1)) == REG
5992 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
5993 y = XEXP (y, 0);
5994 else
5995 return orig_x;
5996 if (GET_CODE (y) != REG
5997 && GET_CODE (y) != MULT
5998 && GET_CODE (y) != ASHIFT)
5999 return orig_x;
6000 }
6001 else
6002 return orig_x;
6003
6004 x = XEXP (XEXP (x, 1), 0);
6005 if (GET_CODE (x) == UNSPEC
6006 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6007 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
6008 {
6009 if (y)
6010 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
6011 return XVECEXP (x, 0, 0);
6012 }
6013
6014 if (GET_CODE (x) == PLUS
6015 && GET_CODE (XEXP (x, 0)) == UNSPEC
6016 && GET_CODE (XEXP (x, 1)) == CONST_INT
6017 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6018 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
6019 && GET_CODE (orig_x) != MEM)))
6020 {
6021 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
6022 if (y)
6023 return gen_rtx_PLUS (Pmode, y, x);
6024 return x;
6025 }
6026
6027 return orig_x;
6028 }
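/* For example, (plus (reg %ebx) (const (unspec [(symbol_ref FOO)]
   UNSPEC_GOTOFF))) delegitimizes back to plain (symbol_ref FOO), keeping
   the debug output smaller as noted above.  FOO is a placeholder name.  */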
6029 \f
6030 static void
6031 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
6032 int fp, FILE *file)
6033 {
6034 const char *suffix;
6035
6036 if (mode == CCFPmode || mode == CCFPUmode)
6037 {
6038 enum rtx_code second_code, bypass_code;
6039 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
6040 if (bypass_code != UNKNOWN || second_code != UNKNOWN)
6041 abort ();
6042 code = ix86_fp_compare_code_to_integer (code);
6043 mode = CCmode;
6044 }
6045 if (reverse)
6046 code = reverse_condition (code);
6047
6048 switch (code)
6049 {
6050 case EQ:
6051 suffix = "e";
6052 break;
6053 case NE:
6054 suffix = "ne";
6055 break;
6056 case GT:
6057 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
6058 abort ();
6059 suffix = "g";
6060 break;
6061 case GTU:
6062 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
6063 Those same assemblers have the same but opposite lossage on cmov. */
6064 if (mode != CCmode)
6065 abort ();
6066 suffix = fp ? "nbe" : "a";
6067 break;
6068 case LT:
6069 if (mode == CCNOmode || mode == CCGOCmode)
6070 suffix = "s";
6071 else if (mode == CCmode || mode == CCGCmode)
6072 suffix = "l";
6073 else
6074 abort ();
6075 break;
6076 case LTU:
6077 if (mode != CCmode)
6078 abort ();
6079 suffix = "b";
6080 break;
6081 case GE:
6082 if (mode == CCNOmode || mode == CCGOCmode)
6083 suffix = "ns";
6084 else if (mode == CCmode || mode == CCGCmode)
6085 suffix = "ge";
6086 else
6087 abort ();
6088 break;
6089 case GEU:
6090 /* ??? As above. */
6091 if (mode != CCmode)
6092 abort ();
6093 suffix = fp ? "nb" : "ae";
6094 break;
6095 case LE:
6096 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
6097 abort ();
6098 suffix = "le";
6099 break;
6100 case LEU:
6101 if (mode != CCmode)
6102 abort ();
6103 suffix = "be";
6104 break;
6105 case UNORDERED:
6106 suffix = fp ? "u" : "p";
6107 break;
6108 case ORDERED:
6109 suffix = fp ? "nu" : "np";
6110 break;
6111 default:
6112 abort ();
6113 }
6114 fputs (suffix, file);
6115 }
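/* For example, an EQ comparison prints the suffix "e" and NE prints "ne",
   so an output template along the lines of "set%C1\t%0" would emit "sete"
   or "setne"; with REVERSE nonzero the condition is inverted first.  */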
6116
6117 /* Print the name of register X to FILE based on its machine mode and number.
6118 If CODE is 'w', pretend the mode is HImode.
6119 If CODE is 'b', pretend the mode is QImode.
6120 If CODE is 'k', pretend the mode is SImode.
6121 If CODE is 'q', pretend the mode is DImode.
6122 If CODE is 'h', pretend the reg is the `high' byte register.
6123 If CODE is 'y', print "st(0)" instead of "st" if the reg is a stack operand. */
6124
6125 void
6126 print_reg (rtx x, int code, FILE *file)
6127 {
6128 if (REGNO (x) == ARG_POINTER_REGNUM
6129 || REGNO (x) == FRAME_POINTER_REGNUM
6130 || REGNO (x) == FLAGS_REG
6131 || REGNO (x) == FPSR_REG)
6132 abort ();
6133
6134 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
6135 putc ('%', file);
6136
6137 if (code == 'w' || MMX_REG_P (x))
6138 code = 2;
6139 else if (code == 'b')
6140 code = 1;
6141 else if (code == 'k')
6142 code = 4;
6143 else if (code == 'q')
6144 code = 8;
6145 else if (code == 'y')
6146 code = 3;
6147 else if (code == 'h')
6148 code = 0;
6149 else
6150 code = GET_MODE_SIZE (GET_MODE (x));
6151
6152 /* Irritatingly, the AMD extended registers use a different naming
6153 convention from the normal registers. */
6154 if (REX_INT_REG_P (x))
6155 {
6156 if (!TARGET_64BIT)
6157 abort ();
6158 switch (code)
6159 {
6160 case 0:
6161 error ("extended registers have no high halves");
6162 break;
6163 case 1:
6164 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
6165 break;
6166 case 2:
6167 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
6168 break;
6169 case 4:
6170 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
6171 break;
6172 case 8:
6173 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
6174 break;
6175 default:
6176 error ("unsupported operand size for extended register");
6177 break;
6178 }
6179 return;
6180 }
6181 switch (code)
6182 {
6183 case 3:
6184 if (STACK_TOP_P (x))
6185 {
6186 fputs ("st(0)", file);
6187 break;
6188 }
6189 /* FALLTHRU */
6190 case 8:
6191 case 4:
6192 case 12:
6193 if (! ANY_FP_REG_P (x))
6194 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
6195 /* FALLTHRU */
6196 case 16:
6197 case 2:
6198 normal:
6199 fputs (hi_reg_name[REGNO (x)], file);
6200 break;
6201 case 1:
6202 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
6203 goto normal;
6204 fputs (qi_reg_name[REGNO (x)], file);
6205 break;
6206 case 0:
6207 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
6208 goto normal;
6209 fputs (qi_high_reg_name[REGNO (x)], file);
6210 break;
6211 default:
6212 abort ();
6213 }
6214 }
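/* For example, the first REX register prints as r8b, r8w, r8d or r8
   depending on the size selected above, while the classic registers use
   their al/ax/eax (and rax in 64-bit mode) style names taken from
   qi_reg_name, qi_high_reg_name and hi_reg_name.  */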
6215
6216 /* Locate some local-dynamic symbol still in use by this function
6217 so that we can print its name in some tls_local_dynamic_base
6218 pattern. */
6219
6220 static const char *
6221 get_some_local_dynamic_name (void)
6222 {
6223 rtx insn;
6224
6225 if (cfun->machine->some_ld_name)
6226 return cfun->machine->some_ld_name;
6227
6228 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
6229 if (INSN_P (insn)
6230 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
6231 return cfun->machine->some_ld_name;
6232
6233 abort ();
6234 }
6235
6236 static int
6237 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
6238 {
6239 rtx x = *px;
6240
6241 if (GET_CODE (x) == SYMBOL_REF
6242 && local_dynamic_symbolic_operand (x, Pmode))
6243 {
6244 cfun->machine->some_ld_name = XSTR (x, 0);
6245 return 1;
6246 }
6247
6248 return 0;
6249 }
6250
6251 /* Meaning of CODE:
6252 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
6253 C -- print opcode suffix for set/cmov insn.
6254 c -- like C, but print reversed condition
6255 F,f -- likewise, but for floating-point.
6256 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
6257 otherwise nothing
6258 R -- print the prefix for register names.
6259 z -- print the opcode suffix for the size of the current operand.
6260 * -- print a star (in certain assembler syntax)
6261 A -- print an absolute memory reference.
6262 w -- print the operand as if it's a "word" (HImode) even if it isn't.
6263 s -- print a shift double count, followed by the assembler's argument
6264 delimiter.
6265 b -- print the QImode name of the register for the indicated operand.
6266 %b0 would print %al if operands[0] is reg 0.
6267 w -- likewise, print the HImode name of the register.
6268 k -- likewise, print the SImode name of the register.
6269 q -- likewise, print the DImode name of the register.
6270 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
6271 y -- print "st(0)" instead of "st" as a register.
6272 D -- print condition for SSE cmp instruction.
6273 P -- if PIC, print an @PLT suffix.
6274 X -- don't print any sort of PIC '@' suffix for a symbol.
6275 & -- print some in-use local-dynamic symbol name.
6276 */
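/* Two illustrative uses from later in this file: output_fix_trunc emits
   "fist%z0\t%0", where %z prints the x87 size suffix derived from the mode
   of operand 0, and output_fp_compare uses "fcomip\t{%y1, %0|%0, %y1}",
   where %y prints "st(0)" for a stack-top operand and the {att|intel}
   braces select the operand order for the current assembler dialect.  */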
6277
6278 void
6279 print_operand (FILE *file, rtx x, int code)
6280 {
6281 if (code)
6282 {
6283 switch (code)
6284 {
6285 case '*':
6286 if (ASSEMBLER_DIALECT == ASM_ATT)
6287 putc ('*', file);
6288 return;
6289
6290 case '&':
6291 assemble_name (file, get_some_local_dynamic_name ());
6292 return;
6293
6294 case 'A':
6295 if (ASSEMBLER_DIALECT == ASM_ATT)
6296 putc ('*', file);
6297 else if (ASSEMBLER_DIALECT == ASM_INTEL)
6298 {
6299 /* Intel syntax. For absolute addresses, registers should not
6300 be surrounded by brackets. */
6301 if (GET_CODE (x) != REG)
6302 {
6303 putc ('[', file);
6304 PRINT_OPERAND (file, x, 0);
6305 putc (']', file);
6306 return;
6307 }
6308 }
6309 else
6310 abort ();
6311
6312 PRINT_OPERAND (file, x, 0);
6313 return;
6314
6315
6316 case 'L':
6317 if (ASSEMBLER_DIALECT == ASM_ATT)
6318 putc ('l', file);
6319 return;
6320
6321 case 'W':
6322 if (ASSEMBLER_DIALECT == ASM_ATT)
6323 putc ('w', file);
6324 return;
6325
6326 case 'B':
6327 if (ASSEMBLER_DIALECT == ASM_ATT)
6328 putc ('b', file);
6329 return;
6330
6331 case 'Q':
6332 if (ASSEMBLER_DIALECT == ASM_ATT)
6333 putc ('l', file);
6334 return;
6335
6336 case 'S':
6337 if (ASSEMBLER_DIALECT == ASM_ATT)
6338 putc ('s', file);
6339 return;
6340
6341 case 'T':
6342 if (ASSEMBLER_DIALECT == ASM_ATT)
6343 putc ('t', file);
6344 return;
6345
6346 case 'z':
6347 /* 387 opcodes don't get size suffixes if the operands are
6348 registers. */
6349 if (STACK_REG_P (x))
6350 return;
6351
6352 /* Likewise if using Intel opcodes. */
6353 if (ASSEMBLER_DIALECT == ASM_INTEL)
6354 return;
6355
6356 /* Derive the opcode size suffix from the size of the operand. */
6357 switch (GET_MODE_SIZE (GET_MODE (x)))
6358 {
6359 case 2:
6360 #ifdef HAVE_GAS_FILDS_FISTS
6361 putc ('s', file);
6362 #endif
6363 return;
6364
6365 case 4:
6366 if (GET_MODE (x) == SFmode)
6367 {
6368 putc ('s', file);
6369 return;
6370 }
6371 else
6372 putc ('l', file);
6373 return;
6374
6375 case 12:
6376 case 16:
6377 putc ('t', file);
6378 return;
6379
6380 case 8:
6381 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
6382 {
6383 #ifdef GAS_MNEMONICS
6384 putc ('q', file);
6385 #else
6386 putc ('l', file);
6387 putc ('l', file);
6388 #endif
6389 }
6390 else
6391 putc ('l', file);
6392 return;
6393
6394 default:
6395 abort ();
6396 }
6397
6398 case 'b':
6399 case 'w':
6400 case 'k':
6401 case 'q':
6402 case 'h':
6403 case 'y':
6404 case 'X':
6405 case 'P':
6406 break;
6407
6408 case 's':
6409 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
6410 {
6411 PRINT_OPERAND (file, x, 0);
6412 putc (',', file);
6413 }
6414 return;
6415
6416 case 'D':
6417 /* Little bit of braindamage here. The SSE compare instructions
6418 use completely different names for the comparisons than the
6419 fp conditional moves do. */
6420 switch (GET_CODE (x))
6421 {
6422 case EQ:
6423 case UNEQ:
6424 fputs ("eq", file);
6425 break;
6426 case LT:
6427 case UNLT:
6428 fputs ("lt", file);
6429 break;
6430 case LE:
6431 case UNLE:
6432 fputs ("le", file);
6433 break;
6434 case UNORDERED:
6435 fputs ("unord", file);
6436 break;
6437 case NE:
6438 case LTGT:
6439 fputs ("neq", file);
6440 break;
6441 case UNGE:
6442 case GE:
6443 fputs ("nlt", file);
6444 break;
6445 case UNGT:
6446 case GT:
6447 fputs ("nle", file);
6448 break;
6449 case ORDERED:
6450 fputs ("ord", file);
6451 break;
6452 default:
6453 abort ();
6454 break;
6455 }
6456 return;
6457 case 'O':
6458 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
6459 if (ASSEMBLER_DIALECT == ASM_ATT)
6460 {
6461 switch (GET_MODE (x))
6462 {
6463 case HImode: putc ('w', file); break;
6464 case SImode:
6465 case SFmode: putc ('l', file); break;
6466 case DImode:
6467 case DFmode: putc ('q', file); break;
6468 default: abort ();
6469 }
6470 putc ('.', file);
6471 }
6472 #endif
6473 return;
6474 case 'C':
6475 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
6476 return;
6477 case 'F':
6478 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
6479 if (ASSEMBLER_DIALECT == ASM_ATT)
6480 putc ('.', file);
6481 #endif
6482 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
6483 return;
6484
6485 /* Like above, but reverse condition */
6486 case 'c':
6487 /* Check to see if argument to %c is really a constant
6488 and not a condition code which needs to be reversed. */
6489 if (!COMPARISON_P (x))
6490 {
6491 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
6492 return;
6493 }
6494 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
6495 return;
6496 case 'f':
6497 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
6498 if (ASSEMBLER_DIALECT == ASM_ATT)
6499 putc ('.', file);
6500 #endif
6501 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
6502 return;
6503 case '+':
6504 {
6505 rtx x;
6506
6507 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
6508 return;
6509
6510 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
6511 if (x)
6512 {
6513 int pred_val = INTVAL (XEXP (x, 0));
6514
6515 if (pred_val < REG_BR_PROB_BASE * 45 / 100
6516 || pred_val > REG_BR_PROB_BASE * 55 / 100)
6517 {
6518 int taken = pred_val > REG_BR_PROB_BASE / 2;
6519 int cputaken = final_forward_branch_p (current_output_insn) == 0;
6520
6521 /* Emit hints only in the case the default branch prediction
6522 heuristics would fail. */
6523 if (taken != cputaken)
6524 {
6525 /* We use 3e (DS) prefix for taken branches and
6526 2e (CS) prefix for not taken branches. */
6527 if (taken)
6528 fputs ("ds ; ", file);
6529 else
6530 fputs ("cs ; ", file);
6531 }
6532 }
6533 }
6534 return;
6535 }
6536 default:
6537 output_operand_lossage ("invalid operand code `%c'", code);
6538 }
6539 }
6540
6541 if (GET_CODE (x) == REG)
6542 print_reg (x, code, file);
6543
6544 else if (GET_CODE (x) == MEM)
6545 {
6546 /* No `byte ptr' prefix for call instructions. */
6547 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
6548 {
6549 const char * size;
6550 switch (GET_MODE_SIZE (GET_MODE (x)))
6551 {
6552 case 1: size = "BYTE"; break;
6553 case 2: size = "WORD"; break;
6554 case 4: size = "DWORD"; break;
6555 case 8: size = "QWORD"; break;
6556 case 12: size = "XWORD"; break;
6557 case 16: size = "XMMWORD"; break;
6558 default:
6559 abort ();
6560 }
6561
6562 /* Check for explicit size override (codes 'b', 'w' and 'k') */
6563 if (code == 'b')
6564 size = "BYTE";
6565 else if (code == 'w')
6566 size = "WORD";
6567 else if (code == 'k')
6568 size = "DWORD";
6569
6570 fputs (size, file);
6571 fputs (" PTR ", file);
6572 }
6573
6574 x = XEXP (x, 0);
6575 /* Avoid (%rip) for call operands. */
6576 if (CONSTANT_ADDRESS_P (x) && code == 'P'
6577 && GET_CODE (x) != CONST_INT)
6578 output_addr_const (file, x);
6579 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
6580 output_operand_lossage ("invalid constraints for operand");
6581 else
6582 output_address (x);
6583 }
6584
6585 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
6586 {
6587 REAL_VALUE_TYPE r;
6588 long l;
6589
6590 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6591 REAL_VALUE_TO_TARGET_SINGLE (r, l);
6592
6593 if (ASSEMBLER_DIALECT == ASM_ATT)
6594 putc ('$', file);
6595 fprintf (file, "0x%08lx", l);
6596 }
6597
6598 /* These float cases don't actually occur as immediate operands. */
6599 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
6600 {
6601 char dstr[30];
6602
6603 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
6604 fprintf (file, "%s", dstr);
6605 }
6606
6607 else if (GET_CODE (x) == CONST_DOUBLE
6608 && GET_MODE (x) == XFmode)
6609 {
6610 char dstr[30];
6611
6612 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
6613 fprintf (file, "%s", dstr);
6614 }
6615
6616 else
6617 {
6618 if (code != 'P')
6619 {
6620 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
6621 {
6622 if (ASSEMBLER_DIALECT == ASM_ATT)
6623 putc ('$', file);
6624 }
6625 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
6626 || GET_CODE (x) == LABEL_REF)
6627 {
6628 if (ASSEMBLER_DIALECT == ASM_ATT)
6629 putc ('$', file);
6630 else
6631 fputs ("OFFSET FLAT:", file);
6632 }
6633 }
6634 if (GET_CODE (x) == CONST_INT)
6635 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6636 else if (flag_pic)
6637 output_pic_addr_const (file, x, code);
6638 else
6639 output_addr_const (file, x);
6640 }
6641 }
6642 \f
6643 /* Print a memory operand whose address is ADDR. */
6644
6645 void
6646 print_operand_address (FILE *file, rtx addr)
6647 {
6648 struct ix86_address parts;
6649 rtx base, index, disp;
6650 int scale;
6651
6652 if (! ix86_decompose_address (addr, &parts))
6653 abort ();
6654
6655 base = parts.base;
6656 index = parts.index;
6657 disp = parts.disp;
6658 scale = parts.scale;
6659
6660 switch (parts.seg)
6661 {
6662 case SEG_DEFAULT:
6663 break;
6664 case SEG_FS:
6665 case SEG_GS:
6666 if (USER_LABEL_PREFIX[0] == 0)
6667 putc ('%', file);
6668 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
6669 break;
6670 default:
6671 abort ();
6672 }
6673
6674 if (!base && !index)
6675 {
6676 /* A displacement-only address requires special attention. */
6677
6678 if (GET_CODE (disp) == CONST_INT)
6679 {
6680 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
6681 {
6682 if (USER_LABEL_PREFIX[0] == 0)
6683 putc ('%', file);
6684 fputs ("ds:", file);
6685 }
6686 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
6687 }
6688 else if (flag_pic)
6689 output_pic_addr_const (file, disp, 0);
6690 else
6691 output_addr_const (file, disp);
6692
6693 /* Use the one byte shorter RIP-relative addressing for 64-bit mode. */
6694 if (TARGET_64BIT
6695 && ((GET_CODE (disp) == SYMBOL_REF
6696 && ! tls_symbolic_operand (disp, GET_MODE (disp)))
6697 || GET_CODE (disp) == LABEL_REF
6698 || (GET_CODE (disp) == CONST
6699 && GET_CODE (XEXP (disp, 0)) == PLUS
6700 && (GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
6701 || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
6702 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)))
6703 fputs ("(%rip)", file);
6704 }
6705 else
6706 {
6707 if (ASSEMBLER_DIALECT == ASM_ATT)
6708 {
6709 if (disp)
6710 {
6711 if (flag_pic)
6712 output_pic_addr_const (file, disp, 0);
6713 else if (GET_CODE (disp) == LABEL_REF)
6714 output_asm_label (disp);
6715 else
6716 output_addr_const (file, disp);
6717 }
6718
6719 putc ('(', file);
6720 if (base)
6721 print_reg (base, 0, file);
6722 if (index)
6723 {
6724 putc (',', file);
6725 print_reg (index, 0, file);
6726 if (scale != 1)
6727 fprintf (file, ",%d", scale);
6728 }
6729 putc (')', file);
6730 }
6731 else
6732 {
6733 rtx offset = NULL_RTX;
6734
6735 if (disp)
6736 {
6737 /* Pull out the offset of a symbol; print any symbol itself. */
6738 if (GET_CODE (disp) == CONST
6739 && GET_CODE (XEXP (disp, 0)) == PLUS
6740 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
6741 {
6742 offset = XEXP (XEXP (disp, 0), 1);
6743 disp = gen_rtx_CONST (VOIDmode,
6744 XEXP (XEXP (disp, 0), 0));
6745 }
6746
6747 if (flag_pic)
6748 output_pic_addr_const (file, disp, 0);
6749 else if (GET_CODE (disp) == LABEL_REF)
6750 output_asm_label (disp);
6751 else if (GET_CODE (disp) == CONST_INT)
6752 offset = disp;
6753 else
6754 output_addr_const (file, disp);
6755 }
6756
6757 putc ('[', file);
6758 if (base)
6759 {
6760 print_reg (base, 0, file);
6761 if (offset)
6762 {
6763 if (INTVAL (offset) >= 0)
6764 putc ('+', file);
6765 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6766 }
6767 }
6768 else if (offset)
6769 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6770 else
6771 putc ('0', file);
6772
6773 if (index)
6774 {
6775 putc ('+', file);
6776 print_reg (index, 0, file);
6777 if (scale != 1)
6778 fprintf (file, "*%d", scale);
6779 }
6780 putc (']', file);
6781 }
6782 }
6783 }
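/* For example, a full base + index*scale + displacement address prints as
   "disp(%base,%index,scale)" in AT&T syntax and as
   "[base+disp+index*scale]" in Intel syntax, following the two branches
   above.  */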
6784
6785 bool
6786 output_addr_const_extra (FILE *file, rtx x)
6787 {
6788 rtx op;
6789
6790 if (GET_CODE (x) != UNSPEC)
6791 return false;
6792
6793 op = XVECEXP (x, 0, 0);
6794 switch (XINT (x, 1))
6795 {
6796 case UNSPEC_GOTTPOFF:
6797 output_addr_const (file, op);
6798 /* FIXME: This might be @TPOFF in Sun ld. */
6799 fputs ("@GOTTPOFF", file);
6800 break;
6801 case UNSPEC_TPOFF:
6802 output_addr_const (file, op);
6803 fputs ("@TPOFF", file);
6804 break;
6805 case UNSPEC_NTPOFF:
6806 output_addr_const (file, op);
6807 if (TARGET_64BIT)
6808 fputs ("@TPOFF", file);
6809 else
6810 fputs ("@NTPOFF", file);
6811 break;
6812 case UNSPEC_DTPOFF:
6813 output_addr_const (file, op);
6814 fputs ("@DTPOFF", file);
6815 break;
6816 case UNSPEC_GOTNTPOFF:
6817 output_addr_const (file, op);
6818 if (TARGET_64BIT)
6819 fputs ("@GOTTPOFF(%rip)", file);
6820 else
6821 fputs ("@GOTNTPOFF", file);
6822 break;
6823 case UNSPEC_INDNTPOFF:
6824 output_addr_const (file, op);
6825 fputs ("@INDNTPOFF", file);
6826 break;
6827
6828 default:
6829 return false;
6830 }
6831
6832 return true;
6833 }
6834 \f
6835 /* Split one or more DImode RTL references into pairs of SImode
6836 references. The RTL can be REG, offsettable MEM, integer constant, or
6837 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
6838 split and "num" is its length. lo_half and hi_half are output arrays
6839 that parallel "operands". */
6840
6841 void
6842 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
6843 {
6844 while (num--)
6845 {
6846 rtx op = operands[num];
6847
6848 /* simplify_subreg refuses to split volatile memory addresses,
6849 but we still have to handle them. */
6850 if (GET_CODE (op) == MEM)
6851 {
6852 lo_half[num] = adjust_address (op, SImode, 0);
6853 hi_half[num] = adjust_address (op, SImode, 4);
6854 }
6855 else
6856 {
6857 lo_half[num] = simplify_gen_subreg (SImode, op,
6858 GET_MODE (op) == VOIDmode
6859 ? DImode : GET_MODE (op), 0);
6860 hi_half[num] = simplify_gen_subreg (SImode, op,
6861 GET_MODE (op) == VOIDmode
6862 ? DImode : GET_MODE (op), 4);
6863 }
6864 }
6865 }
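/* For example, splitting a DImode constant with value 0x0000000100000002
   yields lo_half = (const_int 2) and hi_half = (const_int 1), matching the
   little-endian subreg byte offsets 0 and 4 used above.  */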
6866 /* Split one or more TImode RTL references into pairs of DImode
6867 references. The RTL can be REG, offsettable MEM, integer constant, or
6868 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
6869 split and "num" is its length. lo_half and hi_half are output arrays
6870 that parallel "operands". */
6871
6872 void
6873 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
6874 {
6875 while (num--)
6876 {
6877 rtx op = operands[num];
6878
6879 /* simplify_subreg refuses to split volatile memory addresses, but we
6880 still have to handle them. */
6881 if (GET_CODE (op) == MEM)
6882 {
6883 lo_half[num] = adjust_address (op, DImode, 0);
6884 hi_half[num] = adjust_address (op, DImode, 8);
6885 }
6886 else
6887 {
6888 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
6889 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
6890 }
6891 }
6892 }
6893 \f
6894 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
6895 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
6896 is the expression of the binary operation. The output may either be
6897 emitted here, or returned to the caller, like all output_* functions.
6898
6899 There is no guarantee that the operands are the same mode, as they
6900 might be within FLOAT or FLOAT_EXTEND expressions. */
6901
6902 #ifndef SYSV386_COMPAT
6903 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
6904 wants to fix the assemblers because that causes incompatibility
6905 with gcc. No-one wants to fix gcc because that causes
6906 incompatibility with assemblers... You can use the option of
6907 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
6908 #define SYSV386_COMPAT 1
6909 #endif
6910
6911 const char *
6912 output_387_binary_op (rtx insn, rtx *operands)
6913 {
6914 static char buf[30];
6915 const char *p;
6916 const char *ssep;
6917 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
6918
6919 #ifdef ENABLE_CHECKING
6920 /* Even if we do not want to check the inputs, this documents the input
6921 constraints, which helps in understanding the following code. */
6922 if (STACK_REG_P (operands[0])
6923 && ((REG_P (operands[1])
6924 && REGNO (operands[0]) == REGNO (operands[1])
6925 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
6926 || (REG_P (operands[2])
6927 && REGNO (operands[0]) == REGNO (operands[2])
6928 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
6929 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
6930 ; /* ok */
6931 else if (!is_sse)
6932 abort ();
6933 #endif
6934
6935 switch (GET_CODE (operands[3]))
6936 {
6937 case PLUS:
6938 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6939 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6940 p = "fiadd";
6941 else
6942 p = "fadd";
6943 ssep = "add";
6944 break;
6945
6946 case MINUS:
6947 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6948 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6949 p = "fisub";
6950 else
6951 p = "fsub";
6952 ssep = "sub";
6953 break;
6954
6955 case MULT:
6956 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6957 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6958 p = "fimul";
6959 else
6960 p = "fmul";
6961 ssep = "mul";
6962 break;
6963
6964 case DIV:
6965 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6966 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6967 p = "fidiv";
6968 else
6969 p = "fdiv";
6970 ssep = "div";
6971 break;
6972
6973 default:
6974 abort ();
6975 }
6976
6977 if (is_sse)
6978 {
6979 strcpy (buf, ssep);
6980 if (GET_MODE (operands[0]) == SFmode)
6981 strcat (buf, "ss\t{%2, %0|%0, %2}");
6982 else
6983 strcat (buf, "sd\t{%2, %0|%0, %2}");
6984 return buf;
6985 }
6986 strcpy (buf, p);
6987
6988 switch (GET_CODE (operands[3]))
6989 {
6990 case MULT:
6991 case PLUS:
6992 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
6993 {
6994 rtx temp = operands[2];
6995 operands[2] = operands[1];
6996 operands[1] = temp;
6997 }
6998
6999 /* We know operands[0] == operands[1]. */
7000
7001 if (GET_CODE (operands[2]) == MEM)
7002 {
7003 p = "%z2\t%2";
7004 break;
7005 }
7006
7007 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7008 {
7009 if (STACK_TOP_P (operands[0]))
7010 /* How is it that we are storing to a dead operand[2]?
7011 Well, presumably operands[1] is dead too. We can't
7012 store the result to st(0) as st(0) gets popped on this
7013 instruction. Instead store to operands[2] (which I
7014 think has to be st(1)). st(1) will be popped later.
7015 gcc <= 2.8.1 didn't have this check and generated
7016 assembly code that the Unixware assembler rejected. */
7017 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7018 else
7019 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7020 break;
7021 }
7022
7023 if (STACK_TOP_P (operands[0]))
7024 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7025 else
7026 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7027 break;
7028
7029 case MINUS:
7030 case DIV:
7031 if (GET_CODE (operands[1]) == MEM)
7032 {
7033 p = "r%z1\t%1";
7034 break;
7035 }
7036
7037 if (GET_CODE (operands[2]) == MEM)
7038 {
7039 p = "%z2\t%2";
7040 break;
7041 }
7042
7043 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7044 {
7045 #if SYSV386_COMPAT
7046 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7047 derived assemblers, confusingly reverse the direction of
7048 the operation for fsub{r} and fdiv{r} when the
7049 destination register is not st(0). The Intel assembler
7050 doesn't have this brain damage. Read !SYSV386_COMPAT to
7051 figure out what the hardware really does. */
7052 if (STACK_TOP_P (operands[0]))
7053 p = "{p\t%0, %2|rp\t%2, %0}";
7054 else
7055 p = "{rp\t%2, %0|p\t%0, %2}";
7056 #else
7057 if (STACK_TOP_P (operands[0]))
7058 /* As above for fmul/fadd, we can't store to st(0). */
7059 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7060 else
7061 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7062 #endif
7063 break;
7064 }
7065
7066 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
7067 {
7068 #if SYSV386_COMPAT
7069 if (STACK_TOP_P (operands[0]))
7070 p = "{rp\t%0, %1|p\t%1, %0}";
7071 else
7072 p = "{p\t%1, %0|rp\t%0, %1}";
7073 #else
7074 if (STACK_TOP_P (operands[0]))
7075 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7076 else
7077 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7078 #endif
7079 break;
7080 }
7081
7082 if (STACK_TOP_P (operands[0]))
7083 {
7084 if (STACK_TOP_P (operands[1]))
7085 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7086 else
7087 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
7088 break;
7089 }
7090 else if (STACK_TOP_P (operands[1]))
7091 {
7092 #if SYSV386_COMPAT
7093 p = "{\t%1, %0|r\t%0, %1}";
7094 #else
7095 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
7096 #endif
7097 }
7098 else
7099 {
7100 #if SYSV386_COMPAT
7101 p = "{r\t%2, %0|\t%0, %2}";
7102 #else
7103 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7104 #endif
7105 }
7106 break;
7107
7108 default:
7109 abort ();
7110 }
7111
7112 strcat (buf, p);
7113 return buf;
7114 }
7115
7116 /* Output code to initialize control word copies used by trunc?f?i and
7117 rounding patterns. CURRENT_MODE is set to the current control word,
7118 while NEW_MODE is set to the new control word. */
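/* For reference, the x87 control word keeps its rounding control in bits
   10-11 (00 = nearest, 01 = down, 10 = up, 11 = truncate) and the
   precision-exception mask in bit 5, which is why the code below ORs in
   0x0400, 0x0800, 0x0c00 or 0x0020, or writes the 0x4/0x8/0xc nibble into
   the high byte via movsi_insv_1 on the fast path.  */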
7119
7120 void
7121 emit_i387_cw_initialization (rtx current_mode, rtx new_mode, int mode)
7122 {
7123 rtx reg = gen_reg_rtx (HImode);
7124
7125 emit_insn (gen_x86_fnstcw_1 (current_mode));
7126 emit_move_insn (reg, current_mode);
7127
7128 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
7129 && !TARGET_64BIT)
7130 {
7131 switch (mode)
7132 {
7133 case I387_CW_FLOOR:
7134 /* round down toward -oo */
7135 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
7136 break;
7137
7138 case I387_CW_CEIL:
7139 /* round up toward +oo */
7140 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
7141 break;
7142
7143 case I387_CW_TRUNC:
7144 /* round toward zero (truncate) */
7145 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
7146 break;
7147
7148 case I387_CW_MASK_PM:
7149 /* mask precision exception for nearbyint() */
7150 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
7151 break;
7152
7153 default:
7154 abort();
7155 }
7156 }
7157 else
7158 {
7159 switch (mode)
7160 {
7161 case I387_CW_FLOOR:
7162 /* round down toward -oo */
7163 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
7164 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
7165 break;
7166
7167 case I387_CW_CEIL:
7168 /* round up toward +oo */
7169 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
7170 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
7171 break;
7172
7173 case I387_CW_TRUNC:
7174 /* round toward zero (truncate) */
7175 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
7176 break;
7177
7178 case I387_CW_MASK_PM:
7179 /* mask precision exception for nearbyint() */
7180 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
7181 break;
7182
7183 default:
7184 abort();
7185 }
7186 }
7187
7188 emit_move_insn (new_mode, reg);
7189 }
7190
7191 /* Output code for INSN to convert a float to a signed int. OPERANDS
7192 are the insn operands. The output may be [HSD]Imode and the input
7193 operand may be [SDX]Fmode. */
7194
7195 const char *
7196 output_fix_trunc (rtx insn, rtx *operands)
7197 {
7198 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7199 int dimode_p = GET_MODE (operands[0]) == DImode;
7200
7201 /* Jump through a hoop or two for DImode, since the hardware has no
7202 non-popping instruction. We used to do this a different way, but
7203 that was somewhat fragile and broke with post-reload splitters. */
7204 if (dimode_p && !stack_top_dies)
7205 output_asm_insn ("fld\t%y1", operands);
7206
7207 if (!STACK_TOP_P (operands[1]))
7208 abort ();
7209
7210 if (GET_CODE (operands[0]) != MEM)
7211 abort ();
7212
7213 output_asm_insn ("fldcw\t%3", operands);
7214 if (stack_top_dies || dimode_p)
7215 output_asm_insn ("fistp%z0\t%0", operands);
7216 else
7217 output_asm_insn ("fist%z0\t%0", operands);
7218 output_asm_insn ("fldcw\t%2", operands);
7219
7220 return "";
7221 }
7222
7223 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
7224 should be used and 2 when fnstsw should be used. UNORDERED_P is true
7225 when fucom should be used. */
7226
7227 const char *
7228 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
7229 {
7230 int stack_top_dies;
7231 rtx cmp_op0, cmp_op1;
7232 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
7233
7234 if (eflags_p == 2)
7235 {
7236 cmp_op0 = operands[1];
7237 cmp_op1 = operands[2];
7238 }
7239 else
7240 {
7241 cmp_op0 = operands[0];
7242 cmp_op1 = operands[1];
7243 }
7244
7245 if (is_sse)
7246 {
7247 if (GET_MODE (operands[0]) == SFmode)
7248 if (unordered_p)
7249 return "ucomiss\t{%1, %0|%0, %1}";
7250 else
7251 return "comiss\t{%1, %0|%0, %1}";
7252 else
7253 if (unordered_p)
7254 return "ucomisd\t{%1, %0|%0, %1}";
7255 else
7256 return "comisd\t{%1, %0|%0, %1}";
7257 }
7258
7259 if (! STACK_TOP_P (cmp_op0))
7260 abort ();
7261
7262 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7263
7264 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
7265 {
7266 if (stack_top_dies)
7267 {
7268 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
7269 return TARGET_USE_FFREEP ? "ffreep\t%y1" : "fstp\t%y1";
7270 }
7271 else
7272 return "ftst\n\tfnstsw\t%0";
7273 }
7274
7275 if (STACK_REG_P (cmp_op1)
7276 && stack_top_dies
7277 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
7278 && REGNO (cmp_op1) != FIRST_STACK_REG)
7279 {
7280 /* If both the top of the 387 stack and the other operand (also a
7281 stack register) die, then this must be an `fcompp' float
7282 compare. */
7283
7284 if (eflags_p == 1)
7285 {
7286 /* There is no double popping fcomi variant. Fortunately,
7287 eflags is immune from the fstp's cc clobbering. */
7288 if (unordered_p)
7289 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
7290 else
7291 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
7292 return TARGET_USE_FFREEP ? "ffreep\t%y0" : "fstp\t%y0";
7293 }
7294 else
7295 {
7296 if (eflags_p == 2)
7297 {
7298 if (unordered_p)
7299 return "fucompp\n\tfnstsw\t%0";
7300 else
7301 return "fcompp\n\tfnstsw\t%0";
7302 }
7303 else
7304 {
7305 if (unordered_p)
7306 return "fucompp";
7307 else
7308 return "fcompp";
7309 }
7310 }
7311 }
7312 else
7313 {
7314 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
7315
7316 static const char * const alt[24] =
7317 {
7318 "fcom%z1\t%y1",
7319 "fcomp%z1\t%y1",
7320 "fucom%z1\t%y1",
7321 "fucomp%z1\t%y1",
7322
7323 "ficom%z1\t%y1",
7324 "ficomp%z1\t%y1",
7325 NULL,
7326 NULL,
7327
7328 "fcomi\t{%y1, %0|%0, %y1}",
7329 "fcomip\t{%y1, %0|%0, %y1}",
7330 "fucomi\t{%y1, %0|%0, %y1}",
7331 "fucomip\t{%y1, %0|%0, %y1}",
7332
7333 NULL,
7334 NULL,
7335 NULL,
7336 NULL,
7337
7338 "fcom%z2\t%y2\n\tfnstsw\t%0",
7339 "fcomp%z2\t%y2\n\tfnstsw\t%0",
7340 "fucom%z2\t%y2\n\tfnstsw\t%0",
7341 "fucomp%z2\t%y2\n\tfnstsw\t%0",
7342
7343 "ficom%z2\t%y2\n\tfnstsw\t%0",
7344 "ficomp%z2\t%y2\n\tfnstsw\t%0",
7345 NULL,
7346 NULL
7347 };
7348
7349 int mask;
7350 const char *ret;
7351
7352 mask = eflags_p << 3;
7353 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
7354 mask |= unordered_p << 1;
7355 mask |= stack_top_dies;
7356
7357 if (mask >= 24)
7358 abort ();
7359 ret = alt[mask];
7360 if (ret == NULL)
7361 abort ();
7362
7363 return ret;
7364 }
7365 }
7366
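/* Illustrative sketch, not part of the original source: how the index into
   the alt[] table above is formed.  For instance, an fcomi-style compare
   (eflags_p == 1) of two FP-stack operands, unordered, with the stack top
   dying, gives (1 << 3) | (0 << 2) | (1 << 1) | 1 == 11 and so selects
   "fucomip"; a plain ordered fcom where the top dies gives 1, selecting
   "fcomp".  The helper name is hypothetical.  */

static int
example_fp_compare_index (int eflags_p, int int_operand_p,
                          int unordered_p, int stack_top_dies)
{
  return (eflags_p << 3) | (int_operand_p << 2)
         | (unordered_p << 1) | stack_top_dies;
}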
7367 void
7368 ix86_output_addr_vec_elt (FILE *file, int value)
7369 {
7370 const char *directive = ASM_LONG;
7371
7372 if (TARGET_64BIT)
7373 {
7374 #ifdef ASM_QUAD
7375 directive = ASM_QUAD;
7376 #else
7377 abort ();
7378 #endif
7379 }
7380
7381 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
7382 }
7383
7384 void
7385 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
7386 {
7387 if (TARGET_64BIT)
7388 fprintf (file, "%s%s%d-%s%d\n",
7389 ASM_LONG, LPREFIX, value, LPREFIX, rel);
7390 else if (HAVE_AS_GOTOFF_IN_DATA)
7391 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
7392 #if TARGET_MACHO
7393 else if (TARGET_MACHO)
7394 {
7395 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
7396 machopic_output_function_base_name (file);
7397 fprintf (file, "\n");
7398 }
7399 #endif
7400 else
7401 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
7402 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
7403 }
7404 \f
7405 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
7406 for the target. */
7407
7408 void
7409 ix86_expand_clear (rtx dest)
7410 {
7411 rtx tmp;
7412
7413 /* We play register width games, which are only valid after reload. */
7414 if (!reload_completed)
7415 abort ();
7416
7417 /* Avoid HImode and its attendant prefix byte. */
7418 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
7419 dest = gen_rtx_REG (SImode, REGNO (dest));
7420
7421 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
7422
7423 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
7424 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
7425 {
7426 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
7427 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
7428 }
7429
7430 emit_insn (tmp);
7431 }
7432
7433 /* X is an unchanging MEM. If it is a constant pool reference, return
7434 the constant pool rtx, else NULL. */
7435
7436 rtx
7437 maybe_get_pool_constant (rtx x)
7438 {
7439 x = ix86_delegitimize_address (XEXP (x, 0));
7440
7441 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7442 return get_pool_constant (x);
7443
7444 return NULL_RTX;
7445 }
7446
7447 void
7448 ix86_expand_move (enum machine_mode mode, rtx operands[])
7449 {
7450 int strict = (reload_in_progress || reload_completed);
7451 rtx op0, op1;
7452 enum tls_model model;
7453
7454 op0 = operands[0];
7455 op1 = operands[1];
7456
7457 model = GET_CODE (op1) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (op1) : 0;
7458 if (model)
7459 {
7460 op1 = legitimize_tls_address (op1, model, true);
7461 op1 = force_operand (op1, op0);
7462 if (op1 == op0)
7463 return;
7464 }
7465
7466 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
7467 {
7468 #if TARGET_MACHO
7469 if (MACHOPIC_PURE)
7470 {
7471 rtx temp = ((reload_in_progress
7472 || ((op0 && GET_CODE (op0) == REG)
7473 && mode == Pmode))
7474 ? op0 : gen_reg_rtx (Pmode));
7475 op1 = machopic_indirect_data_reference (op1, temp);
7476 op1 = machopic_legitimize_pic_address (op1, mode,
7477 temp == op1 ? 0 : temp);
7478 }
7479 else if (MACHOPIC_INDIRECT)
7480 op1 = machopic_indirect_data_reference (op1, 0);
7481 if (op0 == op1)
7482 return;
7483 #else
7484 if (GET_CODE (op0) == MEM)
7485 op1 = force_reg (Pmode, op1);
7486 else
7487 op1 = legitimize_address (op1, op1, Pmode);
7488 #endif /* TARGET_MACHO */
7489 }
7490 else
7491 {
7492 if (GET_CODE (op0) == MEM
7493 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
7494 || !push_operand (op0, mode))
7495 && GET_CODE (op1) == MEM)
7496 op1 = force_reg (mode, op1);
7497
7498 if (push_operand (op0, mode)
7499 && ! general_no_elim_operand (op1, mode))
7500 op1 = copy_to_mode_reg (mode, op1);
7501
7502 /* Force large constants in 64-bit compilation into a register
7503 to get them CSEed. */
7504 if (TARGET_64BIT && mode == DImode
7505 && immediate_operand (op1, mode)
7506 && !x86_64_zext_immediate_operand (op1, VOIDmode)
7507 && !register_operand (op0, mode)
7508 && optimize && !reload_completed && !reload_in_progress)
7509 op1 = copy_to_mode_reg (mode, op1);
7510
7511 if (FLOAT_MODE_P (mode))
7512 {
7513 /* If we are loading a floating point constant to a register,
7514 force the value to memory now, since we'll get better code
7515 out of the back end. */
7516
7517 if (strict)
7518 ;
7519 else if (GET_CODE (op1) == CONST_DOUBLE)
7520 {
7521 op1 = validize_mem (force_const_mem (mode, op1));
7522 if (!register_operand (op0, mode))
7523 {
7524 rtx temp = gen_reg_rtx (mode);
7525 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
7526 emit_move_insn (op0, temp);
7527 return;
7528 }
7529 }
7530 }
7531 }
7532
7533 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
7534 }
7535
7536 void
7537 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
7538 {
7539 /* Force constants other than zero into memory. We do not know how
7540 the instructions used to build constants modify the upper 64 bits
7541 of the register; once we have that information we may be able
7542 to handle some of them more efficiently. */
7543 if ((reload_in_progress | reload_completed) == 0
7544 && register_operand (operands[0], mode)
7545 && CONSTANT_P (operands[1]) && operands[1] != CONST0_RTX (mode))
7546 operands[1] = validize_mem (force_const_mem (mode, operands[1]));
7547
7548 /* Make operand1 a register if it isn't already. */
7549 if (!no_new_pseudos
7550 && !register_operand (operands[0], mode)
7551 && !register_operand (operands[1], mode))
7552 {
7553 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
7554 emit_move_insn (operands[0], temp);
7555 return;
7556 }
7557
7558 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
7559 }
7560
7561 /* Attempt to expand a binary operator. Make the expansion closer to the
7562 actual machine, than just general_operand, which will allow 3 separate
7563 memory references (one output, two input) in a single insn. */
7564
7565 void
7566 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
7567 rtx operands[])
7568 {
7569 int matching_memory;
7570 rtx src1, src2, dst, op, clob;
7571
7572 dst = operands[0];
7573 src1 = operands[1];
7574 src2 = operands[2];
7575
7576 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
7577 if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
7578 && (rtx_equal_p (dst, src2)
7579 || immediate_operand (src1, mode)))
7580 {
7581 rtx temp = src1;
7582 src1 = src2;
7583 src2 = temp;
7584 }
7585
7586 /* If the destination is memory, and we do not have matching source
7587 operands, do things in registers. */
7588 matching_memory = 0;
7589 if (GET_CODE (dst) == MEM)
7590 {
7591 if (rtx_equal_p (dst, src1))
7592 matching_memory = 1;
7593 else if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
7594 && rtx_equal_p (dst, src2))
7595 matching_memory = 2;
7596 else
7597 dst = gen_reg_rtx (mode);
7598 }
7599
7600 /* Both source operands cannot be in memory. */
7601 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
7602 {
7603 if (matching_memory != 2)
7604 src2 = force_reg (mode, src2);
7605 else
7606 src1 = force_reg (mode, src1);
7607 }
7608
7609 /* If the operation is not commutable, source 1 cannot be a constant
7610 or non-matching memory. */
7611 if ((CONSTANT_P (src1)
7612 || (!matching_memory && GET_CODE (src1) == MEM))
7613 && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
7614 src1 = force_reg (mode, src1);
7615
7616 /* If optimizing, copy to regs to improve CSE */
7617 if (optimize && ! no_new_pseudos)
7618 {
7619 if (GET_CODE (dst) == MEM)
7620 dst = gen_reg_rtx (mode);
7621 if (GET_CODE (src1) == MEM)
7622 src1 = force_reg (mode, src1);
7623 if (GET_CODE (src2) == MEM)
7624 src2 = force_reg (mode, src2);
7625 }
7626
7627 /* Emit the instruction. */
7628
7629 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
7630 if (reload_in_progress)
7631 {
7632 /* Reload doesn't know about the flags register, and doesn't know that
7633 it doesn't want to clobber it. We can only do this with PLUS. */
7634 if (code != PLUS)
7635 abort ();
7636 emit_insn (op);
7637 }
7638 else
7639 {
7640 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
7641 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
7642 }
7643
7644 /* Fix up the destination if needed. */
7645 if (dst != operands[0])
7646 emit_move_insn (operands[0], dst);
7647 }
7648
7649 /* Return TRUE or FALSE depending on whether the binary operator meets the
7650 appropriate constraints. */
7651
7652 int
7653 ix86_binary_operator_ok (enum rtx_code code,
7654 enum machine_mode mode ATTRIBUTE_UNUSED,
7655 rtx operands[3])
7656 {
7657 /* Both source operands cannot be in memory. */
7658 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
7659 return 0;
7660 /* If the operation is not commutable, source 1 cannot be a constant. */
7661 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
7662 return 0;
7663 /* If the destination is memory, we must have a matching source operand. */
7664 if (GET_CODE (operands[0]) == MEM
7665 && ! (rtx_equal_p (operands[0], operands[1])
7666 || (GET_RTX_CLASS (code) == RTX_COMM_ARITH
7667 && rtx_equal_p (operands[0], operands[2]))))
7668 return 0;
7669 /* If the operation is not commutable and the source 1 is memory, we must
7670 have a matching destination. */
7671 if (GET_CODE (operands[1]) == MEM
7672 && GET_RTX_CLASS (code) != RTX_COMM_ARITH
7673 && ! rtx_equal_p (operands[0], operands[1]))
7674 return 0;
7675 return 1;
7676 }
7677
7678 /* Attempt to expand a unary operator. Make the expansion closer to the
7679 actual machine, than just general_operand, which will allow 2 separate
7680 memory references (one output, one input) in a single insn. */
7681
7682 void
7683 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
7684 rtx operands[])
7685 {
7686 int matching_memory;
7687 rtx src, dst, op, clob;
7688
7689 dst = operands[0];
7690 src = operands[1];
7691
7692 /* If the destination is memory, and we do not have matching source
7693 operands, do things in registers. */
7694 matching_memory = 0;
7695 if (GET_CODE (dst) == MEM)
7696 {
7697 if (rtx_equal_p (dst, src))
7698 matching_memory = 1;
7699 else
7700 dst = gen_reg_rtx (mode);
7701 }
7702
7703 /* When source operand is memory, destination must match. */
7704 if (!matching_memory && GET_CODE (src) == MEM)
7705 src = force_reg (mode, src);
7706
7707 /* If optimizing, copy to regs to improve CSE */
7708 if (optimize && ! no_new_pseudos)
7709 {
7710 if (GET_CODE (dst) == MEM)
7711 dst = gen_reg_rtx (mode);
7712 if (GET_CODE (src) == MEM)
7713 src = force_reg (mode, src);
7714 }
7715
7716 /* Emit the instruction. */
7717
7718 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
7719 if (reload_in_progress || code == NOT)
7720 {
7721 /* Reload doesn't know about the flags register, and doesn't know that
7722 it doesn't want to clobber it. */
7723 if (code != NOT)
7724 abort ();
7725 emit_insn (op);
7726 }
7727 else
7728 {
7729 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
7730 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
7731 }
7732
7733 /* Fix up the destination if needed. */
7734 if (dst != operands[0])
7735 emit_move_insn (operands[0], dst);
7736 }
7737
7738 /* Return TRUE or FALSE depending on whether the unary operator meets the
7739 appropriate constraints. */
7740
7741 int
7742 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
7743 enum machine_mode mode ATTRIBUTE_UNUSED,
7744 rtx operands[2] ATTRIBUTE_UNUSED)
7745 {
7746 /* If one of operands is memory, source and destination must match. */
7747 if ((GET_CODE (operands[0]) == MEM
7748 || GET_CODE (operands[1]) == MEM)
7749 && ! rtx_equal_p (operands[0], operands[1]))
7750 return FALSE;
7751 return TRUE;
7752 }
7753
7754 /* Return TRUE or FALSE depending on whether the first SET in INSN
7755 has source and destination with matching CC modes, and that the
7756 CC mode is at least as constrained as REQ_MODE. */
7757
7758 int
7759 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
7760 {
7761 rtx set;
7762 enum machine_mode set_mode;
7763
7764 set = PATTERN (insn);
7765 if (GET_CODE (set) == PARALLEL)
7766 set = XVECEXP (set, 0, 0);
7767 if (GET_CODE (set) != SET)
7768 abort ();
7769 if (GET_CODE (SET_SRC (set)) != COMPARE)
7770 abort ();
7771
7772 set_mode = GET_MODE (SET_DEST (set));
7773 switch (set_mode)
7774 {
7775 case CCNOmode:
7776 if (req_mode != CCNOmode
7777 && (req_mode != CCmode
7778 || XEXP (SET_SRC (set), 1) != const0_rtx))
7779 return 0;
7780 break;
7781 case CCmode:
7782 if (req_mode == CCGCmode)
7783 return 0;
7784 /* FALLTHRU */
7785 case CCGCmode:
7786 if (req_mode == CCGOCmode || req_mode == CCNOmode)
7787 return 0;
7788 /* FALLTHRU */
7789 case CCGOCmode:
7790 if (req_mode == CCZmode)
7791 return 0;
7792 /* FALLTHRU */
7793 case CCZmode:
7794 break;
7795
7796 default:
7797 abort ();
7798 }
7799
7800 return (GET_MODE (SET_SRC (set)) == set_mode);
7801 }
7802
7803 /* Generate insn patterns to do an integer compare of OPERANDS. */
7804
7805 static rtx
7806 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
7807 {
7808 enum machine_mode cmpmode;
7809 rtx tmp, flags;
7810
7811 cmpmode = SELECT_CC_MODE (code, op0, op1);
7812 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
7813
7814 /* This is very simple, but making the interface the same as in the
7815 FP case makes the rest of the code easier. */
7816 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
7817 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
7818
7819 /* Return the test that should be put into the flags user, i.e.
7820 the bcc, scc, or cmov instruction. */
7821 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
7822 }
7823
7824 /* Figure out whether to use ordered or unordered fp comparisons.
7825 Return the appropriate mode to use. */
7826
7827 enum machine_mode
7828 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
7829 {
7830 /* ??? In order to make all comparisons reversible, we do all comparisons
7831 non-trapping when compiling for IEEE. Once gcc is able to distinguish
7832 all forms of trapping and nontrapping comparisons, we can make inequality
7833 comparisons trapping again, since it results in better code when using
7834 FCOM based compares. */
7835 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
7836 }
7837
7838 enum machine_mode
7839 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
7840 {
7841 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
7842 return ix86_fp_compare_mode (code);
7843 switch (code)
7844 {
7845 /* Only zero flag is needed. */
7846 case EQ: /* ZF=0 */
7847 case NE: /* ZF!=0 */
7848 return CCZmode;
7849 /* Codes needing carry flag. */
7850 case GEU: /* CF=0 */
7851 case GTU: /* CF=0 & ZF=0 */
7852 case LTU: /* CF=1 */
7853 case LEU: /* CF=1 | ZF=1 */
7854 return CCmode;
7855 /* Codes possibly doable only with sign flag when
7856 comparing against zero. */
7857 case GE: /* SF=OF or SF=0 */
7858 case LT: /* SF<>OF or SF=1 */
7859 if (op1 == const0_rtx)
7860 return CCGOCmode;
7861 else
7862 /* For other cases Carry flag is not required. */
7863 return CCGCmode;
7864 /* Codes doable only with the sign flag when comparing
7865 against zero, but there is no jump instruction for that,
7866 so we need to use relational tests against overflow,
7867 which thus needs to be zero. */
7868 case GT: /* ZF=0 & SF=OF */
7869 case LE: /* ZF=1 | SF<>OF */
7870 if (op1 == const0_rtx)
7871 return CCNOmode;
7872 else
7873 return CCGCmode;
7874 /* The strcmp pattern does (use flags), and combine may ask us for the
7875 proper mode. */
7876 case USE:
7877 return CCmode;
7878 default:
7879 abort ();
7880 }
7881 }
7882
7883 /* Return the fixed registers used for condition codes. */
7884
7885 static bool
7886 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
7887 {
7888 *p1 = FLAGS_REG;
7889 *p2 = FPSR_REG;
7890 return true;
7891 }
7892
7893 /* If two condition code modes are compatible, return a condition code
7894 mode which is compatible with both. Otherwise, return
7895 VOIDmode. */
7896
7897 static enum machine_mode
7898 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
7899 {
7900 if (m1 == m2)
7901 return m1;
7902
7903 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
7904 return VOIDmode;
7905
7906 if ((m1 == CCGCmode && m2 == CCGOCmode)
7907 || (m1 == CCGOCmode && m2 == CCGCmode))
7908 return CCGCmode;
7909
7910 switch (m1)
7911 {
7912 default:
7913 abort ();
7914
7915 case CCmode:
7916 case CCGCmode:
7917 case CCGOCmode:
7918 case CCNOmode:
7919 case CCZmode:
7920 switch (m2)
7921 {
7922 default:
7923 return VOIDmode;
7924
7925 case CCmode:
7926 case CCGCmode:
7927 case CCGOCmode:
7928 case CCNOmode:
7929 case CCZmode:
7930 return CCmode;
7931 }
7932
7933 case CCFPmode:
7934 case CCFPUmode:
7935 /* These are only compatible with themselves, which we already
7936 checked above. */
7937 return VOIDmode;
7938 }
7939 }
7940
7941 /* Return true if we should use an FCOMI instruction for this fp comparison. */
7942
7943 int
7944 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
7945 {
7946 enum rtx_code swapped_code = swap_condition (code);
7947 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
7948 || (ix86_fp_comparison_cost (swapped_code)
7949 == ix86_fp_comparison_fcomi_cost (swapped_code)));
7950 }
7951
7952 /* Swap, force into registers, or otherwise massage the two operands
7953 to a fp comparison. The operands are updated in place; the new
7954 comparison code is returned. */
7955
7956 static enum rtx_code
7957 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
7958 {
7959 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
7960 rtx op0 = *pop0, op1 = *pop1;
7961 enum machine_mode op_mode = GET_MODE (op0);
7962 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
7963
7964 /* All of the unordered compare instructions only work on registers.
7965 The same is true of the fcomi compare instructions. The same is
7966 true of the XFmode compare instructions if not comparing with
7967 zero (ftst insn is used in this case). */
7968
7969 if (!is_sse
7970 && (fpcmp_mode == CCFPUmode
7971 || (op_mode == XFmode
7972 && ! (standard_80387_constant_p (op0) == 1
7973 || standard_80387_constant_p (op1) == 1))
7974 || ix86_use_fcomi_compare (code)))
7975 {
7976 op0 = force_reg (op_mode, op0);
7977 op1 = force_reg (op_mode, op1);
7978 }
7979 else
7980 {
7981 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
7982 things around if they appear profitable, otherwise force op0
7983 into a register. */
7984
7985 if (standard_80387_constant_p (op0) == 0
7986 || (GET_CODE (op0) == MEM
7987 && ! (standard_80387_constant_p (op1) == 0
7988 || GET_CODE (op1) == MEM)))
7989 {
7990 rtx tmp;
7991 tmp = op0, op0 = op1, op1 = tmp;
7992 code = swap_condition (code);
7993 }
7994
7995 if (GET_CODE (op0) != REG)
7996 op0 = force_reg (op_mode, op0);
7997
7998 if (CONSTANT_P (op1))
7999 {
8000 int tmp = standard_80387_constant_p (op1);
8001 if (tmp == 0)
8002 op1 = validize_mem (force_const_mem (op_mode, op1));
8003 else if (tmp == 1)
8004 {
8005 if (TARGET_CMOVE)
8006 op1 = force_reg (op_mode, op1);
8007 }
8008 else
8009 op1 = force_reg (op_mode, op1);
8010 }
8011 }
8012
8013 /* Try to rearrange the comparison to make it cheaper. */
8014 if (ix86_fp_comparison_cost (code)
8015 > ix86_fp_comparison_cost (swap_condition (code))
8016 && (GET_CODE (op1) == REG || !no_new_pseudos))
8017 {
8018 rtx tmp;
8019 tmp = op0, op0 = op1, op1 = tmp;
8020 code = swap_condition (code);
8021 if (GET_CODE (op0) != REG)
8022 op0 = force_reg (op_mode, op0);
8023 }
8024
8025 *pop0 = op0;
8026 *pop1 = op1;
8027 return code;
8028 }
8029
8030 /* Convert comparison codes we use to represent FP comparison to integer
8031 code that will result in proper branch. Return UNKNOWN if no such code
8032 is available. */
8033
8034 enum rtx_code
8035 ix86_fp_compare_code_to_integer (enum rtx_code code)
8036 {
8037 switch (code)
8038 {
8039 case GT:
8040 return GTU;
8041 case GE:
8042 return GEU;
8043 case ORDERED:
8044 case UNORDERED:
8045 return code;
8046 break;
8047 case UNEQ:
8048 return EQ;
8049 break;
8050 case UNLT:
8051 return LTU;
8052 break;
8053 case UNLE:
8054 return LEU;
8055 break;
8056 case LTGT:
8057 return NE;
8058 break;
8059 default:
8060 return UNKNOWN;
8061 }
8062 }
8063
8064 /* Split comparison code CODE into comparisons we can do using branch
8065 instructions. BYPASS_CODE is the comparison code for the branch that
8066 will branch around FIRST_CODE and SECOND_CODE. If one of the branches
8067 is not required, its value is set to UNKNOWN.
8068 We never require more than two branches. */
8069
8070 void
8071 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
8072 enum rtx_code *first_code,
8073 enum rtx_code *second_code)
8074 {
8075 *first_code = code;
8076 *bypass_code = UNKNOWN;
8077 *second_code = UNKNOWN;
8078
8079 /* The fcomi comparison sets flags as follows:
8080
8081 cmp ZF PF CF
8082 > 0 0 0
8083 < 0 0 1
8084 = 1 0 0
8085 un 1 1 1 */
8086
8087 switch (code)
8088 {
8089 case GT: /* GTU - CF=0 & ZF=0 */
8090 case GE: /* GEU - CF=0 */
8091 case ORDERED: /* PF=0 */
8092 case UNORDERED: /* PF=1 */
8093 case UNEQ: /* EQ - ZF=1 */
8094 case UNLT: /* LTU - CF=1 */
8095 case UNLE: /* LEU - CF=1 | ZF=1 */
8096 case LTGT: /* EQ - ZF=0 */
8097 break;
8098 case LT: /* LTU - CF=1 - fails on unordered */
8099 *first_code = UNLT;
8100 *bypass_code = UNORDERED;
8101 break;
8102 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
8103 *first_code = UNLE;
8104 *bypass_code = UNORDERED;
8105 break;
8106 case EQ: /* EQ - ZF=1 - fails on unordered */
8107 *first_code = UNEQ;
8108 *bypass_code = UNORDERED;
8109 break;
8110 case NE: /* NE - ZF=0 - fails on unordered */
8111 *first_code = LTGT;
8112 *second_code = UNORDERED;
8113 break;
8114 case UNGE: /* GEU - CF=0 - fails on unordered */
8115 *first_code = GE;
8116 *second_code = UNORDERED;
8117 break;
8118 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
8119 *first_code = GT;
8120 *second_code = UNORDERED;
8121 break;
8122 default:
8123 abort ();
8124 }
8125 if (!TARGET_IEEE_FP)
8126 {
8127 *second_code = UNKNOWN;
8128 *bypass_code = UNKNOWN;
8129 }
8130 }
8131
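/* Illustrative sketch, not part of the original source: what the splitting
   above means for a strict IEEE "<".  The fcomi flags cannot express LT
   directly (CF is 1 both for "less than" and for an unordered result), so
   the comparison becomes "branch around if unordered (PF set), otherwise
   branch if CF set (UNLT)".  The helper name is hypothetical.  */

static int
example_ieee_less_than (double a, double b)
{
  int unordered = a != a || b != b;    /* bypass_code == UNORDERED (PF) */
  int unlt = unordered || a < b;       /* first_code == UNLT (CF)       */
  return !unordered && unlt;           /* net effect: IEEE a < b        */
}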
8132 /* Return the cost of a comparison done with fcom + arithmetic operations on AX.
8133 All of the following functions use the number of instructions as the cost metric.
8134 In the future this should be tweaked to compute bytes for optimize_size and to
8135 take into account the performance of various instructions on various CPUs. */
8136 static int
8137 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
8138 {
8139 if (!TARGET_IEEE_FP)
8140 return 4;
8141 /* The cost of code output by ix86_expand_fp_compare. */
8142 switch (code)
8143 {
8144 case UNLE:
8145 case UNLT:
8146 case LTGT:
8147 case GT:
8148 case GE:
8149 case UNORDERED:
8150 case ORDERED:
8151 case UNEQ:
8152 return 4;
8153 break;
8154 case LT:
8155 case NE:
8156 case EQ:
8157 case UNGE:
8158 return 5;
8159 break;
8160 case LE:
8161 case UNGT:
8162 return 6;
8163 break;
8164 default:
8165 abort ();
8166 }
8167 }
8168
8169 /* Return cost of comparison done using fcomi operation.
8170 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8171 static int
8172 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
8173 {
8174 enum rtx_code bypass_code, first_code, second_code;
8175 /* Return an arbitrarily high cost when the instruction is not supported - this
8176 prevents gcc from using it. */
8177 if (!TARGET_CMOVE)
8178 return 1024;
8179 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8180 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
8181 }
8182
8183 /* Return cost of comparison done using sahf operation.
8184 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8185 static int
8186 ix86_fp_comparison_sahf_cost (enum rtx_code code)
8187 {
8188 enum rtx_code bypass_code, first_code, second_code;
8189 /* Return an arbitrarily high cost when the instruction is not preferred - this
8190 prevents gcc from using it. */
8191 if (!TARGET_USE_SAHF && !optimize_size)
8192 return 1024;
8193 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8194 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
8195 }
8196
8197 /* Compute cost of the comparison done using any method.
8198 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8199 static int
8200 ix86_fp_comparison_cost (enum rtx_code code)
8201 {
8202 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
8203 int min;
8204
8205 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
8206 sahf_cost = ix86_fp_comparison_sahf_cost (code);
8207
8208 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
8209 if (min > sahf_cost)
8210 min = sahf_cost;
8211 if (min > fcomi_cost)
8212 min = fcomi_cost;
8213 return min;
8214 }
8215
8216 /* Generate insn patterns to do a floating point compare of OPERANDS. */
8217
8218 static rtx
8219 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
8220 rtx *second_test, rtx *bypass_test)
8221 {
8222 enum machine_mode fpcmp_mode, intcmp_mode;
8223 rtx tmp, tmp2;
8224 int cost = ix86_fp_comparison_cost (code);
8225 enum rtx_code bypass_code, first_code, second_code;
8226
8227 fpcmp_mode = ix86_fp_compare_mode (code);
8228 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
8229
8230 if (second_test)
8231 *second_test = NULL_RTX;
8232 if (bypass_test)
8233 *bypass_test = NULL_RTX;
8234
8235 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8236
8237 /* Do fcomi/sahf based test when profitable. */
8238 if ((bypass_code == UNKNOWN || bypass_test)
8239 && (second_code == UNKNOWN || second_test)
8240 && ix86_fp_comparison_arithmetics_cost (code) > cost)
8241 {
8242 if (TARGET_CMOVE)
8243 {
8244 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8245 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
8246 tmp);
8247 emit_insn (tmp);
8248 }
8249 else
8250 {
8251 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8252 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8253 if (!scratch)
8254 scratch = gen_reg_rtx (HImode);
8255 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8256 emit_insn (gen_x86_sahf_1 (scratch));
8257 }
8258
8259 /* The FP codes work out to act like unsigned. */
8260 intcmp_mode = fpcmp_mode;
8261 code = first_code;
8262 if (bypass_code != UNKNOWN)
8263 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
8264 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8265 const0_rtx);
8266 if (second_code != UNKNOWN)
8267 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
8268 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8269 const0_rtx);
8270 }
8271 else
8272 {
8273 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
8274 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8275 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8276 if (!scratch)
8277 scratch = gen_reg_rtx (HImode);
8278 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8279
8280 /* In the unordered case, we have to check C2 for NaN's, which
8281 doesn't happen to work out to anything nice combination-wise.
8282 So do some bit twiddling on the value we've got in AH to come
8283 up with an appropriate set of condition codes. */
8284
8285 intcmp_mode = CCNOmode;
8286 switch (code)
8287 {
8288 case GT:
8289 case UNGT:
8290 if (code == GT || !TARGET_IEEE_FP)
8291 {
8292 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8293 code = EQ;
8294 }
8295 else
8296 {
8297 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8298 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8299 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
8300 intcmp_mode = CCmode;
8301 code = GEU;
8302 }
8303 break;
8304 case LT:
8305 case UNLT:
8306 if (code == LT && TARGET_IEEE_FP)
8307 {
8308 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8309 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
8310 intcmp_mode = CCmode;
8311 code = EQ;
8312 }
8313 else
8314 {
8315 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
8316 code = NE;
8317 }
8318 break;
8319 case GE:
8320 case UNGE:
8321 if (code == GE || !TARGET_IEEE_FP)
8322 {
8323 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
8324 code = EQ;
8325 }
8326 else
8327 {
8328 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8329 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8330 GEN_INT (0x01)));
8331 code = NE;
8332 }
8333 break;
8334 case LE:
8335 case UNLE:
8336 if (code == LE && TARGET_IEEE_FP)
8337 {
8338 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8339 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8340 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8341 intcmp_mode = CCmode;
8342 code = LTU;
8343 }
8344 else
8345 {
8346 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8347 code = NE;
8348 }
8349 break;
8350 case EQ:
8351 case UNEQ:
8352 if (code == EQ && TARGET_IEEE_FP)
8353 {
8354 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8355 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8356 intcmp_mode = CCmode;
8357 code = EQ;
8358 }
8359 else
8360 {
8361 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8362 code = NE;
8363 break;
8364 }
8365 break;
8366 case NE:
8367 case LTGT:
8368 if (code == NE && TARGET_IEEE_FP)
8369 {
8370 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8371 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8372 GEN_INT (0x40)));
8373 code = NE;
8374 }
8375 else
8376 {
8377 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8378 code = EQ;
8379 }
8380 break;
8381
8382 case UNORDERED:
8383 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8384 code = NE;
8385 break;
8386 case ORDERED:
8387 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8388 code = EQ;
8389 break;
8390
8391 default:
8392 abort ();
8393 }
8394 }
8395
8396 /* Return the test that should be put into the flags user, i.e.
8397 the bcc, scc, or cmov instruction. */
8398 return gen_rtx_fmt_ee (code, VOIDmode,
8399 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8400 const0_rtx);
8401 }
8402
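/* Illustrative sketch, not part of the original source: the meaning of the
   AH masks used above.  After "fnstsw %ax", AH holds the FPU condition bits
   C0 (0x01), C2 (0x04) and C3 (0x40); 0x45 is their union and 0x05 is C0|C2.
   After fcom, ">" leaves all three clear, "<" sets C0, "=" sets C3, and an
   unordered result sets all three, so e.g. the non-IEEE GT test above reduces
   to "(AH & 0x45) == 0".  The helper name is hypothetical.  */

static int
example_fcom_gt_from_ah (unsigned char ah)
{
  return (ah & 0x45) == 0;      /* testb $0x45, %ah ; sete  */
}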
8403 rtx
8404 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
8405 {
8406 rtx op0, op1, ret;
8407 op0 = ix86_compare_op0;
8408 op1 = ix86_compare_op1;
8409
8410 if (second_test)
8411 *second_test = NULL_RTX;
8412 if (bypass_test)
8413 *bypass_test = NULL_RTX;
8414
8415 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8416 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
8417 second_test, bypass_test);
8418 else
8419 ret = ix86_expand_int_compare (code, op0, op1);
8420
8421 return ret;
8422 }
8423
8424 /* Return true if the CODE will result in nontrivial jump sequence. */
8425 bool
8426 ix86_fp_jump_nontrivial_p (enum rtx_code code)
8427 {
8428 enum rtx_code bypass_code, first_code, second_code;
8429 if (!TARGET_CMOVE)
8430 return true;
8431 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8432 return bypass_code != UNKNOWN || second_code != UNKNOWN;
8433 }
8434
8435 void
8436 ix86_expand_branch (enum rtx_code code, rtx label)
8437 {
8438 rtx tmp;
8439
8440 switch (GET_MODE (ix86_compare_op0))
8441 {
8442 case QImode:
8443 case HImode:
8444 case SImode:
8445 simple:
8446 tmp = ix86_expand_compare (code, NULL, NULL);
8447 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8448 gen_rtx_LABEL_REF (VOIDmode, label),
8449 pc_rtx);
8450 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
8451 return;
8452
8453 case SFmode:
8454 case DFmode:
8455 case XFmode:
8456 {
8457 rtvec vec;
8458 int use_fcomi;
8459 enum rtx_code bypass_code, first_code, second_code;
8460
8461 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
8462 &ix86_compare_op1);
8463
8464 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8465
8466 /* Check whether we will use the natural sequence with one jump. If
8467 so, we can expand the jump early. Otherwise delay expansion by
8468 creating a compound insn so as not to confuse the optimizers. */
8469 if (bypass_code == UNKNOWN && second_code == UNKNOWN
8470 && TARGET_CMOVE)
8471 {
8472 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
8473 gen_rtx_LABEL_REF (VOIDmode, label),
8474 pc_rtx, NULL_RTX);
8475 }
8476 else
8477 {
8478 tmp = gen_rtx_fmt_ee (code, VOIDmode,
8479 ix86_compare_op0, ix86_compare_op1);
8480 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8481 gen_rtx_LABEL_REF (VOIDmode, label),
8482 pc_rtx);
8483 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
8484
8485 use_fcomi = ix86_use_fcomi_compare (code);
8486 vec = rtvec_alloc (3 + !use_fcomi);
8487 RTVEC_ELT (vec, 0) = tmp;
8488 RTVEC_ELT (vec, 1)
8489 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
8490 RTVEC_ELT (vec, 2)
8491 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
8492 if (! use_fcomi)
8493 RTVEC_ELT (vec, 3)
8494 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
8495
8496 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
8497 }
8498 return;
8499 }
8500
8501 case DImode:
8502 if (TARGET_64BIT)
8503 goto simple;
8504 /* Expand DImode branch into multiple compare+branch. */
8505 {
8506 rtx lo[2], hi[2], label2;
8507 enum rtx_code code1, code2, code3;
8508
8509 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
8510 {
8511 tmp = ix86_compare_op0;
8512 ix86_compare_op0 = ix86_compare_op1;
8513 ix86_compare_op1 = tmp;
8514 code = swap_condition (code);
8515 }
8516 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
8517 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
8518
8519 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
8520 avoid two branches. This costs one extra insn, so disable when
8521 optimizing for size. */
8522
8523 if ((code == EQ || code == NE)
8524 && (!optimize_size
8525 || hi[1] == const0_rtx || lo[1] == const0_rtx))
8526 {
8527 rtx xor0, xor1;
8528
8529 xor1 = hi[0];
8530 if (hi[1] != const0_rtx)
8531 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
8532 NULL_RTX, 0, OPTAB_WIDEN);
8533
8534 xor0 = lo[0];
8535 if (lo[1] != const0_rtx)
8536 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
8537 NULL_RTX, 0, OPTAB_WIDEN);
8538
8539 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
8540 NULL_RTX, 0, OPTAB_WIDEN);
8541
8542 ix86_compare_op0 = tmp;
8543 ix86_compare_op1 = const0_rtx;
8544 ix86_expand_branch (code, label);
8545 return;
8546 }
8547
8548 /* Otherwise, if we are doing a less-than or greater-or-equal-than comparison,
8549 op1 is a constant, and the low word is zero, then we can just
8550 examine the high word. */
8551
8552 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
8553 switch (code)
8554 {
8555 case LT: case LTU: case GE: case GEU:
8556 ix86_compare_op0 = hi[0];
8557 ix86_compare_op1 = hi[1];
8558 ix86_expand_branch (code, label);
8559 return;
8560 default:
8561 break;
8562 }
8563
8564 /* Otherwise, we need two or three jumps. */
8565
8566 label2 = gen_label_rtx ();
8567
8568 code1 = code;
8569 code2 = swap_condition (code);
8570 code3 = unsigned_condition (code);
8571
8572 switch (code)
8573 {
8574 case LT: case GT: case LTU: case GTU:
8575 break;
8576
8577 case LE: code1 = LT; code2 = GT; break;
8578 case GE: code1 = GT; code2 = LT; break;
8579 case LEU: code1 = LTU; code2 = GTU; break;
8580 case GEU: code1 = GTU; code2 = LTU; break;
8581
8582 case EQ: code1 = UNKNOWN; code2 = NE; break;
8583 case NE: code2 = UNKNOWN; break;
8584
8585 default:
8586 abort ();
8587 }
8588
8589 /*
8590 * a < b =>
8591 * if (hi(a) < hi(b)) goto true;
8592 * if (hi(a) > hi(b)) goto false;
8593 * if (lo(a) < lo(b)) goto true;
8594 * false:
8595 */
8596
8597 ix86_compare_op0 = hi[0];
8598 ix86_compare_op1 = hi[1];
8599
8600 if (code1 != UNKNOWN)
8601 ix86_expand_branch (code1, label);
8602 if (code2 != UNKNOWN)
8603 ix86_expand_branch (code2, label2);
8604
8605 ix86_compare_op0 = lo[0];
8606 ix86_compare_op1 = lo[1];
8607 ix86_expand_branch (code3, label);
8608
8609 if (code2 != UNKNOWN)
8610 emit_label (label2);
8611 return;
8612 }
8613
8614 default:
8615 abort ();
8616 }
8617 }
8618
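/* Illustrative sketch, not part of the original source: the multi-branch
   DImode expansion above, written out for a signed 64-bit "<" on a 32-bit
   target.  The high words are compared with the signed condition, the low
   words with its unsigned counterpart.  The helper name is hypothetical.  */

static int
example_di_less_than (int hi_a, unsigned int lo_a,
                      int hi_b, unsigned int lo_b)
{
  if (hi_a < hi_b)          /* code1: branch to the "true" label     */
    return 1;
  if (hi_a > hi_b)          /* code2: branch to the "false" label    */
    return 0;
  return lo_a < lo_b;       /* code3: unsigned compare of low words  */
}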
8619 /* Split branch based on floating point condition. */
8620 void
8621 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
8622 rtx target1, rtx target2, rtx tmp)
8623 {
8624 rtx second, bypass;
8625 rtx label = NULL_RTX;
8626 rtx condition;
8627 int bypass_probability = -1, second_probability = -1, probability = -1;
8628 rtx i;
8629
8630 if (target2 != pc_rtx)
8631 {
8632 rtx tmp = target2;
8633 code = reverse_condition_maybe_unordered (code);
8634 target2 = target1;
8635 target1 = tmp;
8636 }
8637
8638 condition = ix86_expand_fp_compare (code, op1, op2,
8639 tmp, &second, &bypass);
8640
8641 if (split_branch_probability >= 0)
8642 {
8643 /* Distribute the probabilities across the jumps.
8644 Assume that BYPASS and SECOND always test
8645 for UNORDERED. */
8646 probability = split_branch_probability;
8647
8648 /* A value of 1 is low enough that the probability does not need
8649 to be updated. Later we may run some experiments and see
8650 whether unordered values are more frequent in practice. */
8651 if (bypass)
8652 bypass_probability = 1;
8653 if (second)
8654 second_probability = 1;
8655 }
8656 if (bypass != NULL_RTX)
8657 {
8658 label = gen_label_rtx ();
8659 i = emit_jump_insn (gen_rtx_SET
8660 (VOIDmode, pc_rtx,
8661 gen_rtx_IF_THEN_ELSE (VOIDmode,
8662 bypass,
8663 gen_rtx_LABEL_REF (VOIDmode,
8664 label),
8665 pc_rtx)));
8666 if (bypass_probability >= 0)
8667 REG_NOTES (i)
8668 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8669 GEN_INT (bypass_probability),
8670 REG_NOTES (i));
8671 }
8672 i = emit_jump_insn (gen_rtx_SET
8673 (VOIDmode, pc_rtx,
8674 gen_rtx_IF_THEN_ELSE (VOIDmode,
8675 condition, target1, target2)));
8676 if (probability >= 0)
8677 REG_NOTES (i)
8678 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8679 GEN_INT (probability),
8680 REG_NOTES (i));
8681 if (second != NULL_RTX)
8682 {
8683 i = emit_jump_insn (gen_rtx_SET
8684 (VOIDmode, pc_rtx,
8685 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
8686 target2)));
8687 if (second_probability >= 0)
8688 REG_NOTES (i)
8689 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8690 GEN_INT (second_probability),
8691 REG_NOTES (i));
8692 }
8693 if (label != NULL_RTX)
8694 emit_label (label);
8695 }
8696
8697 int
8698 ix86_expand_setcc (enum rtx_code code, rtx dest)
8699 {
8700 rtx ret, tmp, tmpreg, equiv;
8701 rtx second_test, bypass_test;
8702
8703 if (GET_MODE (ix86_compare_op0) == DImode
8704 && !TARGET_64BIT)
8705 return 0; /* FAIL */
8706
8707 if (GET_MODE (dest) != QImode)
8708 abort ();
8709
8710 ret = ix86_expand_compare (code, &second_test, &bypass_test);
8711 PUT_MODE (ret, QImode);
8712
8713 tmp = dest;
8714 tmpreg = dest;
8715
8716 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
8717 if (bypass_test || second_test)
8718 {
8719 rtx test = second_test;
8720 int bypass = 0;
8721 rtx tmp2 = gen_reg_rtx (QImode);
8722 if (bypass_test)
8723 {
8724 if (second_test)
8725 abort ();
8726 test = bypass_test;
8727 bypass = 1;
8728 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
8729 }
8730 PUT_MODE (test, QImode);
8731 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
8732
8733 if (bypass)
8734 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
8735 else
8736 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
8737 }
8738
8739 /* Attach a REG_EQUAL note describing the comparison result. */
8740 equiv = simplify_gen_relational (code, QImode,
8741 GET_MODE (ix86_compare_op0),
8742 ix86_compare_op0, ix86_compare_op1);
8743 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
8744
8745 return 1; /* DONE */
8746 }
8747
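/* Illustrative sketch, not part of the original source: how an extra test
   returned by ix86_expand_compare is folded into the setcc result above.
   For an IEEE "!=", the condition splits into LTGT plus UNORDERED, and the
   two QImode setcc values are or-ed together; a bypass test is reversed and
   and-ed instead.  The helper name is hypothetical.  */

static int
example_setcc_ne (double a, double b)
{
  int ltgt = a < b || a > b;          /* setcc of the primary test   */
  int unordered = a != a || b != b;   /* setcc of the second test    */
  return ltgt | unordered;            /* iorqi3: IEEE a != b         */
}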
8748 /* Expand a comparison setting or clearing the carry flag. Return true when
8749 successful and set *POP to the comparison operation. */
8750 static bool
8751 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
8752 {
8753 enum machine_mode mode =
8754 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
8755
8756 /* Do not handle DImode compares that go through the special path. Also we can't
8757 deal with FP compares yet. This would be possible to add. */
8758 if ((mode == DImode && !TARGET_64BIT))
8759 return false;
8760 if (FLOAT_MODE_P (mode))
8761 {
8762 rtx second_test = NULL, bypass_test = NULL;
8763 rtx compare_op, compare_seq;
8764
8765 /* Shortcut: following common codes never translate into carry flag compares. */
8766 if (code == EQ || code == NE || code == UNEQ || code == LTGT
8767 || code == ORDERED || code == UNORDERED)
8768 return false;
8769
8770 /* These comparisons require zero flag; swap operands so they won't. */
8771 if ((code == GT || code == UNLE || code == LE || code == UNGT)
8772 && !TARGET_IEEE_FP)
8773 {
8774 rtx tmp = op0;
8775 op0 = op1;
8776 op1 = tmp;
8777 code = swap_condition (code);
8778 }
8779
8780 /* Try to expand the comparison and verify that we end up with a carry flag
8781 based comparison. This fails to be true only when we decide to expand the
8782 comparison using arithmetic, which is not a very common scenario. */
8783 start_sequence ();
8784 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
8785 &second_test, &bypass_test);
8786 compare_seq = get_insns ();
8787 end_sequence ();
8788
8789 if (second_test || bypass_test)
8790 return false;
8791 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
8792 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
8793 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
8794 else
8795 code = GET_CODE (compare_op);
8796 if (code != LTU && code != GEU)
8797 return false;
8798 emit_insn (compare_seq);
8799 *pop = compare_op;
8800 return true;
8801 }
8802 if (!INTEGRAL_MODE_P (mode))
8803 return false;
8804 switch (code)
8805 {
8806 case LTU:
8807 case GEU:
8808 break;
8809
8810 /* Convert a==0 into (unsigned)a<1. */
8811 case EQ:
8812 case NE:
8813 if (op1 != const0_rtx)
8814 return false;
8815 op1 = const1_rtx;
8816 code = (code == EQ ? LTU : GEU);
8817 break;
8818
8819 /* Convert a>b into b<a or a>=b+1. */
8820 case GTU:
8821 case LEU:
8822 if (GET_CODE (op1) == CONST_INT)
8823 {
8824 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
8825 /* Bail out on overflow. We could still swap the operands, but that
8826 would force loading the constant into a register. */
8827 if (op1 == const0_rtx
8828 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
8829 return false;
8830 code = (code == GTU ? GEU : LTU);
8831 }
8832 else
8833 {
8834 rtx tmp = op1;
8835 op1 = op0;
8836 op0 = tmp;
8837 code = (code == GTU ? LTU : GEU);
8838 }
8839 break;
8840
8841 /* Convert a>=0 into (unsigned)a<0x80000000. */
8842 case LT:
8843 case GE:
8844 if (mode == DImode || op1 != const0_rtx)
8845 return false;
8846 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
8847 code = (code == LT ? GEU : LTU);
8848 break;
8849 case LE:
8850 case GT:
8851 if (mode == DImode || op1 != constm1_rtx)
8852 return false;
8853 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
8854 code = (code == LE ? GEU : LTU);
8855 break;
8856
8857 default:
8858 return false;
8859 }
8860 /* Swapping operands may cause a constant to appear as the first operand. */
8861 if (!nonimmediate_operand (op0, VOIDmode))
8862 {
8863 if (no_new_pseudos)
8864 return false;
8865 op0 = force_reg (mode, op0);
8866 }
8867 ix86_compare_op0 = op0;
8868 ix86_compare_op1 = op1;
8869 *pop = ix86_expand_compare (code, NULL, NULL);
8870 if (GET_CODE (*pop) != LTU && GET_CODE (*pop) != GEU)
8871 abort ();
8872 return true;
8873 }
8874
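/* Illustrative sketch, not part of the original source: the rewrites
   performed above, which turn several conditions into a plain "below"
   (carry set) or "above-or-equal" (carry clear) test so that a later sbb
   can materialize the flag as a 0 / -1 mask.  The helper name is
   hypothetical; B stands for a constant where the comment says so.  */

static void
example_carry_flag_forms (unsigned int a, unsigned int b,
                          int *eq0, int *gt_const, int *ge0)
{
  *eq0      = a < 1u;             /* a == 0                              */
  *gt_const = a >= b + 1u;        /* a > B, B constant, B + 1 not zero   */
  *ge0      = a < 0x80000000u;    /* (int) a >= 0, for 32-bit modes      */
}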
8875 int
8876 ix86_expand_int_movcc (rtx operands[])
8877 {
8878 enum rtx_code code = GET_CODE (operands[1]), compare_code;
8879 rtx compare_seq, compare_op;
8880 rtx second_test, bypass_test;
8881 enum machine_mode mode = GET_MODE (operands[0]);
8882 bool sign_bit_compare_p = false;
8883
8884 start_sequence ();
8885 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
8886 compare_seq = get_insns ();
8887 end_sequence ();
8888
8889 compare_code = GET_CODE (compare_op);
8890
8891 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
8892 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
8893 sign_bit_compare_p = true;
8894
8895 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
8896 HImode insns, we'd be swallowed in word prefix ops. */
8897
8898 if ((mode != HImode || TARGET_FAST_PREFIX)
8899 && (mode != DImode || TARGET_64BIT)
8900 && GET_CODE (operands[2]) == CONST_INT
8901 && GET_CODE (operands[3]) == CONST_INT)
8902 {
8903 rtx out = operands[0];
8904 HOST_WIDE_INT ct = INTVAL (operands[2]);
8905 HOST_WIDE_INT cf = INTVAL (operands[3]);
8906 HOST_WIDE_INT diff;
8907
8908 diff = ct - cf;
8909 /* Sign bit compares are better done using shifts than by using
8910 sbb. */
8911 if (sign_bit_compare_p
8912 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
8913 ix86_compare_op1, &compare_op))
8914 {
8915 /* Detect overlap between destination and compare sources. */
8916 rtx tmp = out;
8917
8918 if (!sign_bit_compare_p)
8919 {
8920 bool fpcmp = false;
8921
8922 compare_code = GET_CODE (compare_op);
8923
8924 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
8925 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
8926 {
8927 fpcmp = true;
8928 compare_code = ix86_fp_compare_code_to_integer (compare_code);
8929 }
8930
8931 /* To simplify rest of code, restrict to the GEU case. */
8932 if (compare_code == LTU)
8933 {
8934 HOST_WIDE_INT tmp = ct;
8935 ct = cf;
8936 cf = tmp;
8937 compare_code = reverse_condition (compare_code);
8938 code = reverse_condition (code);
8939 }
8940 else
8941 {
8942 if (fpcmp)
8943 PUT_CODE (compare_op,
8944 reverse_condition_maybe_unordered
8945 (GET_CODE (compare_op)));
8946 else
8947 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
8948 }
8949 diff = ct - cf;
8950
8951 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
8952 || reg_overlap_mentioned_p (out, ix86_compare_op1))
8953 tmp = gen_reg_rtx (mode);
8954
8955 if (mode == DImode)
8956 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
8957 else
8958 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
8959 }
8960 else
8961 {
8962 if (code == GT || code == GE)
8963 code = reverse_condition (code);
8964 else
8965 {
8966 HOST_WIDE_INT tmp = ct;
8967 ct = cf;
8968 cf = tmp;
8969 diff = ct - cf;
8970 }
8971 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
8972 ix86_compare_op1, VOIDmode, 0, -1);
8973 }
8974
8975 if (diff == 1)
8976 {
8977 /*
8978 * cmpl op0,op1
8979 * sbbl dest,dest
8980 * [addl dest, ct]
8981 *
8982 * Size 5 - 8.
8983 */
8984 if (ct)
8985 tmp = expand_simple_binop (mode, PLUS,
8986 tmp, GEN_INT (ct),
8987 copy_rtx (tmp), 1, OPTAB_DIRECT);
8988 }
8989 else if (cf == -1)
8990 {
8991 /*
8992 * cmpl op0,op1
8993 * sbbl dest,dest
8994 * orl $ct, dest
8995 *
8996 * Size 8.
8997 */
8998 tmp = expand_simple_binop (mode, IOR,
8999 tmp, GEN_INT (ct),
9000 copy_rtx (tmp), 1, OPTAB_DIRECT);
9001 }
9002 else if (diff == -1 && ct)
9003 {
9004 /*
9005 * cmpl op0,op1
9006 * sbbl dest,dest
9007 * notl dest
9008 * [addl dest, cf]
9009 *
9010 * Size 8 - 11.
9011 */
9012 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9013 if (cf)
9014 tmp = expand_simple_binop (mode, PLUS,
9015 copy_rtx (tmp), GEN_INT (cf),
9016 copy_rtx (tmp), 1, OPTAB_DIRECT);
9017 }
9018 else
9019 {
9020 /*
9021 * cmpl op0,op1
9022 * sbbl dest,dest
9023 * [notl dest]
9024 * andl cf - ct, dest
9025 * [addl dest, ct]
9026 *
9027 * Size 8 - 11.
9028 */
9029
9030 if (cf == 0)
9031 {
9032 cf = ct;
9033 ct = 0;
9034 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9035 }
9036
9037 tmp = expand_simple_binop (mode, AND,
9038 copy_rtx (tmp),
9039 gen_int_mode (cf - ct, mode),
9040 copy_rtx (tmp), 1, OPTAB_DIRECT);
9041 if (ct)
9042 tmp = expand_simple_binop (mode, PLUS,
9043 copy_rtx (tmp), GEN_INT (ct),
9044 copy_rtx (tmp), 1, OPTAB_DIRECT);
9045 }
9046
9047 if (!rtx_equal_p (tmp, out))
9048 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
9049
9050 return 1; /* DONE */
9051 }
9052
9053 if (diff < 0)
9054 {
9055 HOST_WIDE_INT tmp;
9056 tmp = ct, ct = cf, cf = tmp;
9057 diff = -diff;
9058 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9059 {
9060 /* We may be reversing an unordered compare to a normal compare, which
9061 is not valid in general (we may convert a non-trapping condition
9062 to a trapping one); however, on i386 we currently emit all
9063 comparisons unordered. */
9064 compare_code = reverse_condition_maybe_unordered (compare_code);
9065 code = reverse_condition_maybe_unordered (code);
9066 }
9067 else
9068 {
9069 compare_code = reverse_condition (compare_code);
9070 code = reverse_condition (code);
9071 }
9072 }
9073
9074 compare_code = UNKNOWN;
9075 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
9076 && GET_CODE (ix86_compare_op1) == CONST_INT)
9077 {
9078 if (ix86_compare_op1 == const0_rtx
9079 && (code == LT || code == GE))
9080 compare_code = code;
9081 else if (ix86_compare_op1 == constm1_rtx)
9082 {
9083 if (code == LE)
9084 compare_code = LT;
9085 else if (code == GT)
9086 compare_code = GE;
9087 }
9088 }
9089
9090 /* Optimize dest = (op0 < 0) ? -1 : cf. */
9091 if (compare_code != UNKNOWN
9092 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
9093 && (cf == -1 || ct == -1))
9094 {
9095 /* If lea code below could be used, only optimize
9096 if it results in a 2 insn sequence. */
9097
9098 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
9099 || diff == 3 || diff == 5 || diff == 9)
9100 || (compare_code == LT && ct == -1)
9101 || (compare_code == GE && cf == -1))
9102 {
9103 /*
9104 * notl op1 (if necessary)
9105 * sarl $31, op1
9106 * orl cf, op1
9107 */
9108 if (ct != -1)
9109 {
9110 cf = ct;
9111 ct = -1;
9112 code = reverse_condition (code);
9113 }
9114
9115 out = emit_store_flag (out, code, ix86_compare_op0,
9116 ix86_compare_op1, VOIDmode, 0, -1);
9117
9118 out = expand_simple_binop (mode, IOR,
9119 out, GEN_INT (cf),
9120 out, 1, OPTAB_DIRECT);
9121 if (out != operands[0])
9122 emit_move_insn (operands[0], out);
9123
9124 return 1; /* DONE */
9125 }
9126 }
9127
9128
9129 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
9130 || diff == 3 || diff == 5 || diff == 9)
9131 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
9132 && (mode != DImode
9133 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
9134 {
9135 /*
9136 * xorl dest,dest
9137 * cmpl op1,op2
9138 * setcc dest
9139 * lea cf(dest*(ct-cf)),dest
9140 *
9141 * Size 14.
9142 *
9143 * This also catches the degenerate setcc-only case.
9144 */
9145
9146 rtx tmp;
9147 int nops;
9148
9149 out = emit_store_flag (out, code, ix86_compare_op0,
9150 ix86_compare_op1, VOIDmode, 0, 1);
9151
9152 nops = 0;
9153 /* On x86_64 the lea instruction operates on Pmode, so we need
9154 to get the arithmetic done in the proper mode to match. */
9155 if (diff == 1)
9156 tmp = copy_rtx (out);
9157 else
9158 {
9159 rtx out1;
9160 out1 = copy_rtx (out);
9161 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
9162 nops++;
9163 if (diff & 1)
9164 {
9165 tmp = gen_rtx_PLUS (mode, tmp, out1);
9166 nops++;
9167 }
9168 }
9169 if (cf != 0)
9170 {
9171 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
9172 nops++;
9173 }
9174 if (!rtx_equal_p (tmp, out))
9175 {
9176 if (nops == 1)
9177 out = force_operand (tmp, copy_rtx (out));
9178 else
9179 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
9180 }
9181 if (!rtx_equal_p (out, operands[0]))
9182 emit_move_insn (operands[0], copy_rtx (out));
9183
9184 return 1; /* DONE */
9185 }
9186
9187 /*
9188 * General case: Jumpful:
9189 * xorl dest,dest cmpl op1, op2
9190 * cmpl op1, op2 movl ct, dest
9191 * setcc dest jcc 1f
9192 * decl dest movl cf, dest
9193 * andl (cf-ct),dest 1:
9194 * addl ct,dest
9195 *
9196 * Size 20. Size 14.
9197 *
9198 * This is reasonably steep, but branch mispredict costs are
9199 * high on modern cpus, so consider failing only if optimizing
9200 * for space.
9201 */
9202
9203 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
9204 && BRANCH_COST >= 2)
9205 {
9206 if (cf == 0)
9207 {
9208 cf = ct;
9209 ct = 0;
9210 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9211 /* We may be reversing an unordered compare to a normal compare,
9212 which is not valid in general (we may convert a non-trapping
9213 condition to a trapping one); however, on i386 we currently
9214 emit all comparisons unordered. */
9215 code = reverse_condition_maybe_unordered (code);
9216 else
9217 {
9218 code = reverse_condition (code);
9219 if (compare_code != UNKNOWN)
9220 compare_code = reverse_condition (compare_code);
9221 }
9222 }
9223
9224 if (compare_code != UNKNOWN)
9225 {
9226 /* notl op1 (if needed)
9227 sarl $31, op1
9228 andl (cf-ct), op1
9229 addl ct, op1
9230
9231 For x < 0 (resp. x <= -1) there will be no notl,
9232 so if possible swap the constants to get rid of the
9233 complement.
9234 True/false will be -1/0 while code below (store flag
9235 followed by decrement) is 0/-1, so the constants need
9236 to be exchanged once more. */
9237
9238 if (compare_code == GE || !cf)
9239 {
9240 code = reverse_condition (code);
9241 compare_code = LT;
9242 }
9243 else
9244 {
9245 HOST_WIDE_INT tmp = cf;
9246 cf = ct;
9247 ct = tmp;
9248 }
9249
9250 out = emit_store_flag (out, code, ix86_compare_op0,
9251 ix86_compare_op1, VOIDmode, 0, -1);
9252 }
9253 else
9254 {
9255 out = emit_store_flag (out, code, ix86_compare_op0,
9256 ix86_compare_op1, VOIDmode, 0, 1);
9257
9258 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
9259 copy_rtx (out), 1, OPTAB_DIRECT);
9260 }
9261
9262 out = expand_simple_binop (mode, AND, copy_rtx (out),
9263 gen_int_mode (cf - ct, mode),
9264 copy_rtx (out), 1, OPTAB_DIRECT);
9265 if (ct)
9266 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
9267 copy_rtx (out), 1, OPTAB_DIRECT);
9268 if (!rtx_equal_p (out, operands[0]))
9269 emit_move_insn (operands[0], copy_rtx (out));
9270
9271 return 1; /* DONE */
9272 }
9273 }
9274
9275 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
9276 {
9277 /* Try a few things more with specific constants and a variable. */
9278
9279 optab op;
9280 rtx var, orig_out, out, tmp;
9281
9282 if (BRANCH_COST <= 2)
9283 return 0; /* FAIL */
9284
9285 /* If one of the two operands is an interesting constant, load a
9286 constant with the above and mask it in with a logical operation. */
9287
9288 if (GET_CODE (operands[2]) == CONST_INT)
9289 {
9290 var = operands[3];
9291 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
9292 operands[3] = constm1_rtx, op = and_optab;
9293 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
9294 operands[3] = const0_rtx, op = ior_optab;
9295 else
9296 return 0; /* FAIL */
9297 }
9298 else if (GET_CODE (operands[3]) == CONST_INT)
9299 {
9300 var = operands[2];
9301 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
9302 operands[2] = constm1_rtx, op = and_optab;
9303 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
9304 operands[2] = const0_rtx, op = ior_optab;
9305 else
9306 return 0; /* FAIL */
9307 }
9308 else
9309 return 0; /* FAIL */
9310
9311 orig_out = operands[0];
9312 tmp = gen_reg_rtx (mode);
9313 operands[0] = tmp;
9314
9315 /* Recurse to get the constant loaded. */
9316 if (ix86_expand_int_movcc (operands) == 0)
9317 return 0; /* FAIL */
9318
9319 /* Mask in the interesting variable. */
9320 out = expand_binop (mode, op, var, tmp, orig_out, 0,
9321 OPTAB_WIDEN);
9322 if (!rtx_equal_p (out, orig_out))
9323 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
9324
9325 return 1; /* DONE */
9326 }
9327
9328 /*
9329 * For comparison with above,
9330 *
9331 * movl cf,dest
9332 * movl ct,tmp
9333 * cmpl op1,op2
9334 * cmovcc tmp,dest
9335 *
9336 * Size 15.
9337 */
9338
9339 if (! nonimmediate_operand (operands[2], mode))
9340 operands[2] = force_reg (mode, operands[2]);
9341 if (! nonimmediate_operand (operands[3], mode))
9342 operands[3] = force_reg (mode, operands[3]);
9343
9344 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9345 {
9346 rtx tmp = gen_reg_rtx (mode);
9347 emit_move_insn (tmp, operands[3]);
9348 operands[3] = tmp;
9349 }
9350 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9351 {
9352 rtx tmp = gen_reg_rtx (mode);
9353 emit_move_insn (tmp, operands[2]);
9354 operands[2] = tmp;
9355 }
9356
9357 if (! register_operand (operands[2], VOIDmode)
9358 && (mode == QImode
9359 || ! register_operand (operands[3], VOIDmode)))
9360 operands[2] = force_reg (mode, operands[2]);
9361
9362 if (mode == QImode
9363 && ! register_operand (operands[3], VOIDmode))
9364 operands[3] = force_reg (mode, operands[3]);
9365
9366 emit_insn (compare_seq);
9367 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9368 gen_rtx_IF_THEN_ELSE (mode,
9369 compare_op, operands[2],
9370 operands[3])));
9371 if (bypass_test)
9372 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
9373 gen_rtx_IF_THEN_ELSE (mode,
9374 bypass_test,
9375 copy_rtx (operands[3]),
9376 copy_rtx (operands[0]))));
9377 if (second_test)
9378 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
9379 gen_rtx_IF_THEN_ELSE (mode,
9380 second_test,
9381 copy_rtx (operands[2]),
9382 copy_rtx (operands[0]))));
9383
9384 return 1; /* DONE */
9385 }
9386
9387 int
9388 ix86_expand_fp_movcc (rtx operands[])
9389 {
9390 enum rtx_code code;
9391 rtx tmp;
9392 rtx compare_op, second_test, bypass_test;
9393
9394 /* For SF/DFmode conditional moves based on comparisons
9395 in the same mode, we may want to use SSE min/max instructions. */
9396 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
9397 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
9398 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
9399 /* The SSE comparisons do not support the LTGT/UNEQ pair. */
9400 && (!TARGET_IEEE_FP
9401 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
9402 /* We may be called from the post-reload splitter. */
9403 && (!REG_P (operands[0])
9404 || SSE_REG_P (operands[0])
9405 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
9406 {
9407 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
9408 code = GET_CODE (operands[1]);
9409
9410 /* See if we have a (cross) match between the comparison operands and
9411 the conditional move operands. */
9412 if (rtx_equal_p (operands[2], op1))
9413 {
9414 rtx tmp = op0;
9415 op0 = op1;
9416 op1 = tmp;
9417 code = reverse_condition_maybe_unordered (code);
9418 }
9419 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
9420 {
9421 /* Check for min operation. */
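/* The LT form maps directly onto the SSE min instruction; the UNLE
   form is reduced to it by swapping the comparison operands, relying
   on the instruction returning its second source operand when the
   comparison is unordered.  */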
9422 if (code == LT || code == UNLE)
9423 {
9424 if (code == UNLE)
9425 {
9426 rtx tmp = op0;
9427 op0 = op1;
9428 op1 = tmp;
9429 }
9430 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9431 if (memory_operand (op0, VOIDmode))
9432 op0 = force_reg (GET_MODE (operands[0]), op0);
9433 if (GET_MODE (operands[0]) == SFmode)
9434 emit_insn (gen_minsf3 (operands[0], op0, op1));
9435 else
9436 emit_insn (gen_mindf3 (operands[0], op0, op1));
9437 return 1;
9438 }
9439 /* Check for max operation. */
9440 if (code == GT || code == UNGE)
9441 {
9442 if (code == UNGE)
9443 {
9444 rtx tmp = op0;
9445 op0 = op1;
9446 op1 = tmp;
9447 }
9448 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9449 if (memory_operand (op0, VOIDmode))
9450 op0 = force_reg (GET_MODE (operands[0]), op0);
9451 if (GET_MODE (operands[0]) == SFmode)
9452 emit_insn (gen_maxsf3 (operands[0], op0, op1));
9453 else
9454 emit_insn (gen_maxdf3 (operands[0], op0, op1));
9455 return 1;
9456 }
9457 }
9458 /* Force the condition to be a valid sse_comparison_operator. When we
9459 are in non-IEEE mode, try to canonicalize the destination operand
9460 to be first in the comparison - this helps reload to avoid extra
9461 moves. */
9462 if (!sse_comparison_operator (operands[1], VOIDmode)
9463 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
9464 {
9465 rtx tmp = ix86_compare_op0;
9466 ix86_compare_op0 = ix86_compare_op1;
9467 ix86_compare_op1 = tmp;
9468 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
9469 VOIDmode, ix86_compare_op0,
9470 ix86_compare_op1);
9471 }
9472 /* Similarly try to make the result the first operand of the conditional
9473 move. We also don't support the NE comparison on SSE, so try to
9474 avoid it. */
9475 if ((rtx_equal_p (operands[0], operands[3])
9476 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
9477 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
9478 {
9479 rtx tmp = operands[2];
9480 operands[2] = operands[3];
9481 operands[3] = tmp;
9482 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
9483 (GET_CODE (operands[1])),
9484 VOIDmode, ix86_compare_op0,
9485 ix86_compare_op1);
9486 }
9487 if (GET_MODE (operands[0]) == SFmode)
9488 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
9489 operands[2], operands[3],
9490 ix86_compare_op0, ix86_compare_op1));
9491 else
9492 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
9493 operands[2], operands[3],
9494 ix86_compare_op0, ix86_compare_op1));
9495 return 1;
9496 }
9497
9498 /* The floating point conditional move instructions don't directly
9499 support conditions resulting from a signed integer comparison. */
9500
9501 code = GET_CODE (operands[1]);
9502 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9503
9504 /* If the comparison cannot be represented as an FCMOV condition,
9505 compute it into a QImode register with setcc and test that against zero. */
9506
9507 if (!fcmov_comparison_operator (compare_op, VOIDmode))
9508 {
9509 if (second_test != NULL || bypass_test != NULL)
9510 abort ();
9511 tmp = gen_reg_rtx (QImode);
9512 ix86_expand_setcc (code, tmp);
9513 code = NE;
9514 ix86_compare_op0 = tmp;
9515 ix86_compare_op1 = const0_rtx;
9516 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9517 }
9518 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9519 {
9520 tmp = gen_reg_rtx (GET_MODE (operands[0]));
9521 emit_move_insn (tmp, operands[3]);
9522 operands[3] = tmp;
9523 }
9524 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9525 {
9526 tmp = gen_reg_rtx (GET_MODE (operands[0]));
9527 emit_move_insn (tmp, operands[2]);
9528 operands[2] = tmp;
9529 }
9530
9531 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9532 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9533 compare_op,
9534 operands[2],
9535 operands[3])));
9536 if (bypass_test)
9537 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9538 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9539 bypass_test,
9540 operands[3],
9541 operands[0])));
9542 if (second_test)
9543 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9544 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9545 second_test,
9546 operands[2],
9547 operands[0])));
9548
9549 return 1;
9550 }
9551
9552 /* Expand conditional increment or decrement using adc/sbb instructions.
9553 The default case using setcc followed by the conditional move can be
9554 done by generic code. */
9555 int
9556 ix86_expand_int_addcc (rtx operands[])
9557 {
9558 enum rtx_code code = GET_CODE (operands[1]);
9559 rtx compare_op;
9560 rtx val = const0_rtx;
9561 bool fpcmp = false;
9562 enum machine_mode mode = GET_MODE (operands[0]);
9563
9564 if (operands[3] != const1_rtx
9565 && operands[3] != constm1_rtx)
9566 return 0;
9567 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
9568 ix86_compare_op1, &compare_op))
9569 return 0;
9570 code = GET_CODE (compare_op);
9571
9572 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9573 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9574 {
9575 fpcmp = true;
9576 code = ix86_fp_compare_code_to_integer (code);
9577 }
9578
9579 if (code != LTU)
9580 {
9581 val = constm1_rtx;
9582 if (fpcmp)
9583 PUT_CODE (compare_op,
9584 reverse_condition_maybe_unordered
9585 (GET_CODE (compare_op)));
9586 else
9587 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
9588 }
9589 PUT_MODE (compare_op, mode);
9590
9591 /* Construct either adc or sbb insn. */
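/* For example, after a comparison that sets the carry flag exactly when
   the condition holds, "adc $0, dest" computes dest + CF and
   "sbb $0, dest" computes dest - CF; the -1 constant and the reversed
   condition handled above cover the remaining combinations.  */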
9592 if ((code == LTU) == (operands[3] == constm1_rtx))
9593 {
9594 switch (GET_MODE (operands[0]))
9595 {
9596 case QImode:
9597 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
9598 break;
9599 case HImode:
9600 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
9601 break;
9602 case SImode:
9603 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
9604 break;
9605 case DImode:
9606 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
9607 break;
9608 default:
9609 abort ();
9610 }
9611 }
9612 else
9613 {
9614 switch (GET_MODE (operands[0]))
9615 {
9616 case QImode:
9617 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
9618 break;
9619 case HImode:
9620 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
9621 break;
9622 case SImode:
9623 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
9624 break;
9625 case DImode:
9626 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
9627 break;
9628 default:
9629 abort ();
9630 }
9631 }
9632 return 1; /* DONE */
9633 }
9634
9635
9636 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
9637 works for floating point parameters and non-offsettable memories.
9638 For pushes, it returns just stack offsets; the values will be saved
9639 in the right order. At most three parts are generated. */
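/* For example, on a 32-bit target a DFmode value splits into two SImode
   parts and an XFmode value into three; on a 64-bit target an XFmode or
   TFmode value splits into a DImode part plus an SImode or DImode part.  */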
9640
9641 static int
9642 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
9643 {
9644 int size;
9645
9646 if (!TARGET_64BIT)
9647 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
9648 else
9649 size = (GET_MODE_SIZE (mode) + 4) / 8;
9650
9651 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
9652 abort ();
9653 if (size < 2 || size > 3)
9654 abort ();
9655
9656 /* Optimize constant pool references to immediates. This is used by fp
9657 moves, which force all constants to memory to allow combining. */
9658 if (GET_CODE (operand) == MEM && MEM_READONLY_P (operand))
9659 {
9660 rtx tmp = maybe_get_pool_constant (operand);
9661 if (tmp)
9662 operand = tmp;
9663 }
9664
9665 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
9666 {
9667 /* The only non-offsettable memories we handle are pushes. */
9668 if (! push_operand (operand, VOIDmode))
9669 abort ();
9670
9671 operand = copy_rtx (operand);
9672 PUT_MODE (operand, Pmode);
9673 parts[0] = parts[1] = parts[2] = operand;
9674 }
9675 else if (!TARGET_64BIT)
9676 {
9677 if (mode == DImode)
9678 split_di (&operand, 1, &parts[0], &parts[1]);
9679 else
9680 {
9681 if (REG_P (operand))
9682 {
9683 if (!reload_completed)
9684 abort ();
9685 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
9686 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
9687 if (size == 3)
9688 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
9689 }
9690 else if (offsettable_memref_p (operand))
9691 {
9692 operand = adjust_address (operand, SImode, 0);
9693 parts[0] = operand;
9694 parts[1] = adjust_address (operand, SImode, 4);
9695 if (size == 3)
9696 parts[2] = adjust_address (operand, SImode, 8);
9697 }
9698 else if (GET_CODE (operand) == CONST_DOUBLE)
9699 {
9700 REAL_VALUE_TYPE r;
9701 long l[4];
9702
9703 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
9704 switch (mode)
9705 {
9706 case XFmode:
9707 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
9708 parts[2] = gen_int_mode (l[2], SImode);
9709 break;
9710 case DFmode:
9711 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
9712 break;
9713 default:
9714 abort ();
9715 }
9716 parts[1] = gen_int_mode (l[1], SImode);
9717 parts[0] = gen_int_mode (l[0], SImode);
9718 }
9719 else
9720 abort ();
9721 }
9722 }
9723 else
9724 {
9725 if (mode == TImode)
9726 split_ti (&operand, 1, &parts[0], &parts[1]);
9727 if (mode == XFmode || mode == TFmode)
9728 {
9729 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
9730 if (REG_P (operand))
9731 {
9732 if (!reload_completed)
9733 abort ();
9734 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
9735 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
9736 }
9737 else if (offsettable_memref_p (operand))
9738 {
9739 operand = adjust_address (operand, DImode, 0);
9740 parts[0] = operand;
9741 parts[1] = adjust_address (operand, upper_mode, 8);
9742 }
9743 else if (GET_CODE (operand) == CONST_DOUBLE)
9744 {
9745 REAL_VALUE_TYPE r;
9746 long l[3];
9747
9748 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
9749 real_to_target (l, &r, mode);
9750 /* Do not use shift by 32 to avoid warning on 32bit systems. */
9751 if (HOST_BITS_PER_WIDE_INT >= 64)
9752 parts[0]
9753 = gen_int_mode
9754 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
9755 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
9756 DImode);
9757 else
9758 parts[0] = immed_double_const (l[0], l[1], DImode);
9759 if (upper_mode == SImode)
9760 parts[1] = gen_int_mode (l[2], SImode);
9761 else if (HOST_BITS_PER_WIDE_INT >= 64)
9762 parts[1]
9763 = gen_int_mode
9764 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
9765 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
9766 DImode);
9767 else
9768 parts[1] = immed_double_const (l[2], l[3], DImode);
9769 }
9770 else
9771 abort ();
9772 }
9773 }
9774
9775 return size;
9776 }
9777
9778 /* Emit insns to perform a move or push of DI, DF, and XF values.
9779 All insns required for the move are emitted here.
9780 Operands 2-4 contain the input values in the correct order;
9781 operands 5-7 contain the output values. */
9782
9783 void
9784 ix86_split_long_move (rtx operands[])
9785 {
9786 rtx part[2][3];
9787 int nparts;
9788 int push = 0;
9789 int collisions = 0;
9790 enum machine_mode mode = GET_MODE (operands[0]);
9791
9792 /* The DFmode expanders may ask us to move a double.
9793 For a 64bit target this is a single move; by handling it
9794 here we simplify the i386.md splitters. */
9795 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
9796 {
9797 /* Optimize constant pool reference to immediates. This is used by
9798 fp moves, that force all constants to memory to allow combining. */
9799
9800 if (GET_CODE (operands[1]) == MEM
9801 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
9802 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
9803 operands[1] = get_pool_constant (XEXP (operands[1], 0));
9804 if (push_operand (operands[0], VOIDmode))
9805 {
9806 operands[0] = copy_rtx (operands[0]);
9807 PUT_MODE (operands[0], Pmode);
9808 }
9809 else
9810 operands[0] = gen_lowpart (DImode, operands[0]);
9811 operands[1] = gen_lowpart (DImode, operands[1]);
9812 emit_move_insn (operands[0], operands[1]);
9813 return;
9814 }
9815
9816 /* The only non-offsettable memory we handle is push. */
9817 if (push_operand (operands[0], VOIDmode))
9818 push = 1;
9819 else if (GET_CODE (operands[0]) == MEM
9820 && ! offsettable_memref_p (operands[0]))
9821 abort ();
9822
9823 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
9824 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
9825
9826 /* When emitting a push, take care of source operands on the stack. */
9827 if (push && GET_CODE (operands[1]) == MEM
9828 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
9829 {
9830 if (nparts == 3)
9831 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
9832 XEXP (part[1][2], 0));
9833 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
9834 XEXP (part[1][1], 0));
9835 }
9836
9837 /* We need to do the copy in the right order in case an address register
9838 of the source overlaps the destination. */
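/* For example, if the low destination register also addresses the memory
   source, the part that clobbers that register must be copied last; with
   more than one such collision the address is first materialized into the
   last destination part with an lea so only one colliding move remains.  */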
9839 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
9840 {
9841 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
9842 collisions++;
9843 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
9844 collisions++;
9845 if (nparts == 3
9846 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
9847 collisions++;
9848
9849 /* Collision in the middle part can be handled by reordering. */
9850 if (collisions == 1 && nparts == 3
9851 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
9852 {
9853 rtx tmp;
9854 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
9855 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
9856 }
9857
9858 /* If there are more collisions, we can't handle it by reordering.
9859 Do an lea to the last part and use only one colliding move. */
9860 else if (collisions > 1)
9861 {
9862 rtx base;
9863
9864 collisions = 1;
9865
9866 base = part[0][nparts - 1];
9867
9868 /* Handle the case when the last part isn't valid for lea.
9869 Happens in 64-bit mode storing the 12-byte XFmode. */
9870 if (GET_MODE (base) != Pmode)
9871 base = gen_rtx_REG (Pmode, REGNO (base));
9872
9873 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
9874 part[1][0] = replace_equiv_address (part[1][0], base);
9875 part[1][1] = replace_equiv_address (part[1][1],
9876 plus_constant (base, UNITS_PER_WORD));
9877 if (nparts == 3)
9878 part[1][2] = replace_equiv_address (part[1][2],
9879 plus_constant (base, 8));
9880 }
9881 }
9882
9883 if (push)
9884 {
9885 if (!TARGET_64BIT)
9886 {
9887 if (nparts == 3)
9888 {
9889 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
9890 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
9891 emit_move_insn (part[0][2], part[1][2]);
9892 }
9893 }
9894 else
9895 {
9896 /* In 64bit mode we don't have a 32bit push available. If this is a
9897 register, that is OK - we will just use the larger counterpart. We also
9898 retype the memory - this comes from an attempt to avoid the REX prefix
9899 on moving the second half of a TFmode value. */
9900 if (GET_MODE (part[1][1]) == SImode)
9901 {
9902 if (GET_CODE (part[1][1]) == MEM)
9903 part[1][1] = adjust_address (part[1][1], DImode, 0);
9904 else if (REG_P (part[1][1]))
9905 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
9906 else
9907 abort ();
9908 if (GET_MODE (part[1][0]) == SImode)
9909 part[1][0] = part[1][1];
9910 }
9911 }
9912 emit_move_insn (part[0][1], part[1][1]);
9913 emit_move_insn (part[0][0], part[1][0]);
9914 return;
9915 }
9916
9917 /* Choose the correct order so we do not overwrite the source before it is copied. */
9918 if ((REG_P (part[0][0])
9919 && REG_P (part[1][1])
9920 && (REGNO (part[0][0]) == REGNO (part[1][1])
9921 || (nparts == 3
9922 && REGNO (part[0][0]) == REGNO (part[1][2]))))
9923 || (collisions > 0
9924 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
9925 {
9926 if (nparts == 3)
9927 {
9928 operands[2] = part[0][2];
9929 operands[3] = part[0][1];
9930 operands[4] = part[0][0];
9931 operands[5] = part[1][2];
9932 operands[6] = part[1][1];
9933 operands[7] = part[1][0];
9934 }
9935 else
9936 {
9937 operands[2] = part[0][1];
9938 operands[3] = part[0][0];
9939 operands[5] = part[1][1];
9940 operands[6] = part[1][0];
9941 }
9942 }
9943 else
9944 {
9945 if (nparts == 3)
9946 {
9947 operands[2] = part[0][0];
9948 operands[3] = part[0][1];
9949 operands[4] = part[0][2];
9950 operands[5] = part[1][0];
9951 operands[6] = part[1][1];
9952 operands[7] = part[1][2];
9953 }
9954 else
9955 {
9956 operands[2] = part[0][0];
9957 operands[3] = part[0][1];
9958 operands[5] = part[1][0];
9959 operands[6] = part[1][1];
9960 }
9961 }
9962
9963 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
9964 if (optimize_size)
9965 {
9966 if (GET_CODE (operands[5]) == CONST_INT
9967 && operands[5] != const0_rtx
9968 && REG_P (operands[2]))
9969 {
9970 if (GET_CODE (operands[6]) == CONST_INT
9971 && INTVAL (operands[6]) == INTVAL (operands[5]))
9972 operands[6] = operands[2];
9973
9974 if (nparts == 3
9975 && GET_CODE (operands[7]) == CONST_INT
9976 && INTVAL (operands[7]) == INTVAL (operands[5]))
9977 operands[7] = operands[2];
9978 }
9979
9980 if (nparts == 3
9981 && GET_CODE (operands[6]) == CONST_INT
9982 && operands[6] != const0_rtx
9983 && REG_P (operands[3])
9984 && GET_CODE (operands[7]) == CONST_INT
9985 && INTVAL (operands[7]) == INTVAL (operands[6]))
9986 operands[7] = operands[3];
9987 }
9988
9989 emit_move_insn (operands[2], operands[5]);
9990 emit_move_insn (operands[3], operands[6]);
9991 if (nparts == 3)
9992 emit_move_insn (operands[4], operands[7]);
9993
9994 return;
9995 }
9996
9997 /* Helper function of ix86_split_ashldi used to generate an SImode
9998 left shift by a constant, either using a single shift or
9999 a sequence of add instructions. */
10000
10001 static void
10002 ix86_expand_ashlsi3_const (rtx operand, int count)
10003 {
10004 if (count == 1)
10005 emit_insn (gen_addsi3 (operand, operand, operand));
10006 else if (!optimize_size
10007 && count * ix86_cost->add <= ix86_cost->shift_const)
10008 {
10009 int i;
10010 for (i=0; i<count; i++)
10011 emit_insn (gen_addsi3 (operand, operand, operand));
10012 }
10013 else
10014 emit_insn (gen_ashlsi3 (operand, operand, GEN_INT (count)));
10015 }
10016
10017 void
10018 ix86_split_ashldi (rtx *operands, rtx scratch)
10019 {
10020 rtx low[2], high[2];
10021 int count;
10022
10023 if (GET_CODE (operands[2]) == CONST_INT)
10024 {
10025 split_di (operands, 2, low, high);
10026 count = INTVAL (operands[2]) & 63;
10027
10028 if (count >= 32)
10029 {
10030 emit_move_insn (high[0], low[1]);
10031 emit_move_insn (low[0], const0_rtx);
10032
10033 if (count > 32)
10034 ix86_expand_ashlsi3_const (high[0], count - 32);
10035 }
10036 else
10037 {
10038 if (!rtx_equal_p (operands[0], operands[1]))
10039 emit_move_insn (operands[0], operands[1]);
10040 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
10041 ix86_expand_ashlsi3_const (low[0], count);
10042 }
10043 return;
10044 }
10045
10046 split_di (operands, 1, low, high);
10047
10048 if (operands[1] == const1_rtx)
10049 {
10050 /* Assuming we've chosen QImode-capable registers, 1LL << N
10051 can be done with two 32-bit shifts, no branches, no cmoves. */
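/* Testing bit 5 of the shift count sets low[0] to 1 exactly when the count
   is below 32 and high[0] to 1 when it is 32 or more; the two sall
   instructions emitted below then shift each half by the count modulo 32,
   which gives the correct 64-bit result because only one half holds the 1.  */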
10052 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
10053 {
10054 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
10055
10056 ix86_expand_clear (low[0]);
10057 ix86_expand_clear (high[0]);
10058 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (32)));
10059
10060 d = gen_lowpart (QImode, low[0]);
10061 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
10062 s = gen_rtx_EQ (QImode, flags, const0_rtx);
10063 emit_insn (gen_rtx_SET (VOIDmode, d, s));
10064
10065 d = gen_lowpart (QImode, high[0]);
10066 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
10067 s = gen_rtx_NE (QImode, flags, const0_rtx);
10068 emit_insn (gen_rtx_SET (VOIDmode, d, s));
10069 }
10070
10071 /* Otherwise, we can get the same results by manually performing
10072 a bit extract operation on bit 5, and then performing the two
10073 shifts. The two methods of getting 0/1 into low/high are exactly
10074 the same size. Avoiding the shift in the bit extract case helps
10075 pentium4 a bit; no one else seems to care much either way. */
10076 else
10077 {
10078 rtx x;
10079
10080 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
10081 x = gen_rtx_ZERO_EXTEND (SImode, operands[2]);
10082 else
10083 x = gen_lowpart (SImode, operands[2]);
10084 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
10085
10086 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (5)));
10087 emit_insn (gen_andsi3 (high[0], high[0], GEN_INT (1)));
10088 emit_move_insn (low[0], high[0]);
10089 emit_insn (gen_xorsi3 (low[0], low[0], GEN_INT (1)));
10090 }
10091
10092 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
10093 emit_insn (gen_ashlsi3 (high[0], high[0], operands[2]));
10094 return;
10095 }
10096
10097 if (operands[1] == constm1_rtx)
10098 {
10099 /* For -1LL << N, we can avoid the shld instruction, because we
10100 know that we're shifting 0...31 ones into a -1. */
10101 emit_move_insn (low[0], constm1_rtx);
10102 if (optimize_size)
10103 emit_move_insn (high[0], low[0]);
10104 else
10105 emit_move_insn (high[0], constm1_rtx);
10106 }
10107 else
10108 {
10109 if (!rtx_equal_p (operands[0], operands[1]))
10110 emit_move_insn (operands[0], operands[1]);
10111
10112 split_di (operands, 1, low, high);
10113 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
10114 }
10115
10116 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
10117
10118 if (TARGET_CMOVE && scratch)
10119 {
10120 ix86_expand_clear (scratch);
10121 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
10122 }
10123 else
10124 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
10125 }
10126
10127 void
10128 ix86_split_ashrdi (rtx *operands, rtx scratch)
10129 {
10130 rtx low[2], high[2];
10131 int count;
10132
10133 if (GET_CODE (operands[2]) == CONST_INT)
10134 {
10135 split_di (operands, 2, low, high);
10136 count = INTVAL (operands[2]) & 63;
10137
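/* A constant shift by 63 just broadcasts the sign bit into both words;
   shifts of 32..62 move the high word into the low word, fill the high
   word with the sign, and, for counts above 32, finish the low word
   with a shorter arithmetic shift.  */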
10138 if (count == 63)
10139 {
10140 emit_move_insn (high[0], high[1]);
10141 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10142 emit_move_insn (low[0], high[0]);
10143
10144 }
10145 else if (count >= 32)
10146 {
10147 emit_move_insn (low[0], high[1]);
10148 emit_move_insn (high[0], low[0]);
10149 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10150 if (count > 32)
10151 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
10152 }
10153 else
10154 {
10155 if (!rtx_equal_p (operands[0], operands[1]))
10156 emit_move_insn (operands[0], operands[1]);
10157 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10158 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
10159 }
10160 }
10161 else
10162 {
10163 if (!rtx_equal_p (operands[0], operands[1]))
10164 emit_move_insn (operands[0], operands[1]);
10165
10166 split_di (operands, 1, low, high);
10167
10168 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10169 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
10170
10171 if (TARGET_CMOVE && scratch)
10172 {
10173 emit_move_insn (scratch, high[0]);
10174 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
10175 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10176 scratch));
10177 }
10178 else
10179 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
10180 }
10181 }
10182
10183 void
10184 ix86_split_lshrdi (rtx *operands, rtx scratch)
10185 {
10186 rtx low[2], high[2];
10187 int count;
10188
10189 if (GET_CODE (operands[2]) == CONST_INT)
10190 {
10191 split_di (operands, 2, low, high);
10192 count = INTVAL (operands[2]) & 63;
10193
10194 if (count >= 32)
10195 {
10196 emit_move_insn (low[0], high[1]);
10197 ix86_expand_clear (high[0]);
10198
10199 if (count > 32)
10200 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
10201 }
10202 else
10203 {
10204 if (!rtx_equal_p (operands[0], operands[1]))
10205 emit_move_insn (operands[0], operands[1]);
10206 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10207 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
10208 }
10209 }
10210 else
10211 {
10212 if (!rtx_equal_p (operands[0], operands[1]))
10213 emit_move_insn (operands[0], operands[1]);
10214
10215 split_di (operands, 1, low, high);
10216
10217 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10218 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
10219
10220 /* Heh. By reversing the arguments, we can reuse this pattern. */
10221 if (TARGET_CMOVE && scratch)
10222 {
10223 ix86_expand_clear (scratch);
10224 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10225 scratch));
10226 }
10227 else
10228 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
10229 }
10230 }
10231
10232 /* Helper function for the string operations below. Test whether the VALUE
10233 bits of VARIABLE are clear (i.e. it is suitably aligned); if so, jump to the returned label. */
10234 static rtx
10235 ix86_expand_aligntest (rtx variable, int value)
10236 {
10237 rtx label = gen_label_rtx ();
10238 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
10239 if (GET_MODE (variable) == DImode)
10240 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
10241 else
10242 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
10243 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
10244 1, label);
10245 return label;
10246 }
10247
10248 /* Adjust COUNTER by the VALUE. */
10249 static void
10250 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
10251 {
10252 if (GET_MODE (countreg) == DImode)
10253 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
10254 else
10255 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
10256 }
10257
10258 /* Zero extend possibly SImode EXP to Pmode register. */
10259 rtx
10260 ix86_zero_extend_to_Pmode (rtx exp)
10261 {
10262 rtx r;
10263 if (GET_MODE (exp) == VOIDmode)
10264 return force_reg (Pmode, exp);
10265 if (GET_MODE (exp) == Pmode)
10266 return copy_to_mode_reg (Pmode, exp);
10267 r = gen_reg_rtx (Pmode);
10268 emit_insn (gen_zero_extendsidi2 (r, exp));
10269 return r;
10270 }
10271
10272 /* Expand string move (memcpy) operation. Use i386 string operations when
10273 profitable. expand_clrmem contains similar code. */
10274 int
10275 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp)
10276 {
10277 rtx srcreg, destreg, countreg, srcexp, destexp;
10278 enum machine_mode counter_mode;
10279 HOST_WIDE_INT align = 0;
10280 unsigned HOST_WIDE_INT count = 0;
10281
10282 if (GET_CODE (align_exp) == CONST_INT)
10283 align = INTVAL (align_exp);
10284
10285 /* Can't use any of this if the user has appropriated esi or edi. */
10286 if (global_regs[4] || global_regs[5])
10287 return 0;
10288
10289 /* This simple hack avoids all inlining code and simplifies code below. */
10290 if (!TARGET_ALIGN_STRINGOPS)
10291 align = 64;
10292
10293 if (GET_CODE (count_exp) == CONST_INT)
10294 {
10295 count = INTVAL (count_exp);
10296 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
10297 return 0;
10298 }
10299
10300 /* Figure out the proper mode for the counter. For 32bit targets it is always SImode;
10301 for 64bit targets use SImode when possible, otherwise DImode.
10302 Set count to the number of bytes copied when known at compile time. */
10303 if (!TARGET_64BIT
10304 || GET_MODE (count_exp) == SImode
10305 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
10306 counter_mode = SImode;
10307 else
10308 counter_mode = DImode;
10309
10310 if (counter_mode != SImode && counter_mode != DImode)
10311 abort ();
10312
10313 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
10314 if (destreg != XEXP (dst, 0))
10315 dst = replace_equiv_address_nv (dst, destreg);
10316 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
10317 if (srcreg != XEXP (src, 0))
10318 src = replace_equiv_address_nv (src, srcreg);
10319
10320 /* When optimizing for size emit simple rep ; movsb instruction for
10321 counts not divisible by 4. */
10322
10323 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
10324 {
10325 emit_insn (gen_cld ());
10326 countreg = ix86_zero_extend_to_Pmode (count_exp);
10327 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
10328 srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
10329 emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
10330 destexp, srcexp));
10331 }
10332
10333 /* For constant aligned (or small unaligned) copies use rep movsl
10334 followed by code copying the rest. For PentiumPro ensure 8 byte
10335 alignment to allow rep movsl acceleration. */
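/* For example, with size == 4 and count == 23, the rep movsl below copies
   20 bytes (5 dwords) and the tail is finished with one movsw and one
   movsb; with size == 8 an extra movsl handles a remaining dword.  */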
10336
10337 else if (count != 0
10338 && (align >= 8
10339 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
10340 || optimize_size || count < (unsigned int) 64))
10341 {
10342 unsigned HOST_WIDE_INT offset = 0;
10343 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10344 rtx srcmem, dstmem;
10345
10346 emit_insn (gen_cld ());
10347 if (count & ~(size - 1))
10348 {
10349 countreg = copy_to_mode_reg (counter_mode,
10350 GEN_INT ((count >> (size == 4 ? 2 : 3))
10351 & (TARGET_64BIT ? -1 : 0x3fffffff)));
10352 countreg = ix86_zero_extend_to_Pmode (countreg);
10353
10354 destexp = gen_rtx_ASHIFT (Pmode, countreg,
10355 GEN_INT (size == 4 ? 2 : 3));
10356 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
10357 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
10358
10359 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
10360 countreg, destexp, srcexp));
10361 offset = count & ~(size - 1);
10362 }
10363 if (size == 8 && (count & 0x04))
10364 {
10365 srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
10366 offset);
10367 dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
10368 offset);
10369 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10370 offset += 4;
10371 }
10372 if (count & 0x02)
10373 {
10374 srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
10375 offset);
10376 dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
10377 offset);
10378 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10379 offset += 2;
10380 }
10381 if (count & 0x01)
10382 {
10383 srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
10384 offset);
10385 dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
10386 offset);
10387 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10388 }
10389 }
10390 /* The generic code based on the glibc implementation:
10391 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
10392 allowing accelerated copying there)
10393 - copy the data using rep movsl
10394 - copy the rest. */
10395 else
10396 {
10397 rtx countreg2;
10398 rtx label = NULL;
10399 rtx srcmem, dstmem;
10400 int desired_alignment = (TARGET_PENTIUMPRO
10401 && (count == 0 || count >= (unsigned int) 260)
10402 ? 8 : UNITS_PER_WORD);
10403 /* Get rid of MEM_OFFSETs, they won't be accurate. */
10404 dst = change_address (dst, BLKmode, destreg);
10405 src = change_address (src, BLKmode, srcreg);
10406
10407 /* In case we don't know anything about the alignment, default to the
10408 library version, since it is usually equally fast and results in
10409 shorter code.
10410
10411 Also emit the library call when we know that the count is large and the
10412 call overhead will not be important. */
10413 if (!TARGET_INLINE_ALL_STRINGOPS
10414 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
10415 return 0;
10416
10417 if (TARGET_SINGLE_STRINGOP)
10418 emit_insn (gen_cld ());
10419
10420 countreg2 = gen_reg_rtx (Pmode);
10421 countreg = copy_to_mode_reg (counter_mode, count_exp);
10422
10423 /* We don't use loops to align destination and to copy parts smaller
10424 than 4 bytes, because gcc is able to optimize such code better (in
10425 the case the destination or the count really is aligned, gcc is often
10426 able to predict the branches) and also it is friendlier to the
10427 hardware branch prediction.
10428
10429 Using loops is beneficial for the generic case, because we can
10430 handle small counts using the loops. Many CPUs (such as Athlon)
10431 have large REP prefix setup costs.
10432
10433 This is quite costly. Maybe we can revisit this decision later or
10434 add some customizability to this code. */
10435
10436 if (count == 0 && align < desired_alignment)
10437 {
10438 label = gen_label_rtx ();
10439 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
10440 LEU, 0, counter_mode, 1, label);
10441 }
10442 if (align <= 1)
10443 {
10444 rtx label = ix86_expand_aligntest (destreg, 1);
10445 srcmem = change_address (src, QImode, srcreg);
10446 dstmem = change_address (dst, QImode, destreg);
10447 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10448 ix86_adjust_counter (countreg, 1);
10449 emit_label (label);
10450 LABEL_NUSES (label) = 1;
10451 }
10452 if (align <= 2)
10453 {
10454 rtx label = ix86_expand_aligntest (destreg, 2);
10455 srcmem = change_address (src, HImode, srcreg);
10456 dstmem = change_address (dst, HImode, destreg);
10457 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10458 ix86_adjust_counter (countreg, 2);
10459 emit_label (label);
10460 LABEL_NUSES (label) = 1;
10461 }
10462 if (align <= 4 && desired_alignment > 4)
10463 {
10464 rtx label = ix86_expand_aligntest (destreg, 4);
10465 srcmem = change_address (src, SImode, srcreg);
10466 dstmem = change_address (dst, SImode, destreg);
10467 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10468 ix86_adjust_counter (countreg, 4);
10469 emit_label (label);
10470 LABEL_NUSES (label) = 1;
10471 }
10472
10473 if (label && desired_alignment > 4 && !TARGET_64BIT)
10474 {
10475 emit_label (label);
10476 LABEL_NUSES (label) = 1;
10477 label = NULL_RTX;
10478 }
10479 if (!TARGET_SINGLE_STRINGOP)
10480 emit_insn (gen_cld ());
10481 if (TARGET_64BIT)
10482 {
10483 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10484 GEN_INT (3)));
10485 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
10486 }
10487 else
10488 {
10489 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
10490 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
10491 }
10492 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
10493 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
10494 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
10495 countreg2, destexp, srcexp));
10496
10497 if (label)
10498 {
10499 emit_label (label);
10500 LABEL_NUSES (label) = 1;
10501 }
10502 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10503 {
10504 srcmem = change_address (src, SImode, srcreg);
10505 dstmem = change_address (dst, SImode, destreg);
10506 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10507 }
10508 if ((align <= 4 || count == 0) && TARGET_64BIT)
10509 {
10510 rtx label = ix86_expand_aligntest (countreg, 4);
10511 srcmem = change_address (src, SImode, srcreg);
10512 dstmem = change_address (dst, SImode, destreg);
10513 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10514 emit_label (label);
10515 LABEL_NUSES (label) = 1;
10516 }
10517 if (align > 2 && count != 0 && (count & 2))
10518 {
10519 srcmem = change_address (src, HImode, srcreg);
10520 dstmem = change_address (dst, HImode, destreg);
10521 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10522 }
10523 if (align <= 2 || count == 0)
10524 {
10525 rtx label = ix86_expand_aligntest (countreg, 2);
10526 srcmem = change_address (src, HImode, srcreg);
10527 dstmem = change_address (dst, HImode, destreg);
10528 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10529 emit_label (label);
10530 LABEL_NUSES (label) = 1;
10531 }
10532 if (align > 1 && count != 0 && (count & 1))
10533 {
10534 srcmem = change_address (src, QImode, srcreg);
10535 dstmem = change_address (dst, QImode, destreg);
10536 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10537 }
10538 if (align <= 1 || count == 0)
10539 {
10540 rtx label = ix86_expand_aligntest (countreg, 1);
10541 srcmem = change_address (src, QImode, srcreg);
10542 dstmem = change_address (dst, QImode, destreg);
10543 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10544 emit_label (label);
10545 LABEL_NUSES (label) = 1;
10546 }
10547 }
10548
10549 return 1;
10550 }
10551
10552 /* Expand string clear operation (bzero). Use i386 string operations when
10553 profitable. expand_movmem contains similar code. */
10554 int
10555 ix86_expand_clrmem (rtx dst, rtx count_exp, rtx align_exp)
10556 {
10557 rtx destreg, zeroreg, countreg, destexp;
10558 enum machine_mode counter_mode;
10559 HOST_WIDE_INT align = 0;
10560 unsigned HOST_WIDE_INT count = 0;
10561
10562 if (GET_CODE (align_exp) == CONST_INT)
10563 align = INTVAL (align_exp);
10564
10565 /* Can't use any of this if the user has appropriated esi. */
10566 if (global_regs[4])
10567 return 0;
10568
10569 /* This simple hack avoids all inlining code and simplifies code below. */
10570 if (!TARGET_ALIGN_STRINGOPS)
10571 align = 32;
10572
10573 if (GET_CODE (count_exp) == CONST_INT)
10574 {
10575 count = INTVAL (count_exp);
10576 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
10577 return 0;
10578 }
10579 /* Figure out proper mode for counter. For 32bits it is always SImode,
10580 for 64bits use SImode when possible, otherwise DImode.
10581 Set count to number of bytes copied when known at compile time. */
10582 if (!TARGET_64BIT
10583 || GET_MODE (count_exp) == SImode
10584 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
10585 counter_mode = SImode;
10586 else
10587 counter_mode = DImode;
10588
10589 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
10590 if (destreg != XEXP (dst, 0))
10591 dst = replace_equiv_address_nv (dst, destreg);
10592
10593
10594 /* When optimizing for size emit a simple rep ; stosb instruction for
10595 counts not divisible by 4. The movl $N, %ecx; rep; stosb
10596 sequence is 7 bytes long, so if optimizing for size and count is
10597 small enough that some stosl, stosw and stosb instructions without
10598 rep are shorter, fall back into the next if. */
10599
10600 if ((!optimize || optimize_size)
10601 && (count == 0
10602 || ((count & 0x03)
10603 && (!optimize_size || (count & 0x03) + (count >> 2) > 7))))
10604 {
10605 emit_insn (gen_cld ());
10606
10607 countreg = ix86_zero_extend_to_Pmode (count_exp);
10608 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
10609 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
10610 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
10611 }
10612 else if (count != 0
10613 && (align >= 8
10614 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
10615 || optimize_size || count < (unsigned int) 64))
10616 {
10617 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10618 unsigned HOST_WIDE_INT offset = 0;
10619
10620 emit_insn (gen_cld ());
10621
10622 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
10623 if (count & ~(size - 1))
10624 {
10625 unsigned HOST_WIDE_INT repcount;
10626 unsigned int max_nonrep;
10627
10628 repcount = count >> (size == 4 ? 2 : 3);
10629 if (!TARGET_64BIT)
10630 repcount &= 0x3fffffff;
10631
10632 /* movl $N, %ecx; rep; stosl is 7 bytes, while N x stosl is N bytes.
10633 movl $N, %ecx; rep; stosq is 8 bytes, while N x stosq is 2xN
10634 bytes. In both cases the latter seems to be faster for small
10635 values of N. */
10636 max_nonrep = size == 4 ? 7 : 4;
10637 if (!optimize_size)
10638 switch (ix86_tune)
10639 {
10640 case PROCESSOR_PENTIUM4:
10641 case PROCESSOR_NOCONA:
10642 max_nonrep = 3;
10643 break;
10644 default:
10645 break;
10646 }
10647
10648 if (repcount <= max_nonrep)
10649 while (repcount-- > 0)
10650 {
10651 rtx mem = adjust_automodify_address_nv (dst,
10652 GET_MODE (zeroreg),
10653 destreg, offset);
10654 emit_insn (gen_strset (destreg, mem, zeroreg));
10655 offset += size;
10656 }
10657 else
10658 {
10659 countreg = copy_to_mode_reg (counter_mode, GEN_INT (repcount));
10660 countreg = ix86_zero_extend_to_Pmode (countreg);
10661 destexp = gen_rtx_ASHIFT (Pmode, countreg,
10662 GEN_INT (size == 4 ? 2 : 3));
10663 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
10664 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg,
10665 destexp));
10666 offset = count & ~(size - 1);
10667 }
10668 }
10669 if (size == 8 && (count & 0x04))
10670 {
10671 rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
10672 offset);
10673 emit_insn (gen_strset (destreg, mem,
10674 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10675 offset += 4;
10676 }
10677 if (count & 0x02)
10678 {
10679 rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
10680 offset);
10681 emit_insn (gen_strset (destreg, mem,
10682 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10683 offset += 2;
10684 }
10685 if (count & 0x01)
10686 {
10687 rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
10688 offset);
10689 emit_insn (gen_strset (destreg, mem,
10690 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10691 }
10692 }
10693 else
10694 {
10695 rtx countreg2;
10696 rtx label = NULL;
10697 /* Compute desired alignment of the string operation. */
10698 int desired_alignment = (TARGET_PENTIUMPRO
10699 && (count == 0 || count >= (unsigned int) 260)
10700 ? 8 : UNITS_PER_WORD);
10701
10702 /* In case we don't know anything about the alignment, default to the
10703 library version, since it is usually equally fast and results in
10704 shorter code.
10705
10706 Also emit the library call when we know that the count is large and the
10707 call overhead will not be important. */
10708 if (!TARGET_INLINE_ALL_STRINGOPS
10709 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
10710 return 0;
10711
10712 if (TARGET_SINGLE_STRINGOP)
10713 emit_insn (gen_cld ());
10714
10715 countreg2 = gen_reg_rtx (Pmode);
10716 countreg = copy_to_mode_reg (counter_mode, count_exp);
10717 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
10718 /* Get rid of MEM_OFFSET, it won't be accurate. */
10719 dst = change_address (dst, BLKmode, destreg);
10720
10721 if (count == 0 && align < desired_alignment)
10722 {
10723 label = gen_label_rtx ();
10724 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
10725 LEU, 0, counter_mode, 1, label);
10726 }
10727 if (align <= 1)
10728 {
10729 rtx label = ix86_expand_aligntest (destreg, 1);
10730 emit_insn (gen_strset (destreg, dst,
10731 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10732 ix86_adjust_counter (countreg, 1);
10733 emit_label (label);
10734 LABEL_NUSES (label) = 1;
10735 }
10736 if (align <= 2)
10737 {
10738 rtx label = ix86_expand_aligntest (destreg, 2);
10739 emit_insn (gen_strset (destreg, dst,
10740 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10741 ix86_adjust_counter (countreg, 2);
10742 emit_label (label);
10743 LABEL_NUSES (label) = 1;
10744 }
10745 if (align <= 4 && desired_alignment > 4)
10746 {
10747 rtx label = ix86_expand_aligntest (destreg, 4);
10748 emit_insn (gen_strset (destreg, dst,
10749 (TARGET_64BIT
10750 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
10751 : zeroreg)));
10752 ix86_adjust_counter (countreg, 4);
10753 emit_label (label);
10754 LABEL_NUSES (label) = 1;
10755 }
10756
10757 if (label && desired_alignment > 4 && !TARGET_64BIT)
10758 {
10759 emit_label (label);
10760 LABEL_NUSES (label) = 1;
10761 label = NULL_RTX;
10762 }
10763
10764 if (!TARGET_SINGLE_STRINGOP)
10765 emit_insn (gen_cld ());
10766 if (TARGET_64BIT)
10767 {
10768 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10769 GEN_INT (3)));
10770 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
10771 }
10772 else
10773 {
10774 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
10775 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
10776 }
10777 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
10778 emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));
10779
10780 if (label)
10781 {
10782 emit_label (label);
10783 LABEL_NUSES (label) = 1;
10784 }
10785
10786 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10787 emit_insn (gen_strset (destreg, dst,
10788 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10789 if (TARGET_64BIT && (align <= 4 || count == 0))
10790 {
10791 rtx label = ix86_expand_aligntest (countreg, 4);
10792 emit_insn (gen_strset (destreg, dst,
10793 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10794 emit_label (label);
10795 LABEL_NUSES (label) = 1;
10796 }
10797 if (align > 2 && count != 0 && (count & 2))
10798 emit_insn (gen_strset (destreg, dst,
10799 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10800 if (align <= 2 || count == 0)
10801 {
10802 rtx label = ix86_expand_aligntest (countreg, 2);
10803 emit_insn (gen_strset (destreg, dst,
10804 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10805 emit_label (label);
10806 LABEL_NUSES (label) = 1;
10807 }
10808 if (align > 1 && count != 0 && (count & 1))
10809 emit_insn (gen_strset (destreg, dst,
10810 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10811 if (align <= 1 || count == 0)
10812 {
10813 rtx label = ix86_expand_aligntest (countreg, 1);
10814 emit_insn (gen_strset (destreg, dst,
10815 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10816 emit_label (label);
10817 LABEL_NUSES (label) = 1;
10818 }
10819 }
10820 return 1;
10821 }
10822
10823 /* Expand strlen. */
10824 int
10825 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
10826 {
10827 rtx addr, scratch1, scratch2, scratch3, scratch4;
10828
10829 /* The generic case of the strlen expander is long. Avoid expanding it
10830 unless TARGET_INLINE_ALL_STRINGOPS. */
10831
10832 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
10833 && !TARGET_INLINE_ALL_STRINGOPS
10834 && !optimize_size
10835 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
10836 return 0;
10837
10838 addr = force_reg (Pmode, XEXP (src, 0));
10839 scratch1 = gen_reg_rtx (Pmode);
10840
10841 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
10842 && !optimize_size)
10843 {
10844 /* Well it seems that some optimizer does not combine a call like
10845 foo(strlen(bar), strlen(bar));
10846 when the move and the subtraction are done here. It does calculate
10847 the length just once when these instructions are done inside of
10848 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
10849 often used and I use one fewer register for the lifetime of
10850 output_strlen_unroll() this is better. */
10851
10852 emit_move_insn (out, addr);
10853
10854 ix86_expand_strlensi_unroll_1 (out, src, align);
10855
10856 /* strlensi_unroll_1 returns the address of the zero at the end of
10857 the string, like memchr(), so compute the length by subtracting
10858 the start address. */
10859 if (TARGET_64BIT)
10860 emit_insn (gen_subdi3 (out, out, addr));
10861 else
10862 emit_insn (gen_subsi3 (out, out, addr));
10863 }
10864 else
10865 {
10866 rtx unspec;
10867 scratch2 = gen_reg_rtx (Pmode);
10868 scratch3 = gen_reg_rtx (Pmode);
10869 scratch4 = force_reg (Pmode, constm1_rtx);
10870
10871 emit_move_insn (scratch3, addr);
10872 eoschar = force_reg (QImode, eoschar);
10873
10874 emit_insn (gen_cld ());
10875 src = replace_equiv_address_nv (src, scratch3);
10876
10877 /* If .md starts supporting :P, this can be done in .md. */
10878 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
10879 scratch4), UNSPEC_SCAS);
10880 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
10881 if (TARGET_64BIT)
10882 {
10883 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
10884 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
10885 }
10886 else
10887 {
10888 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
10889 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
10890 }
10891 }
10892 return 1;
10893 }
10894
10895 /* Expand the appropriate insns for doing strlen if not just doing
10896 repnz; scasb
10897
10898 out = result, initialized with the start address
10899 align_rtx = alignment of the address.
10900 scratch = scratch register, initialized with the start address when
10901 not aligned, otherwise undefined
10902
10903 This is just the body. It needs the initializations mentioned above and
10904 some address computing at the end. These things are done in i386.md. */
10905
10906 static void
10907 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
10908 {
10909 int align;
10910 rtx tmp;
10911 rtx align_2_label = NULL_RTX;
10912 rtx align_3_label = NULL_RTX;
10913 rtx align_4_label = gen_label_rtx ();
10914 rtx end_0_label = gen_label_rtx ();
10915 rtx mem;
10916 rtx tmpreg = gen_reg_rtx (SImode);
10917 rtx scratch = gen_reg_rtx (SImode);
10918 rtx cmp;
10919
10920 align = 0;
10921 if (GET_CODE (align_rtx) == CONST_INT)
10922 align = INTVAL (align_rtx);
10923
10924 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
10925
10926 /* Is there a known alignment and is it less than 4? */
10927 if (align < 4)
10928 {
10929 rtx scratch1 = gen_reg_rtx (Pmode);
10930 emit_move_insn (scratch1, out);
10931 /* Is there a known alignment and is it not 2? */
10932 if (align != 2)
10933 {
10934 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
10935 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
10936
10937 /* Leave just the 3 lower bits. */
10938 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
10939 NULL_RTX, 0, OPTAB_WIDEN);
10940
10941 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
10942 Pmode, 1, align_4_label);
10943 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
10944 Pmode, 1, align_2_label);
10945 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
10946 Pmode, 1, align_3_label);
10947 }
10948 else
10949 {
10950 /* Since the alignment is 2, we have to check 2 or 0 bytes;
10951 check whether it is aligned to 4 bytes. */
10952
10953 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
10954 NULL_RTX, 0, OPTAB_WIDEN);
10955
10956 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
10957 Pmode, 1, align_4_label);
10958 }
10959
10960 mem = change_address (src, QImode, out);
10961
10962 /* Now compare the bytes. */
10963
10964 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
10965 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
10966 QImode, 1, end_0_label);
10967
10968 /* Increment the address. */
10969 if (TARGET_64BIT)
10970 emit_insn (gen_adddi3 (out, out, const1_rtx));
10971 else
10972 emit_insn (gen_addsi3 (out, out, const1_rtx));
10973
10974 /* Not needed with an alignment of 2 */
10975 if (align != 2)
10976 {
10977 emit_label (align_2_label);
10978
10979 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
10980 end_0_label);
10981
10982 if (TARGET_64BIT)
10983 emit_insn (gen_adddi3 (out, out, const1_rtx));
10984 else
10985 emit_insn (gen_addsi3 (out, out, const1_rtx));
10986
10987 emit_label (align_3_label);
10988 }
10989
10990 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
10991 end_0_label);
10992
10993 if (TARGET_64BIT)
10994 emit_insn (gen_adddi3 (out, out, const1_rtx));
10995 else
10996 emit_insn (gen_addsi3 (out, out, const1_rtx));
10997 }
10998
10999 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
11000 align this loop; that only makes the program larger and does not
11001 help to speed it up. */
11002 emit_label (align_4_label);
11003
11004 mem = change_address (src, SImode, out);
11005 emit_move_insn (scratch, mem);
11006 if (TARGET_64BIT)
11007 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
11008 else
11009 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
11010
11011 /* This formula yields a nonzero result iff one of the bytes is zero.
11012 This saves three branches inside the loop and many cycles. */
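/* The value computed below is (x - 0x01010101) & ~x & 0x80808080.
   A zero byte of x borrows when 0x01 is subtracted, setting bit 7 of
   that byte, and since its high bit was clear the ~x term keeps it.
   Bytes below the first zero byte cannot be flagged: they neither
   borrow nor (if >= 0x80) survive the ~x mask, so the result is
   nonzero exactly when x contains a zero byte and the lowest flagged
   byte is a genuine zero, which the code after the loop relies on.  */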
11013
11014 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
11015 emit_insn (gen_one_cmplsi2 (scratch, scratch));
11016 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
11017 emit_insn (gen_andsi3 (tmpreg, tmpreg,
11018 gen_int_mode (0x80808080, SImode)));
11019 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
11020 align_4_label);
11021
11022 if (TARGET_CMOVE)
11023 {
11024 rtx reg = gen_reg_rtx (SImode);
11025 rtx reg2 = gen_reg_rtx (Pmode);
11026 emit_move_insn (reg, tmpreg);
11027 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
11028
11029 /* If zero is not in the first two bytes, move two bytes forward. */
11030 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11031 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11032 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11033 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
11034 gen_rtx_IF_THEN_ELSE (SImode, tmp,
11035 reg,
11036 tmpreg)));
11037 /* Emit lea manually to avoid clobbering of flags. */
11038 emit_insn (gen_rtx_SET (SImode, reg2,
11039 gen_rtx_PLUS (Pmode, out, const2_rtx)));
11040
11041 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11042 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11043 emit_insn (gen_rtx_SET (VOIDmode, out,
11044 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
11045 reg2,
11046 out)));
11047
11048 }
11049 else
11050 {
11051 rtx end_2_label = gen_label_rtx ();
11052 /* Is zero in the first two bytes? */
11053
11054 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11055 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11056 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
11057 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11058 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
11059 pc_rtx);
11060 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11061 JUMP_LABEL (tmp) = end_2_label;
11062
11063 /* Not in the first two. Move two bytes forward. */
11064 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
11065 if (TARGET_64BIT)
11066 emit_insn (gen_adddi3 (out, out, const2_rtx));
11067 else
11068 emit_insn (gen_addsi3 (out, out, const2_rtx));
11069
11070 emit_label (end_2_label);
11071
11072 }
11073
11074 /* Avoid branch in fixing the byte. */
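/* Bit 7 of TMPREG's low byte is set exactly when the zero byte is the lower
   of the two remaining candidates, i.e. when OUT overshoots the zero byte
   by 4 rather than 3.  Adding that byte to itself moves the bit into the
   carry flag, so the subtract-with-borrow below produces OUT - 4 or
   OUT - 3 without a branch.  */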
11075 tmpreg = gen_lowpart (QImode, tmpreg);
11076 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
11077 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
11078 if (TARGET_64BIT)
11079 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
11080 else
11081 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
11082
11083 emit_label (end_0_label);
11084 }
11085
11086 void
11087 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
11088 rtx callarg2 ATTRIBUTE_UNUSED,
11089 rtx pop, int sibcall)
11090 {
11091 rtx use = NULL, call;
11092
11093 if (pop == const0_rtx)
11094 pop = NULL;
11095 if (TARGET_64BIT && pop)
11096 abort ();
11097
11098 #if TARGET_MACHO
11099 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
11100 fnaddr = machopic_indirect_call_target (fnaddr);
11101 #else
11102 /* Static functions and indirect calls don't need the pic register. */
11103 if (! TARGET_64BIT && flag_pic
11104 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
11105 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
11106 use_reg (&use, pic_offset_table_rtx);
11107
11108 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
11109 {
11110 rtx al = gen_rtx_REG (QImode, 0);
11111 emit_move_insn (al, callarg2);
11112 use_reg (&use, al);
11113 }
11114 #endif /* TARGET_MACHO */
11115
11116 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
11117 {
11118 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11119 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11120 }
11121 if (sibcall && TARGET_64BIT
11122 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
11123 {
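/* A sibcall's arguments are already in their argument registers, so the
   target address is placed in R11, a call-clobbered register that is not
   used for parameter passing in the x86-64 ABI.  */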
11124 rtx addr;
11125 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11126 fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
11127 emit_move_insn (fnaddr, addr);
11128 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11129 }
11130
11131 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
11132 if (retval)
11133 call = gen_rtx_SET (VOIDmode, retval, call);
11134 if (pop)
11135 {
11136 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
11137 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
11138 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
11139 }
11140
11141 call = emit_call_insn (call);
11142 if (use)
11143 CALL_INSN_FUNCTION_USAGE (call) = use;
11144 }
11145
11146 \f
11147 /* Clear stack slot assignments remembered from previous functions.
11148 This is called from INIT_EXPANDERS once before RTL is emitted for each
11149 function. */
11150
11151 static struct machine_function *
11152 ix86_init_machine_status (void)
11153 {
11154 struct machine_function *f;
11155
11156 f = ggc_alloc_cleared (sizeof (struct machine_function));
11157 f->use_fast_prologue_epilogue_nregs = -1;
11158
11159 return f;
11160 }
11161
11162 /* Return a MEM corresponding to a stack slot with mode MODE.
11163 Allocate a new slot if necessary.
11164
11165 The RTL for a function can have several slots available: N is
11166 which slot to use. */
11167
11168 rtx
11169 assign_386_stack_local (enum machine_mode mode, int n)
11170 {
11171 struct stack_local_entry *s;
11172
11173 if (n < 0 || n >= MAX_386_STACK_LOCALS)
11174 abort ();
11175
11176 for (s = ix86_stack_locals; s; s = s->next)
11177 if (s->mode == mode && s->n == n)
11178 return s->rtl;
11179
11180 s = (struct stack_local_entry *)
11181 ggc_alloc (sizeof (struct stack_local_entry));
11182 s->n = n;
11183 s->mode = mode;
11184 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
11185
11186 s->next = ix86_stack_locals;
11187 ix86_stack_locals = s;
11188 return s->rtl;
11189 }
11190
11191 /* Construct the SYMBOL_REF for the tls_get_addr function. */
11192
11193 static GTY(()) rtx ix86_tls_symbol;
11194 rtx
11195 ix86_tls_get_addr (void)
11196 {
11197
11198 if (!ix86_tls_symbol)
11199 {
11200 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
11201 (TARGET_GNU_TLS && !TARGET_64BIT)
11202 ? "___tls_get_addr"
11203 : "__tls_get_addr");
11204 }
11205
11206 return ix86_tls_symbol;
11207 }
11208 \f
11209 /* Calculate the length of the memory address in the instruction
11210 encoding. Does not include the one-byte modrm, opcode, or prefix. */
11211
11212 int
11213 memory_address_length (rtx addr)
11214 {
11215 struct ix86_address parts;
11216 rtx base, index, disp;
11217 int len;
11218
11219 if (GET_CODE (addr) == PRE_DEC
11220 || GET_CODE (addr) == POST_INC
11221 || GET_CODE (addr) == PRE_MODIFY
11222 || GET_CODE (addr) == POST_MODIFY)
11223 return 0;
11224
11225 if (! ix86_decompose_address (addr, &parts))
11226 abort ();
11227
11228 base = parts.base;
11229 index = parts.index;
11230 disp = parts.disp;
11231 len = 0;
11232
11233 /* Rule of thumb:
11234 - esp as the base always wants an index,
11235 - ebp as the base always wants a displacement. */
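/* For example: (%eax) adds no bytes, (%esp) needs a SIB byte, (%ebp) and
   8(%eax) need a one-byte displacement, 8(%eax,%ebx) needs a SIB byte plus
   a one-byte displacement, and a bare constant or symbol address needs a
   four-byte displacement.  */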
11236
11237 /* Register Indirect. */
11238 if (base && !index && !disp)
11239 {
11240 /* esp (for its index) and ebp (for its displacement) need
11241 the two-byte modrm form. */
11242 if (addr == stack_pointer_rtx
11243 || addr == arg_pointer_rtx
11244 || addr == frame_pointer_rtx
11245 || addr == hard_frame_pointer_rtx)
11246 len = 1;
11247 }
11248
11249 /* Direct Addressing. */
11250 else if (disp && !base && !index)
11251 len = 4;
11252
11253 else
11254 {
11255 /* Find the length of the displacement constant. */
11256 if (disp)
11257 {
11258 if (GET_CODE (disp) == CONST_INT
11259 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K')
11260 && base)
11261 len = 1;
11262 else
11263 len = 4;
11264 }
11265 /* ebp always wants a displacement. */
11266 else if (base == hard_frame_pointer_rtx)
11267 len = 1;
11268
11269 /* An index requires the two-byte modrm form.... */
11270 if (index
11271 /* ...like esp, which always wants an index. */
11272 || base == stack_pointer_rtx
11273 || base == arg_pointer_rtx
11274 || base == frame_pointer_rtx)
11275 len += 1;
11276 }
11277
11278 return len;
11279 }
11280
11281 /* Compute default value for "length_immediate" attribute.  When SHORTFORM
11282 is set, expect that the insn has an 8-bit immediate alternative.  */
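/* For example, with SHORTFORM set, an add with a constant in the range
   -128..127 (constraint 'K') counts as a one-byte immediate, while a
   larger SImode constant counts as four bytes.  */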
11283 int
11284 ix86_attr_length_immediate_default (rtx insn, int shortform)
11285 {
11286 int len = 0;
11287 int i;
11288 extract_insn_cached (insn);
11289 for (i = recog_data.n_operands - 1; i >= 0; --i)
11290 if (CONSTANT_P (recog_data.operand[i]))
11291 {
11292 if (len)
11293 abort ();
11294 if (shortform
11295 && GET_CODE (recog_data.operand[i]) == CONST_INT
11296 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
11297 len = 1;
11298 else
11299 {
11300 switch (get_attr_mode (insn))
11301 {
11302 case MODE_QI:
11303 len += 1;
11304 break;
11305 case MODE_HI:
11306 len += 2;
11307 break;
11308 case MODE_SI:
11309 len += 4;
11310 break;
11311 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values.  */
11312 case MODE_DI:
11313 len += 4;
11314 break;
11315 default:
11316 fatal_insn ("unknown insn mode", insn);
11317 }
11318 }
11319 }
11320 return len;
11321 }
11322 /* Compute default value for "length_address" attribute. */
11323 int
11324 ix86_attr_length_address_default (rtx insn)
11325 {
11326 int i;
11327
11328 if (get_attr_type (insn) == TYPE_LEA)
11329 {
11330 rtx set = PATTERN (insn);
11331 if (GET_CODE (set) == SET)
11332 ;
11333 else if (GET_CODE (set) == PARALLEL
11334 && GET_CODE (XVECEXP (set, 0, 0)) == SET)
11335 set = XVECEXP (set, 0, 0);
11336 else
11337 {
11338 #ifdef ENABLE_CHECKING
11339 abort ();
11340 #endif
11341 return 0;
11342 }
11343
11344 return memory_address_length (SET_SRC (set));
11345 }
11346
11347 extract_insn_cached (insn);
11348 for (i = recog_data.n_operands - 1; i >= 0; --i)
11349 if (GET_CODE (recog_data.operand[i]) == MEM)
11350 {
11351 return memory_address_length (XEXP (recog_data.operand[i], 0));
11353 }
11354 return 0;
11355 }
11356 \f
11357 /* Return the maximum number of instructions a cpu can issue. */
11358
11359 static int
11360 ix86_issue_rate (void)
11361 {
11362 switch (ix86_tune)
11363 {
11364 case PROCESSOR_PENTIUM:
11365 case PROCESSOR_K6:
11366 return 2;
11367
11368 case PROCESSOR_PENTIUMPRO:
11369 case PROCESSOR_PENTIUM4:
11370 case PROCESSOR_ATHLON:
11371 case PROCESSOR_K8:
11372 case PROCESSOR_NOCONA:
11373 return 3;
11374
11375 default:
11376 return 1;
11377 }
11378 }
11379
11380 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads the flags
11381 set by DEP_INSN and nothing else that DEP_INSN sets.  */
11382
11383 static int
11384 ix86_flags_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
11385 {
11386 rtx set, set2;
11387
11388 /* Simplify the test for uninteresting insns. */
11389 if (insn_type != TYPE_SETCC
11390 && insn_type != TYPE_ICMOV
11391 && insn_type != TYPE_FCMOV
11392 && insn_type != TYPE_IBR)
11393 return 0;
11394
11395 if ((set = single_set (dep_insn)) != 0)
11396 {
11397 set = SET_DEST (set);
11398 set2 = NULL_RTX;
11399 }
11400 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
11401 && XVECLEN (PATTERN (dep_insn), 0) == 2
11402 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
11403 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
11404 {
11405 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
11406 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
11407 }
11408 else
11409 return 0;
11410
11411 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
11412 return 0;
11413
11414 /* This test is true if the dependent insn reads the flags but
11415 not any other potentially set register. */
11416 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
11417 return 0;
11418
11419 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
11420 return 0;
11421
11422 return 1;
11423 }
11424
11425 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
11426 address with operands set by DEP_INSN. */
11427
11428 static int
11429 ix86_agi_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
11430 {
11431 rtx addr;
11432
11433 if (insn_type == TYPE_LEA
11434 && TARGET_PENTIUM)
11435 {
11436 addr = PATTERN (insn);
11437 if (GET_CODE (addr) == SET)
11438 ;
11439 else if (GET_CODE (addr) == PARALLEL
11440 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
11441 addr = XVECEXP (addr, 0, 0);
11442 else
11443 abort ();
11444 addr = SET_SRC (addr);
11445 }
11446 else
11447 {
11448 int i;
11449 extract_insn_cached (insn);
11450 for (i = recog_data.n_operands - 1; i >= 0; --i)
11451 if (GET_CODE (recog_data.operand[i]) == MEM)
11452 {
11453 addr = XEXP (recog_data.operand[i], 0);
11454 goto found;
11455 }
11456 return 0;
11457 found:;
11458 }
11459
11460 return modified_in_p (addr, dep_insn);
11461 }
11462
11463 static int
11464 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
11465 {
11466 enum attr_type insn_type, dep_insn_type;
11467 enum attr_memory memory;
11468 rtx set, set2;
11469 int dep_insn_code_number;
11470
11471 /* Anti and output dependencies have zero cost on all CPUs. */
11472 if (REG_NOTE_KIND (link) != 0)
11473 return 0;
11474
11475 dep_insn_code_number = recog_memoized (dep_insn);
11476
11477 /* If we can't recognize the insns, we can't really do anything. */
11478 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
11479 return cost;
11480
11481 insn_type = get_attr_type (insn);
11482 dep_insn_type = get_attr_type (dep_insn);
11483
11484 switch (ix86_tune)
11485 {
11486 case PROCESSOR_PENTIUM:
11487 /* Address Generation Interlock adds a cycle of latency. */
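/* For example, a load whose address register was written by the
   immediately preceding insn ("movl %eax, %ebx" followed by
   "movl (%ebx), %ecx") pays this extra cycle.  */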
11488 if (ix86_agi_dependant (insn, dep_insn, insn_type))
11489 cost += 1;
11490
11491 /* ??? Compares pair with jump/setcc. */
11492 if (ix86_flags_dependant (insn, dep_insn, insn_type))
11493 cost = 0;
11494
11495 /* Floating point stores require the value to be ready one cycle earlier.  */
11496 if (insn_type == TYPE_FMOV
11497 && get_attr_memory (insn) == MEMORY_STORE
11498 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11499 cost += 1;
11500 break;
11501
11502 case PROCESSOR_PENTIUMPRO:
11503 memory = get_attr_memory (insn);
11504
11505 /* INT->FP conversion is expensive. */
11506 if (get_attr_fp_int_src (dep_insn))
11507 cost += 5;
11508
11509 /* There is one cycle extra latency between an FP op and a store. */
11510 if (insn_type == TYPE_FMOV
11511 && (set = single_set (dep_insn)) != NULL_RTX
11512 && (set2 = single_set (insn)) != NULL_RTX
11513 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
11514 && GET_CODE (SET_DEST (set2)) == MEM)
11515 cost += 1;
11516
11517 /* Account for the ability of the reorder buffer to hide the latency of a
11518 load by executing it in parallel with the previous instruction, provided
11519 the previous instruction is not needed to compute the address.  */
11520 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11521 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11522 {
11523 /* Claim that moves take one cycle, as the core can issue one load
11524 at a time and the next load can start a cycle later.  */
11525 if (dep_insn_type == TYPE_IMOV
11526 || dep_insn_type == TYPE_FMOV)
11527 cost = 1;
11528 else if (cost > 1)
11529 cost--;
11530 }
11531 break;
11532
11533 case PROCESSOR_K6:
11534 memory = get_attr_memory (insn);
11535
11536 /* The esp dependency is resolved before the instruction is really
11537 finished. */
11538 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
11539 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
11540 return 1;
11541
11542 /* INT->FP conversion is expensive. */
11543 if (get_attr_fp_int_src (dep_insn))
11544 cost += 5;
11545
11546 /* Account for the ability of the reorder buffer to hide the latency of a
11547 load by executing it in parallel with the previous instruction, provided
11548 the previous instruction is not needed to compute the address.  */
11549 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11550 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11551 {
11552 /* Claim that moves take one cycle, as the core can issue one load
11553 at a time and the next load can start a cycle later.  */
11554 if (dep_insn_type == TYPE_IMOV
11555 || dep_insn_type == TYPE_FMOV)
11556 cost = 1;
11557 else if (cost > 2)
11558 cost -= 2;
11559 else
11560 cost = 1;
11561 }
11562 break;
11563
11564 case PROCESSOR_ATHLON:
11565 case PROCESSOR_K8:
11566 memory = get_attr_memory (insn);
11567
11568 /* Account for the ability of the reorder buffer to hide the latency of a
11569 load by executing it in parallel with the previous instruction, provided
11570 the previous instruction is not needed to compute the address.  */
11571 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11572 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11573 {
11574 enum attr_unit unit = get_attr_unit (insn);
11575 int loadcost = 3;
11576
11577 /* Because of the difference between the length of integer and
11578 floating unit pipeline preparation stages, the memory operands
11579 for floating point are cheaper.
11580
11581 ??? For Athlon the difference is most probably 2.  */
11582 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
11583 loadcost = 3;
11584 else
11585 loadcost = TARGET_ATHLON ? 2 : 0;
11586
11587 if (cost >= loadcost)
11588 cost -= loadcost;
11589 else
11590 cost = 0;
11591 }
11592
11593 default:
11594 break;
11595 }
11596
11597 return cost;
11598 }
11599
11600 /* How many alternative schedules to try. This should be as wide as the
11601 scheduling freedom in the DFA, but no wider.  Making this value too
11602 large results in extra work for the scheduler.  */
11603
11604 static int
11605 ia32_multipass_dfa_lookahead (void)
11606 {
11607 if (ix86_tune == PROCESSOR_PENTIUM)
11608 return 2;
11609
11610 if (ix86_tune == PROCESSOR_PENTIUMPRO
11611 || ix86_tune == PROCESSOR_K6)
11612 return 1;
11613
11614 else
11615 return 0;
11616 }
11617
11618 \f
11619 /* Compute the alignment given to a constant that is being placed in memory.
11620 EXP is the constant and ALIGN is the alignment that the object would
11621 ordinarily have.
11622 The value of this function is used instead of that alignment to align
11623 the object. */
11624
11625 int
11626 ix86_constant_alignment (tree exp, int align)
11627 {
11628 if (TREE_CODE (exp) == REAL_CST)
11629 {
11630 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
11631 return 64;
11632 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
11633 return 128;
11634 }
11635 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
11636 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
11637 return BITS_PER_WORD;
11638
11639 return align;
11640 }
11641
11642 /* Compute the alignment for a static variable.
11643 TYPE is the data type, and ALIGN is the alignment that
11644 the object would ordinarily have. The value of this function is used
11645 instead of that alignment to align the object. */
11646
11647 int
11648 ix86_data_alignment (tree type, int align)
11649 {
11650 if (AGGREGATE_TYPE_P (type)
11651 && TYPE_SIZE (type)
11652 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11653 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
11654 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
11655 return 256;
11656
11657 /* The x86-64 ABI requires arrays of 16 bytes or more to be aligned
11658 to a 16-byte boundary.  */
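/* For example, on 64-bit targets a file-scope "double d[3]" (24 bytes) is
   raised from its natural 8-byte alignment to 16 bytes here.  */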
11659 if (TARGET_64BIT)
11660 {
11661 if (AGGREGATE_TYPE_P (type)
11662 && TYPE_SIZE (type)
11663 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11664 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
11665 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
11666 return 128;
11667 }
11668
11669 if (TREE_CODE (type) == ARRAY_TYPE)
11670 {
11671 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
11672 return 64;
11673 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
11674 return 128;
11675 }
11676 else if (TREE_CODE (type) == COMPLEX_TYPE)
11677 {
11678
11679 if (TYPE_MODE (type) == DCmode && align < 64)
11680 return 64;
11681 if (TYPE_MODE (type) == XCmode && align < 128)
11682 return 128;
11683 }
11684 else if ((TREE_CODE (type) == RECORD_TYPE
11685 || TREE_CODE (type) == UNION_TYPE
11686 || TREE_CODE (type) == QUAL_UNION_TYPE)
11687 && TYPE_FIELDS (type))
11688 {
11689 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
11690 return 64;
11691 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
11692 return 128;
11693 }
11694 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
11695 || TREE_CODE (type) == INTEGER_TYPE)
11696 {
11697 if (TYPE_MODE (type) == DFmode && align < 64)
11698 return 64;
11699 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
11700 return 128;
11701 }
11702
11703 return align;
11704 }
11705
11706 /* Compute the alignment for a local variable.
11707 TYPE is the data type, and ALIGN is the alignment that
11708 the object would ordinarily have.  The value of this function is used
11709 instead of that alignment to align the object. */
11710
11711 int
11712 ix86_local_alignment (tree type, int align)
11713 {
11714 /* The x86-64 ABI requires arrays of 16 bytes or more to be aligned
11715 to a 16-byte boundary.  */
11716 if (TARGET_64BIT)
11717 {
11718 if (AGGREGATE_TYPE_P (type)
11719 && TYPE_SIZE (type)
11720 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11721 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
11722 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
11723 return 128;
11724 }
11725 if (TREE_CODE (type) == ARRAY_TYPE)
11726 {
11727 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
11728 return 64;
11729 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
11730 return 128;
11731 }
11732 else if (TREE_CODE (type) == COMPLEX_TYPE)
11733 {
11734 if (TYPE_MODE (type) == DCmode && align < 64)
11735 return 64;
11736 if (TYPE_MODE (type) == XCmode && align < 128)
11737 return 128;
11738 }
11739 else if ((TREE_CODE (type) == RECORD_TYPE
11740 || TREE_CODE (type) == UNION_TYPE
11741 || TREE_CODE (type) == QUAL_UNION_TYPE)
11742 && TYPE_FIELDS (type))
11743 {
11744 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
11745 return 64;
11746 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
11747 return 128;
11748 }
11749 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
11750 || TREE_CODE (type) == INTEGER_TYPE)
11751 {
11752
11753 if (TYPE_MODE (type) == DFmode && align < 64)
11754 return 64;
11755 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
11756 return 128;
11757 }
11758 return align;
11759 }
11760 \f
11761 /* Emit RTL insns to initialize the variable parts of a trampoline.
11762 FNADDR is an RTX for the address of the function's pure code.
11763 CXT is an RTX for the static chain value for the function. */
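/* The 32-bit trampoline built below is
     b9 <cxt32>        movl   $CXT, %ecx
     e9 <rel32>        jmp    FNADDR
   and the 64-bit one is
     41 bb <imm32>     movl   $FNADDR, %r11d   (or 49 bb <imm64>, movabs)
     49 ba <imm64>     movabs $CXT, %r10
     49 ff e3          jmp    *%r11
   %ecx and %r10 being the respective static chain registers.  */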
11764 void
11765 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
11766 {
11767 if (!TARGET_64BIT)
11768 {
11769 /* Compute offset from the end of the jmp to the target function. */
11770 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
11771 plus_constant (tramp, 10),
11772 NULL_RTX, 1, OPTAB_DIRECT);
11773 emit_move_insn (gen_rtx_MEM (QImode, tramp),
11774 gen_int_mode (0xb9, QImode));
11775 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
11776 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
11777 gen_int_mode (0xe9, QImode));
11778 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
11779 }
11780 else
11781 {
11782 int offset = 0;
11783 /* Try to load the address using the shorter movl instead of movabs.
11784 We may want to support movq for kernel mode, but the kernel does not
11785 use trampolines at the moment.  */
11786 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
11787 {
11788 fnaddr = copy_to_mode_reg (DImode, fnaddr);
11789 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11790 gen_int_mode (0xbb41, HImode));
11791 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
11792 gen_lowpart (SImode, fnaddr));
11793 offset += 6;
11794 }
11795 else
11796 {
11797 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11798 gen_int_mode (0xbb49, HImode));
11799 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
11800 fnaddr);
11801 offset += 10;
11802 }
11803 /* Load static chain using movabs to r10. */
11804 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11805 gen_int_mode (0xba49, HImode));
11806 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
11807 cxt);
11808 offset += 10;
11809 /* Jump to r11.  */
11810 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11811 gen_int_mode (0xff49, HImode));
11812 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
11813 gen_int_mode (0xe3, QImode));
11814 offset += 3;
11815 if (offset > TRAMPOLINE_SIZE)
11816 abort ();
11817 }
11818
11819 #ifdef ENABLE_EXECUTE_STACK
11820 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
11821 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
11822 #endif
11823 }
11824 \f
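/* Register builtin NAME with function type TYPE and code CODE, but only
   when one of the ISA bits in MASK is enabled in target_flags; builtins
   whose MASK includes MASK_64BIT are further restricted to 64-bit
   targets.  */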
11825 #define def_builtin(MASK, NAME, TYPE, CODE) \
11826 do { \
11827 if ((MASK) & target_flags \
11828 && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
11829 lang_hooks.builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
11830 NULL, NULL_TREE); \
11831 } while (0)
11832
11833 struct builtin_description
11834 {
11835 const unsigned int mask;		/* CPU feature bits (MASK_*) required.  */
11836 const enum insn_code icode;		/* Insn pattern used to expand the builtin.  */
11837 const char *const name;		/* Builtin name, or 0 if registered separately.  */
11838 const enum ix86_builtins code;	/* IX86_BUILTIN_* code.  */
11839 const enum rtx_code comparison;	/* Comparison code, for the comparison builtins.  */
11840 const unsigned int flag;		/* Nonzero if the comparison operands are swapped.  */
11841 };
11842
11843 static const struct builtin_description bdesc_comi[] =
11844 {
11845 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
11846 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
11847 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
11848 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
11849 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
11850 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
11851 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
11852 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
11853 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
11854 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
11855 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
11856 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
11857 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
11858 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
11859 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
11860 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
11861 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
11862 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
11863 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
11864 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
11865 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
11866 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
11867 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
11868 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
11869 };
11870
11871 static const struct builtin_description bdesc_2arg[] =
11872 {
11873 /* SSE */
11874 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
11875 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
11876 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
11877 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
11878 { MASK_SSE, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
11879 { MASK_SSE, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
11880 { MASK_SSE, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
11881 { MASK_SSE, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
11882
11883 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
11884 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
11885 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
11886 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
11887 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
11888 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
11889 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
11890 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
11891 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
11892 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
11893 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
11894 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
11895 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
11896 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
11897 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
11898 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
11899 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
11900 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
11901 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
11902 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
11903
11904 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
11905 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
11906 { MASK_SSE, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
11907 { MASK_SSE, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
11908
11909 { MASK_SSE, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
11910 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
11911 { MASK_SSE, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
11912 { MASK_SSE, CODE_FOR_sse_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
11913
11914 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
11915 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
11916 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
11917 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
11918 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
11919
11920 /* MMX */
11921 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
11922 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
11923 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
11924 { MASK_MMX, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
11925 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
11926 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
11927 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
11928 { MASK_MMX, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
11929
11930 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
11931 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
11932 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
11933 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
11934 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
11935 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
11936 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
11937 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
11938
11939 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
11940 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
11941 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
11942
11943 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
11944 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
11945 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
11946 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
11947
11948 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
11949 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
11950
11951 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
11952 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
11953 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
11954 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
11955 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
11956 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
11957
11958 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
11959 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
11960 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
11961 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
11962
11963 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
11964 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
11965 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
11966 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
11967 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
11968 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
11969
11970 /* Special. */
11971 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
11972 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
11973 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
11974
11975 { MASK_SSE, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
11976 { MASK_SSE, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
11977 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
11978
11979 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
11980 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
11981 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
11982 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
11983 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
11984 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
11985
11986 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
11987 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
11988 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
11989 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
11990 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
11991 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
11992
11993 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
11994 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
11995 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
11996 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
11997
11998 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
11999 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
12000
12001 /* SSE2 */
12002 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
12003 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
12004 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
12005 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
12006 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
12007 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
12008 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
12009 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
12010
12011 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
12012 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
12013 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
12014 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
12015 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
12016 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
12017 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
12018 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
12019 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
12020 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
12021 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
12022 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
12023 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
12024 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
12025 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
12026 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
12027 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
12028 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
12029 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
12030 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
12031
12032 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
12033 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
12034 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
12035 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
12036
12037 { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
12038 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
12039 { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
12040 { MASK_SSE2, CODE_FOR_sse2_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
12041
12042 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
12043 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
12044 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
12045
12046 /* SSE2 MMX */
12047 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
12048 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
12049 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
12050 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
12051 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
12052 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
12053 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
12054 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
12055
12056 { MASK_MMX, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
12057 { MASK_MMX, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
12058 { MASK_MMX, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
12059 { MASK_MMX, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
12060 { MASK_MMX, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
12061 { MASK_MMX, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
12062 { MASK_MMX, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
12063 { MASK_MMX, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
12064
12065 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
12066 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
12067
12068 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
12069 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
12070 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
12071 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
12072
12073 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
12074 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
12075
12076 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
12077 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
12078 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
12079 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
12080 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
12081 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
12082
12083 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
12084 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
12085 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
12086 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
12087
12088 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
12089 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
12090 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
12091 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
12092 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
12093 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
12094 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
12095 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
12096
12097 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
12098 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
12099 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
12100
12101 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
12102 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
12103
12104 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
12105 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },
12106
12107 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
12108 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
12109 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
12110 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
12111 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
12112 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
12113
12114 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
12115 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
12116 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
12117 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
12118 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
12119 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
12120
12121 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
12122 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
12123 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
12124 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
12125
12126 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
12127
12128 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
12129 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
12130 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
12131 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
12132
12133 /* SSE3 MMX */
12134 { MASK_SSE3, CODE_FOR_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
12135 { MASK_SSE3, CODE_FOR_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
12136 { MASK_SSE3, CODE_FOR_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
12137 { MASK_SSE3, CODE_FOR_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
12138 { MASK_SSE3, CODE_FOR_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
12139 { MASK_SSE3, CODE_FOR_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
12140 };
12141
12142 static const struct builtin_description bdesc_1arg[] =
12143 {
12144 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
12145 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
12146
12147 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
12148 { MASK_SSE, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
12149 { MASK_SSE, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
12150
12151 { MASK_SSE, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
12152 { MASK_SSE, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
12153 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
12154 { MASK_SSE, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
12155 { MASK_SSE, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
12156 { MASK_SSE | MASK_64BIT, CODE_FOR_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
12157
12158 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
12159 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
12160 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
12161 { MASK_SSE2, CODE_FOR_sse2_movdq2q, 0, IX86_BUILTIN_MOVDQ2Q, 0, 0 },
12162
12163 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
12164
12165 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
12166 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
12167
12168 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
12169 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
12170 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
12171 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
12172 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
12173
12174 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
12175
12176 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
12177 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
12178 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
12179 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
12180
12181 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
12182 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
12183 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
12184
12185 { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 },
12186
12187 /* SSE3 */
12188 { MASK_SSE3, CODE_FOR_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
12189 { MASK_SSE3, CODE_FOR_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
12190 { MASK_SSE3, CODE_FOR_movddup, 0, IX86_BUILTIN_MOVDDUP, 0, 0 }
12191 };
12192
12193 void
12194 ix86_init_builtins (void)
12195 {
12196 if (TARGET_MMX)
12197 ix86_init_mmx_sse_builtins ();
12198 }
12199
12200 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
12201 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
12202 builtins. */
12203 static void
12204 ix86_init_mmx_sse_builtins (void)
12205 {
12206 const struct builtin_description * d;
12207 size_t i;
12208
12209 tree V16QI_type_node = build_vector_type_for_mode (intQI_type_node, V16QImode);
12210 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
12211 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
12212 tree V2DI_type_node = build_vector_type_for_mode (intDI_type_node, V2DImode);
12213 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
12214 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
12215 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
12216 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
12217 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
12218 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
12219
12220 tree pchar_type_node = build_pointer_type (char_type_node);
12221 tree pcchar_type_node = build_pointer_type (
12222 build_type_variant (char_type_node, 1, 0));
12223 tree pfloat_type_node = build_pointer_type (float_type_node);
12224 tree pcfloat_type_node = build_pointer_type (
12225 build_type_variant (float_type_node, 1, 0));
12226 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
12227 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
12228 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
12229
12230 /* Comparisons. */
12231 tree int_ftype_v4sf_v4sf
12232 = build_function_type_list (integer_type_node,
12233 V4SF_type_node, V4SF_type_node, NULL_TREE);
12234 tree v4si_ftype_v4sf_v4sf
12235 = build_function_type_list (V4SI_type_node,
12236 V4SF_type_node, V4SF_type_node, NULL_TREE);
12237 /* MMX/SSE/integer conversions. */
12238 tree int_ftype_v4sf
12239 = build_function_type_list (integer_type_node,
12240 V4SF_type_node, NULL_TREE);
12241 tree int64_ftype_v4sf
12242 = build_function_type_list (long_long_integer_type_node,
12243 V4SF_type_node, NULL_TREE);
12244 tree int_ftype_v8qi
12245 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
12246 tree v4sf_ftype_v4sf_int
12247 = build_function_type_list (V4SF_type_node,
12248 V4SF_type_node, integer_type_node, NULL_TREE);
12249 tree v4sf_ftype_v4sf_int64
12250 = build_function_type_list (V4SF_type_node,
12251 V4SF_type_node, long_long_integer_type_node,
12252 NULL_TREE);
12253 tree v4sf_ftype_v4sf_v2si
12254 = build_function_type_list (V4SF_type_node,
12255 V4SF_type_node, V2SI_type_node, NULL_TREE);
12256 tree int_ftype_v4hi_int
12257 = build_function_type_list (integer_type_node,
12258 V4HI_type_node, integer_type_node, NULL_TREE);
12259 tree v4hi_ftype_v4hi_int_int
12260 = build_function_type_list (V4HI_type_node, V4HI_type_node,
12261 integer_type_node, integer_type_node,
12262 NULL_TREE);
12263 /* Miscellaneous. */
12264 tree v8qi_ftype_v4hi_v4hi
12265 = build_function_type_list (V8QI_type_node,
12266 V4HI_type_node, V4HI_type_node, NULL_TREE);
12267 tree v4hi_ftype_v2si_v2si
12268 = build_function_type_list (V4HI_type_node,
12269 V2SI_type_node, V2SI_type_node, NULL_TREE);
12270 tree v4sf_ftype_v4sf_v4sf_int
12271 = build_function_type_list (V4SF_type_node,
12272 V4SF_type_node, V4SF_type_node,
12273 integer_type_node, NULL_TREE);
12274 tree v2si_ftype_v4hi_v4hi
12275 = build_function_type_list (V2SI_type_node,
12276 V4HI_type_node, V4HI_type_node, NULL_TREE);
12277 tree v4hi_ftype_v4hi_int
12278 = build_function_type_list (V4HI_type_node,
12279 V4HI_type_node, integer_type_node, NULL_TREE);
12280 tree v4hi_ftype_v4hi_di
12281 = build_function_type_list (V4HI_type_node,
12282 V4HI_type_node, long_long_unsigned_type_node,
12283 NULL_TREE);
12284 tree v2si_ftype_v2si_di
12285 = build_function_type_list (V2SI_type_node,
12286 V2SI_type_node, long_long_unsigned_type_node,
12287 NULL_TREE);
12288 tree void_ftype_void
12289 = build_function_type (void_type_node, void_list_node);
12290 tree void_ftype_unsigned
12291 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
12292 tree void_ftype_unsigned_unsigned
12293 = build_function_type_list (void_type_node, unsigned_type_node,
12294 unsigned_type_node, NULL_TREE);
12295 tree void_ftype_pcvoid_unsigned_unsigned
12296 = build_function_type_list (void_type_node, const_ptr_type_node,
12297 unsigned_type_node, unsigned_type_node,
12298 NULL_TREE);
12299 tree unsigned_ftype_void
12300 = build_function_type (unsigned_type_node, void_list_node);
12301 tree di_ftype_void
12302 = build_function_type (long_long_unsigned_type_node, void_list_node);
12303 tree v4sf_ftype_void
12304 = build_function_type (V4SF_type_node, void_list_node);
12305 tree v2si_ftype_v4sf
12306 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
12307 /* Loads/stores. */
12308 tree void_ftype_v8qi_v8qi_pchar
12309 = build_function_type_list (void_type_node,
12310 V8QI_type_node, V8QI_type_node,
12311 pchar_type_node, NULL_TREE);
12312 tree v4sf_ftype_pcfloat
12313 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
12314 /* @@@ the type is bogus */
12315 tree v4sf_ftype_v4sf_pv2si
12316 = build_function_type_list (V4SF_type_node,
12317 V4SF_type_node, pv2si_type_node, NULL_TREE);
12318 tree void_ftype_pv2si_v4sf
12319 = build_function_type_list (void_type_node,
12320 pv2si_type_node, V4SF_type_node, NULL_TREE);
12321 tree void_ftype_pfloat_v4sf
12322 = build_function_type_list (void_type_node,
12323 pfloat_type_node, V4SF_type_node, NULL_TREE);
12324 tree void_ftype_pdi_di
12325 = build_function_type_list (void_type_node,
12326 pdi_type_node, long_long_unsigned_type_node,
12327 NULL_TREE);
12328 tree void_ftype_pv2di_v2di
12329 = build_function_type_list (void_type_node,
12330 pv2di_type_node, V2DI_type_node, NULL_TREE);
12331 /* Normal vector unops. */
12332 tree v4sf_ftype_v4sf
12333 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
12334
12335 /* Normal vector binops. */
12336 tree v4sf_ftype_v4sf_v4sf
12337 = build_function_type_list (V4SF_type_node,
12338 V4SF_type_node, V4SF_type_node, NULL_TREE);
12339 tree v8qi_ftype_v8qi_v8qi
12340 = build_function_type_list (V8QI_type_node,
12341 V8QI_type_node, V8QI_type_node, NULL_TREE);
12342 tree v4hi_ftype_v4hi_v4hi
12343 = build_function_type_list (V4HI_type_node,
12344 V4HI_type_node, V4HI_type_node, NULL_TREE);
12345 tree v2si_ftype_v2si_v2si
12346 = build_function_type_list (V2SI_type_node,
12347 V2SI_type_node, V2SI_type_node, NULL_TREE);
12348 tree di_ftype_di_di
12349 = build_function_type_list (long_long_unsigned_type_node,
12350 long_long_unsigned_type_node,
12351 long_long_unsigned_type_node, NULL_TREE);
12352
12353 tree v2si_ftype_v2sf
12354 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
12355 tree v2sf_ftype_v2si
12356 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
12357 tree v2si_ftype_v2si
12358 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
12359 tree v2sf_ftype_v2sf
12360 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
12361 tree v2sf_ftype_v2sf_v2sf
12362 = build_function_type_list (V2SF_type_node,
12363 V2SF_type_node, V2SF_type_node, NULL_TREE);
12364 tree v2si_ftype_v2sf_v2sf
12365 = build_function_type_list (V2SI_type_node,
12366 V2SF_type_node, V2SF_type_node, NULL_TREE);
12367 tree pint_type_node = build_pointer_type (integer_type_node);
12368 tree pcint_type_node = build_pointer_type (
12369 build_type_variant (integer_type_node, 1, 0));
12370 tree pdouble_type_node = build_pointer_type (double_type_node);
12371 tree pcdouble_type_node = build_pointer_type (
12372 build_type_variant (double_type_node, 1, 0));
12373 tree int_ftype_v2df_v2df
12374 = build_function_type_list (integer_type_node,
12375 V2DF_type_node, V2DF_type_node, NULL_TREE);
12376
12377 tree ti_ftype_void
12378 = build_function_type (intTI_type_node, void_list_node);
12379 tree v2di_ftype_void
12380 = build_function_type (V2DI_type_node, void_list_node);
12381 tree ti_ftype_ti_ti
12382 = build_function_type_list (intTI_type_node,
12383 intTI_type_node, intTI_type_node, NULL_TREE);
12384 tree void_ftype_pcvoid
12385 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
12386 tree v2di_ftype_di
12387 = build_function_type_list (V2DI_type_node,
12388 long_long_unsigned_type_node, NULL_TREE);
12389 tree di_ftype_v2di
12390 = build_function_type_list (long_long_unsigned_type_node,
12391 V2DI_type_node, NULL_TREE);
12392 tree v4sf_ftype_v4si
12393 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
12394 tree v4si_ftype_v4sf
12395 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
12396 tree v2df_ftype_v4si
12397 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
12398 tree v4si_ftype_v2df
12399 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
12400 tree v2si_ftype_v2df
12401 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
12402 tree v4sf_ftype_v2df
12403 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
12404 tree v2df_ftype_v2si
12405 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
12406 tree v2df_ftype_v4sf
12407 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
12408 tree int_ftype_v2df
12409 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
12410 tree int64_ftype_v2df
12411 = build_function_type_list (long_long_integer_type_node,
12412 V2DF_type_node, NULL_TREE);
12413 tree v2df_ftype_v2df_int
12414 = build_function_type_list (V2DF_type_node,
12415 V2DF_type_node, integer_type_node, NULL_TREE);
12416 tree v2df_ftype_v2df_int64
12417 = build_function_type_list (V2DF_type_node,
12418 V2DF_type_node, long_long_integer_type_node,
12419 NULL_TREE);
12420 tree v4sf_ftype_v4sf_v2df
12421 = build_function_type_list (V4SF_type_node,
12422 V4SF_type_node, V2DF_type_node, NULL_TREE);
12423 tree v2df_ftype_v2df_v4sf
12424 = build_function_type_list (V2DF_type_node,
12425 V2DF_type_node, V4SF_type_node, NULL_TREE);
12426 tree v2df_ftype_v2df_v2df_int
12427 = build_function_type_list (V2DF_type_node,
12428 V2DF_type_node, V2DF_type_node,
12429 integer_type_node,
12430 NULL_TREE);
12431 tree v2df_ftype_v2df_pv2si
12432 = build_function_type_list (V2DF_type_node,
12433 V2DF_type_node, pv2si_type_node, NULL_TREE);
12434 tree void_ftype_pv2si_v2df
12435 = build_function_type_list (void_type_node,
12436 pv2si_type_node, V2DF_type_node, NULL_TREE);
12437 tree void_ftype_pdouble_v2df
12438 = build_function_type_list (void_type_node,
12439 pdouble_type_node, V2DF_type_node, NULL_TREE);
12440 tree void_ftype_pint_int
12441 = build_function_type_list (void_type_node,
12442 pint_type_node, integer_type_node, NULL_TREE);
12443 tree void_ftype_v16qi_v16qi_pchar
12444 = build_function_type_list (void_type_node,
12445 V16QI_type_node, V16QI_type_node,
12446 pchar_type_node, NULL_TREE);
12447 tree v2df_ftype_pcdouble
12448 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
12449 tree v2df_ftype_v2df_v2df
12450 = build_function_type_list (V2DF_type_node,
12451 V2DF_type_node, V2DF_type_node, NULL_TREE);
12452 tree v16qi_ftype_v16qi_v16qi
12453 = build_function_type_list (V16QI_type_node,
12454 V16QI_type_node, V16QI_type_node, NULL_TREE);
12455 tree v8hi_ftype_v8hi_v8hi
12456 = build_function_type_list (V8HI_type_node,
12457 V8HI_type_node, V8HI_type_node, NULL_TREE);
12458 tree v4si_ftype_v4si_v4si
12459 = build_function_type_list (V4SI_type_node,
12460 V4SI_type_node, V4SI_type_node, NULL_TREE);
12461 tree v2di_ftype_v2di_v2di
12462 = build_function_type_list (V2DI_type_node,
12463 V2DI_type_node, V2DI_type_node, NULL_TREE);
12464 tree v2di_ftype_v2df_v2df
12465 = build_function_type_list (V2DI_type_node,
12466 V2DF_type_node, V2DF_type_node, NULL_TREE);
12467 tree v2df_ftype_v2df
12468 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
12469 tree v2df_ftype_double
12470 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
12471 tree v2df_ftype_double_double
12472 = build_function_type_list (V2DF_type_node,
12473 double_type_node, double_type_node, NULL_TREE);
12474 tree int_ftype_v8hi_int
12475 = build_function_type_list (integer_type_node,
12476 V8HI_type_node, integer_type_node, NULL_TREE);
12477 tree v8hi_ftype_v8hi_int_int
12478 = build_function_type_list (V8HI_type_node,
12479 V8HI_type_node, integer_type_node,
12480 integer_type_node, NULL_TREE);
12481 tree v2di_ftype_v2di_int
12482 = build_function_type_list (V2DI_type_node,
12483 V2DI_type_node, integer_type_node, NULL_TREE);
12484 tree v4si_ftype_v4si_int
12485 = build_function_type_list (V4SI_type_node,
12486 V4SI_type_node, integer_type_node, NULL_TREE);
12487 tree v8hi_ftype_v8hi_int
12488 = build_function_type_list (V8HI_type_node,
12489 V8HI_type_node, integer_type_node, NULL_TREE);
12490 tree v8hi_ftype_v8hi_v2di
12491 = build_function_type_list (V8HI_type_node,
12492 V8HI_type_node, V2DI_type_node, NULL_TREE);
12493 tree v4si_ftype_v4si_v2di
12494 = build_function_type_list (V4SI_type_node,
12495 V4SI_type_node, V2DI_type_node, NULL_TREE);
12496 tree v4si_ftype_v8hi_v8hi
12497 = build_function_type_list (V4SI_type_node,
12498 V8HI_type_node, V8HI_type_node, NULL_TREE);
12499 tree di_ftype_v8qi_v8qi
12500 = build_function_type_list (long_long_unsigned_type_node,
12501 V8QI_type_node, V8QI_type_node, NULL_TREE);
12502 tree di_ftype_v2si_v2si
12503 = build_function_type_list (long_long_unsigned_type_node,
12504 V2SI_type_node, V2SI_type_node, NULL_TREE);
12505 tree v2di_ftype_v16qi_v16qi
12506 = build_function_type_list (V2DI_type_node,
12507 V16QI_type_node, V16QI_type_node, NULL_TREE);
12508 tree v2di_ftype_v4si_v4si
12509 = build_function_type_list (V2DI_type_node,
12510 V4SI_type_node, V4SI_type_node, NULL_TREE);
12511 tree int_ftype_v16qi
12512 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
12513 tree v16qi_ftype_pcchar
12514 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
12515 tree void_ftype_pchar_v16qi
12516 = build_function_type_list (void_type_node,
12517 pchar_type_node, V16QI_type_node, NULL_TREE);
12518 tree v4si_ftype_pcint
12519 = build_function_type_list (V4SI_type_node, pcint_type_node, NULL_TREE);
12520 tree void_ftype_pcint_v4si
12521 = build_function_type_list (void_type_node,
12522 pcint_type_node, V4SI_type_node, NULL_TREE);
12523 tree v2di_ftype_v2di
12524 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
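/* A note on the naming convention above (added for clarity; not in the
   original source): a variable such as v4hi_ftype_v4hi_int holds the tree
   for a function type returning V4HI and taking (V4HI, int).  For example,
   the declaration

     tree v2di_ftype_v2di_int
       = build_function_type_list (V2DI_type_node,
                                   V2DI_type_node, integer_type_node,
                                   NULL_TREE);

   reads as "V2DI function of (V2DI, int)".  */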
12525
12526 tree float80_type;
12527 tree float128_type;
12528
12529 /* The __float80 type. */
12530 if (TYPE_MODE (long_double_type_node) == XFmode)
12531 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
12532 "__float80");
12533 else
12534 {
12535 /* The __float80 type. */
12536 float80_type = make_node (REAL_TYPE);
12537 TYPE_PRECISION (float80_type) = 80;
12538 layout_type (float80_type);
12539 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
12540 }
12541
12542 float128_type = make_node (REAL_TYPE);
12543 TYPE_PRECISION (float128_type) = 128;
12544 layout_type (float128_type);
12545 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
12546
12547 /* Add all builtins that are more or less simple operations on two
12548 operands. */
12549 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
12550 {
12551 /* Use one of the operands; the target can have a different mode for
12552 mask-generating compares. */
12553 enum machine_mode mode;
12554 tree type;
12555
12556 if (d->name == 0)
12557 continue;
12558 mode = insn_data[d->icode].operand[1].mode;
12559
12560 switch (mode)
12561 {
12562 case V16QImode:
12563 type = v16qi_ftype_v16qi_v16qi;
12564 break;
12565 case V8HImode:
12566 type = v8hi_ftype_v8hi_v8hi;
12567 break;
12568 case V4SImode:
12569 type = v4si_ftype_v4si_v4si;
12570 break;
12571 case V2DImode:
12572 type = v2di_ftype_v2di_v2di;
12573 break;
12574 case V2DFmode:
12575 type = v2df_ftype_v2df_v2df;
12576 break;
12577 case TImode:
12578 type = ti_ftype_ti_ti;
12579 break;
12580 case V4SFmode:
12581 type = v4sf_ftype_v4sf_v4sf;
12582 break;
12583 case V8QImode:
12584 type = v8qi_ftype_v8qi_v8qi;
12585 break;
12586 case V4HImode:
12587 type = v4hi_ftype_v4hi_v4hi;
12588 break;
12589 case V2SImode:
12590 type = v2si_ftype_v2si_v2si;
12591 break;
12592 case DImode:
12593 type = di_ftype_di_di;
12594 break;
12595
12596 default:
12597 abort ();
12598 }
12599
12600 /* Override for comparisons. */
12601 if (d->icode == CODE_FOR_maskcmpv4sf3
12602 || d->icode == CODE_FOR_maskncmpv4sf3
12603 || d->icode == CODE_FOR_vmmaskcmpv4sf3
12604 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
12605 type = v4si_ftype_v4sf_v4sf;
12606
12607 if (d->icode == CODE_FOR_maskcmpv2df3
12608 || d->icode == CODE_FOR_maskncmpv2df3
12609 || d->icode == CODE_FOR_vmmaskcmpv2df3
12610 || d->icode == CODE_FOR_vmmaskncmpv2df3)
12611 type = v2di_ftype_v2df_v2df;
12612
12613 def_builtin (d->mask, d->name, type, d->code);
12614 }
12615
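/* Illustration (not in the original source): each bdesc_2arg entry is a
   struct builtin_description whose fields are used above as d->mask,
   d->icode, d->name, d->code, d->comparison and d->flag.  A typical entry
   is assumed to look roughly like

     { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps",
       IX86_BUILTIN_ADDPS, 0, 0 },

   the loop above then inspects operand 1 of CODE_FOR_addv4sf3, sees
   V4SFmode, and registers the builtin with type v4sf_ftype_v4sf_v4sf.  */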
12616 /* Add the remaining MMX insns with somewhat more complicated types. */
12617 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
12618 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
12619 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
12620 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
12621 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
12622
12623 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
12624 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
12625 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
12626
12627 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
12628 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
12629
12630 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
12631 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
12632
12633 /* comi/ucomi insns. */
12634 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
12635 if (d->mask == MASK_SSE2)
12636 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
12637 else
12638 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
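/* Sketch (not in the original source): the comi/ucomi builtins registered
   above return an int flag, and the intrinsic headers are assumed to wrap
   them directly, e.g. xmmintrin.h roughly as

     static __inline int
     _mm_comieq_ss (__m128 __A, __m128 __B)
     {
       return __builtin_ia32_comieq (__A, __B);
     }
*/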
12639
12640 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
12641 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
12642 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
12643
12644 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
12645 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
12646 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
12647 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
12648 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
12649 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
12650 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
12651 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
12652 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
12653 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
12654 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
12655
12656 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
12657 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
12658
12659 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
12660
12661 def_builtin (MASK_SSE, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADAPS);
12662 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
12663 def_builtin (MASK_SSE, "__builtin_ia32_loadss", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADSS);
12664 def_builtin (MASK_SSE, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
12665 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
12666 def_builtin (MASK_SSE, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
12667
12668 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
12669 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
12670 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
12671 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
12672
12673 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
12674 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
12675 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
12676 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
12677
12678 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
12679
12680 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
12681
12682 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
12683 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
12684 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
12685 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
12686 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
12687 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
12688
12689 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
12690
12691 /* Original 3DNow! */
12692 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
12693 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
12694 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
12695 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
12696 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
12697 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
12698 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
12699 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
12700 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
12701 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
12702 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
12703 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
12704 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
12705 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
12706 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
12707 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
12708 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
12709 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
12710 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
12711 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
12712
12713 /* 3DNow! extension as used in the Athlon CPU. */
12714 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
12715 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
12716 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
12717 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
12718 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
12719 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
12720
12721 def_builtin (MASK_SSE, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
12722
12723 /* SSE2 */
12724 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
12725 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
12726
12727 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
12728 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
12729 def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q);
12730
12731 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADAPD);
12732 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
12733 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADSD);
12734 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
12735 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
12736 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
12737
12738 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
12739 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
12740 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
12741 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
12742
12743 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
12744 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
12745 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
12746 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
12747 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
12748
12749 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
12750 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
12751 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
12752 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
12753
12754 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
12755 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
12756
12757 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
12758
12759 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
12760 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
12761
12762 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
12763 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
12764 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
12765 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
12766 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
12767
12768 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
12769
12770 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
12771 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
12772 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
12773 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
12774
12775 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
12776 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
12777 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
12778
12779 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
12780 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
12781 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
12782 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
12783
12784 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
12785 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
12786 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
12787 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pcdouble, IX86_BUILTIN_LOADPD1);
12788 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADRPD);
12789 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
12790 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
12791
12792 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
12793 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
12794 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
12795
12796 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQA);
12797 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
12798 def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pcint, IX86_BUILTIN_LOADD);
12799 def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA);
12800 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
12801 def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pcint_v4si, IX86_BUILTIN_STORED);
12802 def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ);
12803
12804 def_builtin (MASK_SSE, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI);
12805
12806 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
12807 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
12808
12809 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
12810 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
12811 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
12812
12813 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
12814 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
12815 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
12816
12817 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
12818 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
12819
12820 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
12821 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
12822 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
12823 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
12824
12825 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
12826 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
12827 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
12828 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
12829
12830 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
12831 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
12832
12833 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
12834
12835 /* Prescott New Instructions (SSE3). */
12836 def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
12837 void_ftype_pcvoid_unsigned_unsigned,
12838 IX86_BUILTIN_MONITOR);
12839 def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
12840 void_ftype_unsigned_unsigned,
12841 IX86_BUILTIN_MWAIT);
12842 def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
12843 v4sf_ftype_v4sf,
12844 IX86_BUILTIN_MOVSHDUP);
12845 def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
12846 v4sf_ftype_v4sf,
12847 IX86_BUILTIN_MOVSLDUP);
12848 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
12849 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
12850 def_builtin (MASK_SSE3, "__builtin_ia32_loadddup",
12851 v2df_ftype_pcdouble, IX86_BUILTIN_LOADDDUP);
12852 def_builtin (MASK_SSE3, "__builtin_ia32_movddup",
12853 v2df_ftype_v2df, IX86_BUILTIN_MOVDDUP);
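/* Sketch (not in the original source): the SSE3 builtins above back the
   pmmintrin.h intrinsics; for instance _mm_mwait is assumed to be a thin
   wrapper of the form

     static __inline void
     _mm_mwait (unsigned int __E, unsigned int __H)
     {
       __builtin_ia32_mwait (__E, __H);
     }
*/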
12854 }
12855
12856 /* Errors in the source file can cause expand_expr to return const0_rtx
12857 where we expect a vector. To avoid crashing, use one of the vector
12858 clear instructions. */
12859 static rtx
12860 safe_vector_operand (rtx x, enum machine_mode mode)
12861 {
12862 if (x != const0_rtx)
12863 return x;
12864 x = gen_reg_rtx (mode);
12865
12866 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
12867 emit_insn (gen_mmx_clrdi (mode == DImode ? x
12868 : gen_rtx_SUBREG (DImode, x, 0)));
12869 else
12870 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
12871 : gen_rtx_SUBREG (V4SFmode, x, 0),
12872 CONST0_RTX (V4SFmode)));
12873 return x;
12874 }
12875
12876 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
12877
12878 static rtx
12879 ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
12880 {
12881 rtx pat;
12882 tree arg0 = TREE_VALUE (arglist);
12883 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12884 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12885 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12886 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12887 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12888 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
12889
12890 if (VECTOR_MODE_P (mode0))
12891 op0 = safe_vector_operand (op0, mode0);
12892 if (VECTOR_MODE_P (mode1))
12893 op1 = safe_vector_operand (op1, mode1);
12894
12895 if (! target
12896 || GET_MODE (target) != tmode
12897 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12898 target = gen_reg_rtx (tmode);
12899
12900 if (GET_MODE (op1) == SImode && mode1 == TImode)
12901 {
12902 rtx x = gen_reg_rtx (V4SImode);
12903 emit_insn (gen_sse2_loadd (x, op1));
12904 op1 = gen_lowpart (TImode, x);
12905 }
12906
12907 /* In case the insn wants input operands in modes different from
12908 the result, abort. */
12909 if ((GET_MODE (op0) != mode0 && GET_MODE (op0) != VOIDmode)
12910 || (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode))
12911 abort ();
12912
12913 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12914 op0 = copy_to_mode_reg (mode0, op0);
12915 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12916 op1 = copy_to_mode_reg (mode1, op1);
12917
12918 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
12919 yet at least one of the two must not be a memory operand. This is
12920 normally enforced by expanders, but we didn't bother to create one here. */
12921 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
12922 op0 = copy_to_mode_reg (mode0, op0);
12923
12924 pat = GEN_FCN (icode) (target, op0, op1);
12925 if (! pat)
12926 return 0;
12927 emit_insn (pat);
12928 return target;
12929 }
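/* Worked example (not in the original source): for a call such as
   __builtin_ia32_addps (a, b), d->icode is CODE_FOR_addv4sf3, so the
   GEN_FCN (icode) (target, op0, op1) above is assumed to emit an insn of
   roughly the shape

     (set (reg:V4SF target)
          (plus:V4SF (reg:V4SF op0) (reg:V4SF op1)))

   with op0/op1 forced into registers first whenever the operand
   predicates reject their current form.  */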
12930
12931 /* Subroutine of ix86_expand_builtin to take care of stores. */
12932
12933 static rtx
12934 ix86_expand_store_builtin (enum insn_code icode, tree arglist)
12935 {
12936 rtx pat;
12937 tree arg0 = TREE_VALUE (arglist);
12938 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12939 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12940 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12941 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
12942 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
12943
12944 if (VECTOR_MODE_P (mode1))
12945 op1 = safe_vector_operand (op1, mode1);
12946
12947 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
12948 op1 = copy_to_mode_reg (mode1, op1);
12949
12950 pat = GEN_FCN (icode) (op0, op1);
12951 if (pat)
12952 emit_insn (pat);
12953 return 0;
12954 }
12955
12956 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
12957
12958 static rtx
12959 ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
12960 rtx target, int do_load)
12961 {
12962 rtx pat;
12963 tree arg0 = TREE_VALUE (arglist);
12964 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12965 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12966 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12967
12968 if (! target
12969 || GET_MODE (target) != tmode
12970 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12971 target = gen_reg_rtx (tmode);
12972 if (do_load)
12973 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
12974 else
12975 {
12976 if (VECTOR_MODE_P (mode0))
12977 op0 = safe_vector_operand (op0, mode0);
12978
12979 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12980 op0 = copy_to_mode_reg (mode0, op0);
12981 }
12982
12983 pat = GEN_FCN (icode) (target, op0);
12984 if (! pat)
12985 return 0;
12986 emit_insn (pat);
12987 return target;
12988 }
12989
12990 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
12991 sqrtss, rsqrtss, rcpss. */
12992
12993 static rtx
12994 ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
12995 {
12996 rtx pat;
12997 tree arg0 = TREE_VALUE (arglist);
12998 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12999 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13000 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13001
13002 if (! target
13003 || GET_MODE (target) != tmode
13004 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13005 target = gen_reg_rtx (tmode);
13006
13007 if (VECTOR_MODE_P (mode0))
13008 op0 = safe_vector_operand (op0, mode0);
13009
13010 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13011 op0 = copy_to_mode_reg (mode0, op0);
13012
13013 op1 = op0;
13014 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
13015 op1 = copy_to_mode_reg (mode0, op1);
13016
13017 pat = GEN_FCN (icode) (target, op0, op1);
13018 if (! pat)
13019 return 0;
13020 emit_insn (pat);
13021 return target;
13022 }
13023
13024 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
13025
13026 static rtx
13027 ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
13028 rtx target)
13029 {
13030 rtx pat;
13031 tree arg0 = TREE_VALUE (arglist);
13032 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13033 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13034 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13035 rtx op2;
13036 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
13037 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
13038 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
13039 enum rtx_code comparison = d->comparison;
13040
13041 if (VECTOR_MODE_P (mode0))
13042 op0 = safe_vector_operand (op0, mode0);
13043 if (VECTOR_MODE_P (mode1))
13044 op1 = safe_vector_operand (op1, mode1);
13045
13046 /* Swap operands if we have a comparison that isn't available in
13047 hardware. */
13048 if (d->flag)
13049 {
13050 rtx tmp = gen_reg_rtx (mode1);
13051 emit_move_insn (tmp, op1);
13052 op1 = op0;
13053 op0 = tmp;
13054 }
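/* Example (not in the original source): SSE cmpps only encodes EQ, LT, LE,
   UNORD and their negations, so a builtin like __builtin_ia32_cmpgtps is
   assumed to be listed in bdesc_2arg with d->comparison == LT and d->flag
   set, and is therefore expanded here as LT with the operands swapped.  */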
13055
13056 if (! target
13057 || GET_MODE (target) != tmode
13058 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
13059 target = gen_reg_rtx (tmode);
13060
13061 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
13062 op0 = copy_to_mode_reg (mode0, op0);
13063 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
13064 op1 = copy_to_mode_reg (mode1, op1);
13065
13066 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13067 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
13068 if (! pat)
13069 return 0;
13070 emit_insn (pat);
13071 return target;
13072 }
13073
13074 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
13075
13076 static rtx
13077 ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
13078 rtx target)
13079 {
13080 rtx pat;
13081 tree arg0 = TREE_VALUE (arglist);
13082 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13083 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13084 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13085 rtx op2;
13086 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
13087 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
13088 enum rtx_code comparison = d->comparison;
13089
13090 if (VECTOR_MODE_P (mode0))
13091 op0 = safe_vector_operand (op0, mode0);
13092 if (VECTOR_MODE_P (mode1))
13093 op1 = safe_vector_operand (op1, mode1);
13094
13095 /* Swap operands if we have a comparison that isn't available in
13096 hardware. */
13097 if (d->flag)
13098 {
13099 rtx tmp = op1;
13100 op1 = op0;
13101 op0 = tmp;
13102 }
13103
13104 target = gen_reg_rtx (SImode);
13105 emit_move_insn (target, const0_rtx);
13106 target = gen_rtx_SUBREG (QImode, target, 0);
13107
13108 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
13109 op0 = copy_to_mode_reg (mode0, op0);
13110 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
13111 op1 = copy_to_mode_reg (mode1, op1);
13112
13113 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13114 pat = GEN_FCN (d->icode) (op0, op1);
13115 if (! pat)
13116 return 0;
13117 emit_insn (pat);
13118 emit_insn (gen_rtx_SET (VOIDmode,
13119 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
13120 gen_rtx_fmt_ee (comparison, QImode,
13121 SET_DEST (pat),
13122 const0_rtx)));
13123
13124 return SUBREG_REG (target);
13125 }
13126
13127 /* Expand an expression EXP that calls a built-in function,
13128 with result going to TARGET if that's convenient
13129 (and in mode MODE if that's convenient).
13130 SUBTARGET may be used as the target for computing one of EXP's operands.
13131 IGNORE is nonzero if the value is to be ignored. */
13132
13133 rtx
13134 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
13135 enum machine_mode mode ATTRIBUTE_UNUSED,
13136 int ignore ATTRIBUTE_UNUSED)
13137 {
13138 const struct builtin_description *d;
13139 size_t i;
13140 enum insn_code icode;
13141 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
13142 tree arglist = TREE_OPERAND (exp, 1);
13143 tree arg0, arg1, arg2;
13144 rtx op0, op1, op2, pat;
13145 enum machine_mode tmode, mode0, mode1, mode2;
13146 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
13147
13148 switch (fcode)
13149 {
13150 case IX86_BUILTIN_EMMS:
13151 emit_insn (gen_emms ());
13152 return 0;
13153
13154 case IX86_BUILTIN_SFENCE:
13155 emit_insn (gen_sfence ());
13156 return 0;
13157
13158 case IX86_BUILTIN_PEXTRW:
13159 case IX86_BUILTIN_PEXTRW128:
13160 icode = (fcode == IX86_BUILTIN_PEXTRW
13161 ? CODE_FOR_mmx_pextrw
13162 : CODE_FOR_sse2_pextrw);
13163 arg0 = TREE_VALUE (arglist);
13164 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13165 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13166 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13167 tmode = insn_data[icode].operand[0].mode;
13168 mode0 = insn_data[icode].operand[1].mode;
13169 mode1 = insn_data[icode].operand[2].mode;
13170
13171 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13172 op0 = copy_to_mode_reg (mode0, op0);
13173 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13174 {
13175 error ("selector must be an integer constant in the range 0..%i",
13176 fcode == IX86_BUILTIN_PEXTRW ? 3:7);
13177 return gen_reg_rtx (tmode);
13178 }
13179 if (target == 0
13180 || GET_MODE (target) != tmode
13181 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13182 target = gen_reg_rtx (tmode);
13183 pat = GEN_FCN (icode) (target, op0, op1);
13184 if (! pat)
13185 return 0;
13186 emit_insn (pat);
13187 return target;
13188
13189 case IX86_BUILTIN_PINSRW:
13190 case IX86_BUILTIN_PINSRW128:
13191 icode = (fcode == IX86_BUILTIN_PINSRW
13192 ? CODE_FOR_mmx_pinsrw
13193 : CODE_FOR_sse2_pinsrw);
13194 arg0 = TREE_VALUE (arglist);
13195 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13196 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13197 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13198 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13199 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13200 tmode = insn_data[icode].operand[0].mode;
13201 mode0 = insn_data[icode].operand[1].mode;
13202 mode1 = insn_data[icode].operand[2].mode;
13203 mode2 = insn_data[icode].operand[3].mode;
13204
13205 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13206 op0 = copy_to_mode_reg (mode0, op0);
13207 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13208 op1 = copy_to_mode_reg (mode1, op1);
13209 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13210 {
13211 error ("selector must be an integer constant in the range 0..%i",
13212 fcode == IX86_BUILTIN_PINSRW ? 15:255);
13213 return const0_rtx;
13214 }
13215 if (target == 0
13216 || GET_MODE (target) != tmode
13217 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13218 target = gen_reg_rtx (tmode);
13219 pat = GEN_FCN (icode) (target, op0, op1, op2);
13220 if (! pat)
13221 return 0;
13222 emit_insn (pat);
13223 return target;
13224
13225 case IX86_BUILTIN_MASKMOVQ:
13226 case IX86_BUILTIN_MASKMOVDQU:
13227 icode = (fcode == IX86_BUILTIN_MASKMOVQ
13228 ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
13229 : (TARGET_64BIT ? CODE_FOR_sse2_maskmovdqu_rex64
13230 : CODE_FOR_sse2_maskmovdqu));
13231 /* Note the arg order is different from the operand order. */
13232 arg1 = TREE_VALUE (arglist);
13233 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
13234 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13235 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13236 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13237 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13238 mode0 = insn_data[icode].operand[0].mode;
13239 mode1 = insn_data[icode].operand[1].mode;
13240 mode2 = insn_data[icode].operand[2].mode;
13241
13242 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13243 op0 = copy_to_mode_reg (mode0, op0);
13244 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
13245 op1 = copy_to_mode_reg (mode1, op1);
13246 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
13247 op2 = copy_to_mode_reg (mode2, op2);
13248 pat = GEN_FCN (icode) (op0, op1, op2);
13249 if (! pat)
13250 return 0;
13251 emit_insn (pat);
13252 return 0;
13253
13254 case IX86_BUILTIN_SQRTSS:
13255 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
13256 case IX86_BUILTIN_RSQRTSS:
13257 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
13258 case IX86_BUILTIN_RCPSS:
13259 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
13260
13261 case IX86_BUILTIN_LOADAPS:
13262 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
13263
13264 case IX86_BUILTIN_LOADUPS:
13265 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
13266
13267 case IX86_BUILTIN_STOREAPS:
13268 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
13269
13270 case IX86_BUILTIN_STOREUPS:
13271 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
13272
13273 case IX86_BUILTIN_LOADSS:
13274 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
13275
13276 case IX86_BUILTIN_STORESS:
13277 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
13278
13279 case IX86_BUILTIN_LOADHPS:
13280 case IX86_BUILTIN_LOADLPS:
13281 case IX86_BUILTIN_LOADHPD:
13282 case IX86_BUILTIN_LOADLPD:
13283 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
13284 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
13285 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
13286 : CODE_FOR_sse2_movsd);
13287 arg0 = TREE_VALUE (arglist);
13288 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13289 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13290 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13291 tmode = insn_data[icode].operand[0].mode;
13292 mode0 = insn_data[icode].operand[1].mode;
13293 mode1 = insn_data[icode].operand[2].mode;
13294
13295 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13296 op0 = copy_to_mode_reg (mode0, op0);
13297 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
13298 if (target == 0
13299 || GET_MODE (target) != tmode
13300 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13301 target = gen_reg_rtx (tmode);
13302 pat = GEN_FCN (icode) (target, op0, op1);
13303 if (! pat)
13304 return 0;
13305 emit_insn (pat);
13306 return target;
13307
13308 case IX86_BUILTIN_STOREHPS:
13309 case IX86_BUILTIN_STORELPS:
13310 case IX86_BUILTIN_STOREHPD:
13311 case IX86_BUILTIN_STORELPD:
13312 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
13313 : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
13314 : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
13315 : CODE_FOR_sse2_movsd);
13316 arg0 = TREE_VALUE (arglist);
13317 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13318 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13319 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13320 mode0 = insn_data[icode].operand[1].mode;
13321 mode1 = insn_data[icode].operand[2].mode;
13322
13323 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13324 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13325 op1 = copy_to_mode_reg (mode1, op1);
13326
13327 pat = GEN_FCN (icode) (op0, op0, op1);
13328 if (! pat)
13329 return 0;
13330 emit_insn (pat);
13331 return 0;
13332
13333 case IX86_BUILTIN_MOVNTPS:
13334 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
13335 case IX86_BUILTIN_MOVNTQ:
13336 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
13337
13338 case IX86_BUILTIN_LDMXCSR:
13339 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
13340 target = assign_386_stack_local (SImode, 0);
13341 emit_move_insn (target, op0);
13342 emit_insn (gen_ldmxcsr (target));
13343 return 0;
13344
13345 case IX86_BUILTIN_STMXCSR:
13346 target = assign_386_stack_local (SImode, 0);
13347 emit_insn (gen_stmxcsr (target));
13348 return copy_to_mode_reg (SImode, target);
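/* Note (not in the original source): both cases above go through a stack
   slot because the ldmxcsr and stmxcsr machine instructions operate on a
   32-bit memory operand rather than a register.  */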
13349
13350 case IX86_BUILTIN_SHUFPS:
13351 case IX86_BUILTIN_SHUFPD:
13352 icode = (fcode == IX86_BUILTIN_SHUFPS
13353 ? CODE_FOR_sse_shufps
13354 : CODE_FOR_sse2_shufpd);
13355 arg0 = TREE_VALUE (arglist);
13356 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13357 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13358 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13359 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13360 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13361 tmode = insn_data[icode].operand[0].mode;
13362 mode0 = insn_data[icode].operand[1].mode;
13363 mode1 = insn_data[icode].operand[2].mode;
13364 mode2 = insn_data[icode].operand[3].mode;
13365
13366 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13367 op0 = copy_to_mode_reg (mode0, op0);
13368 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13369 op1 = copy_to_mode_reg (mode1, op1);
13370 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13371 {
13372 /* @@@ better error message */
13373 error ("mask must be an immediate");
13374 return gen_reg_rtx (tmode);
13375 }
13376 if (target == 0
13377 || GET_MODE (target) != tmode
13378 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13379 target = gen_reg_rtx (tmode);
13380 pat = GEN_FCN (icode) (target, op0, op1, op2);
13381 if (! pat)
13382 return 0;
13383 emit_insn (pat);
13384 return target;
13385
13386 case IX86_BUILTIN_PSHUFW:
13387 case IX86_BUILTIN_PSHUFD:
13388 case IX86_BUILTIN_PSHUFHW:
13389 case IX86_BUILTIN_PSHUFLW:
13390 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
13391 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
13392 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
13393 : CODE_FOR_mmx_pshufw);
13394 arg0 = TREE_VALUE (arglist);
13395 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13396 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13397 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13398 tmode = insn_data[icode].operand[0].mode;
13399 mode1 = insn_data[icode].operand[1].mode;
13400 mode2 = insn_data[icode].operand[2].mode;
13401
13402 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
13403 op0 = copy_to_mode_reg (mode1, op0);
13404 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
13405 {
13406 /* @@@ better error message */
13407 error ("mask must be an immediate");
13408 return const0_rtx;
13409 }
13410 if (target == 0
13411 || GET_MODE (target) != tmode
13412 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13413 target = gen_reg_rtx (tmode);
13414 pat = GEN_FCN (icode) (target, op0, op1);
13415 if (! pat)
13416 return 0;
13417 emit_insn (pat);
13418 return target;
13419
13420 case IX86_BUILTIN_PSLLDQI128:
13421 case IX86_BUILTIN_PSRLDQI128:
13422 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
13423 : CODE_FOR_sse2_lshrti3);
13424 arg0 = TREE_VALUE (arglist);
13425 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13426 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13427 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13428 tmode = insn_data[icode].operand[0].mode;
13429 mode1 = insn_data[icode].operand[1].mode;
13430 mode2 = insn_data[icode].operand[2].mode;
13431
13432 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
13433 {
13434 op0 = copy_to_reg (op0);
13435 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
13436 }
13437 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
13438 {
13439 error ("shift must be an immediate");
13440 return const0_rtx;
13441 }
13442 target = gen_reg_rtx (V2DImode);
13443 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
13444 if (! pat)
13445 return 0;
13446 emit_insn (pat);
13447 return target;
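/* Note (not in the original source): the underlying ashlti3/lshrti3
   patterns shift a TImode value by a bit count, while the pslldq/psrldq
   instructions shift by bytes; the emmintrin.h wrappers are assumed to
   scale the byte count accordingly, roughly

     #define _mm_slli_si128(A, N) \
       ((__m128i) __builtin_ia32_pslldqi128 ((__m128i)(A), (N) * 8))
*/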
13448
13449 case IX86_BUILTIN_FEMMS:
13450 emit_insn (gen_femms ());
13451 return NULL_RTX;
13452
13453 case IX86_BUILTIN_PAVGUSB:
13454 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
13455
13456 case IX86_BUILTIN_PF2ID:
13457 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
13458
13459 case IX86_BUILTIN_PFACC:
13460 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
13461
13462 case IX86_BUILTIN_PFADD:
13463 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
13464
13465 case IX86_BUILTIN_PFCMPEQ:
13466 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
13467
13468 case IX86_BUILTIN_PFCMPGE:
13469 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
13470
13471 case IX86_BUILTIN_PFCMPGT:
13472 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
13473
13474 case IX86_BUILTIN_PFMAX:
13475 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
13476
13477 case IX86_BUILTIN_PFMIN:
13478 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
13479
13480 case IX86_BUILTIN_PFMUL:
13481 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
13482
13483 case IX86_BUILTIN_PFRCP:
13484 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
13485
13486 case IX86_BUILTIN_PFRCPIT1:
13487 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
13488
13489 case IX86_BUILTIN_PFRCPIT2:
13490 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
13491
13492 case IX86_BUILTIN_PFRSQIT1:
13493 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
13494
13495 case IX86_BUILTIN_PFRSQRT:
13496 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
13497
13498 case IX86_BUILTIN_PFSUB:
13499 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
13500
13501 case IX86_BUILTIN_PFSUBR:
13502 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
13503
13504 case IX86_BUILTIN_PI2FD:
13505 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
13506
13507 case IX86_BUILTIN_PMULHRW:
13508 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
13509
13510 case IX86_BUILTIN_PF2IW:
13511 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
13512
13513 case IX86_BUILTIN_PFNACC:
13514 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
13515
13516 case IX86_BUILTIN_PFPNACC:
13517 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
13518
13519 case IX86_BUILTIN_PI2FW:
13520 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
13521
13522 case IX86_BUILTIN_PSWAPDSI:
13523 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
13524
13525 case IX86_BUILTIN_PSWAPDSF:
13526 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
13527
13528 case IX86_BUILTIN_SSE_ZERO:
13529 target = gen_reg_rtx (V4SFmode);
13530 emit_insn (gen_sse_clrv4sf (target, CONST0_RTX (V4SFmode)));
13531 return target;
13532
13533 case IX86_BUILTIN_MMX_ZERO:
13534 target = gen_reg_rtx (DImode);
13535 emit_insn (gen_mmx_clrdi (target));
13536 return target;
13537
13538 case IX86_BUILTIN_CLRTI:
13539 target = gen_reg_rtx (V2DImode);
13540 emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode, target, V2DImode, 0)));
13541 return target;
13542
13543
13544 case IX86_BUILTIN_SQRTSD:
13545 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
13546 case IX86_BUILTIN_LOADAPD:
13547 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
13548 case IX86_BUILTIN_LOADUPD:
13549 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
13550
13551 case IX86_BUILTIN_STOREAPD:
13552 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13553 case IX86_BUILTIN_STOREUPD:
13554 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
13555
13556 case IX86_BUILTIN_LOADSD:
13557 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
13558
13559 case IX86_BUILTIN_STORESD:
13560 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
13561
13562 case IX86_BUILTIN_SETPD1:
13563 target = assign_386_stack_local (DFmode, 0);
13564 arg0 = TREE_VALUE (arglist);
13565 emit_move_insn (adjust_address (target, DFmode, 0),
13566 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
13567 op0 = gen_reg_rtx (V2DFmode);
13568 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
13569 emit_insn (gen_sse2_shufpd (op0, op0, op0, const0_rtx));
13570 return op0;
13571
13572 case IX86_BUILTIN_SETPD:
13573 target = assign_386_stack_local (V2DFmode, 0);
13574 arg0 = TREE_VALUE (arglist);
13575 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13576 emit_move_insn (adjust_address (target, DFmode, 0),
13577 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
13578 emit_move_insn (adjust_address (target, DFmode, 8),
13579 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
13580 op0 = gen_reg_rtx (V2DFmode);
13581 emit_insn (gen_sse2_movapd (op0, target));
13582 return op0;
13583
13584 case IX86_BUILTIN_LOADRPD:
13585 target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
13586 gen_reg_rtx (V2DFmode), 1);
13587 emit_insn (gen_sse2_shufpd (target, target, target, const1_rtx));
13588 return target;
13589
13590 case IX86_BUILTIN_LOADPD1:
13591 target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
13592 gen_reg_rtx (V2DFmode), 1);
13593 emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
13594 return target;
13595
13596 case IX86_BUILTIN_STOREPD1:
13597 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13598 case IX86_BUILTIN_STORERPD:
13599 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13600
13601 case IX86_BUILTIN_CLRPD:
13602 target = gen_reg_rtx (V2DFmode);
13603 emit_insn (gen_sse_clrv2df (target));
13604 return target;
13605
13606 case IX86_BUILTIN_MFENCE:
13607 emit_insn (gen_sse2_mfence ());
13608 return 0;
13609 case IX86_BUILTIN_LFENCE:
13610 emit_insn (gen_sse2_lfence ());
13611 return 0;
13612
13613 case IX86_BUILTIN_CLFLUSH:
13614 arg0 = TREE_VALUE (arglist);
13615 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13616 icode = CODE_FOR_sse2_clflush;
13617 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
13618 op0 = copy_to_mode_reg (Pmode, op0);
13619
13620 emit_insn (gen_sse2_clflush (op0));
13621 return 0;
13622
13623 case IX86_BUILTIN_MOVNTPD:
13624 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
13625 case IX86_BUILTIN_MOVNTDQ:
13626 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
13627 case IX86_BUILTIN_MOVNTI:
13628 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
13629
13630 case IX86_BUILTIN_LOADDQA:
13631 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa, arglist, target, 1);
13632 case IX86_BUILTIN_LOADDQU:
13633 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
13634 case IX86_BUILTIN_LOADD:
13635 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd, arglist, target, 1);
13636
13637 case IX86_BUILTIN_STOREDQA:
13638 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa, arglist);
13639 case IX86_BUILTIN_STOREDQU:
13640 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
13641 case IX86_BUILTIN_STORED:
13642 return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist);
13643
13644 case IX86_BUILTIN_MONITOR:
13645 arg0 = TREE_VALUE (arglist);
13646 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13647 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13648 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13649 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13650 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13651 if (!REG_P (op0))
13652 op0 = copy_to_mode_reg (SImode, op0);
13653 if (!REG_P (op1))
13654 op1 = copy_to_mode_reg (SImode, op1);
13655 if (!REG_P (op2))
13656 op2 = copy_to_mode_reg (SImode, op2);
13657 emit_insn (gen_monitor (op0, op1, op2));
13658 return 0;
13659
13660 case IX86_BUILTIN_MWAIT:
13661 arg0 = TREE_VALUE (arglist);
13662 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13663 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13664 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13665 if (!REG_P (op0))
13666 op0 = copy_to_mode_reg (SImode, op0);
13667 if (!REG_P (op1))
13668 op1 = copy_to_mode_reg (SImode, op1);
13669 emit_insn (gen_mwait (op0, op1));
13670 return 0;
13671
13672 case IX86_BUILTIN_LOADDDUP:
13673 return ix86_expand_unop_builtin (CODE_FOR_loadddup, arglist, target, 1);
13674
13675 case IX86_BUILTIN_LDDQU:
13676 return ix86_expand_unop_builtin (CODE_FOR_lddqu, arglist, target,
13677 1);
13678
13679 default:
13680 break;
13681 }
13682
13683 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
13684 if (d->code == fcode)
13685 {
13686 /* Compares are treated specially. */
13687 if (d->icode == CODE_FOR_maskcmpv4sf3
13688 || d->icode == CODE_FOR_vmmaskcmpv4sf3
13689 || d->icode == CODE_FOR_maskncmpv4sf3
13690 || d->icode == CODE_FOR_vmmaskncmpv4sf3
13691 || d->icode == CODE_FOR_maskcmpv2df3
13692 || d->icode == CODE_FOR_vmmaskcmpv2df3
13693 || d->icode == CODE_FOR_maskncmpv2df3
13694 || d->icode == CODE_FOR_vmmaskncmpv2df3)
13695 return ix86_expand_sse_compare (d, arglist, target);
13696
13697 return ix86_expand_binop_builtin (d->icode, arglist, target);
13698 }
13699
13700 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
13701 if (d->code == fcode)
13702 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
13703
13704 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
13705 if (d->code == fcode)
13706 return ix86_expand_sse_comi (d, arglist, target);
13707
13708 /* @@@ Should really do something sensible here. */
13709 return 0;
13710 }
13711
13712 /* Store OPERAND to memory after reload is completed. This means
13713 that we can't easily use assign_stack_local. */
13714 rtx
13715 ix86_force_to_memory (enum machine_mode mode, rtx operand)
13716 {
13717 rtx result;
13718 if (!reload_completed)
13719 abort ();
13720 if (TARGET_RED_ZONE)
13721 {
13722 result = gen_rtx_MEM (mode,
13723 gen_rtx_PLUS (Pmode,
13724 stack_pointer_rtx,
13725 GEN_INT (-RED_ZONE_SIZE)));
13726 emit_move_insn (result, operand);
13727 }
13728 else if (!TARGET_RED_ZONE && TARGET_64BIT)
13729 {
13730 switch (mode)
13731 {
13732 case HImode:
13733 case SImode:
13734 operand = gen_lowpart (DImode, operand);
13735 /* FALLTHRU */
13736 case DImode:
13737 emit_insn (
13738 gen_rtx_SET (VOIDmode,
13739 gen_rtx_MEM (DImode,
13740 gen_rtx_PRE_DEC (DImode,
13741 stack_pointer_rtx)),
13742 operand));
13743 break;
13744 default:
13745 abort ();
13746 }
13747 result = gen_rtx_MEM (mode, stack_pointer_rtx);
13748 }
13749 else
13750 {
13751 switch (mode)
13752 {
13753 case DImode:
13754 {
13755 rtx operands[2];
13756 split_di (&operand, 1, operands, operands + 1);
13757 emit_insn (
13758 gen_rtx_SET (VOIDmode,
13759 gen_rtx_MEM (SImode,
13760 gen_rtx_PRE_DEC (Pmode,
13761 stack_pointer_rtx)),
13762 operands[1]));
13763 emit_insn (
13764 gen_rtx_SET (VOIDmode,
13765 gen_rtx_MEM (SImode,
13766 gen_rtx_PRE_DEC (Pmode,
13767 stack_pointer_rtx)),
13768 operands[0]));
13769 }
13770 break;
13771 case HImode:
13772 /* It is better to store HImodes as SImodes. */
13773 if (!TARGET_PARTIAL_REG_STALL)
13774 operand = gen_lowpart (SImode, operand);
13775 /* FALLTHRU */
13776 case SImode:
13777 emit_insn (
13778 gen_rtx_SET (VOIDmode,
13779 gen_rtx_MEM (GET_MODE (operand),
13780 gen_rtx_PRE_DEC (SImode,
13781 stack_pointer_rtx)),
13782 operand));
13783 break;
13784 default:
13785 abort ();
13786 }
13787 result = gen_rtx_MEM (mode, stack_pointer_rtx);
13788 }
13789 return result;
13790 }
13791
13792 /* Free the operand from memory. */
13793 void
13794 ix86_free_from_memory (enum machine_mode mode)
13795 {
13796 if (!TARGET_RED_ZONE)
13797 {
13798 int size;
13799
13800 if (mode == DImode || TARGET_64BIT)
13801 size = 8;
13802 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
13803 size = 2;
13804 else
13805 size = 4;
13806 /* Use LEA to deallocate stack space. In peephole2 it will be converted
13807 to a pop or add instruction if registers are available. */
13808 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
13809 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
13810 GEN_INT (size))));
13811 }
13812 }
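/* Note that ix86_force_to_memory and ix86_free_from_memory above are
   intended to be used as a pair: the first spills OPERAND to a scratch
   stack slot (or into the red zone), and the second releases that slot
   again by undoing the stack adjustment.  */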
13813
13814 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
13815 QImode must go into class Q_REGS.
13816 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
13817 movdf to do mem-to-mem moves through integer regs. */
13818 enum reg_class
13819 ix86_preferred_reload_class (rtx x, enum reg_class class)
13820 {
13821 if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x)))
13822 return NO_REGS;
13823 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
13824 {
13825 /* SSE can't load any constant directly yet. */
13826 if (SSE_CLASS_P (class))
13827 return NO_REGS;
13828 /* Floats can load 0 and 1. */
13829 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
13830 {
13831 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
13832 if (MAYBE_SSE_CLASS_P (class))
13833 return (reg_class_subset_p (class, GENERAL_REGS)
13834 ? GENERAL_REGS : FLOAT_REGS);
13835 else
13836 return class;
13837 }
13838 /* General regs can load everything. */
13839 if (reg_class_subset_p (class, GENERAL_REGS))
13840 return GENERAL_REGS;
13841 /* In case we haven't resolved FLOAT or SSE yet, give up. */
13842 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
13843 return NO_REGS;
13844 }
13845 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
13846 return NO_REGS;
13847 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
13848 return Q_REGS;
13849 return class;
13850 }
13851
13852 /* If we are copying between general and FP registers, we need a memory
13853 location. The same is true for SSE and MMX registers.
13854
13855 The macro can't work reliably when one of the CLASSES is a class containing
13856 registers from multiple units (SSE, MMX, integer). We avoid this by never
13857 combining those units in a single alternative in the machine description.
13858 Ensure that this constraint holds to avoid unexpected surprises.
13859
13860 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
13861 enforce these sanity checks. */
13862 int
13863 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
13864 enum machine_mode mode, int strict)
13865 {
13866 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
13867 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
13868 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
13869 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
13870 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
13871 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
13872 {
13873 if (strict)
13874 abort ();
13875 else
13876 return 1;
13877 }
13878 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
13879 || ((SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
13880 || MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
13881 && ((mode != SImode && (mode != DImode || !TARGET_64BIT))
13882 || (!TARGET_INTER_UNIT_MOVES && !optimize_size))));
13883 }
13884 /* Return the cost of moving data from a register in class CLASS1 to
13885 one in class CLASS2.
13886
13887 It is not required that the cost always equal 2 when FROM is the same as TO;
13888 on some machines it is expensive to move between registers if they are not
13889 general registers. */
13890 int
13891 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
13892 enum reg_class class2)
13893 {
13894 /* In case we require secondary memory, compute cost of the store followed
13895 by load. In order to avoid bad register allocation choices, we need
13896 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
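  /* For illustration: with hypothetical memory move costs of 4 in each
     direction for both classes, the computation below gives
     1 + MAX (4, 4) + MAX (4, 4) == 9, before the extra penalties.  */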
13897
13898 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
13899 {
13900 int cost = 1;
13901
13902 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
13903 MEMORY_MOVE_COST (mode, class1, 1));
13904 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
13905 MEMORY_MOVE_COST (mode, class2, 1));
13906
13907 /* In case of copying from a general purpose register we may emit multiple
13908 stores followed by a single load, causing a memory size mismatch stall.
13909 Count this as an arbitrarily high cost of 20. */
13910 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
13911 cost += 20;
13912
13913 /* In the case of FP/MMX moves, the registers actually overlap, and we
13914 have to switch modes in order to treat them differently. */
13915 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
13916 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
13917 cost += 20;
13918
13919 return cost;
13920 }
13921
13922 /* Moves between SSE/MMX and integer unit are expensive. */
13923 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
13924 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
13925 return ix86_cost->mmxsse_to_integer;
13926 if (MAYBE_FLOAT_CLASS_P (class1))
13927 return ix86_cost->fp_move;
13928 if (MAYBE_SSE_CLASS_P (class1))
13929 return ix86_cost->sse_move;
13930 if (MAYBE_MMX_CLASS_P (class1))
13931 return ix86_cost->mmx_move;
13932 return 2;
13933 }
13934
13935 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
13936 int
13937 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
13938 {
13939 /* Flags, and only flags, can hold CCmode values. */
13940 if (CC_REGNO_P (regno))
13941 return GET_MODE_CLASS (mode) == MODE_CC;
13942 if (GET_MODE_CLASS (mode) == MODE_CC
13943 || GET_MODE_CLASS (mode) == MODE_RANDOM
13944 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
13945 return 0;
13946 if (FP_REGNO_P (regno))
13947 return VALID_FP_MODE_P (mode);
13948 if (SSE_REGNO_P (regno))
13949 return (TARGET_SSE ? VALID_SSE_REG_MODE (mode) : 0);
13950 if (MMX_REGNO_P (regno))
13951 return (TARGET_MMX
13952 ? VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode) : 0);
13953 /* We handle both integers and floats in the general purpose registers.
13954 In the future we should be able to handle vector modes as well. */
13955 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
13956 return 0;
13957 /* Take care with QImode values - they can be in non-QI regs, but then
13958 they do cause partial register stalls. */
13959 if (regno < 4 || mode != QImode || TARGET_64BIT)
13960 return 1;
13961 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
13962 }
13963
13964 /* Return the cost of moving data of mode M between a
13965 register and memory. A value of 2 is the default; this cost is
13966 relative to those in `REGISTER_MOVE_COST'.
13967
13968 If moving between registers and memory is more expensive than
13969 between two registers, you should define this macro to express the
13970 relative cost.
13971
13972 Also model the increased cost of moving QImode registers in
13973 non-Q_REGS classes.
13974 */
13975 int
13976 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
13977 {
13978 if (FLOAT_CLASS_P (class))
13979 {
13980 int index;
13981 switch (mode)
13982 {
13983 case SFmode:
13984 index = 0;
13985 break;
13986 case DFmode:
13987 index = 1;
13988 break;
13989 case XFmode:
13990 index = 2;
13991 break;
13992 default:
13993 return 100;
13994 }
13995 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
13996 }
13997 if (SSE_CLASS_P (class))
13998 {
13999 int index;
14000 switch (GET_MODE_SIZE (mode))
14001 {
14002 case 4:
14003 index = 0;
14004 break;
14005 case 8:
14006 index = 1;
14007 break;
14008 case 16:
14009 index = 2;
14010 break;
14011 default:
14012 return 100;
14013 }
14014 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
14015 }
14016 if (MMX_CLASS_P (class))
14017 {
14018 int index;
14019 switch (GET_MODE_SIZE (mode))
14020 {
14021 case 4:
14022 index = 0;
14023 break;
14024 case 8:
14025 index = 1;
14026 break;
14027 default:
14028 return 100;
14029 }
14030 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
14031 }
14032 switch (GET_MODE_SIZE (mode))
14033 {
14034 case 1:
14035 if (in)
14036 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
14037 : ix86_cost->movzbl_load);
14038 else
14039 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
14040 : ix86_cost->int_store[0] + 4);
14041 break;
14042 case 2:
14043 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
14044 default:
14045 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
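/* E.g. a DImode value on a 32-bit target needs (8 + 4 - 1) / 4 == 2 such
   moves, so it costs twice the SImode load or store cost.  */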
14046 if (mode == TFmode)
14047 mode = XFmode;
14048 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
14049 * (((int) GET_MODE_SIZE (mode)
14050 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
14051 }
14052 }
14053
14054 /* Compute a (partial) cost for rtx X. Return true if the complete
14055 cost has been computed, and false if subexpressions should be
14056 scanned. In either case, *TOTAL contains the cost result. */
14057
14058 static bool
14059 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
14060 {
14061 enum machine_mode mode = GET_MODE (x);
14062
14063 switch (code)
14064 {
14065 case CONST_INT:
14066 case CONST:
14067 case LABEL_REF:
14068 case SYMBOL_REF:
14069 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
14070 *total = 3;
14071 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
14072 *total = 2;
14073 else if (flag_pic && SYMBOLIC_CONST (x)
14074 && (!TARGET_64BIT
14075 || (GET_CODE (x) != LABEL_REF
14076 && (GET_CODE (x) != SYMBOL_REF
14077 || !SYMBOL_REF_LOCAL_P (x)))))
14078 *total = 1;
14079 else
14080 *total = 0;
14081 return true;
14082
14083 case CONST_DOUBLE:
14084 if (mode == VOIDmode)
14085 *total = 0;
14086 else
14087 switch (standard_80387_constant_p (x))
14088 {
14089 case 1: /* 0.0 */
14090 *total = 1;
14091 break;
14092 default: /* Other constants */
14093 *total = 2;
14094 break;
14095 case 0:
14096 case -1:
14097 /* Start with (MEM (SYMBOL_REF)), since that's where
14098 it'll probably end up. Add a penalty for size. */
14099 *total = (COSTS_N_INSNS (1)
14100 + (flag_pic != 0 && !TARGET_64BIT)
14101 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
14102 break;
14103 }
14104 return true;
14105
14106 case ZERO_EXTEND:
14107 /* The zero extension is often completely free on x86_64, so make
14108 it as cheap as possible. */
14109 if (TARGET_64BIT && mode == DImode
14110 && GET_MODE (XEXP (x, 0)) == SImode)
14111 *total = 1;
14112 else if (TARGET_ZERO_EXTEND_WITH_AND)
14113 *total = COSTS_N_INSNS (ix86_cost->add);
14114 else
14115 *total = COSTS_N_INSNS (ix86_cost->movzx);
14116 return false;
14117
14118 case SIGN_EXTEND:
14119 *total = COSTS_N_INSNS (ix86_cost->movsx);
14120 return false;
14121
14122 case ASHIFT:
14123 if (GET_CODE (XEXP (x, 1)) == CONST_INT
14124 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
14125 {
14126 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
14127 if (value == 1)
14128 {
14129 *total = COSTS_N_INSNS (ix86_cost->add);
14130 return false;
14131 }
14132 if ((value == 2 || value == 3)
14133 && ix86_cost->lea <= ix86_cost->shift_const)
14134 {
14135 *total = COSTS_N_INSNS (ix86_cost->lea);
14136 return false;
14137 }
14138 }
14139 /* FALLTHRU */
14140
14141 case ROTATE:
14142 case ASHIFTRT:
14143 case LSHIFTRT:
14144 case ROTATERT:
14145 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
14146 {
14147 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14148 {
14149 if (INTVAL (XEXP (x, 1)) > 32)
14150 *total = COSTS_N_INSNS(ix86_cost->shift_const + 2);
14151 else
14152 *total = COSTS_N_INSNS(ix86_cost->shift_const * 2);
14153 }
14154 else
14155 {
14156 if (GET_CODE (XEXP (x, 1)) == AND)
14157 *total = COSTS_N_INSNS(ix86_cost->shift_var * 2);
14158 else
14159 *total = COSTS_N_INSNS(ix86_cost->shift_var * 6 + 2);
14160 }
14161 }
14162 else
14163 {
14164 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14165 *total = COSTS_N_INSNS (ix86_cost->shift_const);
14166 else
14167 *total = COSTS_N_INSNS (ix86_cost->shift_var);
14168 }
14169 return false;
14170
14171 case MULT:
14172 if (FLOAT_MODE_P (mode))
14173 {
14174 *total = COSTS_N_INSNS (ix86_cost->fmul);
14175 return false;
14176 }
14177 else
14178 {
14179 rtx op0 = XEXP (x, 0);
14180 rtx op1 = XEXP (x, 1);
14181 int nbits;
14182 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14183 {
14184 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
14185 for (nbits = 0; value != 0; value &= value - 1)
14186 nbits++;
14187 }
14188 else
14189 /* This is arbitrary. */
14190 nbits = 7;
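/* E.g. a multiply by 10 (binary 1010) has two bits set, so the loop above
   leaves nbits == 2 and the cost computed below is
   mult_init[MODE_INDEX (mode)] + 2 * mult_bit.  */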
14191
14192 /* Compute costs correctly for widening multiplication. */
14193 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op1) == ZERO_EXTEND)
14194 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
14195 == GET_MODE_SIZE (mode))
14196 {
14197 int is_mulwiden = 0;
14198 enum machine_mode inner_mode = GET_MODE (op0);
14199
14200 if (GET_CODE (op0) == GET_CODE (op1))
14201 is_mulwiden = 1, op1 = XEXP (op1, 0);
14202 else if (GET_CODE (op1) == CONST_INT)
14203 {
14204 if (GET_CODE (op0) == SIGN_EXTEND)
14205 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
14206 == INTVAL (op1);
14207 else
14208 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
14209 }
14210
14211 if (is_mulwiden)
14212 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
14213 }
14214
14215 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
14216 + nbits * ix86_cost->mult_bit)
14217 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code);
14218
14219 return true;
14220 }
14221
14222 case DIV:
14223 case UDIV:
14224 case MOD:
14225 case UMOD:
14226 if (FLOAT_MODE_P (mode))
14227 *total = COSTS_N_INSNS (ix86_cost->fdiv);
14228 else
14229 *total = COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)]);
14230 return false;
14231
14232 case PLUS:
14233 if (FLOAT_MODE_P (mode))
14234 *total = COSTS_N_INSNS (ix86_cost->fadd);
14235 else if (GET_MODE_CLASS (mode) == MODE_INT
14236 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
14237 {
14238 if (GET_CODE (XEXP (x, 0)) == PLUS
14239 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
14240 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
14241 && CONSTANT_P (XEXP (x, 1)))
14242 {
14243 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
14244 if (val == 2 || val == 4 || val == 8)
14245 {
14246 *total = COSTS_N_INSNS (ix86_cost->lea);
14247 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
14248 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
14249 outer_code);
14250 *total += rtx_cost (XEXP (x, 1), outer_code);
14251 return true;
14252 }
14253 }
14254 else if (GET_CODE (XEXP (x, 0)) == MULT
14255 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
14256 {
14257 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
14258 if (val == 2 || val == 4 || val == 8)
14259 {
14260 *total = COSTS_N_INSNS (ix86_cost->lea);
14261 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
14262 *total += rtx_cost (XEXP (x, 1), outer_code);
14263 return true;
14264 }
14265 }
14266 else if (GET_CODE (XEXP (x, 0)) == PLUS)
14267 {
14268 *total = COSTS_N_INSNS (ix86_cost->lea);
14269 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
14270 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
14271 *total += rtx_cost (XEXP (x, 1), outer_code);
14272 return true;
14273 }
14274 }
14275 /* FALLTHRU */
14276
14277 case MINUS:
14278 if (FLOAT_MODE_P (mode))
14279 {
14280 *total = COSTS_N_INSNS (ix86_cost->fadd);
14281 return false;
14282 }
14283 /* FALLTHRU */
14284
14285 case AND:
14286 case IOR:
14287 case XOR:
14288 if (!TARGET_64BIT && mode == DImode)
14289 {
14290 *total = (COSTS_N_INSNS (ix86_cost->add) * 2
14291 + (rtx_cost (XEXP (x, 0), outer_code)
14292 << (GET_MODE (XEXP (x, 0)) != DImode))
14293 + (rtx_cost (XEXP (x, 1), outer_code)
14294 << (GET_MODE (XEXP (x, 1)) != DImode)));
14295 return true;
14296 }
14297 /* FALLTHRU */
14298
14299 case NEG:
14300 if (FLOAT_MODE_P (mode))
14301 {
14302 *total = COSTS_N_INSNS (ix86_cost->fchs);
14303 return false;
14304 }
14305 /* FALLTHRU */
14306
14307 case NOT:
14308 if (!TARGET_64BIT && mode == DImode)
14309 *total = COSTS_N_INSNS (ix86_cost->add * 2);
14310 else
14311 *total = COSTS_N_INSNS (ix86_cost->add);
14312 return false;
14313
14314 case FLOAT_EXTEND:
14315 if (!TARGET_SSE_MATH || !VALID_SSE_REG_MODE (mode))
14316 *total = 0;
14317 return false;
14318
14319 case ABS:
14320 if (FLOAT_MODE_P (mode))
14321 *total = COSTS_N_INSNS (ix86_cost->fabs);
14322 return false;
14323
14324 case SQRT:
14325 if (FLOAT_MODE_P (mode))
14326 *total = COSTS_N_INSNS (ix86_cost->fsqrt);
14327 return false;
14328
14329 case UNSPEC:
14330 if (XINT (x, 1) == UNSPEC_TP)
14331 *total = 0;
14332 return false;
14333
14334 default:
14335 return false;
14336 }
14337 }
14338
14339 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
14340 static void
14341 ix86_svr3_asm_out_constructor (rtx symbol, int priority ATTRIBUTE_UNUSED)
14342 {
14343 init_section ();
14344 fputs ("\tpushl $", asm_out_file);
14345 assemble_name (asm_out_file, XSTR (symbol, 0));
14346 fputc ('\n', asm_out_file);
14347 }
14348 #endif
14349
14350 #if TARGET_MACHO
14351
14352 static int current_machopic_label_num;
14353
14354 /* Given a symbol name and its associated stub, write out the
14355 definition of the stub. */
14356
14357 void
14358 machopic_output_stub (FILE *file, const char *symb, const char *stub)
14359 {
14360 unsigned int length;
14361 char *binder_name, *symbol_name, lazy_ptr_name[32];
14362 int label = ++current_machopic_label_num;
14363
14364 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
14365 symb = (*targetm.strip_name_encoding) (symb);
14366
14367 length = strlen (stub);
14368 binder_name = alloca (length + 32);
14369 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
14370
14371 length = strlen (symb);
14372 symbol_name = alloca (length + 32);
14373 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
14374
14375 sprintf (lazy_ptr_name, "L%d$lz", label);
14376
14377 if (MACHOPIC_PURE)
14378 machopic_picsymbol_stub_section ();
14379 else
14380 machopic_symbol_stub_section ();
14381
14382 fprintf (file, "%s:\n", stub);
14383 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
14384
14385 if (MACHOPIC_PURE)
14386 {
14387 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
14388 fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
14389 fprintf (file, "\tjmp %%edx\n");
14390 }
14391 else
14392 fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
14393
14394 fprintf (file, "%s:\n", binder_name);
14395
14396 if (MACHOPIC_PURE)
14397 {
14398 fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
14399 fprintf (file, "\tpushl %%eax\n");
14400 }
14401 else
14402 fprintf (file, "\t pushl $%s\n", lazy_ptr_name);
14403
14404 fprintf (file, "\tjmp dyld_stub_binding_helper\n");
14405
14406 machopic_lazy_symbol_ptr_section ();
14407 fprintf (file, "%s:\n", lazy_ptr_name);
14408 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
14409 fprintf (file, "\t.long %s\n", binder_name);
14410 }
14411 #endif /* TARGET_MACHO */
14412
14413 /* Order the registers for the register allocator. */
14414
14415 void
14416 x86_order_regs_for_local_alloc (void)
14417 {
14418 int pos = 0;
14419 int i;
14420
14421 /* First allocate the local general purpose registers. */
14422 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
14423 if (GENERAL_REGNO_P (i) && call_used_regs[i])
14424 reg_alloc_order [pos++] = i;
14425
14426 /* Global general purpose registers. */
14427 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
14428 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
14429 reg_alloc_order [pos++] = i;
14430
14431 /* x87 registers come first in case we are doing FP math
14432 using them. */
14433 if (!TARGET_SSE_MATH)
14434 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
14435 reg_alloc_order [pos++] = i;
14436
14437 /* SSE registers. */
14438 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
14439 reg_alloc_order [pos++] = i;
14440 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
14441 reg_alloc_order [pos++] = i;
14442
14443 /* x87 registers. */
14444 if (TARGET_SSE_MATH)
14445 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
14446 reg_alloc_order [pos++] = i;
14447
14448 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
14449 reg_alloc_order [pos++] = i;
14450
14451 /* Initialize the rest of the array, as some registers are not
14452 allocated at all. */
14453 while (pos < FIRST_PSEUDO_REGISTER)
14454 reg_alloc_order [pos++] = 0;
14455 }
14456
14457 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
14458 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
14459 #endif
14460
14461 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
14462 struct attribute_spec.handler. */
14463 static tree
14464 ix86_handle_struct_attribute (tree *node, tree name,
14465 tree args ATTRIBUTE_UNUSED,
14466 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
14467 {
14468 tree *type = NULL;
14469 if (DECL_P (*node))
14470 {
14471 if (TREE_CODE (*node) == TYPE_DECL)
14472 type = &TREE_TYPE (*node);
14473 }
14474 else
14475 type = node;
14476
14477 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
14478 || TREE_CODE (*type) == UNION_TYPE)))
14479 {
14480 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
14481 *no_add_attrs = true;
14482 }
14483
14484 else if ((is_attribute_p ("ms_struct", name)
14485 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
14486 || ((is_attribute_p ("gcc_struct", name)
14487 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
14488 {
14489 warning ("`%s' incompatible attribute ignored",
14490 IDENTIFIER_POINTER (name));
14491 *no_add_attrs = true;
14492 }
14493
14494 return NULL_TREE;
14495 }
14496
14497 static bool
14498 ix86_ms_bitfield_layout_p (tree record_type)
14499 {
14500 return ((TARGET_USE_MS_BITFIELD_LAYOUT
14501 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
14502 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
14503 }
14504
14505 /* Returns an expression indicating where the this parameter is
14506 located on entry to the FUNCTION. */
14507
14508 static rtx
14509 x86_this_parameter (tree function)
14510 {
14511 tree type = TREE_TYPE (function);
14512
14513 if (TARGET_64BIT)
14514 {
14515 int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
14516 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
14517 }
14518
14519 if (ix86_function_regparm (type, function) > 0)
14520 {
14521 tree parm;
14522
14523 parm = TYPE_ARG_TYPES (type);
14524 /* Figure out whether or not the function has a variable number of
14525 arguments. */
14526 for (; parm; parm = TREE_CHAIN (parm))
14527 if (TREE_VALUE (parm) == void_type_node)
14528 break;
14529 /* If not, the this parameter is in the first argument. */
14530 if (parm)
14531 {
14532 int regno = 0;
14533 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
14534 regno = 2;
14535 return gen_rtx_REG (SImode, regno);
14536 }
14537 }
14538
14539 if (aggregate_value_p (TREE_TYPE (type), type))
14540 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
14541 else
14542 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
14543 }
14544
14545 /* Determine whether x86_output_mi_thunk can succeed. */
14546
14547 static bool
14548 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
14549 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
14550 HOST_WIDE_INT vcall_offset, tree function)
14551 {
14552 /* 64-bit can handle anything. */
14553 if (TARGET_64BIT)
14554 return true;
14555
14556 /* For 32-bit, everything's fine if we have one free register. */
14557 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
14558 return true;
14559
14560 /* Need a free register for vcall_offset. */
14561 if (vcall_offset)
14562 return false;
14563
14564 /* Need a free register for GOT references. */
14565 if (flag_pic && !(*targetm.binds_local_p) (function))
14566 return false;
14567
14568 /* Otherwise ok. */
14569 return true;
14570 }
14571
14572 /* Output the assembler code for a thunk function. THUNK_DECL is the
14573 declaration for the thunk function itself, FUNCTION is the decl for
14574 the target function. DELTA is an immediate constant offset to be
14575 added to THIS. If VCALL_OFFSET is nonzero, the word at
14576 *(*this + vcall_offset) should be added to THIS. */
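/* For the simplest 32-bit case (no VCALL_OFFSET, not PIC, and THIS passed
   on the stack) the code below amounts to roughly

	addl	$DELTA, 4(%esp)
	jmp	FUNCTION

   where DELTA and FUNCTION stand for the actual offset and target.  */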
14577
14578 static void
14579 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
14580 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
14581 HOST_WIDE_INT vcall_offset, tree function)
14582 {
14583 rtx xops[3];
14584 rtx this = x86_this_parameter (function);
14585 rtx this_reg, tmp;
14586
14587 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
14588 pull it in now and let DELTA benefit. */
14589 if (REG_P (this))
14590 this_reg = this;
14591 else if (vcall_offset)
14592 {
14593 /* Put the this parameter into %eax. */
14594 xops[0] = this;
14595 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
14596 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
14597 }
14598 else
14599 this_reg = NULL_RTX;
14600
14601 /* Adjust the this parameter by a fixed constant. */
14602 if (delta)
14603 {
14604 xops[0] = GEN_INT (delta);
14605 xops[1] = this_reg ? this_reg : this;
14606 if (TARGET_64BIT)
14607 {
14608 if (!x86_64_general_operand (xops[0], DImode))
14609 {
14610 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
14611 xops[1] = tmp;
14612 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
14613 xops[0] = tmp;
14614 xops[1] = this;
14615 }
14616 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
14617 }
14618 else
14619 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
14620 }
14621
14622 /* Adjust the this parameter by a value stored in the vtable. */
14623 if (vcall_offset)
14624 {
14625 if (TARGET_64BIT)
14626 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
14627 else
14628 {
14629 int tmp_regno = 2 /* ECX */;
14630 if (lookup_attribute ("fastcall",
14631 TYPE_ATTRIBUTES (TREE_TYPE (function))))
14632 tmp_regno = 0 /* EAX */;
14633 tmp = gen_rtx_REG (SImode, tmp_regno);
14634 }
14635
14636 xops[0] = gen_rtx_MEM (Pmode, this_reg);
14637 xops[1] = tmp;
14638 if (TARGET_64BIT)
14639 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
14640 else
14641 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
14642
14643 /* Adjust the this parameter. */
14644 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
14645 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
14646 {
14647 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
14648 xops[0] = GEN_INT (vcall_offset);
14649 xops[1] = tmp2;
14650 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
14651 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
14652 }
14653 xops[1] = this_reg;
14654 if (TARGET_64BIT)
14655 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
14656 else
14657 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
14658 }
14659
14660 /* If necessary, drop THIS back to its stack slot. */
14661 if (this_reg && this_reg != this)
14662 {
14663 xops[0] = this_reg;
14664 xops[1] = this;
14665 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
14666 }
14667
14668 xops[0] = XEXP (DECL_RTL (function), 0);
14669 if (TARGET_64BIT)
14670 {
14671 if (!flag_pic || (*targetm.binds_local_p) (function))
14672 output_asm_insn ("jmp\t%P0", xops);
14673 else
14674 {
14675 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
14676 tmp = gen_rtx_CONST (Pmode, tmp);
14677 tmp = gen_rtx_MEM (QImode, tmp);
14678 xops[0] = tmp;
14679 output_asm_insn ("jmp\t%A0", xops);
14680 }
14681 }
14682 else
14683 {
14684 if (!flag_pic || (*targetm.binds_local_p) (function))
14685 output_asm_insn ("jmp\t%P0", xops);
14686 else
14687 #if TARGET_MACHO
14688 if (TARGET_MACHO)
14689 {
14690 rtx sym_ref = XEXP (DECL_RTL (function), 0);
14691 tmp = (gen_rtx_SYMBOL_REF
14692 (Pmode,
14693 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
14694 tmp = gen_rtx_MEM (QImode, tmp);
14695 xops[0] = tmp;
14696 output_asm_insn ("jmp\t%0", xops);
14697 }
14698 else
14699 #endif /* TARGET_MACHO */
14700 {
14701 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
14702 output_set_got (tmp);
14703
14704 xops[1] = tmp;
14705 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
14706 output_asm_insn ("jmp\t{*}%1", xops);
14707 }
14708 }
14709 }
14710
14711 static void
14712 x86_file_start (void)
14713 {
14714 default_file_start ();
14715 if (X86_FILE_START_VERSION_DIRECTIVE)
14716 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
14717 if (X86_FILE_START_FLTUSED)
14718 fputs ("\t.global\t__fltused\n", asm_out_file);
14719 if (ix86_asm_dialect == ASM_INTEL)
14720 fputs ("\t.intel_syntax\n", asm_out_file);
14721 }
14722
14723 int
14724 x86_field_alignment (tree field, int computed)
14725 {
14726 enum machine_mode mode;
14727 tree type = TREE_TYPE (field);
14728
14729 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
14730 return computed;
14731 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
14732 ? get_inner_array_type (type) : type);
14733 if (mode == DFmode || mode == DCmode
14734 || GET_MODE_CLASS (mode) == MODE_INT
14735 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
14736 return MIN (32, computed);
14737 return computed;
14738 }
14739
14740 /* Output assembler code to FILE to increment profiler label # LABELNO
14741 for profiling a function entry. */
14742 void
14743 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
14744 {
14745 if (TARGET_64BIT)
14746 if (flag_pic)
14747 {
14748 #ifndef NO_PROFILE_COUNTERS
14749 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
14750 #endif
14751 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
14752 }
14753 else
14754 {
14755 #ifndef NO_PROFILE_COUNTERS
14756 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
14757 #endif
14758 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
14759 }
14760 else if (flag_pic)
14761 {
14762 #ifndef NO_PROFILE_COUNTERS
14763 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
14764 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
14765 #endif
14766 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
14767 }
14768 else
14769 {
14770 #ifndef NO_PROFILE_COUNTERS
14771 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
14772 PROFILE_COUNT_REGISTER);
14773 #endif
14774 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
14775 }
14776 }
14777
14778 /* We don't have exact information about the insn sizes, but we may assume
14779 quite safely that we are informed about all 1 byte insns and memory
14780 address sizes. This is enough to eliminate unnecessary padding in
14781 99% of cases. */
14782
14783 static int
14784 min_insn_size (rtx insn)
14785 {
14786 int l = 0;
14787
14788 if (!INSN_P (insn) || !active_insn_p (insn))
14789 return 0;
14790
14791 /* Discard alignments we've emitted, and jump instructions. */
14792 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
14793 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
14794 return 0;
14795 if (GET_CODE (insn) == JUMP_INSN
14796 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
14797 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
14798 return 0;
14799
14800 /* Important case - calls are always 5 bytes.
14801 It is common to have many calls in a row. */
14802 if (GET_CODE (insn) == CALL_INSN
14803 && symbolic_reference_mentioned_p (PATTERN (insn))
14804 && !SIBLING_CALL_P (insn))
14805 return 5;
14806 if (get_attr_length (insn) <= 1)
14807 return 1;
14808
14809 /* For normal instructions we may rely on the sizes of addresses
14810 and the presence of a symbol to require 4 bytes of encoding.
14811 This is not the case for jumps, where references are PC relative. */
14812 if (GET_CODE (insn) != JUMP_INSN)
14813 {
14814 l = get_attr_length_address (insn);
14815 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
14816 l = 4;
14817 }
14818 if (l)
14819 return 1+l;
14820 else
14821 return 2;
14822 }
14823
14824 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in a
14825 16-byte window. */
14826
14827 static void
14828 ix86_avoid_jump_misspredicts (void)
14829 {
14830 rtx insn, start = get_insns ();
14831 int nbytes = 0, njumps = 0;
14832 int isjump = 0;
14833
14834 /* Look for all minimal intervals of instructions containing 4 jumps.
14835 The intervals are bounded by START and INSN. NBYTES is the total
14836 size of instructions in the interval including INSN and not including
14837 START. When NBYTES is smaller than 16 bytes, it is possible
14838 that the ends of START and INSN land in the same 16-byte page.
14839
14840 The smallest offset in the page at which INSN can start is the case where
14841 START ends at offset 0. The offset of INSN is then NBYTES - sizeof (INSN).
14842 We add a p2align to the 16-byte window with maxskip 17 - NBYTES + sizeof (INSN).
14843 */
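  /* For instance, with NBYTES == 12 and an INSN of size 2, the padding
     emitted below is 15 - 12 + 2 == 5 bytes, intended to push INSN into
     the next 16-byte window.  */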
14844 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
14845 {
14846
14847 nbytes += min_insn_size (insn);
14848 if (dump_file)
14849 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
14850 INSN_UID (insn), min_insn_size (insn));
14851 if ((GET_CODE (insn) == JUMP_INSN
14852 && GET_CODE (PATTERN (insn)) != ADDR_VEC
14853 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
14854 || GET_CODE (insn) == CALL_INSN)
14855 njumps++;
14856 else
14857 continue;
14858
14859 while (njumps > 3)
14860 {
14861 start = NEXT_INSN (start);
14862 if ((GET_CODE (start) == JUMP_INSN
14863 && GET_CODE (PATTERN (start)) != ADDR_VEC
14864 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
14865 || GET_CODE (start) == CALL_INSN)
14866 njumps--, isjump = 1;
14867 else
14868 isjump = 0;
14869 nbytes -= min_insn_size (start);
14870 }
14871 if (njumps < 0)
14872 abort ();
14873 if (dump_file)
14874 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
14875 INSN_UID (start), INSN_UID (insn), nbytes);
14876
14877 if (njumps == 3 && isjump && nbytes < 16)
14878 {
14879 int padsize = 15 - nbytes + min_insn_size (insn);
14880
14881 if (dump_file)
14882 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
14883 INSN_UID (insn), padsize);
14884 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
14885 }
14886 }
14887 }
14888
14889 /* AMD Athlon works faster
14890 when RET is not the destination of a conditional jump or directly preceded
14891 by another jump instruction. We avoid the penalty by inserting a NOP just
14892 before the RET instruction in such cases. */
14893 static void
14894 ix86_pad_returns (void)
14895 {
14896 edge e;
14897
14898 for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
14899 {
14900 basic_block bb = e->src;
14901 rtx ret = BB_END (bb);
14902 rtx prev;
14903 bool replace = false;
14904
14905 if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
14906 || !maybe_hot_bb_p (bb))
14907 continue;
14908 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
14909 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
14910 break;
14911 if (prev && GET_CODE (prev) == CODE_LABEL)
14912 {
14913 edge e;
14914 for (e = bb->pred; e; e = e->pred_next)
14915 if (EDGE_FREQUENCY (e) && e->src->index >= 0
14916 && !(e->flags & EDGE_FALLTHRU))
14917 replace = true;
14918 }
14919 if (!replace)
14920 {
14921 prev = prev_active_insn (ret);
14922 if (prev
14923 && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
14924 || GET_CODE (prev) == CALL_INSN))
14925 replace = true;
14926 /* Empty functions get a branch mispredict even when the jump destination
14927 is not visible to us. */
14928 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
14929 replace = true;
14930 }
14931 if (replace)
14932 {
14933 emit_insn_before (gen_return_internal_long (), ret);
14934 delete_insn (ret);
14935 }
14936 }
14937 }
14938
14939 /* Implement machine specific optimizations. We implement padding of returns
14940 for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window. */
14941 static void
14942 ix86_reorg (void)
14943 {
14944 if (TARGET_ATHLON_K8 && optimize && !optimize_size)
14945 ix86_pad_returns ();
14946 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
14947 ix86_avoid_jump_misspredicts ();
14948 }
14949
14950 /* Return nonzero when a QImode register that must be represented via a REX
14951 prefix is used. */
14952 bool
14953 x86_extended_QIreg_mentioned_p (rtx insn)
14954 {
14955 int i;
14956 extract_insn_cached (insn);
14957 for (i = 0; i < recog_data.n_operands; i++)
14958 if (REG_P (recog_data.operand[i])
14959 && REGNO (recog_data.operand[i]) >= 4)
14960 return true;
14961 return false;
14962 }
14963
14964 /* Return nonzero when P points to a register encoded via a REX prefix.
14965 Called via for_each_rtx. */
14966 static int
14967 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
14968 {
14969 unsigned int regno;
14970 if (!REG_P (*p))
14971 return 0;
14972 regno = REGNO (*p);
14973 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
14974 }
14975
14976 /* Return true when INSN mentions a register that must be encoded using a
14977 REX prefix. */
14978 bool
14979 x86_extended_reg_mentioned_p (rtx insn)
14980 {
14981 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
14982 }
14983
14984 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
14985 optabs would emit if we didn't have TFmode patterns. */
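/* For inputs with the sign bit set a straight signed conversion would be
   wrong, so the code below shifts the value right by one (ORing the
   dropped low bit back in to preserve rounding), converts, and then
   doubles the result.  */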
14986
14987 void
14988 x86_emit_floatuns (rtx operands[2])
14989 {
14990 rtx neglab, donelab, i0, i1, f0, in, out;
14991 enum machine_mode mode, inmode;
14992
14993 inmode = GET_MODE (operands[1]);
14994 if (inmode != SImode
14995 && inmode != DImode)
14996 abort ();
14997
14998 out = operands[0];
14999 in = force_reg (inmode, operands[1]);
15000 mode = GET_MODE (out);
15001 neglab = gen_label_rtx ();
15002 donelab = gen_label_rtx ();
15003 i1 = gen_reg_rtx (Pmode);
15004 f0 = gen_reg_rtx (mode);
15005
15006 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
15007
15008 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
15009 emit_jump_insn (gen_jump (donelab));
15010 emit_barrier ();
15011
15012 emit_label (neglab);
15013
15014 i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15015 i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15016 i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
15017 expand_float (f0, i0, 0);
15018 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
15019
15020 emit_label (donelab);
15021 }
15022
15023 /* Initialize vector TARGET via VALS. */
15024 void
15025 ix86_expand_vector_init (rtx target, rtx vals)
15026 {
15027 enum machine_mode mode = GET_MODE (target);
15028 int elt_size = GET_MODE_SIZE (GET_MODE_INNER (mode));
15029 int n_elts = (GET_MODE_SIZE (mode) / elt_size);
15030 int i;
15031
15032 for (i = n_elts - 1; i >= 0; i--)
15033 if (GET_CODE (XVECEXP (vals, 0, i)) != CONST_INT
15034 && GET_CODE (XVECEXP (vals, 0, i)) != CONST_DOUBLE)
15035 break;
15036
15037 /* A few special cases first...
15038 ... constants are best loaded from the constant pool. */
15039 if (i < 0)
15040 {
15041 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
15042 return;
15043 }
15044
15045 /* ... values where only the first field is non-constant are best loaded
15046 from the pool and overwritten via a move later. */
15047 if (!i)
15048 {
15049 rtx op = simplify_gen_subreg (mode, XVECEXP (vals, 0, 0),
15050 GET_MODE_INNER (mode), 0);
15051
15052 op = force_reg (mode, op);
15053 XVECEXP (vals, 0, 0) = CONST0_RTX (GET_MODE_INNER (mode));
15054 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
15055 switch (GET_MODE (target))
15056 {
15057 case V2DFmode:
15058 emit_insn (gen_sse2_movsd (target, target, op));
15059 break;
15060 case V4SFmode:
15061 emit_insn (gen_sse_movss (target, target, op));
15062 break;
15063 default:
15064 break;
15065 }
15066 return;
15067 }
15068
15069 /* And the general case: build the vector with interleave (unpack) operations. */
15070 switch (GET_MODE (target))
15071 {
15072 case V2DFmode:
15073 {
15074 rtx vecop0 =
15075 simplify_gen_subreg (V2DFmode, XVECEXP (vals, 0, 0), DFmode, 0);
15076 rtx vecop1 =
15077 simplify_gen_subreg (V2DFmode, XVECEXP (vals, 0, 1), DFmode, 0);
15078
15079 vecop0 = force_reg (V2DFmode, vecop0);
15080 vecop1 = force_reg (V2DFmode, vecop1);
15081 emit_insn (gen_sse2_unpcklpd (target, vecop0, vecop1));
15082 }
15083 break;
15084 case V4SFmode:
15085 {
15086 rtx vecop0 =
15087 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 0), SFmode, 0);
15088 rtx vecop1 =
15089 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 1), SFmode, 0);
15090 rtx vecop2 =
15091 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 2), SFmode, 0);
15092 rtx vecop3 =
15093 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 3), SFmode, 0);
15094 rtx tmp1 = gen_reg_rtx (V4SFmode);
15095 rtx tmp2 = gen_reg_rtx (V4SFmode);
15096
15097 vecop0 = force_reg (V4SFmode, vecop0);
15098 vecop1 = force_reg (V4SFmode, vecop1);
15099 vecop2 = force_reg (V4SFmode, vecop2);
15100 vecop3 = force_reg (V4SFmode, vecop3);
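/* unpcklps interleaves the low elements of its operands, so tmp1 becomes
   { vecop1, vecop3, ... }, tmp2 becomes { vecop0, vecop2, ... }, and the
   final unpcklps leaves { vecop0, vecop1, vecop2, vecop3 } in TARGET.  */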
15101 emit_insn (gen_sse_unpcklps (tmp1, vecop1, vecop3));
15102 emit_insn (gen_sse_unpcklps (tmp2, vecop0, vecop2));
15103 emit_insn (gen_sse_unpcklps (target, tmp2, tmp1));
15104 }
15105 break;
15106 default:
15107 abort ();
15108 }
15109 }
15110
15111 /* Implements target hook vector_mode_supported_p. */
15112 static bool
15113 ix86_vector_mode_supported_p (enum machine_mode mode)
15114 {
15115 if (TARGET_SSE
15116 && VALID_SSE_REG_MODE (mode))
15117 return true;
15118
15119 else if (TARGET_MMX
15120 && VALID_MMX_REG_MODE (mode))
15121 return true;
15122
15123 else if (TARGET_3DNOW
15124 && VALID_MMX_REG_MODE_3DNOW (mode))
15125 return true;
15126
15127 else
15128 return false;
15129 }
15130
15131 /* Worker function for TARGET_MD_ASM_CLOBBERS.
15132
15133 We do this in the new i386 backend to maintain source compatibility
15134 with the old cc0-based compiler. */
15135
15136 static tree
15137 ix86_md_asm_clobbers (tree clobbers)
15138 {
15139 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
15140 clobbers);
15141 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
15142 clobbers);
15143 clobbers = tree_cons (NULL_TREE, build_string (7, "dirflag"),
15144 clobbers);
15145 return clobbers;
15146 }
15147
15148 /* Worker function for REVERSE_CONDITION. */
15149
15150 enum rtx_code
15151 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
15152 {
15153 return (mode != CCFPmode && mode != CCFPUmode
15154 ? reverse_condition (code)
15155 : reverse_condition_maybe_unordered (code));
15156 }
15157
15158 /* Output code to perform an x87 FP register move, from OPERANDS[1]
15159 to OPERANDS[0]. */
15160
15161 const char *
15162 output_387_reg_move (rtx insn, rtx *operands)
15163 {
15164 if (REG_P (operands[1])
15165 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
15166 {
15167 if (REGNO (operands[0]) == FIRST_STACK_REG
15168 && TARGET_USE_FFREEP)
15169 return "ffreep\t%y0";
15170 return "fstp\t%y0";
15171 }
15172 if (STACK_TOP_P (operands[0]))
15173 return "fld%z1\t%y1";
15174 return "fst\t%y0";
15175 }
15176
15177 /* Output code to perform a conditional jump to LABEL, if the C2 flag in
15178 the FP status register is set. */
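/* C2 is bit 10 of the FPU status word, i.e. bit 2 (0x04) of its high byte,
   which is what the non-SAHF path below tests; with SAHF the same bit lands
   in PF, hence the UNORDERED test on the flags register.  */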
15179
15180 void
15181 ix86_emit_fp_unordered_jump (rtx label)
15182 {
15183 rtx reg = gen_reg_rtx (HImode);
15184 rtx temp;
15185
15186 emit_insn (gen_x86_fnstsw_1 (reg));
15187
15188 if (TARGET_USE_SAHF)
15189 {
15190 emit_insn (gen_x86_sahf_1 (reg));
15191
15192 temp = gen_rtx_REG (CCmode, FLAGS_REG);
15193 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
15194 }
15195 else
15196 {
15197 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
15198
15199 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
15200 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
15201 }
15202
15203 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
15204 gen_rtx_LABEL_REF (VOIDmode, label),
15205 pc_rtx);
15206 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
15207 emit_jump_insn (temp);
15208 }
15209
15210 /* Output code to perform a log1p XFmode calculation. */
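/* The 0.29289... threshold below is 1 - sqrt(2)/2; fyl2xp1 is only
   specified for arguments of smaller magnitude, so larger inputs fall
   back to computing fyl2x on 1 + op1.  */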
15211
15212 void ix86_emit_i387_log1p (rtx op0, rtx op1)
15213 {
15214 rtx label1 = gen_label_rtx ();
15215 rtx label2 = gen_label_rtx ();
15216
15217 rtx tmp = gen_reg_rtx (XFmode);
15218 rtx tmp2 = gen_reg_rtx (XFmode);
15219
15220 emit_insn (gen_absxf2 (tmp, op1));
15221 emit_insn (gen_cmpxf (tmp,
15222 CONST_DOUBLE_FROM_REAL_VALUE (
15223 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
15224 XFmode)));
15225 emit_jump_insn (gen_bge (label1));
15226
15227 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
15228 emit_insn (gen_fyl2xp1_xf3 (op0, tmp2, op1));
15229 emit_jump (label2);
15230
15231 emit_label (label1);
15232 emit_move_insn (tmp, CONST1_RTX (XFmode));
15233 emit_insn (gen_addxf3 (tmp, op1, tmp));
15234 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
15235 emit_insn (gen_fyl2x_xf3 (op0, tmp2, tmp));
15236
15237 emit_label (label2);
15238 }
15239
15240 #include "gt-i386.h"