gcc/config/i386/i386.c
1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004 Free Software Foundation, Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "tm_p.h"
29 #include "regs.h"
30 #include "hard-reg-set.h"
31 #include "real.h"
32 #include "insn-config.h"
33 #include "conditions.h"
34 #include "output.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
37 #include "flags.h"
38 #include "except.h"
39 #include "function.h"
40 #include "recog.h"
41 #include "expr.h"
42 #include "optabs.h"
43 #include "toplev.h"
44 #include "basic-block.h"
45 #include "ggc.h"
46 #include "target.h"
47 #include "target-def.h"
48 #include "langhooks.h"
49 #include "cgraph.h"
50 #include "tree-gimple.h"
51
52 #ifndef CHECK_STACK_LIMIT
53 #define CHECK_STACK_LIMIT (-1)
54 #endif
55
56 /* Return index of given mode in mult and division cost tables. */
57 #define MODE_INDEX(mode) \
58 ((mode) == QImode ? 0 \
59 : (mode) == HImode ? 1 \
60 : (mode) == SImode ? 2 \
61 : (mode) == DImode ? 3 \
62 : 4)
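/* For example, MODE_INDEX (SImode) is 2, so the SImode entry in the
   "cost of starting a multiply" and "cost of a divide/mod" arrays below
   is the third element of each five-element initializer.  */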
63
64 /* Processor costs (relative to an add) */
65 static const
66 struct processor_costs size_cost = {	/* costs for tuning for size */
67 2, /* cost of an add instruction */
68 3, /* cost of a lea instruction */
69 2, /* variable shift costs */
70 3, /* constant shift costs */
71 {3, 3, 3, 3, 5}, /* cost of starting a multiply */
72 0, /* cost of multiply per each bit set */
73 {3, 3, 3, 3, 5}, /* cost of a divide/mod */
74 3, /* cost of movsx */
75 3, /* cost of movzx */
76 0, /* "large" insn */
77 2, /* MOVE_RATIO */
78 2, /* cost for loading QImode using movzbl */
79 {2, 2, 2}, /* cost of loading integer registers
80 in QImode, HImode and SImode.
81 Relative to reg-reg move (2). */
82 {2, 2, 2}, /* cost of storing integer registers */
83 2, /* cost of reg,reg fld/fst */
84 {2, 2, 2}, /* cost of loading fp registers
85 in SFmode, DFmode and XFmode */
86   {2, 2, 2},				/* cost of storing fp registers */
87 3, /* cost of moving MMX register */
88 {3, 3}, /* cost of loading MMX registers
89 in SImode and DImode */
90 {3, 3}, /* cost of storing MMX registers
91 in SImode and DImode */
92 3, /* cost of moving SSE register */
93 {3, 3, 3}, /* cost of loading SSE registers
94 in SImode, DImode and TImode */
95 {3, 3, 3}, /* cost of storing SSE registers
96 in SImode, DImode and TImode */
97 3, /* MMX or SSE register to integer */
98 0, /* size of prefetch block */
99 0, /* number of parallel prefetches */
100 1, /* Branch cost */
101 2, /* cost of FADD and FSUB insns. */
102 2, /* cost of FMUL instruction. */
103 2, /* cost of FDIV instruction. */
104 2, /* cost of FABS instruction. */
105 2, /* cost of FCHS instruction. */
106 2, /* cost of FSQRT instruction. */
107 };
108
109 /* Processor costs (relative to an add) */
110 static const
111 struct processor_costs i386_cost = { /* 386 specific costs */
112 1, /* cost of an add instruction */
113 1, /* cost of a lea instruction */
114 3, /* variable shift costs */
115 2, /* constant shift costs */
116 {6, 6, 6, 6, 6}, /* cost of starting a multiply */
117 1, /* cost of multiply per each bit set */
118 {23, 23, 23, 23, 23}, /* cost of a divide/mod */
119 3, /* cost of movsx */
120 2, /* cost of movzx */
121 15, /* "large" insn */
122 3, /* MOVE_RATIO */
123 4, /* cost for loading QImode using movzbl */
124 {2, 4, 2}, /* cost of loading integer registers
125 in QImode, HImode and SImode.
126 Relative to reg-reg move (2). */
127 {2, 4, 2}, /* cost of storing integer registers */
128 2, /* cost of reg,reg fld/fst */
129 {8, 8, 8}, /* cost of loading fp registers
130 in SFmode, DFmode and XFmode */
131   {8, 8, 8},				/* cost of storing fp registers */
132 2, /* cost of moving MMX register */
133 {4, 8}, /* cost of loading MMX registers
134 in SImode and DImode */
135 {4, 8}, /* cost of storing MMX registers
136 in SImode and DImode */
137 2, /* cost of moving SSE register */
138 {4, 8, 16}, /* cost of loading SSE registers
139 in SImode, DImode and TImode */
140 {4, 8, 16}, /* cost of storing SSE registers
141 in SImode, DImode and TImode */
142 3, /* MMX or SSE register to integer */
143 0, /* size of prefetch block */
144 0, /* number of parallel prefetches */
145 1, /* Branch cost */
146 23, /* cost of FADD and FSUB insns. */
147 27, /* cost of FMUL instruction. */
148 88, /* cost of FDIV instruction. */
149 22, /* cost of FABS instruction. */
150 24, /* cost of FCHS instruction. */
151 122, /* cost of FSQRT instruction. */
152 };
153
154 static const
155 struct processor_costs i486_cost = { /* 486 specific costs */
156 1, /* cost of an add instruction */
157 1, /* cost of a lea instruction */
158 3, /* variable shift costs */
159 2, /* constant shift costs */
160 {12, 12, 12, 12, 12}, /* cost of starting a multiply */
161 1, /* cost of multiply per each bit set */
162 {40, 40, 40, 40, 40}, /* cost of a divide/mod */
163 3, /* cost of movsx */
164 2, /* cost of movzx */
165 15, /* "large" insn */
166 3, /* MOVE_RATIO */
167 4, /* cost for loading QImode using movzbl */
168 {2, 4, 2}, /* cost of loading integer registers
169 in QImode, HImode and SImode.
170 Relative to reg-reg move (2). */
171 {2, 4, 2}, /* cost of storing integer registers */
172 2, /* cost of reg,reg fld/fst */
173 {8, 8, 8}, /* cost of loading fp registers
174 in SFmode, DFmode and XFmode */
175   {8, 8, 8},				/* cost of storing fp registers */
176 2, /* cost of moving MMX register */
177 {4, 8}, /* cost of loading MMX registers
178 in SImode and DImode */
179 {4, 8}, /* cost of storing MMX registers
180 in SImode and DImode */
181 2, /* cost of moving SSE register */
182 {4, 8, 16}, /* cost of loading SSE registers
183 in SImode, DImode and TImode */
184 {4, 8, 16}, /* cost of storing SSE registers
185 in SImode, DImode and TImode */
186 3, /* MMX or SSE register to integer */
187 0, /* size of prefetch block */
188 0, /* number of parallel prefetches */
189 1, /* Branch cost */
190 8, /* cost of FADD and FSUB insns. */
191 16, /* cost of FMUL instruction. */
192 73, /* cost of FDIV instruction. */
193 3, /* cost of FABS instruction. */
194 3, /* cost of FCHS instruction. */
195 83, /* cost of FSQRT instruction. */
196 };
197
198 static const
199 struct processor_costs pentium_cost = {
200 1, /* cost of an add instruction */
201 1, /* cost of a lea instruction */
202 4, /* variable shift costs */
203 1, /* constant shift costs */
204 {11, 11, 11, 11, 11}, /* cost of starting a multiply */
205 0, /* cost of multiply per each bit set */
206 {25, 25, 25, 25, 25}, /* cost of a divide/mod */
207 3, /* cost of movsx */
208 2, /* cost of movzx */
209 8, /* "large" insn */
210 6, /* MOVE_RATIO */
211 6, /* cost for loading QImode using movzbl */
212 {2, 4, 2}, /* cost of loading integer registers
213 in QImode, HImode and SImode.
214 Relative to reg-reg move (2). */
215 {2, 4, 2}, /* cost of storing integer registers */
216 2, /* cost of reg,reg fld/fst */
217 {2, 2, 6}, /* cost of loading fp registers
218 in SFmode, DFmode and XFmode */
219   {4, 4, 6},				/* cost of storing fp registers */
220 8, /* cost of moving MMX register */
221 {8, 8}, /* cost of loading MMX registers
222 in SImode and DImode */
223 {8, 8}, /* cost of storing MMX registers
224 in SImode and DImode */
225 2, /* cost of moving SSE register */
226 {4, 8, 16}, /* cost of loading SSE registers
227 in SImode, DImode and TImode */
228 {4, 8, 16}, /* cost of storing SSE registers
229 in SImode, DImode and TImode */
230 3, /* MMX or SSE register to integer */
231 0, /* size of prefetch block */
232 0, /* number of parallel prefetches */
233 2, /* Branch cost */
234 3, /* cost of FADD and FSUB insns. */
235 3, /* cost of FMUL instruction. */
236 39, /* cost of FDIV instruction. */
237 1, /* cost of FABS instruction. */
238 1, /* cost of FCHS instruction. */
239 70, /* cost of FSQRT instruction. */
240 };
241
242 static const
243 struct processor_costs pentiumpro_cost = {
244 1, /* cost of an add instruction */
245 1, /* cost of a lea instruction */
246 1, /* variable shift costs */
247 1, /* constant shift costs */
248 {4, 4, 4, 4, 4}, /* cost of starting a multiply */
249 0, /* cost of multiply per each bit set */
250 {17, 17, 17, 17, 17}, /* cost of a divide/mod */
251 1, /* cost of movsx */
252 1, /* cost of movzx */
253 8, /* "large" insn */
254 6, /* MOVE_RATIO */
255 2, /* cost for loading QImode using movzbl */
256 {4, 4, 4}, /* cost of loading integer registers
257 in QImode, HImode and SImode.
258 Relative to reg-reg move (2). */
259 {2, 2, 2}, /* cost of storing integer registers */
260 2, /* cost of reg,reg fld/fst */
261 {2, 2, 6}, /* cost of loading fp registers
262 in SFmode, DFmode and XFmode */
263   {4, 4, 6},				/* cost of storing fp registers */
264 2, /* cost of moving MMX register */
265 {2, 2}, /* cost of loading MMX registers
266 in SImode and DImode */
267 {2, 2}, /* cost of storing MMX registers
268 in SImode and DImode */
269 2, /* cost of moving SSE register */
270 {2, 2, 8}, /* cost of loading SSE registers
271 in SImode, DImode and TImode */
272 {2, 2, 8}, /* cost of storing SSE registers
273 in SImode, DImode and TImode */
274 3, /* MMX or SSE register to integer */
275 32, /* size of prefetch block */
276 6, /* number of parallel prefetches */
277 2, /* Branch cost */
278 3, /* cost of FADD and FSUB insns. */
279 5, /* cost of FMUL instruction. */
280 56, /* cost of FDIV instruction. */
281 2, /* cost of FABS instruction. */
282 2, /* cost of FCHS instruction. */
283 56, /* cost of FSQRT instruction. */
284 };
285
286 static const
287 struct processor_costs k6_cost = {
288 1, /* cost of an add instruction */
289 2, /* cost of a lea instruction */
290 1, /* variable shift costs */
291 1, /* constant shift costs */
292 {3, 3, 3, 3, 3}, /* cost of starting a multiply */
293 0, /* cost of multiply per each bit set */
294 {18, 18, 18, 18, 18}, /* cost of a divide/mod */
295 2, /* cost of movsx */
296 2, /* cost of movzx */
297 8, /* "large" insn */
298 4, /* MOVE_RATIO */
299 3, /* cost for loading QImode using movzbl */
300 {4, 5, 4}, /* cost of loading integer registers
301 in QImode, HImode and SImode.
302 Relative to reg-reg move (2). */
303 {2, 3, 2}, /* cost of storing integer registers */
304 4, /* cost of reg,reg fld/fst */
305 {6, 6, 6}, /* cost of loading fp registers
306 in SFmode, DFmode and XFmode */
307   {4, 4, 4},				/* cost of storing fp registers */
308 2, /* cost of moving MMX register */
309 {2, 2}, /* cost of loading MMX registers
310 in SImode and DImode */
311 {2, 2}, /* cost of storing MMX registers
312 in SImode and DImode */
313 2, /* cost of moving SSE register */
314 {2, 2, 8}, /* cost of loading SSE registers
315 in SImode, DImode and TImode */
316 {2, 2, 8}, /* cost of storing SSE registers
317 in SImode, DImode and TImode */
318 6, /* MMX or SSE register to integer */
319 32, /* size of prefetch block */
320 1, /* number of parallel prefetches */
321 1, /* Branch cost */
322 2, /* cost of FADD and FSUB insns. */
323 2, /* cost of FMUL instruction. */
324 56, /* cost of FDIV instruction. */
325 2, /* cost of FABS instruction. */
326 2, /* cost of FCHS instruction. */
327 56, /* cost of FSQRT instruction. */
328 };
329
330 static const
331 struct processor_costs athlon_cost = {
332 1, /* cost of an add instruction */
333 2, /* cost of a lea instruction */
334 1, /* variable shift costs */
335 1, /* constant shift costs */
336 {5, 5, 5, 5, 5}, /* cost of starting a multiply */
337 0, /* cost of multiply per each bit set */
338 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
339 1, /* cost of movsx */
340 1, /* cost of movzx */
341 8, /* "large" insn */
342 9, /* MOVE_RATIO */
343 4, /* cost for loading QImode using movzbl */
344 {3, 4, 3}, /* cost of loading integer registers
345 in QImode, HImode and SImode.
346 Relative to reg-reg move (2). */
347 {3, 4, 3}, /* cost of storing integer registers */
348 4, /* cost of reg,reg fld/fst */
349 {4, 4, 12}, /* cost of loading fp registers
350 in SFmode, DFmode and XFmode */
351   {6, 6, 8},				/* cost of storing fp registers */
352 2, /* cost of moving MMX register */
353 {4, 4}, /* cost of loading MMX registers
354 in SImode and DImode */
355 {4, 4}, /* cost of storing MMX registers
356 in SImode and DImode */
357 2, /* cost of moving SSE register */
358 {4, 4, 6}, /* cost of loading SSE registers
359 in SImode, DImode and TImode */
360 {4, 4, 5}, /* cost of storing SSE registers
361 in SImode, DImode and TImode */
362 5, /* MMX or SSE register to integer */
363 64, /* size of prefetch block */
364 6, /* number of parallel prefetches */
365 5, /* Branch cost */
366 4, /* cost of FADD and FSUB insns. */
367 4, /* cost of FMUL instruction. */
368 24, /* cost of FDIV instruction. */
369 2, /* cost of FABS instruction. */
370 2, /* cost of FCHS instruction. */
371 35, /* cost of FSQRT instruction. */
372 };
373
374 static const
375 struct processor_costs k8_cost = {
376 1, /* cost of an add instruction */
377 2, /* cost of a lea instruction */
378 1, /* variable shift costs */
379 1, /* constant shift costs */
380 {3, 4, 3, 4, 5}, /* cost of starting a multiply */
381 0, /* cost of multiply per each bit set */
382 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
383 1, /* cost of movsx */
384 1, /* cost of movzx */
385 8, /* "large" insn */
386 9, /* MOVE_RATIO */
387 4, /* cost for loading QImode using movzbl */
388 {3, 4, 3}, /* cost of loading integer registers
389 in QImode, HImode and SImode.
390 Relative to reg-reg move (2). */
391 {3, 4, 3}, /* cost of storing integer registers */
392 4, /* cost of reg,reg fld/fst */
393 {4, 4, 12}, /* cost of loading fp registers
394 in SFmode, DFmode and XFmode */
395   {6, 6, 8},				/* cost of storing fp registers */
396 2, /* cost of moving MMX register */
397 {3, 3}, /* cost of loading MMX registers
398 in SImode and DImode */
399 {4, 4}, /* cost of storing MMX registers
400 in SImode and DImode */
401 2, /* cost of moving SSE register */
402 {4, 3, 6}, /* cost of loading SSE registers
403 in SImode, DImode and TImode */
404 {4, 4, 5}, /* cost of storing SSE registers
405 in SImode, DImode and TImode */
406 5, /* MMX or SSE register to integer */
407 64, /* size of prefetch block */
408 6, /* number of parallel prefetches */
409 5, /* Branch cost */
410 4, /* cost of FADD and FSUB insns. */
411 4, /* cost of FMUL instruction. */
412 19, /* cost of FDIV instruction. */
413 2, /* cost of FABS instruction. */
414 2, /* cost of FCHS instruction. */
415 35, /* cost of FSQRT instruction. */
416 };
417
418 static const
419 struct processor_costs pentium4_cost = {
420 1, /* cost of an add instruction */
421 3, /* cost of a lea instruction */
422 4, /* variable shift costs */
423 4, /* constant shift costs */
424 {15, 15, 15, 15, 15}, /* cost of starting a multiply */
425 0, /* cost of multiply per each bit set */
426 {56, 56, 56, 56, 56}, /* cost of a divide/mod */
427 1, /* cost of movsx */
428 1, /* cost of movzx */
429 16, /* "large" insn */
430 6, /* MOVE_RATIO */
431 2, /* cost for loading QImode using movzbl */
432 {4, 5, 4}, /* cost of loading integer registers
433 in QImode, HImode and SImode.
434 Relative to reg-reg move (2). */
435 {2, 3, 2}, /* cost of storing integer registers */
436 2, /* cost of reg,reg fld/fst */
437 {2, 2, 6}, /* cost of loading fp registers
438 in SFmode, DFmode and XFmode */
439   {4, 4, 6},				/* cost of storing fp registers */
440 2, /* cost of moving MMX register */
441 {2, 2}, /* cost of loading MMX registers
442 in SImode and DImode */
443 {2, 2}, /* cost of storing MMX registers
444 in SImode and DImode */
445 12, /* cost of moving SSE register */
446 {12, 12, 12}, /* cost of loading SSE registers
447 in SImode, DImode and TImode */
448 {2, 2, 8}, /* cost of storing SSE registers
449 in SImode, DImode and TImode */
450 10, /* MMX or SSE register to integer */
451 64, /* size of prefetch block */
452 6, /* number of parallel prefetches */
453 2, /* Branch cost */
454 5, /* cost of FADD and FSUB insns. */
455 7, /* cost of FMUL instruction. */
456 43, /* cost of FDIV instruction. */
457 2, /* cost of FABS instruction. */
458 2, /* cost of FCHS instruction. */
459 43, /* cost of FSQRT instruction. */
460 };
461
462 static const
463 struct processor_costs nocona_cost = {
464 1, /* cost of an add instruction */
465 1, /* cost of a lea instruction */
466 1, /* variable shift costs */
467 1, /* constant shift costs */
468 {10, 10, 10, 10, 10}, /* cost of starting a multiply */
469 0, /* cost of multiply per each bit set */
470 {66, 66, 66, 66, 66}, /* cost of a divide/mod */
471 1, /* cost of movsx */
472 1, /* cost of movzx */
473 16, /* "large" insn */
474 9, /* MOVE_RATIO */
475 4, /* cost for loading QImode using movzbl */
476 {4, 4, 4}, /* cost of loading integer registers
477 in QImode, HImode and SImode.
478 Relative to reg-reg move (2). */
479 {4, 4, 4}, /* cost of storing integer registers */
480 3, /* cost of reg,reg fld/fst */
481 {12, 12, 12}, /* cost of loading fp registers
482 in SFmode, DFmode and XFmode */
483   {4, 4, 4},				/* cost of storing fp registers */
484 6, /* cost of moving MMX register */
485 {12, 12}, /* cost of loading MMX registers
486 in SImode and DImode */
487 {12, 12}, /* cost of storing MMX registers
488 in SImode and DImode */
489 6, /* cost of moving SSE register */
490 {12, 12, 12}, /* cost of loading SSE registers
491 in SImode, DImode and TImode */
492 {12, 12, 12}, /* cost of storing SSE registers
493 in SImode, DImode and TImode */
494 8, /* MMX or SSE register to integer */
495 128, /* size of prefetch block */
496 8, /* number of parallel prefetches */
497 1, /* Branch cost */
498 6, /* cost of FADD and FSUB insns. */
499 8, /* cost of FMUL instruction. */
500 40, /* cost of FDIV instruction. */
501 3, /* cost of FABS instruction. */
502 3, /* cost of FCHS instruction. */
503 44, /* cost of FSQRT instruction. */
504 };
505
506 const struct processor_costs *ix86_cost = &pentium_cost;
507
508 /* Processor feature/optimization bitmasks. */
509 #define m_386 (1<<PROCESSOR_I386)
510 #define m_486 (1<<PROCESSOR_I486)
511 #define m_PENT (1<<PROCESSOR_PENTIUM)
512 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
513 #define m_K6 (1<<PROCESSOR_K6)
514 #define m_ATHLON (1<<PROCESSOR_ATHLON)
515 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
516 #define m_K8 (1<<PROCESSOR_K8)
517 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
518 #define m_NOCONA (1<<PROCESSOR_NOCONA)
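/* Each x86_* tuning variable below is a bitmask over these processor bits;
   a feature is considered active when the bit for the processor being tuned
   for (or, for some tests, the selected architecture) is set, as in the
   (x86_accumulate_outgoing_args & TUNEMASK) and
   (x86_arch_always_fancy_math_387 & (1 << ix86_arch)) tests in
   override_options below.  */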
519
520 const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
521 const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
522 const int x86_zero_extend_with_and = m_486 | m_PENT;
523 const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA /* m_386 | m_K6 */;
524 const int x86_double_with_add = ~m_386;
525 const int x86_use_bit_test = m_386;
526 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
527 const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
528 const int x86_3dnow_a = m_ATHLON_K8;
529 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
530 /* Branch hints were put into the P4 based on simulation results. But
531    after the P4 was made, no performance benefit was observed with
532    branch hints; they also increase code size. As a result,
533    icc never generates branch hints. */
534 const int x86_branch_hints = 0;
535 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4 | m_NOCONA;
536 const int x86_partial_reg_stall = m_PPRO;
537 const int x86_use_loop = m_K6;
538 const int x86_use_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
539 const int x86_use_mov0 = m_K6;
540 const int x86_use_cltd = ~(m_PENT | m_K6);
541 const int x86_read_modify_write = ~m_PENT;
542 const int x86_read_modify = ~(m_PENT | m_PPRO);
543 const int x86_split_long_moves = m_PPRO;
544 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
545 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
546 const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
547 const int x86_qimode_math = ~(0);
548 const int x86_promote_qi_regs = 0;
549 const int x86_himode_math = ~(m_PPRO);
550 const int x86_promote_hi_regs = m_PPRO;
551 const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA;
552 const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA;
553 const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4 | m_NOCONA;
554 const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4 | m_NOCONA;
555 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO);
556 const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA;
557 const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA;
558 const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO;
559 const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO;
560 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO;
561 const int x86_decompose_lea = m_PENT4 | m_NOCONA;
562 const int x86_shift1 = ~m_486;
563 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
564 const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO;
565 /* Set for machines where types and dependencies are resolved on SSE register
566    parts instead of whole registers, so we may maintain just the lower part of
567    scalar values in the proper format, leaving the upper part undefined. */
568 const int x86_sse_partial_regs = m_ATHLON_K8;
569 /* Athlon optimizes partial-register FPS special case, thus avoiding the
570 need for extra instructions beforehand */
571 const int x86_sse_partial_regs_for_cvtsd2ss = 0;
572 const int x86_sse_typeless_stores = m_ATHLON_K8;
573 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
574 const int x86_use_ffreep = m_ATHLON_K8;
575 const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
576 const int x86_inter_unit_moves = ~(m_ATHLON_K8);
577 const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_NOCONA | m_PPRO;
578 /* Some CPU cores are not able to predict more than 4 branch instructions in
579 the 16 byte window. */
580 const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
581 const int x86_schedule = m_PPRO | m_ATHLON_K8 | m_K6 | m_PENT;
582
583 /* In case the average insn count for a single function invocation is
584    lower than this constant, emit fast (but longer) prologue and
585    epilogue code. */
586 #define FAST_PROLOGUE_INSN_COUNT 20
587
588 /* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
589 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
590 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
591 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
592
593 /* Array of the smallest class containing reg number REGNO, indexed by
594 REGNO. Used by REGNO_REG_CLASS in i386.h. */
595
596 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
597 {
598 /* ax, dx, cx, bx */
599 AREG, DREG, CREG, BREG,
600 /* si, di, bp, sp */
601 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
602 /* FP registers */
603 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
604 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
605 /* arg pointer */
606 NON_Q_REGS,
607 /* flags, fpsr, dirflag, frame */
608 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
609 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
610 SSE_REGS, SSE_REGS,
611 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
612 MMX_REGS, MMX_REGS,
613 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
614 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
615 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
616 SSE_REGS, SSE_REGS,
617 };
618
619 /* The "default" register map used in 32bit mode. */
620
621 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
622 {
623 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
624 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
625 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
626 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
627 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
628 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
629 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
630 };
631
632 static int const x86_64_int_parameter_registers[6] =
633 {
634 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
635 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
636 };
637
638 static int const x86_64_int_return_registers[4] =
639 {
640   0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
641 };
642
643 /* The "default" register map used in 64bit mode. */
644 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
645 {
646 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
647 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
648 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
649 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
650 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
651 8,9,10,11,12,13,14,15, /* extended integer registers */
652 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
653 };
654
655 /* Define the register numbers to be used in Dwarf debugging information.
656 The SVR4 reference port C compiler uses the following register numbers
657 in its Dwarf output code:
658 0 for %eax (gcc regno = 0)
659 1 for %ecx (gcc regno = 2)
660 2 for %edx (gcc regno = 1)
661 3 for %ebx (gcc regno = 3)
662 4 for %esp (gcc regno = 7)
663 5 for %ebp (gcc regno = 6)
664 6 for %esi (gcc regno = 4)
665 7 for %edi (gcc regno = 5)
666 The following three DWARF register numbers are never generated by
667 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
668 believes these numbers have these meanings.
669 8 for %eip (no gcc equivalent)
670 9 for %eflags (gcc regno = 17)
671 10 for %trapno (no gcc equivalent)
672 It is not at all clear how we should number the FP stack registers
673 for the x86 architecture. If the version of SDB on x86/svr4 were
674 a bit less brain dead with respect to floating-point then we would
675 have a precedent to follow with respect to DWARF register numbers
676 for x86 FP registers, but the SDB on x86/svr4 is so completely
677 broken with respect to FP registers that it is hardly worth thinking
678 of it as something to strive for compatibility with.
679 The version of x86/svr4 SDB I have at the moment does (partially)
680 seem to believe that DWARF register number 11 is associated with
681 the x86 register %st(0), but that's about all. Higher DWARF
682 register numbers don't seem to be associated with anything in
683 particular, and even for DWARF regno 11, SDB only seems to under-
684 stand that it should say that a variable lives in %st(0) (when
685 asked via an `=' command) if we said it was in DWARF regno 11,
686 but SDB still prints garbage when asked for the value of the
687 variable in question (via a `/' command).
688 (Also note that the labels SDB prints for various FP stack regs
689 when doing an `x' command are all wrong.)
690 Note that these problems generally don't affect the native SVR4
691 C compiler because it doesn't allow the use of -O with -g and
692 because when it is *not* optimizing, it allocates a memory
693 location for each floating-point variable, and the memory
694 location is what gets described in the DWARF AT_location
695 attribute for the variable in question.
696 Regardless of the severe mental illness of the x86/svr4 SDB, we
697 do something sensible here and we use the following DWARF
698 register numbers. Note that these are all stack-top-relative
699 numbers.
700 11 for %st(0) (gcc regno = 8)
701 12 for %st(1) (gcc regno = 9)
702 13 for %st(2) (gcc regno = 10)
703 14 for %st(3) (gcc regno = 11)
704 15 for %st(4) (gcc regno = 12)
705 16 for %st(5) (gcc regno = 13)
706 17 for %st(6) (gcc regno = 14)
707 18 for %st(7) (gcc regno = 15)
708 */
709 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
710 {
711 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
712 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
713 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
714 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
715 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
716 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
717 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
718 };
719
720 /* Test and compare insns in i386.md store the information needed to
721 generate branch and scc insns here. */
722
723 rtx ix86_compare_op0 = NULL_RTX;
724 rtx ix86_compare_op1 = NULL_RTX;
725
726 #define MAX_386_STACK_LOCALS 3
727 /* Size of the register save area. */
728 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
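/* With the usual x86-64 values of REGPARM_MAX == 6 integer registers and
   SSE_REGPARM_MAX == 8 SSE registers (assumed here for illustration), this
   register save area works out to 6*8 + 8*16 = 176 bytes.  */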
729
730 /* Define the structure for the machine field in struct function. */
731
732 struct stack_local_entry GTY(())
733 {
734 unsigned short mode;
735 unsigned short n;
736 rtx rtl;
737 struct stack_local_entry *next;
738 };
739
740 /* Structure describing stack frame layout.
741 Stack grows downward:
742
743 [arguments]
744 <- ARG_POINTER
745 saved pc
746
747 saved frame pointer if frame_pointer_needed
748 <- HARD_FRAME_POINTER
749 [saved regs]
750
751 [padding1] \
752 )
753 [va_arg registers] (
754 > to_allocate <- FRAME_POINTER
755 [frame] (
756 )
757 [padding2] /
758 */
759 struct ix86_frame
760 {
761 int nregs;
762 int padding1;
763 int va_arg_size;
764 HOST_WIDE_INT frame;
765 int padding2;
766 int outgoing_arguments_size;
767 int red_zone_size;
768
769 HOST_WIDE_INT to_allocate;
770 /* The offsets relative to ARG_POINTER. */
771 HOST_WIDE_INT frame_pointer_offset;
772 HOST_WIDE_INT hard_frame_pointer_offset;
773 HOST_WIDE_INT stack_pointer_offset;
774
775 /* When save_regs_using_mov is set, emit prologue using
776 move instead of push instructions. */
777 bool save_regs_using_mov;
778 };
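/* In the diagram above, to_allocate is roughly padding1 + the va_arg
   register save area + the local frame + padding2, i.e. the space the
   prologue must still reserve below the saved registers.  */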
779
780 /* Used to enable/disable debugging features. */
781 const char *ix86_debug_arg_string, *ix86_debug_addr_string;
782 /* Code model option as passed by user. */
783 const char *ix86_cmodel_string;
784 /* Parsed value. */
785 enum cmodel ix86_cmodel;
786 /* Asm dialect. */
787 const char *ix86_asm_string;
788 enum asm_dialect ix86_asm_dialect = ASM_ATT;
789 /* TLS dialect. */
790 const char *ix86_tls_dialect_string;
791 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
792
793 /* Which unit we are generating floating point math for. */
794 enum fpmath_unit ix86_fpmath;
795
796 /* Which cpu are we scheduling for. */
797 enum processor_type ix86_tune;
798 /* Which instruction set architecture to use. */
799 enum processor_type ix86_arch;
800
801 /* Strings to hold which cpu and instruction set architecture to use. */
802 const char *ix86_tune_string; /* for -mtune=<xxx> */
803 const char *ix86_arch_string; /* for -march=<xxx> */
804 const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
805
806 /* # of registers to use to pass arguments. */
807 const char *ix86_regparm_string;
808
809 /* true if sse prefetch instruction is not NOOP. */
810 int x86_prefetch_sse;
811
812 /* ix86_regparm_string as a number */
813 int ix86_regparm;
814
815 /* Alignment to use for loops and jumps: */
816
817 /* Power of two alignment for loops. */
818 const char *ix86_align_loops_string;
819
820 /* Power of two alignment for non-loop jumps. */
821 const char *ix86_align_jumps_string;
822
823 /* Power of two alignment for stack boundary in bytes. */
824 const char *ix86_preferred_stack_boundary_string;
825
826 /* Preferred alignment for stack boundary in bits. */
827 unsigned int ix86_preferred_stack_boundary;
828
829 /* Values 1-5: see jump.c */
830 int ix86_branch_cost;
831 const char *ix86_branch_cost_string;
832
833 /* Power of two alignment for functions. */
834 const char *ix86_align_funcs_string;
835
836 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
837 char internal_label_prefix[16];
838 int internal_label_prefix_len;
839 \f
840 static void output_pic_addr_const (FILE *, rtx, int);
841 static void put_condition_code (enum rtx_code, enum machine_mode,
842 int, int, FILE *);
843 static const char *get_some_local_dynamic_name (void);
844 static int get_some_local_dynamic_name_1 (rtx *, void *);
845 static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
846 static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
847 rtx *);
848 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
849 static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
850 enum machine_mode);
851 static rtx get_thread_pointer (int);
852 static rtx legitimize_tls_address (rtx, enum tls_model, int);
853 static void get_pc_thunk_name (char [32], unsigned int);
854 static rtx gen_push (rtx);
855 static int ix86_flags_dependant (rtx, rtx, enum attr_type);
856 static int ix86_agi_dependant (rtx, rtx, enum attr_type);
857 static struct machine_function * ix86_init_machine_status (void);
858 static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
859 static int ix86_nsaved_regs (void);
860 static void ix86_emit_save_regs (void);
861 static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
862 static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
863 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
864 static HOST_WIDE_INT ix86_GOT_alias_set (void);
865 static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
866 static rtx ix86_expand_aligntest (rtx, int);
867 static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
868 static int ix86_issue_rate (void);
869 static int ix86_adjust_cost (rtx, rtx, rtx, int);
870 static int ia32_multipass_dfa_lookahead (void);
871 static bool ix86_misaligned_mem_ok (enum machine_mode);
872 static void ix86_init_mmx_sse_builtins (void);
873 static rtx x86_this_parameter (tree);
874 static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
875 HOST_WIDE_INT, tree);
876 static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
877 static void x86_file_start (void);
878 static void ix86_reorg (void);
879 static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
880 static tree ix86_build_builtin_va_list (void);
881 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
882 tree, int *, int);
883 static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
884 static bool ix86_vector_mode_supported_p (enum machine_mode);
885
886 static int ix86_address_cost (rtx);
887 static bool ix86_cannot_force_const_mem (rtx);
888 static rtx ix86_delegitimize_address (rtx);
889
890 struct builtin_description;
891 static rtx ix86_expand_sse_comi (const struct builtin_description *,
892 tree, rtx);
893 static rtx ix86_expand_sse_compare (const struct builtin_description *,
894 tree, rtx);
895 static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
896 static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
897 static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
898 static rtx ix86_expand_store_builtin (enum insn_code, tree);
899 static rtx safe_vector_operand (rtx, enum machine_mode);
900 static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
901 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
902 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
903 static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
904 static int ix86_fp_comparison_cost (enum rtx_code code);
905 static unsigned int ix86_select_alt_pic_regnum (void);
906 static int ix86_save_reg (unsigned int, int);
907 static void ix86_compute_frame_layout (struct ix86_frame *);
908 static int ix86_comp_type_attributes (tree, tree);
909 static int ix86_function_regparm (tree, tree);
910 const struct attribute_spec ix86_attribute_table[];
911 static bool ix86_function_ok_for_sibcall (tree, tree);
912 static tree ix86_handle_cdecl_attribute (tree *, tree, tree, int, bool *);
913 static tree ix86_handle_regparm_attribute (tree *, tree, tree, int, bool *);
914 static int ix86_value_regno (enum machine_mode);
915 static bool contains_128bit_aligned_vector_p (tree);
916 static rtx ix86_struct_value_rtx (tree, int);
917 static bool ix86_ms_bitfield_layout_p (tree);
918 static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
919 static int extended_reg_mentioned_1 (rtx *, void *);
920 static bool ix86_rtx_costs (rtx, int, int, int *);
921 static int min_insn_size (rtx);
922 static tree ix86_md_asm_clobbers (tree clobbers);
923 static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type);
924 static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
925 tree, bool);
926
927 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
928 static void ix86_svr3_asm_out_constructor (rtx, int);
929 #endif
930 static void i386_solaris_elf_named_section (const char *, unsigned int, tree);
931
932 /* Register class used for passing a given 64-bit part of the argument.
933    These represent the classes documented by the x86-64 psABI, except for the
934    SSESF and SSEDF classes, which are basically the SSE class; gcc just uses
935    SFmode or DFmode moves instead of DImode moves to avoid reformatting penalties.
936 
937    Similarly we play games with INTEGERSI_CLASS to use the cheaper SImode moves
938    whenever the upper half of the part contains only padding.
939 */
940 enum x86_64_reg_class
941 {
942 X86_64_NO_CLASS,
943 X86_64_INTEGER_CLASS,
944 X86_64_INTEGERSI_CLASS,
945 X86_64_SSE_CLASS,
946 X86_64_SSESF_CLASS,
947 X86_64_SSEDF_CLASS,
948 X86_64_SSEUP_CLASS,
949 X86_64_X87_CLASS,
950 X86_64_X87UP_CLASS,
951 X86_64_MEMORY_CLASS
952 };
953 static const char * const x86_64_reg_class_name[] =
954 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
955
956 #define MAX_CLASSES 4
957 static int classify_argument (enum machine_mode, tree,
958 enum x86_64_reg_class [MAX_CLASSES], int);
959 static int examine_argument (enum machine_mode, tree, int, int *, int *);
960 static rtx construct_container (enum machine_mode, tree, int, int, int,
961 const int *, int);
962 static enum x86_64_reg_class merge_classes (enum x86_64_reg_class,
963 enum x86_64_reg_class);
964
965 /* Table of constants used by fldpi, fldln2, etc.... */
966 static REAL_VALUE_TYPE ext_80387_constants_table [5];
967 static bool ext_80387_constants_init = 0;
968 static void init_ext_80387_constants (void);
969 \f
970 /* Initialize the GCC target structure. */
971 #undef TARGET_ATTRIBUTE_TABLE
972 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
973 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
974 # undef TARGET_MERGE_DECL_ATTRIBUTES
975 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
976 #endif
977
978 #undef TARGET_COMP_TYPE_ATTRIBUTES
979 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
980
981 #undef TARGET_INIT_BUILTINS
982 #define TARGET_INIT_BUILTINS ix86_init_builtins
983
984 #undef TARGET_EXPAND_BUILTIN
985 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
986
987 #undef TARGET_ASM_FUNCTION_EPILOGUE
988 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
989
990 #undef TARGET_ASM_OPEN_PAREN
991 #define TARGET_ASM_OPEN_PAREN ""
992 #undef TARGET_ASM_CLOSE_PAREN
993 #define TARGET_ASM_CLOSE_PAREN ""
994
995 #undef TARGET_ASM_ALIGNED_HI_OP
996 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
997 #undef TARGET_ASM_ALIGNED_SI_OP
998 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
999 #ifdef ASM_QUAD
1000 #undef TARGET_ASM_ALIGNED_DI_OP
1001 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
1002 #endif
1003
1004 #undef TARGET_ASM_UNALIGNED_HI_OP
1005 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
1006 #undef TARGET_ASM_UNALIGNED_SI_OP
1007 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1008 #undef TARGET_ASM_UNALIGNED_DI_OP
1009 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1010
1011 #undef TARGET_SCHED_ADJUST_COST
1012 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1013 #undef TARGET_SCHED_ISSUE_RATE
1014 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1015 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1016 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1017 ia32_multipass_dfa_lookahead
1018
1019 #undef TARGET_VECTORIZE_MISALIGNED_MEM_OK
1020 #define TARGET_VECTORIZE_MISALIGNED_MEM_OK ix86_misaligned_mem_ok
1021
1022 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1023 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1024
1025 #ifdef HAVE_AS_TLS
1026 #undef TARGET_HAVE_TLS
1027 #define TARGET_HAVE_TLS true
1028 #endif
1029 #undef TARGET_CANNOT_FORCE_CONST_MEM
1030 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1031
1032 #undef TARGET_DELEGITIMIZE_ADDRESS
1033 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1034
1035 #undef TARGET_MS_BITFIELD_LAYOUT_P
1036 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1037
1038 #undef TARGET_ASM_OUTPUT_MI_THUNK
1039 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1040 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1041 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1042
1043 #undef TARGET_ASM_FILE_START
1044 #define TARGET_ASM_FILE_START x86_file_start
1045
1046 #undef TARGET_RTX_COSTS
1047 #define TARGET_RTX_COSTS ix86_rtx_costs
1048 #undef TARGET_ADDRESS_COST
1049 #define TARGET_ADDRESS_COST ix86_address_cost
1050
1051 #undef TARGET_FIXED_CONDITION_CODE_REGS
1052 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1053 #undef TARGET_CC_MODES_COMPATIBLE
1054 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1055
1056 #undef TARGET_MACHINE_DEPENDENT_REORG
1057 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1058
1059 #undef TARGET_BUILD_BUILTIN_VA_LIST
1060 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1061
1062 #undef TARGET_MD_ASM_CLOBBERS
1063 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1064
1065 #undef TARGET_PROMOTE_PROTOTYPES
1066 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1067 #undef TARGET_STRUCT_VALUE_RTX
1068 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1069 #undef TARGET_SETUP_INCOMING_VARARGS
1070 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1071 #undef TARGET_MUST_PASS_IN_STACK
1072 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
1073 #undef TARGET_PASS_BY_REFERENCE
1074 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
1075
1076 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1077 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1078
1079 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1080 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
1081
1082 #ifdef SUBTARGET_INSERT_ATTRIBUTES
1083 #undef TARGET_INSERT_ATTRIBUTES
1084 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
1085 #endif
1086
1087 struct gcc_target targetm = TARGET_INITIALIZER;
1088
1089 \f
1090 /* The svr4 ABI for the i386 says that records and unions are returned
1091 in memory. */
1092 #ifndef DEFAULT_PCC_STRUCT_RETURN
1093 #define DEFAULT_PCC_STRUCT_RETURN 1
1094 #endif
1095
1096 /* Sometimes certain combinations of command options do not make
1097 sense on a particular target machine. You can define a macro
1098 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1099 defined, is executed once just after all the command options have
1100 been parsed.
1101
1102 Don't use this macro to turn on various extra optimizations for
1103 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1104
1105 void
1106 override_options (void)
1107 {
1108 int i;
1109 int ix86_tune_defaulted = 0;
1110
1111 /* Comes from final.c -- no real reason to change it. */
1112 #define MAX_CODE_ALIGN 16
1113
1114 static struct ptt
1115 {
1116 const struct processor_costs *cost; /* Processor costs */
1117 const int target_enable; /* Target flags to enable. */
1118 const int target_disable; /* Target flags to disable. */
1119 const int align_loop; /* Default alignments. */
1120 const int align_loop_max_skip;
1121 const int align_jump;
1122 const int align_jump_max_skip;
1123 const int align_func;
1124 }
1125 const processor_target_table[PROCESSOR_max] =
1126 {
1127 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1128 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1129 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1130 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1131 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1132 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1133 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1134 {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
1135 {&nocona_cost, 0, 0, 0, 0, 0, 0, 0}
1136 };
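/* Reading the table above: the k6 entry {&k6_cost, 0, 0, 32, 7, 32, 7, 32}
   selects the k6 cost table, enables and disables no extra target flags, and
   asks for 32-byte loop, jump and function alignment, skipping at most 7
   bytes of padding for loops and jumps.  */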
1137
1138 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1139 static struct pta
1140 {
1141 const char *const name; /* processor name or nickname. */
1142 const enum processor_type processor;
1143 const enum pta_flags
1144 {
1145 PTA_SSE = 1,
1146 PTA_SSE2 = 2,
1147 PTA_SSE3 = 4,
1148 PTA_MMX = 8,
1149 PTA_PREFETCH_SSE = 16,
1150 PTA_3DNOW = 32,
1151 PTA_3DNOW_A = 64,
1152 PTA_64BIT = 128
1153 } flags;
1154 }
1155 const processor_alias_table[] =
1156 {
1157 {"i386", PROCESSOR_I386, 0},
1158 {"i486", PROCESSOR_I486, 0},
1159 {"i586", PROCESSOR_PENTIUM, 0},
1160 {"pentium", PROCESSOR_PENTIUM, 0},
1161 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1162 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1163 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1164 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1165 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1166 {"i686", PROCESSOR_PENTIUMPRO, 0},
1167 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1168 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1169 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1170 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1171 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1172 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1173 | PTA_MMX | PTA_PREFETCH_SSE},
1174 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1175 | PTA_MMX | PTA_PREFETCH_SSE},
1176 {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
1177 | PTA_MMX | PTA_PREFETCH_SSE},
1178 {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1179 | PTA_MMX | PTA_PREFETCH_SSE},
1180 {"k6", PROCESSOR_K6, PTA_MMX},
1181 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1182 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1183 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1184 | PTA_3DNOW_A},
1185 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1186 | PTA_3DNOW | PTA_3DNOW_A},
1187 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1188 | PTA_3DNOW_A | PTA_SSE},
1189 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1190 | PTA_3DNOW_A | PTA_SSE},
1191 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1192 | PTA_3DNOW_A | PTA_SSE},
1193 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1194 | PTA_SSE | PTA_SSE2 },
1195 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1196 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1197 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1198 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1199 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1200 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1201 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1202 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1203 };
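/* As an example of how an alias entry is consumed by the -march loop below:
   -march=athlon-xp sets ix86_arch to PROCESSOR_ATHLON and, unless the user
   explicitly said otherwise, turns on MASK_MMX, MASK_3DNOW, MASK_3DNOW_A and
   MASK_SSE, while PTA_PREFETCH_SSE marks the SSE prefetch insns as usable.  */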
1204
1205 int const pta_size = ARRAY_SIZE (processor_alias_table);
1206
1207 /* Set the default values for switches whose default depends on TARGET_64BIT
1208 in case they weren't overwritten by command line options. */
1209 if (TARGET_64BIT)
1210 {
1211 if (flag_omit_frame_pointer == 2)
1212 flag_omit_frame_pointer = 1;
1213 if (flag_asynchronous_unwind_tables == 2)
1214 flag_asynchronous_unwind_tables = 1;
1215 if (flag_pcc_struct_return == 2)
1216 flag_pcc_struct_return = 0;
1217 }
1218 else
1219 {
1220 if (flag_omit_frame_pointer == 2)
1221 flag_omit_frame_pointer = 0;
1222 if (flag_asynchronous_unwind_tables == 2)
1223 flag_asynchronous_unwind_tables = 0;
1224 if (flag_pcc_struct_return == 2)
1225 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1226 }
1227
1228 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1229 SUBTARGET_OVERRIDE_OPTIONS;
1230 #endif
1231
1232 if (!ix86_tune_string && ix86_arch_string)
1233 ix86_tune_string = ix86_arch_string;
1234 if (!ix86_tune_string)
1235 {
1236 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1237 ix86_tune_defaulted = 1;
1238 }
1239 if (!ix86_arch_string)
1240 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
1241
1242 if (ix86_cmodel_string != 0)
1243 {
1244 if (!strcmp (ix86_cmodel_string, "small"))
1245 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1246 else if (flag_pic)
1247 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1248 else if (!strcmp (ix86_cmodel_string, "32"))
1249 ix86_cmodel = CM_32;
1250 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1251 ix86_cmodel = CM_KERNEL;
1252 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
1253 ix86_cmodel = CM_MEDIUM;
1254 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1255 ix86_cmodel = CM_LARGE;
1256 else
1257 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1258 }
1259 else
1260 {
1261 ix86_cmodel = CM_32;
1262 if (TARGET_64BIT)
1263 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1264 }
1265 if (ix86_asm_string != 0)
1266 {
1267 if (!strcmp (ix86_asm_string, "intel"))
1268 ix86_asm_dialect = ASM_INTEL;
1269 else if (!strcmp (ix86_asm_string, "att"))
1270 ix86_asm_dialect = ASM_ATT;
1271 else
1272 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1273 }
1274 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1275 error ("code model %qs not supported in the %s bit mode",
1276 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1277 if (ix86_cmodel == CM_LARGE)
1278 sorry ("code model %<large%> not supported yet");
1279 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1280 sorry ("%i-bit mode not compiled in",
1281 (target_flags & MASK_64BIT) ? 64 : 32);
1282
1283 for (i = 0; i < pta_size; i++)
1284 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1285 {
1286 ix86_arch = processor_alias_table[i].processor;
1287 /* Default cpu tuning to the architecture. */
1288 ix86_tune = ix86_arch;
1289 if (processor_alias_table[i].flags & PTA_MMX
1290 && !(target_flags_explicit & MASK_MMX))
1291 target_flags |= MASK_MMX;
1292 if (processor_alias_table[i].flags & PTA_3DNOW
1293 && !(target_flags_explicit & MASK_3DNOW))
1294 target_flags |= MASK_3DNOW;
1295 if (processor_alias_table[i].flags & PTA_3DNOW_A
1296 && !(target_flags_explicit & MASK_3DNOW_A))
1297 target_flags |= MASK_3DNOW_A;
1298 if (processor_alias_table[i].flags & PTA_SSE
1299 && !(target_flags_explicit & MASK_SSE))
1300 target_flags |= MASK_SSE;
1301 if (processor_alias_table[i].flags & PTA_SSE2
1302 && !(target_flags_explicit & MASK_SSE2))
1303 target_flags |= MASK_SSE2;
1304 if (processor_alias_table[i].flags & PTA_SSE3
1305 && !(target_flags_explicit & MASK_SSE3))
1306 target_flags |= MASK_SSE3;
1307 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1308 x86_prefetch_sse = true;
1309 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1310 error ("CPU you selected does not support x86-64 "
1311 "instruction set");
1312 break;
1313 }
1314
1315 if (i == pta_size)
1316 error ("bad value (%s) for -march= switch", ix86_arch_string);
1317
1318 for (i = 0; i < pta_size; i++)
1319 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1320 {
1321 ix86_tune = processor_alias_table[i].processor;
1322 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1323 {
1324 if (ix86_tune_defaulted)
1325 {
1326 ix86_tune_string = "x86-64";
1327 for (i = 0; i < pta_size; i++)
1328 if (! strcmp (ix86_tune_string,
1329 processor_alias_table[i].name))
1330 break;
1331 ix86_tune = processor_alias_table[i].processor;
1332 }
1333 else
1334 error ("CPU you selected does not support x86-64 "
1335 "instruction set");
1336 }
1337 /* Intel CPUs have always interpreted SSE prefetch instructions as
1338 NOPs; so, we can enable SSE prefetch instructions even when
1339 -mtune (rather than -march) points us to a processor that has them.
1340 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
1341 higher processors. */
1342 if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
1343 x86_prefetch_sse = true;
1344 break;
1345 }
1346 if (i == pta_size)
1347 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1348
1349 if (optimize_size)
1350 ix86_cost = &size_cost;
1351 else
1352 ix86_cost = processor_target_table[ix86_tune].cost;
1353 target_flags |= processor_target_table[ix86_tune].target_enable;
1354 target_flags &= ~processor_target_table[ix86_tune].target_disable;
1355
1356 /* Arrange to set up i386_stack_locals for all functions. */
1357 init_machine_status = ix86_init_machine_status;
1358
1359 /* Validate -mregparm= value. */
1360 if (ix86_regparm_string)
1361 {
1362 i = atoi (ix86_regparm_string);
1363 if (i < 0 || i > REGPARM_MAX)
1364 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1365 else
1366 ix86_regparm = i;
1367 }
1368 else
1369 if (TARGET_64BIT)
1370 ix86_regparm = REGPARM_MAX;
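/* For illustration: in 32-bit mode, -mregparm=3 makes the first three
   integer arguments be passed in registers (conventionally %eax, %edx and
   %ecx) rather than on the stack.  */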
1371
1372 /* If the user has provided any of the -malign-* options,
1373 warn and use that value only if -falign-* is not set.
1374 Remove this code in GCC 3.2 or later. */
1375 if (ix86_align_loops_string)
1376 {
1377 warning ("-malign-loops is obsolete, use -falign-loops");
1378 if (align_loops == 0)
1379 {
1380 i = atoi (ix86_align_loops_string);
1381 if (i < 0 || i > MAX_CODE_ALIGN)
1382 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1383 else
1384 align_loops = 1 << i;
1385 }
1386 }
1387
1388 if (ix86_align_jumps_string)
1389 {
1390 warning ("-malign-jumps is obsolete, use -falign-jumps");
1391 if (align_jumps == 0)
1392 {
1393 i = atoi (ix86_align_jumps_string);
1394 if (i < 0 || i > MAX_CODE_ALIGN)
1395 	  error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1396 else
1397 align_jumps = 1 << i;
1398 }
1399 }
1400
1401 if (ix86_align_funcs_string)
1402 {
1403 warning ("-malign-functions is obsolete, use -falign-functions");
1404 if (align_functions == 0)
1405 {
1406 i = atoi (ix86_align_funcs_string);
1407 if (i < 0 || i > MAX_CODE_ALIGN)
1408 	  error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1409 else
1410 align_functions = 1 << i;
1411 }
1412 }
1413
1414 /* Default align_* from the processor table. */
1415 if (align_loops == 0)
1416 {
1417 align_loops = processor_target_table[ix86_tune].align_loop;
1418 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
1419 }
1420 if (align_jumps == 0)
1421 {
1422 align_jumps = processor_target_table[ix86_tune].align_jump;
1423 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
1424 }
1425 if (align_functions == 0)
1426 {
1427 align_functions = processor_target_table[ix86_tune].align_func;
1428 }
1429
1430 /* Validate -mpreferred-stack-boundary= value, or provide default.
1431 The default of 128 bits is for Pentium III's SSE __m128, but we
1432 don't want additional code to keep the stack aligned when
1433 optimizing for code size. */
1434 ix86_preferred_stack_boundary = (optimize_size
1435 ? TARGET_64BIT ? 128 : 32
1436 : 128);
1437 if (ix86_preferred_stack_boundary_string)
1438 {
1439 i = atoi (ix86_preferred_stack_boundary_string);
1440 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1441 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1442 TARGET_64BIT ? 4 : 2);
1443 else
1444 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1445 }
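/* So, for example, -mpreferred-stack-boundary=4 yields (1 << 4) * 8 = 128
   bits, i.e. a 16-byte-aligned stack, matching the 128-bit default chosen
   above for SSE.  */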
1446
1447 /* Validate -mbranch-cost= value, or provide default. */
1448 ix86_branch_cost = processor_target_table[ix86_tune].cost->branch_cost;
1449 if (ix86_branch_cost_string)
1450 {
1451 i = atoi (ix86_branch_cost_string);
1452 if (i < 0 || i > 5)
1453 error ("-mbranch-cost=%d is not between 0 and 5", i);
1454 else
1455 ix86_branch_cost = i;
1456 }
1457
1458 if (ix86_tls_dialect_string)
1459 {
1460 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1461 ix86_tls_dialect = TLS_DIALECT_GNU;
1462 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1463 ix86_tls_dialect = TLS_DIALECT_SUN;
1464 else
1465 error ("bad value (%s) for -mtls-dialect= switch",
1466 ix86_tls_dialect_string);
1467 }
1468
1469 /* Keep nonleaf frame pointers. */
1470 if (TARGET_OMIT_LEAF_FRAME_POINTER)
1471 flag_omit_frame_pointer = 1;
1472
1473 /* If we're doing fast math, we don't care about comparison order
1474 wrt NaNs. This lets us use a shorter comparison sequence. */
1475 if (flag_unsafe_math_optimizations)
1476 target_flags &= ~MASK_IEEE_FP;
1477
1478 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1479 since the insns won't need emulation. */
1480 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1481 target_flags &= ~MASK_NO_FANCY_MATH_387;
1482
1483 /* Turn on SSE2 builtins for -msse3. */
1484 if (TARGET_SSE3)
1485 target_flags |= MASK_SSE2;
1486
1487 /* Turn on SSE builtins for -msse2. */
1488 if (TARGET_SSE2)
1489 target_flags |= MASK_SSE;
1490
1491 if (TARGET_64BIT)
1492 {
1493 if (TARGET_ALIGN_DOUBLE)
1494 error ("-malign-double makes no sense in the 64bit mode");
1495 if (TARGET_RTD)
1496 error ("-mrtd calling convention not supported in the 64bit mode");
1497 /* Enable by default the SSE and MMX builtins. */
1498 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1499 ix86_fpmath = FPMATH_SSE;
1500 }
1501 else
1502 {
1503 ix86_fpmath = FPMATH_387;
1504 /* The i386 ABI does not specify a red zone.  It still makes sense to use it
1505 when the programmer takes care to keep the stack from being destroyed. */
1506 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
1507 target_flags |= MASK_NO_RED_ZONE;
1508 }
1509
1510 if (ix86_fpmath_string != 0)
1511 {
1512 if (! strcmp (ix86_fpmath_string, "387"))
1513 ix86_fpmath = FPMATH_387;
1514 else if (! strcmp (ix86_fpmath_string, "sse"))
1515 {
1516 if (!TARGET_SSE)
1517 {
1518 warning ("SSE instruction set disabled, using 387 arithmetics");
1519 ix86_fpmath = FPMATH_387;
1520 }
1521 else
1522 ix86_fpmath = FPMATH_SSE;
1523 }
1524 else if (! strcmp (ix86_fpmath_string, "387,sse")
1525 || ! strcmp (ix86_fpmath_string, "sse,387"))
1526 {
1527 if (!TARGET_SSE)
1528 {
1529 warning ("SSE instruction set disabled, using 387 arithmetics");
1530 ix86_fpmath = FPMATH_387;
1531 }
1532 else if (!TARGET_80387)
1533 {
1534 warning ("387 instruction set disabled, using SSE arithmetics");
1535 ix86_fpmath = FPMATH_SSE;
1536 }
1537 else
1538 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1539 }
1540 else
1541 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1542 }
1543
1544 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1545 on by -msse. */
1546 if (TARGET_SSE)
1547 {
1548 target_flags |= MASK_MMX;
1549 x86_prefetch_sse = true;
1550 }
1551
1552 /* If it has 3DNow!, it also has MMX, so MMX is also turned on by -m3dnow. */
1553 if (TARGET_3DNOW)
1554 {
1555 target_flags |= MASK_MMX;
1556 /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
1557 extensions it adds. */
1558 if (x86_3dnow_a & (1 << ix86_arch))
1559 target_flags |= MASK_3DNOW_A;
1560 }
1561 if ((x86_accumulate_outgoing_args & TUNEMASK)
1562 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1563 && !optimize_size)
1564 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1565
1566 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1567 {
1568 char *p;
1569 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1570 p = strchr (internal_label_prefix, 'X');
1571 internal_label_prefix_len = p - internal_label_prefix;
1572 *p = '\0';
1573 }
1574 /* When a scheduling description is not available, disable the scheduler pass
1575 so it won't slow down the compilation and make x87 code slower. */
1576 if (!TARGET_SCHEDULE)
1577 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
1578 }
1579 \f
1580 void
1581 optimization_options (int level, int size ATTRIBUTE_UNUSED)
1582 {
1583 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1584 make the problem with not enough registers even worse. */
1585 #ifdef INSN_SCHEDULING
1586 if (level > 1)
1587 flag_schedule_insns = 0;
1588 #endif
1589
1590 /* The default values of these switches depend on TARGET_64BIT,
1591 which is not known at this moment.  Mark these values with 2 and
1592 let the user override them.  If there is no command line option
1593 specifying them, we will set the defaults in override_options. */
1594 if (optimize >= 1)
1595 flag_omit_frame_pointer = 2;
1596 flag_pcc_struct_return = 2;
1597 flag_asynchronous_unwind_tables = 2;
1598 }
1599 \f
1600 /* Table of valid machine attributes. */
1601 const struct attribute_spec ix86_attribute_table[] =
1602 {
1603 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1604 /* Stdcall attribute says callee is responsible for popping arguments
1605 if they are not variable. */
1606 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1607 /* Fastcall attribute says callee is responsible for popping arguments
1608 if they are not variable. */
1609 { "fastcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1610 /* Cdecl attribute says the callee is a normal C declaration */
1611 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1612 /* Regparm attribute specifies how many integer arguments are to be
1613 passed in registers. */
1614 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1615 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1616 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
1617 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
1618 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
1619 #endif
1620 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1621 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1622 #ifdef SUBTARGET_ATTRIBUTE_TABLE
1623 SUBTARGET_ATTRIBUTE_TABLE,
1624 #endif
1625 { NULL, 0, 0, false, false, false, NULL }
1626 };
1627
1628 /* Decide whether we can make a sibling call to a function. DECL is the
1629 declaration of the function being targeted by the call and EXP is the
1630 CALL_EXPR representing the call. */
1631
1632 static bool
1633 ix86_function_ok_for_sibcall (tree decl, tree exp)
1634 {
1635 /* If we are generating position-independent code, we cannot sibcall
1636 optimize any indirect call, or a direct call to a global function,
1637 as the PLT requires %ebx be live. */
1638 if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
1639 return false;
1640
1641 /* If we are returning floats on the 80387 register stack, we cannot
1642 make a sibcall from a function that doesn't return a float to a
1643 function that does or, conversely, from a function that does return
1644 a float to a function that doesn't; the necessary stack adjustment
1645 would not be executed. */
1646 if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp)))
1647 != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)))))
1648 return false;
1649
1650 /* If this call is indirect, we'll need to be able to use a call-clobbered
1651 register for the address of the target function. Make sure that all
1652 such registers are not used for passing parameters. */
1653 if (!decl && !TARGET_64BIT)
1654 {
1655 tree type;
1656
1657 /* We're looking at the CALL_EXPR, we need the type of the function. */
1658 type = TREE_OPERAND (exp, 0); /* pointer expression */
1659 type = TREE_TYPE (type); /* pointer type */
1660 type = TREE_TYPE (type); /* function type */
1661
1662 if (ix86_function_regparm (type, NULL) >= 3)
1663 {
1664 /* ??? Need to count the actual number of registers to be used,
1665 not the possible number of registers. Fix later. */
1666 return false;
1667 }
1668 }
1669
1670 /* Otherwise okay. That also includes certain types of indirect calls. */
1671 return true;
1672 }
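/* Illustrative example (not part of the checks above): when compiling
   32-bit PIC code,

     extern int bar (int);
     int foo (int x) { return bar (x); }

   cannot be turned into a sibcall, because the call to BAR goes through
   the PLT and the PLT entry requires %ebx to hold the GOT pointer.  */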
1673
1674 /* Handle a "cdecl", "stdcall", or "fastcall" attribute;
1675 arguments as in struct attribute_spec.handler. */
1676 static tree
1677 ix86_handle_cdecl_attribute (tree *node, tree name,
1678 tree args ATTRIBUTE_UNUSED,
1679 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1680 {
1681 if (TREE_CODE (*node) != FUNCTION_TYPE
1682 && TREE_CODE (*node) != METHOD_TYPE
1683 && TREE_CODE (*node) != FIELD_DECL
1684 && TREE_CODE (*node) != TYPE_DECL)
1685 {
1686 warning ("%qs attribute only applies to functions",
1687 IDENTIFIER_POINTER (name));
1688 *no_add_attrs = true;
1689 }
1690 else
1691 {
1692 if (is_attribute_p ("fastcall", name))
1693 {
1694 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
1695 {
1696 error ("fastcall and stdcall attributes are not compatible");
1697 }
1698 else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
1699 {
1700 error ("fastcall and regparm attributes are not compatible");
1701 }
1702 }
1703 else if (is_attribute_p ("stdcall", name))
1704 {
1705 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1706 {
1707 error ("fastcall and stdcall attributes are not compatible");
1708 }
1709 }
1710 }
1711
1712 if (TARGET_64BIT)
1713 {
1714 warning ("%qs attribute ignored", IDENTIFIER_POINTER (name));
1715 *no_add_attrs = true;
1716 }
1717
1718 return NULL_TREE;
1719 }
1720
1721 /* Handle a "regparm" attribute;
1722 arguments as in struct attribute_spec.handler. */
1723 static tree
1724 ix86_handle_regparm_attribute (tree *node, tree name, tree args,
1725 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1726 {
1727 if (TREE_CODE (*node) != FUNCTION_TYPE
1728 && TREE_CODE (*node) != METHOD_TYPE
1729 && TREE_CODE (*node) != FIELD_DECL
1730 && TREE_CODE (*node) != TYPE_DECL)
1731 {
1732 warning ("%qs attribute only applies to functions",
1733 IDENTIFIER_POINTER (name));
1734 *no_add_attrs = true;
1735 }
1736 else
1737 {
1738 tree cst;
1739
1740 cst = TREE_VALUE (args);
1741 if (TREE_CODE (cst) != INTEGER_CST)
1742 {
1743 warning ("%qs attribute requires an integer constant argument",
1744 IDENTIFIER_POINTER (name));
1745 *no_add_attrs = true;
1746 }
1747 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1748 {
1749 warning ("argument to %qs attribute larger than %d",
1750 IDENTIFIER_POINTER (name), REGPARM_MAX);
1751 *no_add_attrs = true;
1752 }
1753
1754 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1755 {
1756 error ("fastcall and regparm attributes are not compatible");
1757 }
1758 }
1759
1760 return NULL_TREE;
1761 }
1762
1763 /* Return 0 if the attributes for two types are incompatible, 1 if they
1764 are compatible, and 2 if they are nearly compatible (which causes a
1765 warning to be generated). */
1766
1767 static int
1768 ix86_comp_type_attributes (tree type1, tree type2)
1769 {
1770 /* Check for mismatch of non-default calling convention. */
1771 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1772
1773 if (TREE_CODE (type1) != FUNCTION_TYPE)
1774 return 1;
1775
1776 /* Check for mismatched fastcall types */
1777 if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
1778 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
1779 return 0;
1780
1781 /* Check for mismatched return types (cdecl vs stdcall). */
1782 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1783 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1784 return 0;
1785 if (ix86_function_regparm (type1, NULL)
1786 != ix86_function_regparm (type2, NULL))
1787 return 0;
1788 return 1;
1789 }
1790 \f
1791 /* Return the regparm value for a function with the indicated TYPE and DECL.
1792 DECL may be NULL when calling the function indirectly
1793 or considering a libcall. */
1794
1795 static int
1796 ix86_function_regparm (tree type, tree decl)
1797 {
1798 tree attr;
1799 int regparm = ix86_regparm;
1800 bool user_convention = false;
1801
1802 if (!TARGET_64BIT)
1803 {
1804 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1805 if (attr)
1806 {
1807 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1808 user_convention = true;
1809 }
1810
1811 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
1812 {
1813 regparm = 2;
1814 user_convention = true;
1815 }
1816
1817 /* Use register calling convention for local functions when possible. */
1818 if (!TARGET_64BIT && !user_convention && decl
1819 && flag_unit_at_a_time && !profile_flag)
1820 {
1821 struct cgraph_local_info *i = cgraph_local_info (decl);
1822 if (i && i->local)
1823 {
1824 /* We can't use regparm(3) for nested functions as these use
1825 static chain pointer in third argument. */
1826 if (DECL_CONTEXT (decl) && !DECL_NO_STATIC_CHAIN (decl))
1827 regparm = 2;
1828 else
1829 regparm = 3;
1830 }
1831 }
1832 }
1833 return regparm;
1834 }
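/* Rough illustration of what this function computes (example only):

     int __attribute__((regparm(3))) f (int a, int b, int c);
     int __attribute__((fastcall))   g (int a, int b, int c);

   For F the result is 3, so A, B and C are passed in EAX, EDX and ECX.
   For G the result is 2, so A and B are passed in ECX and EDX while C
   goes on the stack.  Local functions compiled with -funit-at-a-time may
   be promoted to regparm(3) automatically by the code above.  */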
1835
1836 /* Return true if EAX is live at the start of the function. Used by
1837 ix86_expand_prologue to determine if we need special help before
1838 calling allocate_stack_worker. */
1839
1840 static bool
1841 ix86_eax_live_at_start_p (void)
1842 {
1843 /* Cheat. Don't bother working forward from ix86_function_regparm
1844 to the function type to whether an actual argument is located in
1845 eax. Instead just look at cfg info, which is still close enough
1846 to correct at this point. This gives false positives for broken
1847 functions that might use uninitialized data that happens to be
1848 allocated in eax, but who cares? */
1849 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->global_live_at_end, 0);
1850 }
1851
1852 /* Value is the number of bytes of arguments automatically
1853 popped when returning from a subroutine call.
1854 FUNDECL is the declaration node of the function (as a tree),
1855 FUNTYPE is the data type of the function (as a tree),
1856 or for a library call it is an identifier node for the subroutine name.
1857 SIZE is the number of bytes of arguments passed on the stack.
1858
1859 On the 80386, the RTD insn may be used to pop them if the number
1860 of args is fixed, but if the number is variable then the caller
1861 must pop them all. RTD can't be used for library calls now
1862 because the library is compiled with the Unix compiler.
1863 Use of RTD is a selectable option, since it is incompatible with
1864 standard Unix calling sequences. If the option is not selected,
1865 the caller must always pop the args.
1866
1867 The attribute stdcall is equivalent to RTD on a per module basis. */
1868
1869 int
1870 ix86_return_pops_args (tree fundecl, tree funtype, int size)
1871 {
1872 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1873
1874 /* Cdecl functions override -mrtd, and never pop the stack. */
1875 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1876
1877 /* Stdcall and fastcall functions will pop the stack if not
1878 variable args. */
1879 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
1880 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
1881 rtd = 1;
1882
1883 if (rtd
1884 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1885 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1886 == void_type_node)))
1887 return size;
1888 }
1889
1890 /* Lose any fake structure return argument if it is passed on the stack. */
1891 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
1892 && !TARGET_64BIT
1893 && !KEEP_AGGREGATE_RETURN_POINTER)
1894 {
1895 int nregs = ix86_function_regparm (funtype, fundecl);
1896
1897 if (!nregs)
1898 return GET_MODE_SIZE (Pmode);
1899 }
1900
1901 return 0;
1902 }
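/* Example (illustrative): for

     void __attribute__((stdcall)) g (int a, int b);

   the callee pops its 8 bytes of arguments, so this function returns 8
   at every call site.  A cdecl or variadic function returns 0 and the
   caller is responsible for popping the arguments.  */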
1903 \f
1904 /* Argument support functions. */
1905
1906 /* Return true when register may be used to pass function parameters. */
1907 bool
1908 ix86_function_arg_regno_p (int regno)
1909 {
1910 int i;
1911 if (!TARGET_64BIT)
1912 return (regno < REGPARM_MAX
1913 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1914 if (SSE_REGNO_P (regno) && TARGET_SSE)
1915 return true;
1916 /* RAX is used as hidden argument to va_arg functions. */
1917 if (!regno)
1918 return true;
1919 for (i = 0; i < REGPARM_MAX; i++)
1920 if (regno == x86_64_int_parameter_registers[i])
1921 return true;
1922 return false;
1923 }
1924
1925 /* Return true if we do not know how to pass TYPE solely in registers. */
1926
1927 static bool
1928 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
1929 {
1930 if (must_pass_in_stack_var_size_or_pad (mode, type))
1931 return true;
1932 return (!TARGET_64BIT && type && mode == TImode);
1933 }
1934
1935 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1936 for a call to a function whose data type is FNTYPE.
1937 For a library call, FNTYPE is 0. */
1938
1939 void
1940 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
1941 tree fntype, /* tree ptr for function decl */
1942 rtx libname, /* SYMBOL_REF of library name or 0 */
1943 tree fndecl)
1944 {
1945 static CUMULATIVE_ARGS zero_cum;
1946 tree param, next_param;
1947
1948 if (TARGET_DEBUG_ARG)
1949 {
1950 fprintf (stderr, "\ninit_cumulative_args (");
1951 if (fntype)
1952 fprintf (stderr, "fntype code = %s, ret code = %s",
1953 tree_code_name[(int) TREE_CODE (fntype)],
1954 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1955 else
1956 fprintf (stderr, "no fntype");
1957
1958 if (libname)
1959 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1960 }
1961
1962 *cum = zero_cum;
1963
1964 /* Set up the number of registers to use for passing arguments. */
1965 if (fntype)
1966 cum->nregs = ix86_function_regparm (fntype, fndecl);
1967 else
1968 cum->nregs = ix86_regparm;
1969 if (TARGET_SSE)
1970 cum->sse_nregs = SSE_REGPARM_MAX;
1971 if (TARGET_MMX)
1972 cum->mmx_nregs = MMX_REGPARM_MAX;
1973 cum->warn_sse = true;
1974 cum->warn_mmx = true;
1975 cum->maybe_vaarg = false;
1976
1977 /* Use ecx and edx registers if the function has the fastcall attribute. */
1978 if (fntype && !TARGET_64BIT)
1979 {
1980 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
1981 {
1982 cum->nregs = 2;
1983 cum->fastcall = 1;
1984 }
1985 }
1986
1987 /* Determine whether this function has variable arguments.  A fixed
1988 argument list is indicated by its last argument being 'void_type_node';
1989 otherwise the function takes variable arguments, and in that case
1990 we won't pass anything in registers in 32-bit mode. */
1991
1992 if (cum->nregs || cum->mmx_nregs || cum->sse_nregs)
1993 {
1994 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1995 param != 0; param = next_param)
1996 {
1997 next_param = TREE_CHAIN (param);
1998 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1999 {
2000 if (!TARGET_64BIT)
2001 {
2002 cum->nregs = 0;
2003 cum->sse_nregs = 0;
2004 cum->mmx_nregs = 0;
2005 cum->warn_sse = 0;
2006 cum->warn_mmx = 0;
2007 cum->fastcall = 0;
2008 }
2009 cum->maybe_vaarg = true;
2010 }
2011 }
2012 }
2013 if ((!fntype && !libname)
2014 || (fntype && !TYPE_ARG_TYPES (fntype)))
2015 cum->maybe_vaarg = 1;
2016
2017 if (TARGET_DEBUG_ARG)
2018 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
2019
2020 return;
2021 }
2022
2023 /* x86-64 register passing implementation.  See the x86-64 ABI for details.
2024 The goal of this code is to classify each eightbyte (8-byte chunk) of an
2025 incoming argument by register class and assign registers accordingly. */
2026
2027 /* Return the union class of CLASS1 and CLASS2.
2028 See the x86-64 PS ABI for details. */
2029
2030 static enum x86_64_reg_class
2031 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
2032 {
2033 /* Rule #1: If both classes are equal, this is the resulting class. */
2034 if (class1 == class2)
2035 return class1;
2036
2037 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
2038 the other class. */
2039 if (class1 == X86_64_NO_CLASS)
2040 return class2;
2041 if (class2 == X86_64_NO_CLASS)
2042 return class1;
2043
2044 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
2045 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
2046 return X86_64_MEMORY_CLASS;
2047
2048 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
2049 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
2050 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
2051 return X86_64_INTEGERSI_CLASS;
2052 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
2053 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
2054 return X86_64_INTEGER_CLASS;
2055
2056 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
2057 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
2058 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
2059 return X86_64_MEMORY_CLASS;
2060
2061 /* Rule #6: Otherwise class SSE is used. */
2062 return X86_64_SSE_CLASS;
2063 }
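/* A small worked example (illustrative): for

     struct s { int i; float f; };

   both fields land in the same eightbyte.  The int is classified as
   INTEGERSI and the float at bit offset 32 as SSE; merging them by the
   rules above yields INTEGER, so the whole struct is passed in a single
   integer register.  */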
2064
2065 /* Classify the argument of type TYPE and mode MODE.
2066 CLASSES will be filled by the register class used to pass each word
2067 of the operand. The number of words is returned. In case the parameter
2068 should be passed in memory, 0 is returned. As a special case for zero
2069 sized containers, classes[0] will be NO_CLASS and 1 is returned.
2070
2071 BIT_OFFSET is used internally for handling records and specifies the
2072 offset of the operand in bits, modulo 256, to avoid overflow cases.
2073
2074 See the x86-64 PS ABI for details.
2075 */
2076
2077 static int
2078 classify_argument (enum machine_mode mode, tree type,
2079 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
2080 {
2081 HOST_WIDE_INT bytes =
2082 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2083 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2084
2085 /* Variable sized entities are always passed/returned in memory. */
2086 if (bytes < 0)
2087 return 0;
2088
2089 if (mode != VOIDmode
2090 && targetm.calls.must_pass_in_stack (mode, type))
2091 return 0;
2092
2093 if (type && AGGREGATE_TYPE_P (type))
2094 {
2095 int i;
2096 tree field;
2097 enum x86_64_reg_class subclasses[MAX_CLASSES];
2098
2099 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
2100 if (bytes > 16)
2101 return 0;
2102
2103 for (i = 0; i < words; i++)
2104 classes[i] = X86_64_NO_CLASS;
2105
2106 /* Zero sized arrays or structures are NO_CLASS.  We normally return 0
2107 to signal the memory class, so handle this as a special case. */
2108 if (!words)
2109 {
2110 classes[0] = X86_64_NO_CLASS;
2111 return 1;
2112 }
2113
2114 /* Classify each field of record and merge classes. */
2115 if (TREE_CODE (type) == RECORD_TYPE)
2116 {
2117 /* For C++ classes, first merge in the fields of the base classes. */
2118 if (TYPE_BINFO (type))
2119 {
2120 tree binfo, base_binfo;
2121 int basenum;
2122
2123 for (binfo = TYPE_BINFO (type), basenum = 0;
2124 BINFO_BASE_ITERATE (binfo, basenum, base_binfo); basenum++)
2125 {
2126 int num;
2127 int offset = tree_low_cst (BINFO_OFFSET (base_binfo), 0) * 8;
2128 tree type = BINFO_TYPE (base_binfo);
2129
2130 num = classify_argument (TYPE_MODE (type),
2131 type, subclasses,
2132 (offset + bit_offset) % 256);
2133 if (!num)
2134 return 0;
2135 for (i = 0; i < num; i++)
2136 {
2137 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2138 classes[i + pos] =
2139 merge_classes (subclasses[i], classes[i + pos]);
2140 }
2141 }
2142 }
2143 /* And now merge the fields of structure. */
2144 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2145 {
2146 if (TREE_CODE (field) == FIELD_DECL)
2147 {
2148 int num;
2149
2150 /* Bitfields are always classified as integer. Handle them
2151 early, since later code would consider them to be
2152 misaligned integers. */
2153 if (DECL_BIT_FIELD (field))
2154 {
2155 for (i = int_bit_position (field) / 8 / 8;
2156 i < (int_bit_position (field)
2157 + tree_low_cst (DECL_SIZE (field), 0)
2158 + 63) / 8 / 8; i++)
2159 classes[i] =
2160 merge_classes (X86_64_INTEGER_CLASS,
2161 classes[i]);
2162 }
2163 else
2164 {
2165 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2166 TREE_TYPE (field), subclasses,
2167 (int_bit_position (field)
2168 + bit_offset) % 256);
2169 if (!num)
2170 return 0;
2171 for (i = 0; i < num; i++)
2172 {
2173 int pos =
2174 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
2175 classes[i + pos] =
2176 merge_classes (subclasses[i], classes[i + pos]);
2177 }
2178 }
2179 }
2180 }
2181 }
2182 /* Arrays are handled as small records. */
2183 else if (TREE_CODE (type) == ARRAY_TYPE)
2184 {
2185 int num;
2186 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2187 TREE_TYPE (type), subclasses, bit_offset);
2188 if (!num)
2189 return 0;
2190
2191 /* The partial classes are now full classes. */
2192 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2193 subclasses[0] = X86_64_SSE_CLASS;
2194 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
2195 subclasses[0] = X86_64_INTEGER_CLASS;
2196
2197 for (i = 0; i < words; i++)
2198 classes[i] = subclasses[i % num];
2199 }
2200 /* Unions are similar to RECORD_TYPE but offset is always 0. */
2201 else if (TREE_CODE (type) == UNION_TYPE
2202 || TREE_CODE (type) == QUAL_UNION_TYPE)
2203 {
2204 /* For C++ classes, first merge in the fields of the base classes. */
2205 if (TYPE_BINFO (type))
2206 {
2207 tree binfo, base_binfo;
2208 int basenum;
2209
2210 for (binfo = TYPE_BINFO (type), basenum = 0;
2211 BINFO_BASE_ITERATE (binfo, basenum, base_binfo); basenum++)
2212 {
2213 int num;
2214 int offset = tree_low_cst (BINFO_OFFSET (base_binfo), 0) * 8;
2215 tree type = BINFO_TYPE (base_binfo);
2216
2217 num = classify_argument (TYPE_MODE (type),
2218 type, subclasses,
2219 (offset + (bit_offset % 64)) % 256);
2220 if (!num)
2221 return 0;
2222 for (i = 0; i < num; i++)
2223 {
2224 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2225 classes[i + pos] =
2226 merge_classes (subclasses[i], classes[i + pos]);
2227 }
2228 }
2229 }
2230 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2231 {
2232 if (TREE_CODE (field) == FIELD_DECL)
2233 {
2234 int num;
2235 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2236 TREE_TYPE (field), subclasses,
2237 bit_offset);
2238 if (!num)
2239 return 0;
2240 for (i = 0; i < num; i++)
2241 classes[i] = merge_classes (subclasses[i], classes[i]);
2242 }
2243 }
2244 }
2245 else if (TREE_CODE (type) == SET_TYPE)
2246 {
2247 if (bytes <= 4)
2248 {
2249 classes[0] = X86_64_INTEGERSI_CLASS;
2250 return 1;
2251 }
2252 else if (bytes <= 8)
2253 {
2254 classes[0] = X86_64_INTEGER_CLASS;
2255 return 1;
2256 }
2257 else if (bytes <= 12)
2258 {
2259 classes[0] = X86_64_INTEGER_CLASS;
2260 classes[1] = X86_64_INTEGERSI_CLASS;
2261 return 2;
2262 }
2263 else
2264 {
2265 classes[0] = X86_64_INTEGER_CLASS;
2266 classes[1] = X86_64_INTEGER_CLASS;
2267 return 2;
2268 }
2269 }
2270 else
2271 abort ();
2272
2273 /* Final merger cleanup. */
2274 for (i = 0; i < words; i++)
2275 {
2276 /* If one class is MEMORY, everything should be passed in
2277 memory. */
2278 if (classes[i] == X86_64_MEMORY_CLASS)
2279 return 0;
2280
2281 /* The X86_64_SSEUP_CLASS should be always preceded by
2282 X86_64_SSE_CLASS. */
2283 if (classes[i] == X86_64_SSEUP_CLASS
2284 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
2285 classes[i] = X86_64_SSE_CLASS;
2286
2287 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
2288 if (classes[i] == X86_64_X87UP_CLASS
2289 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
2290 classes[i] = X86_64_SSE_CLASS;
2291 }
2292 return words;
2293 }
2294
2295 /* Compute the alignment needed.  We align all types to their natural
2296 boundaries, with the exception of XFmode, which is aligned to 128 bits. */
2297 if (mode != VOIDmode && mode != BLKmode)
2298 {
2299 int mode_alignment = GET_MODE_BITSIZE (mode);
2300
2301 if (mode == XFmode)
2302 mode_alignment = 128;
2303 else if (mode == XCmode)
2304 mode_alignment = 256;
2305 if (COMPLEX_MODE_P (mode))
2306 mode_alignment /= 2;
2307 /* Misaligned fields are always returned in memory. */
2308 if (bit_offset % mode_alignment)
2309 return 0;
2310 }
2311
2312 /* for V1xx modes, just use the base mode */
2313 if (VECTOR_MODE_P (mode)
2314 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
2315 mode = GET_MODE_INNER (mode);
2316
2317 /* Classification of atomic types. */
2318 switch (mode)
2319 {
2320 case DImode:
2321 case SImode:
2322 case HImode:
2323 case QImode:
2324 case CSImode:
2325 case CHImode:
2326 case CQImode:
2327 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2328 classes[0] = X86_64_INTEGERSI_CLASS;
2329 else
2330 classes[0] = X86_64_INTEGER_CLASS;
2331 return 1;
2332 case CDImode:
2333 case TImode:
2334 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2335 return 2;
2336 case CTImode:
2337 return 0;
2338 case SFmode:
2339 if (!(bit_offset % 64))
2340 classes[0] = X86_64_SSESF_CLASS;
2341 else
2342 classes[0] = X86_64_SSE_CLASS;
2343 return 1;
2344 case DFmode:
2345 classes[0] = X86_64_SSEDF_CLASS;
2346 return 1;
2347 case XFmode:
2348 classes[0] = X86_64_X87_CLASS;
2349 classes[1] = X86_64_X87UP_CLASS;
2350 return 2;
2351 case TFmode:
2352 classes[0] = X86_64_SSE_CLASS;
2353 classes[1] = X86_64_SSEUP_CLASS;
2354 return 2;
2355 case SCmode:
2356 classes[0] = X86_64_SSE_CLASS;
2357 return 1;
2358 case DCmode:
2359 classes[0] = X86_64_SSEDF_CLASS;
2360 classes[1] = X86_64_SSEDF_CLASS;
2361 return 2;
2362 case XCmode:
2363 case TCmode:
2364 /* These modes are larger than 16 bytes. */
2365 return 0;
2366 case V4SFmode:
2367 case V4SImode:
2368 case V16QImode:
2369 case V8HImode:
2370 case V2DFmode:
2371 case V2DImode:
2372 classes[0] = X86_64_SSE_CLASS;
2373 classes[1] = X86_64_SSEUP_CLASS;
2374 return 2;
2375 case V2SFmode:
2376 case V2SImode:
2377 case V4HImode:
2378 case V8QImode:
2379 classes[0] = X86_64_SSE_CLASS;
2380 return 1;
2381 case BLKmode:
2382 case VOIDmode:
2383 return 0;
2384 default:
2385 if (VECTOR_MODE_P (mode))
2386 {
2387 if (bytes > 16)
2388 return 0;
2389 if (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT)
2390 {
2391 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2392 classes[0] = X86_64_INTEGERSI_CLASS;
2393 else
2394 classes[0] = X86_64_INTEGER_CLASS;
2395 classes[1] = X86_64_INTEGER_CLASS;
2396 return 1 + (bytes > 8);
2397 }
2398 }
2399 abort ();
2400 }
2401 }
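/* Worked example (illustrative): for

     struct s { double d; int i; };

   the struct occupies 16 bytes (including tail padding), i.e. two
   eightbytes.  The first eightbyte holds the double and is classified
   SSEDF; the second holds the int and is classified INTEGER.  The
   function stores those two classes and returns 2.  A structure larger
   than 16 bytes, or one with misaligned fields, yields 0 and is passed
   in memory.  */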
2402
2403 /* Examine the argument and set the number of registers required in each
2404 class.  Return 0 iff the parameter should be passed in memory. */
2405 static int
2406 examine_argument (enum machine_mode mode, tree type, int in_return,
2407 int *int_nregs, int *sse_nregs)
2408 {
2409 enum x86_64_reg_class class[MAX_CLASSES];
2410 int n = classify_argument (mode, type, class, 0);
2411
2412 *int_nregs = 0;
2413 *sse_nregs = 0;
2414 if (!n)
2415 return 0;
2416 for (n--; n >= 0; n--)
2417 switch (class[n])
2418 {
2419 case X86_64_INTEGER_CLASS:
2420 case X86_64_INTEGERSI_CLASS:
2421 (*int_nregs)++;
2422 break;
2423 case X86_64_SSE_CLASS:
2424 case X86_64_SSESF_CLASS:
2425 case X86_64_SSEDF_CLASS:
2426 (*sse_nregs)++;
2427 break;
2428 case X86_64_NO_CLASS:
2429 case X86_64_SSEUP_CLASS:
2430 break;
2431 case X86_64_X87_CLASS:
2432 case X86_64_X87UP_CLASS:
2433 if (!in_return)
2434 return 0;
2435 break;
2436 case X86_64_MEMORY_CLASS:
2437 abort ();
2438 }
2439 return 1;
2440 }
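/* Illustrative note: for the struct { double d; int i; } example above,
   this sets *INT_NREGS to 1 and *SSE_NREGS to 1 and returns 1.  For an
   argument classified as memory (e.g. a 32-byte structure) it returns 0.  */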
2441 /* Construct container for the argument used by GCC interface. See
2442 FUNCTION_ARG for the detailed description. */
2443 static rtx
2444 construct_container (enum machine_mode mode, tree type, int in_return,
2445 int nintregs, int nsseregs, const int * intreg,
2446 int sse_regno)
2447 {
2448 enum machine_mode tmpmode;
2449 int bytes =
2450 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2451 enum x86_64_reg_class class[MAX_CLASSES];
2452 int n;
2453 int i;
2454 int nexps = 0;
2455 int needed_sseregs, needed_intregs;
2456 rtx exp[MAX_CLASSES];
2457 rtx ret;
2458
2459 n = classify_argument (mode, type, class, 0);
2460 if (TARGET_DEBUG_ARG)
2461 {
2462 if (!n)
2463 fprintf (stderr, "Memory class\n");
2464 else
2465 {
2466 fprintf (stderr, "Classes:");
2467 for (i = 0; i < n; i++)
2468 {
2469 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2470 }
2471 fprintf (stderr, "\n");
2472 }
2473 }
2474 if (!n)
2475 return NULL;
2476 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
2477 return NULL;
2478 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2479 return NULL;
2480
2481 /* First construct simple cases. Avoid SCmode, since we want to use
2482 single register to pass this type. */
2483 if (n == 1 && mode != SCmode)
2484 switch (class[0])
2485 {
2486 case X86_64_INTEGER_CLASS:
2487 case X86_64_INTEGERSI_CLASS:
2488 return gen_rtx_REG (mode, intreg[0]);
2489 case X86_64_SSE_CLASS:
2490 case X86_64_SSESF_CLASS:
2491 case X86_64_SSEDF_CLASS:
2492 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2493 case X86_64_X87_CLASS:
2494 return gen_rtx_REG (mode, FIRST_STACK_REG);
2495 case X86_64_NO_CLASS:
2496 /* Zero sized array, struct or class. */
2497 return NULL;
2498 default:
2499 abort ();
2500 }
2501 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
2502 && mode != BLKmode)
2503 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2504 if (n == 2
2505 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2506 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
2507 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2508 && class[1] == X86_64_INTEGER_CLASS
2509 && (mode == CDImode || mode == TImode || mode == TFmode)
2510 && intreg[0] + 1 == intreg[1])
2511 return gen_rtx_REG (mode, intreg[0]);
2512 if (n == 4
2513 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
2514 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS
2515 && mode != BLKmode)
2516 return gen_rtx_REG (XCmode, FIRST_STACK_REG);
2517
2518 /* Otherwise figure out the entries of the PARALLEL. */
2519 for (i = 0; i < n; i++)
2520 {
2521 switch (class[i])
2522 {
2523 case X86_64_NO_CLASS:
2524 break;
2525 case X86_64_INTEGER_CLASS:
2526 case X86_64_INTEGERSI_CLASS:
2527 /* Merge TImodes on aligned occasions here too. */
2528 if (i * 8 + 8 > bytes)
2529 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2530 else if (class[i] == X86_64_INTEGERSI_CLASS)
2531 tmpmode = SImode;
2532 else
2533 tmpmode = DImode;
2534 /* We may have requested a size (e.g. 24 bits) with no integer mode.  Use DImode. */
2535 if (tmpmode == BLKmode)
2536 tmpmode = DImode;
2537 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2538 gen_rtx_REG (tmpmode, *intreg),
2539 GEN_INT (i*8));
2540 intreg++;
2541 break;
2542 case X86_64_SSESF_CLASS:
2543 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2544 gen_rtx_REG (SFmode,
2545 SSE_REGNO (sse_regno)),
2546 GEN_INT (i*8));
2547 sse_regno++;
2548 break;
2549 case X86_64_SSEDF_CLASS:
2550 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2551 gen_rtx_REG (DFmode,
2552 SSE_REGNO (sse_regno)),
2553 GEN_INT (i*8));
2554 sse_regno++;
2555 break;
2556 case X86_64_SSE_CLASS:
2557 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2558 tmpmode = TImode;
2559 else
2560 tmpmode = DImode;
2561 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2562 gen_rtx_REG (tmpmode,
2563 SSE_REGNO (sse_regno)),
2564 GEN_INT (i*8));
2565 if (tmpmode == TImode)
2566 i++;
2567 sse_regno++;
2568 break;
2569 default:
2570 abort ();
2571 }
2572 }
2573 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2574 for (i = 0; i < nexps; i++)
2575 XVECEXP (ret, 0, i) = exp [i];
2576 return ret;
2577 }
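/* Rough sketch of the result (illustrative; register names assume the
   first free argument slots): for struct { double d; int i; } passed by
   value the container has the form

     (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                (expr_list (reg:DI di)   (const_int 8))])

   i.e. the first eightbyte travels in an SSE register and the second in
   an integer register at byte offset 8.  */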
2578
2579 /* Update the data in CUM to advance over an argument
2580 of mode MODE and data type TYPE.
2581 (TYPE is null for libcalls where that information may not be available.) */
2582
2583 void
2584 function_arg_advance (CUMULATIVE_ARGS *cum, /* current arg information */
2585 enum machine_mode mode, /* current arg mode */
2586 tree type, /* type of the argument or 0 if lib support */
2587 int named) /* whether or not the argument was named */
2588 {
2589 int bytes =
2590 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2591 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2592
2593 if (TARGET_DEBUG_ARG)
2594 fprintf (stderr,
2595 "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, mode=%s, named=%d)\n\n",
2596 words, cum->words, cum->nregs, cum->sse_nregs, GET_MODE_NAME (mode), named);
2597 if (TARGET_64BIT)
2598 {
2599 int int_nregs, sse_nregs;
2600 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2601 cum->words += words;
2602 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2603 {
2604 cum->nregs -= int_nregs;
2605 cum->sse_nregs -= sse_nregs;
2606 cum->regno += int_nregs;
2607 cum->sse_regno += sse_nregs;
2608 }
2609 else
2610 cum->words += words;
2611 }
2612 else
2613 {
2614 if (TARGET_SSE && SSE_REG_MODE_P (mode)
2615 && (!type || !AGGREGATE_TYPE_P (type)))
2616 {
2617 cum->sse_words += words;
2618 cum->sse_nregs -= 1;
2619 cum->sse_regno += 1;
2620 if (cum->sse_nregs <= 0)
2621 {
2622 cum->sse_nregs = 0;
2623 cum->sse_regno = 0;
2624 }
2625 }
2626 else if (TARGET_MMX && MMX_REG_MODE_P (mode)
2627 && (!type || !AGGREGATE_TYPE_P (type)))
2628 {
2629 cum->mmx_words += words;
2630 cum->mmx_nregs -= 1;
2631 cum->mmx_regno += 1;
2632 if (cum->mmx_nregs <= 0)
2633 {
2634 cum->mmx_nregs = 0;
2635 cum->mmx_regno = 0;
2636 }
2637 }
2638 else
2639 {
2640 cum->words += words;
2641 cum->nregs -= words;
2642 cum->regno += words;
2643
2644 if (cum->nregs <= 0)
2645 {
2646 cum->nregs = 0;
2647 cum->regno = 0;
2648 }
2649 }
2650 }
2651 return;
2652 }
2653
2654 /* Define where to put the arguments to a function.
2655 Value is zero to push the argument on the stack,
2656 or a hard register in which to store the argument.
2657
2658 MODE is the argument's machine mode.
2659 TYPE is the data type of the argument (as a tree).
2660 This is null for libcalls where that information may
2661 not be available.
2662 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2663 the preceding args and about the function being called.
2664 NAMED is nonzero if this argument is a named parameter
2665 (otherwise it is an extra parameter matching an ellipsis). */
2666
2667 rtx
2668 function_arg (CUMULATIVE_ARGS *cum, /* current arg information */
2669 enum machine_mode mode, /* current arg mode */
2670 tree type, /* type of the argument or 0 if lib support */
2671 int named) /* != 0 for normal args, == 0 for ... args */
2672 {
2673 rtx ret = NULL_RTX;
2674 int bytes =
2675 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2676 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2677 static bool warnedsse, warnedmmx;
2678
2679 /* To simplify the code below, represent vector types with a vector mode
2680 even if MMX/SSE are not active. */
2681 if (type
2682 && TREE_CODE (type) == VECTOR_TYPE
2683 && (bytes == 8 || bytes == 16)
2684 && GET_MODE_CLASS (TYPE_MODE (type)) != MODE_VECTOR_INT
2685 && GET_MODE_CLASS (TYPE_MODE (type)) != MODE_VECTOR_FLOAT)
2686 {
2687 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
2688 enum machine_mode newmode
2689 = TREE_CODE (TREE_TYPE (type)) == REAL_TYPE
2690 ? MIN_MODE_VECTOR_FLOAT : MIN_MODE_VECTOR_INT;
2691
2692 /* Get the mode which has this inner mode and number of units. */
2693 for (; newmode != VOIDmode; newmode = GET_MODE_WIDER_MODE (newmode))
2694 if (GET_MODE_NUNITS (newmode) == TYPE_VECTOR_SUBPARTS (type)
2695 && GET_MODE_INNER (newmode) == innermode)
2696 {
2697 mode = newmode;
2698 break;
2699 }
2700 }
2701
2702 /* Handle a hidden AL argument containing the number of SSE registers used
2703 for varargs x86-64 functions.  For the i386 ABI just return constm1_rtx
2704 to avoid any AL settings. */
2705 if (mode == VOIDmode)
2706 {
2707 if (TARGET_64BIT)
2708 return GEN_INT (cum->maybe_vaarg
2709 ? (cum->sse_nregs < 0
2710 ? SSE_REGPARM_MAX
2711 : cum->sse_regno)
2712 : -1);
2713 else
2714 return constm1_rtx;
2715 }
2716 if (TARGET_64BIT)
2717 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2718 &x86_64_int_parameter_registers [cum->regno],
2719 cum->sse_regno);
2720 else
2721 switch (mode)
2722 {
2723 /* For now, pass fp/complex values on the stack. */
2724 default:
2725 break;
2726
2727 case BLKmode:
2728 if (bytes < 0)
2729 break;
2730 /* FALLTHRU */
2731 case DImode:
2732 case SImode:
2733 case HImode:
2734 case QImode:
2735 if (words <= cum->nregs)
2736 {
2737 int regno = cum->regno;
2738
2739 /* Fastcall allocates the first two DWORD (SImode) or
2740 smaller arguments to ECX and EDX. */
2741 if (cum->fastcall)
2742 {
2743 if (mode == BLKmode || mode == DImode)
2744 break;
2745
2746 /* ECX, not EAX, is the first allocated register. */
2747 if (regno == 0)
2748 regno = 2;
2749 }
2750 ret = gen_rtx_REG (mode, regno);
2751 }
2752 break;
2753 case TImode:
2754 case V16QImode:
2755 case V8HImode:
2756 case V4SImode:
2757 case V2DImode:
2758 case V4SFmode:
2759 case V2DFmode:
2760 if (!type || !AGGREGATE_TYPE_P (type))
2761 {
2762 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
2763 {
2764 warnedsse = true;
2765 warning ("SSE vector argument without SSE enabled "
2766 "changes the ABI");
2767 }
2768 if (cum->sse_nregs)
2769 ret = gen_rtx_REG (mode, cum->sse_regno + FIRST_SSE_REG);
2770 }
2771 break;
2772 case V8QImode:
2773 case V4HImode:
2774 case V2SImode:
2775 case V2SFmode:
2776 if (!type || !AGGREGATE_TYPE_P (type))
2777 {
2778 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
2779 {
2780 warnedmmx = true;
2781 warning ("MMX vector argument without MMX enabled "
2782 "changes the ABI");
2783 }
2784 if (cum->mmx_nregs)
2785 ret = gen_rtx_REG (mode, cum->mmx_regno + FIRST_MMX_REG);
2786 }
2787 break;
2788 }
2789
2790 if (TARGET_DEBUG_ARG)
2791 {
2792 fprintf (stderr,
2793 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2794 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2795
2796 if (ret)
2797 print_simple_rtl (stderr, ret);
2798 else
2799 fprintf (stderr, ", stack");
2800
2801 fprintf (stderr, " )\n");
2802 }
2803
2804 return ret;
2805 }
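/* Illustrative example: for a 64-bit call to

     void f (int i, double d);

   the first invocation returns (reg:SI di) for I and the second returns
   (reg:DF xmm0) for D, with function_arg_advance consuming one integer
   and one SSE register in between.  In 32-bit mode with the default
   conventions both arguments are passed on the stack (NULL is returned).  */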
2806
2807 /* A C expression that indicates when an argument must be passed by
2808 reference. If nonzero for an argument, a copy of that argument is
2809 made in memory and a pointer to the argument is passed instead of
2810 the argument itself. The pointer is passed in whatever way is
2811 appropriate for passing a pointer to that type. */
2812
2813 static bool
2814 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
2815 enum machine_mode mode ATTRIBUTE_UNUSED,
2816 tree type, bool named ATTRIBUTE_UNUSED)
2817 {
2818 if (!TARGET_64BIT)
2819 return 0;
2820
2821 if (type && int_size_in_bytes (type) == -1)
2822 {
2823 if (TARGET_DEBUG_ARG)
2824 fprintf (stderr, "function_arg_pass_by_reference\n");
2825 return 1;
2826 }
2827
2828 return 0;
2829 }
2830
2831 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
2832 ABI. Only called if TARGET_SSE. */
2833 static bool
2834 contains_128bit_aligned_vector_p (tree type)
2835 {
2836 enum machine_mode mode = TYPE_MODE (type);
2837 if (SSE_REG_MODE_P (mode)
2838 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
2839 return true;
2840 if (TYPE_ALIGN (type) < 128)
2841 return false;
2842
2843 if (AGGREGATE_TYPE_P (type))
2844 {
2845 /* Walk the aggregates recursively. */
2846 if (TREE_CODE (type) == RECORD_TYPE
2847 || TREE_CODE (type) == UNION_TYPE
2848 || TREE_CODE (type) == QUAL_UNION_TYPE)
2849 {
2850 tree field;
2851
2852 if (TYPE_BINFO (type))
2853 {
2854 tree binfo, base_binfo;
2855 int i;
2856
2857 for (binfo = TYPE_BINFO (type), i = 0;
2858 BINFO_BASE_ITERATE (binfo, i, base_binfo); i++)
2859 if (contains_128bit_aligned_vector_p (BINFO_TYPE (base_binfo)))
2860 return true;
2861 }
2862 /* And now check the fields of the structure. */
2863 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2864 {
2865 if (TREE_CODE (field) == FIELD_DECL
2866 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
2867 return true;
2868 }
2869 }
2870 /* Just in case some language passes arrays by value. */
2871 else if (TREE_CODE (type) == ARRAY_TYPE)
2872 {
2873 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
2874 return true;
2875 }
2876 else
2877 abort ();
2878 }
2879 return false;
2880 }
2881
2882 /* Gives the alignment boundary, in bits, of an argument with the
2883 specified mode and type. */
2884
2885 int
2886 ix86_function_arg_boundary (enum machine_mode mode, tree type)
2887 {
2888 int align;
2889 if (type)
2890 align = TYPE_ALIGN (type);
2891 else
2892 align = GET_MODE_ALIGNMENT (mode);
2893 if (align < PARM_BOUNDARY)
2894 align = PARM_BOUNDARY;
2895 if (!TARGET_64BIT)
2896 {
2897 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
2898 make an exception for SSE modes since these require 128bit
2899 alignment.
2900
2901 The handling here differs from field_alignment. ICC aligns MMX
2902 arguments to 4 byte boundaries, while structure fields are aligned
2903 to 8 byte boundaries. */
2904 if (!TARGET_SSE)
2905 align = PARM_BOUNDARY;
2906 else if (!type)
2907 {
2908 if (!SSE_REG_MODE_P (mode))
2909 align = PARM_BOUNDARY;
2910 }
2911 else
2912 {
2913 if (!contains_128bit_aligned_vector_p (type))
2914 align = PARM_BOUNDARY;
2915 }
2916 }
2917 if (align > 128)
2918 align = 128;
2919 return align;
2920 }
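/* Example (illustrative): on a 32-bit target with -msse, an __m128
   argument gets a 128-bit boundary, while a plain int keeps the 32-bit
   PARM_BOUNDARY.  On x86-64 a double is aligned to 64 bits, and anything
   requiring more than 128 bits is capped at 128.  */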
2921
2922 /* Return true if N is a possible register number of function value. */
2923 bool
2924 ix86_function_value_regno_p (int regno)
2925 {
2926 if (!TARGET_64BIT)
2927 {
2928 return ((regno) == 0
2929 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2930 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2931 }
2932 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2933 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2934 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2935 }
2936
2937 /* Define how to find the value returned by a function.
2938 VALTYPE is the data type of the value (as a tree).
2939 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2940 otherwise, FUNC is 0. */
2941 rtx
2942 ix86_function_value (tree valtype)
2943 {
2944 if (TARGET_64BIT)
2945 {
2946 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2947 REGPARM_MAX, SSE_REGPARM_MAX,
2948 x86_64_int_return_registers, 0);
2949 /* For zero sized structures, construct_container returns NULL, but we need
2950 to keep the rest of the compiler happy by returning a meaningful value. */
2951 if (!ret)
2952 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2953 return ret;
2954 }
2955 else
2956 return gen_rtx_REG (TYPE_MODE (valtype),
2957 ix86_value_regno (TYPE_MODE (valtype)));
2958 }
2959
2960 /* Return true iff type is returned in memory. */
2961 int
2962 ix86_return_in_memory (tree type)
2963 {
2964 int needed_intregs, needed_sseregs, size;
2965 enum machine_mode mode = TYPE_MODE (type);
2966
2967 if (TARGET_64BIT)
2968 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
2969
2970 if (mode == BLKmode)
2971 return 1;
2972
2973 size = int_size_in_bytes (type);
2974
2975 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
2976 return 0;
2977
2978 if (VECTOR_MODE_P (mode) || mode == TImode)
2979 {
2980 /* User-created vectors small enough to fit in EAX. */
2981 if (size < 8)
2982 return 0;
2983
2984 /* MMX/3dNow values are returned on the stack, since we've
2985 got to EMMS/FEMMS before returning. */
2986 if (size == 8)
2987 return 1;
2988
2989 /* SSE values are returned in XMM0, except when it doesn't exist. */
2990 if (size == 16)
2991 return (TARGET_SSE ? 0 : 1);
2992 }
2993
2994 if (mode == XFmode)
2995 return 0;
2996
2997 if (size > 12)
2998 return 1;
2999 return 0;
3000 }
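/* Illustrative examples for the 32-bit rules above: a 16-byte struct of
   four ints has BLKmode and is returned in memory via a hidden pointer;
   long long (8 bytes) is returned in EAX:EDX; long double (XFmode) is
   returned in st(0); and an 8-byte MMX vector is forced to memory so the
   caller does not have to worry about EMMS state.  */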
3001
3002 /* When returning SSE vector types, we have a choice of either
3003 (1) being abi incompatible with a -march switch, or
3004 (2) generating an error.
3005 Given no good solution, I think the safest thing is one warning.
3006 The user won't be able to use -Werror, but....
3007
3008 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
3009 called in response to actually generating a caller or callee that
3010 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
3011 via aggregate_value_p for general type probing from tree-ssa. */
3012
3013 static rtx
3014 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
3015 {
3016 static bool warned;
3017
3018 if (!TARGET_SSE && type && !warned)
3019 {
3020 /* Look at the return type of the function, not the function type. */
3021 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
3022
3023 if (mode == TImode
3024 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3025 {
3026 warned = true;
3027 warning ("SSE vector return without SSE enabled changes the ABI");
3028 }
3029 }
3030
3031 return NULL;
3032 }
3033
3034 /* Define how to find the value returned by a library function
3035 assuming the value has mode MODE. */
3036 rtx
3037 ix86_libcall_value (enum machine_mode mode)
3038 {
3039 if (TARGET_64BIT)
3040 {
3041 switch (mode)
3042 {
3043 case SFmode:
3044 case SCmode:
3045 case DFmode:
3046 case DCmode:
3047 case TFmode:
3048 return gen_rtx_REG (mode, FIRST_SSE_REG);
3049 case XFmode:
3050 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
3051 case XCmode:
3052 case TCmode:
3053 return NULL;
3054 default:
3055 return gen_rtx_REG (mode, 0);
3056 }
3057 }
3058 else
3059 return gen_rtx_REG (mode, ix86_value_regno (mode));
3060 }
3061
3062 /* Given a mode, return the register to use for a return value. */
3063
3064 static int
3065 ix86_value_regno (enum machine_mode mode)
3066 {
3067 /* Floating point return values in %st(0). */
3068 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
3069 return FIRST_FLOAT_REG;
3070 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
3071 we prevent this case when sse is not available. */
3072 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3073 return FIRST_SSE_REG;
3074 /* Everything else in %eax. */
3075 return 0;
3076 }
3077 \f
3078 /* Create the va_list data type. */
3079
3080 static tree
3081 ix86_build_builtin_va_list (void)
3082 {
3083 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
3084
3085 /* For i386 we use a plain pointer to the argument area. */
3086 if (!TARGET_64BIT)
3087 return build_pointer_type (char_type_node);
3088
3089 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
3090 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
3091
3092 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
3093 unsigned_type_node);
3094 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
3095 unsigned_type_node);
3096 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
3097 ptr_type_node);
3098 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
3099 ptr_type_node);
3100
3101 DECL_FIELD_CONTEXT (f_gpr) = record;
3102 DECL_FIELD_CONTEXT (f_fpr) = record;
3103 DECL_FIELD_CONTEXT (f_ovf) = record;
3104 DECL_FIELD_CONTEXT (f_sav) = record;
3105
3106 TREE_CHAIN (record) = type_decl;
3107 TYPE_NAME (record) = type_decl;
3108 TYPE_FIELDS (record) = f_gpr;
3109 TREE_CHAIN (f_gpr) = f_fpr;
3110 TREE_CHAIN (f_fpr) = f_ovf;
3111 TREE_CHAIN (f_ovf) = f_sav;
3112
3113 layout_type (record);
3114
3115 /* The correct type is an array type of one element. */
3116 return build_array_type (record, build_index_type (size_zero_node));
3117 }
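/* In C terms the record built above corresponds roughly to the x86-64
   va_list type (field names as used in this file; illustrative sketch):

     typedef struct {
       unsigned int gp_offset;      -- byte offset into reg_save_area for GPRs
       unsigned int fp_offset;      -- byte offset into reg_save_area for SSE regs
       void *overflow_arg_area;     -- next argument passed on the stack
       void *reg_save_area;         -- where the prologue dumped the registers
     } __va_list_tag;
     typedef __va_list_tag va_list[1];

   The array-of-one-element wrapper matches the build_array_type call
   just above.  */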
3118
3119 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
3120
3121 static void
3122 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3123 tree type, int *pretend_size ATTRIBUTE_UNUSED,
3124 int no_rtl)
3125 {
3126 CUMULATIVE_ARGS next_cum;
3127 rtx save_area = NULL_RTX, mem;
3128 rtx label;
3129 rtx label_ref;
3130 rtx tmp_reg;
3131 rtx nsse_reg;
3132 int set;
3133 tree fntype;
3134 int stdarg_p;
3135 int i;
3136
3137 if (!TARGET_64BIT)
3138 return;
3139
3140 /* Indicate to allocate space on the stack for varargs save area. */
3141 ix86_save_varrargs_registers = 1;
3142
3143 cfun->stack_alignment_needed = 128;
3144
3145 fntype = TREE_TYPE (current_function_decl);
3146 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
3147 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
3148 != void_type_node));
3149
3150 /* For varargs, we do not want to skip the dummy va_dcl argument.
3151 For stdargs, we do want to skip the last named argument. */
3152 next_cum = *cum;
3153 if (stdarg_p)
3154 function_arg_advance (&next_cum, mode, type, 1);
3155
3156 if (!no_rtl)
3157 save_area = frame_pointer_rtx;
3158
3159 set = get_varargs_alias_set ();
3160
3161 for (i = next_cum.regno; i < ix86_regparm; i++)
3162 {
3163 mem = gen_rtx_MEM (Pmode,
3164 plus_constant (save_area, i * UNITS_PER_WORD));
3165 set_mem_alias_set (mem, set);
3166 emit_move_insn (mem, gen_rtx_REG (Pmode,
3167 x86_64_int_parameter_registers[i]));
3168 }
3169
3170 if (next_cum.sse_nregs)
3171 {
3172 /* Now emit code to save SSE registers.  The AX parameter contains the
3173 number of SSE parameter registers used to call this function.  We use
3174 the sse_prologue_save insn template, which produces a computed jump
3175 across the SSE saves.  We need some preparation work to get this working. */
3176
3177 label = gen_label_rtx ();
3178 label_ref = gen_rtx_LABEL_REF (Pmode, label);
3179
3180 /* Compute the address to jump to:
3181 label - eax*4 + nnamed_sse_arguments*4 */
3182 tmp_reg = gen_reg_rtx (Pmode);
3183 nsse_reg = gen_reg_rtx (Pmode);
3184 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
3185 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3186 gen_rtx_MULT (Pmode, nsse_reg,
3187 GEN_INT (4))));
3188 if (next_cum.sse_regno)
3189 emit_move_insn
3190 (nsse_reg,
3191 gen_rtx_CONST (DImode,
3192 gen_rtx_PLUS (DImode,
3193 label_ref,
3194 GEN_INT (next_cum.sse_regno * 4))));
3195 else
3196 emit_move_insn (nsse_reg, label_ref);
3197 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
3198
3199 /* Compute the address of the memory block we save into.  We always use a
3200 pointer pointing 127 bytes after the first byte to store, which keeps
3201 each save instruction within 4 bytes by using an 8-bit displacement. */
3202 tmp_reg = gen_reg_rtx (Pmode);
3203 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3204 plus_constant (save_area,
3205 8 * REGPARM_MAX + 127)));
3206 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
3207 set_mem_alias_set (mem, set);
3208 set_mem_align (mem, BITS_PER_WORD);
3209
3210 /* And finally do the dirty job! */
3211 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
3212 GEN_INT (next_cum.sse_regno), label));
3213 }
3214
3215 }
3216
3217 /* Implement va_start. */
3218
3219 void
3220 ix86_va_start (tree valist, rtx nextarg)
3221 {
3222 HOST_WIDE_INT words, n_gpr, n_fpr;
3223 tree f_gpr, f_fpr, f_ovf, f_sav;
3224 tree gpr, fpr, ovf, sav, t;
3225
3226 /* Only 64bit target needs something special. */
3227 if (!TARGET_64BIT)
3228 {
3229 std_expand_builtin_va_start (valist, nextarg);
3230 return;
3231 }
3232
3233 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3234 f_fpr = TREE_CHAIN (f_gpr);
3235 f_ovf = TREE_CHAIN (f_fpr);
3236 f_sav = TREE_CHAIN (f_ovf);
3237
3238 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3239 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
3240 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
3241 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
3242 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
3243
3244 /* Count number of gp and fp argument registers used. */
3245 words = current_function_args_info.words;
3246 n_gpr = current_function_args_info.regno;
3247 n_fpr = current_function_args_info.sse_regno;
3248
3249 if (TARGET_DEBUG_ARG)
3250 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
3251 (int) words, (int) n_gpr, (int) n_fpr);
3252
3253 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
3254 build_int_cst (NULL_TREE, n_gpr * 8));
3255 TREE_SIDE_EFFECTS (t) = 1;
3256 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3257
3258 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
3259 build_int_cst (NULL_TREE, n_fpr * 16 + 8*REGPARM_MAX));
3260 TREE_SIDE_EFFECTS (t) = 1;
3261 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3262
3263 /* Find the overflow area. */
3264 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
3265 if (words != 0)
3266 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
3267 build_int_cst (NULL_TREE, words * UNITS_PER_WORD));
3268 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3269 TREE_SIDE_EFFECTS (t) = 1;
3270 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3271
3272 /* Find the register save area.
3273 The prologue of the function saves it right above the stack frame. */
3274 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
3275 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
3276 TREE_SIDE_EFFECTS (t) = 1;
3277 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3278 }
3279
3280 /* Implement va_arg. */
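/* As an illustration of the trees built below, va_arg (ap, int) gimplifies
   to roughly the following (a sketch, not the exact statements):

       if (ap->gp_offset >= 48) goto lab_false;   // (REGPARM_MAX - 1 + 1) * 8
       addr = ap->reg_save_area + ap->gp_offset;
       ap->gp_offset += 8;
       goto lab_over;
     lab_false:
       addr = ap->overflow_arg_area;              // aligned first, if needed
       ap->overflow_arg_area += 8;
     lab_over:
       result = *(int *) addr;  */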
3281
3282 tree
3283 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
3284 {
3285 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
3286 tree f_gpr, f_fpr, f_ovf, f_sav;
3287 tree gpr, fpr, ovf, sav, t;
3288 int size, rsize;
3289 tree lab_false, lab_over = NULL_TREE;
3290 tree addr, t2;
3291 rtx container;
3292 int indirect_p = 0;
3293 tree ptrtype;
3294
3295 /* Only 64-bit targets need something special. */
3296 if (!TARGET_64BIT)
3297 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
3298
3299 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3300 f_fpr = TREE_CHAIN (f_gpr);
3301 f_ovf = TREE_CHAIN (f_fpr);
3302 f_sav = TREE_CHAIN (f_ovf);
3303
3304 valist = build_va_arg_indirect_ref (valist);
3305 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
3306 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
3307 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
3308 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
3309
3310 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
3311 if (indirect_p)
3312 type = build_pointer_type (type);
3313 size = int_size_in_bytes (type);
3314 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3315
3316 container = construct_container (TYPE_MODE (type), type, 0,
3317 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
3318 /*
3319 * Pull the value out of the saved registers ...
3320 */
3321
3322 addr = create_tmp_var (ptr_type_node, "addr");
3323 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
3324
3325 if (container)
3326 {
3327 int needed_intregs, needed_sseregs;
3328 bool need_temp;
3329 tree int_addr, sse_addr;
3330
3331 lab_false = create_artificial_label ();
3332 lab_over = create_artificial_label ();
3333
3334 examine_argument (TYPE_MODE (type), type, 0,
3335 &needed_intregs, &needed_sseregs);
3336
3337 need_temp = (!REG_P (container)
3338 && ((needed_intregs && TYPE_ALIGN (type) > 64)
3339 || TYPE_ALIGN (type) > 128));
3340
3341 /* In case we are passing a structure, verify that it is a consecutive block
3342 in the register save area. If not, we need to do moves. */
3343 if (!need_temp && !REG_P (container))
3344 {
3345 /* Verify that all registers are strictly consecutive */
3346 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
3347 {
3348 int i;
3349
3350 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3351 {
3352 rtx slot = XVECEXP (container, 0, i);
3353 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
3354 || INTVAL (XEXP (slot, 1)) != i * 16)
3355 need_temp = 1;
3356 }
3357 }
3358 else
3359 {
3360 int i;
3361
3362 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3363 {
3364 rtx slot = XVECEXP (container, 0, i);
3365 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
3366 || INTVAL (XEXP (slot, 1)) != i * 8)
3367 need_temp = 1;
3368 }
3369 }
3370 }
3371 if (!need_temp)
3372 {
3373 int_addr = addr;
3374 sse_addr = addr;
3375 }
3376 else
3377 {
3378 int_addr = create_tmp_var (ptr_type_node, "int_addr");
3379 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
3380 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
3381 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
3382 }
3383 /* First ensure that we fit completely in registers. */
3384 if (needed_intregs)
3385 {
3386 t = build_int_cst (TREE_TYPE (gpr),
3387 (REGPARM_MAX - needed_intregs + 1) * 8);
3388 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
3389 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
3390 t = build (COND_EXPR, void_type_node, t, t2, NULL_TREE);
3391 gimplify_and_add (t, pre_p);
3392 }
3393 if (needed_sseregs)
3394 {
3395 t = build_int_cst (TREE_TYPE (fpr),
3396 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
3397 + REGPARM_MAX * 8);
3398 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
3399 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
3400 t = build (COND_EXPR, void_type_node, t, t2, NULL_TREE);
3401 gimplify_and_add (t, pre_p);
3402 }
3403
3404 /* Compute index to start of area used for integer regs. */
3405 if (needed_intregs)
3406 {
3407 /* int_addr = gpr + sav; */
3408 t = build2 (PLUS_EXPR, ptr_type_node, sav, gpr);
3409 t = build2 (MODIFY_EXPR, void_type_node, int_addr, t);
3410 gimplify_and_add (t, pre_p);
3411 }
3412 if (needed_sseregs)
3413 {
3414 /* sse_addr = fpr + sav; */
3415 t = build2 (PLUS_EXPR, ptr_type_node, sav, fpr);
3416 t = build2 (MODIFY_EXPR, void_type_node, sse_addr, t);
3417 gimplify_and_add (t, pre_p);
3418 }
3419 if (need_temp)
3420 {
3421 int i;
3422 tree temp = create_tmp_var (type, "va_arg_tmp");
3423
3424 /* addr = &temp; */
3425 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
3426 t = build2 (MODIFY_EXPR, void_type_node, addr, t);
3427 gimplify_and_add (t, pre_p);
3428
3429 for (i = 0; i < XVECLEN (container, 0); i++)
3430 {
3431 rtx slot = XVECEXP (container, 0, i);
3432 rtx reg = XEXP (slot, 0);
3433 enum machine_mode mode = GET_MODE (reg);
3434 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
3435 tree addr_type = build_pointer_type (piece_type);
3436 tree src_addr, src;
3437 int src_offset;
3438 tree dest_addr, dest;
3439
3440 if (SSE_REGNO_P (REGNO (reg)))
3441 {
3442 src_addr = sse_addr;
3443 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
3444 }
3445 else
3446 {
3447 src_addr = int_addr;
3448 src_offset = REGNO (reg) * 8;
3449 }
3450 src_addr = fold_convert (addr_type, src_addr);
3451 src_addr = fold (build2 (PLUS_EXPR, addr_type, src_addr,
3452 size_int (src_offset)));
3453 src = build_va_arg_indirect_ref (src_addr);
3454
3455 dest_addr = fold_convert (addr_type, addr);
3456 dest_addr = fold (build2 (PLUS_EXPR, addr_type, dest_addr,
3457 size_int (INTVAL (XEXP (slot, 1)))));
3458 dest = build_va_arg_indirect_ref (dest_addr);
3459
3460 t = build2 (MODIFY_EXPR, void_type_node, dest, src);
3461 gimplify_and_add (t, pre_p);
3462 }
3463 }
3464
3465 if (needed_intregs)
3466 {
3467 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
3468 build_int_cst (NULL_TREE, needed_intregs * 8));
3469 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
3470 gimplify_and_add (t, pre_p);
3471 }
3472 if (needed_sseregs)
3473 {
3474 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
3475 build_int_cst (NULL_TREE, needed_sseregs * 16));
3476 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
3477 gimplify_and_add (t, pre_p);
3478 }
3479
3480 t = build1 (GOTO_EXPR, void_type_node, lab_over);
3481 gimplify_and_add (t, pre_p);
3482
3483 t = build1 (LABEL_EXPR, void_type_node, lab_false);
3484 append_to_statement_list (t, pre_p);
3485 }
3486
3487 /* ... otherwise out of the overflow area. */
3488
3489 /* Care for on-stack alignment if needed. */
3490 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
3491 t = ovf;
3492 else
3493 {
3494 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
3495 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf,
3496 build_int_cst (NULL_TREE, align - 1));
3497 t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
3498 build_int_cst (NULL_TREE, -align));
3499 }
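/* The computation above is the usual (ovf + align - 1) & -align rounding;
   e.g. for a 16-byte aligned type and ovf == 40, (40 + 15) & -16 == 48. */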
3500 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
3501
3502 t2 = build2 (MODIFY_EXPR, void_type_node, addr, t);
3503 gimplify_and_add (t2, pre_p);
3504
3505 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
3506 build_int_cst (NULL_TREE, rsize * UNITS_PER_WORD));
3507 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3508 gimplify_and_add (t, pre_p);
3509
3510 if (container)
3511 {
3512 t = build1 (LABEL_EXPR, void_type_node, lab_over);
3513 append_to_statement_list (t, pre_p);
3514 }
3515
3516 ptrtype = build_pointer_type (type);
3517 addr = fold_convert (ptrtype, addr);
3518
3519 if (indirect_p)
3520 addr = build_va_arg_indirect_ref (addr);
3521 return build_va_arg_indirect_ref (addr);
3522 }
3523 \f
3524 /* Return nonzero if OPNUM's MEM should be matched
3525 in movabs* patterns. */
3526
3527 int
3528 ix86_check_movabs (rtx insn, int opnum)
3529 {
3530 rtx set, mem;
3531
3532 set = PATTERN (insn);
3533 if (GET_CODE (set) == PARALLEL)
3534 set = XVECEXP (set, 0, 0);
3535 if (GET_CODE (set) != SET)
3536 abort ();
3537 mem = XEXP (set, opnum);
3538 while (GET_CODE (mem) == SUBREG)
3539 mem = SUBREG_REG (mem);
3540 if (GET_CODE (mem) != MEM)
3541 abort ();
3542 return (volatile_ok || !MEM_VOLATILE_P (mem));
3543 }
3544 \f
3545 /* Initialize the table of extra 80387 mathematical constants. */
3546
3547 static void
3548 init_ext_80387_constants (void)
3549 {
3550 static const char * cst[5] =
3551 {
3552 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
3553 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
3554 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
3555 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
3556 "3.1415926535897932385128089594061862044", /* 4: fldpi */
3557 };
3558 int i;
3559
3560 for (i = 0; i < 5; i++)
3561 {
3562 real_from_string (&ext_80387_constants_table[i], cst[i]);
3563 /* Ensure each constant is rounded to XFmode precision. */
3564 real_convert (&ext_80387_constants_table[i],
3565 XFmode, &ext_80387_constants_table[i]);
3566 }
3567
3568 ext_80387_constants_init = 1;
3569 }
3570
3571 /* Return true if the constant is something that can be loaded with
3572 a special instruction. */
3573
3574 int
3575 standard_80387_constant_p (rtx x)
3576 {
3577 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
3578 return -1;
3579
3580 if (x == CONST0_RTX (GET_MODE (x)))
3581 return 1;
3582 if (x == CONST1_RTX (GET_MODE (x)))
3583 return 2;
3584
3585 /* For XFmode constants, try to find a special 80387 instruction when
3586 optimizing for size or on those CPUs that benefit from them. */
3587 if (GET_MODE (x) == XFmode
3588 && (optimize_size || x86_ext_80387_constants & TUNEMASK))
3589 {
3590 REAL_VALUE_TYPE r;
3591 int i;
3592
3593 if (! ext_80387_constants_init)
3594 init_ext_80387_constants ();
3595
3596 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3597 for (i = 0; i < 5; i++)
3598 if (real_identical (&r, &ext_80387_constants_table[i]))
3599 return i + 3;
3600 }
3601
3602 return 0;
3603 }
3604
3605 /* Return the opcode of the special instruction to be used to load
3606 the constant X. */
3607
3608 const char *
3609 standard_80387_constant_opcode (rtx x)
3610 {
3611 switch (standard_80387_constant_p (x))
3612 {
3613 case 1:
3614 return "fldz";
3615 case 2:
3616 return "fld1";
3617 case 3:
3618 return "fldlg2";
3619 case 4:
3620 return "fldln2";
3621 case 5:
3622 return "fldl2e";
3623 case 6:
3624 return "fldl2t";
3625 case 7:
3626 return "fldpi";
3627 }
3628 abort ();
3629 }
3630
3631 /* Return the CONST_DOUBLE representing the 80387 constant that is
3632 loaded by the specified special instruction. The argument IDX
3633 matches the return value from standard_80387_constant_p. */
3634
3635 rtx
3636 standard_80387_constant_rtx (int idx)
3637 {
3638 int i;
3639
3640 if (! ext_80387_constants_init)
3641 init_ext_80387_constants ();
3642
3643 switch (idx)
3644 {
3645 case 3:
3646 case 4:
3647 case 5:
3648 case 6:
3649 case 7:
3650 i = idx - 3;
3651 break;
3652
3653 default:
3654 abort ();
3655 }
3656
3657 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
3658 XFmode);
3659 }
3660
3661 /* Return 1 if X is an FP constant that we can load into an SSE register
3662 without using memory. */
3663 int
3664 standard_sse_constant_p (rtx x)
3665 {
3666 if (x == const0_rtx)
3667 return 1;
3668 return (x == CONST0_RTX (GET_MODE (x)));
3669 }
3670
3671 /* Return 1 if OP contains a symbol reference. */
3672
3673 int
3674 symbolic_reference_mentioned_p (rtx op)
3675 {
3676 const char *fmt;
3677 int i;
3678
3679 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
3680 return 1;
3681
3682 fmt = GET_RTX_FORMAT (GET_CODE (op));
3683 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3684 {
3685 if (fmt[i] == 'E')
3686 {
3687 int j;
3688
3689 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3690 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3691 return 1;
3692 }
3693
3694 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3695 return 1;
3696 }
3697
3698 return 0;
3699 }
3700
3701 /* Return 1 if it is appropriate to emit `ret' instructions in the
3702 body of a function. Do this only if the epilogue is simple, needing a
3703 couple of insns. Prior to reloading, we can't tell how many registers
3704 must be saved, so return 0 then. Return 0 if there is no frame
3705 marker to de-allocate.
3706
3707 If NON_SAVING_SETJMP is defined and true, then it is not possible
3708 for the epilogue to be simple, so return 0. This is a special case
3709 since NON_SAVING_SETJMP will not cause regs_ever_live to change
3710 until final, but jump_optimize may need to know sooner if a
3711 `return' is OK. */
3712
3713 int
3714 ix86_can_use_return_insn_p (void)
3715 {
3716 struct ix86_frame frame;
3717
3718 #ifdef NON_SAVING_SETJMP
3719 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
3720 return 0;
3721 #endif
3722
3723 if (! reload_completed || frame_pointer_needed)
3724 return 0;
3725
3726 /* Don't allow popping more than 32768 bytes of arguments, since that's
3727 all we can do with one instruction. */
3728 if (current_function_pops_args
3729 && current_function_args_size >= 32768)
3730 return 0;
3731
3732 ix86_compute_frame_layout (&frame);
3733 return frame.to_allocate == 0 && frame.nregs == 0;
3734 }
3735 \f
3736 /* Value should be nonzero if functions must have frame pointers.
3737 Zero means the frame pointer need not be set up (and parms may
3738 be accessed via the stack pointer) in functions that seem suitable. */
3739
3740 int
3741 ix86_frame_pointer_required (void)
3742 {
3743 /* If we accessed previous frames, then the generated code expects
3744 to be able to access the saved ebp value in our frame. */
3745 if (cfun->machine->accesses_prev_frame)
3746 return 1;
3747
3748 /* Several x86 OSes need a frame pointer for other reasons,
3749 usually pertaining to setjmp. */
3750 if (SUBTARGET_FRAME_POINTER_REQUIRED)
3751 return 1;
3752
3753 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
3754 the frame pointer by default. Turn it back on now if we've not
3755 got a leaf function. */
3756 if (TARGET_OMIT_LEAF_FRAME_POINTER
3757 && (!current_function_is_leaf))
3758 return 1;
3759
3760 if (current_function_profile)
3761 return 1;
3762
3763 return 0;
3764 }
3765
3766 /* Record that the current function accesses previous call frames. */
3767
3768 void
3769 ix86_setup_frame_addresses (void)
3770 {
3771 cfun->machine->accesses_prev_frame = 1;
3772 }
3773 \f
3774 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
3775 # define USE_HIDDEN_LINKONCE 1
3776 #else
3777 # define USE_HIDDEN_LINKONCE 0
3778 #endif
3779
3780 static int pic_labels_used;
3781
3782 /* Fills in the label name that should be used for a pc thunk for
3783 the given register. */
3784
3785 static void
3786 get_pc_thunk_name (char name[32], unsigned int regno)
3787 {
3788 if (USE_HIDDEN_LINKONCE)
3789 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
3790 else
3791 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
3792 }
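/* For example, for %ebx this yields "__i686.get_pc_thunk.bx" when hidden
   linkonce sections are available, and otherwise a local label such as
   ".LPR3" (the exact spelling depends on ASM_GENERATE_INTERNAL_LABEL). */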
3793
3794
3795 /* At the end of compilation, emit the pc thunks used by -fpic code; each
3796 thunk loads its register with the return address of the caller and returns. */
3797
3798 void
3799 ix86_file_end (void)
3800 {
3801 rtx xops[2];
3802 int regno;
3803
3804 for (regno = 0; regno < 8; ++regno)
3805 {
3806 char name[32];
3807
3808 if (! ((pic_labels_used >> regno) & 1))
3809 continue;
3810
3811 get_pc_thunk_name (name, regno);
3812
3813 if (USE_HIDDEN_LINKONCE)
3814 {
3815 tree decl;
3816
3817 decl = build_decl (FUNCTION_DECL, get_identifier (name),
3818 error_mark_node);
3819 TREE_PUBLIC (decl) = 1;
3820 TREE_STATIC (decl) = 1;
3821 DECL_ONE_ONLY (decl) = 1;
3822
3823 (*targetm.asm_out.unique_section) (decl, 0);
3824 named_section (decl, NULL, 0);
3825
3826 (*targetm.asm_out.globalize_label) (asm_out_file, name);
3827 fputs ("\t.hidden\t", asm_out_file);
3828 assemble_name (asm_out_file, name);
3829 fputc ('\n', asm_out_file);
3830 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
3831 }
3832 else
3833 {
3834 text_section ();
3835 ASM_OUTPUT_LABEL (asm_out_file, name);
3836 }
3837
3838 xops[0] = gen_rtx_REG (SImode, regno);
3839 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
3840 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
3841 output_asm_insn ("ret", xops);
3842 }
3843
3844 if (NEED_INDICATE_EXEC_STACK)
3845 file_end_indicate_exec_stack ();
3846 }
3847
3848 /* Emit code for the SET_GOT patterns. */
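/* For reference, the classic -fpic sequence emitted here for i386 looks
   roughly like this (an illustration; exact label names and the choice
   between this and the pc thunk variant depend on the flags tested below):

       call  .L2
   .L2: popl  %ebx
       addl  $_GLOBAL_OFFSET_TABLE_+[.-.L2], %ebx

   or, with deep branch prediction:

       call  __i686.get_pc_thunk.bx
       addl  $_GLOBAL_OFFSET_TABLE_, %ebx  */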
3849
3850 const char *
3851 output_set_got (rtx dest)
3852 {
3853 rtx xops[3];
3854
3855 xops[0] = dest;
3856 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
3857
3858 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
3859 {
3860 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
3861
3862 if (!flag_pic)
3863 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
3864 else
3865 output_asm_insn ("call\t%a2", xops);
3866
3867 #if TARGET_MACHO
3868 /* Output the "canonical" label name ("Lxx$pb") here too. This
3869 is what will be referred to by the Mach-O PIC subsystem. */
3870 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
3871 #endif
3872 (*targetm.asm_out.internal_label) (asm_out_file, "L",
3873 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
3874
3875 if (flag_pic)
3876 output_asm_insn ("pop{l}\t%0", xops);
3877 }
3878 else
3879 {
3880 char name[32];
3881 get_pc_thunk_name (name, REGNO (dest));
3882 pic_labels_used |= 1 << REGNO (dest);
3883
3884 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
3885 xops[2] = gen_rtx_MEM (QImode, xops[2]);
3886 output_asm_insn ("call\t%X2", xops);
3887 }
3888
3889 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
3890 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
3891 else if (!TARGET_MACHO)
3892 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
3893
3894 return "";
3895 }
3896
3897 /* Generate a "push" pattern for input ARG. */
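/* For example, gen_push (gen_rtx_REG (Pmode, 0)) builds roughly
   (set (mem (pre_dec (reg sp))) (reg ax)), which the push patterns in
   i386.md match. */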
3898
3899 static rtx
3900 gen_push (rtx arg)
3901 {
3902 return gen_rtx_SET (VOIDmode,
3903 gen_rtx_MEM (Pmode,
3904 gen_rtx_PRE_DEC (Pmode,
3905 stack_pointer_rtx)),
3906 arg);
3907 }
3908
3909 /* Return >= 0 if there is an unused call-clobbered register available
3910 for the entire function. */
3911
3912 static unsigned int
3913 ix86_select_alt_pic_regnum (void)
3914 {
3915 if (current_function_is_leaf && !current_function_profile)
3916 {
3917 int i;
3918 for (i = 2; i >= 0; --i)
3919 if (!regs_ever_live[i])
3920 return i;
3921 }
3922
3923 return INVALID_REGNUM;
3924 }
3925
3926 /* Return 1 if we need to save REGNO. */
3927 static int
3928 ix86_save_reg (unsigned int regno, int maybe_eh_return)
3929 {
3930 if (pic_offset_table_rtx
3931 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
3932 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
3933 || current_function_profile
3934 || current_function_calls_eh_return
3935 || current_function_uses_const_pool))
3936 {
3937 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
3938 return 0;
3939 return 1;
3940 }
3941
3942 if (current_function_calls_eh_return && maybe_eh_return)
3943 {
3944 unsigned i;
3945 for (i = 0; ; i++)
3946 {
3947 unsigned test = EH_RETURN_DATA_REGNO (i);
3948 if (test == INVALID_REGNUM)
3949 break;
3950 if (test == regno)
3951 return 1;
3952 }
3953 }
3954
3955 return (regs_ever_live[regno]
3956 && !call_used_regs[regno]
3957 && !fixed_regs[regno]
3958 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
3959 }
3960
3961 /* Return number of registers to be saved on the stack. */
3962
3963 static int
3964 ix86_nsaved_regs (void)
3965 {
3966 int nregs = 0;
3967 int regno;
3968
3969 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
3970 if (ix86_save_reg (regno, true))
3971 nregs++;
3972 return nregs;
3973 }
3974
3975 /* Return the offset between two registers, one to be eliminated, and the other
3976 its replacement, at the start of a routine. */
3977
3978 HOST_WIDE_INT
3979 ix86_initial_elimination_offset (int from, int to)
3980 {
3981 struct ix86_frame frame;
3982 ix86_compute_frame_layout (&frame);
3983
3984 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3985 return frame.hard_frame_pointer_offset;
3986 else if (from == FRAME_POINTER_REGNUM
3987 && to == HARD_FRAME_POINTER_REGNUM)
3988 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
3989 else
3990 {
3991 if (to != STACK_POINTER_REGNUM)
3992 abort ();
3993 else if (from == ARG_POINTER_REGNUM)
3994 return frame.stack_pointer_offset;
3995 else if (from != FRAME_POINTER_REGNUM)
3996 abort ();
3997 else
3998 return frame.stack_pointer_offset - frame.frame_pointer_offset;
3999 }
4000 }
4001
4002 /* Fill the ix86_frame structure describing the frame of the currently compiled function. */
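/* A rough picture of the layout computed below, with the running offset
   accumulating downward from the slot holding the return address (an
   illustration only; the red-zone adjustment at the end of this function
   may further shrink to_allocate and stack_pointer_offset):

       return address
       saved %ebp, if frame_pointer_needed    <- hard_frame_pointer_offset
       register save area (nregs words)
       va-arg register save area, if used
       padding1                               <- frame_pointer_offset
       local variables (get_frame_size ())
       outgoing argument area
       padding2                               <- stack_pointer_offset  */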
4003
4004 static void
4005 ix86_compute_frame_layout (struct ix86_frame *frame)
4006 {
4007 HOST_WIDE_INT total_size;
4008 unsigned int stack_alignment_needed;
4009 HOST_WIDE_INT offset;
4010 unsigned int preferred_alignment;
4011 HOST_WIDE_INT size = get_frame_size ();
4012
4013 frame->nregs = ix86_nsaved_regs ();
4014 total_size = size;
4015
4016 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
4017 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4018
4019 /* During reload iteration the number of registers saved can change.
4020 Recompute the value as needed. Do not recompute when the number of
4021 registers didn't change, as reload makes multiple calls to this function
4022 and does not expect the decision to change within a single iteration. */
4023 if (!optimize_size
4024 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
4025 {
4026 int count = frame->nregs;
4027
4028 cfun->machine->use_fast_prologue_epilogue_nregs = count;
4029 /* The fast prologue uses move instead of push to save registers. This
4030 is significantly longer, but also executes faster as modern hardware
4031 can execute the moves in parallel, but can't do that for push/pop.
4032
4033 Be careful about choosing which prologue to emit: when the function takes
4034 many instructions to execute, we may as well use the slow version; the
4035 same holds when the function is known to be outside a hot spot (known
4036 with feedback only). Weight the size of the function by the number of
4037 registers to save, as it is cheap to use one or two push instructions
4038 but very slow to use many of them. */
4039 if (count)
4040 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
4041 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
4042 || (flag_branch_probabilities
4043 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
4044 cfun->machine->use_fast_prologue_epilogue = false;
4045 else
4046 cfun->machine->use_fast_prologue_epilogue
4047 = !expensive_function_p (count);
4048 }
4049 if (TARGET_PROLOGUE_USING_MOVE
4050 && cfun->machine->use_fast_prologue_epilogue)
4051 frame->save_regs_using_mov = true;
4052 else
4053 frame->save_regs_using_mov = false;
4054
4055
4056 /* Skip return address and saved base pointer. */
4057 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4058
4059 frame->hard_frame_pointer_offset = offset;
4060
4061 /* Do some sanity checking of stack_alignment_needed and
4062 preferred_alignment, since the i386 port is the only one using these
4063 features, which may break easily. */
4064
4065 if (size && !stack_alignment_needed)
4066 abort ();
4067 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
4068 abort ();
4069 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4070 abort ();
4071 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4072 abort ();
4073
4074 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
4075 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
4076
4077 /* Register save area */
4078 offset += frame->nregs * UNITS_PER_WORD;
4079
4080 /* Va-arg area */
4081 if (ix86_save_varrargs_registers)
4082 {
4083 offset += X86_64_VARARGS_SIZE;
4084 frame->va_arg_size = X86_64_VARARGS_SIZE;
4085 }
4086 else
4087 frame->va_arg_size = 0;
4088
4089 /* Align start of frame for local function. */
4090 frame->padding1 = ((offset + stack_alignment_needed - 1)
4091 & -stack_alignment_needed) - offset;
4092
4093 offset += frame->padding1;
4094
4095 /* Frame pointer points here. */
4096 frame->frame_pointer_offset = offset;
4097
4098 offset += size;
4099
4100 /* Add the outgoing arguments area. It can be skipped if we eliminated
4101 all the function calls as dead code.
4102 Skipping is, however, impossible when the function calls alloca: the
4103 alloca expander assumes that the last current_function_outgoing_args_size
4104 bytes of the stack frame are unused. */
4105 if (ACCUMULATE_OUTGOING_ARGS
4106 && (!current_function_is_leaf || current_function_calls_alloca))
4107 {
4108 offset += current_function_outgoing_args_size;
4109 frame->outgoing_arguments_size = current_function_outgoing_args_size;
4110 }
4111 else
4112 frame->outgoing_arguments_size = 0;
4113
4114 /* Align stack boundary. Only needed if we're calling another function
4115 or using alloca. */
4116 if (!current_function_is_leaf || current_function_calls_alloca)
4117 frame->padding2 = ((offset + preferred_alignment - 1)
4118 & -preferred_alignment) - offset;
4119 else
4120 frame->padding2 = 0;
4121
4122 offset += frame->padding2;
4123
4124 /* We've reached end of stack frame. */
4125 frame->stack_pointer_offset = offset;
4126
4127 /* Size prologue needs to allocate. */
4128 frame->to_allocate =
4129 (size + frame->padding1 + frame->padding2
4130 + frame->outgoing_arguments_size + frame->va_arg_size);
4131
4132 if ((!frame->to_allocate && frame->nregs <= 1)
4133 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
4134 frame->save_regs_using_mov = false;
4135
4136 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
4137 && current_function_is_leaf)
4138 {
4139 frame->red_zone_size = frame->to_allocate;
4140 if (frame->save_regs_using_mov)
4141 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
4142 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
4143 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
4144 }
4145 else
4146 frame->red_zone_size = 0;
4147 frame->to_allocate -= frame->red_zone_size;
4148 frame->stack_pointer_offset -= frame->red_zone_size;
4149 #if 0
4150 fprintf (stderr, "nregs: %i\n", frame->nregs);
4151 fprintf (stderr, "size: %i\n", size);
4152 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
4153 fprintf (stderr, "padding1: %i\n", frame->padding1);
4154 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
4155 fprintf (stderr, "padding2: %i\n", frame->padding2);
4156 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
4157 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
4158 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
4159 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
4160 frame->hard_frame_pointer_offset);
4161 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
4162 #endif
4163 }
4164
4165 /* Emit code to save registers in the prologue. */
4166
4167 static void
4168 ix86_emit_save_regs (void)
4169 {
4170 int regno;
4171 rtx insn;
4172
4173 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4174 if (ix86_save_reg (regno, true))
4175 {
4176 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
4177 RTX_FRAME_RELATED_P (insn) = 1;
4178 }
4179 }
4180
4181 /* Emit code to save registers using MOV insns. First register
4182 is restored from POINTER + OFFSET. */
4183 static void
4184 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
4185 {
4186 int regno;
4187 rtx insn;
4188
4189 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4190 if (ix86_save_reg (regno, true))
4191 {
4192 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
4193 Pmode, offset),
4194 gen_rtx_REG (Pmode, regno));
4195 RTX_FRAME_RELATED_P (insn) = 1;
4196 offset += UNITS_PER_WORD;
4197 }
4198 }
4199
4200 /* Expand prologue or epilogue stack adjustment.
4201 The pattern exists to put a dependency on all ebp-based memory accesses.
4202 STYLE should be negative if instructions should be marked as frame related,
4203 zero if the %r11 register is live and cannot be freely used, and positive
4204 otherwise. */
4205
4206 static void
4207 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
4208 {
4209 rtx insn;
4210
4211 if (! TARGET_64BIT)
4212 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
4213 else if (x86_64_immediate_operand (offset, DImode))
4214 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
4215 else
4216 {
4217 rtx r11;
4218 /* r11 is used by indirect sibcall return as well, set before the
4219 epilogue and used after the epilogue. ATM indirect sibcall
4220 shouldn't be used together with huge frame sizes in one
4221 function because of the frame_size check in sibcall.c. */
4222 if (style == 0)
4223 abort ();
4224 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
4225 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
4226 if (style < 0)
4227 RTX_FRAME_RELATED_P (insn) = 1;
4228 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
4229 offset));
4230 }
4231 if (style < 0)
4232 RTX_FRAME_RELATED_P (insn) = 1;
4233 }
4234
4235 /* Expand the prologue into a bunch of separate insns. */
4236
4237 void
4238 ix86_expand_prologue (void)
4239 {
4240 rtx insn;
4241 bool pic_reg_used;
4242 struct ix86_frame frame;
4243 HOST_WIDE_INT allocate;
4244
4245 ix86_compute_frame_layout (&frame);
4246
4247 /* Note: AT&T enter does NOT have reversed args. Enter is probably
4248 slower on all targets. Also sdb doesn't like it. */
4249
4250 if (frame_pointer_needed)
4251 {
4252 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
4253 RTX_FRAME_RELATED_P (insn) = 1;
4254
4255 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
4256 RTX_FRAME_RELATED_P (insn) = 1;
4257 }
4258
4259 allocate = frame.to_allocate;
4260
4261 if (!frame.save_regs_using_mov)
4262 ix86_emit_save_regs ();
4263 else
4264 allocate += frame.nregs * UNITS_PER_WORD;
4265
4266 /* When using the red zone we may start saving registers before allocating
4267 the stack frame, saving one cycle of the prologue. */
4268 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
4269 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
4270 : stack_pointer_rtx,
4271 -frame.nregs * UNITS_PER_WORD);
4272
4273 if (allocate == 0)
4274 ;
4275 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
4276 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
4277 GEN_INT (-allocate), -1);
4278 else
4279 {
4280 /* Only valid for Win32. */
4281 rtx eax = gen_rtx_REG (SImode, 0);
4282 bool eax_live = ix86_eax_live_at_start_p ();
4283
4284 if (TARGET_64BIT)
4285 abort ();
4286
4287 if (eax_live)
4288 {
4289 emit_insn (gen_push (eax));
4290 allocate -= 4;
4291 }
4292
4293 insn = emit_move_insn (eax, GEN_INT (allocate));
4294 RTX_FRAME_RELATED_P (insn) = 1;
4295
4296 insn = emit_insn (gen_allocate_stack_worker (eax));
4297 RTX_FRAME_RELATED_P (insn) = 1;
4298
4299 if (eax_live)
4300 {
4301 rtx t;
4302 if (frame_pointer_needed)
4303 t = plus_constant (hard_frame_pointer_rtx,
4304 allocate
4305 - frame.to_allocate
4306 - frame.nregs * UNITS_PER_WORD);
4307 else
4308 t = plus_constant (stack_pointer_rtx, allocate);
4309 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
4310 }
4311 }
4312
4313 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
4314 {
4315 if (!frame_pointer_needed || !frame.to_allocate)
4316 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
4317 else
4318 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
4319 -frame.nregs * UNITS_PER_WORD);
4320 }
4321
4322 pic_reg_used = false;
4323 if (pic_offset_table_rtx
4324 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4325 || current_function_profile))
4326 {
4327 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
4328
4329 if (alt_pic_reg_used != INVALID_REGNUM)
4330 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
4331
4332 pic_reg_used = true;
4333 }
4334
4335 if (pic_reg_used)
4336 {
4337 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
4338
4339 /* Even with accurate pre-reload life analysis, we can wind up
4340 deleting all references to the pic register after reload.
4341 Consider if cross-jumping unifies two sides of a branch
4342 controlled by a comparison vs the only read from a global.
4343 In which case, allow the set_got to be deleted, though we're
4344 too late to do anything about the ebx save in the prologue. */
4345 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
4346 }
4347
4348 /* Prevent function calls from being scheduled before the call to mcount.
4349 In the pic_reg_used case, make sure that the got load isn't deleted. */
4350 if (current_function_profile)
4351 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
4352 }
4353
4354 /* Emit code to restore saved registers using MOV insns. First register
4355 is restored from POINTER + OFFSET. */
4356 static void
4357 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
4358 int maybe_eh_return)
4359 {
4360 int regno;
4361 rtx base_address = gen_rtx_MEM (Pmode, pointer);
4362
4363 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4364 if (ix86_save_reg (regno, maybe_eh_return))
4365 {
4366 /* Ensure that adjust_address won't be forced to produce a pointer
4367 outside the range allowed by the x86-64 instruction set. */
4368 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
4369 {
4370 rtx r11;
4371
4372 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
4373 emit_move_insn (r11, GEN_INT (offset));
4374 emit_insn (gen_adddi3 (r11, r11, pointer));
4375 base_address = gen_rtx_MEM (Pmode, r11);
4376 offset = 0;
4377 }
4378 emit_move_insn (gen_rtx_REG (Pmode, regno),
4379 adjust_address (base_address, Pmode, offset));
4380 offset += UNITS_PER_WORD;
4381 }
4382 }
4383
4384 /* Restore function stack, frame, and registers. */
4385
4386 void
4387 ix86_expand_epilogue (int style)
4388 {
4389 int regno;
4390 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
4391 struct ix86_frame frame;
4392 HOST_WIDE_INT offset;
4393
4394 ix86_compute_frame_layout (&frame);
4395
4396 /* Calculate start of saved registers relative to ebp. Special care
4397 must be taken for the normal return case of a function using
4398 eh_return: the eax and edx registers are marked as saved, but not
4399 restored along this path. */
4400 offset = frame.nregs;
4401 if (current_function_calls_eh_return && style != 2)
4402 offset -= 2;
4403 offset *= -UNITS_PER_WORD;
4404
4405 /* If we're only restoring one register and sp is not valid then
4406 use a move instruction to restore the register, since it's
4407 less work than reloading sp and popping the register.
4408
4409 The default code results in a stack adjustment using an add/lea instruction,
4410 while this code results in a LEAVE instruction (or discrete equivalent),
4411 so it is profitable in some other cases as well, especially when there
4412 are no registers to restore. We also use this code when TARGET_USE_LEAVE
4413 is set and there is exactly one register to pop. This heuristic may need
4414 some tuning in the future. */
4415 if ((!sp_valid && frame.nregs <= 1)
4416 || (TARGET_EPILOGUE_USING_MOVE
4417 && cfun->machine->use_fast_prologue_epilogue
4418 && (frame.nregs > 1 || frame.to_allocate))
4419 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
4420 || (frame_pointer_needed && TARGET_USE_LEAVE
4421 && cfun->machine->use_fast_prologue_epilogue
4422 && frame.nregs == 1)
4423 || current_function_calls_eh_return)
4424 {
4425 /* Restore registers. We can use ebp or esp to address the memory
4426 locations. If both are available, default to ebp, since offsets
4427 are known to be small. The only exception is esp pointing directly to
4428 the end of the block of saved registers, where we may simplify the
4429 addressing mode. */
4430
4431 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
4432 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
4433 frame.to_allocate, style == 2);
4434 else
4435 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
4436 offset, style == 2);
4437
4438 /* eh_return epilogues need %ecx added to the stack pointer. */
4439 if (style == 2)
4440 {
4441 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
4442
4443 if (frame_pointer_needed)
4444 {
4445 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
4446 tmp = plus_constant (tmp, UNITS_PER_WORD);
4447 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
4448
4449 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
4450 emit_move_insn (hard_frame_pointer_rtx, tmp);
4451
4452 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
4453 const0_rtx, style);
4454 }
4455 else
4456 {
4457 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
4458 tmp = plus_constant (tmp, (frame.to_allocate
4459 + frame.nregs * UNITS_PER_WORD));
4460 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
4461 }
4462 }
4463 else if (!frame_pointer_needed)
4464 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
4465 GEN_INT (frame.to_allocate
4466 + frame.nregs * UNITS_PER_WORD),
4467 style);
4468 /* If not an i386, mov & pop is faster than "leave". */
4469 else if (TARGET_USE_LEAVE || optimize_size
4470 || !cfun->machine->use_fast_prologue_epilogue)
4471 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4472 else
4473 {
4474 pro_epilogue_adjust_stack (stack_pointer_rtx,
4475 hard_frame_pointer_rtx,
4476 const0_rtx, style);
4477 if (TARGET_64BIT)
4478 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4479 else
4480 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4481 }
4482 }
4483 else
4484 {
4485 /* First step is to deallocate the stack frame so that we can
4486 pop the registers. */
4487 if (!sp_valid)
4488 {
4489 if (!frame_pointer_needed)
4490 abort ();
4491 pro_epilogue_adjust_stack (stack_pointer_rtx,
4492 hard_frame_pointer_rtx,
4493 GEN_INT (offset), style);
4494 }
4495 else if (frame.to_allocate)
4496 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
4497 GEN_INT (frame.to_allocate), style);
4498
4499 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4500 if (ix86_save_reg (regno, false))
4501 {
4502 if (TARGET_64BIT)
4503 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
4504 else
4505 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
4506 }
4507 if (frame_pointer_needed)
4508 {
4509 /* Leave results in shorter dependency chains on CPUs that are
4510 able to grok it fast. */
4511 if (TARGET_USE_LEAVE)
4512 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4513 else if (TARGET_64BIT)
4514 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4515 else
4516 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4517 }
4518 }
4519
4520 /* Sibcall epilogues don't want a return instruction. */
4521 if (style == 0)
4522 return;
4523
4524 if (current_function_pops_args && current_function_args_size)
4525 {
4526 rtx popc = GEN_INT (current_function_pops_args);
4527
4528 /* i386 can only pop 64K bytes. If asked to pop more, pop the
4529 return address, do an explicit add, and jump indirectly to the
4530 caller. */
4531
4532 if (current_function_pops_args >= 65536)
4533 {
4534 rtx ecx = gen_rtx_REG (SImode, 2);
4535
4536 /* There is no "pascal" calling convention in 64bit ABI. */
4537 if (TARGET_64BIT)
4538 abort ();
4539
4540 emit_insn (gen_popsi1 (ecx));
4541 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
4542 emit_jump_insn (gen_return_indirect_internal (ecx));
4543 }
4544 else
4545 emit_jump_insn (gen_return_pop_internal (popc));
4546 }
4547 else
4548 emit_jump_insn (gen_return_internal ());
4549 }
4550
4551 /* Reset from the function's potential modifications. */
4552
4553 static void
4554 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
4555 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
4556 {
4557 if (pic_offset_table_rtx)
4558 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
4559 }
4560 \f
4561 /* Extract the parts of an RTL expression that is a valid memory address
4562 for an instruction. Return 0 if the structure of the address is
4563 grossly off. Return -1 if the address contains ASHIFT, so it is not
4564 strictly valid, but is still used for computing the length of the lea instruction. */
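/* For illustration, an address such as (plus (plus (mult (reg B) (const_int 4))
   (reg A)) (const_int 12)) decomposes into base = A, index = B, scale = 4 and
   disp = 12, i.e. the operand of a "12(%A,%B,4)" style memory reference. */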
4565
4566 int
4567 ix86_decompose_address (rtx addr, struct ix86_address *out)
4568 {
4569 rtx base = NULL_RTX;
4570 rtx index = NULL_RTX;
4571 rtx disp = NULL_RTX;
4572 HOST_WIDE_INT scale = 1;
4573 rtx scale_rtx = NULL_RTX;
4574 int retval = 1;
4575 enum ix86_address_seg seg = SEG_DEFAULT;
4576
4577 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
4578 base = addr;
4579 else if (GET_CODE (addr) == PLUS)
4580 {
4581 rtx addends[4], op;
4582 int n = 0, i;
4583
4584 op = addr;
4585 do
4586 {
4587 if (n >= 4)
4588 return 0;
4589 addends[n++] = XEXP (op, 1);
4590 op = XEXP (op, 0);
4591 }
4592 while (GET_CODE (op) == PLUS);
4593 if (n >= 4)
4594 return 0;
4595 addends[n] = op;
4596
4597 for (i = n; i >= 0; --i)
4598 {
4599 op = addends[i];
4600 switch (GET_CODE (op))
4601 {
4602 case MULT:
4603 if (index)
4604 return 0;
4605 index = XEXP (op, 0);
4606 scale_rtx = XEXP (op, 1);
4607 break;
4608
4609 case UNSPEC:
4610 if (XINT (op, 1) == UNSPEC_TP
4611 && TARGET_TLS_DIRECT_SEG_REFS
4612 && seg == SEG_DEFAULT)
4613 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
4614 else
4615 return 0;
4616 break;
4617
4618 case REG:
4619 case SUBREG:
4620 if (!base)
4621 base = op;
4622 else if (!index)
4623 index = op;
4624 else
4625 return 0;
4626 break;
4627
4628 case CONST:
4629 case CONST_INT:
4630 case SYMBOL_REF:
4631 case LABEL_REF:
4632 if (disp)
4633 return 0;
4634 disp = op;
4635 break;
4636
4637 default:
4638 return 0;
4639 }
4640 }
4641 }
4642 else if (GET_CODE (addr) == MULT)
4643 {
4644 index = XEXP (addr, 0); /* index*scale */
4645 scale_rtx = XEXP (addr, 1);
4646 }
4647 else if (GET_CODE (addr) == ASHIFT)
4648 {
4649 rtx tmp;
4650
4651 /* We're called for lea too, which implements ashift on occasion. */
4652 index = XEXP (addr, 0);
4653 tmp = XEXP (addr, 1);
4654 if (GET_CODE (tmp) != CONST_INT)
4655 return 0;
4656 scale = INTVAL (tmp);
4657 if ((unsigned HOST_WIDE_INT) scale > 3)
4658 return 0;
4659 scale = 1 << scale;
4660 retval = -1;
4661 }
4662 else
4663 disp = addr; /* displacement */
4664
4665 /* Extract the integral value of scale. */
4666 if (scale_rtx)
4667 {
4668 if (GET_CODE (scale_rtx) != CONST_INT)
4669 return 0;
4670 scale = INTVAL (scale_rtx);
4671 }
4672
4673 /* Allow the arg pointer and stack pointer as index if there is no scaling. */
4674 if (base && index && scale == 1
4675 && (index == arg_pointer_rtx
4676 || index == frame_pointer_rtx
4677 || (REG_P (index) && REGNO (index) == STACK_POINTER_REGNUM)))
4678 {
4679 rtx tmp = base;
4680 base = index;
4681 index = tmp;
4682 }
4683
4684 /* Special case: %ebp cannot be encoded as a base without a displacement. */
4685 if ((base == hard_frame_pointer_rtx
4686 || base == frame_pointer_rtx
4687 || base == arg_pointer_rtx) && !disp)
4688 disp = const0_rtx;
4689
4690 /* Special case: on K6, [%esi] makes the instruction vector decoded.
4691 Avoid this by transforming to [%esi+0]. */
4692 if (ix86_tune == PROCESSOR_K6 && !optimize_size
4693 && base && !index && !disp
4694 && REG_P (base)
4695 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
4696 disp = const0_rtx;
4697
4698 /* Special case: encode reg+reg instead of reg*2. */
4699 if (!base && index && scale && scale == 2)
4700 base = index, scale = 1;
4701
4702 /* Special case: scaling cannot be encoded without base or displacement. */
4703 if (!base && !disp && index && scale != 1)
4704 disp = const0_rtx;
4705
4706 out->base = base;
4707 out->index = index;
4708 out->disp = disp;
4709 out->scale = scale;
4710 out->seg = seg;
4711
4712 return retval;
4713 }
4714 \f
4715 /* Return the cost of the memory address X.
4716 For i386, it is better to use a complex address than let gcc copy
4717 the address into a reg and make a new pseudo. But not if the address
4718 requires two regs - that would mean more pseudos with longer
4719 lifetimes. */
4720 static int
4721 ix86_address_cost (rtx x)
4722 {
4723 struct ix86_address parts;
4724 int cost = 1;
4725
4726 if (!ix86_decompose_address (x, &parts))
4727 abort ();
4728
4729 /* More complex memory references are better. */
4730 if (parts.disp && parts.disp != const0_rtx)
4731 cost--;
4732 if (parts.seg != SEG_DEFAULT)
4733 cost--;
4734
4735 /* Attempt to minimize number of registers in the address. */
4736 if ((parts.base
4737 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
4738 || (parts.index
4739 && (!REG_P (parts.index)
4740 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
4741 cost++;
4742
4743 if (parts.base
4744 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
4745 && parts.index
4746 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
4747 && parts.base != parts.index)
4748 cost++;
4749
4750 /* The AMD K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
4751 since its predecode logic can't detect the length of instructions
4752 and they degenerate to vector decoding. Increase the cost of such
4753 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
4754 to split such addresses or even refuse such addresses at all.
4755
4756 The following addressing modes are affected:
4757 [base+scale*index]
4758 [scale*index+disp]
4759 [base+index]
4760
4761 The first and last case may be avoidable by explicitly coding the zero
4762 into the memory address, but I don't have an AMD-K6 machine handy to
4763 check this theory. */
4764
4765 if (TARGET_K6
4766 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
4767 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
4768 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
4769 cost += 10;
4770
4771 return cost;
4772 }
4773 \f
4774 /* If X is a machine specific address (i.e. a symbol or label being
4775 referenced as a displacement from the GOT implemented using an
4776 UNSPEC), then return the base term. Otherwise return X. */
4777
4778 rtx
4779 ix86_find_base_term (rtx x)
4780 {
4781 rtx term;
4782
4783 if (TARGET_64BIT)
4784 {
4785 if (GET_CODE (x) != CONST)
4786 return x;
4787 term = XEXP (x, 0);
4788 if (GET_CODE (term) == PLUS
4789 && (GET_CODE (XEXP (term, 1)) == CONST_INT
4790 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
4791 term = XEXP (term, 0);
4792 if (GET_CODE (term) != UNSPEC
4793 || XINT (term, 1) != UNSPEC_GOTPCREL)
4794 return x;
4795
4796 term = XVECEXP (term, 0, 0);
4797
4798 if (GET_CODE (term) != SYMBOL_REF
4799 && GET_CODE (term) != LABEL_REF)
4800 return x;
4801
4802 return term;
4803 }
4804
4805 term = ix86_delegitimize_address (x);
4806
4807 if (GET_CODE (term) != SYMBOL_REF
4808 && GET_CODE (term) != LABEL_REF)
4809 return x;
4810
4811 return term;
4812 }
4813
4814 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O, as
4815 this is used to form addresses to local data when -fPIC is in
4816 use. */
4817
4818 static bool
4819 darwin_local_data_pic (rtx disp)
4820 {
4821 if (GET_CODE (disp) == MINUS)
4822 {
4823 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
4824 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
4825 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
4826 {
4827 const char *sym_name = XSTR (XEXP (disp, 1), 0);
4828 if (! strcmp (sym_name, "<pic base>"))
4829 return true;
4830 }
4831 }
4832
4833 return false;
4834 }
4835 \f
4836 /* Determine if a given RTX is a valid constant. We already know this
4837 satisfies CONSTANT_P. */
4838
4839 bool
4840 legitimate_constant_p (rtx x)
4841 {
4842 switch (GET_CODE (x))
4843 {
4844 case CONST:
4845 x = XEXP (x, 0);
4846
4847 if (GET_CODE (x) == PLUS)
4848 {
4849 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
4850 return false;
4851 x = XEXP (x, 0);
4852 }
4853
4854 if (TARGET_MACHO && darwin_local_data_pic (x))
4855 return true;
4856
4857 /* Only some unspecs are valid as "constants". */
4858 if (GET_CODE (x) == UNSPEC)
4859 switch (XINT (x, 1))
4860 {
4861 case UNSPEC_TPOFF:
4862 case UNSPEC_NTPOFF:
4863 return local_exec_symbolic_operand (XVECEXP (x, 0, 0), Pmode);
4864 case UNSPEC_DTPOFF:
4865 return local_dynamic_symbolic_operand (XVECEXP (x, 0, 0), Pmode);
4866 default:
4867 return false;
4868 }
4869
4870 /* We must have drilled down to a symbol. */
4871 if (!symbolic_operand (x, Pmode))
4872 return false;
4873 /* FALLTHRU */
4874
4875 case SYMBOL_REF:
4876 /* TLS symbols are never valid. */
4877 if (tls_symbolic_operand (x, Pmode))
4878 return false;
4879 break;
4880
4881 default:
4882 break;
4883 }
4884
4885 /* Otherwise we handle everything else in the move patterns. */
4886 return true;
4887 }
4888
4889 /* Determine if it's legal to put X into the constant pool. This
4890 is not possible for the address of thread-local symbols, which
4891 is checked above. */
4892
4893 static bool
4894 ix86_cannot_force_const_mem (rtx x)
4895 {
4896 return !legitimate_constant_p (x);
4897 }
4898
4899 /* Determine if a given RTX is a valid constant address. */
4900
4901 bool
4902 constant_address_p (rtx x)
4903 {
4904 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
4905 }
4906
4907 /* Nonzero if the constant value X is a legitimate general operand
4908 when generating PIC code. It is given that flag_pic is on and
4909 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
4910
4911 bool
4912 legitimate_pic_operand_p (rtx x)
4913 {
4914 rtx inner;
4915
4916 switch (GET_CODE (x))
4917 {
4918 case CONST:
4919 inner = XEXP (x, 0);
4920
4921 /* Only some unspecs are valid as "constants". */
4922 if (GET_CODE (inner) == UNSPEC)
4923 switch (XINT (inner, 1))
4924 {
4925 case UNSPEC_TPOFF:
4926 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
4927 default:
4928 return false;
4929 }
4930 /* FALLTHRU */
4931
4932 case SYMBOL_REF:
4933 case LABEL_REF:
4934 return legitimate_pic_address_disp_p (x);
4935
4936 default:
4937 return true;
4938 }
4939 }
4940
4941 /* Determine if a given CONST RTX is a valid memory displacement
4942 in PIC mode. */
4943
4944 int
4945 legitimate_pic_address_disp_p (rtx disp)
4946 {
4947 bool saw_plus;
4948
4949 /* In 64bit mode we can allow direct addresses of symbols and labels
4950 when they are not dynamic symbols. */
4951 if (TARGET_64BIT)
4952 {
4953 /* TLS references should always be enclosed in UNSPEC. */
4954 if (tls_symbolic_operand (disp, GET_MODE (disp)))
4955 return 0;
4956 if (GET_CODE (disp) == SYMBOL_REF
4957 && ix86_cmodel == CM_SMALL_PIC
4958 && SYMBOL_REF_LOCAL_P (disp))
4959 return 1;
4960 if (GET_CODE (disp) == LABEL_REF)
4961 return 1;
4962 if (GET_CODE (disp) == CONST
4963 && GET_CODE (XEXP (disp, 0)) == PLUS)
4964 {
4965 rtx op0 = XEXP (XEXP (disp, 0), 0);
4966 rtx op1 = XEXP (XEXP (disp, 0), 1);
4967
4968 /* TLS references should always be enclosed in UNSPEC. */
4969 if (tls_symbolic_operand (op0, GET_MODE (op0)))
4970 return 0;
4971 if (((GET_CODE (op0) == SYMBOL_REF
4972 && ix86_cmodel == CM_SMALL_PIC
4973 && SYMBOL_REF_LOCAL_P (op0))
4974 || GET_CODE (op0) == LABEL_REF)
4975 && GET_CODE (op1) == CONST_INT
4976 && INTVAL (op1) < 16*1024*1024
4977 && INTVAL (op1) >= -16*1024*1024)
4978 return 1;
4979 }
4980 }
4981 if (GET_CODE (disp) != CONST)
4982 return 0;
4983 disp = XEXP (disp, 0);
4984
4985 if (TARGET_64BIT)
4986 {
4987 /* It is unsafe to allow PLUS expressions here; that would stretch the
4988 allowed distance of GOT references. We should not need these anyway. */
4989 if (GET_CODE (disp) != UNSPEC
4990 || XINT (disp, 1) != UNSPEC_GOTPCREL)
4991 return 0;
4992
4993 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
4994 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
4995 return 0;
4996 return 1;
4997 }
4998
4999 saw_plus = false;
5000 if (GET_CODE (disp) == PLUS)
5001 {
5002 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
5003 return 0;
5004 disp = XEXP (disp, 0);
5005 saw_plus = true;
5006 }
5007
5008 if (TARGET_MACHO && darwin_local_data_pic (disp))
5009 return 1;
5010
5011 if (GET_CODE (disp) != UNSPEC)
5012 return 0;
5013
5014 switch (XINT (disp, 1))
5015 {
5016 case UNSPEC_GOT:
5017 if (saw_plus)
5018 return false;
5019 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
5020 case UNSPEC_GOTOFF:
5021 if (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
5022 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
5023 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5024 return false;
5025 case UNSPEC_GOTTPOFF:
5026 case UNSPEC_GOTNTPOFF:
5027 case UNSPEC_INDNTPOFF:
5028 if (saw_plus)
5029 return false;
5030 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5031 case UNSPEC_NTPOFF:
5032 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5033 case UNSPEC_DTPOFF:
5034 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5035 }
5036
5037 return 0;
5038 }
5039
5040 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5041 memory address for an instruction. The MODE argument is the machine mode
5042 for the MEM expression that wants to use this address.
5043
5044 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
5045 convert common non-canonical forms to canonical form so that they will
5046 be recognized. */
5047
5048 int
5049 legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
5050 {
5051 struct ix86_address parts;
5052 rtx base, index, disp;
5053 HOST_WIDE_INT scale;
5054 const char *reason = NULL;
5055 rtx reason_rtx = NULL_RTX;
5056
5057 if (TARGET_DEBUG_ADDR)
5058 {
5059 fprintf (stderr,
5060 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
5061 GET_MODE_NAME (mode), strict);
5062 debug_rtx (addr);
5063 }
5064
5065 if (ix86_decompose_address (addr, &parts) <= 0)
5066 {
5067 reason = "decomposition failed";
5068 goto report_error;
5069 }
5070
5071 base = parts.base;
5072 index = parts.index;
5073 disp = parts.disp;
5074 scale = parts.scale;
5075
5076 /* Validate base register.
5077
5078 Don't allow SUBREGs here; they can lead to spill failures when the base
5079 is one word out of a two word structure, which is represented internally
5080 as a DImode int. */
5081
5082 if (base)
5083 {
5084 reason_rtx = base;
5085
5086 if (GET_CODE (base) != REG)
5087 {
5088 reason = "base is not a register";
5089 goto report_error;
5090 }
5091
5092 if (GET_MODE (base) != Pmode)
5093 {
5094 reason = "base is not in Pmode";
5095 goto report_error;
5096 }
5097
5098 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
5099 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
5100 {
5101 reason = "base is not valid";
5102 goto report_error;
5103 }
5104 }
5105
5106 /* Validate index register.
5107
5108 Don't allow SUBREGs here; they can lead to spill failures when the index
5109 is one word out of a two word structure, which is represented internally
5110 as a DImode int. */
5111
5112 if (index)
5113 {
5114 reason_rtx = index;
5115
5116 if (GET_CODE (index) != REG)
5117 {
5118 reason = "index is not a register";
5119 goto report_error;
5120 }
5121
5122 if (GET_MODE (index) != Pmode)
5123 {
5124 reason = "index is not in Pmode";
5125 goto report_error;
5126 }
5127
5128 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
5129 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
5130 {
5131 reason = "index is not valid";
5132 goto report_error;
5133 }
5134 }
5135
5136 /* Validate scale factor. */
5137 if (scale != 1)
5138 {
5139 reason_rtx = GEN_INT (scale);
5140 if (!index)
5141 {
5142 reason = "scale without index";
5143 goto report_error;
5144 }
5145
5146 if (scale != 2 && scale != 4 && scale != 8)
5147 {
5148 reason = "scale is not a valid multiplier";
5149 goto report_error;
5150 }
5151 }
5152
5153 /* Validate displacement. */
5154 if (disp)
5155 {
5156 reason_rtx = disp;
5157
5158 if (GET_CODE (disp) == CONST
5159 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
5160 switch (XINT (XEXP (disp, 0), 1))
5161 {
5162 case UNSPEC_GOT:
5163 case UNSPEC_GOTOFF:
5164 case UNSPEC_GOTPCREL:
5165 if (!flag_pic)
5166 abort ();
5167 goto is_legitimate_pic;
5168
5169 case UNSPEC_GOTTPOFF:
5170 case UNSPEC_GOTNTPOFF:
5171 case UNSPEC_INDNTPOFF:
5172 case UNSPEC_NTPOFF:
5173 case UNSPEC_DTPOFF:
5174 break;
5175
5176 default:
5177 reason = "invalid address unspec";
5178 goto report_error;
5179 }
5180
5181 else if (flag_pic && (SYMBOLIC_CONST (disp)
5182 #if TARGET_MACHO
5183 && !machopic_operand_p (disp)
5184 #endif
5185 ))
5186 {
5187 is_legitimate_pic:
5188 if (TARGET_64BIT && (index || base))
5189 {
5190 /* foo@dtpoff(%rX) is ok. */
5191 if (GET_CODE (disp) != CONST
5192 || GET_CODE (XEXP (disp, 0)) != PLUS
5193 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
5194 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
5195 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
5196 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
5197 {
5198 reason = "non-constant pic memory reference";
5199 goto report_error;
5200 }
5201 }
5202 else if (! legitimate_pic_address_disp_p (disp))
5203 {
5204 reason = "displacement is an invalid pic construct";
5205 goto report_error;
5206 }
5207
5208 /* This code used to verify that a symbolic pic displacement
5209 includes the pic_offset_table_rtx register.
5210
5211 While this is a good idea, unfortunately these constructs may
5212 be created by the "adds using lea" optimization for incorrect
5213 code like:
5214
5215 int a;
5216 int foo(int i)
5217 {
5218 return *(&a+i);
5219 }
5220
5221 This code is nonsensical, but results in addressing the
5222 GOT table with a pic_offset_table_rtx base. We can't
5223 just refuse it easily, since it gets matched by the
5224 "addsi3" pattern, which later gets split to lea in case
5225 the output register differs from the input. While this
5226 could be handled by a separate addsi pattern for this case
5227 that never results in lea, disabling this test seems to be
5228 the easier and correct fix for the crash. */
5229 }
5230 else if (GET_CODE (disp) != LABEL_REF
5231 && GET_CODE (disp) != CONST_INT
5232 && (GET_CODE (disp) != CONST
5233 || !legitimate_constant_p (disp))
5234 && (GET_CODE (disp) != SYMBOL_REF
5235 || !legitimate_constant_p (disp)))
5236 {
5237 reason = "displacement is not constant";
5238 goto report_error;
5239 }
5240 else if (TARGET_64BIT
5241 && !x86_64_immediate_operand (disp, VOIDmode))
5242 {
5243 reason = "displacement is out of range";
5244 goto report_error;
5245 }
5246 }
5247
5248 /* Everything looks valid. */
5249 if (TARGET_DEBUG_ADDR)
5250 fprintf (stderr, "Success.\n");
5251 return TRUE;
5252
5253 report_error:
5254 if (TARGET_DEBUG_ADDR)
5255 {
5256 fprintf (stderr, "Error: %s\n", reason);
5257 debug_rtx (reason_rtx);
5258 }
5259 return FALSE;
5260 }
5261 \f
5262 /* Return a unique alias set for the GOT. */
5263
5264 static HOST_WIDE_INT
5265 ix86_GOT_alias_set (void)
5266 {
5267 static HOST_WIDE_INT set = -1;
5268 if (set == -1)
5269 set = new_alias_set ();
5270 return set;
5271 }
5272
5273 /* Return a legitimate reference for ORIG (an address) using the
5274 register REG. If REG is 0, a new pseudo is generated.
5275
5276 There are two types of references that must be handled:
5277
5278 1. Global data references must load the address from the GOT, via
5279 the PIC reg. An insn is emitted to do this load, and the reg is
5280 returned.
5281
5282 2. Static data references, constant pool addresses, and code labels
5283 compute the address as an offset from the GOT, whose base is in
5284 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
5285 differentiate them from global data objects. The returned
5286 address is the PIC reg + an unspec constant.
5287
5288 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
5289 reg also appears in the address. */
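/* For illustration (32-bit PIC, with the PIC register in %ebx), a
   global symbol is typically loaded from the GOT:
       movl foo@GOT(%ebx), %eax
   while a local symbol is computed as an offset from the GOT base:
       leal foo@GOTOFF(%ebx), %eax
   On x86-64, global symbols are instead loaded via foo@GOTPCREL(%rip).  */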
5290
5291 static rtx
5292 legitimize_pic_address (rtx orig, rtx reg)
5293 {
5294 rtx addr = orig;
5295 rtx new = orig;
5296 rtx base;
5297
5298 #if TARGET_MACHO
5299 if (reg == 0)
5300 reg = gen_reg_rtx (Pmode);
5301 /* Use the generic Mach-O PIC machinery. */
5302 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
5303 #endif
5304
5305 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
5306 new = addr;
5307 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
5308 {
5309 /* This symbol may be referenced via a displacement from the PIC
5310 base address (@GOTOFF). */
5311
5312 if (reload_in_progress)
5313 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5314 if (GET_CODE (addr) == CONST)
5315 addr = XEXP (addr, 0);
5316 if (GET_CODE (addr) == PLUS)
5317 {
5318 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
5319 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
5320 }
5321 else
5322 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
5323 new = gen_rtx_CONST (Pmode, new);
5324 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5325
5326 if (reg != 0)
5327 {
5328 emit_move_insn (reg, new);
5329 new = reg;
5330 }
5331 }
5332 else if (GET_CODE (addr) == SYMBOL_REF)
5333 {
5334 if (TARGET_64BIT)
5335 {
5336 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
5337 new = gen_rtx_CONST (Pmode, new);
5338 new = gen_const_mem (Pmode, new);
5339 set_mem_alias_set (new, ix86_GOT_alias_set ());
5340
5341 if (reg == 0)
5342 reg = gen_reg_rtx (Pmode);
5343 /* Use gen_movsi directly; otherwise the address is loaded
5344 into a register for CSE. We don't want to CSE these addresses;
5345 instead we CSE addresses from the GOT table, so skip this. */
5346 emit_insn (gen_movsi (reg, new));
5347 new = reg;
5348 }
5349 else
5350 {
5351 /* This symbol must be referenced via a load from the
5352 Global Offset Table (@GOT). */
5353
5354 if (reload_in_progress)
5355 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5356 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
5357 new = gen_rtx_CONST (Pmode, new);
5358 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5359 new = gen_const_mem (Pmode, new);
5360 set_mem_alias_set (new, ix86_GOT_alias_set ());
5361
5362 if (reg == 0)
5363 reg = gen_reg_rtx (Pmode);
5364 emit_move_insn (reg, new);
5365 new = reg;
5366 }
5367 }
5368 else
5369 {
5370 if (GET_CODE (addr) == CONST)
5371 {
5372 addr = XEXP (addr, 0);
5373
5374 /* We must match stuff we generated before. Assume the only
5375 unspecs that can get here are ours. Not that we could do
5376 anything with them anyway. */
5377 if (GET_CODE (addr) == UNSPEC
5378 || (GET_CODE (addr) == PLUS
5379 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
5380 return orig;
5381 if (GET_CODE (addr) != PLUS)
5382 abort ();
5383 }
5384 if (GET_CODE (addr) == PLUS)
5385 {
5386 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
5387
5388 /* Check first to see if this is a constant offset from a @GOTOFF
5389 symbol reference. */
5390 if (local_symbolic_operand (op0, Pmode)
5391 && GET_CODE (op1) == CONST_INT)
5392 {
5393 if (!TARGET_64BIT)
5394 {
5395 if (reload_in_progress)
5396 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5397 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
5398 UNSPEC_GOTOFF);
5399 new = gen_rtx_PLUS (Pmode, new, op1);
5400 new = gen_rtx_CONST (Pmode, new);
5401 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5402
5403 if (reg != 0)
5404 {
5405 emit_move_insn (reg, new);
5406 new = reg;
5407 }
5408 }
5409 else
5410 {
5411 if (INTVAL (op1) < -16*1024*1024
5412 || INTVAL (op1) >= 16*1024*1024)
5413 new = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
5414 }
5415 }
5416 else
5417 {
5418 base = legitimize_pic_address (XEXP (addr, 0), reg);
5419 new = legitimize_pic_address (XEXP (addr, 1),
5420 base == reg ? NULL_RTX : reg);
5421
5422 if (GET_CODE (new) == CONST_INT)
5423 new = plus_constant (base, INTVAL (new));
5424 else
5425 {
5426 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
5427 {
5428 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
5429 new = XEXP (new, 1);
5430 }
5431 new = gen_rtx_PLUS (Pmode, base, new);
5432 }
5433 }
5434 }
5435 }
5436 return new;
5437 }
5438 \f
5439 /* Load the thread pointer. If TO_REG is true, force it into a register. */
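/* On GNU systems the thread pointer is typically reached through a
   segment register (roughly %gs:0 in 32-bit code and %fs:0 in 64-bit
   code), which is what the UNSPEC_TP pattern expands to.  */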
5440
5441 static rtx
5442 get_thread_pointer (int to_reg)
5443 {
5444 rtx tp, reg, insn;
5445
5446 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
5447 if (!to_reg)
5448 return tp;
5449
5450 reg = gen_reg_rtx (Pmode);
5451 insn = gen_rtx_SET (VOIDmode, reg, tp);
5452 insn = emit_insn (insn);
5453
5454 return reg;
5455 }
5456
5457 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
5458 false if we expect this to be used for a memory address and true if
5459 we expect to load the address into a register. */
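/* Roughly, the access sequences generated for a TLS variable x are:
     global dynamic: call __tls_get_addr with an x@tlsgd argument;
     local dynamic:  call __tls_get_addr for the module base, add x@dtpoff;
     initial exec:   load x@gottpoff from the GOT, add the thread pointer;
     local exec:     add x@tpoff (or x@ntpoff) to the thread pointer.  */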
5460
5461 static rtx
5462 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
5463 {
5464 rtx dest, base, off, pic;
5465 int type;
5466
5467 switch (model)
5468 {
5469 case TLS_MODEL_GLOBAL_DYNAMIC:
5470 dest = gen_reg_rtx (Pmode);
5471 if (TARGET_64BIT)
5472 {
5473 rtx rax = gen_rtx_REG (Pmode, 0), insns;
5474
5475 start_sequence ();
5476 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
5477 insns = get_insns ();
5478 end_sequence ();
5479
5480 emit_libcall_block (insns, dest, rax, x);
5481 }
5482 else
5483 emit_insn (gen_tls_global_dynamic_32 (dest, x));
5484 break;
5485
5486 case TLS_MODEL_LOCAL_DYNAMIC:
5487 base = gen_reg_rtx (Pmode);
5488 if (TARGET_64BIT)
5489 {
5490 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
5491
5492 start_sequence ();
5493 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
5494 insns = get_insns ();
5495 end_sequence ();
5496
5497 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
5498 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
5499 emit_libcall_block (insns, base, rax, note);
5500 }
5501 else
5502 emit_insn (gen_tls_local_dynamic_base_32 (base));
5503
5504 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
5505 off = gen_rtx_CONST (Pmode, off);
5506
5507 return gen_rtx_PLUS (Pmode, base, off);
5508
5509 case TLS_MODEL_INITIAL_EXEC:
5510 if (TARGET_64BIT)
5511 {
5512 pic = NULL;
5513 type = UNSPEC_GOTNTPOFF;
5514 }
5515 else if (flag_pic)
5516 {
5517 if (reload_in_progress)
5518 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5519 pic = pic_offset_table_rtx;
5520 type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
5521 }
5522 else if (!TARGET_GNU_TLS)
5523 {
5524 pic = gen_reg_rtx (Pmode);
5525 emit_insn (gen_set_got (pic));
5526 type = UNSPEC_GOTTPOFF;
5527 }
5528 else
5529 {
5530 pic = NULL;
5531 type = UNSPEC_INDNTPOFF;
5532 }
5533
5534 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
5535 off = gen_rtx_CONST (Pmode, off);
5536 if (pic)
5537 off = gen_rtx_PLUS (Pmode, pic, off);
5538 off = gen_const_mem (Pmode, off);
5539 set_mem_alias_set (off, ix86_GOT_alias_set ());
5540
5541 if (TARGET_64BIT || TARGET_GNU_TLS)
5542 {
5543 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
5544 off = force_reg (Pmode, off);
5545 return gen_rtx_PLUS (Pmode, base, off);
5546 }
5547 else
5548 {
5549 base = get_thread_pointer (true);
5550 dest = gen_reg_rtx (Pmode);
5551 emit_insn (gen_subsi3 (dest, base, off));
5552 }
5553 break;
5554
5555 case TLS_MODEL_LOCAL_EXEC:
5556 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
5557 (TARGET_64BIT || TARGET_GNU_TLS)
5558 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
5559 off = gen_rtx_CONST (Pmode, off);
5560
5561 if (TARGET_64BIT || TARGET_GNU_TLS)
5562 {
5563 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
5564 return gen_rtx_PLUS (Pmode, base, off);
5565 }
5566 else
5567 {
5568 base = get_thread_pointer (true);
5569 dest = gen_reg_rtx (Pmode);
5570 emit_insn (gen_subsi3 (dest, base, off));
5571 }
5572 break;
5573
5574 default:
5575 abort ();
5576 }
5577
5578 return dest;
5579 }
5580
5581 /* Try machine-dependent ways of modifying an illegitimate address
5582 to be legitimate. If we find one, return the new, valid address.
5583 This macro is used in only one place: `memory_address' in explow.c.
5584
5585 OLDX is the address as it was before break_out_memory_refs was called.
5586 In some cases it is useful to look at this to decide what needs to be done.
5587
5588 MODE and WIN are passed so that this macro can use
5589 GO_IF_LEGITIMATE_ADDRESS.
5590
5591 It is always safe for this macro to do nothing. It exists to recognize
5592 opportunities to optimize the output.
5593
5594 For the 80386, we handle X+REG by loading X into a register R and
5595 using R+REG. R will go in a general reg and indexing will be used.
5596 However, if REG is a broken-out memory address or multiplication,
5597 nothing needs to be done because REG can certainly go in a general reg.
5598
5599 When -fpic is used, special handling is needed for symbolic references.
5600 See comments by legitimize_pic_address in i386.c for details. */
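/* For example, an address like (plus (symbol_ref "x") (reg)) may be
   rewritten by loading the symbol into a scratch register R and
   returning (plus R (reg)), which the addressing hardware accepts
   directly.  */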
5601
5602 rtx
5603 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
5604 {
5605 int changed = 0;
5606 unsigned log;
5607
5608 if (TARGET_DEBUG_ADDR)
5609 {
5610 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
5611 GET_MODE_NAME (mode));
5612 debug_rtx (x);
5613 }
5614
5615 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
5616 if (log)
5617 return legitimize_tls_address (x, log, false);
5618 if (GET_CODE (x) == CONST
5619 && GET_CODE (XEXP (x, 0)) == PLUS
5620 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
5621 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
5622 {
5623 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
5624 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
5625 }
5626
5627 if (flag_pic && SYMBOLIC_CONST (x))
5628 return legitimize_pic_address (x, 0);
5629
5630 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
5631 if (GET_CODE (x) == ASHIFT
5632 && GET_CODE (XEXP (x, 1)) == CONST_INT
5633 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
5634 {
5635 changed = 1;
5636 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
5637 GEN_INT (1 << log));
5638 }
5639
5640 if (GET_CODE (x) == PLUS)
5641 {
5642 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
5643
5644 if (GET_CODE (XEXP (x, 0)) == ASHIFT
5645 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
5646 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
5647 {
5648 changed = 1;
5649 XEXP (x, 0) = gen_rtx_MULT (Pmode,
5650 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
5651 GEN_INT (1 << log));
5652 }
5653
5654 if (GET_CODE (XEXP (x, 1)) == ASHIFT
5655 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
5656 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
5657 {
5658 changed = 1;
5659 XEXP (x, 1) = gen_rtx_MULT (Pmode,
5660 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
5661 GEN_INT (1 << log));
5662 }
5663
5664 /* Put multiply first if it isn't already. */
5665 if (GET_CODE (XEXP (x, 1)) == MULT)
5666 {
5667 rtx tmp = XEXP (x, 0);
5668 XEXP (x, 0) = XEXP (x, 1);
5669 XEXP (x, 1) = tmp;
5670 changed = 1;
5671 }
5672
5673 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
5674 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
5675 created by virtual register instantiation, register elimination, and
5676 similar optimizations. */
5677 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
5678 {
5679 changed = 1;
5680 x = gen_rtx_PLUS (Pmode,
5681 gen_rtx_PLUS (Pmode, XEXP (x, 0),
5682 XEXP (XEXP (x, 1), 0)),
5683 XEXP (XEXP (x, 1), 1));
5684 }
5685
5686 /* Canonicalize
5687 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
5688 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
5689 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
5690 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5691 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
5692 && CONSTANT_P (XEXP (x, 1)))
5693 {
5694 rtx constant;
5695 rtx other = NULL_RTX;
5696
5697 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5698 {
5699 constant = XEXP (x, 1);
5700 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
5701 }
5702 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
5703 {
5704 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
5705 other = XEXP (x, 1);
5706 }
5707 else
5708 constant = 0;
5709
5710 if (constant)
5711 {
5712 changed = 1;
5713 x = gen_rtx_PLUS (Pmode,
5714 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
5715 XEXP (XEXP (XEXP (x, 0), 1), 0)),
5716 plus_constant (other, INTVAL (constant)));
5717 }
5718 }
5719
5720 if (changed && legitimate_address_p (mode, x, FALSE))
5721 return x;
5722
5723 if (GET_CODE (XEXP (x, 0)) == MULT)
5724 {
5725 changed = 1;
5726 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
5727 }
5728
5729 if (GET_CODE (XEXP (x, 1)) == MULT)
5730 {
5731 changed = 1;
5732 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
5733 }
5734
5735 if (changed
5736 && GET_CODE (XEXP (x, 1)) == REG
5737 && GET_CODE (XEXP (x, 0)) == REG)
5738 return x;
5739
5740 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
5741 {
5742 changed = 1;
5743 x = legitimize_pic_address (x, 0);
5744 }
5745
5746 if (changed && legitimate_address_p (mode, x, FALSE))
5747 return x;
5748
5749 if (GET_CODE (XEXP (x, 0)) == REG)
5750 {
5751 rtx temp = gen_reg_rtx (Pmode);
5752 rtx val = force_operand (XEXP (x, 1), temp);
5753 if (val != temp)
5754 emit_move_insn (temp, val);
5755
5756 XEXP (x, 1) = temp;
5757 return x;
5758 }
5759
5760 else if (GET_CODE (XEXP (x, 1)) == REG)
5761 {
5762 rtx temp = gen_reg_rtx (Pmode);
5763 rtx val = force_operand (XEXP (x, 0), temp);
5764 if (val != temp)
5765 emit_move_insn (temp, val);
5766
5767 XEXP (x, 0) = temp;
5768 return x;
5769 }
5770 }
5771
5772 return x;
5773 }
5774 \f
5775 /* Print an integer constant expression in assembler syntax. Addition
5776 and subtraction are the only arithmetic that may appear in these
5777 expressions. FILE is the stdio stream to write to, X is the rtx, and
5778 CODE is the operand print code from the output string. */
5779
5780 static void
5781 output_pic_addr_const (FILE *file, rtx x, int code)
5782 {
5783 char buf[256];
5784
5785 switch (GET_CODE (x))
5786 {
5787 case PC:
5788 if (flag_pic)
5789 putc ('.', file);
5790 else
5791 abort ();
5792 break;
5793
5794 case SYMBOL_REF:
5795 /* Mark the decl as referenced so that cgraph will output the function. */
5796 if (SYMBOL_REF_DECL (x))
5797 mark_decl_referenced (SYMBOL_REF_DECL (x));
5798
5799 assemble_name (file, XSTR (x, 0));
5800 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
5801 fputs ("@PLT", file);
5802 break;
5803
5804 case LABEL_REF:
5805 x = XEXP (x, 0);
5806 /* FALLTHRU */
5807 case CODE_LABEL:
5808 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
5809 assemble_name (asm_out_file, buf);
5810 break;
5811
5812 case CONST_INT:
5813 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
5814 break;
5815
5816 case CONST:
5817 /* This used to output parentheses around the expression,
5818 but that does not work on the 386 (either ATT or BSD assembler). */
5819 output_pic_addr_const (file, XEXP (x, 0), code);
5820 break;
5821
5822 case CONST_DOUBLE:
5823 if (GET_MODE (x) == VOIDmode)
5824 {
5825 /* We can use %d if the number is <32 bits and positive. */
5826 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
5827 fprintf (file, "0x%lx%08lx",
5828 (unsigned long) CONST_DOUBLE_HIGH (x),
5829 (unsigned long) CONST_DOUBLE_LOW (x));
5830 else
5831 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
5832 }
5833 else
5834 /* We can't handle floating point constants;
5835 PRINT_OPERAND must handle them. */
5836 output_operand_lossage ("floating constant misused");
5837 break;
5838
5839 case PLUS:
5840 /* Some assemblers need integer constants to appear first. */
5841 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
5842 {
5843 output_pic_addr_const (file, XEXP (x, 0), code);
5844 putc ('+', file);
5845 output_pic_addr_const (file, XEXP (x, 1), code);
5846 }
5847 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5848 {
5849 output_pic_addr_const (file, XEXP (x, 1), code);
5850 putc ('+', file);
5851 output_pic_addr_const (file, XEXP (x, 0), code);
5852 }
5853 else
5854 abort ();
5855 break;
5856
5857 case MINUS:
5858 if (!TARGET_MACHO)
5859 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
5860 output_pic_addr_const (file, XEXP (x, 0), code);
5861 putc ('-', file);
5862 output_pic_addr_const (file, XEXP (x, 1), code);
5863 if (!TARGET_MACHO)
5864 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
5865 break;
5866
5867 case UNSPEC:
5868 if (XVECLEN (x, 0) != 1)
5869 abort ();
5870 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
5871 switch (XINT (x, 1))
5872 {
5873 case UNSPEC_GOT:
5874 fputs ("@GOT", file);
5875 break;
5876 case UNSPEC_GOTOFF:
5877 fputs ("@GOTOFF", file);
5878 break;
5879 case UNSPEC_GOTPCREL:
5880 fputs ("@GOTPCREL(%rip)", file);
5881 break;
5882 case UNSPEC_GOTTPOFF:
5883 /* FIXME: This might be @TPOFF in Sun ld too. */
5884 fputs ("@GOTTPOFF", file);
5885 break;
5886 case UNSPEC_TPOFF:
5887 fputs ("@TPOFF", file);
5888 break;
5889 case UNSPEC_NTPOFF:
5890 if (TARGET_64BIT)
5891 fputs ("@TPOFF", file);
5892 else
5893 fputs ("@NTPOFF", file);
5894 break;
5895 case UNSPEC_DTPOFF:
5896 fputs ("@DTPOFF", file);
5897 break;
5898 case UNSPEC_GOTNTPOFF:
5899 if (TARGET_64BIT)
5900 fputs ("@GOTTPOFF(%rip)", file);
5901 else
5902 fputs ("@GOTNTPOFF", file);
5903 break;
5904 case UNSPEC_INDNTPOFF:
5905 fputs ("@INDNTPOFF", file);
5906 break;
5907 default:
5908 output_operand_lossage ("invalid UNSPEC as operand");
5909 break;
5910 }
5911 break;
5912
5913 default:
5914 output_operand_lossage ("invalid expression as operand");
5915 }
5916 }
5917
5918 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
5919 We need to handle our special PIC relocations. */
5920
5921 void
5922 i386_dwarf_output_addr_const (FILE *file, rtx x)
5923 {
5924 #ifdef ASM_QUAD
5925 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
5926 #else
5927 if (TARGET_64BIT)
5928 abort ();
5929 fprintf (file, "%s", ASM_LONG);
5930 #endif
5931 if (flag_pic)
5932 output_pic_addr_const (file, x, '\0');
5933 else
5934 output_addr_const (file, x);
5935 fputc ('\n', file);
5936 }
5937
5938 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
5939 We need to emit DTP-relative relocations. */
5940
5941 void
5942 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
5943 {
5944 fputs (ASM_LONG, file);
5945 output_addr_const (file, x);
5946 fputs ("@DTPOFF", file);
5947 switch (size)
5948 {
5949 case 4:
5950 break;
5951 case 8:
5952 fputs (", 0", file);
5953 break;
5954 default:
5955 abort ();
5956 }
5957 }
5958
5959 /* In the name of slightly smaller debug output, and to cater to
5960 general assembler lossage, recognize PIC+GOTOFF and turn it back
5961 into a direct symbol reference. */
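/* For example, an address roughly of the form
   (plus (reg %ebx) (const (unspec [(symbol_ref "x")] UNSPEC_GOTOFF)))
   is turned back into (symbol_ref "x"), and a 64-bit GOT load
   (mem (const (unspec [(symbol_ref "x")] UNSPEC_GOTPCREL)))
   likewise yields the bare symbol.  */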
5962
5963 static rtx
5964 ix86_delegitimize_address (rtx orig_x)
5965 {
5966 rtx x = orig_x, y;
5967
5968 if (GET_CODE (x) == MEM)
5969 x = XEXP (x, 0);
5970
5971 if (TARGET_64BIT)
5972 {
5973 if (GET_CODE (x) != CONST
5974 || GET_CODE (XEXP (x, 0)) != UNSPEC
5975 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
5976 || GET_CODE (orig_x) != MEM)
5977 return orig_x;
5978 return XVECEXP (XEXP (x, 0), 0, 0);
5979 }
5980
5981 if (GET_CODE (x) != PLUS
5982 || GET_CODE (XEXP (x, 1)) != CONST)
5983 return orig_x;
5984
5985 if (GET_CODE (XEXP (x, 0)) == REG
5986 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
5987 /* %ebx + GOT/GOTOFF */
5988 y = NULL;
5989 else if (GET_CODE (XEXP (x, 0)) == PLUS)
5990 {
5991 /* %ebx + %reg * scale + GOT/GOTOFF */
5992 y = XEXP (x, 0);
5993 if (GET_CODE (XEXP (y, 0)) == REG
5994 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
5995 y = XEXP (y, 1);
5996 else if (GET_CODE (XEXP (y, 1)) == REG
5997 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
5998 y = XEXP (y, 0);
5999 else
6000 return orig_x;
6001 if (GET_CODE (y) != REG
6002 && GET_CODE (y) != MULT
6003 && GET_CODE (y) != ASHIFT)
6004 return orig_x;
6005 }
6006 else
6007 return orig_x;
6008
6009 x = XEXP (XEXP (x, 1), 0);
6010 if (GET_CODE (x) == UNSPEC
6011 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6012 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
6013 {
6014 if (y)
6015 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
6016 return XVECEXP (x, 0, 0);
6017 }
6018
6019 if (GET_CODE (x) == PLUS
6020 && GET_CODE (XEXP (x, 0)) == UNSPEC
6021 && GET_CODE (XEXP (x, 1)) == CONST_INT
6022 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6023 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
6024 && GET_CODE (orig_x) != MEM)))
6025 {
6026 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
6027 if (y)
6028 return gen_rtx_PLUS (Pmode, y, x);
6029 return x;
6030 }
6031
6032 return orig_x;
6033 }
6034 \f
6035 static void
6036 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
6037 int fp, FILE *file)
6038 {
6039 const char *suffix;
6040
6041 if (mode == CCFPmode || mode == CCFPUmode)
6042 {
6043 enum rtx_code second_code, bypass_code;
6044 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
6045 if (bypass_code != UNKNOWN || second_code != UNKNOWN)
6046 abort ();
6047 code = ix86_fp_compare_code_to_integer (code);
6048 mode = CCmode;
6049 }
6050 if (reverse)
6051 code = reverse_condition (code);
6052
6053 switch (code)
6054 {
6055 case EQ:
6056 suffix = "e";
6057 break;
6058 case NE:
6059 suffix = "ne";
6060 break;
6061 case GT:
6062 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
6063 abort ();
6064 suffix = "g";
6065 break;
6066 case GTU:
6067 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
6068 Those same assemblers have the same but opposite lossage on cmov. */
6069 if (mode != CCmode)
6070 abort ();
6071 suffix = fp ? "nbe" : "a";
6072 break;
6073 case LT:
6074 if (mode == CCNOmode || mode == CCGOCmode)
6075 suffix = "s";
6076 else if (mode == CCmode || mode == CCGCmode)
6077 suffix = "l";
6078 else
6079 abort ();
6080 break;
6081 case LTU:
6082 if (mode != CCmode)
6083 abort ();
6084 suffix = "b";
6085 break;
6086 case GE:
6087 if (mode == CCNOmode || mode == CCGOCmode)
6088 suffix = "ns";
6089 else if (mode == CCmode || mode == CCGCmode)
6090 suffix = "ge";
6091 else
6092 abort ();
6093 break;
6094 case GEU:
6095 /* ??? As above. */
6096 if (mode != CCmode)
6097 abort ();
6098 suffix = fp ? "nb" : "ae";
6099 break;
6100 case LE:
6101 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
6102 abort ();
6103 suffix = "le";
6104 break;
6105 case LEU:
6106 if (mode != CCmode)
6107 abort ();
6108 suffix = "be";
6109 break;
6110 case UNORDERED:
6111 suffix = fp ? "u" : "p";
6112 break;
6113 case ORDERED:
6114 suffix = fp ? "nu" : "np";
6115 break;
6116 default:
6117 abort ();
6118 }
6119 fputs (suffix, file);
6120 }
6121
6122 /* Print the name of register X to FILE based on its machine mode and number.
6123 If CODE is 'w', pretend the mode is HImode.
6124 If CODE is 'b', pretend the mode is QImode.
6125 If CODE is 'k', pretend the mode is SImode.
6126 If CODE is 'q', pretend the mode is DImode.
6127 If CODE is 'h', pretend the reg is the `high' byte register.
6128 If CODE is 'y', print "st(0)" instead of "st", if the reg is a stack op. */
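/* For example, with X being hard register 0 (the a register), code 'b'
   prints %al, 'w' prints %ax, 'k' prints %eax, and 'q' prints %rax
   (64-bit only).  */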
6129
6130 void
6131 print_reg (rtx x, int code, FILE *file)
6132 {
6133 if (REGNO (x) == ARG_POINTER_REGNUM
6134 || REGNO (x) == FRAME_POINTER_REGNUM
6135 || REGNO (x) == FLAGS_REG
6136 || REGNO (x) == FPSR_REG)
6137 abort ();
6138
6139 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
6140 putc ('%', file);
6141
6142 if (code == 'w' || MMX_REG_P (x))
6143 code = 2;
6144 else if (code == 'b')
6145 code = 1;
6146 else if (code == 'k')
6147 code = 4;
6148 else if (code == 'q')
6149 code = 8;
6150 else if (code == 'y')
6151 code = 3;
6152 else if (code == 'h')
6153 code = 0;
6154 else
6155 code = GET_MODE_SIZE (GET_MODE (x));
6156
6157 /* Irritatingly, the AMD extended registers use a different naming
6158 convention from the normal registers. */
6159 if (REX_INT_REG_P (x))
6160 {
6161 if (!TARGET_64BIT)
6162 abort ();
6163 switch (code)
6164 {
6165 case 0:
6166 error ("extended registers have no high halves");
6167 break;
6168 case 1:
6169 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
6170 break;
6171 case 2:
6172 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
6173 break;
6174 case 4:
6175 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
6176 break;
6177 case 8:
6178 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
6179 break;
6180 default:
6181 error ("unsupported operand size for extended register");
6182 break;
6183 }
6184 return;
6185 }
6186 switch (code)
6187 {
6188 case 3:
6189 if (STACK_TOP_P (x))
6190 {
6191 fputs ("st(0)", file);
6192 break;
6193 }
6194 /* FALLTHRU */
6195 case 8:
6196 case 4:
6197 case 12:
6198 if (! ANY_FP_REG_P (x))
6199 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
6200 /* FALLTHRU */
6201 case 16:
6202 case 2:
6203 normal:
6204 fputs (hi_reg_name[REGNO (x)], file);
6205 break;
6206 case 1:
6207 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
6208 goto normal;
6209 fputs (qi_reg_name[REGNO (x)], file);
6210 break;
6211 case 0:
6212 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
6213 goto normal;
6214 fputs (qi_high_reg_name[REGNO (x)], file);
6215 break;
6216 default:
6217 abort ();
6218 }
6219 }
6220
6221 /* Locate some local-dynamic symbol still in use by this function
6222 so that we can print its name in some tls_local_dynamic_base
6223 pattern. */
6224
6225 static const char *
6226 get_some_local_dynamic_name (void)
6227 {
6228 rtx insn;
6229
6230 if (cfun->machine->some_ld_name)
6231 return cfun->machine->some_ld_name;
6232
6233 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
6234 if (INSN_P (insn)
6235 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
6236 return cfun->machine->some_ld_name;
6237
6238 abort ();
6239 }
6240
6241 static int
6242 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
6243 {
6244 rtx x = *px;
6245
6246 if (GET_CODE (x) == SYMBOL_REF
6247 && local_dynamic_symbolic_operand (x, Pmode))
6248 {
6249 cfun->machine->some_ld_name = XSTR (x, 0);
6250 return 1;
6251 }
6252
6253 return 0;
6254 }
6255
6256 /* Meaning of CODE:
6257 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
6258 C -- print opcode suffix for set/cmov insn.
6259 c -- like C, but print reversed condition
6260 F,f -- likewise, but for floating-point.
6261 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
6262 otherwise nothing
6263 R -- print the prefix for register names.
6264 z -- print the opcode suffix for the size of the current operand.
6265 * -- print a star (in certain assembler syntax)
6266 A -- print an absolute memory reference.
6267 w -- print the operand as if it's a "word" (HImode) even if it isn't.
6268 s -- print a shift double count, followed by the assembler's argument
6269 delimiter.
6270 b -- print the QImode name of the register for the indicated operand.
6271 %b0 would print %al if operands[0] is reg 0.
6272 w -- likewise, print the HImode name of the register.
6273 k -- likewise, print the SImode name of the register.
6274 q -- likewise, print the DImode name of the register.
6275 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
6276 y -- print "st(0)" instead of "st" as a register.
6277 D -- print condition for SSE cmp instruction.
6278 P -- if PIC, print an @PLT suffix.
6279 X -- don't print any sort of PIC '@' suffix for a symbol.
6280 & -- print some in-use local-dynamic symbol name.
6281 */
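/* For instance, in a template such as "fistp%z0\t%0" (used below in
   output_fix_trunc), %z0 expands to the operand-size suffix (roughly
   's', 'l' or 'q') and %0 prints the operand itself.  */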
6282
6283 void
6284 print_operand (FILE *file, rtx x, int code)
6285 {
6286 if (code)
6287 {
6288 switch (code)
6289 {
6290 case '*':
6291 if (ASSEMBLER_DIALECT == ASM_ATT)
6292 putc ('*', file);
6293 return;
6294
6295 case '&':
6296 assemble_name (file, get_some_local_dynamic_name ());
6297 return;
6298
6299 case 'A':
6300 if (ASSEMBLER_DIALECT == ASM_ATT)
6301 putc ('*', file);
6302 else if (ASSEMBLER_DIALECT == ASM_INTEL)
6303 {
6304 /* Intel syntax. For absolute addresses, registers should not
6305 be surrounded by brackets. */
6306 if (GET_CODE (x) != REG)
6307 {
6308 putc ('[', file);
6309 PRINT_OPERAND (file, x, 0);
6310 putc (']', file);
6311 return;
6312 }
6313 }
6314 else
6315 abort ();
6316
6317 PRINT_OPERAND (file, x, 0);
6318 return;
6319
6320
6321 case 'L':
6322 if (ASSEMBLER_DIALECT == ASM_ATT)
6323 putc ('l', file);
6324 return;
6325
6326 case 'W':
6327 if (ASSEMBLER_DIALECT == ASM_ATT)
6328 putc ('w', file);
6329 return;
6330
6331 case 'B':
6332 if (ASSEMBLER_DIALECT == ASM_ATT)
6333 putc ('b', file);
6334 return;
6335
6336 case 'Q':
6337 if (ASSEMBLER_DIALECT == ASM_ATT)
6338 putc ('l', file);
6339 return;
6340
6341 case 'S':
6342 if (ASSEMBLER_DIALECT == ASM_ATT)
6343 putc ('s', file);
6344 return;
6345
6346 case 'T':
6347 if (ASSEMBLER_DIALECT == ASM_ATT)
6348 putc ('t', file);
6349 return;
6350
6351 case 'z':
6352 /* 387 opcodes don't get size suffixes if the operands are
6353 registers. */
6354 if (STACK_REG_P (x))
6355 return;
6356
6357 /* Likewise if using Intel opcodes. */
6358 if (ASSEMBLER_DIALECT == ASM_INTEL)
6359 return;
6360
6361 /* Derive the opcode suffix from the size of the operand. */
6362 switch (GET_MODE_SIZE (GET_MODE (x)))
6363 {
6364 case 2:
6365 #ifdef HAVE_GAS_FILDS_FISTS
6366 putc ('s', file);
6367 #endif
6368 return;
6369
6370 case 4:
6371 if (GET_MODE (x) == SFmode)
6372 {
6373 putc ('s', file);
6374 return;
6375 }
6376 else
6377 putc ('l', file);
6378 return;
6379
6380 case 12:
6381 case 16:
6382 putc ('t', file);
6383 return;
6384
6385 case 8:
6386 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
6387 {
6388 #ifdef GAS_MNEMONICS
6389 putc ('q', file);
6390 #else
6391 putc ('l', file);
6392 putc ('l', file);
6393 #endif
6394 }
6395 else
6396 putc ('l', file);
6397 return;
6398
6399 default:
6400 abort ();
6401 }
6402
6403 case 'b':
6404 case 'w':
6405 case 'k':
6406 case 'q':
6407 case 'h':
6408 case 'y':
6409 case 'X':
6410 case 'P':
6411 break;
6412
6413 case 's':
6414 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
6415 {
6416 PRINT_OPERAND (file, x, 0);
6417 putc (',', file);
6418 }
6419 return;
6420
6421 case 'D':
6422 /* Little bit of braindamage here. The SSE compare instructions
6423 use completely different names for the comparisons than the
6424 fp conditional moves do. */
6425 switch (GET_CODE (x))
6426 {
6427 case EQ:
6428 case UNEQ:
6429 fputs ("eq", file);
6430 break;
6431 case LT:
6432 case UNLT:
6433 fputs ("lt", file);
6434 break;
6435 case LE:
6436 case UNLE:
6437 fputs ("le", file);
6438 break;
6439 case UNORDERED:
6440 fputs ("unord", file);
6441 break;
6442 case NE:
6443 case LTGT:
6444 fputs ("neq", file);
6445 break;
6446 case UNGE:
6447 case GE:
6448 fputs ("nlt", file);
6449 break;
6450 case UNGT:
6451 case GT:
6452 fputs ("nle", file);
6453 break;
6454 case ORDERED:
6455 fputs ("ord", file);
6456 break;
6457 default:
6458 abort ();
6459 break;
6460 }
6461 return;
6462 case 'O':
6463 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
6464 if (ASSEMBLER_DIALECT == ASM_ATT)
6465 {
6466 switch (GET_MODE (x))
6467 {
6468 case HImode: putc ('w', file); break;
6469 case SImode:
6470 case SFmode: putc ('l', file); break;
6471 case DImode:
6472 case DFmode: putc ('q', file); break;
6473 default: abort ();
6474 }
6475 putc ('.', file);
6476 }
6477 #endif
6478 return;
6479 case 'C':
6480 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
6481 return;
6482 case 'F':
6483 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
6484 if (ASSEMBLER_DIALECT == ASM_ATT)
6485 putc ('.', file);
6486 #endif
6487 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
6488 return;
6489
6490 /* Like above, but reverse condition */
6491 case 'c':
6492 /* Check to see if argument to %c is really a constant
6493 and not a condition code which needs to be reversed. */
6494 if (!COMPARISON_P (x))
6495 {
6496 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
6497 return;
6498 }
6499 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
6500 return;
6501 case 'f':
6502 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
6503 if (ASSEMBLER_DIALECT == ASM_ATT)
6504 putc ('.', file);
6505 #endif
6506 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
6507 return;
6508 case '+':
6509 {
6510 rtx x;
6511
6512 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
6513 return;
6514
6515 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
6516 if (x)
6517 {
6518 int pred_val = INTVAL (XEXP (x, 0));
6519
6520 if (pred_val < REG_BR_PROB_BASE * 45 / 100
6521 || pred_val > REG_BR_PROB_BASE * 55 / 100)
6522 {
6523 int taken = pred_val > REG_BR_PROB_BASE / 2;
6524 int cputaken = final_forward_branch_p (current_output_insn) == 0;
6525
6526 /* Emit hints only in the case the default branch prediction
6527 heuristics would fail. */
6528 if (taken != cputaken)
6529 {
6530 /* We use 3e (DS) prefix for taken branches and
6531 2e (CS) prefix for not taken branches. */
6532 if (taken)
6533 fputs ("ds ; ", file);
6534 else
6535 fputs ("cs ; ", file);
6536 }
6537 }
6538 }
6539 return;
6540 }
6541 default:
6542 output_operand_lossage ("invalid operand code '%c'", code);
6543 }
6544 }
6545
6546 if (GET_CODE (x) == REG)
6547 print_reg (x, code, file);
6548
6549 else if (GET_CODE (x) == MEM)
6550 {
6551 /* No `byte ptr' prefix for call instructions. */
6552 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
6553 {
6554 const char * size;
6555 switch (GET_MODE_SIZE (GET_MODE (x)))
6556 {
6557 case 1: size = "BYTE"; break;
6558 case 2: size = "WORD"; break;
6559 case 4: size = "DWORD"; break;
6560 case 8: size = "QWORD"; break;
6561 case 12: size = "XWORD"; break;
6562 case 16: size = "XMMWORD"; break;
6563 default:
6564 abort ();
6565 }
6566
6567 /* Check for explicit size override (codes 'b', 'w' and 'k'). */
6568 if (code == 'b')
6569 size = "BYTE";
6570 else if (code == 'w')
6571 size = "WORD";
6572 else if (code == 'k')
6573 size = "DWORD";
6574
6575 fputs (size, file);
6576 fputs (" PTR ", file);
6577 }
6578
6579 x = XEXP (x, 0);
6580 /* Avoid (%rip) for call operands. */
6581 if (CONSTANT_ADDRESS_P (x) && code == 'P'
6582 && GET_CODE (x) != CONST_INT)
6583 output_addr_const (file, x);
6584 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
6585 output_operand_lossage ("invalid constraints for operand");
6586 else
6587 output_address (x);
6588 }
6589
6590 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
6591 {
6592 REAL_VALUE_TYPE r;
6593 long l;
6594
6595 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6596 REAL_VALUE_TO_TARGET_SINGLE (r, l);
6597
6598 if (ASSEMBLER_DIALECT == ASM_ATT)
6599 putc ('$', file);
6600 fprintf (file, "0x%08lx", l);
6601 }
6602
6603 /* These float cases don't actually occur as immediate operands. */
6604 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
6605 {
6606 char dstr[30];
6607
6608 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
6609 fprintf (file, "%s", dstr);
6610 }
6611
6612 else if (GET_CODE (x) == CONST_DOUBLE
6613 && GET_MODE (x) == XFmode)
6614 {
6615 char dstr[30];
6616
6617 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
6618 fprintf (file, "%s", dstr);
6619 }
6620
6621 else
6622 {
6623 if (code != 'P')
6624 {
6625 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
6626 {
6627 if (ASSEMBLER_DIALECT == ASM_ATT)
6628 putc ('$', file);
6629 }
6630 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
6631 || GET_CODE (x) == LABEL_REF)
6632 {
6633 if (ASSEMBLER_DIALECT == ASM_ATT)
6634 putc ('$', file);
6635 else
6636 fputs ("OFFSET FLAT:", file);
6637 }
6638 }
6639 if (GET_CODE (x) == CONST_INT)
6640 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6641 else if (flag_pic)
6642 output_pic_addr_const (file, x, code);
6643 else
6644 output_addr_const (file, x);
6645 }
6646 }
6647 \f
6648 /* Print a memory operand whose address is ADDR. */
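/* For example, base %eax, index %ebx, scale 4 and displacement 8 is
   printed as "8(%eax,%ebx,4)" in AT&T syntax and roughly as
   "[eax+8+ebx*4]" in Intel syntax.  */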
6649
6650 void
6651 print_operand_address (FILE *file, rtx addr)
6652 {
6653 struct ix86_address parts;
6654 rtx base, index, disp;
6655 int scale;
6656
6657 if (! ix86_decompose_address (addr, &parts))
6658 abort ();
6659
6660 base = parts.base;
6661 index = parts.index;
6662 disp = parts.disp;
6663 scale = parts.scale;
6664
6665 switch (parts.seg)
6666 {
6667 case SEG_DEFAULT:
6668 break;
6669 case SEG_FS:
6670 case SEG_GS:
6671 if (USER_LABEL_PREFIX[0] == 0)
6672 putc ('%', file);
6673 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
6674 break;
6675 default:
6676 abort ();
6677 }
6678
6679 if (!base && !index)
6680 {
6681 /* A displacement-only address requires special attention. */
6682
6683 if (GET_CODE (disp) == CONST_INT)
6684 {
6685 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
6686 {
6687 if (USER_LABEL_PREFIX[0] == 0)
6688 putc ('%', file);
6689 fputs ("ds:", file);
6690 }
6691 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
6692 }
6693 else if (flag_pic)
6694 output_pic_addr_const (file, disp, 0);
6695 else
6696 output_addr_const (file, disp);
6697
6698 /* Use the one byte shorter RIP-relative addressing for 64-bit mode. */
6699 if (TARGET_64BIT
6700 && ((GET_CODE (disp) == SYMBOL_REF
6701 && ! tls_symbolic_operand (disp, GET_MODE (disp)))
6702 || GET_CODE (disp) == LABEL_REF
6703 || (GET_CODE (disp) == CONST
6704 && GET_CODE (XEXP (disp, 0)) == PLUS
6705 && (GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
6706 || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
6707 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)))
6708 fputs ("(%rip)", file);
6709 }
6710 else
6711 {
6712 if (ASSEMBLER_DIALECT == ASM_ATT)
6713 {
6714 if (disp)
6715 {
6716 if (flag_pic)
6717 output_pic_addr_const (file, disp, 0);
6718 else if (GET_CODE (disp) == LABEL_REF)
6719 output_asm_label (disp);
6720 else
6721 output_addr_const (file, disp);
6722 }
6723
6724 putc ('(', file);
6725 if (base)
6726 print_reg (base, 0, file);
6727 if (index)
6728 {
6729 putc (',', file);
6730 print_reg (index, 0, file);
6731 if (scale != 1)
6732 fprintf (file, ",%d", scale);
6733 }
6734 putc (')', file);
6735 }
6736 else
6737 {
6738 rtx offset = NULL_RTX;
6739
6740 if (disp)
6741 {
6742 /* Pull out the offset of a symbol; print any symbol itself. */
6743 if (GET_CODE (disp) == CONST
6744 && GET_CODE (XEXP (disp, 0)) == PLUS
6745 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
6746 {
6747 offset = XEXP (XEXP (disp, 0), 1);
6748 disp = gen_rtx_CONST (VOIDmode,
6749 XEXP (XEXP (disp, 0), 0));
6750 }
6751
6752 if (flag_pic)
6753 output_pic_addr_const (file, disp, 0);
6754 else if (GET_CODE (disp) == LABEL_REF)
6755 output_asm_label (disp);
6756 else if (GET_CODE (disp) == CONST_INT)
6757 offset = disp;
6758 else
6759 output_addr_const (file, disp);
6760 }
6761
6762 putc ('[', file);
6763 if (base)
6764 {
6765 print_reg (base, 0, file);
6766 if (offset)
6767 {
6768 if (INTVAL (offset) >= 0)
6769 putc ('+', file);
6770 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6771 }
6772 }
6773 else if (offset)
6774 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6775 else
6776 putc ('0', file);
6777
6778 if (index)
6779 {
6780 putc ('+', file);
6781 print_reg (index, 0, file);
6782 if (scale != 1)
6783 fprintf (file, "*%d", scale);
6784 }
6785 putc (']', file);
6786 }
6787 }
6788 }
6789
6790 bool
6791 output_addr_const_extra (FILE *file, rtx x)
6792 {
6793 rtx op;
6794
6795 if (GET_CODE (x) != UNSPEC)
6796 return false;
6797
6798 op = XVECEXP (x, 0, 0);
6799 switch (XINT (x, 1))
6800 {
6801 case UNSPEC_GOTTPOFF:
6802 output_addr_const (file, op);
6803 /* FIXME: This might be @TPOFF in Sun ld. */
6804 fputs ("@GOTTPOFF", file);
6805 break;
6806 case UNSPEC_TPOFF:
6807 output_addr_const (file, op);
6808 fputs ("@TPOFF", file);
6809 break;
6810 case UNSPEC_NTPOFF:
6811 output_addr_const (file, op);
6812 if (TARGET_64BIT)
6813 fputs ("@TPOFF", file);
6814 else
6815 fputs ("@NTPOFF", file);
6816 break;
6817 case UNSPEC_DTPOFF:
6818 output_addr_const (file, op);
6819 fputs ("@DTPOFF", file);
6820 break;
6821 case UNSPEC_GOTNTPOFF:
6822 output_addr_const (file, op);
6823 if (TARGET_64BIT)
6824 fputs ("@GOTTPOFF(%rip)", file);
6825 else
6826 fputs ("@GOTNTPOFF", file);
6827 break;
6828 case UNSPEC_INDNTPOFF:
6829 output_addr_const (file, op);
6830 fputs ("@INDNTPOFF", file);
6831 break;
6832
6833 default:
6834 return false;
6835 }
6836
6837 return true;
6838 }
6839 \f
6840 /* Split one or more DImode RTL references into pairs of SImode
6841 references. The RTL can be REG, offsettable MEM, integer constant, or
6842 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
6843 split and "num" is its length. lo_half and hi_half are output arrays
6844 that parallel "operands". */
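/* For example, a DImode pseudo register is split into its SImode low
   and high subwords, and an offsettable MEM such as 8(%ebp) is split
   into the SImode MEMs at 8(%ebp) and 12(%ebp).  */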
6845
6846 void
6847 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
6848 {
6849 while (num--)
6850 {
6851 rtx op = operands[num];
6852
6853 /* simplify_subreg refuses to split volatile memory addresses,
6854 but we still have to handle them. */
6855 if (GET_CODE (op) == MEM)
6856 {
6857 lo_half[num] = adjust_address (op, SImode, 0);
6858 hi_half[num] = adjust_address (op, SImode, 4);
6859 }
6860 else
6861 {
6862 lo_half[num] = simplify_gen_subreg (SImode, op,
6863 GET_MODE (op) == VOIDmode
6864 ? DImode : GET_MODE (op), 0);
6865 hi_half[num] = simplify_gen_subreg (SImode, op,
6866 GET_MODE (op) == VOIDmode
6867 ? DImode : GET_MODE (op), 4);
6868 }
6869 }
6870 }
6871 /* Split one or more TImode RTL references into pairs of DImode
6872 references. The RTL can be REG, offsettable MEM, integer constant, or
6873 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
6874 split and "num" is its length. lo_half and hi_half are output arrays
6875 that parallel "operands". */
6876
6877 void
6878 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
6879 {
6880 while (num--)
6881 {
6882 rtx op = operands[num];
6883
6884 /* simplify_subreg refuses to split volatile memory addresses, but we
6885 still have to handle them. */
6886 if (GET_CODE (op) == MEM)
6887 {
6888 lo_half[num] = adjust_address (op, DImode, 0);
6889 hi_half[num] = adjust_address (op, DImode, 8);
6890 }
6891 else
6892 {
6893 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
6894 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
6895 }
6896 }
6897 }
6898 \f
6899 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
6900 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
6901 is the expression of the binary operation. The output may either be
6902 emitted here, or returned to the caller, like all output_* functions.
6903
6904 There is no guarantee that the operands are the same mode, as they
6905 might be within FLOAT or FLOAT_EXTEND expressions. */
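/* For example, for (set (reg st0) (plus (reg st0) (mem))) this returns
   roughly "fadd%z2\t%2", while an SFmode SSE addition yields
   "addss\t{%2, %0|%0, %2}".  */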
6906
6907 #ifndef SYSV386_COMPAT
6908 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
6909 wants to fix the assemblers because that causes incompatibility
6910 with gcc. No-one wants to fix gcc because that causes
6911 incompatibility with assemblers... You can use the option of
6912 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
6913 #define SYSV386_COMPAT 1
6914 #endif
6915
6916 const char *
6917 output_387_binary_op (rtx insn, rtx *operands)
6918 {
6919 static char buf[30];
6920 const char *p;
6921 const char *ssep;
6922 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
6923
6924 #ifdef ENABLE_CHECKING
6925 /* Even if we do not want to check the inputs, this documents the input
6926 constraints, which helps in understanding the following code. */
6927 if (STACK_REG_P (operands[0])
6928 && ((REG_P (operands[1])
6929 && REGNO (operands[0]) == REGNO (operands[1])
6930 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
6931 || (REG_P (operands[2])
6932 && REGNO (operands[0]) == REGNO (operands[2])
6933 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
6934 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
6935 ; /* ok */
6936 else if (!is_sse)
6937 abort ();
6938 #endif
6939
6940 switch (GET_CODE (operands[3]))
6941 {
6942 case PLUS:
6943 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6944 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6945 p = "fiadd";
6946 else
6947 p = "fadd";
6948 ssep = "add";
6949 break;
6950
6951 case MINUS:
6952 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6953 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6954 p = "fisub";
6955 else
6956 p = "fsub";
6957 ssep = "sub";
6958 break;
6959
6960 case MULT:
6961 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6962 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6963 p = "fimul";
6964 else
6965 p = "fmul";
6966 ssep = "mul";
6967 break;
6968
6969 case DIV:
6970 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6971 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6972 p = "fidiv";
6973 else
6974 p = "fdiv";
6975 ssep = "div";
6976 break;
6977
6978 default:
6979 abort ();
6980 }
6981
6982 if (is_sse)
6983 {
6984 strcpy (buf, ssep);
6985 if (GET_MODE (operands[0]) == SFmode)
6986 strcat (buf, "ss\t{%2, %0|%0, %2}");
6987 else
6988 strcat (buf, "sd\t{%2, %0|%0, %2}");
6989 return buf;
6990 }
6991 strcpy (buf, p);
6992
6993 switch (GET_CODE (operands[3]))
6994 {
6995 case MULT:
6996 case PLUS:
6997 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
6998 {
6999 rtx temp = operands[2];
7000 operands[2] = operands[1];
7001 operands[1] = temp;
7002 }
7003
7004 /* We know operands[0] == operands[1]. */
7005
7006 if (GET_CODE (operands[2]) == MEM)
7007 {
7008 p = "%z2\t%2";
7009 break;
7010 }
7011
7012 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7013 {
7014 if (STACK_TOP_P (operands[0]))
7015 /* How is it that we are storing to a dead operand[2]?
7016 Well, presumably operands[1] is dead too. We can't
7017 store the result to st(0) as st(0) gets popped on this
7018 instruction. Instead store to operands[2] (which I
7019 think has to be st(1)). st(1) will be popped later.
7020 gcc <= 2.8.1 didn't have this check and generated
7021 assembly code that the Unixware assembler rejected. */
7022 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7023 else
7024 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7025 break;
7026 }
7027
7028 if (STACK_TOP_P (operands[0]))
7029 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7030 else
7031 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7032 break;
7033
7034 case MINUS:
7035 case DIV:
7036 if (GET_CODE (operands[1]) == MEM)
7037 {
7038 p = "r%z1\t%1";
7039 break;
7040 }
7041
7042 if (GET_CODE (operands[2]) == MEM)
7043 {
7044 p = "%z2\t%2";
7045 break;
7046 }
7047
7048 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7049 {
7050 #if SYSV386_COMPAT
7051 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7052 derived assemblers, confusingly reverse the direction of
7053 the operation for fsub{r} and fdiv{r} when the
7054 destination register is not st(0). The Intel assembler
7055 doesn't have this brain damage. Read !SYSV386_COMPAT to
7056 figure out what the hardware really does. */
7057 if (STACK_TOP_P (operands[0]))
7058 p = "{p\t%0, %2|rp\t%2, %0}";
7059 else
7060 p = "{rp\t%2, %0|p\t%0, %2}";
7061 #else
7062 if (STACK_TOP_P (operands[0]))
7063 /* As above for fmul/fadd, we can't store to st(0). */
7064 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7065 else
7066 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7067 #endif
7068 break;
7069 }
7070
7071 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
7072 {
7073 #if SYSV386_COMPAT
7074 if (STACK_TOP_P (operands[0]))
7075 p = "{rp\t%0, %1|p\t%1, %0}";
7076 else
7077 p = "{p\t%1, %0|rp\t%0, %1}";
7078 #else
7079 if (STACK_TOP_P (operands[0]))
7080 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7081 else
7082 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7083 #endif
7084 break;
7085 }
7086
7087 if (STACK_TOP_P (operands[0]))
7088 {
7089 if (STACK_TOP_P (operands[1]))
7090 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7091 else
7092 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
7093 break;
7094 }
7095 else if (STACK_TOP_P (operands[1]))
7096 {
7097 #if SYSV386_COMPAT
7098 p = "{\t%1, %0|r\t%0, %1}";
7099 #else
7100 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
7101 #endif
7102 }
7103 else
7104 {
7105 #if SYSV386_COMPAT
7106 p = "{r\t%2, %0|\t%0, %2}";
7107 #else
7108 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7109 #endif
7110 }
7111 break;
7112
7113 default:
7114 abort ();
7115 }
7116
7117 strcat (buf, p);
7118 return buf;
7119 }
7120
7121 /* Output code to initialize control word copies used by trunc?f?i and
7122 rounding patterns. CURRENT_MODE is set to the current control word,
7123 while NEW_MODE is set to the new control word. */
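/* The rounding-control field occupies bits 10-11 of the x87 control
   word: 00 = to nearest, 01 = down, 10 = up, 11 = truncate toward zero;
   bit 5 masks the precision exception.  Hence the 0x0400, 0x0800,
   0x0c00 and 0x0020 masks used below.  */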
7124
7125 void
7126 emit_i387_cw_initialization (rtx current_mode, rtx new_mode, int mode)
7127 {
7128 rtx reg = gen_reg_rtx (HImode);
7129
7130 emit_insn (gen_x86_fnstcw_1 (current_mode));
7131 emit_move_insn (reg, current_mode);
7132
7133 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
7134 && !TARGET_64BIT)
7135 {
7136 switch (mode)
7137 {
7138 case I387_CW_FLOOR:
7139 /* round down toward -oo */
7140 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
7141 break;
7142
7143 case I387_CW_CEIL:
7144 /* round up toward +oo */
7145 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
7146 break;
7147
7148 case I387_CW_TRUNC:
7149 /* round toward zero (truncate) */
7150 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
7151 break;
7152
7153 case I387_CW_MASK_PM:
7154 /* mask precision exception for nearbyint() */
7155 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
7156 break;
7157
7158 default:
7159 abort();
7160 }
7161 }
7162 else
7163 {
7164 switch (mode)
7165 {
7166 case I387_CW_FLOOR:
7167 /* round down toward -oo */
7168 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
7169 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
7170 break;
7171
7172 case I387_CW_CEIL:
7173 /* round up toward +oo */
7174 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
7175 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
7176 break;
7177
7178 case I387_CW_TRUNC:
7179 /* round toward zero (truncate) */
7180 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
7181 break;
7182
7183 case I387_CW_MASK_PM:
7184 /* mask precision exception for nearbyint() */
7185 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
7186 break;
7187
7188 default:
7189 abort();
7190 }
7191 }
7192
7193 emit_move_insn (new_mode, reg);
7194 }
7195
7196 /* Output code for INSN to convert a float to a signed int. OPERANDS
7197 are the insn operands. The output may be [HSD]Imode and the input
7198 operand may be [SDX]Fmode. */
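/* The emitted sequence is roughly: load the truncating control word
   with "fldcw %3", store with "fist%z0" or "fistp%z0", then restore
   the original control word with "fldcw %2".  */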
7199
7200 const char *
7201 output_fix_trunc (rtx insn, rtx *operands)
7202 {
7203 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7204 int dimode_p = GET_MODE (operands[0]) == DImode;
7205
7206 /* Jump through a hoop or two for DImode, since the hardware has no
7207 non-popping instruction. We used to do this a different way, but
7208 that was somewhat fragile and broke with post-reload splitters. */
7209 if (dimode_p && !stack_top_dies)
7210 output_asm_insn ("fld\t%y1", operands);
7211
7212 if (!STACK_TOP_P (operands[1]))
7213 abort ();
7214
7215 if (GET_CODE (operands[0]) != MEM)
7216 abort ();
7217
7218 output_asm_insn ("fldcw\t%3", operands);
7219 if (stack_top_dies || dimode_p)
7220 output_asm_insn ("fistp%z0\t%0", operands);
7221 else
7222 output_asm_insn ("fist%z0\t%0", operands);
7223 output_asm_insn ("fldcw\t%2", operands);
7224
7225 return "";
7226 }
7227
7228 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
7229 should be used and 2 when fnstsw should be used. UNORDERED_P is true
7230 when fucom should be used. */
7231
7232 const char *
7233 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
7234 {
7235 int stack_top_dies;
7236 rtx cmp_op0, cmp_op1;
7237 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
7238
7239 if (eflags_p == 2)
7240 {
7241 cmp_op0 = operands[1];
7242 cmp_op1 = operands[2];
7243 }
7244 else
7245 {
7246 cmp_op0 = operands[0];
7247 cmp_op1 = operands[1];
7248 }
7249
7250 if (is_sse)
7251 {
7252 if (GET_MODE (operands[0]) == SFmode)
7253 if (unordered_p)
7254 return "ucomiss\t{%1, %0|%0, %1}";
7255 else
7256 return "comiss\t{%1, %0|%0, %1}";
7257 else
7258 if (unordered_p)
7259 return "ucomisd\t{%1, %0|%0, %1}";
7260 else
7261 return "comisd\t{%1, %0|%0, %1}";
7262 }
7263
7264 if (! STACK_TOP_P (cmp_op0))
7265 abort ();
7266
7267 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7268
7269 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
7270 {
7271 if (stack_top_dies)
7272 {
7273 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
7274 return TARGET_USE_FFREEP ? "ffreep\t%y1" : "fstp\t%y1";
7275 }
7276 else
7277 return "ftst\n\tfnstsw\t%0";
7278 }
7279
7280 if (STACK_REG_P (cmp_op1)
7281 && stack_top_dies
7282 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
7283 && REGNO (cmp_op1) != FIRST_STACK_REG)
7284 {
7285 /* If the top of the 387 stack dies, and the other operand
7286 is also a stack register that dies, then this must be a
7287 `fcompp' float compare. */
7288
7289 if (eflags_p == 1)
7290 {
7291 /* There is no double popping fcomi variant. Fortunately,
7292 eflags is immune from the fstp's cc clobbering. */
7293 if (unordered_p)
7294 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
7295 else
7296 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
7297 return TARGET_USE_FFREEP ? "ffreep\t%y0" : "fstp\t%y0";
7298 }
7299 else
7300 {
7301 if (eflags_p == 2)
7302 {
7303 if (unordered_p)
7304 return "fucompp\n\tfnstsw\t%0";
7305 else
7306 return "fcompp\n\tfnstsw\t%0";
7307 }
7308 else
7309 {
7310 if (unordered_p)
7311 return "fucompp";
7312 else
7313 return "fcompp";
7314 }
7315 }
7316 }
7317 else
7318 {
7319 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
7320
7321 static const char * const alt[24] =
7322 {
7323 "fcom%z1\t%y1",
7324 "fcomp%z1\t%y1",
7325 "fucom%z1\t%y1",
7326 "fucomp%z1\t%y1",
7327
7328 "ficom%z1\t%y1",
7329 "ficomp%z1\t%y1",
7330 NULL,
7331 NULL,
7332
7333 "fcomi\t{%y1, %0|%0, %y1}",
7334 "fcomip\t{%y1, %0|%0, %y1}",
7335 "fucomi\t{%y1, %0|%0, %y1}",
7336 "fucomip\t{%y1, %0|%0, %y1}",
7337
7338 NULL,
7339 NULL,
7340 NULL,
7341 NULL,
7342
7343 "fcom%z2\t%y2\n\tfnstsw\t%0",
7344 "fcomp%z2\t%y2\n\tfnstsw\t%0",
7345 "fucom%z2\t%y2\n\tfnstsw\t%0",
7346 "fucomp%z2\t%y2\n\tfnstsw\t%0",
7347
7348 "ficom%z2\t%y2\n\tfnstsw\t%0",
7349 "ficomp%z2\t%y2\n\tfnstsw\t%0",
7350 NULL,
7351 NULL
7352 };
7353
7354 int mask;
7355 const char *ret;
7356
7357 mask = eflags_p << 3;
7358 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
7359 mask |= unordered_p << 1;
7360 mask |= stack_top_dies;
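/* For example, an fcomi compare whose top-of-stack operand dies has
   eflags_p == 1, an FP second operand, unordered_p == 0 and
   stack_top_dies == 1, i.e. mask == 9, selecting "fcomip" above;
   an fnstsw-based unordered compare with no pop has eflags_p == 2 and
   unordered_p == 1, i.e. mask == 18, selecting "fucom%z2 ... fnstsw".  */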
7361
7362 if (mask >= 24)
7363 abort ();
7364 ret = alt[mask];
7365 if (ret == NULL)
7366 abort ();
7367
7368 return ret;
7369 }
7370 }
7371
7372 void
7373 ix86_output_addr_vec_elt (FILE *file, int value)
7374 {
7375 const char *directive = ASM_LONG;
7376
7377 if (TARGET_64BIT)
7378 {
7379 #ifdef ASM_QUAD
7380 directive = ASM_QUAD;
7381 #else
7382 abort ();
7383 #endif
7384 }
7385
7386 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
7387 }
7388
7389 void
7390 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
7391 {
7392 if (TARGET_64BIT)
7393 fprintf (file, "%s%s%d-%s%d\n",
7394 ASM_LONG, LPREFIX, value, LPREFIX, rel);
7395 else if (HAVE_AS_GOTOFF_IN_DATA)
7396 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
7397 #if TARGET_MACHO
7398 else if (TARGET_MACHO)
7399 {
7400 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
7401 machopic_output_function_base_name (file);
7402 fprintf (file, "\n");
7403 }
7404 #endif
7405 else
7406 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
7407 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
7408 }
7409 \f
7410 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
7411 for the target. */
7412
7413 void
7414 ix86_expand_clear (rtx dest)
7415 {
7416 rtx tmp;
7417
7418 /* We play register width games, which are only valid after reload. */
7419 if (!reload_completed)
7420 abort ();
7421
7422 /* Avoid HImode and its attendant prefix byte. */
7423 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
7424 dest = gen_rtx_REG (SImode, REGNO (dest));
7425
7426 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
7427
7428 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
7429 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
7430 {
7431 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
7432 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
7433 }
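/* The xor form is smaller (2 bytes versus 5 for "mov $0") but clobbers
   the flags, hence the explicit CC clobber attached above; the plain
   move is kept for targets where TARGET_USE_MOV0 says it is faster and
   we are not optimizing for size.  */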
7434
7435 emit_insn (tmp);
7436 }
7437
7438 /* X is an unchanging MEM. If it is a constant pool reference, return
7439 the constant pool rtx, else NULL. */
7440
7441 rtx
7442 maybe_get_pool_constant (rtx x)
7443 {
7444 x = ix86_delegitimize_address (XEXP (x, 0));
7445
7446 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7447 return get_pool_constant (x);
7448
7449 return NULL_RTX;
7450 }
7451
7452 void
7453 ix86_expand_move (enum machine_mode mode, rtx operands[])
7454 {
7455 int strict = (reload_in_progress || reload_completed);
7456 rtx op0, op1;
7457 enum tls_model model;
7458
7459 op0 = operands[0];
7460 op1 = operands[1];
7461
7462 model = GET_CODE (op1) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (op1) : 0;
7463 if (model)
7464 {
7465 op1 = legitimize_tls_address (op1, model, true);
7466 op1 = force_operand (op1, op0);
7467 if (op1 == op0)
7468 return;
7469 }
7470
7471 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
7472 {
7473 #if TARGET_MACHO
7474 if (MACHOPIC_PURE)
7475 {
7476 rtx temp = ((reload_in_progress
7477 || ((op0 && GET_CODE (op0) == REG)
7478 && mode == Pmode))
7479 ? op0 : gen_reg_rtx (Pmode));
7480 op1 = machopic_indirect_data_reference (op1, temp);
7481 op1 = machopic_legitimize_pic_address (op1, mode,
7482 temp == op1 ? 0 : temp);
7483 }
7484 else if (MACHOPIC_INDIRECT)
7485 op1 = machopic_indirect_data_reference (op1, 0);
7486 if (op0 == op1)
7487 return;
7488 #else
7489 if (GET_CODE (op0) == MEM)
7490 op1 = force_reg (Pmode, op1);
7491 else
7492 op1 = legitimize_address (op1, op1, Pmode);
7493 #endif /* TARGET_MACHO */
7494 }
7495 else
7496 {
7497 if (GET_CODE (op0) == MEM
7498 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
7499 || !push_operand (op0, mode))
7500 && GET_CODE (op1) == MEM)
7501 op1 = force_reg (mode, op1);
7502
7503 if (push_operand (op0, mode)
7504 && ! general_no_elim_operand (op1, mode))
7505 op1 = copy_to_mode_reg (mode, op1);
7506
7507 /* Force large constants in 64bit compilation into register
7508 to get them CSEed. */
7509 if (TARGET_64BIT && mode == DImode
7510 && immediate_operand (op1, mode)
7511 && !x86_64_zext_immediate_operand (op1, VOIDmode)
7512 && !register_operand (op0, mode)
7513 && optimize && !reload_completed && !reload_in_progress)
7514 op1 = copy_to_mode_reg (mode, op1);
7515
7516 if (FLOAT_MODE_P (mode))
7517 {
7518 /* If we are loading a floating point constant to a register,
7519 force the value to memory now, since we'll get better code
7520 out of the back end. */
7521
7522 if (strict)
7523 ;
7524 else if (GET_CODE (op1) == CONST_DOUBLE)
7525 {
7526 op1 = validize_mem (force_const_mem (mode, op1));
7527 if (!register_operand (op0, mode))
7528 {
7529 rtx temp = gen_reg_rtx (mode);
7530 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
7531 emit_move_insn (op0, temp);
7532 return;
7533 }
7534 }
7535 }
7536 }
7537
7538 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
7539 }
7540
7541 void
7542 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
7543 {
7544 /* Force constants other than zero into memory. We do not know how
7545 the instructions used to build constants modify the upper 64 bits
7546 of the register; once we have that information we may be able
7547 to handle some of them more efficiently. */
7548 if ((reload_in_progress | reload_completed) == 0
7549 && register_operand (operands[0], mode)
7550 && CONSTANT_P (operands[1]) && operands[1] != CONST0_RTX (mode))
7551 operands[1] = validize_mem (force_const_mem (mode, operands[1]));
7552
7553 /* Make operand1 a register if it isn't already. */
7554 if (!no_new_pseudos
7555 && !register_operand (operands[0], mode)
7556 && !register_operand (operands[1], mode))
7557 {
7558 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
7559 emit_move_insn (operands[0], temp);
7560 return;
7561 }
7562
7563 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
7564 }
7565
7566 /* Attempt to expand a binary operator. Make the expansion closer to the
7567 actual machine than just general_operand, which would allow 3 separate
7568 memory references (one output, two input) in a single insn. */
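/* For instance, "mem1 = mem2 + mem3" cannot be matched by any x86 add
   pattern, so one of the sources is forced into a register here; and a
   memory destination must match one of the sources (src1, or src2 for
   commutative codes), otherwise the result is computed in a register
   and stored afterwards.  */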
7569
7570 void
7571 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
7572 rtx operands[])
7573 {
7574 int matching_memory;
7575 rtx src1, src2, dst, op, clob;
7576
7577 dst = operands[0];
7578 src1 = operands[1];
7579 src2 = operands[2];
7580
7581 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
7582 if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
7583 && (rtx_equal_p (dst, src2)
7584 || immediate_operand (src1, mode)))
7585 {
7586 rtx temp = src1;
7587 src1 = src2;
7588 src2 = temp;
7589 }
7590
7591 /* If the destination is memory, and we do not have matching source
7592 operands, do things in registers. */
7593 matching_memory = 0;
7594 if (GET_CODE (dst) == MEM)
7595 {
7596 if (rtx_equal_p (dst, src1))
7597 matching_memory = 1;
7598 else if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
7599 && rtx_equal_p (dst, src2))
7600 matching_memory = 2;
7601 else
7602 dst = gen_reg_rtx (mode);
7603 }
7604
7605 /* Both source operands cannot be in memory. */
7606 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
7607 {
7608 if (matching_memory != 2)
7609 src2 = force_reg (mode, src2);
7610 else
7611 src1 = force_reg (mode, src1);
7612 }
7613
7614 /* If the operation is not commutable, source 1 cannot be a constant
7615 or non-matching memory. */
7616 if ((CONSTANT_P (src1)
7617 || (!matching_memory && GET_CODE (src1) == MEM))
7618 && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
7619 src1 = force_reg (mode, src1);
7620
7621 /* If optimizing, copy to regs to improve CSE */
7622 if (optimize && ! no_new_pseudos)
7623 {
7624 if (GET_CODE (dst) == MEM)
7625 dst = gen_reg_rtx (mode);
7626 if (GET_CODE (src1) == MEM)
7627 src1 = force_reg (mode, src1);
7628 if (GET_CODE (src2) == MEM)
7629 src2 = force_reg (mode, src2);
7630 }
7631
7632 /* Emit the instruction. */
7633
7634 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
7635 if (reload_in_progress)
7636 {
7637 /* Reload doesn't know about the flags register, and doesn't know that
7638 it doesn't want to clobber it. We can only do this with PLUS. */
7639 if (code != PLUS)
7640 abort ();
7641 emit_insn (op);
7642 }
7643 else
7644 {
7645 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
7646 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
7647 }
7648
7649 /* Fix up the destination if needed. */
7650 if (dst != operands[0])
7651 emit_move_insn (operands[0], dst);
7652 }
7653
7654 /* Return TRUE or FALSE depending on whether the binary operator meets the
7655 appropriate constraints. */
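/* For example, (set (mem) (plus (reg) (mem))) is rejected below unless
   the destination equals one of the sources, and (set (reg) (minus
   (const_int) (reg))) is rejected because a non-commutative operation
   cannot take a constant as its first source.  */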
7656
7657 int
7658 ix86_binary_operator_ok (enum rtx_code code,
7659 enum machine_mode mode ATTRIBUTE_UNUSED,
7660 rtx operands[3])
7661 {
7662 /* Both source operands cannot be in memory. */
7663 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
7664 return 0;
7665 /* If the operation is not commutable, source 1 cannot be a constant. */
7666 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
7667 return 0;
7668 /* If the destination is memory, we must have a matching source operand. */
7669 if (GET_CODE (operands[0]) == MEM
7670 && ! (rtx_equal_p (operands[0], operands[1])
7671 || (GET_RTX_CLASS (code) == RTX_COMM_ARITH
7672 && rtx_equal_p (operands[0], operands[2]))))
7673 return 0;
7674 /* If the operation is not commutable and the source 1 is memory, we must
7675 have a matching destination. */
7676 if (GET_CODE (operands[1]) == MEM
7677 && GET_RTX_CLASS (code) != RTX_COMM_ARITH
7678 && ! rtx_equal_p (operands[0], operands[1]))
7679 return 0;
7680 return 1;
7681 }
7682
7683 /* Attempt to expand a unary operator. Make the expansion closer to the
7684 actual machine than just general_operand, which would allow 2 separate
7685 memory references (one output, one input) in a single insn. */
7686
7687 void
7688 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
7689 rtx operands[])
7690 {
7691 int matching_memory;
7692 rtx src, dst, op, clob;
7693
7694 dst = operands[0];
7695 src = operands[1];
7696
7697 /* If the destination is memory, and we do not have matching source
7698 operands, do things in registers. */
7699 matching_memory = 0;
7700 if (GET_CODE (dst) == MEM)
7701 {
7702 if (rtx_equal_p (dst, src))
7703 matching_memory = 1;
7704 else
7705 dst = gen_reg_rtx (mode);
7706 }
7707
7708 /* When source operand is memory, destination must match. */
7709 if (!matching_memory && GET_CODE (src) == MEM)
7710 src = force_reg (mode, src);
7711
7712 /* If optimizing, copy to regs to improve CSE */
7713 if (optimize && ! no_new_pseudos)
7714 {
7715 if (GET_CODE (dst) == MEM)
7716 dst = gen_reg_rtx (mode);
7717 if (GET_CODE (src) == MEM)
7718 src = force_reg (mode, src);
7719 }
7720
7721 /* Emit the instruction. */
7722
7723 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
7724 if (reload_in_progress || code == NOT)
7725 {
7726 /* Reload doesn't know about the flags register, and doesn't know that
7727 it doesn't want to clobber it. */
7728 if (code != NOT)
7729 abort ();
7730 emit_insn (op);
7731 }
7732 else
7733 {
7734 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
7735 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
7736 }
7737
7738 /* Fix up the destination if needed. */
7739 if (dst != operands[0])
7740 emit_move_insn (operands[0], dst);
7741 }
7742
7743 /* Return TRUE or FALSE depending on whether the unary operator meets the
7744 appropriate constraints. */
7745
7746 int
7747 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
7748 enum machine_mode mode ATTRIBUTE_UNUSED,
7749 rtx operands[2] ATTRIBUTE_UNUSED)
7750 {
7751 /* If one of operands is memory, source and destination must match. */
7752 if ((GET_CODE (operands[0]) == MEM
7753 || GET_CODE (operands[1]) == MEM)
7754 && ! rtx_equal_p (operands[0], operands[1]))
7755 return FALSE;
7756 return TRUE;
7757 }
7758
7759 /* Return TRUE or FALSE depending on whether the first SET in INSN
7760 has source and destination with matching CC modes, and that the
7761 CC mode is at least as constrained as REQ_MODE. */
7762
7763 int
7764 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
7765 {
7766 rtx set;
7767 enum machine_mode set_mode;
7768
7769 set = PATTERN (insn);
7770 if (GET_CODE (set) == PARALLEL)
7771 set = XVECEXP (set, 0, 0);
7772 if (GET_CODE (set) != SET)
7773 abort ();
7774 if (GET_CODE (SET_SRC (set)) != COMPARE)
7775 abort ();
7776
7777 set_mode = GET_MODE (SET_DEST (set));
7778 switch (set_mode)
7779 {
7780 case CCNOmode:
7781 if (req_mode != CCNOmode
7782 && (req_mode != CCmode
7783 || XEXP (SET_SRC (set), 1) != const0_rtx))
7784 return 0;
7785 break;
7786 case CCmode:
7787 if (req_mode == CCGCmode)
7788 return 0;
7789 /* FALLTHRU */
7790 case CCGCmode:
7791 if (req_mode == CCGOCmode || req_mode == CCNOmode)
7792 return 0;
7793 /* FALLTHRU */
7794 case CCGOCmode:
7795 if (req_mode == CCZmode)
7796 return 0;
7797 /* FALLTHRU */
7798 case CCZmode:
7799 break;
7800
7801 default:
7802 abort ();
7803 }
7804
7805 return (GET_MODE (SET_SRC (set)) == set_mode);
7806 }
7807
7808 /* Generate insn patterns to do an integer compare of OPERANDS. */
7809
7810 static rtx
7811 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
7812 {
7813 enum machine_mode cmpmode;
7814 rtx tmp, flags;
7815
7816 cmpmode = SELECT_CC_MODE (code, op0, op1);
7817 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
7818
7819 /* This is very simple, but making the interface the same as in the
7820 FP case makes the rest of the code easier. */
7821 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
7822 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
7823
7824 /* Return the test that should be put into the flags user, i.e.
7825 the bcc, scc, or cmov instruction. */
7826 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
7827 }
7828
7829 /* Figure out whether to use ordered or unordered fp comparisons.
7830 Return the appropriate mode to use. */
7831
7832 enum machine_mode
7833 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
7834 {
7835 /* ??? In order to make all comparisons reversible, we do all comparisons
7836 non-trapping when compiling for IEEE. Once gcc is able to distinguish
7837 all forms of trapping and nontrapping comparisons, we can make inequality
7838 comparisons trapping again, since it results in better code when using
7839 FCOM based compares. */
7840 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
7841 }
7842
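/* Roughly: EQ/NE need only ZF (CCZmode); the unsigned orders need CF
   (CCmode); signed compares against zero can use the weaker CCGOCmode or
   CCNOmode variants; everything else falls back to CCGCmode.  */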
7843 enum machine_mode
7844 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
7845 {
7846 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
7847 return ix86_fp_compare_mode (code);
7848 switch (code)
7849 {
7850 /* Only zero flag is needed. */
7851 case EQ: /* ZF=0 */
7852 case NE: /* ZF!=0 */
7853 return CCZmode;
7854 /* Codes needing carry flag. */
7855 case GEU: /* CF=0 */
7856 case GTU: /* CF=0 & ZF=0 */
7857 case LTU: /* CF=1 */
7858 case LEU: /* CF=1 | ZF=1 */
7859 return CCmode;
7860 /* Codes possibly doable only with sign flag when
7861 comparing against zero. */
7862 case GE: /* SF=OF or SF=0 */
7863 case LT: /* SF<>OF or SF=1 */
7864 if (op1 == const0_rtx)
7865 return CCGOCmode;
7866 else
7867 /* For other cases Carry flag is not required. */
7868 return CCGCmode;
7869 /* Codes doable only with sign flag when comparing
7870 against zero, but we miss jump instruction for it
7871 so we need to use relational tests against overflow
7872 that thus needs to be zero. */
7873 case GT: /* ZF=0 & SF=OF */
7874 case LE: /* ZF=1 | SF<>OF */
7875 if (op1 == const0_rtx)
7876 return CCNOmode;
7877 else
7878 return CCGCmode;
7879 /* The strcmp pattern does (use flags), and combine may ask us for the proper
7880 mode. */
7881 case USE:
7882 return CCmode;
7883 default:
7884 abort ();
7885 }
7886 }
7887
7888 /* Return the fixed registers used for condition codes. */
7889
7890 static bool
7891 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
7892 {
7893 *p1 = FLAGS_REG;
7894 *p2 = FPSR_REG;
7895 return true;
7896 }
7897
7898 /* If two condition code modes are compatible, return a condition code
7899 mode which is compatible with both. Otherwise, return
7900 VOIDmode. */
7901
7902 static enum machine_mode
7903 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
7904 {
7905 if (m1 == m2)
7906 return m1;
7907
7908 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
7909 return VOIDmode;
7910
7911 if ((m1 == CCGCmode && m2 == CCGOCmode)
7912 || (m1 == CCGOCmode && m2 == CCGCmode))
7913 return CCGCmode;
7914
7915 switch (m1)
7916 {
7917 default:
7918 abort ();
7919
7920 case CCmode:
7921 case CCGCmode:
7922 case CCGOCmode:
7923 case CCNOmode:
7924 case CCZmode:
7925 switch (m2)
7926 {
7927 default:
7928 return VOIDmode;
7929
7930 case CCmode:
7931 case CCGCmode:
7932 case CCGOCmode:
7933 case CCNOmode:
7934 case CCZmode:
7935 return CCmode;
7936 }
7937
7938 case CCFPmode:
7939 case CCFPUmode:
7940 /* These are only compatible with themselves, which we already
7941 checked above. */
7942 return VOIDmode;
7943 }
7944 }
7945
7946 /* Return true if we should use an FCOMI instruction for this fp comparison. */
7947
7948 int
7949 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
7950 {
7951 enum rtx_code swapped_code = swap_condition (code);
7952 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
7953 || (ix86_fp_comparison_cost (swapped_code)
7954 == ix86_fp_comparison_fcomi_cost (swapped_code)));
7955 }
7956
7957 /* Swap, force into registers, or otherwise massage the two operands
7958 to a fp comparison. The operands are updated in place; the new
7959 comparison code is returned. */
7960
7961 static enum rtx_code
7962 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
7963 {
7964 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
7965 rtx op0 = *pop0, op1 = *pop1;
7966 enum machine_mode op_mode = GET_MODE (op0);
7967 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
7968
7969 /* All of the unordered compare instructions only work on registers.
7970 The same is true of the fcomi compare instructions. The same is
7971 true of the XFmode compare instructions if not comparing with
7972 zero (ftst insn is used in this case). */
7973
7974 if (!is_sse
7975 && (fpcmp_mode == CCFPUmode
7976 || (op_mode == XFmode
7977 && ! (standard_80387_constant_p (op0) == 1
7978 || standard_80387_constant_p (op1) == 1))
7979 || ix86_use_fcomi_compare (code)))
7980 {
7981 op0 = force_reg (op_mode, op0);
7982 op1 = force_reg (op_mode, op1);
7983 }
7984 else
7985 {
7986 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
7987 things around if they appear profitable, otherwise force op0
7988 into a register. */
7989
7990 if (standard_80387_constant_p (op0) == 0
7991 || (GET_CODE (op0) == MEM
7992 && ! (standard_80387_constant_p (op1) == 0
7993 || GET_CODE (op1) == MEM)))
7994 {
7995 rtx tmp;
7996 tmp = op0, op0 = op1, op1 = tmp;
7997 code = swap_condition (code);
7998 }
7999
8000 if (GET_CODE (op0) != REG)
8001 op0 = force_reg (op_mode, op0);
8002
8003 if (CONSTANT_P (op1))
8004 {
8005 int tmp = standard_80387_constant_p (op1);
8006 if (tmp == 0)
8007 op1 = validize_mem (force_const_mem (op_mode, op1));
8008 else if (tmp == 1)
8009 {
8010 if (TARGET_CMOVE)
8011 op1 = force_reg (op_mode, op1);
8012 }
8013 else
8014 op1 = force_reg (op_mode, op1);
8015 }
8016 }
8017
8018 /* Try to rearrange the comparison to make it cheaper. */
8019 if (ix86_fp_comparison_cost (code)
8020 > ix86_fp_comparison_cost (swap_condition (code))
8021 && (GET_CODE (op1) == REG || !no_new_pseudos))
8022 {
8023 rtx tmp;
8024 tmp = op0, op0 = op1, op1 = tmp;
8025 code = swap_condition (code);
8026 if (GET_CODE (op0) != REG)
8027 op0 = force_reg (op_mode, op0);
8028 }
8029
8030 *pop0 = op0;
8031 *pop1 = op1;
8032 return code;
8033 }
8034
8035 /* Convert the comparison codes we use to represent FP comparisons to the
8036 integer code that will result in a proper branch. Return UNKNOWN if no such code
8037 is available. */
8038
8039 enum rtx_code
8040 ix86_fp_compare_code_to_integer (enum rtx_code code)
8041 {
8042 switch (code)
8043 {
8044 case GT:
8045 return GTU;
8046 case GE:
8047 return GEU;
8048 case ORDERED:
8049 case UNORDERED:
8050 return code;
8051 break;
8052 case UNEQ:
8053 return EQ;
8054 break;
8055 case UNLT:
8056 return LTU;
8057 break;
8058 case UNLE:
8059 return LEU;
8060 break;
8061 case LTGT:
8062 return NE;
8063 break;
8064 default:
8065 return UNKNOWN;
8066 }
8067 }
8068
8069 /* Split comparison code CODE into comparisons we can do using branch
8070 instructions. BYPASS_CODE is the comparison code for a branch that will
8071 branch around FIRST_CODE and SECOND_CODE. If one of the branches
8072 is not required, its value is set to UNKNOWN.
8073 We never require more than two branches. */
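/* For example, with IEEE math "a < b" cannot be tested with a single
   fcomi branch because CF is also set when the operands are unordered,
   so it becomes first_code = UNLT guarded by bypass_code = UNORDERED,
   while "a != b" becomes first_code = LTGT plus second_code = UNORDERED
   so that the branch is also taken for NaNs.  */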
8074
8075 void
8076 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
8077 enum rtx_code *first_code,
8078 enum rtx_code *second_code)
8079 {
8080 *first_code = code;
8081 *bypass_code = UNKNOWN;
8082 *second_code = UNKNOWN;
8083
8084 /* The fcomi comparison sets flags as follows:
8085
8086 cmp ZF PF CF
8087 > 0 0 0
8088 < 0 0 1
8089 = 1 0 0
8090 un 1 1 1 */
8091
8092 switch (code)
8093 {
8094 case GT: /* GTU - CF=0 & ZF=0 */
8095 case GE: /* GEU - CF=0 */
8096 case ORDERED: /* PF=0 */
8097 case UNORDERED: /* PF=1 */
8098 case UNEQ: /* EQ - ZF=1 */
8099 case UNLT: /* LTU - CF=1 */
8100 case UNLE: /* LEU - CF=1 | ZF=1 */
8101 case LTGT: /* EQ - ZF=0 */
8102 break;
8103 case LT: /* LTU - CF=1 - fails on unordered */
8104 *first_code = UNLT;
8105 *bypass_code = UNORDERED;
8106 break;
8107 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
8108 *first_code = UNLE;
8109 *bypass_code = UNORDERED;
8110 break;
8111 case EQ: /* EQ - ZF=1 - fails on unordered */
8112 *first_code = UNEQ;
8113 *bypass_code = UNORDERED;
8114 break;
8115 case NE: /* NE - ZF=0 - fails on unordered */
8116 *first_code = LTGT;
8117 *second_code = UNORDERED;
8118 break;
8119 case UNGE: /* GEU - CF=0 - fails on unordered */
8120 *first_code = GE;
8121 *second_code = UNORDERED;
8122 break;
8123 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
8124 *first_code = GT;
8125 *second_code = UNORDERED;
8126 break;
8127 default:
8128 abort ();
8129 }
8130 if (!TARGET_IEEE_FP)
8131 {
8132 *second_code = UNKNOWN;
8133 *bypass_code = UNKNOWN;
8134 }
8135 }
8136
8137 /* Return the cost of a comparison done with fcom + arithmetic operations on AX.
8138 All of the following functions use the number of instructions as a cost metric.
8139 In the future this should be tweaked to compute bytes for optimize_size and
8140 take into account the performance of various instructions on various CPUs. */
8141 static int
8142 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
8143 {
8144 if (!TARGET_IEEE_FP)
8145 return 4;
8146 /* The cost of code output by ix86_expand_fp_compare. */
8147 switch (code)
8148 {
8149 case UNLE:
8150 case UNLT:
8151 case LTGT:
8152 case GT:
8153 case GE:
8154 case UNORDERED:
8155 case ORDERED:
8156 case UNEQ:
8157 return 4;
8158 break;
8159 case LT:
8160 case NE:
8161 case EQ:
8162 case UNGE:
8163 return 5;
8164 break;
8165 case LE:
8166 case UNGT:
8167 return 6;
8168 break;
8169 default:
8170 abort ();
8171 }
8172 }
8173
8174 /* Return cost of comparison done using fcomi operation.
8175 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8176 static int
8177 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
8178 {
8179 enum rtx_code bypass_code, first_code, second_code;
8180 /* Return arbitrarily high cost when instruction is not supported - this
8181 prevents gcc from using it. */
8182 if (!TARGET_CMOVE)
8183 return 1024;
8184 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8185 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
8186 }
8187
8188 /* Return cost of comparison done using sahf operation.
8189 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8190 static int
8191 ix86_fp_comparison_sahf_cost (enum rtx_code code)
8192 {
8193 enum rtx_code bypass_code, first_code, second_code;
8194 /* Return arbitrarily high cost when instruction is not preferred - this
8195 prevents gcc from using it. */
8196 if (!TARGET_USE_SAHF && !optimize_size)
8197 return 1024;
8198 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8199 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
8200 }
8201
8202 /* Compute cost of the comparison done using any method.
8203 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8204 static int
8205 ix86_fp_comparison_cost (enum rtx_code code)
8206 {
8207 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
8208 int min;
8209
8210 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
8211 sahf_cost = ix86_fp_comparison_sahf_cost (code);
8212
8213 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
8214 if (min > sahf_cost)
8215 min = sahf_cost;
8216 if (min > fcomi_cost)
8217 min = fcomi_cost;
8218 return min;
8219 }
8220
8221 /* Generate insn patterns to do a floating point compare of OPERANDS. */
8222
8223 static rtx
8224 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
8225 rtx *second_test, rtx *bypass_test)
8226 {
8227 enum machine_mode fpcmp_mode, intcmp_mode;
8228 rtx tmp, tmp2;
8229 int cost = ix86_fp_comparison_cost (code);
8230 enum rtx_code bypass_code, first_code, second_code;
8231
8232 fpcmp_mode = ix86_fp_compare_mode (code);
8233 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
8234
8235 if (second_test)
8236 *second_test = NULL_RTX;
8237 if (bypass_test)
8238 *bypass_test = NULL_RTX;
8239
8240 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8241
8242 /* Do fcomi/sahf based test when profitable. */
8243 if ((bypass_code == UNKNOWN || bypass_test)
8244 && (second_code == UNKNOWN || second_test)
8245 && ix86_fp_comparison_arithmetics_cost (code) > cost)
8246 {
8247 if (TARGET_CMOVE)
8248 {
8249 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8250 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
8251 tmp);
8252 emit_insn (tmp);
8253 }
8254 else
8255 {
8256 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8257 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8258 if (!scratch)
8259 scratch = gen_reg_rtx (HImode);
8260 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8261 emit_insn (gen_x86_sahf_1 (scratch));
8262 }
8263
8264 /* The FP codes work out to act like unsigned. */
8265 intcmp_mode = fpcmp_mode;
8266 code = first_code;
8267 if (bypass_code != UNKNOWN)
8268 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
8269 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8270 const0_rtx);
8271 if (second_code != UNKNOWN)
8272 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
8273 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8274 const0_rtx);
8275 }
8276 else
8277 {
8278 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
8279 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8280 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8281 if (!scratch)
8282 scratch = gen_reg_rtx (HImode);
8283 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8284
8285 /* In the unordered case, we have to check C2 for NaN's, which
8286 doesn't happen to work out to anything nice combination-wise.
8287 So do some bit twiddling on the value we've got in AH to come
8288 up with an appropriate set of condition codes. */
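/* After fnstsw, AH holds the FPU condition bits: C0 is 0x01, C2 is 0x04
   and C3 is 0x40, so 0x45 masks all three; via sahf these map onto CF,
   PF and ZF respectively, matching the fcomi flag table earlier.  */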
8289
8290 intcmp_mode = CCNOmode;
8291 switch (code)
8292 {
8293 case GT:
8294 case UNGT:
8295 if (code == GT || !TARGET_IEEE_FP)
8296 {
8297 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8298 code = EQ;
8299 }
8300 else
8301 {
8302 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8303 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8304 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
8305 intcmp_mode = CCmode;
8306 code = GEU;
8307 }
8308 break;
8309 case LT:
8310 case UNLT:
8311 if (code == LT && TARGET_IEEE_FP)
8312 {
8313 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8314 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
8315 intcmp_mode = CCmode;
8316 code = EQ;
8317 }
8318 else
8319 {
8320 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
8321 code = NE;
8322 }
8323 break;
8324 case GE:
8325 case UNGE:
8326 if (code == GE || !TARGET_IEEE_FP)
8327 {
8328 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
8329 code = EQ;
8330 }
8331 else
8332 {
8333 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8334 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8335 GEN_INT (0x01)));
8336 code = NE;
8337 }
8338 break;
8339 case LE:
8340 case UNLE:
8341 if (code == LE && TARGET_IEEE_FP)
8342 {
8343 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8344 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8345 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8346 intcmp_mode = CCmode;
8347 code = LTU;
8348 }
8349 else
8350 {
8351 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8352 code = NE;
8353 }
8354 break;
8355 case EQ:
8356 case UNEQ:
8357 if (code == EQ && TARGET_IEEE_FP)
8358 {
8359 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8360 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8361 intcmp_mode = CCmode;
8362 code = EQ;
8363 }
8364 else
8365 {
8366 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8367 code = NE;
8368 break;
8369 }
8370 break;
8371 case NE:
8372 case LTGT:
8373 if (code == NE && TARGET_IEEE_FP)
8374 {
8375 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8376 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8377 GEN_INT (0x40)));
8378 code = NE;
8379 }
8380 else
8381 {
8382 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8383 code = EQ;
8384 }
8385 break;
8386
8387 case UNORDERED:
8388 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8389 code = NE;
8390 break;
8391 case ORDERED:
8392 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8393 code = EQ;
8394 break;
8395
8396 default:
8397 abort ();
8398 }
8399 }
8400
8401 /* Return the test that should be put into the flags user, i.e.
8402 the bcc, scc, or cmov instruction. */
8403 return gen_rtx_fmt_ee (code, VOIDmode,
8404 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8405 const0_rtx);
8406 }
8407
8408 rtx
8409 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
8410 {
8411 rtx op0, op1, ret;
8412 op0 = ix86_compare_op0;
8413 op1 = ix86_compare_op1;
8414
8415 if (second_test)
8416 *second_test = NULL_RTX;
8417 if (bypass_test)
8418 *bypass_test = NULL_RTX;
8419
8420 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8421 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
8422 second_test, bypass_test);
8423 else
8424 ret = ix86_expand_int_compare (code, op0, op1);
8425
8426 return ret;
8427 }
8428
8429 /* Return true if the CODE will result in nontrivial jump sequence. */
8430 bool
8431 ix86_fp_jump_nontrivial_p (enum rtx_code code)
8432 {
8433 enum rtx_code bypass_code, first_code, second_code;
8434 if (!TARGET_CMOVE)
8435 return true;
8436 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8437 return bypass_code != UNKNOWN || second_code != UNKNOWN;
8438 }
8439
8440 void
8441 ix86_expand_branch (enum rtx_code code, rtx label)
8442 {
8443 rtx tmp;
8444
8445 switch (GET_MODE (ix86_compare_op0))
8446 {
8447 case QImode:
8448 case HImode:
8449 case SImode:
8450 simple:
8451 tmp = ix86_expand_compare (code, NULL, NULL);
8452 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8453 gen_rtx_LABEL_REF (VOIDmode, label),
8454 pc_rtx);
8455 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
8456 return;
8457
8458 case SFmode:
8459 case DFmode:
8460 case XFmode:
8461 {
8462 rtvec vec;
8463 int use_fcomi;
8464 enum rtx_code bypass_code, first_code, second_code;
8465
8466 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
8467 &ix86_compare_op1);
8468
8469 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8470
8471 /* Check whether we will use the natural sequence with one jump. If
8472 so, we can expand the jump early. Otherwise delay expansion by
8473 creating a compound insn so as not to confuse the optimizers. */
8474 if (bypass_code == UNKNOWN && second_code == UNKNOWN
8475 && TARGET_CMOVE)
8476 {
8477 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
8478 gen_rtx_LABEL_REF (VOIDmode, label),
8479 pc_rtx, NULL_RTX);
8480 }
8481 else
8482 {
8483 tmp = gen_rtx_fmt_ee (code, VOIDmode,
8484 ix86_compare_op0, ix86_compare_op1);
8485 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8486 gen_rtx_LABEL_REF (VOIDmode, label),
8487 pc_rtx);
8488 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
8489
8490 use_fcomi = ix86_use_fcomi_compare (code);
8491 vec = rtvec_alloc (3 + !use_fcomi);
8492 RTVEC_ELT (vec, 0) = tmp;
8493 RTVEC_ELT (vec, 1)
8494 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
8495 RTVEC_ELT (vec, 2)
8496 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
8497 if (! use_fcomi)
8498 RTVEC_ELT (vec, 3)
8499 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
8500
8501 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
8502 }
8503 return;
8504 }
8505
8506 case DImode:
8507 if (TARGET_64BIT)
8508 goto simple;
8509 /* Expand DImode branch into multiple compare+branch. */
8510 {
8511 rtx lo[2], hi[2], label2;
8512 enum rtx_code code1, code2, code3;
8513
8514 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
8515 {
8516 tmp = ix86_compare_op0;
8517 ix86_compare_op0 = ix86_compare_op1;
8518 ix86_compare_op1 = tmp;
8519 code = swap_condition (code);
8520 }
8521 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
8522 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
8523
8524 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
8525 avoid two branches. This costs one extra insn, so disable when
8526 optimizing for size. */
8527
8528 if ((code == EQ || code == NE)
8529 && (!optimize_size
8530 || hi[1] == const0_rtx || lo[1] == const0_rtx))
8531 {
8532 rtx xor0, xor1;
8533
8534 xor1 = hi[0];
8535 if (hi[1] != const0_rtx)
8536 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
8537 NULL_RTX, 0, OPTAB_WIDEN);
8538
8539 xor0 = lo[0];
8540 if (lo[1] != const0_rtx)
8541 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
8542 NULL_RTX, 0, OPTAB_WIDEN);
8543
8544 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
8545 NULL_RTX, 0, OPTAB_WIDEN);
8546
8547 ix86_compare_op0 = tmp;
8548 ix86_compare_op1 = const0_rtx;
8549 ix86_expand_branch (code, label);
8550 return;
8551 }
8552
8553 /* Otherwise, if we are doing less-than or greater-or-equal-than,
8554 op1 is a constant and the low word is zero, then we can just
8555 examine the high word. */
8556
8557 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
8558 switch (code)
8559 {
8560 case LT: case LTU: case GE: case GEU:
8561 ix86_compare_op0 = hi[0];
8562 ix86_compare_op1 = hi[1];
8563 ix86_expand_branch (code, label);
8564 return;
8565 default:
8566 break;
8567 }
8568
8569 /* Otherwise, we need two or three jumps. */
8570
8571 label2 = gen_label_rtx ();
8572
8573 code1 = code;
8574 code2 = swap_condition (code);
8575 code3 = unsigned_condition (code);
8576
8577 switch (code)
8578 {
8579 case LT: case GT: case LTU: case GTU:
8580 break;
8581
8582 case LE: code1 = LT; code2 = GT; break;
8583 case GE: code1 = GT; code2 = LT; break;
8584 case LEU: code1 = LTU; code2 = GTU; break;
8585 case GEU: code1 = GTU; code2 = LTU; break;
8586
8587 case EQ: code1 = UNKNOWN; code2 = NE; break;
8588 case NE: code2 = UNKNOWN; break;
8589
8590 default:
8591 abort ();
8592 }
8593
8594 /*
8595 * a < b =>
8596 * if (hi(a) < hi(b)) goto true;
8597 * if (hi(a) > hi(b)) goto false;
8598 * if (lo(a) < lo(b)) goto true;
8599 * false:
8600 */
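/* For instance, a signed DImode "a <= b" on a 32-bit target becomes
   code1 = LT, code2 = GT and code3 = LEU: branch on the high words
   deciding the order, and only compare the low words (unsigned) when
   the high words are equal.  */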
8601
8602 ix86_compare_op0 = hi[0];
8603 ix86_compare_op1 = hi[1];
8604
8605 if (code1 != UNKNOWN)
8606 ix86_expand_branch (code1, label);
8607 if (code2 != UNKNOWN)
8608 ix86_expand_branch (code2, label2);
8609
8610 ix86_compare_op0 = lo[0];
8611 ix86_compare_op1 = lo[1];
8612 ix86_expand_branch (code3, label);
8613
8614 if (code2 != UNKNOWN)
8615 emit_label (label2);
8616 return;
8617 }
8618
8619 default:
8620 abort ();
8621 }
8622 }
8623
8624 /* Split branch based on floating point condition. */
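/* When the comparison needs a bypass test (e.g. LT under IEEE math), an
   extra jump around the main branch is emitted first for the unordered
   case, and the probabilities taken from split_branch_probability are
   spread over the resulting jumps.  */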
8625 void
8626 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
8627 rtx target1, rtx target2, rtx tmp)
8628 {
8629 rtx second, bypass;
8630 rtx label = NULL_RTX;
8631 rtx condition;
8632 int bypass_probability = -1, second_probability = -1, probability = -1;
8633 rtx i;
8634
8635 if (target2 != pc_rtx)
8636 {
8637 rtx tmp = target2;
8638 code = reverse_condition_maybe_unordered (code);
8639 target2 = target1;
8640 target1 = tmp;
8641 }
8642
8643 condition = ix86_expand_fp_compare (code, op1, op2,
8644 tmp, &second, &bypass);
8645
8646 if (split_branch_probability >= 0)
8647 {
8648 /* Distribute the probabilities across the jumps.
8649 Assume that BYPASS and SECOND always test
8650 for UNORDERED. */
8651 probability = split_branch_probability;
8652
8653 /* A value of 1 is low enough that there is no need for the probability
8654 to be updated. Later we may run some experiments and see
8655 if unordered values are more frequent in practice. */
8656 if (bypass)
8657 bypass_probability = 1;
8658 if (second)
8659 second_probability = 1;
8660 }
8661 if (bypass != NULL_RTX)
8662 {
8663 label = gen_label_rtx ();
8664 i = emit_jump_insn (gen_rtx_SET
8665 (VOIDmode, pc_rtx,
8666 gen_rtx_IF_THEN_ELSE (VOIDmode,
8667 bypass,
8668 gen_rtx_LABEL_REF (VOIDmode,
8669 label),
8670 pc_rtx)));
8671 if (bypass_probability >= 0)
8672 REG_NOTES (i)
8673 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8674 GEN_INT (bypass_probability),
8675 REG_NOTES (i));
8676 }
8677 i = emit_jump_insn (gen_rtx_SET
8678 (VOIDmode, pc_rtx,
8679 gen_rtx_IF_THEN_ELSE (VOIDmode,
8680 condition, target1, target2)));
8681 if (probability >= 0)
8682 REG_NOTES (i)
8683 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8684 GEN_INT (probability),
8685 REG_NOTES (i));
8686 if (second != NULL_RTX)
8687 {
8688 i = emit_jump_insn (gen_rtx_SET
8689 (VOIDmode, pc_rtx,
8690 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
8691 target2)));
8692 if (second_probability >= 0)
8693 REG_NOTES (i)
8694 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8695 GEN_INT (second_probability),
8696 REG_NOTES (i));
8697 }
8698 if (label != NULL_RTX)
8699 emit_label (label);
8700 }
8701
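/* Expand a setcc of ix86_compare_op0/ix86_compare_op1 into DEST (QImode).
   FP comparisons that need two flag tests get two setcc results combined
   with and/or below; returns 0 when the expander must FAIL.  */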
8702 int
8703 ix86_expand_setcc (enum rtx_code code, rtx dest)
8704 {
8705 rtx ret, tmp, tmpreg, equiv;
8706 rtx second_test, bypass_test;
8707
8708 if (GET_MODE (ix86_compare_op0) == DImode
8709 && !TARGET_64BIT)
8710 return 0; /* FAIL */
8711
8712 if (GET_MODE (dest) != QImode)
8713 abort ();
8714
8715 ret = ix86_expand_compare (code, &second_test, &bypass_test);
8716 PUT_MODE (ret, QImode);
8717
8718 tmp = dest;
8719 tmpreg = dest;
8720
8721 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
8722 if (bypass_test || second_test)
8723 {
8724 rtx test = second_test;
8725 int bypass = 0;
8726 rtx tmp2 = gen_reg_rtx (QImode);
8727 if (bypass_test)
8728 {
8729 if (second_test)
8730 abort ();
8731 test = bypass_test;
8732 bypass = 1;
8733 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
8734 }
8735 PUT_MODE (test, QImode);
8736 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
8737
8738 if (bypass)
8739 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
8740 else
8741 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
8742 }
8743
8744 /* Attach a REG_EQUAL note describing the comparison result. */
8745 equiv = simplify_gen_relational (code, QImode,
8746 GET_MODE (ix86_compare_op0),
8747 ix86_compare_op0, ix86_compare_op1);
8748 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
8749
8750 return 1; /* DONE */
8751 }
8752
8753 /* Expand a comparison setting or clearing the carry flag. Return true when
8754 successful and set *POP to the comparison operation. */
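/* For example, "a == 0" is rewritten below as "(unsigned) a < 1" and
   "a >= 0" as "(unsigned) a < 0x80000000", so the result lands in the
   carry flag and a single sbb can turn it into an all-ones/zero mask.  */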
8755 static bool
8756 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
8757 {
8758 enum machine_mode mode =
8759 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
8760
8761 /* Do not handle DImode compares that go through the special path. Also we can't
8762 deal with all FP compares yet; this is possible to add. */
8763 if ((mode == DImode && !TARGET_64BIT))
8764 return false;
8765 if (FLOAT_MODE_P (mode))
8766 {
8767 rtx second_test = NULL, bypass_test = NULL;
8768 rtx compare_op, compare_seq;
8769
8770 /* Shortcut: following common codes never translate into carry flag compares. */
8771 if (code == EQ || code == NE || code == UNEQ || code == LTGT
8772 || code == ORDERED || code == UNORDERED)
8773 return false;
8774
8775 /* These comparisons require zero flag; swap operands so they won't. */
8776 if ((code == GT || code == UNLE || code == LE || code == UNGT)
8777 && !TARGET_IEEE_FP)
8778 {
8779 rtx tmp = op0;
8780 op0 = op1;
8781 op1 = tmp;
8782 code = swap_condition (code);
8783 }
8784
8785 /* Try to expand the comparison and verify that we end up with a carry-flag
8786 based comparison. This fails to be true only when we decide to expand the
8787 comparison using arithmetic, which is not a common scenario. */
8788 start_sequence ();
8789 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
8790 &second_test, &bypass_test);
8791 compare_seq = get_insns ();
8792 end_sequence ();
8793
8794 if (second_test || bypass_test)
8795 return false;
8796 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
8797 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
8798 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
8799 else
8800 code = GET_CODE (compare_op);
8801 if (code != LTU && code != GEU)
8802 return false;
8803 emit_insn (compare_seq);
8804 *pop = compare_op;
8805 return true;
8806 }
8807 if (!INTEGRAL_MODE_P (mode))
8808 return false;
8809 switch (code)
8810 {
8811 case LTU:
8812 case GEU:
8813 break;
8814
8815 /* Convert a==0 into (unsigned)a<1. */
8816 case EQ:
8817 case NE:
8818 if (op1 != const0_rtx)
8819 return false;
8820 op1 = const1_rtx;
8821 code = (code == EQ ? LTU : GEU);
8822 break;
8823
8824 /* Convert a>b into b<a or a>=b+1. */
8825 case GTU:
8826 case LEU:
8827 if (GET_CODE (op1) == CONST_INT)
8828 {
8829 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
8830 /* Bail out on overflow. We still can swap operands but that
8831 would force loading of the constant into register. */
8832 if (op1 == const0_rtx
8833 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
8834 return false;
8835 code = (code == GTU ? GEU : LTU);
8836 }
8837 else
8838 {
8839 rtx tmp = op1;
8840 op1 = op0;
8841 op0 = tmp;
8842 code = (code == GTU ? LTU : GEU);
8843 }
8844 break;
8845
8846 /* Convert a>=0 into (unsigned)a<0x80000000. */
8847 case LT:
8848 case GE:
8849 if (mode == DImode || op1 != const0_rtx)
8850 return false;
8851 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
8852 code = (code == LT ? GEU : LTU);
8853 break;
8854 case LE:
8855 case GT:
8856 if (mode == DImode || op1 != constm1_rtx)
8857 return false;
8858 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
8859 code = (code == LE ? GEU : LTU);
8860 break;
8861
8862 default:
8863 return false;
8864 }
8865 /* Swapping operands may cause constant to appear as first operand. */
8866 if (!nonimmediate_operand (op0, VOIDmode))
8867 {
8868 if (no_new_pseudos)
8869 return false;
8870 op0 = force_reg (mode, op0);
8871 }
8872 ix86_compare_op0 = op0;
8873 ix86_compare_op1 = op1;
8874 *pop = ix86_expand_compare (code, NULL, NULL);
8875 if (GET_CODE (*pop) != LTU && GET_CODE (*pop) != GEU)
8876 abort ();
8877 return true;
8878 }
8879
8880 int
8881 ix86_expand_int_movcc (rtx operands[])
8882 {
8883 enum rtx_code code = GET_CODE (operands[1]), compare_code;
8884 rtx compare_seq, compare_op;
8885 rtx second_test, bypass_test;
8886 enum machine_mode mode = GET_MODE (operands[0]);
8887 bool sign_bit_compare_p = false;
8888
8889 start_sequence ();
8890 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
8891 compare_seq = get_insns ();
8892 end_sequence ();
8893
8894 compare_code = GET_CODE (compare_op);
8895
8896 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
8897 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
8898 sign_bit_compare_p = true;
8899
8900 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
8901 HImode insns, we'd be swallowed in word prefix ops. */
8902
8903 if ((mode != HImode || TARGET_FAST_PREFIX)
8904 && (mode != DImode || TARGET_64BIT)
8905 && GET_CODE (operands[2]) == CONST_INT
8906 && GET_CODE (operands[3]) == CONST_INT)
8907 {
8908 rtx out = operands[0];
8909 HOST_WIDE_INT ct = INTVAL (operands[2]);
8910 HOST_WIDE_INT cf = INTVAL (operands[3]);
8911 HOST_WIDE_INT diff;
8912
8913 diff = ct - cf;
8914 /* Sign bit compares are better done using shifts than by using
8915 sbb. */
8916 if (sign_bit_compare_p
8917 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
8918 ix86_compare_op1, &compare_op))
8919 {
8920 /* Detect overlap between destination and compare sources. */
8921 rtx tmp = out;
8922
8923 if (!sign_bit_compare_p)
8924 {
8925 bool fpcmp = false;
8926
8927 compare_code = GET_CODE (compare_op);
8928
8929 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
8930 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
8931 {
8932 fpcmp = true;
8933 compare_code = ix86_fp_compare_code_to_integer (compare_code);
8934 }
8935
8936 /* To simplify rest of code, restrict to the GEU case. */
8937 if (compare_code == LTU)
8938 {
8939 HOST_WIDE_INT tmp = ct;
8940 ct = cf;
8941 cf = tmp;
8942 compare_code = reverse_condition (compare_code);
8943 code = reverse_condition (code);
8944 }
8945 else
8946 {
8947 if (fpcmp)
8948 PUT_CODE (compare_op,
8949 reverse_condition_maybe_unordered
8950 (GET_CODE (compare_op)));
8951 else
8952 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
8953 }
8954 diff = ct - cf;
8955
8956 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
8957 || reg_overlap_mentioned_p (out, ix86_compare_op1))
8958 tmp = gen_reg_rtx (mode);
8959
8960 if (mode == DImode)
8961 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
8962 else
8963 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
8964 }
8965 else
8966 {
8967 if (code == GT || code == GE)
8968 code = reverse_condition (code);
8969 else
8970 {
8971 HOST_WIDE_INT tmp = ct;
8972 ct = cf;
8973 cf = tmp;
8974 diff = ct - cf;
8975 }
8976 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
8977 ix86_compare_op1, VOIDmode, 0, -1);
8978 }
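/* At this point TMP holds an all-ones or all-zero mask produced without
   a branch (via the sbb patterns above, or via emit_store_flag for the
   sign-bit case); the cases below massage that mask into ct/cf using
   only add/or/not/and.  */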
8979
8980 if (diff == 1)
8981 {
8982 /*
8983 * cmpl op0,op1
8984 * sbbl dest,dest
8985 * [addl dest, ct]
8986 *
8987 * Size 5 - 8.
8988 */
8989 if (ct)
8990 tmp = expand_simple_binop (mode, PLUS,
8991 tmp, GEN_INT (ct),
8992 copy_rtx (tmp), 1, OPTAB_DIRECT);
8993 }
8994 else if (cf == -1)
8995 {
8996 /*
8997 * cmpl op0,op1
8998 * sbbl dest,dest
8999 * orl $ct, dest
9000 *
9001 * Size 8.
9002 */
9003 tmp = expand_simple_binop (mode, IOR,
9004 tmp, GEN_INT (ct),
9005 copy_rtx (tmp), 1, OPTAB_DIRECT);
9006 }
9007 else if (diff == -1 && ct)
9008 {
9009 /*
9010 * cmpl op0,op1
9011 * sbbl dest,dest
9012 * notl dest
9013 * [addl dest, cf]
9014 *
9015 * Size 8 - 11.
9016 */
9017 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9018 if (cf)
9019 tmp = expand_simple_binop (mode, PLUS,
9020 copy_rtx (tmp), GEN_INT (cf),
9021 copy_rtx (tmp), 1, OPTAB_DIRECT);
9022 }
9023 else
9024 {
9025 /*
9026 * cmpl op0,op1
9027 * sbbl dest,dest
9028 * [notl dest]
9029 * andl cf - ct, dest
9030 * [addl dest, ct]
9031 *
9032 * Size 8 - 11.
9033 */
9034
9035 if (cf == 0)
9036 {
9037 cf = ct;
9038 ct = 0;
9039 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9040 }
9041
9042 tmp = expand_simple_binop (mode, AND,
9043 copy_rtx (tmp),
9044 gen_int_mode (cf - ct, mode),
9045 copy_rtx (tmp), 1, OPTAB_DIRECT);
9046 if (ct)
9047 tmp = expand_simple_binop (mode, PLUS,
9048 copy_rtx (tmp), GEN_INT (ct),
9049 copy_rtx (tmp), 1, OPTAB_DIRECT);
9050 }
9051
9052 if (!rtx_equal_p (tmp, out))
9053 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
9054
9055 return 1; /* DONE */
9056 }
9057
9058 if (diff < 0)
9059 {
9060 HOST_WIDE_INT tmp;
9061 tmp = ct, ct = cf, cf = tmp;
9062 diff = -diff;
9063 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9064 {
9065 /* We may be reversing an unordered compare to a normal compare, which
9066 is not valid in general (we may convert a non-trapping condition
9067 to a trapping one); however, on i386 we currently emit all
9068 comparisons unordered. */
9069 compare_code = reverse_condition_maybe_unordered (compare_code);
9070 code = reverse_condition_maybe_unordered (code);
9071 }
9072 else
9073 {
9074 compare_code = reverse_condition (compare_code);
9075 code = reverse_condition (code);
9076 }
9077 }
9078
9079 compare_code = UNKNOWN;
9080 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
9081 && GET_CODE (ix86_compare_op1) == CONST_INT)
9082 {
9083 if (ix86_compare_op1 == const0_rtx
9084 && (code == LT || code == GE))
9085 compare_code = code;
9086 else if (ix86_compare_op1 == constm1_rtx)
9087 {
9088 if (code == LE)
9089 compare_code = LT;
9090 else if (code == GT)
9091 compare_code = GE;
9092 }
9093 }
9094
9095 /* Optimize dest = (op0 < 0) ? -1 : cf. */
9096 if (compare_code != UNKNOWN
9097 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
9098 && (cf == -1 || ct == -1))
9099 {
9100 /* If lea code below could be used, only optimize
9101 if it results in a 2 insn sequence. */
9102
9103 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
9104 || diff == 3 || diff == 5 || diff == 9)
9105 || (compare_code == LT && ct == -1)
9106 || (compare_code == GE && cf == -1))
9107 {
9108 /*
9109 * notl op1 (if necessary)
9110 * sarl $31, op1
9111 * orl cf, op1
9112 */
9113 if (ct != -1)
9114 {
9115 cf = ct;
9116 ct = -1;
9117 code = reverse_condition (code);
9118 }
9119
9120 out = emit_store_flag (out, code, ix86_compare_op0,
9121 ix86_compare_op1, VOIDmode, 0, -1);
9122
9123 out = expand_simple_binop (mode, IOR,
9124 out, GEN_INT (cf),
9125 out, 1, OPTAB_DIRECT);
9126 if (out != operands[0])
9127 emit_move_insn (operands[0], out);
9128
9129 return 1; /* DONE */
9130 }
9131 }
9132
9133
9134 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
9135 || diff == 3 || diff == 5 || diff == 9)
9136 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
9137 && (mode != DImode
9138 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
9139 {
9140 /*
9141 * xorl dest,dest
9142 * cmpl op1,op2
9143 * setcc dest
9144 * lea cf(dest*(ct-cf)),dest
9145 *
9146 * Size 14.
9147 *
9148 * This also catches the degenerate setcc-only case.
9149 */
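/* E.g. for (a < b) ? 7 : 3 we have diff == 4, so the 0/1 setcc result
   is scaled by lea into 3 + 4*flag rather than branching.  */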
9150
9151 rtx tmp;
9152 int nops;
9153
9154 out = emit_store_flag (out, code, ix86_compare_op0,
9155 ix86_compare_op1, VOIDmode, 0, 1);
9156
9157 nops = 0;
9158 /* On x86_64 the lea instruction operates on Pmode, so we need
9159 to get the arithmetic done in the proper mode to match. */
9160 if (diff == 1)
9161 tmp = copy_rtx (out);
9162 else
9163 {
9164 rtx out1;
9165 out1 = copy_rtx (out);
9166 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
9167 nops++;
9168 if (diff & 1)
9169 {
9170 tmp = gen_rtx_PLUS (mode, tmp, out1);
9171 nops++;
9172 }
9173 }
9174 if (cf != 0)
9175 {
9176 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
9177 nops++;
9178 }
9179 if (!rtx_equal_p (tmp, out))
9180 {
9181 if (nops == 1)
9182 out = force_operand (tmp, copy_rtx (out));
9183 else
9184 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
9185 }
9186 if (!rtx_equal_p (out, operands[0]))
9187 emit_move_insn (operands[0], copy_rtx (out));
9188
9189 return 1; /* DONE */
9190 }
9191
9192 /*
9193 * General case: Jumpful:
9194 * xorl dest,dest cmpl op1, op2
9195 * cmpl op1, op2 movl ct, dest
9196 * setcc dest jcc 1f
9197 * decl dest movl cf, dest
9198 * andl (cf-ct),dest 1:
9199 * addl ct,dest
9200 *
9201 * Size 20. Size 14.
9202 *
9203 * This is reasonably steep, but branch mispredict costs are
9204 * high on modern cpus, so consider failing only if optimizing
9205 * for space.
9206 */
9207
9208 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
9209 && BRANCH_COST >= 2)
9210 {
9211 if (cf == 0)
9212 {
9213 cf = ct;
9214 ct = 0;
9215 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9216 /* We may be reversing an unordered compare to a normal compare,
9217 which is not valid in general (we may convert a non-trapping
9218 condition to a trapping one); however, on i386 we currently
9219 emit all comparisons unordered. */
9220 code = reverse_condition_maybe_unordered (code);
9221 else
9222 {
9223 code = reverse_condition (code);
9224 if (compare_code != UNKNOWN)
9225 compare_code = reverse_condition (compare_code);
9226 }
9227 }
9228
9229 if (compare_code != UNKNOWN)
9230 {
9231 /* notl op1 (if needed)
9232 sarl $31, op1
9233 andl (cf-ct), op1
9234 addl ct, op1
9235
9236 For x < 0 (resp. x <= -1) there will be no notl,
9237 so if possible swap the constants to get rid of the
9238 complement.
9239 True/false will be -1/0 while code below (store flag
9240 followed by decrement) is 0/-1, so the constants need
9241 to be exchanged once more. */
9242
9243 if (compare_code == GE || !cf)
9244 {
9245 code = reverse_condition (code);
9246 compare_code = LT;
9247 }
9248 else
9249 {
9250 HOST_WIDE_INT tmp = cf;
9251 cf = ct;
9252 ct = tmp;
9253 }
9254
9255 out = emit_store_flag (out, code, ix86_compare_op0,
9256 ix86_compare_op1, VOIDmode, 0, -1);
9257 }
9258 else
9259 {
9260 out = emit_store_flag (out, code, ix86_compare_op0,
9261 ix86_compare_op1, VOIDmode, 0, 1);
9262
9263 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
9264 copy_rtx (out), 1, OPTAB_DIRECT);
9265 }
9266
9267 out = expand_simple_binop (mode, AND, copy_rtx (out),
9268 gen_int_mode (cf - ct, mode),
9269 copy_rtx (out), 1, OPTAB_DIRECT);
9270 if (ct)
9271 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
9272 copy_rtx (out), 1, OPTAB_DIRECT);
9273 if (!rtx_equal_p (out, operands[0]))
9274 emit_move_insn (operands[0], copy_rtx (out));
9275
9276 return 1; /* DONE */
9277 }
9278 }
9279
9280 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
9281 {
9282 /* Try a few things more with specific constants and a variable. */
9283
9284 optab op;
9285 rtx var, orig_out, out, tmp;
9286
9287 if (BRANCH_COST <= 2)
9288 return 0; /* FAIL */
9289
9290 /* If one of the two operands is an interesting constant, load a
9291 constant with the above and mask it in with a logical operation. */
9292
9293 if (GET_CODE (operands[2]) == CONST_INT)
9294 {
9295 var = operands[3];
9296 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
9297 operands[3] = constm1_rtx, op = and_optab;
9298 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
9299 operands[3] = const0_rtx, op = ior_optab;
9300 else
9301 return 0; /* FAIL */
9302 }
9303 else if (GET_CODE (operands[3]) == CONST_INT)
9304 {
9305 var = operands[2];
9306 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
9307 operands[2] = constm1_rtx, op = and_optab;
9308 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
9309 operands[2] = const0_rtx, op = ior_optab;
9310 else
9311 return 0; /* FAIL */
9312 }
9313 else
9314 return 0; /* FAIL */
9315
9316 orig_out = operands[0];
9317 tmp = gen_reg_rtx (mode);
9318 operands[0] = tmp;
9319
9320 /* Recurse to get the constant loaded. */
9321 if (ix86_expand_int_movcc (operands) == 0)
9322 return 0; /* FAIL */
9323
9324 /* Mask in the interesting variable. */
9325 out = expand_binop (mode, op, var, tmp, orig_out, 0,
9326 OPTAB_WIDEN);
9327 if (!rtx_equal_p (out, orig_out))
9328 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
9329
9330 return 1; /* DONE */
9331 }
9332
9333 /*
9334 * For comparison with above,
9335 *
9336 * movl cf,dest
9337 * movl ct,tmp
9338 * cmpl op1,op2
9339 * cmovcc tmp,dest
9340 *
9341 * Size 15.
9342 */
9343
9344 if (! nonimmediate_operand (operands[2], mode))
9345 operands[2] = force_reg (mode, operands[2]);
9346 if (! nonimmediate_operand (operands[3], mode))
9347 operands[3] = force_reg (mode, operands[3]);
9348
9349 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9350 {
9351 rtx tmp = gen_reg_rtx (mode);
9352 emit_move_insn (tmp, operands[3]);
9353 operands[3] = tmp;
9354 }
9355 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9356 {
9357 rtx tmp = gen_reg_rtx (mode);
9358 emit_move_insn (tmp, operands[2]);
9359 operands[2] = tmp;
9360 }
9361
9362 if (! register_operand (operands[2], VOIDmode)
9363 && (mode == QImode
9364 || ! register_operand (operands[3], VOIDmode)))
9365 operands[2] = force_reg (mode, operands[2]);
9366
9367 if (mode == QImode
9368 && ! register_operand (operands[3], VOIDmode))
9369 operands[3] = force_reg (mode, operands[3]);
9370
9371 emit_insn (compare_seq);
9372 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9373 gen_rtx_IF_THEN_ELSE (mode,
9374 compare_op, operands[2],
9375 operands[3])));
9376 if (bypass_test)
9377 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
9378 gen_rtx_IF_THEN_ELSE (mode,
9379 bypass_test,
9380 copy_rtx (operands[3]),
9381 copy_rtx (operands[0]))));
9382 if (second_test)
9383 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
9384 gen_rtx_IF_THEN_ELSE (mode,
9385 second_test,
9386 copy_rtx (operands[2]),
9387 copy_rtx (operands[0]))));
9388
9389 return 1; /* DONE */
9390 }
9391
9392 int
9393 ix86_expand_fp_movcc (rtx operands[])
9394 {
9395 enum rtx_code code;
9396 rtx tmp;
9397 rtx compare_op, second_test, bypass_test;
9398
9399 /* For SF/DFmode conditional moves based on comparisons
9400    in the same mode, we may want to use SSE min/max instructions.  */
9401 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
9402 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
9403 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
9404 /* The SSE comparisons do not support the LTGT/UNEQ pair.  */
9405 && (!TARGET_IEEE_FP
9406 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
9407 /* We may be called from the post-reload splitter. */
9408 && (!REG_P (operands[0])
9409 || SSE_REG_P (operands[0])
9410 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
9411 {
9412 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
9413 code = GET_CODE (operands[1]);
9414
9415 /* See if we have a (cross) match between the comparison operands and
9416    the conditional move operands.  */
9417 if (rtx_equal_p (operands[2], op1))
9418 {
9419 rtx tmp = op0;
9420 op0 = op1;
9421 op1 = tmp;
9422 code = reverse_condition_maybe_unordered (code);
9423 }
9424 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
9425 {
9426 /* Check for min operation. */
9427 if (code == LT || code == UNLE)
9428 {
9429 if (code == UNLE)
9430 {
9431 rtx tmp = op0;
9432 op0 = op1;
9433 op1 = tmp;
9434 }
9435 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9436 if (memory_operand (op0, VOIDmode))
9437 op0 = force_reg (GET_MODE (operands[0]), op0);
9438 if (GET_MODE (operands[0]) == SFmode)
9439 emit_insn (gen_minsf3 (operands[0], op0, op1));
9440 else
9441 emit_insn (gen_mindf3 (operands[0], op0, op1));
9442 return 1;
9443 }
9444 /* Check for max operation. */
9445 if (code == GT || code == UNGE)
9446 {
9447 if (code == UNGE)
9448 {
9449 rtx tmp = op0;
9450 op0 = op1;
9451 op1 = tmp;
9452 }
9453 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9454 if (memory_operand (op0, VOIDmode))
9455 op0 = force_reg (GET_MODE (operands[0]), op0);
9456 if (GET_MODE (operands[0]) == SFmode)
9457 emit_insn (gen_maxsf3 (operands[0], op0, op1));
9458 else
9459 emit_insn (gen_maxdf3 (operands[0], op0, op1));
9460 return 1;
9461 }
9462 }
9463 /* Arrange for the condition to be a sse_comparison_operator.  When we
9464    are in non-IEEE mode, also try to canonicalize the destination
9465    operand to be first in the comparison - this helps reload avoid
9466    extra moves.  */
9467 if (!sse_comparison_operator (operands[1], VOIDmode)
9468 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
9469 {
9470 rtx tmp = ix86_compare_op0;
9471 ix86_compare_op0 = ix86_compare_op1;
9472 ix86_compare_op1 = tmp;
9473 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
9474 VOIDmode, ix86_compare_op0,
9475 ix86_compare_op1);
9476 }
9477 /* Similarly, try to make the result the first operand of the
9478    conditional move.  We also don't support the NE comparison on SSE,
9479    so try to avoid it.  */
9480 if ((rtx_equal_p (operands[0], operands[3])
9481 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
9482 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
9483 {
9484 rtx tmp = operands[2];
9485 operands[2] = operands[3];
9486 operands[3] = tmp;
9487 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
9488 (GET_CODE (operands[1])),
9489 VOIDmode, ix86_compare_op0,
9490 ix86_compare_op1);
9491 }
9492 if (GET_MODE (operands[0]) == SFmode)
9493 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
9494 operands[2], operands[3],
9495 ix86_compare_op0, ix86_compare_op1));
9496 else
9497 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
9498 operands[2], operands[3],
9499 ix86_compare_op0, ix86_compare_op1));
9500 return 1;
9501 }
9502
9503 /* The floating point conditional move instructions don't directly
9504 support conditions resulting from a signed integer comparison. */
9505
9506 code = GET_CODE (operands[1]);
9507 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9508
9512 if (!fcmov_comparison_operator (compare_op, VOIDmode))
9513 {
9514 if (second_test != NULL || bypass_test != NULL)
9515 abort ();
9516 tmp = gen_reg_rtx (QImode);
9517 ix86_expand_setcc (code, tmp);
9518 code = NE;
9519 ix86_compare_op0 = tmp;
9520 ix86_compare_op1 = const0_rtx;
9521 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9522 }
9523 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9524 {
9525 tmp = gen_reg_rtx (GET_MODE (operands[0]));
9526 emit_move_insn (tmp, operands[3]);
9527 operands[3] = tmp;
9528 }
9529 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9530 {
9531 tmp = gen_reg_rtx (GET_MODE (operands[0]));
9532 emit_move_insn (tmp, operands[2]);
9533 operands[2] = tmp;
9534 }
9535
9536 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9537 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9538 compare_op,
9539 operands[2],
9540 operands[3])));
9541 if (bypass_test)
9542 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9543 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9544 bypass_test,
9545 operands[3],
9546 operands[0])));
9547 if (second_test)
9548 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9549 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9550 second_test,
9551 operands[2],
9552 operands[0])));
9553
9554 return 1;
9555 }
9556
9557 /* Expand a conditional increment or decrement using adc/sbb instructions.
9558    The default case using setcc followed by the conditional move can be
9559    done by generic code.  */
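/* For illustration, a sketch of the kind of code this expander produces:
   for unsigned operands a (in %eax), b (in %ebx) and x (in %ecx), the
   conditional increment

       if (a < b) x++;

   can be emitted roughly as

       cmpl  %ebx, %eax        sets CF when a < b
       adcl  $0, %ecx          adds CF (0 or 1) to x

   avoiding both a branch and a conditional move.  */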
9560 int
9561 ix86_expand_int_addcc (rtx operands[])
9562 {
9563 enum rtx_code code = GET_CODE (operands[1]);
9564 rtx compare_op;
9565 rtx val = const0_rtx;
9566 bool fpcmp = false;
9567 enum machine_mode mode = GET_MODE (operands[0]);
9568
9569 if (operands[3] != const1_rtx
9570 && operands[3] != constm1_rtx)
9571 return 0;
9572 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
9573 ix86_compare_op1, &compare_op))
9574 return 0;
9575 code = GET_CODE (compare_op);
9576
9577 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9578 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9579 {
9580 fpcmp = true;
9581 code = ix86_fp_compare_code_to_integer (code);
9582 }
9583
9584 if (code != LTU)
9585 {
9586 val = constm1_rtx;
9587 if (fpcmp)
9588 PUT_CODE (compare_op,
9589 reverse_condition_maybe_unordered
9590 (GET_CODE (compare_op)));
9591 else
9592 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
9593 }
9594 PUT_MODE (compare_op, mode);
9595
9596 /* Construct either adc or sbb insn. */
9597 if ((code == LTU) == (operands[3] == constm1_rtx))
9598 {
9599 switch (GET_MODE (operands[0]))
9600 {
9601 case QImode:
9602 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
9603 break;
9604 case HImode:
9605 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
9606 break;
9607 case SImode:
9608 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
9609 break;
9610 case DImode:
9611 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
9612 break;
9613 default:
9614 abort ();
9615 }
9616 }
9617 else
9618 {
9619 switch (GET_MODE (operands[0]))
9620 {
9621 case QImode:
9622 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
9623 break;
9624 case HImode:
9625 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
9626 break;
9627 case SImode:
9628 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
9629 break;
9630 case DImode:
9631 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
9632 break;
9633 default:
9634 abort ();
9635 }
9636 }
9637 return 1; /* DONE */
9638 }
9639
9640
9641 /* Split operands 0 and 1 into SImode parts.  Similar to split_di, but
9642    works for floating point parameters and non-offsettable memories.
9643    For pushes, it returns just stack offsets; the values will be saved
9644    in the right order.  At most three parts are generated.  */
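/* For illustration: on a 32-bit target a DFmode constant is split into two
   SImode immediates holding its halves, e.g. 1.0 (0x3FF0000000000000 as an
   IEEE double) yields parts[0] = 0x00000000 and parts[1] = 0x3FF00000,
   while an XFmode value is split into three SImode parts.  */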
9645
9646 static int
9647 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
9648 {
9649 int size;
9650
9651 if (!TARGET_64BIT)
9652 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
9653 else
9654 size = (GET_MODE_SIZE (mode) + 4) / 8;
9655
9656 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
9657 abort ();
9658 if (size < 2 || size > 3)
9659 abort ();
9660
9661 /* Optimize constant pool references to immediates.  This is used by fp
9662    moves that force all constants to memory to allow combining.  */
9663 if (GET_CODE (operand) == MEM && MEM_READONLY_P (operand))
9664 {
9665 rtx tmp = maybe_get_pool_constant (operand);
9666 if (tmp)
9667 operand = tmp;
9668 }
9669
9670 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
9671 {
9672 /* The only non-offsettable memories we handle are pushes.  */
9673 if (! push_operand (operand, VOIDmode))
9674 abort ();
9675
9676 operand = copy_rtx (operand);
9677 PUT_MODE (operand, Pmode);
9678 parts[0] = parts[1] = parts[2] = operand;
9679 }
9680 else if (!TARGET_64BIT)
9681 {
9682 if (mode == DImode)
9683 split_di (&operand, 1, &parts[0], &parts[1]);
9684 else
9685 {
9686 if (REG_P (operand))
9687 {
9688 if (!reload_completed)
9689 abort ();
9690 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
9691 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
9692 if (size == 3)
9693 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
9694 }
9695 else if (offsettable_memref_p (operand))
9696 {
9697 operand = adjust_address (operand, SImode, 0);
9698 parts[0] = operand;
9699 parts[1] = adjust_address (operand, SImode, 4);
9700 if (size == 3)
9701 parts[2] = adjust_address (operand, SImode, 8);
9702 }
9703 else if (GET_CODE (operand) == CONST_DOUBLE)
9704 {
9705 REAL_VALUE_TYPE r;
9706 long l[4];
9707
9708 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
9709 switch (mode)
9710 {
9711 case XFmode:
9712 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
9713 parts[2] = gen_int_mode (l[2], SImode);
9714 break;
9715 case DFmode:
9716 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
9717 break;
9718 default:
9719 abort ();
9720 }
9721 parts[1] = gen_int_mode (l[1], SImode);
9722 parts[0] = gen_int_mode (l[0], SImode);
9723 }
9724 else
9725 abort ();
9726 }
9727 }
9728 else
9729 {
9730 if (mode == TImode)
9731 split_ti (&operand, 1, &parts[0], &parts[1]);
9732 if (mode == XFmode || mode == TFmode)
9733 {
9734 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
9735 if (REG_P (operand))
9736 {
9737 if (!reload_completed)
9738 abort ();
9739 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
9740 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
9741 }
9742 else if (offsettable_memref_p (operand))
9743 {
9744 operand = adjust_address (operand, DImode, 0);
9745 parts[0] = operand;
9746 parts[1] = adjust_address (operand, upper_mode, 8);
9747 }
9748 else if (GET_CODE (operand) == CONST_DOUBLE)
9749 {
9750 REAL_VALUE_TYPE r;
9751 long l[3];
9752
9753 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
9754 real_to_target (l, &r, mode);
9755 /* Do not use shift by 32 to avoid warning on 32bit systems. */
9756 if (HOST_BITS_PER_WIDE_INT >= 64)
9757 parts[0]
9758 = gen_int_mode
9759 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
9760 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
9761 DImode);
9762 else
9763 parts[0] = immed_double_const (l[0], l[1], DImode);
9764 if (upper_mode == SImode)
9765 parts[1] = gen_int_mode (l[2], SImode);
9766 else if (HOST_BITS_PER_WIDE_INT >= 64)
9767 parts[1]
9768 = gen_int_mode
9769 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
9770 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
9771 DImode);
9772 else
9773 parts[1] = immed_double_const (l[2], l[3], DImode);
9774 }
9775 else
9776 abort ();
9777 }
9778 }
9779
9780 return size;
9781 }
9782
9783 /* Emit insns to perform a move or push of DI, DF, and XF values.
9784    Return false when normal moves are needed; true when all required
9785    insns have been emitted.  Operands 2-4 contain the input values
9786    in the correct order; operands 5-7 contain the output values.  */
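/* For illustration: when splitting a DImode load whose address register is
   also the low half of the destination, say loading through %eax into the
   %eax/%edx pair, the high word has to be copied first,

       movl 4(%eax), %edx
       movl (%eax), %eax

   since copying the low word first would clobber the address.  The
   ordering logic below handles such overlaps.  */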
9787
9788 void
9789 ix86_split_long_move (rtx operands[])
9790 {
9791 rtx part[2][3];
9792 int nparts;
9793 int push = 0;
9794 int collisions = 0;
9795 enum machine_mode mode = GET_MODE (operands[0]);
9796
9797 /* The DFmode expanders may ask us to move a double.
9798    For a 64bit target this is a single move.  By hiding that fact
9799    here we simplify the i386.md splitters.  */
9800 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
9801 {
9802 /* Optimize constant pool references to immediates.  This is used by
9803    fp moves that force all constants to memory to allow combining.  */
9804
9805 if (GET_CODE (operands[1]) == MEM
9806 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
9807 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
9808 operands[1] = get_pool_constant (XEXP (operands[1], 0));
9809 if (push_operand (operands[0], VOIDmode))
9810 {
9811 operands[0] = copy_rtx (operands[0]);
9812 PUT_MODE (operands[0], Pmode);
9813 }
9814 else
9815 operands[0] = gen_lowpart (DImode, operands[0]);
9816 operands[1] = gen_lowpart (DImode, operands[1]);
9817 emit_move_insn (operands[0], operands[1]);
9818 return;
9819 }
9820
9821 /* The only non-offsettable memory we handle is a push.  */
9822 if (push_operand (operands[0], VOIDmode))
9823 push = 1;
9824 else if (GET_CODE (operands[0]) == MEM
9825 && ! offsettable_memref_p (operands[0]))
9826 abort ();
9827
9828 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
9829 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
9830
9831 /* When emitting a push, take care of source operands on the stack.  */
9832 if (push && GET_CODE (operands[1]) == MEM
9833 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
9834 {
9835 if (nparts == 3)
9836 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
9837 XEXP (part[1][2], 0));
9838 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
9839 XEXP (part[1][1], 0));
9840 }
9841
9842 /* We need to do the copy in the right order in case an address register
9843    of the source overlaps the destination.  */
9844 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
9845 {
9846 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
9847 collisions++;
9848 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
9849 collisions++;
9850 if (nparts == 3
9851 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
9852 collisions++;
9853
9854 /* Collision in the middle part can be handled by reordering. */
9855 if (collisions == 1 && nparts == 3
9856 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
9857 {
9858 rtx tmp;
9859 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
9860 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
9861 }
9862
9863 /* If there are more collisions, we can't handle it by reordering.
9864 Do an lea to the last part and use only one colliding move. */
9865 else if (collisions > 1)
9866 {
9867 rtx base;
9868
9869 collisions = 1;
9870
9871 base = part[0][nparts - 1];
9872
9873 /* Handle the case when the last part isn't valid for lea.
9874    This happens in 64-bit mode when storing the 12-byte XFmode.  */
9875 if (GET_MODE (base) != Pmode)
9876 base = gen_rtx_REG (Pmode, REGNO (base));
9877
9878 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
9879 part[1][0] = replace_equiv_address (part[1][0], base);
9880 part[1][1] = replace_equiv_address (part[1][1],
9881 plus_constant (base, UNITS_PER_WORD));
9882 if (nparts == 3)
9883 part[1][2] = replace_equiv_address (part[1][2],
9884 plus_constant (base, 8));
9885 }
9886 }
9887
9888 if (push)
9889 {
9890 if (!TARGET_64BIT)
9891 {
9892 if (nparts == 3)
9893 {
9894 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
9895 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
9896 emit_move_insn (part[0][2], part[1][2]);
9897 }
9898 }
9899 else
9900 {
9901 /* In 64bit mode we don't have a 32bit push available.  If this is a
9902    register, it is OK - we will just use the larger counterpart.  We also
9903    retype memory - this comes from an attempt to avoid a REX prefix when
9904    moving the second half of a TFmode value.  */
9905 if (GET_MODE (part[1][1]) == SImode)
9906 {
9907 if (GET_CODE (part[1][1]) == MEM)
9908 part[1][1] = adjust_address (part[1][1], DImode, 0);
9909 else if (REG_P (part[1][1]))
9910 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
9911 else
9912 abort ();
9913 if (GET_MODE (part[1][0]) == SImode)
9914 part[1][0] = part[1][1];
9915 }
9916 }
9917 emit_move_insn (part[0][1], part[1][1]);
9918 emit_move_insn (part[0][0], part[1][0]);
9919 return;
9920 }
9921
9922 /* Choose the correct order so that we do not overwrite the source before it is copied.  */
9923 if ((REG_P (part[0][0])
9924 && REG_P (part[1][1])
9925 && (REGNO (part[0][0]) == REGNO (part[1][1])
9926 || (nparts == 3
9927 && REGNO (part[0][0]) == REGNO (part[1][2]))))
9928 || (collisions > 0
9929 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
9930 {
9931 if (nparts == 3)
9932 {
9933 operands[2] = part[0][2];
9934 operands[3] = part[0][1];
9935 operands[4] = part[0][0];
9936 operands[5] = part[1][2];
9937 operands[6] = part[1][1];
9938 operands[7] = part[1][0];
9939 }
9940 else
9941 {
9942 operands[2] = part[0][1];
9943 operands[3] = part[0][0];
9944 operands[5] = part[1][1];
9945 operands[6] = part[1][0];
9946 }
9947 }
9948 else
9949 {
9950 if (nparts == 3)
9951 {
9952 operands[2] = part[0][0];
9953 operands[3] = part[0][1];
9954 operands[4] = part[0][2];
9955 operands[5] = part[1][0];
9956 operands[6] = part[1][1];
9957 operands[7] = part[1][2];
9958 }
9959 else
9960 {
9961 operands[2] = part[0][0];
9962 operands[3] = part[0][1];
9963 operands[5] = part[1][0];
9964 operands[6] = part[1][1];
9965 }
9966 }
9967
9968 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
9969 if (optimize_size)
9970 {
9971 if (GET_CODE (operands[5]) == CONST_INT
9972 && operands[5] != const0_rtx
9973 && REG_P (operands[2]))
9974 {
9975 if (GET_CODE (operands[6]) == CONST_INT
9976 && INTVAL (operands[6]) == INTVAL (operands[5]))
9977 operands[6] = operands[2];
9978
9979 if (nparts == 3
9980 && GET_CODE (operands[7]) == CONST_INT
9981 && INTVAL (operands[7]) == INTVAL (operands[5]))
9982 operands[7] = operands[2];
9983 }
9984
9985 if (nparts == 3
9986 && GET_CODE (operands[6]) == CONST_INT
9987 && operands[6] != const0_rtx
9988 && REG_P (operands[3])
9989 && GET_CODE (operands[7]) == CONST_INT
9990 && INTVAL (operands[7]) == INTVAL (operands[6]))
9991 operands[7] = operands[3];
9992 }
9993
9994 emit_move_insn (operands[2], operands[5]);
9995 emit_move_insn (operands[3], operands[6]);
9996 if (nparts == 3)
9997 emit_move_insn (operands[4], operands[7]);
9998
9999 return;
10000 }
10001
10002 /* Helper function of ix86_split_ashldi used to generate an SImode
10003 left shift by a constant, either using a single shift or
10004 a sequence of add instructions. */
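/* For illustration: with a shift count of 2 on a CPU where two adds cost no
   more than a constant shift, this helper emits

       addl %eax, %eax
       addl %eax, %eax

   instead of "shll $2, %eax" (the register is chosen here only for the
   example).  */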
10005
10006 static void
10007 ix86_expand_ashlsi3_const (rtx operand, int count)
10008 {
10009 if (count == 1)
10010 emit_insn (gen_addsi3 (operand, operand, operand));
10011 else if (!optimize_size
10012 && count * ix86_cost->add <= ix86_cost->shift_const)
10013 {
10014 int i;
10015 for (i=0; i<count; i++)
10016 emit_insn (gen_addsi3 (operand, operand, operand));
10017 }
10018 else
10019 emit_insn (gen_ashlsi3 (operand, operand, GEN_INT (count)));
10020 }
10021
10022 void
10023 ix86_split_ashldi (rtx *operands, rtx scratch)
10024 {
10025 rtx low[2], high[2];
10026 int count;
10027
10028 if (GET_CODE (operands[2]) == CONST_INT)
10029 {
10030 split_di (operands, 2, low, high);
10031 count = INTVAL (operands[2]) & 63;
10032
10033 if (count >= 32)
10034 {
10035 emit_move_insn (high[0], low[1]);
10036 emit_move_insn (low[0], const0_rtx);
10037
10038 if (count > 32)
10039 ix86_expand_ashlsi3_const (high[0], count - 32);
10040 }
10041 else
10042 {
10043 if (!rtx_equal_p (operands[0], operands[1]))
10044 emit_move_insn (operands[0], operands[1]);
10045 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
10046 ix86_expand_ashlsi3_const (low[0], count);
10047 }
10048 return;
10049 }
10050
10051 split_di (operands, 1, low, high);
10052
10053 if (operands[1] == const1_rtx)
10054 {
10055 /* Assuming we've chosen QImode-capable registers, then 1LL << N
10056    can be done with two 32-bit shifts, no branches and no cmoves.  */
10057 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
10058 {
10059 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
10060
10061 ix86_expand_clear (low[0]);
10062 ix86_expand_clear (high[0]);
10063 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (32)));
10064
10065 d = gen_lowpart (QImode, low[0]);
10066 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
10067 s = gen_rtx_EQ (QImode, flags, const0_rtx);
10068 emit_insn (gen_rtx_SET (VOIDmode, d, s));
10069
10070 d = gen_lowpart (QImode, high[0]);
10071 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
10072 s = gen_rtx_NE (QImode, flags, const0_rtx);
10073 emit_insn (gen_rtx_SET (VOIDmode, d, s));
10074 }
10075
10076 /* Otherwise, we can get the same results by manually performing
10077 a bit extract operation on bit 5, and then performing the two
10078 shifts. The two methods of getting 0/1 into low/high are exactly
10079 the same size. Avoiding the shift in the bit extract case helps
10080 pentium4 a bit; no one else seems to care much either way. */
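 /* For illustration, a C sketch of this bit-extract variant, computing
    1LL << n into 32-bit halves without branches (n is the shift count;
    the shifts rely on the hardware masking the count to 5 bits):

        unsigned int high = (n >> 5) & 1;    1 iff n >= 32
        unsigned int low  = high ^ 1;        the other half gets the 1
        high <<= (n & 31);
        low  <<= (n & 31);

    e.g. n = 40 gives high = 1 << 8 and low = 0, i.e. 1LL << 40.  */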
10081 else
10082 {
10083 rtx x;
10084
10085 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
10086 x = gen_rtx_ZERO_EXTEND (SImode, operands[2]);
10087 else
10088 x = gen_lowpart (SImode, operands[2]);
10089 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
10090
10091 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (5)));
10092 emit_insn (gen_andsi3 (high[0], high[0], GEN_INT (1)));
10093 emit_move_insn (low[0], high[0]);
10094 emit_insn (gen_xorsi3 (low[0], low[0], GEN_INT (1)));
10095 }
10096
10097 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
10098 emit_insn (gen_ashlsi3 (high[0], high[0], operands[2]));
10099 return;
10100 }
10101
10102 if (operands[1] == constm1_rtx)
10103 {
10104 /* For -1LL << N, we can avoid the shld instruction, because we
10105 know that we're shifting 0...31 ones into a -1. */
10106 emit_move_insn (low[0], constm1_rtx);
10107 if (optimize_size)
10108 emit_move_insn (high[0], low[0]);
10109 else
10110 emit_move_insn (high[0], constm1_rtx);
10111 }
10112 else
10113 {
10114 if (!rtx_equal_p (operands[0], operands[1]))
10115 emit_move_insn (operands[0], operands[1]);
10116
10117 split_di (operands, 1, low, high);
10118 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
10119 }
10120
10121 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
10122
10123 if (TARGET_CMOVE && scratch)
10124 {
10125 ix86_expand_clear (scratch);
10126 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
10127 }
10128 else
10129 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
10130 }
10131
10132 void
10133 ix86_split_ashrdi (rtx *operands, rtx scratch)
10134 {
10135 rtx low[2], high[2];
10136 int count;
10137
10138 if (GET_CODE (operands[2]) == CONST_INT)
10139 {
10140 split_di (operands, 2, low, high);
10141 count = INTVAL (operands[2]) & 63;
10142
10143 if (count == 63)
10144 {
10145 emit_move_insn (high[0], high[1]);
10146 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10147 emit_move_insn (low[0], high[0]);
10148
10149 }
10150 else if (count >= 32)
10151 {
10152 emit_move_insn (low[0], high[1]);
10153 emit_move_insn (high[0], low[0]);
10154 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10155 if (count > 32)
10156 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
10157 }
10158 else
10159 {
10160 if (!rtx_equal_p (operands[0], operands[1]))
10161 emit_move_insn (operands[0], operands[1]);
10162 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10163 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
10164 }
10165 }
10166 else
10167 {
10168 if (!rtx_equal_p (operands[0], operands[1]))
10169 emit_move_insn (operands[0], operands[1]);
10170
10171 split_di (operands, 1, low, high);
10172
10173 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10174 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
10175
10176 if (TARGET_CMOVE && scratch)
10177 {
10178 emit_move_insn (scratch, high[0]);
10179 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
10180 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10181 scratch));
10182 }
10183 else
10184 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
10185 }
10186 }
10187
10188 void
10189 ix86_split_lshrdi (rtx *operands, rtx scratch)
10190 {
10191 rtx low[2], high[2];
10192 int count;
10193
10194 if (GET_CODE (operands[2]) == CONST_INT)
10195 {
10196 split_di (operands, 2, low, high);
10197 count = INTVAL (operands[2]) & 63;
10198
10199 if (count >= 32)
10200 {
10201 emit_move_insn (low[0], high[1]);
10202 ix86_expand_clear (high[0]);
10203
10204 if (count > 32)
10205 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
10206 }
10207 else
10208 {
10209 if (!rtx_equal_p (operands[0], operands[1]))
10210 emit_move_insn (operands[0], operands[1]);
10211 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10212 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
10213 }
10214 }
10215 else
10216 {
10217 if (!rtx_equal_p (operands[0], operands[1]))
10218 emit_move_insn (operands[0], operands[1]);
10219
10220 split_di (operands, 1, low, high);
10221
10222 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10223 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
10224
10225 /* Heh. By reversing the arguments, we can reuse this pattern. */
10226 if (TARGET_CMOVE && scratch)
10227 {
10228 ix86_expand_clear (scratch);
10229 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10230 scratch));
10231 }
10232 else
10233 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
10234 }
10235 }
10236
10237 /* Helper function for the string operations below.  Test whether the VALUE
10238    bits of VARIABLE are clear (it is suitably aligned); if so, jump to the returned label.  */
10239 static rtx
10240 ix86_expand_aligntest (rtx variable, int value)
10241 {
10242 rtx label = gen_label_rtx ();
10243 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
10244 if (GET_MODE (variable) == DImode)
10245 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
10246 else
10247 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
10248 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
10249 1, label);
10250 return label;
10251 }
10252
10253 /* Subtract VALUE from COUNTREG.  */
10254 static void
10255 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
10256 {
10257 if (GET_MODE (countreg) == DImode)
10258 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
10259 else
10260 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
10261 }
10262
10263 /* Zero-extend EXP, which may be in SImode, into a Pmode register.  */
10264 rtx
10265 ix86_zero_extend_to_Pmode (rtx exp)
10266 {
10267 rtx r;
10268 if (GET_MODE (exp) == VOIDmode)
10269 return force_reg (Pmode, exp);
10270 if (GET_MODE (exp) == Pmode)
10271 return copy_to_mode_reg (Pmode, exp);
10272 r = gen_reg_rtx (Pmode);
10273 emit_insn (gen_zero_extendsidi2 (r, exp));
10274 return r;
10275 }
10276
10277 /* Expand string move (memcpy) operation. Use i386 string operations when
10278 profitable. expand_clrmem contains similar code. */
10279 int
10280 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp)
10281 {
10282 rtx srcreg, destreg, countreg, srcexp, destexp;
10283 enum machine_mode counter_mode;
10284 HOST_WIDE_INT align = 0;
10285 unsigned HOST_WIDE_INT count = 0;
10286
10287 if (GET_CODE (align_exp) == CONST_INT)
10288 align = INTVAL (align_exp);
10289
10290 /* Can't use any of this if the user has appropriated esi or edi. */
10291 if (global_regs[4] || global_regs[5])
10292 return 0;
10293
10294 /* This simple hack avoids all inlining code and simplifies code below. */
10295 if (!TARGET_ALIGN_STRINGOPS)
10296 align = 64;
10297
10298 if (GET_CODE (count_exp) == CONST_INT)
10299 {
10300 count = INTVAL (count_exp);
10301 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
10302 return 0;
10303 }
10304
10305 /* Figure out proper mode for counter. For 32bits it is always SImode,
10306 for 64bits use SImode when possible, otherwise DImode.
10307 Set count to number of bytes copied when known at compile time. */
10308 if (!TARGET_64BIT
10309 || GET_MODE (count_exp) == SImode
10310 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
10311 counter_mode = SImode;
10312 else
10313 counter_mode = DImode;
10314
10315 if (counter_mode != SImode && counter_mode != DImode)
10316 abort ();
10317
10318 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
10319 if (destreg != XEXP (dst, 0))
10320 dst = replace_equiv_address_nv (dst, destreg);
10321 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
10322 if (srcreg != XEXP (src, 0))
10323 src = replace_equiv_address_nv (src, srcreg);
10324
10325 /* When optimizing for size, emit a simple rep ; movsb instruction for
10326    counts not divisible by 4.  */
10327
10328 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
10329 {
10330 emit_insn (gen_cld ());
10331 countreg = ix86_zero_extend_to_Pmode (count_exp);
10332 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
10333 srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
10334 emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
10335 destexp, srcexp));
10336 }
10337
10338 /* For constant aligned (or small unaligned) copies use rep movsl
10339 followed by code copying the rest. For PentiumPro ensure 8 byte
10340 alignment to allow rep movsl acceleration. */
10341
10342 else if (count != 0
10343 && (align >= 8
10344 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
10345 || optimize_size || count < (unsigned int) 64))
10346 {
10347 unsigned HOST_WIDE_INT offset = 0;
10348 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10349 rtx srcmem, dstmem;
10350
10351 emit_insn (gen_cld ());
10352 if (count & ~(size - 1))
10353 {
10354 countreg = copy_to_mode_reg (counter_mode,
10355 GEN_INT ((count >> (size == 4 ? 2 : 3))
10356 & (TARGET_64BIT ? -1 : 0x3fffffff)));
10357 countreg = ix86_zero_extend_to_Pmode (countreg);
10358
10359 destexp = gen_rtx_ASHIFT (Pmode, countreg,
10360 GEN_INT (size == 4 ? 2 : 3));
10361 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
10362 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
10363
10364 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
10365 countreg, destexp, srcexp));
10366 offset = count & ~(size - 1);
10367 }
10368 if (size == 8 && (count & 0x04))
10369 {
10370 srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
10371 offset);
10372 dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
10373 offset);
10374 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10375 offset += 4;
10376 }
10377 if (count & 0x02)
10378 {
10379 srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
10380 offset);
10381 dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
10382 offset);
10383 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10384 offset += 2;
10385 }
10386 if (count & 0x01)
10387 {
10388 srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
10389 offset);
10390 dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
10391 offset);
10392 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10393 }
10394 }
10395 /* The generic code based on the glibc implementation:
10396 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
10397 allowing accelerated copying there)
10398 - copy the data using rep movsl
10399 - copy the rest. */
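 /* For illustration, a rough C-level sketch of this strategy for a byte
    count n (word_size stands for 4 or 8 depending on the target; the
    names are only for exposition):

        while (((uintptr_t) dst & (word_size - 1)) && n != 0)
          { *dst++ = *src++; n--; }       align the destination
        emit "rep movsl" (or movsq) for n / word_size words;
        copy the remaining n % word_size tail bytes with single
        movsl / movsw / movsb instructions.  */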
10400 else
10401 {
10402 rtx countreg2;
10403 rtx label = NULL;
10404 rtx srcmem, dstmem;
10405 int desired_alignment = (TARGET_PENTIUMPRO
10406 && (count == 0 || count >= (unsigned int) 260)
10407 ? 8 : UNITS_PER_WORD);
10408 /* Get rid of MEM_OFFSETs, they won't be accurate. */
10409 dst = change_address (dst, BLKmode, destreg);
10410 src = change_address (src, BLKmode, srcreg);
10411
10412 /* In case we don't know anything about the alignment, default to the
10413    library version, since it is usually equally fast and results in
10414    shorter code.
10415
10416    Also emit a call when we know that the count is large and the call
10417    overhead will not be important.  */
10418 if (!TARGET_INLINE_ALL_STRINGOPS
10419 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
10420 return 0;
10421
10422 if (TARGET_SINGLE_STRINGOP)
10423 emit_insn (gen_cld ());
10424
10425 countreg2 = gen_reg_rtx (Pmode);
10426 countreg = copy_to_mode_reg (counter_mode, count_exp);
10427
10428 /* We don't use loops to align the destination or to copy parts smaller
10429    than 4 bytes, because gcc is able to optimize such code better (in
10430    the case the destination or the count really is aligned, gcc is often
10431    able to predict the branches) and also because it is friendlier to
10432    hardware branch prediction.
10433
10434    Using loops is beneficial for the generic case, because we can
10435    handle small counts using the loops.  Many CPUs (such as Athlon)
10436    have large REP prefix setup costs.
10437
10438    This is quite costly.  Maybe we can revisit this decision later or
10439    add some customizability to this code.  */
10440
10441 if (count == 0 && align < desired_alignment)
10442 {
10443 label = gen_label_rtx ();
10444 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
10445 LEU, 0, counter_mode, 1, label);
10446 }
10447 if (align <= 1)
10448 {
10449 rtx label = ix86_expand_aligntest (destreg, 1);
10450 srcmem = change_address (src, QImode, srcreg);
10451 dstmem = change_address (dst, QImode, destreg);
10452 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10453 ix86_adjust_counter (countreg, 1);
10454 emit_label (label);
10455 LABEL_NUSES (label) = 1;
10456 }
10457 if (align <= 2)
10458 {
10459 rtx label = ix86_expand_aligntest (destreg, 2);
10460 srcmem = change_address (src, HImode, srcreg);
10461 dstmem = change_address (dst, HImode, destreg);
10462 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10463 ix86_adjust_counter (countreg, 2);
10464 emit_label (label);
10465 LABEL_NUSES (label) = 1;
10466 }
10467 if (align <= 4 && desired_alignment > 4)
10468 {
10469 rtx label = ix86_expand_aligntest (destreg, 4);
10470 srcmem = change_address (src, SImode, srcreg);
10471 dstmem = change_address (dst, SImode, destreg);
10472 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10473 ix86_adjust_counter (countreg, 4);
10474 emit_label (label);
10475 LABEL_NUSES (label) = 1;
10476 }
10477
10478 if (label && desired_alignment > 4 && !TARGET_64BIT)
10479 {
10480 emit_label (label);
10481 LABEL_NUSES (label) = 1;
10482 label = NULL_RTX;
10483 }
10484 if (!TARGET_SINGLE_STRINGOP)
10485 emit_insn (gen_cld ());
10486 if (TARGET_64BIT)
10487 {
10488 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10489 GEN_INT (3)));
10490 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
10491 }
10492 else
10493 {
10494 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
10495 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
10496 }
10497 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
10498 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
10499 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
10500 countreg2, destexp, srcexp));
10501
10502 if (label)
10503 {
10504 emit_label (label);
10505 LABEL_NUSES (label) = 1;
10506 }
10507 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10508 {
10509 srcmem = change_address (src, SImode, srcreg);
10510 dstmem = change_address (dst, SImode, destreg);
10511 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10512 }
10513 if ((align <= 4 || count == 0) && TARGET_64BIT)
10514 {
10515 rtx label = ix86_expand_aligntest (countreg, 4);
10516 srcmem = change_address (src, SImode, srcreg);
10517 dstmem = change_address (dst, SImode, destreg);
10518 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10519 emit_label (label);
10520 LABEL_NUSES (label) = 1;
10521 }
10522 if (align > 2 && count != 0 && (count & 2))
10523 {
10524 srcmem = change_address (src, HImode, srcreg);
10525 dstmem = change_address (dst, HImode, destreg);
10526 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10527 }
10528 if (align <= 2 || count == 0)
10529 {
10530 rtx label = ix86_expand_aligntest (countreg, 2);
10531 srcmem = change_address (src, HImode, srcreg);
10532 dstmem = change_address (dst, HImode, destreg);
10533 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10534 emit_label (label);
10535 LABEL_NUSES (label) = 1;
10536 }
10537 if (align > 1 && count != 0 && (count & 1))
10538 {
10539 srcmem = change_address (src, QImode, srcreg);
10540 dstmem = change_address (dst, QImode, destreg);
10541 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10542 }
10543 if (align <= 1 || count == 0)
10544 {
10545 rtx label = ix86_expand_aligntest (countreg, 1);
10546 srcmem = change_address (src, QImode, srcreg);
10547 dstmem = change_address (dst, QImode, destreg);
10548 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10549 emit_label (label);
10550 LABEL_NUSES (label) = 1;
10551 }
10552 }
10553
10554 return 1;
10555 }
10556
10557 /* Expand string clear operation (bzero). Use i386 string operations when
10558 profitable. expand_movmem contains similar code. */
10559 int
10560 ix86_expand_clrmem (rtx dst, rtx count_exp, rtx align_exp)
10561 {
10562 rtx destreg, zeroreg, countreg, destexp;
10563 enum machine_mode counter_mode;
10564 HOST_WIDE_INT align = 0;
10565 unsigned HOST_WIDE_INT count = 0;
10566
10567 if (GET_CODE (align_exp) == CONST_INT)
10568 align = INTVAL (align_exp);
10569
10570 /* Can't use any of this if the user has appropriated esi. */
10571 if (global_regs[4])
10572 return 0;
10573
10574 /* This simple hack avoids all inlining code and simplifies code below. */
10575 if (!TARGET_ALIGN_STRINGOPS)
10576 align = 32;
10577
10578 if (GET_CODE (count_exp) == CONST_INT)
10579 {
10580 count = INTVAL (count_exp);
10581 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
10582 return 0;
10583 }
10584 /* Figure out proper mode for counter. For 32bits it is always SImode,
10585 for 64bits use SImode when possible, otherwise DImode.
10586 Set count to number of bytes copied when known at compile time. */
10587 if (!TARGET_64BIT
10588 || GET_MODE (count_exp) == SImode
10589 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
10590 counter_mode = SImode;
10591 else
10592 counter_mode = DImode;
10593
10594 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
10595 if (destreg != XEXP (dst, 0))
10596 dst = replace_equiv_address_nv (dst, destreg);
10597
10598
10599 /* When optimizing for size, emit a simple rep ; stosb instruction for
10600    counts not divisible by 4.  The movl $N, %ecx; rep; stosb
10601    sequence is 7 bytes long, so if optimizing for size and the count is
10602    small enough that a few stosl, stosw and stosb instructions without
10603    rep are shorter, fall through into the next if.  */
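 /* For illustration: clearing 11 bytes while optimizing for size gives
    (count & 3) + (count >> 2) = 3 + 2 = 5 <= 7, so the condition below
    is not taken and the discrete stosl/stosw/stosb sequence (roughly
    5 bytes of code) is used instead of the 7-byte
    movl $N, %ecx; rep; stosb.  */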
10604
10605 if ((!optimize || optimize_size)
10606 && (count == 0
10607 || ((count & 0x03)
10608 && (!optimize_size || (count & 0x03) + (count >> 2) > 7))))
10609 {
10610 emit_insn (gen_cld ());
10611
10612 countreg = ix86_zero_extend_to_Pmode (count_exp);
10613 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
10614 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
10615 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
10616 }
10617 else if (count != 0
10618 && (align >= 8
10619 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
10620 || optimize_size || count < (unsigned int) 64))
10621 {
10622 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10623 unsigned HOST_WIDE_INT offset = 0;
10624
10625 emit_insn (gen_cld ());
10626
10627 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
10628 if (count & ~(size - 1))
10629 {
10630 unsigned HOST_WIDE_INT repcount;
10631 unsigned int max_nonrep;
10632
10633 repcount = count >> (size == 4 ? 2 : 3);
10634 if (!TARGET_64BIT)
10635 repcount &= 0x3fffffff;
10636
10637 /* movl $N, %ecx; rep; stosl is 7 bytes, while N x stosl is N bytes.
10638 movl $N, %ecx; rep; stosq is 8 bytes, while N x stosq is 2xN
10639 bytes. In both cases the latter seems to be faster for small
10640 values of N. */
10641 max_nonrep = size == 4 ? 7 : 4;
10642 if (!optimize_size)
10643 switch (ix86_tune)
10644 {
10645 case PROCESSOR_PENTIUM4:
10646 case PROCESSOR_NOCONA:
10647 max_nonrep = 3;
10648 break;
10649 default:
10650 break;
10651 }
10652
10653 if (repcount <= max_nonrep)
10654 while (repcount-- > 0)
10655 {
10656 rtx mem = adjust_automodify_address_nv (dst,
10657 GET_MODE (zeroreg),
10658 destreg, offset);
10659 emit_insn (gen_strset (destreg, mem, zeroreg));
10660 offset += size;
10661 }
10662 else
10663 {
10664 countreg = copy_to_mode_reg (counter_mode, GEN_INT (repcount));
10665 countreg = ix86_zero_extend_to_Pmode (countreg);
10666 destexp = gen_rtx_ASHIFT (Pmode, countreg,
10667 GEN_INT (size == 4 ? 2 : 3));
10668 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
10669 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg,
10670 destexp));
10671 offset = count & ~(size - 1);
10672 }
10673 }
10674 if (size == 8 && (count & 0x04))
10675 {
10676 rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
10677 offset);
10678 emit_insn (gen_strset (destreg, mem,
10679 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10680 offset += 4;
10681 }
10682 if (count & 0x02)
10683 {
10684 rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
10685 offset);
10686 emit_insn (gen_strset (destreg, mem,
10687 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10688 offset += 2;
10689 }
10690 if (count & 0x01)
10691 {
10692 rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
10693 offset);
10694 emit_insn (gen_strset (destreg, mem,
10695 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10696 }
10697 }
10698 else
10699 {
10700 rtx countreg2;
10701 rtx label = NULL;
10702 /* Compute desired alignment of the string operation. */
10703 int desired_alignment = (TARGET_PENTIUMPRO
10704 && (count == 0 || count >= (unsigned int) 260)
10705 ? 8 : UNITS_PER_WORD);
10706
10707 /* In case we don't know anything about the alignment, default to the
10708    library version, since it is usually equally fast and results in
10709    shorter code.
10710
10711    Also emit a call when we know that the count is large and the call
10712    overhead will not be important.  */
10713 if (!TARGET_INLINE_ALL_STRINGOPS
10714 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
10715 return 0;
10716
10717 if (TARGET_SINGLE_STRINGOP)
10718 emit_insn (gen_cld ());
10719
10720 countreg2 = gen_reg_rtx (Pmode);
10721 countreg = copy_to_mode_reg (counter_mode, count_exp);
10722 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
10723 /* Get rid of MEM_OFFSET, it won't be accurate. */
10724 dst = change_address (dst, BLKmode, destreg);
10725
10726 if (count == 0 && align < desired_alignment)
10727 {
10728 label = gen_label_rtx ();
10729 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
10730 LEU, 0, counter_mode, 1, label);
10731 }
10732 if (align <= 1)
10733 {
10734 rtx label = ix86_expand_aligntest (destreg, 1);
10735 emit_insn (gen_strset (destreg, dst,
10736 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10737 ix86_adjust_counter (countreg, 1);
10738 emit_label (label);
10739 LABEL_NUSES (label) = 1;
10740 }
10741 if (align <= 2)
10742 {
10743 rtx label = ix86_expand_aligntest (destreg, 2);
10744 emit_insn (gen_strset (destreg, dst,
10745 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10746 ix86_adjust_counter (countreg, 2);
10747 emit_label (label);
10748 LABEL_NUSES (label) = 1;
10749 }
10750 if (align <= 4 && desired_alignment > 4)
10751 {
10752 rtx label = ix86_expand_aligntest (destreg, 4);
10753 emit_insn (gen_strset (destreg, dst,
10754 (TARGET_64BIT
10755 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
10756 : zeroreg)));
10757 ix86_adjust_counter (countreg, 4);
10758 emit_label (label);
10759 LABEL_NUSES (label) = 1;
10760 }
10761
10762 if (label && desired_alignment > 4 && !TARGET_64BIT)
10763 {
10764 emit_label (label);
10765 LABEL_NUSES (label) = 1;
10766 label = NULL_RTX;
10767 }
10768
10769 if (!TARGET_SINGLE_STRINGOP)
10770 emit_insn (gen_cld ());
10771 if (TARGET_64BIT)
10772 {
10773 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10774 GEN_INT (3)));
10775 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
10776 }
10777 else
10778 {
10779 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
10780 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
10781 }
10782 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
10783 emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));
10784
10785 if (label)
10786 {
10787 emit_label (label);
10788 LABEL_NUSES (label) = 1;
10789 }
10790
10791 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10792 emit_insn (gen_strset (destreg, dst,
10793 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10794 if (TARGET_64BIT && (align <= 4 || count == 0))
10795 {
10796 rtx label = ix86_expand_aligntest (countreg, 4);
10797 emit_insn (gen_strset (destreg, dst,
10798 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10799 emit_label (label);
10800 LABEL_NUSES (label) = 1;
10801 }
10802 if (align > 2 && count != 0 && (count & 2))
10803 emit_insn (gen_strset (destreg, dst,
10804 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10805 if (align <= 2 || count == 0)
10806 {
10807 rtx label = ix86_expand_aligntest (countreg, 2);
10808 emit_insn (gen_strset (destreg, dst,
10809 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10810 emit_label (label);
10811 LABEL_NUSES (label) = 1;
10812 }
10813 if (align > 1 && count != 0 && (count & 1))
10814 emit_insn (gen_strset (destreg, dst,
10815 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10816 if (align <= 1 || count == 0)
10817 {
10818 rtx label = ix86_expand_aligntest (countreg, 1);
10819 emit_insn (gen_strset (destreg, dst,
10820 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10821 emit_label (label);
10822 LABEL_NUSES (label) = 1;
10823 }
10824 }
10825 return 1;
10826 }
10827
10828 /* Expand strlen. */
10829 int
10830 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
10831 {
10832 rtx addr, scratch1, scratch2, scratch3, scratch4;
10833
10834 /* The generic case of the strlen expander is long.  Avoid expanding it
10835    unless TARGET_INLINE_ALL_STRINGOPS.  */
10836
10837 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
10838 && !TARGET_INLINE_ALL_STRINGOPS
10839 && !optimize_size
10840 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
10841 return 0;
10842
10843 addr = force_reg (Pmode, XEXP (src, 0));
10844 scratch1 = gen_reg_rtx (Pmode);
10845
10846 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
10847 && !optimize_size)
10848 {
10849 /* Well it seems that some optimizer does not combine a call like
10850    foo(strlen(bar), strlen(bar));
10851    when the move and the subtraction are done here.  It does calculate
10852    the length just once when these instructions are done inside of
10853    output_strlen_unroll().  But I think since &bar[strlen(bar)] is
10854    often used and I use one fewer register for the lifetime of
10855    output_strlen_unroll(), this is better.  */
10856
10857 emit_move_insn (out, addr);
10858
10859 ix86_expand_strlensi_unroll_1 (out, src, align);
10860
10861 /* strlensi_unroll_1 returns the address of the zero at the end of
10862 the string, like memchr(), so compute the length by subtracting
10863 the start address. */
10864 if (TARGET_64BIT)
10865 emit_insn (gen_subdi3 (out, out, addr));
10866 else
10867 emit_insn (gen_subsi3 (out, out, addr));
10868 }
10869 else
10870 {
10871 rtx unspec;
10872 scratch2 = gen_reg_rtx (Pmode);
10873 scratch3 = gen_reg_rtx (Pmode);
10874 scratch4 = force_reg (Pmode, constm1_rtx);
10875
10876 emit_move_insn (scratch3, addr);
10877 eoschar = force_reg (QImode, eoschar);
10878
10879 emit_insn (gen_cld ());
10880 src = replace_equiv_address_nv (src, scratch3);
10881
10882 /* If .md starts supporting :P, this can be done in .md. */
10883 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
10884 scratch4), UNSPEC_SCAS);
10885 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
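 /* For illustration of the arithmetic below: repnz scasb starts with
    ECX = -1 and runs strlen + 1 iterations (including the terminating
    zero byte), leaving ECX = -(strlen + 2); the complement and the
    add of -1 then recover the length, since ~ECX - 1 = strlen.  */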
10886 if (TARGET_64BIT)
10887 {
10888 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
10889 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
10890 }
10891 else
10892 {
10893 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
10894 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
10895 }
10896 }
10897 return 1;
10898 }
10899
10900 /* Expand the appropriate insns for doing strlen if not just doing
10901 repnz; scasb
10902
10903 out = result, initialized with the start address
10904 align_rtx = alignment of the address.
10905 scratch = scratch register, initialized with the start address when
10906 not aligned, otherwise undefined
10907
10908 This is just the body.  It needs the initializations mentioned above and
10909 some address computation at the end.  These things are done in i386.md.  */
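/* For illustration, a rough C sketch of the aligned word loop emitted
   below (the alignment prologue and the final branch-free fixup are
   omitted; the names are only for exposition):

       const char *p = s;               s already 4-byte aligned here
       unsigned int w;
       do
         {
           w = *(const unsigned int *) p;
           p += 4;
         }
       while (((w - 0x01010101) & ~w & 0x80808080) == 0);

   after the loop, p points just past the word containing the first
   zero byte, and the position of that byte is fixed up without a
   branch.  */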
10910
10911 static void
10912 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
10913 {
10914 int align;
10915 rtx tmp;
10916 rtx align_2_label = NULL_RTX;
10917 rtx align_3_label = NULL_RTX;
10918 rtx align_4_label = gen_label_rtx ();
10919 rtx end_0_label = gen_label_rtx ();
10920 rtx mem;
10921 rtx tmpreg = gen_reg_rtx (SImode);
10922 rtx scratch = gen_reg_rtx (SImode);
10923 rtx cmp;
10924
10925 align = 0;
10926 if (GET_CODE (align_rtx) == CONST_INT)
10927 align = INTVAL (align_rtx);
10928
10929 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
10930
10931 /* Is there a known alignment and is it less than 4? */
10932 if (align < 4)
10933 {
10934 rtx scratch1 = gen_reg_rtx (Pmode);
10935 emit_move_insn (scratch1, out);
10936 /* Is there a known alignment and is it not 2? */
10937 if (align != 2)
10938 {
10939 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
10940 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
10941
10942 /* Leave just the 3 lower bits. */
10943 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
10944 NULL_RTX, 0, OPTAB_WIDEN);
10945
10946 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
10947 Pmode, 1, align_4_label);
10948 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
10949 Pmode, 1, align_2_label);
10950 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
10951 Pmode, 1, align_3_label);
10952 }
10953 else
10954 {
10955 /* Since the alignment is 2, we have to check 2 or 0 bytes;
10956    check whether it is aligned to a 4-byte boundary.  */
10957
10958 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
10959 NULL_RTX, 0, OPTAB_WIDEN);
10960
10961 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
10962 Pmode, 1, align_4_label);
10963 }
10964
10965 mem = change_address (src, QImode, out);
10966
10967 /* Now compare the bytes. */
10968
10969 /* Compare the first n unaligned bytes on a byte-by-byte basis.  */
10970 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
10971 QImode, 1, end_0_label);
10972
10973 /* Increment the address. */
10974 if (TARGET_64BIT)
10975 emit_insn (gen_adddi3 (out, out, const1_rtx));
10976 else
10977 emit_insn (gen_addsi3 (out, out, const1_rtx));
10978
10979 /* Not needed with an alignment of 2 */
10980 if (align != 2)
10981 {
10982 emit_label (align_2_label);
10983
10984 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
10985 end_0_label);
10986
10987 if (TARGET_64BIT)
10988 emit_insn (gen_adddi3 (out, out, const1_rtx));
10989 else
10990 emit_insn (gen_addsi3 (out, out, const1_rtx));
10991
10992 emit_label (align_3_label);
10993 }
10994
10995 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
10996 end_0_label);
10997
10998 if (TARGET_64BIT)
10999 emit_insn (gen_adddi3 (out, out, const1_rtx));
11000 else
11001 emit_insn (gen_addsi3 (out, out, const1_rtx));
11002 }
11003
11004 /* Generate a loop to check 4 bytes at a time.  It is not a good idea to
11005    align this loop; it only makes the program bigger without helping
11006    to speed it up.  */
11007 emit_label (align_4_label);
11008
11009 mem = change_address (src, SImode, out);
11010 emit_move_insn (scratch, mem);
11011 if (TARGET_64BIT)
11012 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
11013 else
11014 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
11015
11016 /* This formula yields a nonzero result iff one of the bytes is zero.
11017    This saves three branches inside the loop and many cycles.  */
11018
11019 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
11020 emit_insn (gen_one_cmplsi2 (scratch, scratch));
11021 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
11022 emit_insn (gen_andsi3 (tmpreg, tmpreg,
11023 gen_int_mode (0x80808080, SImode)));
11024 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
11025 align_4_label);
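 /* For illustration of the test above: ((w - 0x01010101) & ~w & 0x80808080)
    is nonzero iff some byte of w is zero.  E.g. for w = 0x11002233:
        w - 0x01010101 = 0x0FFF2132
        ~w             = 0xEEFFDDCC
    so (w - 0x01010101) & ~w = 0x0EFF0100, and masking with 0x80808080
    leaves 0x00800000, flagging the zero byte in bits 16-23.  */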
11026
11027 if (TARGET_CMOVE)
11028 {
11029 rtx reg = gen_reg_rtx (SImode);
11030 rtx reg2 = gen_reg_rtx (Pmode);
11031 emit_move_insn (reg, tmpreg);
11032 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
11033
11034 /* If zero is not in the first two bytes, move two bytes forward. */
11035 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11036 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11037 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11038 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
11039 gen_rtx_IF_THEN_ELSE (SImode, tmp,
11040 reg,
11041 tmpreg)));
11042 /* Emit lea manually to avoid clobbering of flags. */
11043 emit_insn (gen_rtx_SET (SImode, reg2,
11044 gen_rtx_PLUS (Pmode, out, const2_rtx)));
11045
11046 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11047 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11048 emit_insn (gen_rtx_SET (VOIDmode, out,
11049 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
11050 reg2,
11051 out)));
11052
11053 }
11054 else
11055 {
11056 rtx end_2_label = gen_label_rtx ();
11057 /* Is zero in the first two bytes? */
11058
11059 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11060 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11061 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
11062 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11063 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
11064 pc_rtx);
11065 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11066 JUMP_LABEL (tmp) = end_2_label;
11067
11068 /* Not in the first two. Move two bytes forward. */
11069 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
11070 if (TARGET_64BIT)
11071 emit_insn (gen_adddi3 (out, out, const2_rtx));
11072 else
11073 emit_insn (gen_addsi3 (out, out, const2_rtx));
11074
11075 emit_label (end_2_label);
11076
11077 }
11078
11079 /* Avoid branch in fixing the byte. */
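/* For illustration: doubling the low byte of TMPREG copies its 0x80 flag
   bit into the carry flag, so the subtract-with-borrow emitted below backs
   OUT up by 4 bytes when the zero was the first byte of the half-word just
   examined and by 3 bytes otherwise, leaving OUT pointing at the
   terminating zero byte without a conditional jump.  */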
11080 tmpreg = gen_lowpart (QImode, tmpreg);
11081 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
11082 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
11083 if (TARGET_64BIT)
11084 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
11085 else
11086 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
11087
11088 emit_label (end_0_label);
11089 }
11090
11091 void
11092 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
11093 rtx callarg2 ATTRIBUTE_UNUSED,
11094 rtx pop, int sibcall)
11095 {
11096 rtx use = NULL, call;
11097
11098 if (pop == const0_rtx)
11099 pop = NULL;
11100 if (TARGET_64BIT && pop)
11101 abort ();
11102
11103 #if TARGET_MACHO
11104 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
11105 fnaddr = machopic_indirect_call_target (fnaddr);
11106 #else
11107 /* Static functions and indirect calls don't need the pic register. */
11108 if (! TARGET_64BIT && flag_pic
11109 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
11110 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
11111 use_reg (&use, pic_offset_table_rtx);
11112
11113 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
11114 {
11115 rtx al = gen_rtx_REG (QImode, 0);
11116 emit_move_insn (al, callarg2);
11117 use_reg (&use, al);
11118 }
11119 #endif /* TARGET_MACHO */
11120
11121 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
11122 {
11123 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11124 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11125 }
11126 if (sibcall && TARGET_64BIT
11127 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
11128 {
11129 rtx addr;
11130 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11131 fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
11132 emit_move_insn (fnaddr, addr);
11133 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11134 }
11135
11136 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
11137 if (retval)
11138 call = gen_rtx_SET (VOIDmode, retval, call);
11139 if (pop)
11140 {
11141 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
11142 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
11143 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
11144 }
11145
11146 call = emit_call_insn (call);
11147 if (use)
11148 CALL_INSN_FUNCTION_USAGE (call) = use;
11149 }
11150
11151 \f
11152 /* Clear stack slot assignments remembered from previous functions.
11153 This is called from INIT_EXPANDERS once before RTL is emitted for each
11154 function. */
11155
11156 static struct machine_function *
11157 ix86_init_machine_status (void)
11158 {
11159 struct machine_function *f;
11160
11161 f = ggc_alloc_cleared (sizeof (struct machine_function));
11162 f->use_fast_prologue_epilogue_nregs = -1;
11163
11164 return f;
11165 }
11166
11167 /* Return a MEM corresponding to a stack slot with mode MODE.
11168 Allocate a new slot if necessary.
11169
11170 The RTL for a function can have several slots available: N is
11171 which slot to use. */
11172
11173 rtx
11174 assign_386_stack_local (enum machine_mode mode, int n)
11175 {
11176 struct stack_local_entry *s;
11177
11178 if (n < 0 || n >= MAX_386_STACK_LOCALS)
11179 abort ();
11180
11181 for (s = ix86_stack_locals; s; s = s->next)
11182 if (s->mode == mode && s->n == n)
11183 return s->rtl;
11184
11185 s = (struct stack_local_entry *)
11186 ggc_alloc (sizeof (struct stack_local_entry));
11187 s->n = n;
11188 s->mode = mode;
11189 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
11190
11191 s->next = ix86_stack_locals;
11192 ix86_stack_locals = s;
11193 return s->rtl;
11194 }
11195
11196 /* Construct the SYMBOL_REF for the tls_get_addr function. */
11197
11198 static GTY(()) rtx ix86_tls_symbol;
11199 rtx
11200 ix86_tls_get_addr (void)
11201 {
11202
11203 if (!ix86_tls_symbol)
11204 {
11205 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
11206 (TARGET_GNU_TLS && !TARGET_64BIT)
11207 ? "___tls_get_addr"
11208 : "__tls_get_addr");
11209 }
11210
11211 return ix86_tls_symbol;
11212 }
11213 \f
11214 /* Calculate the length of the memory address in the instruction
11215 encoding. Does not include the one-byte modrm, opcode, or prefix. */
11216
11217 int
11218 memory_address_length (rtx addr)
11219 {
11220 struct ix86_address parts;
11221 rtx base, index, disp;
11222 int len;
11223
11224 if (GET_CODE (addr) == PRE_DEC
11225 || GET_CODE (addr) == POST_INC
11226 || GET_CODE (addr) == PRE_MODIFY
11227 || GET_CODE (addr) == POST_MODIFY)
11228 return 0;
11229
11230 if (! ix86_decompose_address (addr, &parts))
11231 abort ();
11232
11233 base = parts.base;
11234 index = parts.index;
11235 disp = parts.disp;
11236 len = 0;
11237
11238 /* Rule of thumb:
11239 - esp as the base always wants an index,
11240 - ebp as the base always wants a displacement. */
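/* For illustration: a plain (%eax) address needs neither a SIB byte nor a
   displacement and yields 0; 8(%ebp) yields 1 for the disp8; 4(%ebx,%esi,2)
   yields 2 (a SIB byte plus a disp8); a bare 32-bit absolute address
   yields 4.  */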
11241
11242 /* Register Indirect. */
11243 if (base && !index && !disp)
11244 {
11245 /* esp (for its index) and ebp (for its displacement) need
11246 the two-byte modrm form. */
11247 if (addr == stack_pointer_rtx
11248 || addr == arg_pointer_rtx
11249 || addr == frame_pointer_rtx
11250 || addr == hard_frame_pointer_rtx)
11251 len = 1;
11252 }
11253
11254 /* Direct Addressing. */
11255 else if (disp && !base && !index)
11256 len = 4;
11257
11258 else
11259 {
11260 /* Find the length of the displacement constant. */
11261 if (disp)
11262 {
11263 if (GET_CODE (disp) == CONST_INT
11264 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K')
11265 && base)
11266 len = 1;
11267 else
11268 len = 4;
11269 }
11270 /* ebp always wants a displacement. */
11271 else if (base == hard_frame_pointer_rtx)
11272 len = 1;
11273
11274 /* An index requires the two-byte modrm form.... */
11275 if (index
11276 /* ...like esp, which always wants an index. */
11277 || base == stack_pointer_rtx
11278 || base == arg_pointer_rtx
11279 || base == frame_pointer_rtx)
11280 len += 1;
11281 }
11282
11283 return len;
11284 }
11285
11286 /* Compute the default value for the "length_immediate" attribute. When
11287 SHORTFORM is set, expect that the insn has an 8-bit immediate alternative. */
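/* For illustration: with SHORTFORM set, "addl $4, %eax" can use the
   sign-extended 8-bit immediate form and the immediate contributes one
   byte, whereas "addl $1000, %eax" needs a full 4-byte immediate.  */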
11288 int
11289 ix86_attr_length_immediate_default (rtx insn, int shortform)
11290 {
11291 int len = 0;
11292 int i;
11293 extract_insn_cached (insn);
11294 for (i = recog_data.n_operands - 1; i >= 0; --i)
11295 if (CONSTANT_P (recog_data.operand[i]))
11296 {
11297 if (len)
11298 abort ();
11299 if (shortform
11300 && GET_CODE (recog_data.operand[i]) == CONST_INT
11301 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
11302 len = 1;
11303 else
11304 {
11305 switch (get_attr_mode (insn))
11306 {
11307 case MODE_QI:
11308 len += 1;
11309 break;
11310 case MODE_HI:
11311 len += 2;
11312 break;
11313 case MODE_SI:
11314 len += 4;
11315 break;
11316 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
11317 case MODE_DI:
11318 len += 4;
11319 break;
11320 default:
11321 fatal_insn ("unknown insn mode", insn);
11322 }
11323 }
11324 }
11325 return len;
11326 }
11327 /* Compute default value for "length_address" attribute. */
11328 int
11329 ix86_attr_length_address_default (rtx insn)
11330 {
11331 int i;
11332
11333 if (get_attr_type (insn) == TYPE_LEA)
11334 {
11335 rtx set = PATTERN (insn);
11336 if (GET_CODE (set) == SET)
11337 ;
11338 else if (GET_CODE (set) == PARALLEL
11339 && GET_CODE (XVECEXP (set, 0, 0)) == SET)
11340 set = XVECEXP (set, 0, 0);
11341 else
11342 {
11343 #ifdef ENABLE_CHECKING
11344 abort ();
11345 #endif
11346 return 0;
11347 }
11348
11349 return memory_address_length (SET_SRC (set));
11350 }
11351
11352 extract_insn_cached (insn);
11353 for (i = recog_data.n_operands - 1; i >= 0; --i)
11354 if (GET_CODE (recog_data.operand[i]) == MEM)
11355 return memory_address_length (XEXP (recog_data.operand[i], 0));
11359 return 0;
11360 }
11361 \f
11362 /* Return the maximum number of instructions a cpu can issue. */
11363
11364 static int
11365 ix86_issue_rate (void)
11366 {
11367 switch (ix86_tune)
11368 {
11369 case PROCESSOR_PENTIUM:
11370 case PROCESSOR_K6:
11371 return 2;
11372
11373 case PROCESSOR_PENTIUMPRO:
11374 case PROCESSOR_PENTIUM4:
11375 case PROCESSOR_ATHLON:
11376 case PROCESSOR_K8:
11377 case PROCESSOR_NOCONA:
11378 return 3;
11379
11380 default:
11381 return 1;
11382 }
11383 }
11384
11385 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads the flags
11386 set by DEP_INSN and nothing else set by DEP_INSN. */
11387
11388 static int
11389 ix86_flags_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
11390 {
11391 rtx set, set2;
11392
11393 /* Simplify the test for uninteresting insns. */
11394 if (insn_type != TYPE_SETCC
11395 && insn_type != TYPE_ICMOV
11396 && insn_type != TYPE_FCMOV
11397 && insn_type != TYPE_IBR)
11398 return 0;
11399
11400 if ((set = single_set (dep_insn)) != 0)
11401 {
11402 set = SET_DEST (set);
11403 set2 = NULL_RTX;
11404 }
11405 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
11406 && XVECLEN (PATTERN (dep_insn), 0) == 2
11407 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
11408 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
11409 {
11410 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
11411 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
11412 }
11413 else
11414 return 0;
11415
11416 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
11417 return 0;
11418
11419 /* This test is true if the dependent insn reads the flags but
11420 not any other potentially set register. */
11421 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
11422 return 0;
11423
11424 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
11425 return 0;
11426
11427 return 1;
11428 }
11429
11430 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
11431 address with operands set by DEP_INSN. */
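/* For illustration: on the Pentium, "movl %ebx, %eax" immediately followed
   by "movl (%eax), %ecx" stalls for a cycle because the load's address is
   produced by the preceding move; this routine detects such pairs.  */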
11432
11433 static int
11434 ix86_agi_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
11435 {
11436 rtx addr;
11437
11438 if (insn_type == TYPE_LEA
11439 && TARGET_PENTIUM)
11440 {
11441 addr = PATTERN (insn);
11442 if (GET_CODE (addr) == SET)
11443 ;
11444 else if (GET_CODE (addr) == PARALLEL
11445 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
11446 addr = XVECEXP (addr, 0, 0);
11447 else
11448 abort ();
11449 addr = SET_SRC (addr);
11450 }
11451 else
11452 {
11453 int i;
11454 extract_insn_cached (insn);
11455 for (i = recog_data.n_operands - 1; i >= 0; --i)
11456 if (GET_CODE (recog_data.operand[i]) == MEM)
11457 {
11458 addr = XEXP (recog_data.operand[i], 0);
11459 goto found;
11460 }
11461 return 0;
11462 found:;
11463 }
11464
11465 return modified_in_p (addr, dep_insn);
11466 }
11467
11468 static int
11469 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
11470 {
11471 enum attr_type insn_type, dep_insn_type;
11472 enum attr_memory memory;
11473 rtx set, set2;
11474 int dep_insn_code_number;
11475
11476 /* Anti and output dependencies have zero cost on all CPUs. */
11477 if (REG_NOTE_KIND (link) != 0)
11478 return 0;
11479
11480 dep_insn_code_number = recog_memoized (dep_insn);
11481
11482 /* If we can't recognize the insns, we can't really do anything. */
11483 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
11484 return cost;
11485
11486 insn_type = get_attr_type (insn);
11487 dep_insn_type = get_attr_type (dep_insn);
11488
11489 switch (ix86_tune)
11490 {
11491 case PROCESSOR_PENTIUM:
11492 /* Address Generation Interlock adds a cycle of latency. */
11493 if (ix86_agi_dependant (insn, dep_insn, insn_type))
11494 cost += 1;
11495
11496 /* ??? Compares pair with jump/setcc. */
11497 if (ix86_flags_dependant (insn, dep_insn, insn_type))
11498 cost = 0;
11499
11500 /* Floating point stores require value to be ready one cycle earlier. */
11501 if (insn_type == TYPE_FMOV
11502 && get_attr_memory (insn) == MEMORY_STORE
11503 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11504 cost += 1;
11505 break;
11506
11507 case PROCESSOR_PENTIUMPRO:
11508 memory = get_attr_memory (insn);
11509
11510 /* INT->FP conversion is expensive. */
11511 if (get_attr_fp_int_src (dep_insn))
11512 cost += 5;
11513
11514 /* There is one cycle extra latency between an FP op and a store. */
11515 if (insn_type == TYPE_FMOV
11516 && (set = single_set (dep_insn)) != NULL_RTX
11517 && (set2 = single_set (insn)) != NULL_RTX
11518 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
11519 && GET_CODE (SET_DEST (set2)) == MEM)
11520 cost += 1;
11521
11522 /* Show the ability of the reorder buffer to hide the latency of a load
11523 by executing it in parallel with the previous instruction, provided
11524 the previous instruction is not needed to compute the address. */
11525 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11526 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11527 {
11528 /* Claim that moves take one cycle, as the core can issue one load
11529 at a time and the next load can start a cycle later. */
11530 if (dep_insn_type == TYPE_IMOV
11531 || dep_insn_type == TYPE_FMOV)
11532 cost = 1;
11533 else if (cost > 1)
11534 cost--;
11535 }
11536 break;
11537
11538 case PROCESSOR_K6:
11539 memory = get_attr_memory (insn);
11540
11541 /* The esp dependency is resolved before the instruction is really
11542 finished. */
11543 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
11544 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
11545 return 1;
11546
11547 /* INT->FP conversion is expensive. */
11548 if (get_attr_fp_int_src (dep_insn))
11549 cost += 5;
11550
11551 /* Show the ability of the reorder buffer to hide the latency of a load
11552 by executing it in parallel with the previous instruction, provided
11553 the previous instruction is not needed to compute the address. */
11554 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11555 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11556 {
11557 /* Claim that moves take one cycle, as the core can issue one load
11558 at a time and the next load can start a cycle later. */
11559 if (dep_insn_type == TYPE_IMOV
11560 || dep_insn_type == TYPE_FMOV)
11561 cost = 1;
11562 else if (cost > 2)
11563 cost -= 2;
11564 else
11565 cost = 1;
11566 }
11567 break;
11568
11569 case PROCESSOR_ATHLON:
11570 case PROCESSOR_K8:
11571 memory = get_attr_memory (insn);
11572
11573 /* Show the ability of the reorder buffer to hide the latency of a load
11574 by executing it in parallel with the previous instruction, provided
11575 the previous instruction is not needed to compute the address. */
11576 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11577 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11578 {
11579 enum attr_unit unit = get_attr_unit (insn);
11580 int loadcost = 3;
11581
11582 /* Because of the difference between the length of the integer and
11583 floating-point unit pipeline preparation stages, memory operands for
11584 floating point are cheaper.
11585 
11586 ??? For the Athlon the difference is most probably 2. */
11587 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
11588 loadcost = 3;
11589 else
11590 loadcost = TARGET_ATHLON ? 2 : 0;
11591
11592 if (cost >= loadcost)
11593 cost -= loadcost;
11594 else
11595 cost = 0;
11596 }
11597 break;
11598 default:
11599 break;
11600 }
11601
11602 return cost;
11603 }
11604
11605 /* How many alternative schedules to try. This should be as wide as the
11606 scheduling freedom in the DFA, but no wider. Making this value too
11607 large results in extra work for the scheduler. */
11608
11609 static int
11610 ia32_multipass_dfa_lookahead (void)
11611 {
11612 if (ix86_tune == PROCESSOR_PENTIUM)
11613 return 2;
11614
11615 if (ix86_tune == PROCESSOR_PENTIUMPRO
11616 || ix86_tune == PROCESSOR_K6)
11617 return 1;
11618
11619 else
11620 return 0;
11621 }
11622
11623 \f
11624 /* Implement the target hook targetm.vectorize.misaligned_mem_ok. */
11625
11626 static bool
11627 ix86_misaligned_mem_ok (enum machine_mode mode)
11628 {
11629 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
11630 return true;
11631 else
11632 return false;
11633 }
11634
11635 /* Compute the alignment given to a constant that is being placed in memory.
11636 EXP is the constant and ALIGN is the alignment that the object would
11637 ordinarily have.
11638 The value of this function is used instead of that alignment to align
11639 the object. */
11640
11641 int
11642 ix86_constant_alignment (tree exp, int align)
11643 {
11644 if (TREE_CODE (exp) == REAL_CST)
11645 {
11646 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
11647 return 64;
11648 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
11649 return 128;
11650 }
11651 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
11652 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
11653 return BITS_PER_WORD;
11654
11655 return align;
11656 }
11657
11658 /* Compute the alignment for a static variable.
11659 TYPE is the data type, and ALIGN is the alignment that
11660 the object would ordinarily have. The value of this function is used
11661 instead of that alignment to align the object. */
11662
11663 int
11664 ix86_data_alignment (tree type, int align)
11665 {
11666 if (AGGREGATE_TYPE_P (type)
11667 && TYPE_SIZE (type)
11668 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11669 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
11670 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
11671 return 256;
11672
11673 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
11674 to a 16-byte boundary. */
11675 if (TARGET_64BIT)
11676 {
11677 if (AGGREGATE_TYPE_P (type)
11678 && TYPE_SIZE (type)
11679 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11680 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
11681 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
11682 return 128;
11683 }
11684
11685 if (TREE_CODE (type) == ARRAY_TYPE)
11686 {
11687 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
11688 return 64;
11689 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
11690 return 128;
11691 }
11692 else if (TREE_CODE (type) == COMPLEX_TYPE)
11693 {
11694
11695 if (TYPE_MODE (type) == DCmode && align < 64)
11696 return 64;
11697 if (TYPE_MODE (type) == XCmode && align < 128)
11698 return 128;
11699 }
11700 else if ((TREE_CODE (type) == RECORD_TYPE
11701 || TREE_CODE (type) == UNION_TYPE
11702 || TREE_CODE (type) == QUAL_UNION_TYPE)
11703 && TYPE_FIELDS (type))
11704 {
11705 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
11706 return 64;
11707 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
11708 return 128;
11709 }
11710 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
11711 || TREE_CODE (type) == INTEGER_TYPE)
11712 {
11713 if (TYPE_MODE (type) == DFmode && align < 64)
11714 return 64;
11715 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
11716 return 128;
11717 }
11718
11719 return align;
11720 }
11721
11722 /* Compute the alignment for a local variable.
11723 TYPE is the data type, and ALIGN is the alignment that
11724 the object would ordinarily have. The value of this macro is used
11725 instead of that alignment to align the object. */
11726
11727 int
11728 ix86_local_alignment (tree type, int align)
11729 {
11730 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
11731 to a 16-byte boundary. */
11732 if (TARGET_64BIT)
11733 {
11734 if (AGGREGATE_TYPE_P (type)
11735 && TYPE_SIZE (type)
11736 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11737 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
11738 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
11739 return 128;
11740 }
11741 if (TREE_CODE (type) == ARRAY_TYPE)
11742 {
11743 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
11744 return 64;
11745 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
11746 return 128;
11747 }
11748 else if (TREE_CODE (type) == COMPLEX_TYPE)
11749 {
11750 if (TYPE_MODE (type) == DCmode && align < 64)
11751 return 64;
11752 if (TYPE_MODE (type) == XCmode && align < 128)
11753 return 128;
11754 }
11755 else if ((TREE_CODE (type) == RECORD_TYPE
11756 || TREE_CODE (type) == UNION_TYPE
11757 || TREE_CODE (type) == QUAL_UNION_TYPE)
11758 && TYPE_FIELDS (type))
11759 {
11760 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
11761 return 64;
11762 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
11763 return 128;
11764 }
11765 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
11766 || TREE_CODE (type) == INTEGER_TYPE)
11767 {
11768
11769 if (TYPE_MODE (type) == DFmode && align < 64)
11770 return 64;
11771 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
11772 return 128;
11773 }
11774 return align;
11775 }
11776 \f
11777 /* Emit RTL insns to initialize the variable parts of a trampoline.
11778 FNADDR is an RTX for the address of the function's pure code.
11779 CXT is an RTX for the static chain value for the function. */
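/* For illustration, the bytes emitted below lay the trampoline out as

      32-bit:  b9 <cxt:4>  e9 <rel32:4>      movl $CXT, %ecx; jmp FNADDR
      64-bit:  41 bb <fnaddr:4>              movl $FNADDR, %r11d  (short form)
           or  49 bb <fnaddr:8>              movabs $FNADDR, %r11
               49 ba <cxt:8>                 movabs $CXT, %r10
               49 ff e3                      jmp *%r11

   where <rel32> is FNADDR relative to the end of the 32-bit trampoline.  */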
11780 void
11781 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
11782 {
11783 if (!TARGET_64BIT)
11784 {
11785 /* Compute offset from the end of the jmp to the target function. */
11786 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
11787 plus_constant (tramp, 10),
11788 NULL_RTX, 1, OPTAB_DIRECT);
11789 emit_move_insn (gen_rtx_MEM (QImode, tramp),
11790 gen_int_mode (0xb9, QImode));
11791 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
11792 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
11793 gen_int_mode (0xe9, QImode));
11794 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
11795 }
11796 else
11797 {
11798 int offset = 0;
11799 /* Try to load the address using the shorter movl instead of movabs.
11800 We may want to support movq for kernel mode, but the kernel does not
11801 use trampolines at the moment. */
11802 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
11803 {
11804 fnaddr = copy_to_mode_reg (DImode, fnaddr);
11805 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11806 gen_int_mode (0xbb41, HImode));
11807 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
11808 gen_lowpart (SImode, fnaddr));
11809 offset += 6;
11810 }
11811 else
11812 {
11813 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11814 gen_int_mode (0xbb49, HImode));
11815 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
11816 fnaddr);
11817 offset += 10;
11818 }
11819 /* Load static chain using movabs to r10. */
11820 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11821 gen_int_mode (0xba49, HImode));
11822 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
11823 cxt);
11824 offset += 10;
11825 /* Jump to r11. */
11826 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11827 gen_int_mode (0xff49, HImode));
11828 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
11829 gen_int_mode (0xe3, QImode));
11830 offset += 3;
11831 if (offset > TRAMPOLINE_SIZE)
11832 abort ();
11833 }
11834
11835 #ifdef ENABLE_EXECUTE_STACK
11836 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
11837 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
11838 #endif
11839 }
11840 \f
11841 #define def_builtin(MASK, NAME, TYPE, CODE) \
11842 do { \
11843 if ((MASK) & target_flags \
11844 && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
11845 lang_hooks.builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
11846 NULL, NULL_TREE); \
11847 } while (0)
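/* For illustration (a sketch, using the names from the MMX/SSE builtin
   setup below), a no-argument builtin would typically be registered as

     def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void,
                  IX86_BUILTIN_EMMS);

   the MASK_64BIT check above keeps 64-bit-only builtins from being
   defined on 32-bit targets.  */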
11848
11849 struct builtin_description
11850 {
11851 const unsigned int mask;
11852 const enum insn_code icode;
11853 const char *const name;
11854 const enum ix86_builtins code;
11855 const enum rtx_code comparison;
11856 const unsigned int flag;
11857 };
11858
11859 static const struct builtin_description bdesc_comi[] =
11860 {
11861 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
11862 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
11863 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
11864 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
11865 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
11866 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
11867 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
11868 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
11869 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
11870 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
11871 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
11872 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
11873 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
11874 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
11875 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
11876 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
11877 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
11878 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
11879 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
11880 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
11881 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
11882 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
11883 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
11884 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
11885 };
11886
11887 static const struct builtin_description bdesc_2arg[] =
11888 {
11889 /* SSE */
11890 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
11891 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
11892 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
11893 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
11894 { MASK_SSE, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
11895 { MASK_SSE, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
11896 { MASK_SSE, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
11897 { MASK_SSE, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
11898
11899 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
11900 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
11901 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
11902 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
11903 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
11904 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
11905 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
11906 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
11907 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
11908 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
11909 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
11910 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
11911 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
11912 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
11913 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
11914 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
11915 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
11916 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
11917 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
11918 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
11919
11920 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
11921 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
11922 { MASK_SSE, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
11923 { MASK_SSE, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
11924
11925 { MASK_SSE, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
11926 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
11927 { MASK_SSE, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
11928 { MASK_SSE, CODE_FOR_sse_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
11929
11930 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
11931 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
11932 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
11933 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
11934 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
11935
11936 /* MMX */
11937 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
11938 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
11939 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
11940 { MASK_MMX, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
11941 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
11942 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
11943 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
11944 { MASK_MMX, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
11945
11946 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
11947 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
11948 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
11949 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
11950 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
11951 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
11952 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
11953 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
11954
11955 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
11956 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
11957 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
11958
11959 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
11960 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
11961 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
11962 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
11963
11964 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
11965 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
11966
11967 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
11968 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
11969 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
11970 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
11971 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
11972 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
11973
11974 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
11975 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
11976 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
11977 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
11978
11979 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
11980 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
11981 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
11982 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
11983 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
11984 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
11985
11986 /* Special. */
11987 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
11988 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
11989 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
11990
11991 { MASK_SSE, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
11992 { MASK_SSE, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
11993 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
11994
11995 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
11996 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
11997 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
11998 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
11999 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
12000 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
12001
12002 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
12003 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
12004 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
12005 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
12006 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
12007 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
12008
12009 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
12010 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
12011 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
12012 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
12013
12014 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
12015 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
12016
12017 /* SSE2 */
12018 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
12019 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
12020 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
12021 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
12022 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
12023 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
12024 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
12025 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
12026
12027 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
12028 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
12029 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
12030 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
12031 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
12032 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
12033 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
12034 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
12035 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
12036 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
12037 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
12038 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
12039 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
12040 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
12041 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
12042 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
12043 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
12044 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
12045 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
12046 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
12047
12048 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
12049 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
12050 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
12051 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
12052
12053 { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
12054 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
12055 { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
12056 { MASK_SSE2, CODE_FOR_sse2_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
12057
12058 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
12059 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
12060 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
12061
12062 /* SSE2 MMX */
12063 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
12064 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
12065 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
12066 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
12067 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
12068 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
12069 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
12070 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
12071
12072 { MASK_MMX, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
12073 { MASK_MMX, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
12074 { MASK_MMX, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
12075 { MASK_MMX, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
12076 { MASK_MMX, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
12077 { MASK_MMX, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
12078 { MASK_MMX, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
12079 { MASK_MMX, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
12080
12081 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
12082 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
12083
12084 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
12085 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
12086 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
12087 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
12088
12089 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
12090 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
12091
12092 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
12093 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
12094 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
12095 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
12096 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
12097 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
12098
12099 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
12100 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
12101 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
12102 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
12103
12104 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
12105 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
12106 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
12107 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
12108 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
12109 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
12110 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
12111 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
12112
12113 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
12114 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
12115 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
12116
12117 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
12118 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
12119
12120 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
12121 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },
12122
12123 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
12124 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
12125 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
12126 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
12127 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
12128 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
12129
12130 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
12131 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
12132 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
12133 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
12134 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
12135 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
12136
12137 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
12138 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
12139 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
12140 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
12141
12142 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
12143
12144 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
12145 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
12146 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
12147 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
12148
12149 /* SSE3 MMX */
12150 { MASK_SSE3, CODE_FOR_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
12151 { MASK_SSE3, CODE_FOR_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
12152 { MASK_SSE3, CODE_FOR_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
12153 { MASK_SSE3, CODE_FOR_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
12154 { MASK_SSE3, CODE_FOR_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
12155 { MASK_SSE3, CODE_FOR_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
12156 };
12157
12158 static const struct builtin_description bdesc_1arg[] =
12159 {
12160 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
12161 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
12162
12163 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
12164 { MASK_SSE, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
12165 { MASK_SSE, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
12166
12167 { MASK_SSE, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
12168 { MASK_SSE, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
12169 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
12170 { MASK_SSE, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
12171 { MASK_SSE, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
12172 { MASK_SSE | MASK_64BIT, CODE_FOR_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
12173
12174 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
12175 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
12176 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
12177 { MASK_SSE2, CODE_FOR_sse2_movdq2q, 0, IX86_BUILTIN_MOVDQ2Q, 0, 0 },
12178
12179 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
12180
12181 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
12182 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
12183
12184 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
12185 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
12186 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
12187 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
12188 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
12189
12190 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
12191
12192 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
12193 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
12194 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
12195 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
12196
12197 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
12198 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
12199 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
12200
12201 { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 },
12202
12203 /* SSE3 */
12204 { MASK_SSE3, CODE_FOR_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
12205 { MASK_SSE3, CODE_FOR_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
12206 { MASK_SSE3, CODE_FOR_movddup, 0, IX86_BUILTIN_MOVDDUP, 0, 0 }
12207 };
12208
12209 void
12210 ix86_init_builtins (void)
12211 {
12212 if (TARGET_MMX)
12213 ix86_init_mmx_sse_builtins ();
12214 }
12215
12216 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
12217 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
12218 builtins. */
12219 static void
12220 ix86_init_mmx_sse_builtins (void)
12221 {
12222 const struct builtin_description * d;
12223 size_t i;
12224
12225 tree V16QI_type_node = build_vector_type_for_mode (intQI_type_node, V16QImode);
12226 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
12227 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
12228 tree V2DI_type_node = build_vector_type_for_mode (intDI_type_node, V2DImode);
12229 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
12230 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
12231 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
12232 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
12233 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
12234 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
12235
12236 tree pchar_type_node = build_pointer_type (char_type_node);
12237 tree pcchar_type_node = build_pointer_type (
12238 build_type_variant (char_type_node, 1, 0));
12239 tree pfloat_type_node = build_pointer_type (float_type_node);
12240 tree pcfloat_type_node = build_pointer_type (
12241 build_type_variant (float_type_node, 1, 0));
12242 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
12243 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
12244 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
12245
12246 /* Comparisons. */
12247 tree int_ftype_v4sf_v4sf
12248 = build_function_type_list (integer_type_node,
12249 V4SF_type_node, V4SF_type_node, NULL_TREE);
12250 tree v4si_ftype_v4sf_v4sf
12251 = build_function_type_list (V4SI_type_node,
12252 V4SF_type_node, V4SF_type_node, NULL_TREE);
12253 /* MMX/SSE/integer conversions. */
12254 tree int_ftype_v4sf
12255 = build_function_type_list (integer_type_node,
12256 V4SF_type_node, NULL_TREE);
12257 tree int64_ftype_v4sf
12258 = build_function_type_list (long_long_integer_type_node,
12259 V4SF_type_node, NULL_TREE);
12260 tree int_ftype_v8qi
12261 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
12262 tree v4sf_ftype_v4sf_int
12263 = build_function_type_list (V4SF_type_node,
12264 V4SF_type_node, integer_type_node, NULL_TREE);
12265 tree v4sf_ftype_v4sf_int64
12266 = build_function_type_list (V4SF_type_node,
12267 V4SF_type_node, long_long_integer_type_node,
12268 NULL_TREE);
12269 tree v4sf_ftype_v4sf_v2si
12270 = build_function_type_list (V4SF_type_node,
12271 V4SF_type_node, V2SI_type_node, NULL_TREE);
12272 tree int_ftype_v4hi_int
12273 = build_function_type_list (integer_type_node,
12274 V4HI_type_node, integer_type_node, NULL_TREE);
12275 tree v4hi_ftype_v4hi_int_int
12276 = build_function_type_list (V4HI_type_node, V4HI_type_node,
12277 integer_type_node, integer_type_node,
12278 NULL_TREE);
12279 /* Miscellaneous. */
12280 tree v8qi_ftype_v4hi_v4hi
12281 = build_function_type_list (V8QI_type_node,
12282 V4HI_type_node, V4HI_type_node, NULL_TREE);
12283 tree v4hi_ftype_v2si_v2si
12284 = build_function_type_list (V4HI_type_node,
12285 V2SI_type_node, V2SI_type_node, NULL_TREE);
12286 tree v4sf_ftype_v4sf_v4sf_int
12287 = build_function_type_list (V4SF_type_node,
12288 V4SF_type_node, V4SF_type_node,
12289 integer_type_node, NULL_TREE);
12290 tree v2si_ftype_v4hi_v4hi
12291 = build_function_type_list (V2SI_type_node,
12292 V4HI_type_node, V4HI_type_node, NULL_TREE);
12293 tree v4hi_ftype_v4hi_int
12294 = build_function_type_list (V4HI_type_node,
12295 V4HI_type_node, integer_type_node, NULL_TREE);
12296 tree v4hi_ftype_v4hi_di
12297 = build_function_type_list (V4HI_type_node,
12298 V4HI_type_node, long_long_unsigned_type_node,
12299 NULL_TREE);
12300 tree v2si_ftype_v2si_di
12301 = build_function_type_list (V2SI_type_node,
12302 V2SI_type_node, long_long_unsigned_type_node,
12303 NULL_TREE);
12304 tree void_ftype_void
12305 = build_function_type (void_type_node, void_list_node);
12306 tree void_ftype_unsigned
12307 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
12308 tree void_ftype_unsigned_unsigned
12309 = build_function_type_list (void_type_node, unsigned_type_node,
12310 unsigned_type_node, NULL_TREE);
12311 tree void_ftype_pcvoid_unsigned_unsigned
12312 = build_function_type_list (void_type_node, const_ptr_type_node,
12313 unsigned_type_node, unsigned_type_node,
12314 NULL_TREE);
12315 tree unsigned_ftype_void
12316 = build_function_type (unsigned_type_node, void_list_node);
12317 tree di_ftype_void
12318 = build_function_type (long_long_unsigned_type_node, void_list_node);
12319 tree v4sf_ftype_void
12320 = build_function_type (V4SF_type_node, void_list_node);
12321 tree v2si_ftype_v4sf
12322 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
12323 /* Loads/stores. */
12324 tree void_ftype_v8qi_v8qi_pchar
12325 = build_function_type_list (void_type_node,
12326 V8QI_type_node, V8QI_type_node,
12327 pchar_type_node, NULL_TREE);
12328 tree v4sf_ftype_pcfloat
12329 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
12330 /* @@@ the type is bogus */
12331 tree v4sf_ftype_v4sf_pv2si
12332 = build_function_type_list (V4SF_type_node,
12333 V4SF_type_node, pv2si_type_node, NULL_TREE);
12334 tree void_ftype_pv2si_v4sf
12335 = build_function_type_list (void_type_node,
12336 pv2si_type_node, V4SF_type_node, NULL_TREE);
12337 tree void_ftype_pfloat_v4sf
12338 = build_function_type_list (void_type_node,
12339 pfloat_type_node, V4SF_type_node, NULL_TREE);
12340 tree void_ftype_pdi_di
12341 = build_function_type_list (void_type_node,
12342 pdi_type_node, long_long_unsigned_type_node,
12343 NULL_TREE);
12344 tree void_ftype_pv2di_v2di
12345 = build_function_type_list (void_type_node,
12346 pv2di_type_node, V2DI_type_node, NULL_TREE);
12347 /* Normal vector unops. */
12348 tree v4sf_ftype_v4sf
12349 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
12350
12351 /* Normal vector binops. */
12352 tree v4sf_ftype_v4sf_v4sf
12353 = build_function_type_list (V4SF_type_node,
12354 V4SF_type_node, V4SF_type_node, NULL_TREE);
12355 tree v8qi_ftype_v8qi_v8qi
12356 = build_function_type_list (V8QI_type_node,
12357 V8QI_type_node, V8QI_type_node, NULL_TREE);
12358 tree v4hi_ftype_v4hi_v4hi
12359 = build_function_type_list (V4HI_type_node,
12360 V4HI_type_node, V4HI_type_node, NULL_TREE);
12361 tree v2si_ftype_v2si_v2si
12362 = build_function_type_list (V2SI_type_node,
12363 V2SI_type_node, V2SI_type_node, NULL_TREE);
12364 tree di_ftype_di_di
12365 = build_function_type_list (long_long_unsigned_type_node,
12366 long_long_unsigned_type_node,
12367 long_long_unsigned_type_node, NULL_TREE);
12368
12369 tree v2si_ftype_v2sf
12370 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
12371 tree v2sf_ftype_v2si
12372 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
12373 tree v2si_ftype_v2si
12374 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
12375 tree v2sf_ftype_v2sf
12376 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
12377 tree v2sf_ftype_v2sf_v2sf
12378 = build_function_type_list (V2SF_type_node,
12379 V2SF_type_node, V2SF_type_node, NULL_TREE);
12380 tree v2si_ftype_v2sf_v2sf
12381 = build_function_type_list (V2SI_type_node,
12382 V2SF_type_node, V2SF_type_node, NULL_TREE);
12383 tree pint_type_node = build_pointer_type (integer_type_node);
12384 tree pcint_type_node = build_pointer_type (
12385 build_type_variant (integer_type_node, 1, 0));
12386 tree pdouble_type_node = build_pointer_type (double_type_node);
12387 tree pcdouble_type_node = build_pointer_type (
12388 build_type_variant (double_type_node, 1, 0));
12389 tree int_ftype_v2df_v2df
12390 = build_function_type_list (integer_type_node,
12391 V2DF_type_node, V2DF_type_node, NULL_TREE);
12392
12393 tree ti_ftype_void
12394 = build_function_type (intTI_type_node, void_list_node);
12395 tree v2di_ftype_void
12396 = build_function_type (V2DI_type_node, void_list_node);
12397 tree ti_ftype_ti_ti
12398 = build_function_type_list (intTI_type_node,
12399 intTI_type_node, intTI_type_node, NULL_TREE);
12400 tree void_ftype_pcvoid
12401 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
12402 tree v2di_ftype_di
12403 = build_function_type_list (V2DI_type_node,
12404 long_long_unsigned_type_node, NULL_TREE);
12405 tree di_ftype_v2di
12406 = build_function_type_list (long_long_unsigned_type_node,
12407 V2DI_type_node, NULL_TREE);
12408 tree v4sf_ftype_v4si
12409 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
12410 tree v4si_ftype_v4sf
12411 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
12412 tree v2df_ftype_v4si
12413 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
12414 tree v4si_ftype_v2df
12415 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
12416 tree v2si_ftype_v2df
12417 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
12418 tree v4sf_ftype_v2df
12419 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
12420 tree v2df_ftype_v2si
12421 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
12422 tree v2df_ftype_v4sf
12423 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
12424 tree int_ftype_v2df
12425 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
12426 tree int64_ftype_v2df
12427 = build_function_type_list (long_long_integer_type_node,
12428 V2DF_type_node, NULL_TREE);
12429 tree v2df_ftype_v2df_int
12430 = build_function_type_list (V2DF_type_node,
12431 V2DF_type_node, integer_type_node, NULL_TREE);
12432 tree v2df_ftype_v2df_int64
12433 = build_function_type_list (V2DF_type_node,
12434 V2DF_type_node, long_long_integer_type_node,
12435 NULL_TREE);
12436 tree v4sf_ftype_v4sf_v2df
12437 = build_function_type_list (V4SF_type_node,
12438 V4SF_type_node, V2DF_type_node, NULL_TREE);
12439 tree v2df_ftype_v2df_v4sf
12440 = build_function_type_list (V2DF_type_node,
12441 V2DF_type_node, V4SF_type_node, NULL_TREE);
12442 tree v2df_ftype_v2df_v2df_int
12443 = build_function_type_list (V2DF_type_node,
12444 V2DF_type_node, V2DF_type_node,
12445 integer_type_node,
12446 NULL_TREE);
12447 tree v2df_ftype_v2df_pv2si
12448 = build_function_type_list (V2DF_type_node,
12449 V2DF_type_node, pv2si_type_node, NULL_TREE);
12450 tree void_ftype_pv2si_v2df
12451 = build_function_type_list (void_type_node,
12452 pv2si_type_node, V2DF_type_node, NULL_TREE);
12453 tree void_ftype_pdouble_v2df
12454 = build_function_type_list (void_type_node,
12455 pdouble_type_node, V2DF_type_node, NULL_TREE);
12456 tree void_ftype_pint_int
12457 = build_function_type_list (void_type_node,
12458 pint_type_node, integer_type_node, NULL_TREE);
12459 tree void_ftype_v16qi_v16qi_pchar
12460 = build_function_type_list (void_type_node,
12461 V16QI_type_node, V16QI_type_node,
12462 pchar_type_node, NULL_TREE);
12463 tree v2df_ftype_pcdouble
12464 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
12465 tree v2df_ftype_v2df_v2df
12466 = build_function_type_list (V2DF_type_node,
12467 V2DF_type_node, V2DF_type_node, NULL_TREE);
12468 tree v16qi_ftype_v16qi_v16qi
12469 = build_function_type_list (V16QI_type_node,
12470 V16QI_type_node, V16QI_type_node, NULL_TREE);
12471 tree v8hi_ftype_v8hi_v8hi
12472 = build_function_type_list (V8HI_type_node,
12473 V8HI_type_node, V8HI_type_node, NULL_TREE);
12474 tree v4si_ftype_v4si_v4si
12475 = build_function_type_list (V4SI_type_node,
12476 V4SI_type_node, V4SI_type_node, NULL_TREE);
12477 tree v2di_ftype_v2di_v2di
12478 = build_function_type_list (V2DI_type_node,
12479 V2DI_type_node, V2DI_type_node, NULL_TREE);
12480 tree v2di_ftype_v2df_v2df
12481 = build_function_type_list (V2DI_type_node,
12482 V2DF_type_node, V2DF_type_node, NULL_TREE);
12483 tree v2df_ftype_v2df
12484 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
12485 tree v2df_ftype_double
12486 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
12487 tree v2df_ftype_double_double
12488 = build_function_type_list (V2DF_type_node,
12489 double_type_node, double_type_node, NULL_TREE);
12490 tree int_ftype_v8hi_int
12491 = build_function_type_list (integer_type_node,
12492 V8HI_type_node, integer_type_node, NULL_TREE);
12493 tree v8hi_ftype_v8hi_int_int
12494 = build_function_type_list (V8HI_type_node,
12495 V8HI_type_node, integer_type_node,
12496 integer_type_node, NULL_TREE);
12497 tree v2di_ftype_v2di_int
12498 = build_function_type_list (V2DI_type_node,
12499 V2DI_type_node, integer_type_node, NULL_TREE);
12500 tree v4si_ftype_v4si_int
12501 = build_function_type_list (V4SI_type_node,
12502 V4SI_type_node, integer_type_node, NULL_TREE);
12503 tree v8hi_ftype_v8hi_int
12504 = build_function_type_list (V8HI_type_node,
12505 V8HI_type_node, integer_type_node, NULL_TREE);
12506 tree v8hi_ftype_v8hi_v2di
12507 = build_function_type_list (V8HI_type_node,
12508 V8HI_type_node, V2DI_type_node, NULL_TREE);
12509 tree v4si_ftype_v4si_v2di
12510 = build_function_type_list (V4SI_type_node,
12511 V4SI_type_node, V2DI_type_node, NULL_TREE);
12512 tree v4si_ftype_v8hi_v8hi
12513 = build_function_type_list (V4SI_type_node,
12514 V8HI_type_node, V8HI_type_node, NULL_TREE);
12515 tree di_ftype_v8qi_v8qi
12516 = build_function_type_list (long_long_unsigned_type_node,
12517 V8QI_type_node, V8QI_type_node, NULL_TREE);
12518 tree di_ftype_v2si_v2si
12519 = build_function_type_list (long_long_unsigned_type_node,
12520 V2SI_type_node, V2SI_type_node, NULL_TREE);
12521 tree v2di_ftype_v16qi_v16qi
12522 = build_function_type_list (V2DI_type_node,
12523 V16QI_type_node, V16QI_type_node, NULL_TREE);
12524 tree v2di_ftype_v4si_v4si
12525 = build_function_type_list (V2DI_type_node,
12526 V4SI_type_node, V4SI_type_node, NULL_TREE);
12527 tree int_ftype_v16qi
12528 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
12529 tree v16qi_ftype_pcchar
12530 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
12531 tree void_ftype_pchar_v16qi
12532 = build_function_type_list (void_type_node,
12533 pchar_type_node, V16QI_type_node, NULL_TREE);
12534 tree v4si_ftype_pcint
12535 = build_function_type_list (V4SI_type_node, pcint_type_node, NULL_TREE);
12536 tree void_ftype_pcint_v4si
12537 = build_function_type_list (void_type_node,
12538 pcint_type_node, V4SI_type_node, NULL_TREE);
12539 tree v2di_ftype_v2di
12540 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
12541
12542 tree float80_type;
12543 tree float128_type;
12544
12545 /* The __float80 type. */
12546 if (TYPE_MODE (long_double_type_node) == XFmode)
12547 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
12548 "__float80");
12549 else
12550 {
12551       /* Build a distinct 80-bit REAL_TYPE for __float80.  */
12552 float80_type = make_node (REAL_TYPE);
12553 TYPE_PRECISION (float80_type) = 80;
12554 layout_type (float80_type);
12555 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
12556 }
12557
12558 float128_type = make_node (REAL_TYPE);
12559 TYPE_PRECISION (float128_type) = 128;
12560 layout_type (float128_type);
12561 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
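/* Editorial illustration (not part of i386.c): the register_builtin_type
   hooks above make the names "__float80" and "__float128" visible to the
   front end.  A minimal, hedged user-level sketch, assuming an ia32
   target where both registrations run:  */
#if 0
__float80  extended = 1.0L;	/* 80-bit extended-precision type  */
__float128 quad;		/* 128-bit type laid out above  */
#endif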
12562
12563 /* Add all builtins that are more or less simple operations on two
12564 operands. */
12565 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
12566 {
12567       /* Key off the mode of input operand 1; the result operand of a
12568          mask-generating compare can be in a different mode.  */
12569 enum machine_mode mode;
12570 tree type;
12571
12572 if (d->name == 0)
12573 continue;
12574 mode = insn_data[d->icode].operand[1].mode;
12575
12576 switch (mode)
12577 {
12578 case V16QImode:
12579 type = v16qi_ftype_v16qi_v16qi;
12580 break;
12581 case V8HImode:
12582 type = v8hi_ftype_v8hi_v8hi;
12583 break;
12584 case V4SImode:
12585 type = v4si_ftype_v4si_v4si;
12586 break;
12587 case V2DImode:
12588 type = v2di_ftype_v2di_v2di;
12589 break;
12590 case V2DFmode:
12591 type = v2df_ftype_v2df_v2df;
12592 break;
12593 case TImode:
12594 type = ti_ftype_ti_ti;
12595 break;
12596 case V4SFmode:
12597 type = v4sf_ftype_v4sf_v4sf;
12598 break;
12599 case V8QImode:
12600 type = v8qi_ftype_v8qi_v8qi;
12601 break;
12602 case V4HImode:
12603 type = v4hi_ftype_v4hi_v4hi;
12604 break;
12605 case V2SImode:
12606 type = v2si_ftype_v2si_v2si;
12607 break;
12608 case DImode:
12609 type = di_ftype_di_di;
12610 break;
12611
12612 default:
12613 abort ();
12614 }
12615
12616 /* Override for comparisons. */
12617 if (d->icode == CODE_FOR_maskcmpv4sf3
12618 || d->icode == CODE_FOR_maskncmpv4sf3
12619 || d->icode == CODE_FOR_vmmaskcmpv4sf3
12620 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
12621 type = v4si_ftype_v4sf_v4sf;
12622
12623 if (d->icode == CODE_FOR_maskcmpv2df3
12624 || d->icode == CODE_FOR_maskncmpv2df3
12625 || d->icode == CODE_FOR_vmmaskcmpv2df3
12626 || d->icode == CODE_FOR_vmmaskncmpv2df3)
12627 type = v2di_ftype_v2df_v2df;
12628
12629 def_builtin (d->mask, d->name, type, d->code);
12630 }
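/* Editorial illustration (not part of i386.c): the loop above derives each
   two-operand builtin prototype from the mode of insn operand 1, so an SSE
   insn working on V4SFmode gets the v4sf (v4sf, v4sf) signature.  A hedged
   sketch, assuming -msse and that __builtin_ia32_addps is one of the
   bdesc_2arg entries declared elsewhere in this file:  */
#if 0
typedef float __v4sf __attribute__ ((__vector_size__ (16)));

static __v4sf
example_addps (__v4sf a, __v4sf b)
{
  /* Expands through ix86_expand_binop_builtin below.  */
  return __builtin_ia32_addps (a, b);
}
#endif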
12631
12632 /* Add the remaining MMX insns with somewhat more complicated types. */
12633 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
12634 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
12635 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
12636 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
12637 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
12638
12639 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
12640 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
12641 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
12642
12643 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
12644 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
12645
12646 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
12647 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
12648
12649 /* comi/ucomi insns. */
12650 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
12651 if (d->mask == MASK_SSE2)
12652 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
12653 else
12654 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
12655
12656 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
12657 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
12658 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
12659
12660 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
12661 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
12662 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
12663 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
12664 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
12665 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
12666 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
12667 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
12668 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
12669 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
12670 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
12671
12672 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
12673 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
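/* Editorial illustration (not part of i386.c): the selector argument of
   __builtin_ia32_pextrw must be an integer constant; the PEXTRW case in
   ix86_expand_builtin below otherwise reports "selector must be an integer
   constant in the range 0..3".  A hedged sketch, assuming -msse:  */
#if 0
typedef short __v4hi __attribute__ ((__vector_size__ (8)));

static int
example_pextrw (__v4hi v)
{
  return __builtin_ia32_pextrw (v, 2);	/* extract word 2 of 0..3  */
}
#endif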
12674
12675 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
12676
12677 def_builtin (MASK_SSE, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADAPS);
12678 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
12679 def_builtin (MASK_SSE, "__builtin_ia32_loadss", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADSS);
12680 def_builtin (MASK_SSE, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
12681 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
12682 def_builtin (MASK_SSE, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
12683
12684 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
12685 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
12686 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
12687 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
12688
12689 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
12690 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
12691 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
12692 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
12693
12694 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
12695
12696 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
12697
12698 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
12699 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
12700 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
12701 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
12702 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
12703 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
12704
12705 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
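/* Editorial illustration (not part of i386.c): the int argument of
   __builtin_ia32_shufps is the SHUFPS selector byte and must be a
   compile-time constant; the SHUFPS/SHUFPD case in ix86_expand_builtin
   below rejects anything else with "mask must be an immediate".  A hedged
   sketch, assuming -msse:  */
#if 0
typedef float __v4sf __attribute__ ((__vector_size__ (16)));

static __v4sf
example_shufps (__v4sf a, __v4sf b)
{
  return __builtin_ia32_shufps (a, b, 0x1B);	/* each 2-bit field selects a source element  */
}
#endif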
12706
12707 /* Original 3DNow! */
12708 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
12709 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
12710 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
12711 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
12712 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
12713 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
12714 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
12715 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
12716 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
12717 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
12718 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
12719 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
12720 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
12721 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
12722 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
12723 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
12724 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
12725 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
12726 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
12727 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
12728
12729 /* 3DNow! extension as used in the Athlon CPU. */
12730 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
12731 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
12732 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
12733 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
12734 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
12735 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
12736
12737 def_builtin (MASK_SSE, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
12738
12739 /* SSE2 */
12740 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
12741 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
12742
12743 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
12744 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
12745 def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q);
12746
12747 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADAPD);
12748 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
12749 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADSD);
12750 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
12751 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
12752 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
12753
12754 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
12755 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
12756 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
12757 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
12758
12759 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
12760 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
12761 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
12762 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
12763 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
12764
12765 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
12766 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
12767 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
12768 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
12769
12770 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
12771 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
12772
12773 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
12774
12775 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
12776 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
12777
12778 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
12779 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
12780 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
12781 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
12782 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
12783
12784 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
12785
12786 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
12787 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
12788 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
12789 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
12790
12791 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
12792 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
12793 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
12794
12795 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
12796 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
12797 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
12798 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
12799
12800 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
12801 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
12802 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
12803 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pcdouble, IX86_BUILTIN_LOADPD1);
12804 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADRPD);
12805 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
12806 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
12807
12808 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
12809 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
12810 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
12811
12812 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQA);
12813 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
12814 def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pcint, IX86_BUILTIN_LOADD);
12815 def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA);
12816 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
12817 def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pcint_v4si, IX86_BUILTIN_STORED);
12818 def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ);
12819
12820 def_builtin (MASK_SSE, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI);
12821
12822 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
12823 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
12824
12825 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
12826 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
12827 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
12828
12829 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
12830 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
12831 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
12832
12833 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
12834 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
12835
12836 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
12837 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
12838 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
12839 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
12840
12841 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
12842 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
12843 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
12844 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
12845
12846 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
12847 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
12848
12849 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
12850
12851 /* Prescott New Instructions. */
12852 def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
12853 void_ftype_pcvoid_unsigned_unsigned,
12854 IX86_BUILTIN_MONITOR);
12855 def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
12856 void_ftype_unsigned_unsigned,
12857 IX86_BUILTIN_MWAIT);
12858 def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
12859 v4sf_ftype_v4sf,
12860 IX86_BUILTIN_MOVSHDUP);
12861 def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
12862 v4sf_ftype_v4sf,
12863 IX86_BUILTIN_MOVSLDUP);
12864 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
12865 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
12866 def_builtin (MASK_SSE3, "__builtin_ia32_loadddup",
12867 v2df_ftype_pcdouble, IX86_BUILTIN_LOADDDUP);
12868 def_builtin (MASK_SSE3, "__builtin_ia32_movddup",
12869 v2df_ftype_v2df, IX86_BUILTIN_MOVDDUP);
12870 }
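/* Editorial illustration (not part of i386.c): the SSE3 monitor/mwait
   builtins registered above take (const void *, unsigned, unsigned) and
   (unsigned, unsigned); the MONITOR/MWAIT cases in ix86_expand_builtin
   below copy each argument into a register.  A hedged sketch, assuming
   -msse3:  */
#if 0
static void
example_monitor_mwait (const void *addr)
{
  __builtin_ia32_monitor (addr, 0, 0);	/* arm the address monitor  */
  __builtin_ia32_mwait (0, 0);		/* wait for a store to it  */
}
#endif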
12871
12872 /* Errors in the source file can cause expand_expr to return const0_rtx
12873 where we expect a vector. To avoid crashing, use one of the vector
12874 clear instructions. */
12875 static rtx
12876 safe_vector_operand (rtx x, enum machine_mode mode)
12877 {
12878 if (x != const0_rtx)
12879 return x;
12880 x = gen_reg_rtx (mode);
12881
12882 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
12883 emit_insn (gen_mmx_clrdi (mode == DImode ? x
12884 : gen_rtx_SUBREG (DImode, x, 0)));
12885 else
12886 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
12887 : gen_rtx_SUBREG (V4SFmode, x, 0),
12888 CONST0_RTX (V4SFmode)));
12889 return x;
12890 }
12891
12892 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
12893
12894 static rtx
12895 ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
12896 {
12897 rtx pat;
12898 tree arg0 = TREE_VALUE (arglist);
12899 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12900 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12901 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12902 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12903 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12904 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
12905
12906 if (VECTOR_MODE_P (mode0))
12907 op0 = safe_vector_operand (op0, mode0);
12908 if (VECTOR_MODE_P (mode1))
12909 op1 = safe_vector_operand (op1, mode1);
12910
12911 if (! target
12912 || GET_MODE (target) != tmode
12913 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12914 target = gen_reg_rtx (tmode);
12915
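  /* Editorial note: the block below handles builtins whose insn expects a
     TImode operand while the caller passed a plain int; the SImode value is
     loaded into a V4SImode register with sse2_loadd and its TImode low part
     is used instead.  */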
12916 if (GET_MODE (op1) == SImode && mode1 == TImode)
12917 {
12918 rtx x = gen_reg_rtx (V4SImode);
12919 emit_insn (gen_sse2_loadd (x, op1));
12920 op1 = gen_lowpart (TImode, x);
12921 }
12922
12923       /* If the insn's input operands arrived in modes other than the ones
12924          the insn pattern expects, abort.  */
12925 if ((GET_MODE (op0) != mode0 && GET_MODE (op0) != VOIDmode)
12926 || (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode))
12927 abort ();
12928
12929 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12930 op0 = copy_to_mode_reg (mode0, op0);
12931 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12932 op1 = copy_to_mode_reg (mode1, op1);
12933
12934       /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
12935          yet at most one of them may be a memory operand.  This is normally
12936          enforced by expanders, but we did not bother to create one here.  */
12937 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
12938 op0 = copy_to_mode_reg (mode0, op0);
12939
12940 pat = GEN_FCN (icode) (target, op0, op1);
12941 if (! pat)
12942 return 0;
12943 emit_insn (pat);
12944 return target;
12945 }
12946
12947 /* Subroutine of ix86_expand_builtin to take care of stores. */
12948
12949 static rtx
12950 ix86_expand_store_builtin (enum insn_code icode, tree arglist)
12951 {
12952 rtx pat;
12953 tree arg0 = TREE_VALUE (arglist);
12954 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12955 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12956 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12957 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
12958 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
12959
12960 if (VECTOR_MODE_P (mode1))
12961 op1 = safe_vector_operand (op1, mode1);
12962
12963 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
12964 op1 = copy_to_mode_reg (mode1, op1);
12965
12966 pat = GEN_FCN (icode) (op0, op1);
12967 if (pat)
12968 emit_insn (pat);
12969 return 0;
12970 }
12971
12972 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
12973
12974 static rtx
12975 ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
12976 rtx target, int do_load)
12977 {
12978 rtx pat;
12979 tree arg0 = TREE_VALUE (arglist);
12980 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12981 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12982 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12983
12984 if (! target
12985 || GET_MODE (target) != tmode
12986 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12987 target = gen_reg_rtx (tmode);
12988 if (do_load)
12989 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
12990 else
12991 {
12992 if (VECTOR_MODE_P (mode0))
12993 op0 = safe_vector_operand (op0, mode0);
12994
12995 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12996 op0 = copy_to_mode_reg (mode0, op0);
12997 }
12998
12999 pat = GEN_FCN (icode) (target, op0);
13000 if (! pat)
13001 return 0;
13002 emit_insn (pat);
13003 return target;
13004 }
13005
13006 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
13007 sqrtss, rsqrtss, rcpss. */
13008
13009 static rtx
13010 ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
13011 {
13012 rtx pat;
13013 tree arg0 = TREE_VALUE (arglist);
13014 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13015 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13016 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13017
13018 if (! target
13019 || GET_MODE (target) != tmode
13020 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13021 target = gen_reg_rtx (tmode);
13022
13023 if (VECTOR_MODE_P (mode0))
13024 op0 = safe_vector_operand (op0, mode0);
13025
13026 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13027 op0 = copy_to_mode_reg (mode0, op0);
13028
13029 op1 = op0;
13030 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
13031 op1 = copy_to_mode_reg (mode0, op1);
13032
13033 pat = GEN_FCN (icode) (target, op0, op1);
13034 if (! pat)
13035 return 0;
13036 emit_insn (pat);
13037 return target;
13038 }
13039
13040 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
13041
13042 static rtx
13043 ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
13044 rtx target)
13045 {
13046 rtx pat;
13047 tree arg0 = TREE_VALUE (arglist);
13048 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13049 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13050 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13051 rtx op2;
13052 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
13053 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
13054 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
13055 enum rtx_code comparison = d->comparison;
13056
13057 if (VECTOR_MODE_P (mode0))
13058 op0 = safe_vector_operand (op0, mode0);
13059 if (VECTOR_MODE_P (mode1))
13060 op1 = safe_vector_operand (op1, mode1);
13061
13062 /* Swap operands if we have a comparison that isn't available in
13063 hardware. */
13064 if (d->flag)
13065 {
13066 rtx tmp = gen_reg_rtx (mode1);
13067 emit_move_insn (tmp, op1);
13068 op1 = op0;
13069 op0 = tmp;
13070 }
13071
13072 if (! target
13073 || GET_MODE (target) != tmode
13074 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
13075 target = gen_reg_rtx (tmode);
13076
13077 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
13078 op0 = copy_to_mode_reg (mode0, op0);
13079 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
13080 op1 = copy_to_mode_reg (mode1, op1);
13081
13082 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13083 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
13084 if (! pat)
13085 return 0;
13086 emit_insn (pat);
13087 return target;
13088 }
13089
13090 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
13091
13092 static rtx
13093 ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
13094 rtx target)
13095 {
13096 rtx pat;
13097 tree arg0 = TREE_VALUE (arglist);
13098 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13099 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13100 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13101 rtx op2;
13102 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
13103 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
13104 enum rtx_code comparison = d->comparison;
13105
13106 if (VECTOR_MODE_P (mode0))
13107 op0 = safe_vector_operand (op0, mode0);
13108 if (VECTOR_MODE_P (mode1))
13109 op1 = safe_vector_operand (op1, mode1);
13110
13111 /* Swap operands if we have a comparison that isn't available in
13112 hardware. */
13113 if (d->flag)
13114 {
13115 rtx tmp = op1;
13116 op1 = op0;
13117 op0 = tmp;
13118 }
13119
13120 target = gen_reg_rtx (SImode);
13121 emit_move_insn (target, const0_rtx);
13122 target = gen_rtx_SUBREG (QImode, target, 0);
13123
13124 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
13125 op0 = copy_to_mode_reg (mode0, op0);
13126 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
13127 op1 = copy_to_mode_reg (mode1, op1);
13128
13129 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13130 pat = GEN_FCN (d->icode) (op0, op1);
13131 if (! pat)
13132 return 0;
13133 emit_insn (pat);
13134 emit_insn (gen_rtx_SET (VOIDmode,
13135 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
13136 gen_rtx_fmt_ee (comparison, QImode,
13137 SET_DEST (pat),
13138 const0_rtx)));
13139
13140 return SUBREG_REG (target);
13141 }
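/* Editorial illustration (not part of i386.c): the comi expander above
   materializes the flag result as an integer via a QImode subreg of an
   SImode pseudo.  A hedged user-level sketch, assuming -msse and that
   __builtin_ia32_comieq is one of the bdesc_comi entries declared
   elsewhere in this file:  */
#if 0
typedef float __v4sf __attribute__ ((__vector_size__ (16)));

static int
example_comieq (__v4sf a, __v4sf b)
{
  /* Intended to yield nonzero when the low elements compare equal.  */
  return __builtin_ia32_comieq (a, b);
}
#endif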
13142
13143 /* Expand an expression EXP that calls a built-in function,
13144 with result going to TARGET if that's convenient
13145 (and in mode MODE if that's convenient).
13146 SUBTARGET may be used as the target for computing one of EXP's operands.
13147 IGNORE is nonzero if the value is to be ignored. */
13148
13149 rtx
13150 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
13151 enum machine_mode mode ATTRIBUTE_UNUSED,
13152 int ignore ATTRIBUTE_UNUSED)
13153 {
13154 const struct builtin_description *d;
13155 size_t i;
13156 enum insn_code icode;
13157 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
13158 tree arglist = TREE_OPERAND (exp, 1);
13159 tree arg0, arg1, arg2;
13160 rtx op0, op1, op2, pat;
13161 enum machine_mode tmode, mode0, mode1, mode2;
13162 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
13163
13164 switch (fcode)
13165 {
13166 case IX86_BUILTIN_EMMS:
13167 emit_insn (gen_emms ());
13168 return 0;
13169
13170 case IX86_BUILTIN_SFENCE:
13171 emit_insn (gen_sfence ());
13172 return 0;
13173
13174 case IX86_BUILTIN_PEXTRW:
13175 case IX86_BUILTIN_PEXTRW128:
13176 icode = (fcode == IX86_BUILTIN_PEXTRW
13177 ? CODE_FOR_mmx_pextrw
13178 : CODE_FOR_sse2_pextrw);
13179 arg0 = TREE_VALUE (arglist);
13180 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13181 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13182 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13183 tmode = insn_data[icode].operand[0].mode;
13184 mode0 = insn_data[icode].operand[1].mode;
13185 mode1 = insn_data[icode].operand[2].mode;
13186
13187 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13188 op0 = copy_to_mode_reg (mode0, op0);
13189 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13190 {
13191 error ("selector must be an integer constant in the range 0..%i",
13192 fcode == IX86_BUILTIN_PEXTRW ? 3:7);
13193 return gen_reg_rtx (tmode);
13194 }
13195 if (target == 0
13196 || GET_MODE (target) != tmode
13197 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13198 target = gen_reg_rtx (tmode);
13199 pat = GEN_FCN (icode) (target, op0, op1);
13200 if (! pat)
13201 return 0;
13202 emit_insn (pat);
13203 return target;
13204
13205 case IX86_BUILTIN_PINSRW:
13206 case IX86_BUILTIN_PINSRW128:
13207 icode = (fcode == IX86_BUILTIN_PINSRW
13208 ? CODE_FOR_mmx_pinsrw
13209 : CODE_FOR_sse2_pinsrw);
13210 arg0 = TREE_VALUE (arglist);
13211 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13212 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13213 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13214 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13215 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13216 tmode = insn_data[icode].operand[0].mode;
13217 mode0 = insn_data[icode].operand[1].mode;
13218 mode1 = insn_data[icode].operand[2].mode;
13219 mode2 = insn_data[icode].operand[3].mode;
13220
13221 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13222 op0 = copy_to_mode_reg (mode0, op0);
13223 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13224 op1 = copy_to_mode_reg (mode1, op1);
13225 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13226 {
13227 error ("selector must be an integer constant in the range 0..%i",
13228 fcode == IX86_BUILTIN_PINSRW ? 15:255);
13229 return const0_rtx;
13230 }
13231 if (target == 0
13232 || GET_MODE (target) != tmode
13233 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13234 target = gen_reg_rtx (tmode);
13235 pat = GEN_FCN (icode) (target, op0, op1, op2);
13236 if (! pat)
13237 return 0;
13238 emit_insn (pat);
13239 return target;
13240
13241 case IX86_BUILTIN_MASKMOVQ:
13242 case IX86_BUILTIN_MASKMOVDQU:
13243 icode = (fcode == IX86_BUILTIN_MASKMOVQ
13244 ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
13245 : (TARGET_64BIT ? CODE_FOR_sse2_maskmovdqu_rex64
13246 : CODE_FOR_sse2_maskmovdqu));
13247 /* Note the arg order is different from the operand order. */
13248 arg1 = TREE_VALUE (arglist);
13249 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
13250 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13251 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13252 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13253 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13254 mode0 = insn_data[icode].operand[0].mode;
13255 mode1 = insn_data[icode].operand[1].mode;
13256 mode2 = insn_data[icode].operand[2].mode;
13257
13258 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13259 op0 = copy_to_mode_reg (mode0, op0);
13260 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
13261 op1 = copy_to_mode_reg (mode1, op1);
13262 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
13263 op2 = copy_to_mode_reg (mode2, op2);
13264 pat = GEN_FCN (icode) (op0, op1, op2);
13265 if (! pat)
13266 return 0;
13267 emit_insn (pat);
13268 return 0;
13269
13270 case IX86_BUILTIN_SQRTSS:
13271 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
13272 case IX86_BUILTIN_RSQRTSS:
13273 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
13274 case IX86_BUILTIN_RCPSS:
13275 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
13276
13277 case IX86_BUILTIN_LOADAPS:
13278 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
13279
13280 case IX86_BUILTIN_LOADUPS:
13281 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
13282
13283 case IX86_BUILTIN_STOREAPS:
13284 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
13285
13286 case IX86_BUILTIN_STOREUPS:
13287 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
13288
13289 case IX86_BUILTIN_LOADSS:
13290 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
13291
13292 case IX86_BUILTIN_STORESS:
13293 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
13294
13295 case IX86_BUILTIN_LOADHPS:
13296 case IX86_BUILTIN_LOADLPS:
13297 case IX86_BUILTIN_LOADHPD:
13298 case IX86_BUILTIN_LOADLPD:
13299 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
13300 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
13301 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
13302 : CODE_FOR_sse2_movsd);
13303 arg0 = TREE_VALUE (arglist);
13304 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13305 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13306 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13307 tmode = insn_data[icode].operand[0].mode;
13308 mode0 = insn_data[icode].operand[1].mode;
13309 mode1 = insn_data[icode].operand[2].mode;
13310
13311 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13312 op0 = copy_to_mode_reg (mode0, op0);
13313 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
13314 if (target == 0
13315 || GET_MODE (target) != tmode
13316 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13317 target = gen_reg_rtx (tmode);
13318 pat = GEN_FCN (icode) (target, op0, op1);
13319 if (! pat)
13320 return 0;
13321 emit_insn (pat);
13322 return target;
13323
13324 case IX86_BUILTIN_STOREHPS:
13325 case IX86_BUILTIN_STORELPS:
13326 case IX86_BUILTIN_STOREHPD:
13327 case IX86_BUILTIN_STORELPD:
13328 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
13329 : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
13330 : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
13331 : CODE_FOR_sse2_movsd);
13332 arg0 = TREE_VALUE (arglist);
13333 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13334 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13335 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13336 mode0 = insn_data[icode].operand[1].mode;
13337 mode1 = insn_data[icode].operand[2].mode;
13338
13339 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13340 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13341 op1 = copy_to_mode_reg (mode1, op1);
13342
13343 pat = GEN_FCN (icode) (op0, op0, op1);
13344 if (! pat)
13345 return 0;
13346 emit_insn (pat);
13347 return 0;
13348
13349 case IX86_BUILTIN_MOVNTPS:
13350 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
13351 case IX86_BUILTIN_MOVNTQ:
13352 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
13353
13354 case IX86_BUILTIN_LDMXCSR:
13355 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
13356 target = assign_386_stack_local (SImode, 0);
13357 emit_move_insn (target, op0);
13358 emit_insn (gen_ldmxcsr (target));
13359 return 0;
13360
13361 case IX86_BUILTIN_STMXCSR:
13362 target = assign_386_stack_local (SImode, 0);
13363 emit_insn (gen_stmxcsr (target));
13364 return copy_to_mode_reg (SImode, target);
13365
13366 case IX86_BUILTIN_SHUFPS:
13367 case IX86_BUILTIN_SHUFPD:
13368 icode = (fcode == IX86_BUILTIN_SHUFPS
13369 ? CODE_FOR_sse_shufps
13370 : CODE_FOR_sse2_shufpd);
13371 arg0 = TREE_VALUE (arglist);
13372 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13373 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13374 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13375 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13376 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13377 tmode = insn_data[icode].operand[0].mode;
13378 mode0 = insn_data[icode].operand[1].mode;
13379 mode1 = insn_data[icode].operand[2].mode;
13380 mode2 = insn_data[icode].operand[3].mode;
13381
13382 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13383 op0 = copy_to_mode_reg (mode0, op0);
13384 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13385 op1 = copy_to_mode_reg (mode1, op1);
13386 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13387 {
13388 /* @@@ better error message */
13389 error ("mask must be an immediate");
13390 return gen_reg_rtx (tmode);
13391 }
13392 if (target == 0
13393 || GET_MODE (target) != tmode
13394 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13395 target = gen_reg_rtx (tmode);
13396 pat = GEN_FCN (icode) (target, op0, op1, op2);
13397 if (! pat)
13398 return 0;
13399 emit_insn (pat);
13400 return target;
13401
13402 case IX86_BUILTIN_PSHUFW:
13403 case IX86_BUILTIN_PSHUFD:
13404 case IX86_BUILTIN_PSHUFHW:
13405 case IX86_BUILTIN_PSHUFLW:
13406 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
13407 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
13408 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
13409 : CODE_FOR_mmx_pshufw);
13410 arg0 = TREE_VALUE (arglist);
13411 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13412 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13413 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13414 tmode = insn_data[icode].operand[0].mode;
13415 mode1 = insn_data[icode].operand[1].mode;
13416 mode2 = insn_data[icode].operand[2].mode;
13417
13418 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
13419 op0 = copy_to_mode_reg (mode1, op0);
13420 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
13421 {
13422 /* @@@ better error message */
13423 error ("mask must be an immediate");
13424 return const0_rtx;
13425 }
13426 if (target == 0
13427 || GET_MODE (target) != tmode
13428 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13429 target = gen_reg_rtx (tmode);
13430 pat = GEN_FCN (icode) (target, op0, op1);
13431 if (! pat)
13432 return 0;
13433 emit_insn (pat);
13434 return target;
13435
13436 case IX86_BUILTIN_PSLLDQI128:
13437 case IX86_BUILTIN_PSRLDQI128:
13438 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
13439 : CODE_FOR_sse2_lshrti3);
13440 arg0 = TREE_VALUE (arglist);
13441 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13442 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13443 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13444 tmode = insn_data[icode].operand[0].mode;
13445 mode1 = insn_data[icode].operand[1].mode;
13446 mode2 = insn_data[icode].operand[2].mode;
13447
13448 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
13449 {
13450 op0 = copy_to_reg (op0);
13451 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
13452 }
13453 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
13454 {
13455 error ("shift must be an immediate");
13456 return const0_rtx;
13457 }
13458 target = gen_reg_rtx (V2DImode);
13459 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
13460 if (! pat)
13461 return 0;
13462 emit_insn (pat);
13463 return target;
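/* Editorial illustration (not part of i386.c): the PSLLDQI128/PSRLDQI128
   case above shifts the whole 128-bit register through sse2_ashlti3 or
   sse2_lshrti3, so the count is a bit count and must be a literal
   constant ("shift must be an immediate").  A hedged sketch, assuming
   -msse2:  */
#if 0
typedef long long __v2di __attribute__ ((__vector_size__ (16)));

static __v2di
example_pslldq (__v2di x)
{
  return __builtin_ia32_pslldqi128 (x, 64);	/* shift left by 64 bits  */
}
#endif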
13464
13465 case IX86_BUILTIN_FEMMS:
13466 emit_insn (gen_femms ());
13467 return NULL_RTX;
13468
13469 case IX86_BUILTIN_PAVGUSB:
13470 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
13471
13472 case IX86_BUILTIN_PF2ID:
13473 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
13474
13475 case IX86_BUILTIN_PFACC:
13476 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
13477
13478 case IX86_BUILTIN_PFADD:
13479 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
13480
13481 case IX86_BUILTIN_PFCMPEQ:
13482 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
13483
13484 case IX86_BUILTIN_PFCMPGE:
13485 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
13486
13487 case IX86_BUILTIN_PFCMPGT:
13488 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
13489
13490 case IX86_BUILTIN_PFMAX:
13491 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
13492
13493 case IX86_BUILTIN_PFMIN:
13494 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
13495
13496 case IX86_BUILTIN_PFMUL:
13497 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
13498
13499 case IX86_BUILTIN_PFRCP:
13500 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
13501
13502 case IX86_BUILTIN_PFRCPIT1:
13503 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
13504
13505 case IX86_BUILTIN_PFRCPIT2:
13506 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
13507
13508 case IX86_BUILTIN_PFRSQIT1:
13509 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
13510
13511 case IX86_BUILTIN_PFRSQRT:
13512 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
13513
13514 case IX86_BUILTIN_PFSUB:
13515 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
13516
13517 case IX86_BUILTIN_PFSUBR:
13518 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
13519
13520 case IX86_BUILTIN_PI2FD:
13521 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
13522
13523 case IX86_BUILTIN_PMULHRW:
13524 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
13525
13526 case IX86_BUILTIN_PF2IW:
13527 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
13528
13529 case IX86_BUILTIN_PFNACC:
13530 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
13531
13532 case IX86_BUILTIN_PFPNACC:
13533 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
13534
13535 case IX86_BUILTIN_PI2FW:
13536 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
13537
13538 case IX86_BUILTIN_PSWAPDSI:
13539 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
13540
13541 case IX86_BUILTIN_PSWAPDSF:
13542 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
13543
13544 case IX86_BUILTIN_SSE_ZERO:
13545 target = gen_reg_rtx (V4SFmode);
13546 emit_insn (gen_sse_clrv4sf (target, CONST0_RTX (V4SFmode)));
13547 return target;
13548
13549 case IX86_BUILTIN_MMX_ZERO:
13550 target = gen_reg_rtx (DImode);
13551 emit_insn (gen_mmx_clrdi (target));
13552 return target;
13553
13554 case IX86_BUILTIN_CLRTI:
13555 target = gen_reg_rtx (V2DImode);
13556 emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode, target, V2DImode, 0)));
13557 return target;
13558
13559
13560 case IX86_BUILTIN_SQRTSD:
13561 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
13562 case IX86_BUILTIN_LOADAPD:
13563 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
13564 case IX86_BUILTIN_LOADUPD:
13565 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
13566
13567 case IX86_BUILTIN_STOREAPD:
13568 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13569 case IX86_BUILTIN_STOREUPD:
13570 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
13571
13572 case IX86_BUILTIN_LOADSD:
13573 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
13574
13575 case IX86_BUILTIN_STORESD:
13576 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
13577
13578 case IX86_BUILTIN_SETPD1:
13579 target = assign_386_stack_local (DFmode, 0);
13580 arg0 = TREE_VALUE (arglist);
13581 emit_move_insn (adjust_address (target, DFmode, 0),
13582 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
13583 op0 = gen_reg_rtx (V2DFmode);
13584 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
13585 emit_insn (gen_sse2_shufpd (op0, op0, op0, const0_rtx));
13586 return op0;
13587
13588 case IX86_BUILTIN_SETPD:
13589 target = assign_386_stack_local (V2DFmode, 0);
13590 arg0 = TREE_VALUE (arglist);
13591 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13592 emit_move_insn (adjust_address (target, DFmode, 0),
13593 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
13594 emit_move_insn (adjust_address (target, DFmode, 8),
13595 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
13596 op0 = gen_reg_rtx (V2DFmode);
13597 emit_insn (gen_sse2_movapd (op0, target));
13598 return op0;
13599
13600 case IX86_BUILTIN_LOADRPD:
13601 target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
13602 gen_reg_rtx (V2DFmode), 1);
13603 emit_insn (gen_sse2_shufpd (target, target, target, const1_rtx));
13604 return target;
13605
13606 case IX86_BUILTIN_LOADPD1:
13607 target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
13608 gen_reg_rtx (V2DFmode), 1);
13609 emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
13610 return target;
13611
13612 case IX86_BUILTIN_STOREPD1:
13613 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13614 case IX86_BUILTIN_STORERPD:
13615 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13616
13617 case IX86_BUILTIN_CLRPD:
13618 target = gen_reg_rtx (V2DFmode);
13619 emit_insn (gen_sse_clrv2df (target));
13620 return target;
13621
13622 case IX86_BUILTIN_MFENCE:
13623 emit_insn (gen_sse2_mfence ());
13624 return 0;
13625 case IX86_BUILTIN_LFENCE:
13626 emit_insn (gen_sse2_lfence ());
13627 return 0;
13628
13629 case IX86_BUILTIN_CLFLUSH:
13630 arg0 = TREE_VALUE (arglist);
13631 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13632 icode = CODE_FOR_sse2_clflush;
13633 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
13634 op0 = copy_to_mode_reg (Pmode, op0);
13635
13636 emit_insn (gen_sse2_clflush (op0));
13637 return 0;
13638
13639 case IX86_BUILTIN_MOVNTPD:
13640 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
13641 case IX86_BUILTIN_MOVNTDQ:
13642 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
13643 case IX86_BUILTIN_MOVNTI:
13644 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
13645
13646 case IX86_BUILTIN_LOADDQA:
13647 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa, arglist, target, 1);
13648 case IX86_BUILTIN_LOADDQU:
13649 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
13650 case IX86_BUILTIN_LOADD:
13651 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd, arglist, target, 1);
13652
13653 case IX86_BUILTIN_STOREDQA:
13654 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa, arglist);
13655 case IX86_BUILTIN_STOREDQU:
13656 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
13657 case IX86_BUILTIN_STORED:
13658 return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist);
13659
13660 case IX86_BUILTIN_MONITOR:
13661 arg0 = TREE_VALUE (arglist);
13662 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13663 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13664 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13665 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13666 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13667 if (!REG_P (op0))
13668 op0 = copy_to_mode_reg (SImode, op0);
13669 if (!REG_P (op1))
13670 op1 = copy_to_mode_reg (SImode, op1);
13671 if (!REG_P (op2))
13672 op2 = copy_to_mode_reg (SImode, op2);
13673 emit_insn (gen_monitor (op0, op1, op2));
13674 return 0;
13675
13676 case IX86_BUILTIN_MWAIT:
13677 arg0 = TREE_VALUE (arglist);
13678 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13679 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13680 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13681 if (!REG_P (op0))
13682 op0 = copy_to_mode_reg (SImode, op0);
13683 if (!REG_P (op1))
13684 op1 = copy_to_mode_reg (SImode, op1);
13685 emit_insn (gen_mwait (op0, op1));
13686 return 0;
13687
13688 case IX86_BUILTIN_LOADDDUP:
13689 return ix86_expand_unop_builtin (CODE_FOR_loadddup, arglist, target, 1);
13690
13691 case IX86_BUILTIN_LDDQU:
13692 return ix86_expand_unop_builtin (CODE_FOR_lddqu, arglist, target,
13693 1);
13694
13695 default:
13696 break;
13697 }
13698
13699 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
13700 if (d->code == fcode)
13701 {
13702 /* Compares are treated specially. */
13703 if (d->icode == CODE_FOR_maskcmpv4sf3
13704 || d->icode == CODE_FOR_vmmaskcmpv4sf3
13705 || d->icode == CODE_FOR_maskncmpv4sf3
13706 || d->icode == CODE_FOR_vmmaskncmpv4sf3
13707 || d->icode == CODE_FOR_maskcmpv2df3
13708 || d->icode == CODE_FOR_vmmaskcmpv2df3
13709 || d->icode == CODE_FOR_maskncmpv2df3
13710 || d->icode == CODE_FOR_vmmaskncmpv2df3)
13711 return ix86_expand_sse_compare (d, arglist, target);
13712
13713 return ix86_expand_binop_builtin (d->icode, arglist, target);
13714 }
13715
13716 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
13717 if (d->code == fcode)
13718 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
13719
13720 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
13721 if (d->code == fcode)
13722 return ix86_expand_sse_comi (d, arglist, target);
13723
13724 /* @@@ Should really do something sensible here. */
13725 return 0;
13726 }
13727
13728 /* Store OPERAND to the memory after reload is completed. This means
13729 that we can't easily use assign_stack_local. */
13730 rtx
13731 ix86_force_to_memory (enum machine_mode mode, rtx operand)
13732 {
13733 rtx result;
13734 if (!reload_completed)
13735 abort ();
13736 if (TARGET_RED_ZONE)
13737 {
13738 result = gen_rtx_MEM (mode,
13739 gen_rtx_PLUS (Pmode,
13740 stack_pointer_rtx,
13741 GEN_INT (-RED_ZONE_SIZE)));
13742 emit_move_insn (result, operand);
13743 }
13744 else if (!TARGET_RED_ZONE && TARGET_64BIT)
13745 {
13746 switch (mode)
13747 {
13748 case HImode:
13749 case SImode:
13750 operand = gen_lowpart (DImode, operand);
13751 /* FALLTHRU */
13752 case DImode:
13753 emit_insn (
13754 gen_rtx_SET (VOIDmode,
13755 gen_rtx_MEM (DImode,
13756 gen_rtx_PRE_DEC (DImode,
13757 stack_pointer_rtx)),
13758 operand));
13759 break;
13760 default:
13761 abort ();
13762 }
13763 result = gen_rtx_MEM (mode, stack_pointer_rtx);
13764 }
13765 else
13766 {
13767 switch (mode)
13768 {
13769 case DImode:
13770 {
13771 rtx operands[2];
13772 split_di (&operand, 1, operands, operands + 1);
13773 emit_insn (
13774 gen_rtx_SET (VOIDmode,
13775 gen_rtx_MEM (SImode,
13776 gen_rtx_PRE_DEC (Pmode,
13777 stack_pointer_rtx)),
13778 operands[1]));
13779 emit_insn (
13780 gen_rtx_SET (VOIDmode,
13781 gen_rtx_MEM (SImode,
13782 gen_rtx_PRE_DEC (Pmode,
13783 stack_pointer_rtx)),
13784 operands[0]));
13785 }
13786 break;
13787 case HImode:
13788 /* It is better to store HImodes as SImodes. */
13789 if (!TARGET_PARTIAL_REG_STALL)
13790 operand = gen_lowpart (SImode, operand);
13791 /* FALLTHRU */
13792 case SImode:
13793 emit_insn (
13794 gen_rtx_SET (VOIDmode,
13795 gen_rtx_MEM (GET_MODE (operand),
13796 gen_rtx_PRE_DEC (SImode,
13797 stack_pointer_rtx)),
13798 operand));
13799 break;
13800 default:
13801 abort ();
13802 }
13803 result = gen_rtx_MEM (mode, stack_pointer_rtx);
13804 }
13805 return result;
13806 }
13807
13808 /* Free the operand from memory. */
13809 void
13810 ix86_free_from_memory (enum machine_mode mode)
13811 {
13812 if (!TARGET_RED_ZONE)
13813 {
13814 int size;
13815
13816 if (mode == DImode || TARGET_64BIT)
13817 size = 8;
13818 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
13819 size = 2;
13820 else
13821 size = 4;
13822 /* Use LEA to deallocate stack space. In peephole2 it will be converted
13823 to a pop or add instruction if registers are available. */
13824 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
13825 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
13826 GEN_INT (size))));
13827 }
13828 }
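/* A usage sketch, for illustration only (gen_some_consumer is a hypothetical
   pattern name, not one defined in this file): the two helpers above are
   meant to bracket a single post-reload memory use of an operand, roughly

     rtx mem = ix86_force_to_memory (DImode, operand);
     emit_insn (gen_some_consumer (dest, mem));
     ix86_free_from_memory (DImode);

   The intended callers are post-reload splitters in the machine description
   that need the operand as a memory reference.  */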
13829
13830 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
13831 QImode must go into class Q_REGS.
13832 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
13833 movdf to do mem-to-mem moves through integer regs. */
13834 enum reg_class
13835 ix86_preferred_reload_class (rtx x, enum reg_class class)
13836 {
13837 if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x)))
13838 return NO_REGS;
13839 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
13840 {
13841 /* SSE can't load any constant directly yet. */
13842 if (SSE_CLASS_P (class))
13843 return NO_REGS;
13844 /* Floats can load 0 and 1. */
13845 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
13846 {
13847 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
13848 if (MAYBE_SSE_CLASS_P (class))
13849 return (reg_class_subset_p (class, GENERAL_REGS)
13850 ? GENERAL_REGS : FLOAT_REGS);
13851 else
13852 return class;
13853 }
13854 /* General regs can load everything. */
13855 if (reg_class_subset_p (class, GENERAL_REGS))
13856 return GENERAL_REGS;
13857 /* In case we haven't resolved FLOAT or SSE yet, give up. */
13858 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
13859 return NO_REGS;
13860 }
13861 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
13862 return NO_REGS;
13863 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
13864 return Q_REGS;
13865 return class;
13866 }
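/* For illustration of the hook above: reload may keep the CONST_DOUBLE 1.0
   in an x87 class (standard_80387_constant_p recognizes it, so FLOAT_REGS
   survives and fld1 can be used), while an arbitrary double such as 3.14
   yields NO_REGS for FP and SSE classes and is therefore forced to the
   constant pool and loaded from memory instead.  */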
13867
13868 /* If we are copying between general and FP registers, we need a memory
13869 location. The same is true for SSE and MMX registers.
13870
13871 The macro can't work reliably when one of the CLASSES is a class containing
13872 registers from multiple units (SSE, MMX, integer). We avoid this by never
13873 combining those units in a single alternative in the machine description.
13874 Ensure that this constraint holds to avoid unexpected surprises.
13875
13876 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
13877 enforce these sanity checks. */
13878 int
13879 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
13880 enum machine_mode mode, int strict)
13881 {
13882 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
13883 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
13884 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
13885 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
13886 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
13887 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
13888 {
13889 if (strict)
13890 abort ();
13891 else
13892 return 1;
13893 }
13894 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
13895 || ((SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
13896 || MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
13897 && ((mode != SImode && (mode != DImode || !TARGET_64BIT))
13898 || (!TARGET_INTER_UNIT_MOVES && !optimize_size))));
13899 }
13900 /* Return the cost of moving data from a register in class CLASS1 to
13901 one in class CLASS2.
13902
13903 It is not required that the cost always equal 2 when FROM is the same as TO;
13904 on some machines it is expensive to move between registers if they are not
13905 general registers. */
13906 int
13907 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
13908 enum reg_class class2)
13909 {
13910 /* In case we require secondary memory, compute the cost of the store
13911 followed by the load. In order to avoid bad register allocation choices,
13912 we need this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
13913
13914 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
13915 {
13916 int cost = 1;
13917
13918 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
13919 MEMORY_MOVE_COST (mode, class1, 1));
13920 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
13921 MEMORY_MOVE_COST (mode, class2, 1));
13922
13923 /* In case of copying from a general purpose register we may emit multiple
13924 stores followed by a single load, causing a memory size mismatch stall.
13925 Count this as an arbitrarily high cost of 20. */
13926 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
13927 cost += 20;
13928
13929 /* In the case of FP/MMX moves, the registers actually overlap, and we
13930 have to switch modes in order to treat them differently. */
13931 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
13932 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
13933 cost += 20;
13934
13935 return cost;
13936 }
13937
13938 /* Moves between SSE/MMX and integer unit are expensive. */
13939 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
13940 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
13941 return ix86_cost->mmxsse_to_integer;
13942 if (MAYBE_FLOAT_CLASS_P (class1))
13943 return ix86_cost->fp_move;
13944 if (MAYBE_SSE_CLASS_P (class1))
13945 return ix86_cost->sse_move;
13946 if (MAYBE_MMX_CLASS_P (class1))
13947 return ix86_cost->mmx_move;
13948 return 2;
13949 }
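/* A worked example of the secondary-memory path above, with made-up numbers:
   suppose MEMORY_MOVE_COST for MODE is 4 for loads and 6 for stores in both
   classes, and CLASS1 is an integer class while CLASS2 is an SSE class.  The
   move is costed as 1 + max(4,6) + max(4,6) = 13, plus 20 more if CLASS1
   needs more hard registers for MODE than CLASS2 (the store/load size
   mismatch case), so the allocator strongly prefers keeping the value within
   one register unit.  */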
13950
13951 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
13952 int
13953 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
13954 {
13955 /* Flags and only flags can only hold CCmode values. */
13956 if (CC_REGNO_P (regno))
13957 return GET_MODE_CLASS (mode) == MODE_CC;
13958 if (GET_MODE_CLASS (mode) == MODE_CC
13959 || GET_MODE_CLASS (mode) == MODE_RANDOM
13960 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
13961 return 0;
13962 if (FP_REGNO_P (regno))
13963 return VALID_FP_MODE_P (mode);
13964 if (SSE_REGNO_P (regno))
13965 return (TARGET_SSE ? VALID_SSE_REG_MODE (mode) : 0);
13966 if (MMX_REGNO_P (regno))
13967 return (TARGET_MMX
13968 ? VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode) : 0);
13969 /* We handle both integers and floats in the general purpose registers.
13970 In the future we should be able to handle vector modes as well. */
13971 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
13972 return 0;
13973 /* Take care for QImode values - they can be in non-QI regs, but then
13974 they do cause partial register stalls. */
13975 if (regno < 4 || mode != QImode || TARGET_64BIT)
13976 return 1;
13977 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
13978 }
13979
13980 /* Return the cost of moving data of mode M between a
13981 register and memory. A value of 2 is the default; this cost is
13982 relative to those in `REGISTER_MOVE_COST'.
13983
13984 If moving between registers and memory is more expensive than
13985 between two registers, you should define this macro to express the
13986 relative cost.
13987
13988 Also model the increased cost of moving QImode registers in
13989 non-Q_REGS classes.
13990 */
13991 int
13992 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
13993 {
13994 if (FLOAT_CLASS_P (class))
13995 {
13996 int index;
13997 switch (mode)
13998 {
13999 case SFmode:
14000 index = 0;
14001 break;
14002 case DFmode:
14003 index = 1;
14004 break;
14005 case XFmode:
14006 index = 2;
14007 break;
14008 default:
14009 return 100;
14010 }
14011 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
14012 }
14013 if (SSE_CLASS_P (class))
14014 {
14015 int index;
14016 switch (GET_MODE_SIZE (mode))
14017 {
14018 case 4:
14019 index = 0;
14020 break;
14021 case 8:
14022 index = 1;
14023 break;
14024 case 16:
14025 index = 2;
14026 break;
14027 default:
14028 return 100;
14029 }
14030 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
14031 }
14032 if (MMX_CLASS_P (class))
14033 {
14034 int index;
14035 switch (GET_MODE_SIZE (mode))
14036 {
14037 case 4:
14038 index = 0;
14039 break;
14040 case 8:
14041 index = 1;
14042 break;
14043 default:
14044 return 100;
14045 }
14046 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
14047 }
14048 switch (GET_MODE_SIZE (mode))
14049 {
14050 case 1:
14051 if (in)
14052 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
14053 : ix86_cost->movzbl_load);
14054 else
14055 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
14056 : ix86_cost->int_store[0] + 4);
14057 break;
14058 case 2:
14059 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
14060 default:
14061 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
14062 if (mode == TFmode)
14063 mode = XFmode;
14064 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
14065 * (((int) GET_MODE_SIZE (mode)
14066 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
14067 }
14068 }
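/* For illustration: in 32-bit mode a DImode value in an integer class falls
   through to the default case above, so its memory move cost is int_load[2]
   (or int_store[2]) multiplied by the two word-sized moves needed, i.e.
   twice the SImode cost; that is what the comment about "32bit moves"
   refers to.  */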
14069
14070 /* Compute a (partial) cost for rtx X. Return true if the complete
14071 cost has been computed, and false if subexpressions should be
14072 scanned. In either case, *TOTAL contains the cost result. */
14073
14074 static bool
14075 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
14076 {
14077 enum machine_mode mode = GET_MODE (x);
14078
14079 switch (code)
14080 {
14081 case CONST_INT:
14082 case CONST:
14083 case LABEL_REF:
14084 case SYMBOL_REF:
14085 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
14086 *total = 3;
14087 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
14088 *total = 2;
14089 else if (flag_pic && SYMBOLIC_CONST (x)
14090 && (!TARGET_64BIT
14091 || (GET_CODE (x) != LABEL_REF
14092 && (GET_CODE (x) != SYMBOL_REF
14093 || !SYMBOL_REF_LOCAL_P (x)))))
14094 *total = 1;
14095 else
14096 *total = 0;
14097 return true;
14098
14099 case CONST_DOUBLE:
14100 if (mode == VOIDmode)
14101 *total = 0;
14102 else
14103 switch (standard_80387_constant_p (x))
14104 {
14105 case 1: /* 0.0 */
14106 *total = 1;
14107 break;
14108 default: /* Other constants */
14109 *total = 2;
14110 break;
14111 case 0:
14112 case -1:
14113 /* Start with (MEM (SYMBOL_REF)), since that's where
14114 it'll probably end up. Add a penalty for size. */
14115 *total = (COSTS_N_INSNS (1)
14116 + (flag_pic != 0 && !TARGET_64BIT)
14117 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
14118 break;
14119 }
14120 return true;
14121
14122 case ZERO_EXTEND:
14123 /* The zero extension is often completely free on x86_64, so make
14124 it as cheap as possible. */
14125 if (TARGET_64BIT && mode == DImode
14126 && GET_MODE (XEXP (x, 0)) == SImode)
14127 *total = 1;
14128 else if (TARGET_ZERO_EXTEND_WITH_AND)
14129 *total = COSTS_N_INSNS (ix86_cost->add);
14130 else
14131 *total = COSTS_N_INSNS (ix86_cost->movzx);
14132 return false;
14133
14134 case SIGN_EXTEND:
14135 *total = COSTS_N_INSNS (ix86_cost->movsx);
14136 return false;
14137
14138 case ASHIFT:
14139 if (GET_CODE (XEXP (x, 1)) == CONST_INT
14140 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
14141 {
14142 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
14143 if (value == 1)
14144 {
14145 *total = COSTS_N_INSNS (ix86_cost->add);
14146 return false;
14147 }
14148 if ((value == 2 || value == 3)
14149 && ix86_cost->lea <= ix86_cost->shift_const)
14150 {
14151 *total = COSTS_N_INSNS (ix86_cost->lea);
14152 return false;
14153 }
14154 }
14155 /* FALLTHRU */
14156
14157 case ROTATE:
14158 case ASHIFTRT:
14159 case LSHIFTRT:
14160 case ROTATERT:
14161 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
14162 {
14163 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14164 {
14165 if (INTVAL (XEXP (x, 1)) > 32)
14166 *total = COSTS_N_INSNS(ix86_cost->shift_const + 2);
14167 else
14168 *total = COSTS_N_INSNS(ix86_cost->shift_const * 2);
14169 }
14170 else
14171 {
14172 if (GET_CODE (XEXP (x, 1)) == AND)
14173 *total = COSTS_N_INSNS(ix86_cost->shift_var * 2);
14174 else
14175 *total = COSTS_N_INSNS(ix86_cost->shift_var * 6 + 2);
14176 }
14177 }
14178 else
14179 {
14180 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14181 *total = COSTS_N_INSNS (ix86_cost->shift_const);
14182 else
14183 *total = COSTS_N_INSNS (ix86_cost->shift_var);
14184 }
14185 return false;
14186
14187 case MULT:
14188 if (FLOAT_MODE_P (mode))
14189 {
14190 *total = COSTS_N_INSNS (ix86_cost->fmul);
14191 return false;
14192 }
14193 else
14194 {
14195 rtx op0 = XEXP (x, 0);
14196 rtx op1 = XEXP (x, 1);
14197 int nbits;
14198 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14199 {
14200 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
14201 for (nbits = 0; value != 0; value &= value - 1)
14202 nbits++;
14203 }
14204 else
14205 /* This is arbitrary. */
14206 nbits = 7;
14207
14208 /* Compute costs correctly for widening multiplication. */
14209 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
14210 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
14211 == GET_MODE_SIZE (mode))
14212 {
14213 int is_mulwiden = 0;
14214 enum machine_mode inner_mode = GET_MODE (op0);
14215
14216 if (GET_CODE (op0) == GET_CODE (op1))
14217 is_mulwiden = 1, op1 = XEXP (op1, 0);
14218 else if (GET_CODE (op1) == CONST_INT)
14219 {
14220 if (GET_CODE (op0) == SIGN_EXTEND)
14221 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
14222 == INTVAL (op1);
14223 else
14224 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
14225 }
14226
14227 if (is_mulwiden)
14228 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
14229 }
14230
14231 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
14232 + nbits * ix86_cost->mult_bit)
14233 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code);
14234
14235 return true;
14236 }
14237
14238 case DIV:
14239 case UDIV:
14240 case MOD:
14241 case UMOD:
14242 if (FLOAT_MODE_P (mode))
14243 *total = COSTS_N_INSNS (ix86_cost->fdiv);
14244 else
14245 *total = COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)]);
14246 return false;
14247
14248 case PLUS:
14249 if (FLOAT_MODE_P (mode))
14250 *total = COSTS_N_INSNS (ix86_cost->fadd);
14251 else if (GET_MODE_CLASS (mode) == MODE_INT
14252 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
14253 {
14254 if (GET_CODE (XEXP (x, 0)) == PLUS
14255 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
14256 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
14257 && CONSTANT_P (XEXP (x, 1)))
14258 {
14259 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
14260 if (val == 2 || val == 4 || val == 8)
14261 {
14262 *total = COSTS_N_INSNS (ix86_cost->lea);
14263 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
14264 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
14265 outer_code);
14266 *total += rtx_cost (XEXP (x, 1), outer_code);
14267 return true;
14268 }
14269 }
14270 else if (GET_CODE (XEXP (x, 0)) == MULT
14271 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
14272 {
14273 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
14274 if (val == 2 || val == 4 || val == 8)
14275 {
14276 *total = COSTS_N_INSNS (ix86_cost->lea);
14277 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
14278 *total += rtx_cost (XEXP (x, 1), outer_code);
14279 return true;
14280 }
14281 }
14282 else if (GET_CODE (XEXP (x, 0)) == PLUS)
14283 {
14284 *total = COSTS_N_INSNS (ix86_cost->lea);
14285 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
14286 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
14287 *total += rtx_cost (XEXP (x, 1), outer_code);
14288 return true;
14289 }
14290 }
14291 /* FALLTHRU */
14292
14293 case MINUS:
14294 if (FLOAT_MODE_P (mode))
14295 {
14296 *total = COSTS_N_INSNS (ix86_cost->fadd);
14297 return false;
14298 }
14299 /* FALLTHRU */
14300
14301 case AND:
14302 case IOR:
14303 case XOR:
14304 if (!TARGET_64BIT && mode == DImode)
14305 {
14306 *total = (COSTS_N_INSNS (ix86_cost->add) * 2
14307 + (rtx_cost (XEXP (x, 0), outer_code)
14308 << (GET_MODE (XEXP (x, 0)) != DImode))
14309 + (rtx_cost (XEXP (x, 1), outer_code)
14310 << (GET_MODE (XEXP (x, 1)) != DImode)));
14311 return true;
14312 }
14313 /* FALLTHRU */
14314
14315 case NEG:
14316 if (FLOAT_MODE_P (mode))
14317 {
14318 *total = COSTS_N_INSNS (ix86_cost->fchs);
14319 return false;
14320 }
14321 /* FALLTHRU */
14322
14323 case NOT:
14324 if (!TARGET_64BIT && mode == DImode)
14325 *total = COSTS_N_INSNS (ix86_cost->add * 2);
14326 else
14327 *total = COSTS_N_INSNS (ix86_cost->add);
14328 return false;
14329
14330 case COMPARE:
14331 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
14332 && XEXP (XEXP (x, 0), 1) == const1_rtx
14333 && GET_CODE (XEXP (XEXP (x, 0), 2)) == CONST_INT
14334 && XEXP (x, 1) == const0_rtx)
14335 {
14336 /* This kind of construct is implemented using test[bwl].
14337 Treat it as if we had an AND. */
14338 *total = (COSTS_N_INSNS (ix86_cost->add)
14339 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
14340 + rtx_cost (const1_rtx, outer_code));
14341 return true;
14342 }
14343 return false;
14344
14345 case FLOAT_EXTEND:
14346 if (!TARGET_SSE_MATH || !VALID_SSE_REG_MODE (mode))
14347 *total = 0;
14348 return false;
14349
14350 case ABS:
14351 if (FLOAT_MODE_P (mode))
14352 *total = COSTS_N_INSNS (ix86_cost->fabs);
14353 return false;
14354
14355 case SQRT:
14356 if (FLOAT_MODE_P (mode))
14357 *total = COSTS_N_INSNS (ix86_cost->fsqrt);
14358 return false;
14359
14360 case UNSPEC:
14361 if (XINT (x, 1) == UNSPEC_TP)
14362 *total = 0;
14363 return false;
14364
14365 default:
14366 return false;
14367 }
14368 }
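/* A worked example of the PLUS costing above (addresses that fit the lea
   addressing form): an rtx such as

     (plus (plus (mult (reg) (const_int 4)) (reg)) (const_int 8))

   matches the first sub-case, so it is costed as a single lea plus the costs
   of the two registers and the displacement, rather than as a shift followed
   by two adds.  */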
14369
14370 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
14371 static void
14372 ix86_svr3_asm_out_constructor (rtx symbol, int priority ATTRIBUTE_UNUSED)
14373 {
14374 init_section ();
14375 fputs ("\tpushl $", asm_out_file);
14376 assemble_name (asm_out_file, XSTR (symbol, 0));
14377 fputc ('\n', asm_out_file);
14378 }
14379 #endif
14380
14381 #if TARGET_MACHO
14382
14383 static int current_machopic_label_num;
14384
14385 /* Given a symbol name and its associated stub, write out the
14386 definition of the stub. */
14387
14388 void
14389 machopic_output_stub (FILE *file, const char *symb, const char *stub)
14390 {
14391 unsigned int length;
14392 char *binder_name, *symbol_name, lazy_ptr_name[32];
14393 int label = ++current_machopic_label_num;
14394
14395 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
14396 symb = (*targetm.strip_name_encoding) (symb);
14397
14398 length = strlen (stub);
14399 binder_name = alloca (length + 32);
14400 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
14401
14402 length = strlen (symb);
14403 symbol_name = alloca (length + 32);
14404 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
14405
14406 sprintf (lazy_ptr_name, "L%d$lz", label);
14407
14408 if (MACHOPIC_PURE)
14409 machopic_picsymbol_stub_section ();
14410 else
14411 machopic_symbol_stub_section ();
14412
14413 fprintf (file, "%s:\n", stub);
14414 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
14415
14416 if (MACHOPIC_PURE)
14417 {
14418 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
14419 fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
14420 fprintf (file, "\tjmp %%edx\n");
14421 }
14422 else
14423 fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
14424
14425 fprintf (file, "%s:\n", binder_name);
14426
14427 if (MACHOPIC_PURE)
14428 {
14429 fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
14430 fprintf (file, "\tpushl %%eax\n");
14431 }
14432 else
14433 fprintf (file, "\t pushl $%s\n", lazy_ptr_name);
14434
14435 fprintf (file, "\tjmp dyld_stub_binding_helper\n");
14436
14437 machopic_lazy_symbol_ptr_section ();
14438 fprintf (file, "%s:\n", lazy_ptr_name);
14439 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
14440 fprintf (file, "\t.long %s\n", binder_name);
14441 }
14442 #endif /* TARGET_MACHO */
14443
14444 /* Order the registers for register allocator. */
14445
14446 void
14447 x86_order_regs_for_local_alloc (void)
14448 {
14449 int pos = 0;
14450 int i;
14451
14452 /* First allocate the local general purpose registers. */
14453 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
14454 if (GENERAL_REGNO_P (i) && call_used_regs[i])
14455 reg_alloc_order [pos++] = i;
14456
14457 /* Global general purpose registers. */
14458 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
14459 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
14460 reg_alloc_order [pos++] = i;
14461
14462 /* x87 registers come first in case we are doing FP math
14463 using them. */
14464 if (!TARGET_SSE_MATH)
14465 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
14466 reg_alloc_order [pos++] = i;
14467
14468 /* SSE registers. */
14469 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
14470 reg_alloc_order [pos++] = i;
14471 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
14472 reg_alloc_order [pos++] = i;
14473
14474 /* x87 registers. */
14475 if (TARGET_SSE_MATH)
14476 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
14477 reg_alloc_order [pos++] = i;
14478
14479 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
14480 reg_alloc_order [pos++] = i;
14481
14482 /* Initialize the rest of the array, as we do not allocate some registers
14483 at all. */
14484 while (pos < FIRST_PSEUDO_REGISTER)
14485 reg_alloc_order [pos++] = 0;
14486 }
14487
14488 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
14489 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
14490 #endif
14491
14492 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
14493 struct attribute_spec.handler. */
14494 static tree
14495 ix86_handle_struct_attribute (tree *node, tree name,
14496 tree args ATTRIBUTE_UNUSED,
14497 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
14498 {
14499 tree *type = NULL;
14500 if (DECL_P (*node))
14501 {
14502 if (TREE_CODE (*node) == TYPE_DECL)
14503 type = &TREE_TYPE (*node);
14504 }
14505 else
14506 type = node;
14507
14508 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
14509 || TREE_CODE (*type) == UNION_TYPE)))
14510 {
14511 warning ("%qs attribute ignored", IDENTIFIER_POINTER (name));
14512 *no_add_attrs = true;
14513 }
14514
14515 else if ((is_attribute_p ("ms_struct", name)
14516 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
14517 || ((is_attribute_p ("gcc_struct", name)
14518 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
14519 {
14520 warning ("%qs incompatible attribute ignored",
14521 IDENTIFIER_POINTER (name));
14522 *no_add_attrs = true;
14523 }
14524
14525 return NULL_TREE;
14526 }
14527
14528 static bool
14529 ix86_ms_bitfield_layout_p (tree record_type)
14530 {
14531 return (TARGET_USE_MS_BITFIELD_LAYOUT &&
14532 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
14533 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
14534 }
14535
14536 /* Returns an expression indicating where the this parameter is
14537 located on entry to the FUNCTION. */
14538
14539 static rtx
14540 x86_this_parameter (tree function)
14541 {
14542 tree type = TREE_TYPE (function);
14543
14544 if (TARGET_64BIT)
14545 {
14546 int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
14547 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
14548 }
14549
14550 if (ix86_function_regparm (type, function) > 0)
14551 {
14552 tree parm;
14553
14554 parm = TYPE_ARG_TYPES (type);
14555 /* Figure out whether or not the function has a variable number of
14556 arguments. */
14557 for (; parm; parm = TREE_CHAIN (parm))
14558 if (TREE_VALUE (parm) == void_type_node)
14559 break;
14560 /* If not, the this parameter is in the first argument. */
14561 if (parm)
14562 {
14563 int regno = 0;
14564 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
14565 regno = 2;
14566 return gen_rtx_REG (SImode, regno);
14567 }
14568 }
14569
14570 if (aggregate_value_p (TREE_TYPE (type), type))
14571 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
14572 else
14573 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
14574 }
14575
14576 /* Determine whether x86_output_mi_thunk can succeed. */
14577
14578 static bool
14579 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
14580 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
14581 HOST_WIDE_INT vcall_offset, tree function)
14582 {
14583 /* 64-bit can handle anything. */
14584 if (TARGET_64BIT)
14585 return true;
14586
14587 /* For 32-bit, everything's fine if we have one free register. */
14588 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
14589 return true;
14590
14591 /* Need a free register for vcall_offset. */
14592 if (vcall_offset)
14593 return false;
14594
14595 /* Need a free register for GOT references. */
14596 if (flag_pic && !(*targetm.binds_local_p) (function))
14597 return false;
14598
14599 /* Otherwise ok. */
14600 return true;
14601 }
14602
14603 /* Output the assembler code for a thunk function. THUNK_DECL is the
14604 declaration for the thunk function itself, FUNCTION is the decl for
14605 the target function. DELTA is an immediate constant offset to be
14606 added to THIS. If VCALL_OFFSET is nonzero, the word at
14607 *(*this + vcall_offset) should be added to THIS. */
14608
14609 static void
14610 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
14611 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
14612 HOST_WIDE_INT vcall_offset, tree function)
14613 {
14614 rtx xops[3];
14615 rtx this = x86_this_parameter (function);
14616 rtx this_reg, tmp;
14617
14618 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
14619 pull it in now and let DELTA benefit. */
14620 if (REG_P (this))
14621 this_reg = this;
14622 else if (vcall_offset)
14623 {
14624 /* Put the this parameter into %eax. */
14625 xops[0] = this;
14626 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
14627 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
14628 }
14629 else
14630 this_reg = NULL_RTX;
14631
14632 /* Adjust the this parameter by a fixed constant. */
14633 if (delta)
14634 {
14635 xops[0] = GEN_INT (delta);
14636 xops[1] = this_reg ? this_reg : this;
14637 if (TARGET_64BIT)
14638 {
14639 if (!x86_64_general_operand (xops[0], DImode))
14640 {
14641 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
14642 xops[1] = tmp;
14643 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
14644 xops[0] = tmp;
14645 xops[1] = this;
14646 }
14647 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
14648 }
14649 else
14650 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
14651 }
14652
14653 /* Adjust the this parameter by a value stored in the vtable. */
14654 if (vcall_offset)
14655 {
14656 if (TARGET_64BIT)
14657 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
14658 else
14659 {
14660 int tmp_regno = 2 /* ECX */;
14661 if (lookup_attribute ("fastcall",
14662 TYPE_ATTRIBUTES (TREE_TYPE (function))))
14663 tmp_regno = 0 /* EAX */;
14664 tmp = gen_rtx_REG (SImode, tmp_regno);
14665 }
14666
14667 xops[0] = gen_rtx_MEM (Pmode, this_reg);
14668 xops[1] = tmp;
14669 if (TARGET_64BIT)
14670 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
14671 else
14672 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
14673
14674 /* Adjust the this parameter. */
14675 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
14676 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
14677 {
14678 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
14679 xops[0] = GEN_INT (vcall_offset);
14680 xops[1] = tmp2;
14681 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
14682 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
14683 }
14684 xops[1] = this_reg;
14685 if (TARGET_64BIT)
14686 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
14687 else
14688 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
14689 }
14690
14691 /* If necessary, drop THIS back to its stack slot. */
14692 if (this_reg && this_reg != this)
14693 {
14694 xops[0] = this_reg;
14695 xops[1] = this;
14696 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
14697 }
14698
14699 xops[0] = XEXP (DECL_RTL (function), 0);
14700 if (TARGET_64BIT)
14701 {
14702 if (!flag_pic || (*targetm.binds_local_p) (function))
14703 output_asm_insn ("jmp\t%P0", xops);
14704 else
14705 {
14706 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
14707 tmp = gen_rtx_CONST (Pmode, tmp);
14708 tmp = gen_rtx_MEM (QImode, tmp);
14709 xops[0] = tmp;
14710 output_asm_insn ("jmp\t%A0", xops);
14711 }
14712 }
14713 else
14714 {
14715 if (!flag_pic || (*targetm.binds_local_p) (function))
14716 output_asm_insn ("jmp\t%P0", xops);
14717 else
14718 #if TARGET_MACHO
14719 if (TARGET_MACHO)
14720 {
14721 rtx sym_ref = XEXP (DECL_RTL (function), 0);
14722 tmp = (gen_rtx_SYMBOL_REF
14723 (Pmode,
14724 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
14725 tmp = gen_rtx_MEM (QImode, tmp);
14726 xops[0] = tmp;
14727 output_asm_insn ("jmp\t%0", xops);
14728 }
14729 else
14730 #endif /* TARGET_MACHO */
14731 {
14732 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
14733 output_set_got (tmp);
14734
14735 xops[1] = tmp;
14736 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
14737 output_asm_insn ("jmp\t{*}%1", xops);
14738 }
14739 }
14740 }
14741
14742 static void
14743 x86_file_start (void)
14744 {
14745 default_file_start ();
14746 if (X86_FILE_START_VERSION_DIRECTIVE)
14747 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
14748 if (X86_FILE_START_FLTUSED)
14749 fputs ("\t.global\t__fltused\n", asm_out_file);
14750 if (ix86_asm_dialect == ASM_INTEL)
14751 fputs ("\t.intel_syntax\n", asm_out_file);
14752 }
14753
14754 int
14755 x86_field_alignment (tree field, int computed)
14756 {
14757 enum machine_mode mode;
14758 tree type = TREE_TYPE (field);
14759
14760 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
14761 return computed;
14762 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
14763 ? get_inner_array_type (type) : type);
14764 if (mode == DFmode || mode == DCmode
14765 || GET_MODE_CLASS (mode) == MODE_INT
14766 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
14767 return MIN (32, computed);
14768 return computed;
14769 }
14770
14771 /* Output assembler code to FILE to increment profiler label # LABELNO
14772 for profiling a function entry. */
14773 void
14774 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
14775 {
14776 if (TARGET_64BIT)
14777 if (flag_pic)
14778 {
14779 #ifndef NO_PROFILE_COUNTERS
14780 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
14781 #endif
14782 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
14783 }
14784 else
14785 {
14786 #ifndef NO_PROFILE_COUNTERS
14787 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
14788 #endif
14789 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
14790 }
14791 else if (flag_pic)
14792 {
14793 #ifndef NO_PROFILE_COUNTERS
14794 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
14795 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
14796 #endif
14797 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
14798 }
14799 else
14800 {
14801 #ifndef NO_PROFILE_COUNTERS
14802 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
14803 PROFILE_COUNT_REGISTER);
14804 #endif
14805 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
14806 }
14807 }
14808
14809 /* We don't have exact information about the insn sizes, but we may assume
14810 quite safely that we are informed about all 1 byte insns and memory
14811 address sizes. This is enough to eliminate unnecessary padding in
14812 99% of cases. */
14813
14814 static int
14815 min_insn_size (rtx insn)
14816 {
14817 int l = 0;
14818
14819 if (!INSN_P (insn) || !active_insn_p (insn))
14820 return 0;
14821
14822 /* Discard alignments we've emitted, and jump-table instructions. */
14823 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
14824 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
14825 return 0;
14826 if (GET_CODE (insn) == JUMP_INSN
14827 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
14828 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
14829 return 0;
14830
14831 /* Important case - calls are always 5 bytes.
14832 It is common to have many calls in a row. */
14833 if (GET_CODE (insn) == CALL_INSN
14834 && symbolic_reference_mentioned_p (PATTERN (insn))
14835 && !SIBLING_CALL_P (insn))
14836 return 5;
14837 if (get_attr_length (insn) <= 1)
14838 return 1;
14839
14840 /* For normal instructions we may rely on the sizes of addresses
14841 and the presence of a symbol to require 4 bytes of encoding.
14842 This is not the case for jumps, where references are PC relative. */
14843 if (GET_CODE (insn) != JUMP_INSN)
14844 {
14845 l = get_attr_length_address (insn);
14846 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
14847 l = 4;
14848 }
14849 if (l)
14850 return 1+l;
14851 else
14852 return 2;
14853 }
14854
14855 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
14856 window. */
14857
14858 static void
14859 ix86_avoid_jump_misspredicts (void)
14860 {
14861 rtx insn, start = get_insns ();
14862 int nbytes = 0, njumps = 0;
14863 int isjump = 0;
14864
14865 /* Look for all minimal intervals of instructions containing 4 jumps.
14866 The intervals are bounded by START and INSN. NBYTES is the total
14867 size of instructions in the interval including INSN and not including
14868 START. When NBYTES is smaller than 16 bytes, it is possible
14869 that the ends of START and INSN land in the same 16-byte window.
14870
14871 The smallest offset in the window at which INSN can start is the case where
14872 START ends at offset 0. The offset of INSN is then NBYTES - sizeof (INSN).
14873 We add a p2align to the 16-byte window with maxskip 17 - NBYTES + sizeof (INSN).
14874 */
14875 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
14876 {
14877
14878 nbytes += min_insn_size (insn);
14879 if (dump_file)
14880 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
14881 INSN_UID (insn), min_insn_size (insn));
14882 if ((GET_CODE (insn) == JUMP_INSN
14883 && GET_CODE (PATTERN (insn)) != ADDR_VEC
14884 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
14885 || GET_CODE (insn) == CALL_INSN)
14886 njumps++;
14887 else
14888 continue;
14889
14890 while (njumps > 3)
14891 {
14892 start = NEXT_INSN (start);
14893 if ((GET_CODE (start) == JUMP_INSN
14894 && GET_CODE (PATTERN (start)) != ADDR_VEC
14895 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
14896 || GET_CODE (start) == CALL_INSN)
14897 njumps--, isjump = 1;
14898 else
14899 isjump = 0;
14900 nbytes -= min_insn_size (start);
14901 }
14902 if (njumps < 0)
14903 abort ();
14904 if (dump_file)
14905 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
14906 INSN_UID (start), INSN_UID (insn), nbytes);
14907
14908 if (njumps == 3 && isjump && nbytes < 16)
14909 {
14910 int padsize = 15 - nbytes + min_insn_size (insn);
14911
14912 if (dump_file)
14913 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
14914 INSN_UID (insn), padsize);
14915 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
14916 }
14917 }
14918 }
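/* A numeric example of the padding decision above: if the interval already
   holds three jumps/calls, the incoming INSN is a fourth one of estimated
   size 2, and NBYTES (which includes INSN) is 12, then
   padsize = 15 - 12 + 2 = 5, so an alignment allowing up to 5 bytes of
   padding is emitted before INSN, pushing it out of the 16-byte window it
   would otherwise share with the earlier jumps.  */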
14919
14920 /* AMD Athlon works faster
14921 when RET is not the destination of a conditional jump or directly preceded
14922 by another jump instruction. We avoid the penalty by inserting a NOP just
14923 before the RET instruction in such cases. */
14924 static void
14925 ix86_pad_returns (void)
14926 {
14927 edge e;
14928 edge_iterator ei;
14929
14930 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
14931 {
14932 basic_block bb = e->src;
14933 rtx ret = BB_END (bb);
14934 rtx prev;
14935 bool replace = false;
14936
14937 if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
14938 || !maybe_hot_bb_p (bb))
14939 continue;
14940 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
14941 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
14942 break;
14943 if (prev && GET_CODE (prev) == CODE_LABEL)
14944 {
14945 edge e;
14946 edge_iterator ei;
14947
14948 FOR_EACH_EDGE (e, ei, bb->preds)
14949 if (EDGE_FREQUENCY (e) && e->src->index >= 0
14950 && !(e->flags & EDGE_FALLTHRU))
14951 replace = true;
14952 }
14953 if (!replace)
14954 {
14955 prev = prev_active_insn (ret);
14956 if (prev
14957 && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
14958 || GET_CODE (prev) == CALL_INSN))
14959 replace = true;
14960 /* Empty functions get a branch mispredict even when the jump destination
14961 is not visible to us. */
14962 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
14963 replace = true;
14964 }
14965 if (replace)
14966 {
14967 emit_insn_before (gen_return_internal_long (), ret);
14968 delete_insn (ret);
14969 }
14970 }
14971 }
14972
14973 /* Implement machine specific optimizations. We implement padding of returns
14974 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
14975 static void
14976 ix86_reorg (void)
14977 {
14978 if (TARGET_ATHLON_K8 && optimize && !optimize_size)
14979 ix86_pad_returns ();
14980 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
14981 ix86_avoid_jump_misspredicts ();
14982 }
14983
14984 /* Return nonzero when a QImode register that must be represented via a REX
14985 prefix is used. */
14986 bool
14987 x86_extended_QIreg_mentioned_p (rtx insn)
14988 {
14989 int i;
14990 extract_insn_cached (insn);
14991 for (i = 0; i < recog_data.n_operands; i++)
14992 if (REG_P (recog_data.operand[i])
14993 && REGNO (recog_data.operand[i]) >= 4)
14994 return true;
14995 return false;
14996 }
14997
14998 /* Return nonzero when P points to a register encoded via a REX prefix.
14999 Called via for_each_rtx. */
15000 static int
15001 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
15002 {
15003 unsigned int regno;
15004 if (!REG_P (*p))
15005 return 0;
15006 regno = REGNO (*p);
15007 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
15008 }
15009
15010 /* Return true when INSN mentions a register that must be encoded using a
15011 REX prefix. */
15012 bool
15013 x86_extended_reg_mentioned_p (rtx insn)
15014 {
15015 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
15016 }
15017
15018 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
15019 optabs would emit if we didn't have TFmode patterns. */
15020
15021 void
15022 x86_emit_floatuns (rtx operands[2])
15023 {
15024 rtx neglab, donelab, i0, i1, f0, in, out;
15025 enum machine_mode mode, inmode;
15026
15027 inmode = GET_MODE (operands[1]);
15028 if (inmode != SImode
15029 && inmode != DImode)
15030 abort ();
15031
15032 out = operands[0];
15033 in = force_reg (inmode, operands[1]);
15034 mode = GET_MODE (out);
15035 neglab = gen_label_rtx ();
15036 donelab = gen_label_rtx ();
15037 i1 = gen_reg_rtx (Pmode);
15038 f0 = gen_reg_rtx (mode);
15039
15040 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
15041
15042 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
15043 emit_jump_insn (gen_jump (donelab));
15044 emit_barrier ();
15045
15046 emit_label (neglab);
15047
15048 i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15049 i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15050 i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
15051 expand_float (f0, i0, 0);
15052 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
15053
15054 emit_label (donelab);
15055 }
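/* The negative-input branch above uses the usual unsigned-to-FP trick:
   shift the value right by one while OR-ing the low bit back in as a sticky
   bit, convert the now-nonnegative half, and double the result.  Keeping the
   low bit sticky makes the final rounding of f0 + f0 agree with what a
   direct unsigned conversion would have produced.  */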
15056
15057 /* Initialize vector TARGET via VALS. */
15058 void
15059 ix86_expand_vector_init (rtx target, rtx vals)
15060 {
15061 enum machine_mode mode = GET_MODE (target);
15062 int elt_size = GET_MODE_SIZE (GET_MODE_INNER (mode));
15063 int n_elts = (GET_MODE_SIZE (mode) / elt_size);
15064 int i;
15065
15066 for (i = n_elts - 1; i >= 0; i--)
15067 if (GET_CODE (XVECEXP (vals, 0, i)) != CONST_INT
15068 && GET_CODE (XVECEXP (vals, 0, i)) != CONST_DOUBLE)
15069 break;
15070
15071 /* A few special cases first...
15072 ... constants are best loaded from the constant pool. */
15073 if (i < 0)
15074 {
15075 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
15076 return;
15077 }
15078
15079 /* ... values where only the first field is non-constant are best loaded
15080 from the pool and overwritten via a move later. */
15081 if (!i)
15082 {
15083 rtx op = simplify_gen_subreg (mode, XVECEXP (vals, 0, 0),
15084 GET_MODE_INNER (mode), 0);
15085
15086 op = force_reg (mode, op);
15087 XVECEXP (vals, 0, 0) = CONST0_RTX (GET_MODE_INNER (mode));
15088 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
15089 switch (GET_MODE (target))
15090 {
15091 case V2DFmode:
15092 emit_insn (gen_sse2_movsd (target, target, op));
15093 break;
15094 case V4SFmode:
15095 emit_insn (gen_sse_movss (target, target, op));
15096 break;
15097 default:
15098 break;
15099 }
15100 return;
15101 }
15102
15103 /* Otherwise, build the vector with the general interleave (unpack) sequence. */
15104 switch (GET_MODE (target))
15105 {
15106 case V2DFmode:
15107 {
15108 rtx vecop0 =
15109 simplify_gen_subreg (V2DFmode, XVECEXP (vals, 0, 0), DFmode, 0);
15110 rtx vecop1 =
15111 simplify_gen_subreg (V2DFmode, XVECEXP (vals, 0, 1), DFmode, 0);
15112
15113 vecop0 = force_reg (V2DFmode, vecop0);
15114 vecop1 = force_reg (V2DFmode, vecop1);
15115 emit_insn (gen_sse2_unpcklpd (target, vecop0, vecop1));
15116 }
15117 break;
15118 case V4SFmode:
15119 {
15120 rtx vecop0 =
15121 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 0), SFmode, 0);
15122 rtx vecop1 =
15123 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 1), SFmode, 0);
15124 rtx vecop2 =
15125 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 2), SFmode, 0);
15126 rtx vecop3 =
15127 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 3), SFmode, 0);
15128 rtx tmp1 = gen_reg_rtx (V4SFmode);
15129 rtx tmp2 = gen_reg_rtx (V4SFmode);
15130
15131 vecop0 = force_reg (V4SFmode, vecop0);
15132 vecop1 = force_reg (V4SFmode, vecop1);
15133 vecop2 = force_reg (V4SFmode, vecop2);
15134 vecop3 = force_reg (V4SFmode, vecop3);
15135 emit_insn (gen_sse_unpcklps (tmp1, vecop1, vecop3));
15136 emit_insn (gen_sse_unpcklps (tmp2, vecop0, vecop2));
15137 emit_insn (gen_sse_unpcklps (target, tmp2, tmp1));
15138 }
15139 break;
15140 default:
15141 abort ();
15142 }
15143 }
15144
15145 /* Implements target hook vector_mode_supported_p. */
15146 static bool
15147 ix86_vector_mode_supported_p (enum machine_mode mode)
15148 {
15149 if (TARGET_SSE
15150 && VALID_SSE_REG_MODE (mode))
15151 return true;
15152
15153 else if (TARGET_MMX
15154 && VALID_MMX_REG_MODE (mode))
15155 return true;
15156
15157 else if (TARGET_3DNOW
15158 && VALID_MMX_REG_MODE_3DNOW (mode))
15159 return true;
15160
15161 else
15162 return false;
15163 }
15164
15165 /* Worker function for TARGET_MD_ASM_CLOBBERS.
15166
15167 We do this in the new i386 backend to maintain source compatibility
15168 with the old cc0-based compiler. */
15169
15170 static tree
15171 ix86_md_asm_clobbers (tree clobbers)
15172 {
15173 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
15174 clobbers);
15175 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
15176 clobbers);
15177 clobbers = tree_cons (NULL_TREE, build_string (7, "dirflag"),
15178 clobbers);
15179 return clobbers;
15180 }
15181
15182 /* Worker function for REVERSE_CONDITION. */
15183
15184 enum rtx_code
15185 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
15186 {
15187 return (mode != CCFPmode && mode != CCFPUmode
15188 ? reverse_condition (code)
15189 : reverse_condition_maybe_unordered (code));
15190 }
15191
15192 /* Output code to perform an x87 FP register move, from OPERANDS[1]
15193 to OPERANDS[0]. */
15194
15195 const char *
15196 output_387_reg_move (rtx insn, rtx *operands)
15197 {
15198 if (REG_P (operands[1])
15199 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
15200 {
15201 if (REGNO (operands[0]) == FIRST_STACK_REG
15202 && TARGET_USE_FFREEP)
15203 return "ffreep\t%y0";
15204 return "fstp\t%y0";
15205 }
15206 if (STACK_TOP_P (operands[0]))
15207 return "fld%z1\t%y1";
15208 return "fst\t%y0";
15209 }
15210
15211 /* Output code to perform a conditional jump to LABEL, if C2 flag in
15212 FP status register is set. */
15213
15214 void
15215 ix86_emit_fp_unordered_jump (rtx label)
15216 {
15217 rtx reg = gen_reg_rtx (HImode);
15218 rtx temp;
15219
15220 emit_insn (gen_x86_fnstsw_1 (reg));
15221
15222 if (TARGET_USE_SAHF)
15223 {
15224 emit_insn (gen_x86_sahf_1 (reg));
15225
15226 temp = gen_rtx_REG (CCmode, FLAGS_REG);
15227 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
15228 }
15229 else
15230 {
15231 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
15232
15233 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
15234 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
15235 }
15236
15237 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
15238 gen_rtx_LABEL_REF (VOIDmode, label),
15239 pc_rtx);
15240 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
15241 emit_jump_insn (temp);
15242 }
15243
15244 /* Output code to perform a log1p XFmode calculation. */
15245
15246 void ix86_emit_i387_log1p (rtx op0, rtx op1)
15247 {
15248 rtx label1 = gen_label_rtx ();
15249 rtx label2 = gen_label_rtx ();
15250
15251 rtx tmp = gen_reg_rtx (XFmode);
15252 rtx tmp2 = gen_reg_rtx (XFmode);
15253
15254 emit_insn (gen_absxf2 (tmp, op1));
15255 emit_insn (gen_cmpxf (tmp,
15256 CONST_DOUBLE_FROM_REAL_VALUE (
15257 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
15258 XFmode)));
15259 emit_jump_insn (gen_bge (label1));
15260
15261 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
15262 emit_insn (gen_fyl2xp1_xf3 (op0, tmp2, op1));
15263 emit_jump (label2);
15264
15265 emit_label (label1);
15266 emit_move_insn (tmp, CONST1_RTX (XFmode));
15267 emit_insn (gen_addxf3 (tmp, op1, tmp));
15268 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
15269 emit_insn (gen_fyl2x_xf3 (op0, tmp2, tmp));
15270
15271 emit_label (label2);
15272 }
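/* Notes on the log1p expansion above: the threshold
   0.29289321881345... is 1 - sqrt(2)/2, the documented input range of the
   x87 fyl2xp1 instruction.  Inside that range op0 is computed as
   ln(2) * log2(1 + op1) = log1p(op1) via fyl2xp1; outside it, the code falls
   back to the same identity computed with fyl2x on the explicit sum
   1 + op1.  */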
15273
15274 /* Solaris named-section hook. Parameters are as for
15275 named_section_real. */
15276
15277 static void
15278 i386_solaris_elf_named_section (const char *name, unsigned int flags,
15279 tree decl)
15280 {
15281 /* With Binutils 2.15, the "@unwind" marker must be specified on
15282 every occurrence of the ".eh_frame" section, not just the first
15283 one. */
15284 if (TARGET_64BIT
15285 && strcmp (name, ".eh_frame") == 0)
15286 {
15287 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
15288 flags & SECTION_WRITE ? "aw" : "a");
15289 return;
15290 }
15291 default_elf_asm_named_section (name, flags, decl);
15292 }
15293
15294 #include "gt-i386.h"