[gcc.git] / gcc / config / i386 / i386.c
1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004 Free Software Foundation, Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "tm_p.h"
29 #include "regs.h"
30 #include "hard-reg-set.h"
31 #include "real.h"
32 #include "insn-config.h"
33 #include "conditions.h"
34 #include "output.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
37 #include "flags.h"
38 #include "except.h"
39 #include "function.h"
40 #include "recog.h"
41 #include "expr.h"
42 #include "optabs.h"
43 #include "toplev.h"
44 #include "basic-block.h"
45 #include "ggc.h"
46 #include "target.h"
47 #include "target-def.h"
48 #include "langhooks.h"
49 #include "cgraph.h"
50 #include "tree-gimple.h"
51
52 #ifndef CHECK_STACK_LIMIT
53 #define CHECK_STACK_LIMIT (-1)
54 #endif
55
56 /* Return index of given mode in mult and division cost tables. */
57 #define MODE_INDEX(mode) \
58 ((mode) == QImode ? 0 \
59 : (mode) == HImode ? 1 \
60 : (mode) == SImode ? 2 \
61 : (mode) == DImode ? 3 \
62 : 4)
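/* Editorial note (not in the original source): MODE_INDEX selects the column
   of the per-mode cost arrays in struct processor_costs below.  Assuming the
   field names mult_init and divide from i386.h, a typical use would be

     ix86_cost->mult_init[MODE_INDEX (SImode)]

   i.e. the cost of starting an SImode multiply on the -mtune processor.  */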
63
64 /* Processor costs (relative to an add) */
65 static const
66 struct processor_costs size_cost = { /* costs for tuning for size */
67 2, /* cost of an add instruction */
68 3, /* cost of a lea instruction */
69 2, /* variable shift costs */
70 3, /* constant shift costs */
71 {3, 3, 3, 3, 5}, /* cost of starting a multiply */
72 0, /* cost of multiply per each bit set */
73 {3, 3, 3, 3, 5}, /* cost of a divide/mod */
74 3, /* cost of movsx */
75 3, /* cost of movzx */
76 0, /* "large" insn */
77 2, /* MOVE_RATIO */
78 2, /* cost for loading QImode using movzbl */
79 {2, 2, 2}, /* cost of loading integer registers
80 in QImode, HImode and SImode.
81 Relative to reg-reg move (2). */
82 {2, 2, 2}, /* cost of storing integer registers */
83 2, /* cost of reg,reg fld/fst */
84 {2, 2, 2}, /* cost of loading fp registers
85 in SFmode, DFmode and XFmode */
86 {2, 2, 2}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
87 3, /* cost of moving MMX register */
88 {3, 3}, /* cost of loading MMX registers
89 in SImode and DImode */
90 {3, 3}, /* cost of storing MMX registers
91 in SImode and DImode */
92 3, /* cost of moving SSE register */
93 {3, 3, 3}, /* cost of loading SSE registers
94 in SImode, DImode and TImode */
95 {3, 3, 3}, /* cost of storing SSE registers
96 in SImode, DImode and TImode */
97 3, /* MMX or SSE register to integer */
98 0, /* size of prefetch block */
99 0, /* number of parallel prefetches */
100 1, /* Branch cost */
101 2, /* cost of FADD and FSUB insns. */
102 2, /* cost of FMUL instruction. */
103 2, /* cost of FDIV instruction. */
104 2, /* cost of FABS instruction. */
105 2, /* cost of FCHS instruction. */
106 2, /* cost of FSQRT instruction. */
107 };
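/* Editorial note (not in the original source): override_options later in this
   file points ix86_cost at this table when compiling with -Os (optimize_size),
   which is why every operation above is given a small, nearly uniform cost.  */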
108
109 /* Processor costs (relative to an add) */
110 static const
111 struct processor_costs i386_cost = { /* 386 specific costs */
112 1, /* cost of an add instruction */
113 1, /* cost of a lea instruction */
114 3, /* variable shift costs */
115 2, /* constant shift costs */
116 {6, 6, 6, 6, 6}, /* cost of starting a multiply */
117 1, /* cost of multiply per each bit set */
118 {23, 23, 23, 23, 23}, /* cost of a divide/mod */
119 3, /* cost of movsx */
120 2, /* cost of movzx */
121 15, /* "large" insn */
122 3, /* MOVE_RATIO */
123 4, /* cost for loading QImode using movzbl */
124 {2, 4, 2}, /* cost of loading integer registers
125 in QImode, HImode and SImode.
126 Relative to reg-reg move (2). */
127 {2, 4, 2}, /* cost of storing integer registers */
128 2, /* cost of reg,reg fld/fst */
129 {8, 8, 8}, /* cost of loading fp registers
130 in SFmode, DFmode and XFmode */
131 {8, 8, 8}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
132 2, /* cost of moving MMX register */
133 {4, 8}, /* cost of loading MMX registers
134 in SImode and DImode */
135 {4, 8}, /* cost of storing MMX registers
136 in SImode and DImode */
137 2, /* cost of moving SSE register */
138 {4, 8, 16}, /* cost of loading SSE registers
139 in SImode, DImode and TImode */
140 {4, 8, 16}, /* cost of storing SSE registers
141 in SImode, DImode and TImode */
142 3, /* MMX or SSE register to integer */
143 0, /* size of prefetch block */
144 0, /* number of parallel prefetches */
145 1, /* Branch cost */
146 23, /* cost of FADD and FSUB insns. */
147 27, /* cost of FMUL instruction. */
148 88, /* cost of FDIV instruction. */
149 22, /* cost of FABS instruction. */
150 24, /* cost of FCHS instruction. */
151 122, /* cost of FSQRT instruction. */
152 };
153
154 static const
155 struct processor_costs i486_cost = { /* 486 specific costs */
156 1, /* cost of an add instruction */
157 1, /* cost of a lea instruction */
158 3, /* variable shift costs */
159 2, /* constant shift costs */
160 {12, 12, 12, 12, 12}, /* cost of starting a multiply */
161 1, /* cost of multiply per each bit set */
162 {40, 40, 40, 40, 40}, /* cost of a divide/mod */
163 3, /* cost of movsx */
164 2, /* cost of movzx */
165 15, /* "large" insn */
166 3, /* MOVE_RATIO */
167 4, /* cost for loading QImode using movzbl */
168 {2, 4, 2}, /* cost of loading integer registers
169 in QImode, HImode and SImode.
170 Relative to reg-reg move (2). */
171 {2, 4, 2}, /* cost of storing integer registers */
172 2, /* cost of reg,reg fld/fst */
173 {8, 8, 8}, /* cost of loading fp registers
174 in SFmode, DFmode and XFmode */
175 {8, 8, 8}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
176 2, /* cost of moving MMX register */
177 {4, 8}, /* cost of loading MMX registers
178 in SImode and DImode */
179 {4, 8}, /* cost of storing MMX registers
180 in SImode and DImode */
181 2, /* cost of moving SSE register */
182 {4, 8, 16}, /* cost of loading SSE registers
183 in SImode, DImode and TImode */
184 {4, 8, 16}, /* cost of storing SSE registers
185 in SImode, DImode and TImode */
186 3, /* MMX or SSE register to integer */
187 0, /* size of prefetch block */
188 0, /* number of parallel prefetches */
189 1, /* Branch cost */
190 8, /* cost of FADD and FSUB insns. */
191 16, /* cost of FMUL instruction. */
192 73, /* cost of FDIV instruction. */
193 3, /* cost of FABS instruction. */
194 3, /* cost of FCHS instruction. */
195 83, /* cost of FSQRT instruction. */
196 };
197
198 static const
199 struct processor_costs pentium_cost = {
200 1, /* cost of an add instruction */
201 1, /* cost of a lea instruction */
202 4, /* variable shift costs */
203 1, /* constant shift costs */
204 {11, 11, 11, 11, 11}, /* cost of starting a multiply */
205 0, /* cost of multiply per each bit set */
206 {25, 25, 25, 25, 25}, /* cost of a divide/mod */
207 3, /* cost of movsx */
208 2, /* cost of movzx */
209 8, /* "large" insn */
210 6, /* MOVE_RATIO */
211 6, /* cost for loading QImode using movzbl */
212 {2, 4, 2}, /* cost of loading integer registers
213 in QImode, HImode and SImode.
214 Relative to reg-reg move (2). */
215 {2, 4, 2}, /* cost of storing integer registers */
216 2, /* cost of reg,reg fld/fst */
217 {2, 2, 6}, /* cost of loading fp registers
218 in SFmode, DFmode and XFmode */
219 {4, 4, 6}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
220 8, /* cost of moving MMX register */
221 {8, 8}, /* cost of loading MMX registers
222 in SImode and DImode */
223 {8, 8}, /* cost of storing MMX registers
224 in SImode and DImode */
225 2, /* cost of moving SSE register */
226 {4, 8, 16}, /* cost of loading SSE registers
227 in SImode, DImode and TImode */
228 {4, 8, 16}, /* cost of storing SSE registers
229 in SImode, DImode and TImode */
230 3, /* MMX or SSE register to integer */
231 0, /* size of prefetch block */
232 0, /* number of parallel prefetches */
233 2, /* Branch cost */
234 3, /* cost of FADD and FSUB insns. */
235 3, /* cost of FMUL instruction. */
236 39, /* cost of FDIV instruction. */
237 1, /* cost of FABS instruction. */
238 1, /* cost of FCHS instruction. */
239 70, /* cost of FSQRT instruction. */
240 };
241
242 static const
243 struct processor_costs pentiumpro_cost = {
244 1, /* cost of an add instruction */
245 1, /* cost of a lea instruction */
246 1, /* variable shift costs */
247 1, /* constant shift costs */
248 {4, 4, 4, 4, 4}, /* cost of starting a multiply */
249 0, /* cost of multiply per each bit set */
250 {17, 17, 17, 17, 17}, /* cost of a divide/mod */
251 1, /* cost of movsx */
252 1, /* cost of movzx */
253 8, /* "large" insn */
254 6, /* MOVE_RATIO */
255 2, /* cost for loading QImode using movzbl */
256 {4, 4, 4}, /* cost of loading integer registers
257 in QImode, HImode and SImode.
258 Relative to reg-reg move (2). */
259 {2, 2, 2}, /* cost of storing integer registers */
260 2, /* cost of reg,reg fld/fst */
261 {2, 2, 6}, /* cost of loading fp registers
262 in SFmode, DFmode and XFmode */
263 {4, 4, 6}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
264 2, /* cost of moving MMX register */
265 {2, 2}, /* cost of loading MMX registers
266 in SImode and DImode */
267 {2, 2}, /* cost of storing MMX registers
268 in SImode and DImode */
269 2, /* cost of moving SSE register */
270 {2, 2, 8}, /* cost of loading SSE registers
271 in SImode, DImode and TImode */
272 {2, 2, 8}, /* cost of storing SSE registers
273 in SImode, DImode and TImode */
274 3, /* MMX or SSE register to integer */
275 32, /* size of prefetch block */
276 6, /* number of parallel prefetches */
277 2, /* Branch cost */
278 3, /* cost of FADD and FSUB insns. */
279 5, /* cost of FMUL instruction. */
280 56, /* cost of FDIV instruction. */
281 2, /* cost of FABS instruction. */
282 2, /* cost of FCHS instruction. */
283 56, /* cost of FSQRT instruction. */
284 };
285
286 static const
287 struct processor_costs k6_cost = {
288 1, /* cost of an add instruction */
289 2, /* cost of a lea instruction */
290 1, /* variable shift costs */
291 1, /* constant shift costs */
292 {3, 3, 3, 3, 3}, /* cost of starting a multiply */
293 0, /* cost of multiply per each bit set */
294 {18, 18, 18, 18, 18}, /* cost of a divide/mod */
295 2, /* cost of movsx */
296 2, /* cost of movzx */
297 8, /* "large" insn */
298 4, /* MOVE_RATIO */
299 3, /* cost for loading QImode using movzbl */
300 {4, 5, 4}, /* cost of loading integer registers
301 in QImode, HImode and SImode.
302 Relative to reg-reg move (2). */
303 {2, 3, 2}, /* cost of storing integer registers */
304 4, /* cost of reg,reg fld/fst */
305 {6, 6, 6}, /* cost of loading fp registers
306 in SFmode, DFmode and XFmode */
307 {4, 4, 4}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
308 2, /* cost of moving MMX register */
309 {2, 2}, /* cost of loading MMX registers
310 in SImode and DImode */
311 {2, 2}, /* cost of storing MMX registers
312 in SImode and DImode */
313 2, /* cost of moving SSE register */
314 {2, 2, 8}, /* cost of loading SSE registers
315 in SImode, DImode and TImode */
316 {2, 2, 8}, /* cost of storing SSE registers
317 in SImode, DImode and TImode */
318 6, /* MMX or SSE register to integer */
319 32, /* size of prefetch block */
320 1, /* number of parallel prefetches */
321 1, /* Branch cost */
322 2, /* cost of FADD and FSUB insns. */
323 2, /* cost of FMUL instruction. */
324 56, /* cost of FDIV instruction. */
325 2, /* cost of FABS instruction. */
326 2, /* cost of FCHS instruction. */
327 56, /* cost of FSQRT instruction. */
328 };
329
330 static const
331 struct processor_costs athlon_cost = {
332 1, /* cost of an add instruction */
333 2, /* cost of a lea instruction */
334 1, /* variable shift costs */
335 1, /* constant shift costs */
336 {5, 5, 5, 5, 5}, /* cost of starting a multiply */
337 0, /* cost of multiply per each bit set */
338 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
339 1, /* cost of movsx */
340 1, /* cost of movzx */
341 8, /* "large" insn */
342 9, /* MOVE_RATIO */
343 4, /* cost for loading QImode using movzbl */
344 {3, 4, 3}, /* cost of loading integer registers
345 in QImode, HImode and SImode.
346 Relative to reg-reg move (2). */
347 {3, 4, 3}, /* cost of storing integer registers */
348 4, /* cost of reg,reg fld/fst */
349 {4, 4, 12}, /* cost of loading fp registers
350 in SFmode, DFmode and XFmode */
351 {6, 6, 8}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
352 2, /* cost of moving MMX register */
353 {4, 4}, /* cost of loading MMX registers
354 in SImode and DImode */
355 {4, 4}, /* cost of storing MMX registers
356 in SImode and DImode */
357 2, /* cost of moving SSE register */
358 {4, 4, 6}, /* cost of loading SSE registers
359 in SImode, DImode and TImode */
360 {4, 4, 5}, /* cost of storing SSE registers
361 in SImode, DImode and TImode */
362 5, /* MMX or SSE register to integer */
363 64, /* size of prefetch block */
364 6, /* number of parallel prefetches */
365 5, /* Branch cost */
366 4, /* cost of FADD and FSUB insns. */
367 4, /* cost of FMUL instruction. */
368 24, /* cost of FDIV instruction. */
369 2, /* cost of FABS instruction. */
370 2, /* cost of FCHS instruction. */
371 35, /* cost of FSQRT instruction. */
372 };
373
374 static const
375 struct processor_costs k8_cost = {
376 1, /* cost of an add instruction */
377 2, /* cost of a lea instruction */
378 1, /* variable shift costs */
379 1, /* constant shift costs */
380 {3, 4, 3, 4, 5}, /* cost of starting a multiply */
381 0, /* cost of multiply per each bit set */
382 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
383 1, /* cost of movsx */
384 1, /* cost of movzx */
385 8, /* "large" insn */
386 9, /* MOVE_RATIO */
387 4, /* cost for loading QImode using movzbl */
388 {3, 4, 3}, /* cost of loading integer registers
389 in QImode, HImode and SImode.
390 Relative to reg-reg move (2). */
391 {3, 4, 3}, /* cost of storing integer registers */
392 4, /* cost of reg,reg fld/fst */
393 {4, 4, 12}, /* cost of loading fp registers
394 in SFmode, DFmode and XFmode */
395 {6, 6, 8}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
396 2, /* cost of moving MMX register */
397 {3, 3}, /* cost of loading MMX registers
398 in SImode and DImode */
399 {4, 4}, /* cost of storing MMX registers
400 in SImode and DImode */
401 2, /* cost of moving SSE register */
402 {4, 3, 6}, /* cost of loading SSE registers
403 in SImode, DImode and TImode */
404 {4, 4, 5}, /* cost of storing SSE registers
405 in SImode, DImode and TImode */
406 5, /* MMX or SSE register to integer */
407 64, /* size of prefetch block */
408 6, /* number of parallel prefetches */
409 5, /* Branch cost */
410 4, /* cost of FADD and FSUB insns. */
411 4, /* cost of FMUL instruction. */
412 19, /* cost of FDIV instruction. */
413 2, /* cost of FABS instruction. */
414 2, /* cost of FCHS instruction. */
415 35, /* cost of FSQRT instruction. */
416 };
417
418 static const
419 struct processor_costs pentium4_cost = {
420 1, /* cost of an add instruction */
421 3, /* cost of a lea instruction */
422 4, /* variable shift costs */
423 4, /* constant shift costs */
424 {15, 15, 15, 15, 15}, /* cost of starting a multiply */
425 0, /* cost of multiply per each bit set */
426 {56, 56, 56, 56, 56}, /* cost of a divide/mod */
427 1, /* cost of movsx */
428 1, /* cost of movzx */
429 16, /* "large" insn */
430 6, /* MOVE_RATIO */
431 2, /* cost for loading QImode using movzbl */
432 {4, 5, 4}, /* cost of loading integer registers
433 in QImode, HImode and SImode.
434 Relative to reg-reg move (2). */
435 {2, 3, 2}, /* cost of storing integer registers */
436 2, /* cost of reg,reg fld/fst */
437 {2, 2, 6}, /* cost of loading fp registers
438 in SFmode, DFmode and XFmode */
439 {4, 4, 6}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
440 2, /* cost of moving MMX register */
441 {2, 2}, /* cost of loading MMX registers
442 in SImode and DImode */
443 {2, 2}, /* cost of storing MMX registers
444 in SImode and DImode */
445 12, /* cost of moving SSE register */
446 {12, 12, 12}, /* cost of loading SSE registers
447 in SImode, DImode and TImode */
448 {2, 2, 8}, /* cost of storing SSE registers
449 in SImode, DImode and TImode */
450 10, /* MMX or SSE register to integer */
451 64, /* size of prefetch block */
452 6, /* number of parallel prefetches */
453 2, /* Branch cost */
454 5, /* cost of FADD and FSUB insns. */
455 7, /* cost of FMUL instruction. */
456 43, /* cost of FDIV instruction. */
457 2, /* cost of FABS instruction. */
458 2, /* cost of FCHS instruction. */
459 43, /* cost of FSQRT instruction. */
460 };
461
462 static const
463 struct processor_costs nocona_cost = {
464 1, /* cost of an add instruction */
465 1, /* cost of a lea instruction */
466 1, /* variable shift costs */
467 1, /* constant shift costs */
468 {10, 10, 10, 10, 10}, /* cost of starting a multiply */
469 0, /* cost of multiply per each bit set */
470 {66, 66, 66, 66, 66}, /* cost of a divide/mod */
471 1, /* cost of movsx */
472 1, /* cost of movzx */
473 16, /* "large" insn */
474 9, /* MOVE_RATIO */
475 4, /* cost for loading QImode using movzbl */
476 {4, 4, 4}, /* cost of loading integer registers
477 in QImode, HImode and SImode.
478 Relative to reg-reg move (2). */
479 {4, 4, 4}, /* cost of storing integer registers */
480 3, /* cost of reg,reg fld/fst */
481 {12, 12, 12}, /* cost of loading fp registers
482 in SFmode, DFmode and XFmode */
483 {4, 4, 4}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
484 6, /* cost of moving MMX register */
485 {12, 12}, /* cost of loading MMX registers
486 in SImode and DImode */
487 {12, 12}, /* cost of storing MMX registers
488 in SImode and DImode */
489 6, /* cost of moving SSE register */
490 {12, 12, 12}, /* cost of loading SSE registers
491 in SImode, DImode and TImode */
492 {12, 12, 12}, /* cost of storing SSE registers
493 in SImode, DImode and TImode */
494 8, /* MMX or SSE register to integer */
495 128, /* size of prefetch block */
496 8, /* number of parallel prefetches */
497 1, /* Branch cost */
498 6, /* cost of FADD and FSUB insns. */
499 8, /* cost of FMUL instruction. */
500 40, /* cost of FDIV instruction. */
501 3, /* cost of FABS instruction. */
502 3, /* cost of FCHS instruction. */
503 44, /* cost of FSQRT instruction. */
504 };
505
506 const struct processor_costs *ix86_cost = &pentium_cost;
507
508 /* Processor feature/optimization bitmasks. */
509 #define m_386 (1<<PROCESSOR_I386)
510 #define m_486 (1<<PROCESSOR_I486)
511 #define m_PENT (1<<PROCESSOR_PENTIUM)
512 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
513 #define m_K6 (1<<PROCESSOR_K6)
514 #define m_ATHLON (1<<PROCESSOR_ATHLON)
515 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
516 #define m_K8 (1<<PROCESSOR_K8)
517 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
518 #define m_NOCONA (1<<PROCESSOR_NOCONA)
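/* Editorial sketch (not in the original source): each x86_* tuning flag below
   is a bitmask over processors, tested in i386.h against the bit of the
   processor chosen by -mtune (TUNEMASK, assumed to be (1 << ix86_tune), as
   used with x86_accumulate_outgoing_args near the end of override_options).
   A typical accessor would look like

     #define TARGET_USE_LEAVE (x86_use_leave & TUNEMASK)

   so e.g. -mtune=k8 sets the m_K8 bit and enables "leave" in epilogues.  */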
519
520 const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
521 const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
522 const int x86_zero_extend_with_and = m_486 | m_PENT;
523 const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA /* m_386 | m_K6 */;
524 const int x86_double_with_add = ~m_386;
525 const int x86_use_bit_test = m_386;
526 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
527 const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
528 const int x86_3dnow_a = m_ATHLON_K8;
529 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
530 /* Branch hints were put in P4 based on simulation results. But after
531 P4 was made, no performance benefit was observed with branch hints.
532 They also increase code size. As a result, icc never generates
533 branch hints. */
534 const int x86_branch_hints = 0;
535 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4 | m_NOCONA;
536 const int x86_partial_reg_stall = m_PPRO;
537 const int x86_use_loop = m_K6;
538 const int x86_use_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
539 const int x86_use_mov0 = m_K6;
540 const int x86_use_cltd = ~(m_PENT | m_K6);
541 const int x86_read_modify_write = ~m_PENT;
542 const int x86_read_modify = ~(m_PENT | m_PPRO);
543 const int x86_split_long_moves = m_PPRO;
544 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
545 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
546 const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
547 const int x86_qimode_math = ~(0);
548 const int x86_promote_qi_regs = 0;
549 const int x86_himode_math = ~(m_PPRO);
550 const int x86_promote_hi_regs = m_PPRO;
551 const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA;
552 const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA;
553 const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4 | m_NOCONA;
554 const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4 | m_NOCONA;
555 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO);
556 const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA;
557 const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA;
558 const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO;
559 const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO;
560 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO;
561 const int x86_decompose_lea = m_PENT4 | m_NOCONA;
562 const int x86_shift1 = ~m_486;
563 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
564 const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO;
565 /* Set for machines where the type and dependencies are resolved on SSE
566 register parts instead of whole registers, so we may maintain just the
567 lower part of scalar values in the proper format, leaving the upper
568 part undefined. */
569 const int x86_sse_split_regs = m_ATHLON_K8;
570 const int x86_sse_typeless_stores = m_ATHLON_K8;
571 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
572 const int x86_use_ffreep = m_ATHLON_K8;
573 const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
574 const int x86_inter_unit_moves = ~(m_ATHLON_K8);
575 const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_NOCONA | m_PPRO;
576 /* Some CPU cores are not able to predict more than 4 branch instructions in
577 the 16 byte window. */
578 const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
579 const int x86_schedule = m_PPRO | m_ATHLON_K8 | m_K6 | m_PENT;
580 const int x86_use_bt = m_ATHLON_K8;
581
582 /* If the average insn count for a single function invocation is lower
583 than this constant, emit fast (but longer) prologue and
584 epilogue code. */
585 #define FAST_PROLOGUE_INSN_COUNT 20
586
587 /* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
588 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
589 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
590 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
591
592 /* Array of the smallest class containing reg number REGNO, indexed by
593 REGNO. Used by REGNO_REG_CLASS in i386.h. */
594
595 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
596 {
597 /* ax, dx, cx, bx */
598 AREG, DREG, CREG, BREG,
599 /* si, di, bp, sp */
600 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
601 /* FP registers */
602 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
603 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
604 /* arg pointer */
605 NON_Q_REGS,
606 /* flags, fpsr, dirflag, frame */
607 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
608 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
609 SSE_REGS, SSE_REGS,
610 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
611 MMX_REGS, MMX_REGS,
612 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
613 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
614 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
615 SSE_REGS, SSE_REGS,
616 };
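/* Editorial note (not in the original source): REGNO_REG_CLASS in i386.h is
   assumed to simply index this table, roughly

     #define REGNO_REG_CLASS(REGNO) (regclass_map[(REGNO)])

   so REGNO_REG_CLASS (0) is AREG (%eax) and REGNO_REG_CLASS (7) is
   NON_Q_REGS (%esp).  */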
617
618 /* The "default" register map used in 32bit mode. */
619
620 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
621 {
622 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
623 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
624 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
625 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
626 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
627 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
628 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
629 };
630
631 static int const x86_64_int_parameter_registers[6] =
632 {
633 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
634 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
635 };
636
637 static int const x86_64_int_return_registers[4] =
638 {
639 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
640 };
641
642 /* The "default" register map used in 64bit mode. */
643 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
644 {
645 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
646 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
647 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
648 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
649 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
650 8,9,10,11,12,13,14,15, /* extended integer registers */
651 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
652 };
653
654 /* Define the register numbers to be used in Dwarf debugging information.
655 The SVR4 reference port C compiler uses the following register numbers
656 in its Dwarf output code:
657 0 for %eax (gcc regno = 0)
658 1 for %ecx (gcc regno = 2)
659 2 for %edx (gcc regno = 1)
660 3 for %ebx (gcc regno = 3)
661 4 for %esp (gcc regno = 7)
662 5 for %ebp (gcc regno = 6)
663 6 for %esi (gcc regno = 4)
664 7 for %edi (gcc regno = 5)
665 The following three DWARF register numbers are never generated by
666 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
667 believes these numbers have these meanings.
668 8 for %eip (no gcc equivalent)
669 9 for %eflags (gcc regno = 17)
670 10 for %trapno (no gcc equivalent)
671 It is not at all clear how we should number the FP stack registers
672 for the x86 architecture. If the version of SDB on x86/svr4 were
673 a bit less brain dead with respect to floating-point then we would
674 have a precedent to follow with respect to DWARF register numbers
675 for x86 FP registers, but the SDB on x86/svr4 is so completely
676 broken with respect to FP registers that it is hardly worth thinking
677 of it as something to strive for compatibility with.
678 The version of x86/svr4 SDB I have at the moment does (partially)
679 seem to believe that DWARF register number 11 is associated with
680 the x86 register %st(0), but that's about all. Higher DWARF
681 register numbers don't seem to be associated with anything in
682 particular, and even for DWARF regno 11, SDB only seems to under-
683 stand that it should say that a variable lives in %st(0) (when
684 asked via an `=' command) if we said it was in DWARF regno 11,
685 but SDB still prints garbage when asked for the value of the
686 variable in question (via a `/' command).
687 (Also note that the labels SDB prints for various FP stack regs
688 when doing an `x' command are all wrong.)
689 Note that these problems generally don't affect the native SVR4
690 C compiler because it doesn't allow the use of -O with -g and
691 because when it is *not* optimizing, it allocates a memory
692 location for each floating-point variable, and the memory
693 location is what gets described in the DWARF AT_location
694 attribute for the variable in question.
695 Regardless of the severe mental illness of the x86/svr4 SDB, we
696 do something sensible here and we use the following DWARF
697 register numbers. Note that these are all stack-top-relative
698 numbers.
699 11 for %st(0) (gcc regno = 8)
700 12 for %st(1) (gcc regno = 9)
701 13 for %st(2) (gcc regno = 10)
702 14 for %st(3) (gcc regno = 11)
703 15 for %st(4) (gcc regno = 12)
704 16 for %st(5) (gcc regno = 13)
705 17 for %st(6) (gcc regno = 14)
706 18 for %st(7) (gcc regno = 15)
707 */
708 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
709 {
710 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
711 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
712 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
713 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
714 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
715 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
716 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
717 };
718
719 /* Test and compare insns in i386.md store the information needed to
720 generate branch and scc insns here. */
721
722 rtx ix86_compare_op0 = NULL_RTX;
723 rtx ix86_compare_op1 = NULL_RTX;
724
725 #define MAX_386_STACK_LOCALS 3
726 /* Size of the register save area. */
727 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
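/* Editorial note (not in the original source): with the 64-bit values
   REGPARM_MAX = 6, UNITS_PER_WORD = 8 and SSE_REGPARM_MAX = 8, this works out
   to 6*8 + 8*16 = 176 bytes, the x86-64 varargs register save area (six
   integer argument registers plus eight SSE argument registers).  */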
728
729 /* Define the structure for the machine field in struct function. */
730
731 struct stack_local_entry GTY(())
732 {
733 unsigned short mode;
734 unsigned short n;
735 rtx rtl;
736 struct stack_local_entry *next;
737 };
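/* Editorial note (not in the original source): these entries form a
   per-function list of cached stack slots keyed by (mode, n); the
   assign_386_stack_local helper defined later in this file is assumed to
   reuse an existing slot before creating a new one, e.g.

     rtx slot = assign_386_stack_local (SImode, 0);
 */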
738
739 /* Structure describing stack frame layout.
740 Stack grows downward:
741
742 [arguments]
743 <- ARG_POINTER
744 saved pc
745
746 saved frame pointer if frame_pointer_needed
747 <- HARD_FRAME_POINTER
748 [saved regs]
749
750 [padding1] \
751 )
752 [va_arg registers] (
753 > to_allocate <- FRAME_POINTER
754 [frame] (
755 )
756 [padding2] /
757 */
758 struct ix86_frame
759 {
760 int nregs;
761 int padding1;
762 int va_arg_size;
763 HOST_WIDE_INT frame;
764 int padding2;
765 int outgoing_arguments_size;
766 int red_zone_size;
767
768 HOST_WIDE_INT to_allocate;
769 /* The offsets relative to ARG_POINTER. */
770 HOST_WIDE_INT frame_pointer_offset;
771 HOST_WIDE_INT hard_frame_pointer_offset;
772 HOST_WIDE_INT stack_pointer_offset;
773
774 /* When save_regs_using_mov is set, emit prologue using
775 move instead of push instructions. */
776 bool save_regs_using_mov;
777 };
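/* Editorial note (not in the original source): ix86_compute_frame_layout,
   declared below, fills this structure in; per the diagram above, the
   *_offset fields are distances from ARG_POINTER, and to_allocate covers
   padding1, the va_arg register save area, the local frame and padding2,
   i.e. the space the prologue allocates below the saved registers.  */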
778
779 /* Used to enable/disable debugging features. */
780 const char *ix86_debug_arg_string, *ix86_debug_addr_string;
781 /* Code model option as passed by user. */
782 const char *ix86_cmodel_string;
783 /* Parsed value. */
784 enum cmodel ix86_cmodel;
785 /* Asm dialect. */
786 const char *ix86_asm_string;
787 enum asm_dialect ix86_asm_dialect = ASM_ATT;
788 /* TLS dialect. */
789 const char *ix86_tls_dialect_string;
790 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
791
792 /* Which unit we are generating floating point math for. */
793 enum fpmath_unit ix86_fpmath;
794
795 /* Which CPU we are scheduling for. */
796 enum processor_type ix86_tune;
797 /* Which instruction set architecture to use. */
798 enum processor_type ix86_arch;
799
800 /* Strings to hold which cpu and instruction set architecture to use. */
801 const char *ix86_tune_string; /* for -mtune=<xxx> */
802 const char *ix86_arch_string; /* for -march=<xxx> */
803 const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
804
805 /* # of registers to use to pass arguments. */
806 const char *ix86_regparm_string;
807
808 /* True if the SSE prefetch instruction is not a NOP. */
809 int x86_prefetch_sse;
810
811 /* ix86_regparm_string as a number */
812 int ix86_regparm;
813
814 /* Alignment to use for loops and jumps: */
815
816 /* Power of two alignment for loops. */
817 const char *ix86_align_loops_string;
818
819 /* Power of two alignment for non-loop jumps. */
820 const char *ix86_align_jumps_string;
821
822 /* Power of two alignment for stack boundary in bytes. */
823 const char *ix86_preferred_stack_boundary_string;
824
825 /* Preferred alignment for stack boundary in bits. */
826 unsigned int ix86_preferred_stack_boundary;
827
828 /* Values 1-5: see jump.c */
829 int ix86_branch_cost;
830 const char *ix86_branch_cost_string;
831
832 /* Power of two alignment for functions. */
833 const char *ix86_align_funcs_string;
834
835 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
836 char internal_label_prefix[16];
837 int internal_label_prefix_len;
838 \f
839 static void output_pic_addr_const (FILE *, rtx, int);
840 static void put_condition_code (enum rtx_code, enum machine_mode,
841 int, int, FILE *);
842 static const char *get_some_local_dynamic_name (void);
843 static int get_some_local_dynamic_name_1 (rtx *, void *);
844 static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
845 static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
846 rtx *);
847 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
848 static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
849 enum machine_mode);
850 static rtx get_thread_pointer (int);
851 static rtx legitimize_tls_address (rtx, enum tls_model, int);
852 static void get_pc_thunk_name (char [32], unsigned int);
853 static rtx gen_push (rtx);
854 static int ix86_flags_dependant (rtx, rtx, enum attr_type);
855 static int ix86_agi_dependant (rtx, rtx, enum attr_type);
856 static struct machine_function * ix86_init_machine_status (void);
857 static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
858 static int ix86_nsaved_regs (void);
859 static void ix86_emit_save_regs (void);
860 static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
861 static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
862 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
863 static HOST_WIDE_INT ix86_GOT_alias_set (void);
864 static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
865 static rtx ix86_expand_aligntest (rtx, int);
866 static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
867 static int ix86_issue_rate (void);
868 static int ix86_adjust_cost (rtx, rtx, rtx, int);
869 static int ia32_multipass_dfa_lookahead (void);
870 static void ix86_init_mmx_sse_builtins (void);
871 static rtx x86_this_parameter (tree);
872 static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
873 HOST_WIDE_INT, tree);
874 static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
875 static void x86_file_start (void);
876 static void ix86_reorg (void);
877 static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
878 static tree ix86_build_builtin_va_list (void);
879 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
880 tree, int *, int);
881 static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
882 static bool ix86_vector_mode_supported_p (enum machine_mode);
883
884 static int ix86_address_cost (rtx);
885 static bool ix86_cannot_force_const_mem (rtx);
886 static rtx ix86_delegitimize_address (rtx);
887
888 struct builtin_description;
889 static rtx ix86_expand_sse_comi (const struct builtin_description *,
890 tree, rtx);
891 static rtx ix86_expand_sse_compare (const struct builtin_description *,
892 tree, rtx);
893 static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
894 static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
895 static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
896 static rtx ix86_expand_store_builtin (enum insn_code, tree);
897 static rtx safe_vector_operand (rtx, enum machine_mode);
898 static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
899 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
900 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
901 static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
902 static int ix86_fp_comparison_cost (enum rtx_code code);
903 static unsigned int ix86_select_alt_pic_regnum (void);
904 static int ix86_save_reg (unsigned int, int);
905 static void ix86_compute_frame_layout (struct ix86_frame *);
906 static int ix86_comp_type_attributes (tree, tree);
907 static int ix86_function_regparm (tree, tree);
908 const struct attribute_spec ix86_attribute_table[];
909 static bool ix86_function_ok_for_sibcall (tree, tree);
910 static tree ix86_handle_cdecl_attribute (tree *, tree, tree, int, bool *);
911 static tree ix86_handle_regparm_attribute (tree *, tree, tree, int, bool *);
912 static int ix86_value_regno (enum machine_mode);
913 static bool contains_128bit_aligned_vector_p (tree);
914 static rtx ix86_struct_value_rtx (tree, int);
915 static bool ix86_ms_bitfield_layout_p (tree);
916 static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
917 static int extended_reg_mentioned_1 (rtx *, void *);
918 static bool ix86_rtx_costs (rtx, int, int, int *);
919 static int min_insn_size (rtx);
920 static tree ix86_md_asm_clobbers (tree clobbers);
921 static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type);
922 static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
923 tree, bool);
924
925 /* This function is only used on Solaris. */
926 static void i386_solaris_elf_named_section (const char *, unsigned int, tree)
927 ATTRIBUTE_UNUSED;
928
929 /* Register class used for passing a given 64-bit part of an argument.
930 These represent classes as documented by the psABI, with the exception
931 of the SSESF and SSEDF classes, which are basically the SSE class; gcc
932 just uses SFmode or DFmode moves instead of DImode moves to avoid
933 reformatting penalties.
934
935 Similarly, we play games with INTEGERSI_CLASS to use cheaper SImode
936 moves whenever possible (the upper half is only padding). */
937 enum x86_64_reg_class
938 {
939 X86_64_NO_CLASS,
940 X86_64_INTEGER_CLASS,
941 X86_64_INTEGERSI_CLASS,
942 X86_64_SSE_CLASS,
943 X86_64_SSESF_CLASS,
944 X86_64_SSEDF_CLASS,
945 X86_64_SSEUP_CLASS,
946 X86_64_X87_CLASS,
947 X86_64_X87UP_CLASS,
948 X86_64_COMPLEX_X87_CLASS,
949 X86_64_MEMORY_CLASS
950 };
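/* Editorial example (not in the original source): under this classification a
   struct { double d; int i; } argument is split into two eightbytes classified
   as X86_64_SSEDF_CLASS (the double) and X86_64_INTEGERSI_CLASS (the int,
   which uses only the low half), so it is passed in one SSE register and one
   general-purpose register.  */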
951 static const char * const x86_64_reg_class_name[] = {
952 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
953 "sseup", "x87", "x87up", "cplx87", "no"
954 };
955
956 #define MAX_CLASSES 4
957
958 /* Table of constants used by fldpi, fldln2, etc.... */
959 static REAL_VALUE_TYPE ext_80387_constants_table [5];
960 static bool ext_80387_constants_init = 0;
961 static void init_ext_80387_constants (void);
962 \f
963 /* Initialize the GCC target structure. */
964 #undef TARGET_ATTRIBUTE_TABLE
965 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
966 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
967 # undef TARGET_MERGE_DECL_ATTRIBUTES
968 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
969 #endif
970
971 #undef TARGET_COMP_TYPE_ATTRIBUTES
972 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
973
974 #undef TARGET_INIT_BUILTINS
975 #define TARGET_INIT_BUILTINS ix86_init_builtins
976
977 #undef TARGET_EXPAND_BUILTIN
978 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
979
980 #undef TARGET_ASM_FUNCTION_EPILOGUE
981 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
982
983 #undef TARGET_ASM_OPEN_PAREN
984 #define TARGET_ASM_OPEN_PAREN ""
985 #undef TARGET_ASM_CLOSE_PAREN
986 #define TARGET_ASM_CLOSE_PAREN ""
987
988 #undef TARGET_ASM_ALIGNED_HI_OP
989 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
990 #undef TARGET_ASM_ALIGNED_SI_OP
991 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
992 #ifdef ASM_QUAD
993 #undef TARGET_ASM_ALIGNED_DI_OP
994 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
995 #endif
996
997 #undef TARGET_ASM_UNALIGNED_HI_OP
998 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
999 #undef TARGET_ASM_UNALIGNED_SI_OP
1000 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1001 #undef TARGET_ASM_UNALIGNED_DI_OP
1002 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1003
1004 #undef TARGET_SCHED_ADJUST_COST
1005 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1006 #undef TARGET_SCHED_ISSUE_RATE
1007 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1008 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1009 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1010 ia32_multipass_dfa_lookahead
1011
1012 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1013 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1014
1015 #ifdef HAVE_AS_TLS
1016 #undef TARGET_HAVE_TLS
1017 #define TARGET_HAVE_TLS true
1018 #endif
1019 #undef TARGET_CANNOT_FORCE_CONST_MEM
1020 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1021
1022 #undef TARGET_DELEGITIMIZE_ADDRESS
1023 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1024
1025 #undef TARGET_MS_BITFIELD_LAYOUT_P
1026 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1027
1028 #undef TARGET_ASM_OUTPUT_MI_THUNK
1029 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1030 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1031 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1032
1033 #undef TARGET_ASM_FILE_START
1034 #define TARGET_ASM_FILE_START x86_file_start
1035
1036 #undef TARGET_RTX_COSTS
1037 #define TARGET_RTX_COSTS ix86_rtx_costs
1038 #undef TARGET_ADDRESS_COST
1039 #define TARGET_ADDRESS_COST ix86_address_cost
1040
1041 #undef TARGET_FIXED_CONDITION_CODE_REGS
1042 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1043 #undef TARGET_CC_MODES_COMPATIBLE
1044 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1045
1046 #undef TARGET_MACHINE_DEPENDENT_REORG
1047 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1048
1049 #undef TARGET_BUILD_BUILTIN_VA_LIST
1050 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1051
1052 #undef TARGET_MD_ASM_CLOBBERS
1053 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1054
1055 #undef TARGET_PROMOTE_PROTOTYPES
1056 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1057 #undef TARGET_STRUCT_VALUE_RTX
1058 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1059 #undef TARGET_SETUP_INCOMING_VARARGS
1060 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1061 #undef TARGET_MUST_PASS_IN_STACK
1062 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
1063 #undef TARGET_PASS_BY_REFERENCE
1064 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
1065
1066 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1067 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1068
1069 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1070 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
1071
1072 #ifdef SUBTARGET_INSERT_ATTRIBUTES
1073 #undef TARGET_INSERT_ATTRIBUTES
1074 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
1075 #endif
1076
1077 struct gcc_target targetm = TARGET_INITIALIZER;
1078
1079 \f
1080 /* The svr4 ABI for the i386 says that records and unions are returned
1081 in memory. */
1082 #ifndef DEFAULT_PCC_STRUCT_RETURN
1083 #define DEFAULT_PCC_STRUCT_RETURN 1
1084 #endif
1085
1086 /* Sometimes certain combinations of command options do not make
1087 sense on a particular target machine. You can define a macro
1088 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1089 defined, is executed once just after all the command options have
1090 been parsed.
1091
1092 Don't use this macro to turn on various extra optimizations for
1093 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1094
1095 void
1096 override_options (void)
1097 {
1098 int i;
1099 int ix86_tune_defaulted = 0;
1100
1101 /* Comes from final.c -- no real reason to change it. */
1102 #define MAX_CODE_ALIGN 16
1103
1104 static struct ptt
1105 {
1106 const struct processor_costs *cost; /* Processor costs */
1107 const int target_enable; /* Target flags to enable. */
1108 const int target_disable; /* Target flags to disable. */
1109 const int align_loop; /* Default alignments. */
1110 const int align_loop_max_skip;
1111 const int align_jump;
1112 const int align_jump_max_skip;
1113 const int align_func;
1114 }
1115 const processor_target_table[PROCESSOR_max] =
1116 {
1117 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1118 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1119 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1120 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1121 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1122 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1123 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1124 {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
1125 {&nocona_cost, 0, 0, 0, 0, 0, 0, 0}
1126 };
1127
1128 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1129 static struct pta
1130 {
1131 const char *const name; /* processor name or nickname. */
1132 const enum processor_type processor;
1133 const enum pta_flags
1134 {
1135 PTA_SSE = 1,
1136 PTA_SSE2 = 2,
1137 PTA_SSE3 = 4,
1138 PTA_MMX = 8,
1139 PTA_PREFETCH_SSE = 16,
1140 PTA_3DNOW = 32,
1141 PTA_3DNOW_A = 64,
1142 PTA_64BIT = 128
1143 } flags;
1144 }
1145 const processor_alias_table[] =
1146 {
1147 {"i386", PROCESSOR_I386, 0},
1148 {"i486", PROCESSOR_I486, 0},
1149 {"i586", PROCESSOR_PENTIUM, 0},
1150 {"pentium", PROCESSOR_PENTIUM, 0},
1151 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1152 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1153 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1154 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1155 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1156 {"i686", PROCESSOR_PENTIUMPRO, 0},
1157 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1158 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1159 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1160 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1161 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1162 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1163 | PTA_MMX | PTA_PREFETCH_SSE},
1164 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1165 | PTA_MMX | PTA_PREFETCH_SSE},
1166 {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
1167 | PTA_MMX | PTA_PREFETCH_SSE},
1168 {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1169 | PTA_MMX | PTA_PREFETCH_SSE},
1170 {"k6", PROCESSOR_K6, PTA_MMX},
1171 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1172 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1173 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1174 | PTA_3DNOW_A},
1175 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1176 | PTA_3DNOW | PTA_3DNOW_A},
1177 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1178 | PTA_3DNOW_A | PTA_SSE},
1179 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1180 | PTA_3DNOW_A | PTA_SSE},
1181 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1182 | PTA_3DNOW_A | PTA_SSE},
1183 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1184 | PTA_SSE | PTA_SSE2 },
1185 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1186 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1187 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1188 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1189 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1190 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1191 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1192 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1193 };
1194
1195 int const pta_size = ARRAY_SIZE (processor_alias_table);
1196
1197 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1198 SUBTARGET_OVERRIDE_OPTIONS;
1199 #endif
1200
1201 /* Set the default values for switches whose default depends on TARGET_64BIT
1202 in case they weren't overridden by command-line options. */
1203 if (TARGET_64BIT)
1204 {
1205 if (flag_omit_frame_pointer == 2)
1206 flag_omit_frame_pointer = 1;
1207 if (flag_asynchronous_unwind_tables == 2)
1208 flag_asynchronous_unwind_tables = 1;
1209 if (flag_pcc_struct_return == 2)
1210 flag_pcc_struct_return = 0;
1211 }
1212 else
1213 {
1214 if (flag_omit_frame_pointer == 2)
1215 flag_omit_frame_pointer = 0;
1216 if (flag_asynchronous_unwind_tables == 2)
1217 flag_asynchronous_unwind_tables = 0;
1218 if (flag_pcc_struct_return == 2)
1219 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1220 }
1221
1222 if (!ix86_tune_string && ix86_arch_string)
1223 ix86_tune_string = ix86_arch_string;
1224 if (!ix86_tune_string)
1225 {
1226 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1227 ix86_tune_defaulted = 1;
1228 }
1229 if (!ix86_arch_string)
1230 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
1231
1232 if (ix86_cmodel_string != 0)
1233 {
1234 if (!strcmp (ix86_cmodel_string, "small"))
1235 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1236 else if (flag_pic)
1237 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1238 else if (!strcmp (ix86_cmodel_string, "32"))
1239 ix86_cmodel = CM_32;
1240 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1241 ix86_cmodel = CM_KERNEL;
1242 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
1243 ix86_cmodel = CM_MEDIUM;
1244 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1245 ix86_cmodel = CM_LARGE;
1246 else
1247 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1248 }
1249 else
1250 {
1251 ix86_cmodel = CM_32;
1252 if (TARGET_64BIT)
1253 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1254 }
1255 if (ix86_asm_string != 0)
1256 {
1257 if (!strcmp (ix86_asm_string, "intel"))
1258 ix86_asm_dialect = ASM_INTEL;
1259 else if (!strcmp (ix86_asm_string, "att"))
1260 ix86_asm_dialect = ASM_ATT;
1261 else
1262 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1263 }
1264 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1265 error ("code model %qs not supported in the %s bit mode",
1266 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1267 if (ix86_cmodel == CM_LARGE)
1268 sorry ("code model %<large%> not supported yet");
1269 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1270 sorry ("%i-bit mode not compiled in",
1271 (target_flags & MASK_64BIT) ? 64 : 32);
1272
1273 for (i = 0; i < pta_size; i++)
1274 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1275 {
1276 ix86_arch = processor_alias_table[i].processor;
1277 /* Default cpu tuning to the architecture. */
1278 ix86_tune = ix86_arch;
1279 if (processor_alias_table[i].flags & PTA_MMX
1280 && !(target_flags_explicit & MASK_MMX))
1281 target_flags |= MASK_MMX;
1282 if (processor_alias_table[i].flags & PTA_3DNOW
1283 && !(target_flags_explicit & MASK_3DNOW))
1284 target_flags |= MASK_3DNOW;
1285 if (processor_alias_table[i].flags & PTA_3DNOW_A
1286 && !(target_flags_explicit & MASK_3DNOW_A))
1287 target_flags |= MASK_3DNOW_A;
1288 if (processor_alias_table[i].flags & PTA_SSE
1289 && !(target_flags_explicit & MASK_SSE))
1290 target_flags |= MASK_SSE;
1291 if (processor_alias_table[i].flags & PTA_SSE2
1292 && !(target_flags_explicit & MASK_SSE2))
1293 target_flags |= MASK_SSE2;
1294 if (processor_alias_table[i].flags & PTA_SSE3
1295 && !(target_flags_explicit & MASK_SSE3))
1296 target_flags |= MASK_SSE3;
1297 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1298 x86_prefetch_sse = true;
1299 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1300 error ("CPU you selected does not support x86-64 "
1301 "instruction set");
1302 break;
1303 }
1304
1305 if (i == pta_size)
1306 error ("bad value (%s) for -march= switch", ix86_arch_string);
1307
1308 for (i = 0; i < pta_size; i++)
1309 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1310 {
1311 ix86_tune = processor_alias_table[i].processor;
1312 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1313 {
1314 if (ix86_tune_defaulted)
1315 {
1316 ix86_tune_string = "x86-64";
1317 for (i = 0; i < pta_size; i++)
1318 if (! strcmp (ix86_tune_string,
1319 processor_alias_table[i].name))
1320 break;
1321 ix86_tune = processor_alias_table[i].processor;
1322 }
1323 else
1324 error ("CPU you selected does not support x86-64 "
1325 "instruction set");
1326 }
1327 /* Intel CPUs have always interpreted SSE prefetch instructions as
1328 NOPs; so, we can enable SSE prefetch instructions even when
1329 -mtune (rather than -march) points us to a processor that has them.
1330 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
1331 higher processors. */
1332 if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
1333 x86_prefetch_sse = true;
1334 break;
1335 }
1336 if (i == pta_size)
1337 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1338
1339 if (optimize_size)
1340 ix86_cost = &size_cost;
1341 else
1342 ix86_cost = processor_target_table[ix86_tune].cost;
1343 target_flags |= processor_target_table[ix86_tune].target_enable;
1344 target_flags &= ~processor_target_table[ix86_tune].target_disable;
1345
1346 /* Arrange to set up i386_stack_locals for all functions. */
1347 init_machine_status = ix86_init_machine_status;
1348
1349 /* Validate -mregparm= value. */
1350 if (ix86_regparm_string)
1351 {
1352 i = atoi (ix86_regparm_string);
1353 if (i < 0 || i > REGPARM_MAX)
1354 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1355 else
1356 ix86_regparm = i;
1357 }
1358 else
1359 if (TARGET_64BIT)
1360 ix86_regparm = REGPARM_MAX;
1361
1362 /* If the user has provided any of the -malign-* options,
1363 warn and use that value only if -falign-* is not set.
1364 Remove this code in GCC 3.2 or later. */
1365 if (ix86_align_loops_string)
1366 {
1367 warning ("-malign-loops is obsolete, use -falign-loops");
1368 if (align_loops == 0)
1369 {
1370 i = atoi (ix86_align_loops_string);
1371 if (i < 0 || i > MAX_CODE_ALIGN)
1372 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1373 else
1374 align_loops = 1 << i;
1375 }
1376 }
1377
1378 if (ix86_align_jumps_string)
1379 {
1380 warning ("-malign-jumps is obsolete, use -falign-jumps");
1381 if (align_jumps == 0)
1382 {
1383 i = atoi (ix86_align_jumps_string);
1384 if (i < 0 || i > MAX_CODE_ALIGN)
1385 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1386 else
1387 align_jumps = 1 << i;
1388 }
1389 }
1390
1391 if (ix86_align_funcs_string)
1392 {
1393 warning ("-malign-functions is obsolete, use -falign-functions");
1394 if (align_functions == 0)
1395 {
1396 i = atoi (ix86_align_funcs_string);
1397 if (i < 0 || i > MAX_CODE_ALIGN)
1398 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1399 else
1400 align_functions = 1 << i;
1401 }
1402 }
1403
1404 /* Default align_* from the processor table. */
1405 if (align_loops == 0)
1406 {
1407 align_loops = processor_target_table[ix86_tune].align_loop;
1408 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
1409 }
1410 if (align_jumps == 0)
1411 {
1412 align_jumps = processor_target_table[ix86_tune].align_jump;
1413 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
1414 }
1415 if (align_functions == 0)
1416 {
1417 align_functions = processor_target_table[ix86_tune].align_func;
1418 }
1419
1420 /* Validate -mpreferred-stack-boundary= value, or provide default.
1421 The default of 128 bits is for Pentium III's SSE __m128, but we
1422 don't want additional code to keep the stack aligned when
1423 optimizing for code size. */
1424 ix86_preferred_stack_boundary = (optimize_size
1425 ? TARGET_64BIT ? 128 : 32
1426 : 128);
1427 if (ix86_preferred_stack_boundary_string)
1428 {
1429 i = atoi (ix86_preferred_stack_boundary_string);
1430 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1431 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1432 TARGET_64BIT ? 4 : 2);
1433 else
1434 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1435 }
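  /* Editorial example (not in the original source): the option value is a
     power-of-two exponent in bytes, so -mpreferred-stack-boundary=4 yields
     (1 << 4) * BITS_PER_UNIT = 16 * 8 = 128 bits, i.e. a 16-byte aligned
     stack, matching the 128-bit default chosen above.  */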
1436
1437 /* Validate -mbranch-cost= value, or provide default. */
1438 ix86_branch_cost = processor_target_table[ix86_tune].cost->branch_cost;
1439 if (ix86_branch_cost_string)
1440 {
1441 i = atoi (ix86_branch_cost_string);
1442 if (i < 0 || i > 5)
1443 error ("-mbranch-cost=%d is not between 0 and 5", i);
1444 else
1445 ix86_branch_cost = i;
1446 }
1447
1448 if (ix86_tls_dialect_string)
1449 {
1450 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1451 ix86_tls_dialect = TLS_DIALECT_GNU;
1452 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1453 ix86_tls_dialect = TLS_DIALECT_SUN;
1454 else
1455 error ("bad value (%s) for -mtls-dialect= switch",
1456 ix86_tls_dialect_string);
1457 }
1458
1459 /* Keep nonleaf frame pointers. */
1460 if (flag_omit_frame_pointer)
1461 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
1462 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
1463 flag_omit_frame_pointer = 1;
1464
1465 /* If we're doing fast math, we don't care about comparison order
1466 wrt NaNs. This lets us use a shorter comparison sequence. */
1467 if (flag_unsafe_math_optimizations)
1468 target_flags &= ~MASK_IEEE_FP;
1469
1470 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1471 since the insns won't need emulation. */
1472 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1473 target_flags &= ~MASK_NO_FANCY_MATH_387;
1474
1475 /* Likewise, if the target doesn't have a 387, or we've specified
1476 software floating point, don't use 387 inline intrinsics. */
1477 if (!TARGET_80387)
1478 target_flags |= MASK_NO_FANCY_MATH_387;
1479
1480 /* Turn on SSE2 builtins for -msse3. */
1481 if (TARGET_SSE3)
1482 target_flags |= MASK_SSE2;
1483
1484 /* Turn on SSE builtins for -msse2. */
1485 if (TARGET_SSE2)
1486 target_flags |= MASK_SSE;
1487
1488 /* Turn on MMX builtins for -msse. */
1489 if (TARGET_SSE)
1490 {
1491 target_flags |= MASK_MMX & ~target_flags_explicit;
1492 x86_prefetch_sse = true;
1493 }
1494
1495 /* Turn on MMX builtins for 3Dnow. */
1496 if (TARGET_3DNOW)
1497 target_flags |= MASK_MMX;
1498
1499 if (TARGET_64BIT)
1500 {
1501 if (TARGET_ALIGN_DOUBLE)
1502 error ("-malign-double makes no sense in the 64bit mode");
1503 if (TARGET_RTD)
1504 error ("-mrtd calling convention not supported in the 64bit mode");
1505
1506 /* Enable by default the SSE and MMX builtins. Do allow the user to
1507 explicitly disable any of these. In particular, disabling SSE and
1508 MMX for kernel code is extremely useful. */
1509 target_flags
1510 |= ((MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE)
1511 & ~target_flags_explicit);
1512
1513 if (TARGET_SSE)
1514 ix86_fpmath = FPMATH_SSE;
1515 }
1516 else
1517 {
1518 ix86_fpmath = FPMATH_387;
1519 /* The i386 ABI does not specify a red zone. It still makes sense to use it
1520 when the programmer takes care to keep the stack from being destroyed. */
1521 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
1522 target_flags |= MASK_NO_RED_ZONE;
1523 }
1524
1525 if (ix86_fpmath_string != 0)
1526 {
1527 if (! strcmp (ix86_fpmath_string, "387"))
1528 ix86_fpmath = FPMATH_387;
1529 else if (! strcmp (ix86_fpmath_string, "sse"))
1530 {
1531 if (!TARGET_SSE)
1532 {
1533 warning ("SSE instruction set disabled, using 387 arithmetics");
1534 ix86_fpmath = FPMATH_387;
1535 }
1536 else
1537 ix86_fpmath = FPMATH_SSE;
1538 }
1539 else if (! strcmp (ix86_fpmath_string, "387,sse")
1540 || ! strcmp (ix86_fpmath_string, "sse,387"))
1541 {
1542 if (!TARGET_SSE)
1543 {
1544 warning ("SSE instruction set disabled, using 387 arithmetics");
1545 ix86_fpmath = FPMATH_387;
1546 }
1547 else if (!TARGET_80387)
1548 {
1549 warning ("387 instruction set disabled, using SSE arithmetics");
1550 ix86_fpmath = FPMATH_SSE;
1551 }
1552 else
1553 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1554 }
1555 else
1556 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1557 }
1558
1559 /* If fpmath doesn't include 387, disable use of x87 intrinsics. */
1560 if (! (ix86_fpmath & FPMATH_387))
1561 target_flags |= MASK_NO_FANCY_MATH_387;
1562
1563 if ((x86_accumulate_outgoing_args & TUNEMASK)
1564 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1565 && !optimize_size)
1566 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1567
1568 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1569 {
1570 char *p;
1571 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1572 p = strchr (internal_label_prefix, 'X');
1573 internal_label_prefix_len = p - internal_label_prefix;
1574 *p = '\0';
1575 }
1576
1577 /* When scheduling description is not available, disable scheduler pass
1578 so it won't slow down the compilation and make x87 code slower. */
1579 if (!TARGET_SCHEDULE)
1580 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
1581 }
1582 \f
1583 void
1584 optimization_options (int level, int size ATTRIBUTE_UNUSED)
1585 {
1586 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1587 make the problem with not enough registers even worse. */
1588 #ifdef INSN_SCHEDULING
1589 if (level > 1)
1590 flag_schedule_insns = 0;
1591 #endif
1592
1593 /* The default values of these switches depend on TARGET_64BIT,
1594 which is not known at this point. Mark these values with 2 and
1595 let the user override them. If there is no command line option
1596 specifying them, we will set the defaults in override_options. */
1597 if (optimize >= 1)
1598 flag_omit_frame_pointer = 2;
1599 flag_pcc_struct_return = 2;
1600 flag_asynchronous_unwind_tables = 2;
1601 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
1602 SUBTARGET_OPTIMIZATION_OPTIONS;
1603 #endif
1604 }
1605 \f
1606 /* Table of valid machine attributes. */
1607 const struct attribute_spec ix86_attribute_table[] =
1608 {
1609 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1610 /* Stdcall attribute says callee is responsible for popping arguments
1611 if they are not variable. */
1612 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1613 /* Fastcall attribute says callee is responsible for popping arguments
1614 if they are not variable. */
1615 { "fastcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1616 /* Cdecl attribute says the callee is a normal C declaration */
1617 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1618 /* Regparm attribute specifies how many integer arguments are to be
1619 passed in registers. */
1620 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1621 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1622 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
1623 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
1624 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
1625 #endif
1626 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1627 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1628 #ifdef SUBTARGET_ATTRIBUTE_TABLE
1629 SUBTARGET_ATTRIBUTE_TABLE,
1630 #endif
1631 { NULL, 0, 0, false, false, false, NULL }
1632 };
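/* For orientation, a minimal usage sketch of the attributes above (the
   function names are hypothetical, not part of GCC):

       int __attribute__ ((stdcall))     s_sum (int a, int b);
       int __attribute__ ((fastcall))    f_sum (int a, int b);
       int __attribute__ ((cdecl))       c_sum (int a, int b);
       int __attribute__ ((regparm (3))) r_sum (int a, int b, int c);

   stdcall and fastcall make the callee pop its fixed arguments; fastcall
   additionally passes the first two integer-sized arguments in ECX and
   EDX; regparm (3) passes up to three integer arguments in registers.  */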
1633
1634 /* Decide whether we can make a sibling call to a function. DECL is the
1635 declaration of the function being targeted by the call and EXP is the
1636 CALL_EXPR representing the call. */
1637
1638 static bool
1639 ix86_function_ok_for_sibcall (tree decl, tree exp)
1640 {
1641 /* If we are generating position-independent code, we cannot sibcall
1642 optimize any indirect call, or a direct call to a global function,
1643 as the PLT requires %ebx be live. */
1644 if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
1645 return false;
1646
1647 /* If we are returning floats on the 80387 register stack, we cannot
1648 make a sibcall from a function that doesn't return a float to a
1649 function that does or, conversely, from a function that does return
1650 a float to a function that doesn't; the necessary stack adjustment
1651 would not be executed. */
1652 if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp)))
1653 != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)))))
1654 return false;
1655
1656 /* If this call is indirect, we'll need to be able to use a call-clobbered
1657 register for the address of the target function. Make sure that all
1658 such registers are not used for passing parameters. */
1659 if (!decl && !TARGET_64BIT)
1660 {
1661 tree type;
1662
1663 /* We're looking at the CALL_EXPR, we need the type of the function. */
1664 type = TREE_OPERAND (exp, 0); /* pointer expression */
1665 type = TREE_TYPE (type); /* pointer type */
1666 type = TREE_TYPE (type); /* function type */
1667
1668 if (ix86_function_regparm (type, NULL) >= 3)
1669 {
1670 /* ??? Need to count the actual number of registers to be used,
1671 not the possible number of registers. Fix later. */
1672 return false;
1673 }
1674 }
1675
1676 /* Otherwise okay. That also includes certain types of indirect calls. */
1677 return true;
1678 }
1679
1680 /* Handle a "cdecl", "stdcall", or "fastcall" attribute;
1681 arguments as in struct attribute_spec.handler. */
1682 static tree
1683 ix86_handle_cdecl_attribute (tree *node, tree name,
1684 tree args ATTRIBUTE_UNUSED,
1685 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1686 {
1687 if (TREE_CODE (*node) != FUNCTION_TYPE
1688 && TREE_CODE (*node) != METHOD_TYPE
1689 && TREE_CODE (*node) != FIELD_DECL
1690 && TREE_CODE (*node) != TYPE_DECL)
1691 {
1692 warning ("%qs attribute only applies to functions",
1693 IDENTIFIER_POINTER (name));
1694 *no_add_attrs = true;
1695 }
1696 else
1697 {
1698 if (is_attribute_p ("fastcall", name))
1699 {
1700 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
1701 {
1702 error ("fastcall and stdcall attributes are not compatible");
1703 }
1704 else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
1705 {
1706 error ("fastcall and regparm attributes are not compatible");
1707 }
1708 }
1709 else if (is_attribute_p ("stdcall", name))
1710 {
1711 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1712 {
1713 error ("fastcall and stdcall attributes are not compatible");
1714 }
1715 }
1716 }
1717
1718 if (TARGET_64BIT)
1719 {
1720 warning ("%qs attribute ignored", IDENTIFIER_POINTER (name));
1721 *no_add_attrs = true;
1722 }
1723
1724 return NULL_TREE;
1725 }
1726
1727 /* Handle a "regparm" attribute;
1728 arguments as in struct attribute_spec.handler. */
1729 static tree
1730 ix86_handle_regparm_attribute (tree *node, tree name, tree args,
1731 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1732 {
1733 if (TREE_CODE (*node) != FUNCTION_TYPE
1734 && TREE_CODE (*node) != METHOD_TYPE
1735 && TREE_CODE (*node) != FIELD_DECL
1736 && TREE_CODE (*node) != TYPE_DECL)
1737 {
1738 warning ("%qs attribute only applies to functions",
1739 IDENTIFIER_POINTER (name));
1740 *no_add_attrs = true;
1741 }
1742 else
1743 {
1744 tree cst;
1745
1746 cst = TREE_VALUE (args);
1747 if (TREE_CODE (cst) != INTEGER_CST)
1748 {
1749 warning ("%qs attribute requires an integer constant argument",
1750 IDENTIFIER_POINTER (name));
1751 *no_add_attrs = true;
1752 }
1753 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1754 {
1755 warning ("argument to %qs attribute larger than %d",
1756 IDENTIFIER_POINTER (name), REGPARM_MAX);
1757 *no_add_attrs = true;
1758 }
1759
1760 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1761 {
1762 error ("fastcall and regparm attributes are not compatible");
1763 }
1764 }
1765
1766 return NULL_TREE;
1767 }
1768
1769 /* Return 0 if the attributes for two types are incompatible, 1 if they
1770 are compatible, and 2 if they are nearly compatible (which causes a
1771 warning to be generated). */
1772
1773 static int
1774 ix86_comp_type_attributes (tree type1, tree type2)
1775 {
1776 /* Check for mismatch of non-default calling convention. */
1777 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1778
1779 if (TREE_CODE (type1) != FUNCTION_TYPE)
1780 return 1;
1781
1782 /* Check for mismatched fastcall types */
1783 if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
1784 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
1785 return 0;
1786
1787 /* Check for mismatched return types (cdecl vs stdcall). */
1788 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1789 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1790 return 0;
1791 if (ix86_function_regparm (type1, NULL)
1792 != ix86_function_regparm (type2, NULL))
1793 return 0;
1794 return 1;
1795 }
1796 \f
1797 /* Return the regparm value for a function with the indicated TYPE and DECL.
1798 DECL may be NULL when calling function indirectly
1799 or considering a libcall. */
1800
1801 static int
1802 ix86_function_regparm (tree type, tree decl)
1803 {
1804 tree attr;
1805 int regparm = ix86_regparm;
1806 bool user_convention = false;
1807
1808 if (!TARGET_64BIT)
1809 {
1810 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1811 if (attr)
1812 {
1813 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1814 user_convention = true;
1815 }
1816
1817 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
1818 {
1819 regparm = 2;
1820 user_convention = true;
1821 }
1822
1823 /* Use register calling convention for local functions when possible. */
1824 if (!TARGET_64BIT && !user_convention && decl
1825 && flag_unit_at_a_time && !profile_flag)
1826 {
1827 struct cgraph_local_info *i = cgraph_local_info (decl);
1828 if (i && i->local)
1829 {
1830 /* We can't use regparm(3) for nested functions as these use
1831 static chain pointer in third argument. */
1832 if (DECL_CONTEXT (decl) && !DECL_NO_STATIC_CHAIN (decl))
1833 regparm = 2;
1834 else
1835 regparm = 3;
1836 }
1837 }
1838 }
1839 return regparm;
1840 }
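/* A hedged sketch of what the local-function path above does in practice
   (hypothetical code, 32-bit compilation with -funit-at-a-time and no
   profiling):

       static int add3 (int a, int b, int c) { return a + b + c; }

   If cgraph marks add3 as local (not exported, address never escapes),
   it is compiled as if it carried regparm (3), so all three arguments
   arrive in registers; a nested function that needs the static chain is
   limited to regparm (2).  */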
1841
1842 /* Return true if EAX is live at the start of the function. Used by
1843 ix86_expand_prologue to determine if we need special help before
1844 calling allocate_stack_worker. */
1845
1846 static bool
1847 ix86_eax_live_at_start_p (void)
1848 {
1849 /* Cheat. Don't bother working forward from ix86_function_regparm
1850 to the function type to whether an actual argument is located in
1851 eax. Instead just look at cfg info, which is still close enough
1852 to correct at this point. This gives false positives for broken
1853 functions that might use uninitialized data that happens to be
1854 allocated in eax, but who cares? */
1855 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->global_live_at_end, 0);
1856 }
1857
1858 /* Value is the number of bytes of arguments automatically
1859 popped when returning from a subroutine call.
1860 FUNDECL is the declaration node of the function (as a tree),
1861 FUNTYPE is the data type of the function (as a tree),
1862 or for a library call it is an identifier node for the subroutine name.
1863 SIZE is the number of bytes of arguments passed on the stack.
1864
1865 On the 80386, the RTD insn may be used to pop them if the number
1866 of args is fixed, but if the number is variable then the caller
1867 must pop them all. RTD can't be used for library calls now
1868 because the library is compiled with the Unix compiler.
1869 Use of RTD is a selectable option, since it is incompatible with
1870 standard Unix calling sequences. If the option is not selected,
1871 the caller must always pop the args.
1872
1873 The attribute stdcall is equivalent to RTD on a per module basis. */
1874
1875 int
1876 ix86_return_pops_args (tree fundecl, tree funtype, int size)
1877 {
1878 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1879
1880 /* Cdecl functions override -mrtd, and never pop the stack. */
1881 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1882
1883 /* Stdcall and fastcall functions will pop the stack if they do not
1884 take variable args. */
1885 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
1886 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
1887 rtd = 1;
1888
1889 if (rtd
1890 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1891 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1892 == void_type_node)))
1893 return size;
1894 }
1895
1896 /* Lose any fake structure return argument if it is passed on the stack. */
1897 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
1898 && !TARGET_64BIT
1899 && !KEEP_AGGREGATE_RETURN_POINTER)
1900 {
1901 int nregs = ix86_function_regparm (funtype, fundecl);
1902
1903 if (!nregs)
1904 return GET_MODE_SIZE (Pmode);
1905 }
1906
1907 return 0;
1908 }
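/* A worked example of the rule above (hypothetical prototype):

       int __attribute__ ((stdcall)) f (int a, int b);

   returns 8 here, so the callee pops its arguments with "ret $8"; a
   stdcall function declared with an ellipsis returns 0 (the caller pops),
   and a plain function returns 0 unless -mrtd is in effect.  */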
1909 \f
1910 /* Argument support functions. */
1911
1912 /* Return true when register may be used to pass function parameters. */
1913 bool
1914 ix86_function_arg_regno_p (int regno)
1915 {
1916 int i;
1917 if (!TARGET_64BIT)
1918 return (regno < REGPARM_MAX
1919 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1920 if (SSE_REGNO_P (regno) && TARGET_SSE)
1921 return true;
1922 /* RAX is used as hidden argument to va_arg functions. */
1923 if (!regno)
1924 return true;
1925 for (i = 0; i < REGPARM_MAX; i++)
1926 if (regno == x86_64_int_parameter_registers[i])
1927 return true;
1928 return false;
1929 }
1930
1931 /* Return true if we do not know how to pass TYPE solely in registers. */
1932
1933 static bool
1934 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
1935 {
1936 if (must_pass_in_stack_var_size_or_pad (mode, type))
1937 return true;
1938
1939 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
1940 The layout_type routine is crafty and tries to trick us into passing
1941 currently unsupported vector types on the stack by using TImode. */
1942 return (!TARGET_64BIT && mode == TImode
1943 && type && TREE_CODE (type) != VECTOR_TYPE);
1944 }
1945
1946 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1947 for a call to a function whose data type is FNTYPE.
1948 For a library call, FNTYPE is 0. */
1949
1950 void
1951 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
1952 tree fntype, /* tree ptr for function decl */
1953 rtx libname, /* SYMBOL_REF of library name or 0 */
1954 tree fndecl)
1955 {
1956 static CUMULATIVE_ARGS zero_cum;
1957 tree param, next_param;
1958
1959 if (TARGET_DEBUG_ARG)
1960 {
1961 fprintf (stderr, "\ninit_cumulative_args (");
1962 if (fntype)
1963 fprintf (stderr, "fntype code = %s, ret code = %s",
1964 tree_code_name[(int) TREE_CODE (fntype)],
1965 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1966 else
1967 fprintf (stderr, "no fntype");
1968
1969 if (libname)
1970 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1971 }
1972
1973 *cum = zero_cum;
1974
1975 /* Set up the number of registers to use for passing arguments. */
1976 if (fntype)
1977 cum->nregs = ix86_function_regparm (fntype, fndecl);
1978 else
1979 cum->nregs = ix86_regparm;
1980 if (TARGET_SSE)
1981 cum->sse_nregs = SSE_REGPARM_MAX;
1982 if (TARGET_MMX)
1983 cum->mmx_nregs = MMX_REGPARM_MAX;
1984 cum->warn_sse = true;
1985 cum->warn_mmx = true;
1986 cum->maybe_vaarg = false;
1987
1988 /* Use ecx and edx registers if function has fastcall attribute */
1989 if (fntype && !TARGET_64BIT)
1990 {
1991 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
1992 {
1993 cum->nregs = 2;
1994 cum->fastcall = 1;
1995 }
1996 }
1997
1998 /* Determine if this function has variable arguments. This is
1999 indicated by the last argument being 'void_type_node' if there
2000 are no variable arguments. If there are variable arguments, then
2001 we won't pass anything in registers in 32-bit mode. */
2002
2003 if (cum->nregs || cum->mmx_nregs || cum->sse_nregs)
2004 {
2005 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
2006 param != 0; param = next_param)
2007 {
2008 next_param = TREE_CHAIN (param);
2009 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
2010 {
2011 if (!TARGET_64BIT)
2012 {
2013 cum->nregs = 0;
2014 cum->sse_nregs = 0;
2015 cum->mmx_nregs = 0;
2016 cum->warn_sse = 0;
2017 cum->warn_mmx = 0;
2018 cum->fastcall = 0;
2019 }
2020 cum->maybe_vaarg = true;
2021 }
2022 }
2023 }
2024 if ((!fntype && !libname)
2025 || (fntype && !TYPE_ARG_TYPES (fntype)))
2026 cum->maybe_vaarg = 1;
2027
2028 if (TARGET_DEBUG_ARG)
2029 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
2030
2031 return;
2032 }
2033
2034 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
2035 But in the case of vector types, it is some vector mode.
2036
2037 When we have only some of our vector isa extensions enabled, then there
2038 are some modes for which vector_mode_supported_p is false. For these
2039 modes, the generic vector support in gcc will choose some non-vector mode
2040 in order to implement the type. By computing the natural mode, we'll
2041 select the proper ABI location for the operand and not depend on whatever
2042 the middle-end decides to do with these vector types. */
2043
2044 static enum machine_mode
2045 type_natural_mode (tree type)
2046 {
2047 enum machine_mode mode = TYPE_MODE (type);
2048
2049 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
2050 {
2051 HOST_WIDE_INT size = int_size_in_bytes (type);
2052 if ((size == 8 || size == 16)
2053 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
2054 && TYPE_VECTOR_SUBPARTS (type) > 1)
2055 {
2056 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
2057
2058 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
2059 mode = MIN_MODE_VECTOR_FLOAT;
2060 else
2061 mode = MIN_MODE_VECTOR_INT;
2062
2063 /* Get the mode which has this inner mode and number of units. */
2064 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
2065 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
2066 && GET_MODE_INNER (mode) == innermode)
2067 return mode;
2068
2069 abort ();
2070 }
2071 }
2072
2073 return mode;
2074 }
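/* For example, with GCC's generic vector extension (a hedged sketch):

       typedef int v2si __attribute__ ((vector_size (8)));

   Even when MMX is disabled and the middle end would lay the type out in
   a non-vector mode such as DImode, type_natural_mode still reports
   V2SImode, so the ABI decision is made from the vector mode.  */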
2075
2076 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
2077 this may not agree with the mode that the type system has chosen for the
2078 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
2079 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
2080
2081 static rtx
2082 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
2083 unsigned int regno)
2084 {
2085 rtx tmp;
2086
2087 if (orig_mode != BLKmode)
2088 tmp = gen_rtx_REG (orig_mode, regno);
2089 else
2090 {
2091 tmp = gen_rtx_REG (mode, regno);
2092 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
2093 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
2094 }
2095
2096 return tmp;
2097 }
2098
2099 /* x86-64 register passing implementation. See the x86-64 ABI for details. The goal
2100 of this code is to classify each eightbyte of an incoming argument by register
2101 class and assign registers accordingly. */
2102
2103 /* Return the union class of CLASS1 and CLASS2.
2104 See the x86-64 PS ABI for details. */
2105
2106 static enum x86_64_reg_class
2107 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
2108 {
2109 /* Rule #1: If both classes are equal, this is the resulting class. */
2110 if (class1 == class2)
2111 return class1;
2112
2113 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
2114 the other class. */
2115 if (class1 == X86_64_NO_CLASS)
2116 return class2;
2117 if (class2 == X86_64_NO_CLASS)
2118 return class1;
2119
2120 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
2121 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
2122 return X86_64_MEMORY_CLASS;
2123
2124 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
2125 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
2126 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
2127 return X86_64_INTEGERSI_CLASS;
2128 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
2129 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
2130 return X86_64_INTEGER_CLASS;
2131
2132 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
2133 MEMORY is used. */
2134 if (class1 == X86_64_X87_CLASS
2135 || class1 == X86_64_X87UP_CLASS
2136 || class1 == X86_64_COMPLEX_X87_CLASS
2137 || class2 == X86_64_X87_CLASS
2138 || class2 == X86_64_X87UP_CLASS
2139 || class2 == X86_64_COMPLEX_X87_CLASS)
2140 return X86_64_MEMORY_CLASS;
2141
2142 /* Rule #6: Otherwise class SSE is used. */
2143 return X86_64_SSE_CLASS;
2144 }
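/* A few concrete instances of the merge rules above, for orientation:
   merging X86_64_INTEGERSI_CLASS with X86_64_SSE_CLASS yields
   X86_64_INTEGER_CLASS (rule #4); merging X86_64_SSE_CLASS with
   X86_64_SSEDF_CLASS yields X86_64_SSE_CLASS (rule #6); merging
   X86_64_X87_CLASS with an SSE class yields X86_64_MEMORY_CLASS
   (rule #5), forcing the eightbyte to memory.  */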
2145
2146 /* Classify the argument of type TYPE and mode MODE.
2147 CLASSES will be filled by the register class used to pass each word
2148 of the operand. The number of words is returned. In case the parameter
2149 should be passed in memory, 0 is returned. As a special case for zero
2150 sized containers, classes[0] will be NO_CLASS and 1 is returned.
2151
2152 BIT_OFFSET is used internally for handling records; it specifies the
2153 offset of the argument in bits, modulo 256, to avoid overflow cases.
2154
2155 See the x86-64 PS ABI for details.
2156 */
2157
2158 static int
2159 classify_argument (enum machine_mode mode, tree type,
2160 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
2161 {
2162 HOST_WIDE_INT bytes =
2163 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2164 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2165
2166 /* Variable sized entities are always passed/returned in memory. */
2167 if (bytes < 0)
2168 return 0;
2169
2170 if (mode != VOIDmode
2171 && targetm.calls.must_pass_in_stack (mode, type))
2172 return 0;
2173
2174 if (type && AGGREGATE_TYPE_P (type))
2175 {
2176 int i;
2177 tree field;
2178 enum x86_64_reg_class subclasses[MAX_CLASSES];
2179
2180 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
2181 if (bytes > 16)
2182 return 0;
2183
2184 for (i = 0; i < words; i++)
2185 classes[i] = X86_64_NO_CLASS;
2186
2187 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
2188 signal the memory class, so handle them as a special case. */
2189 if (!words)
2190 {
2191 classes[0] = X86_64_NO_CLASS;
2192 return 1;
2193 }
2194
2195 /* Classify each field of record and merge classes. */
2196 if (TREE_CODE (type) == RECORD_TYPE)
2197 {
2198 /* For classes first merge in the field of the subclasses. */
2199 if (TYPE_BINFO (type))
2200 {
2201 tree binfo, base_binfo;
2202 int basenum;
2203
2204 for (binfo = TYPE_BINFO (type), basenum = 0;
2205 BINFO_BASE_ITERATE (binfo, basenum, base_binfo); basenum++)
2206 {
2207 int num;
2208 int offset = tree_low_cst (BINFO_OFFSET (base_binfo), 0) * 8;
2209 tree type = BINFO_TYPE (base_binfo);
2210
2211 num = classify_argument (TYPE_MODE (type),
2212 type, subclasses,
2213 (offset + bit_offset) % 256);
2214 if (!num)
2215 return 0;
2216 for (i = 0; i < num; i++)
2217 {
2218 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2219 classes[i + pos] =
2220 merge_classes (subclasses[i], classes[i + pos]);
2221 }
2222 }
2223 }
2224 /* And now merge the fields of structure. */
2225 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2226 {
2227 if (TREE_CODE (field) == FIELD_DECL)
2228 {
2229 int num;
2230
2231 /* Bitfields are always classified as integer. Handle them
2232 early, since later code would consider them to be
2233 misaligned integers. */
2234 if (DECL_BIT_FIELD (field))
2235 {
2236 for (i = int_bit_position (field) / 8 / 8;
2237 i < (int_bit_position (field)
2238 + tree_low_cst (DECL_SIZE (field), 0)
2239 + 63) / 8 / 8; i++)
2240 classes[i] =
2241 merge_classes (X86_64_INTEGER_CLASS,
2242 classes[i]);
2243 }
2244 else
2245 {
2246 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2247 TREE_TYPE (field), subclasses,
2248 (int_bit_position (field)
2249 + bit_offset) % 256);
2250 if (!num)
2251 return 0;
2252 for (i = 0; i < num; i++)
2253 {
2254 int pos =
2255 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
2256 classes[i + pos] =
2257 merge_classes (subclasses[i], classes[i + pos]);
2258 }
2259 }
2260 }
2261 }
2262 }
2263 /* Arrays are handled as small records. */
2264 else if (TREE_CODE (type) == ARRAY_TYPE)
2265 {
2266 int num;
2267 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2268 TREE_TYPE (type), subclasses, bit_offset);
2269 if (!num)
2270 return 0;
2271
2272 /* The partial classes are now full classes. */
2273 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2274 subclasses[0] = X86_64_SSE_CLASS;
2275 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
2276 subclasses[0] = X86_64_INTEGER_CLASS;
2277
2278 for (i = 0; i < words; i++)
2279 classes[i] = subclasses[i % num];
2280 }
2281 /* Unions are similar to RECORD_TYPE but offset is always 0. */
2282 else if (TREE_CODE (type) == UNION_TYPE
2283 || TREE_CODE (type) == QUAL_UNION_TYPE)
2284 {
2285 /* For classes first merge in the field of the subclasses. */
2286 if (TYPE_BINFO (type))
2287 {
2288 tree binfo, base_binfo;
2289 int basenum;
2290
2291 for (binfo = TYPE_BINFO (type), basenum = 0;
2292 BINFO_BASE_ITERATE (binfo, basenum, base_binfo); basenum++)
2293 {
2294 int num;
2295 int offset = tree_low_cst (BINFO_OFFSET (base_binfo), 0) * 8;
2296 tree type = BINFO_TYPE (base_binfo);
2297
2298 num = classify_argument (TYPE_MODE (type),
2299 type, subclasses,
2300 (offset + (bit_offset % 64)) % 256);
2301 if (!num)
2302 return 0;
2303 for (i = 0; i < num; i++)
2304 {
2305 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2306 classes[i + pos] =
2307 merge_classes (subclasses[i], classes[i + pos]);
2308 }
2309 }
2310 }
2311 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2312 {
2313 if (TREE_CODE (field) == FIELD_DECL)
2314 {
2315 int num;
2316 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2317 TREE_TYPE (field), subclasses,
2318 bit_offset);
2319 if (!num)
2320 return 0;
2321 for (i = 0; i < num; i++)
2322 classes[i] = merge_classes (subclasses[i], classes[i]);
2323 }
2324 }
2325 }
2326 else
2327 abort ();
2328
2329 /* Final merger cleanup. */
2330 for (i = 0; i < words; i++)
2331 {
2332 /* If one class is MEMORY, everything should be passed in
2333 memory. */
2334 if (classes[i] == X86_64_MEMORY_CLASS)
2335 return 0;
2336
2337 /* The X86_64_SSEUP_CLASS should always be preceded by
2338 X86_64_SSE_CLASS. */
2339 if (classes[i] == X86_64_SSEUP_CLASS
2340 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
2341 classes[i] = X86_64_SSE_CLASS;
2342
2343 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
2344 if (classes[i] == X86_64_X87UP_CLASS
2345 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
2346 classes[i] = X86_64_SSE_CLASS;
2347 }
2348 return words;
2349 }
2350
2351 /* Compute alignment needed. We align all types to natural boundaries with
2352 exception of XFmode that is aligned to 64bits. */
2353 if (mode != VOIDmode && mode != BLKmode)
2354 {
2355 int mode_alignment = GET_MODE_BITSIZE (mode);
2356
2357 if (mode == XFmode)
2358 mode_alignment = 128;
2359 else if (mode == XCmode)
2360 mode_alignment = 256;
2361 if (COMPLEX_MODE_P (mode))
2362 mode_alignment /= 2;
2363 /* Misaligned fields are always returned in memory. */
2364 if (bit_offset % mode_alignment)
2365 return 0;
2366 }
2367
2368 /* for V1xx modes, just use the base mode */
2369 if (VECTOR_MODE_P (mode)
2370 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
2371 mode = GET_MODE_INNER (mode);
2372
2373 /* Classification of atomic types. */
2374 switch (mode)
2375 {
2376 case DImode:
2377 case SImode:
2378 case HImode:
2379 case QImode:
2380 case CSImode:
2381 case CHImode:
2382 case CQImode:
2383 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2384 classes[0] = X86_64_INTEGERSI_CLASS;
2385 else
2386 classes[0] = X86_64_INTEGER_CLASS;
2387 return 1;
2388 case CDImode:
2389 case TImode:
2390 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2391 return 2;
2392 case CTImode:
2393 return 0;
2394 case SFmode:
2395 if (!(bit_offset % 64))
2396 classes[0] = X86_64_SSESF_CLASS;
2397 else
2398 classes[0] = X86_64_SSE_CLASS;
2399 return 1;
2400 case DFmode:
2401 classes[0] = X86_64_SSEDF_CLASS;
2402 return 1;
2403 case XFmode:
2404 classes[0] = X86_64_X87_CLASS;
2405 classes[1] = X86_64_X87UP_CLASS;
2406 return 2;
2407 case TFmode:
2408 classes[0] = X86_64_SSE_CLASS;
2409 classes[1] = X86_64_SSEUP_CLASS;
2410 return 2;
2411 case SCmode:
2412 classes[0] = X86_64_SSE_CLASS;
2413 return 1;
2414 case DCmode:
2415 classes[0] = X86_64_SSEDF_CLASS;
2416 classes[1] = X86_64_SSEDF_CLASS;
2417 return 2;
2418 case XCmode:
2419 classes[0] = X86_64_COMPLEX_X87_CLASS;
2420 return 1;
2421 case TCmode:
2422 /* This mode is larger than 16 bytes. */
2423 return 0;
2424 case V4SFmode:
2425 case V4SImode:
2426 case V16QImode:
2427 case V8HImode:
2428 case V2DFmode:
2429 case V2DImode:
2430 classes[0] = X86_64_SSE_CLASS;
2431 classes[1] = X86_64_SSEUP_CLASS;
2432 return 2;
2433 case V2SFmode:
2434 case V2SImode:
2435 case V4HImode:
2436 case V8QImode:
2437 classes[0] = X86_64_SSE_CLASS;
2438 return 1;
2439 case BLKmode:
2440 case VOIDmode:
2441 return 0;
2442 default:
2443 if (VECTOR_MODE_P (mode))
2444 {
2445 if (bytes > 16)
2446 return 0;
2447 if (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT)
2448 {
2449 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2450 classes[0] = X86_64_INTEGERSI_CLASS;
2451 else
2452 classes[0] = X86_64_INTEGER_CLASS;
2453 classes[1] = X86_64_INTEGER_CLASS;
2454 return 1 + (bytes > 8);
2455 }
2456 }
2457 abort ();
2458 }
2459 }
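/* A hedged worked example of the classification above (hypothetical type):

       struct p { double d; long l; };

   is 16 bytes, i.e. two eightbytes.  The first holds the double and is
   classified X86_64_SSEDF_CLASS, the second holds the long and is
   classified X86_64_INTEGER_CLASS, so classify_argument returns 2 and
   construct_container later places the pieces in an SSE register and a
   general-purpose register.  A struct larger than 16 bytes is always
   passed in memory (the function returns 0).  */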
2460
2461 /* Examine the argument and set the number of registers required in each
2462 class. Return 0 iff the parameter should be passed in memory. */
2463 static int
2464 examine_argument (enum machine_mode mode, tree type, int in_return,
2465 int *int_nregs, int *sse_nregs)
2466 {
2467 enum x86_64_reg_class class[MAX_CLASSES];
2468 int n = classify_argument (mode, type, class, 0);
2469
2470 *int_nregs = 0;
2471 *sse_nregs = 0;
2472 if (!n)
2473 return 0;
2474 for (n--; n >= 0; n--)
2475 switch (class[n])
2476 {
2477 case X86_64_INTEGER_CLASS:
2478 case X86_64_INTEGERSI_CLASS:
2479 (*int_nregs)++;
2480 break;
2481 case X86_64_SSE_CLASS:
2482 case X86_64_SSESF_CLASS:
2483 case X86_64_SSEDF_CLASS:
2484 (*sse_nregs)++;
2485 break;
2486 case X86_64_NO_CLASS:
2487 case X86_64_SSEUP_CLASS:
2488 break;
2489 case X86_64_X87_CLASS:
2490 case X86_64_X87UP_CLASS:
2491 if (!in_return)
2492 return 0;
2493 break;
2494 case X86_64_COMPLEX_X87_CLASS:
2495 return in_return ? 2 : 0;
2496 case X86_64_MEMORY_CLASS:
2497 abort ();
2498 }
2499 return 1;
2500 }
2501
2502 /* Construct container for the argument used by GCC interface. See
2503 FUNCTION_ARG for the detailed description. */
2504
2505 static rtx
2506 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
2507 tree type, int in_return, int nintregs, int nsseregs,
2508 const int *intreg, int sse_regno)
2509 {
2510 enum machine_mode tmpmode;
2511 int bytes =
2512 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2513 enum x86_64_reg_class class[MAX_CLASSES];
2514 int n;
2515 int i;
2516 int nexps = 0;
2517 int needed_sseregs, needed_intregs;
2518 rtx exp[MAX_CLASSES];
2519 rtx ret;
2520
2521 n = classify_argument (mode, type, class, 0);
2522 if (TARGET_DEBUG_ARG)
2523 {
2524 if (!n)
2525 fprintf (stderr, "Memory class\n");
2526 else
2527 {
2528 fprintf (stderr, "Classes:");
2529 for (i = 0; i < n; i++)
2530 {
2531 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2532 }
2533 fprintf (stderr, "\n");
2534 }
2535 }
2536 if (!n)
2537 return NULL;
2538 if (!examine_argument (mode, type, in_return, &needed_intregs,
2539 &needed_sseregs))
2540 return NULL;
2541 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2542 return NULL;
2543
2544 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
2545 some less clueful developer tries to use floating-point anyway. */
2546 if (needed_sseregs && !TARGET_SSE)
2547 {
2548 static bool issued_error;
2549 if (!issued_error)
2550 {
2551 issued_error = true;
2552 if (in_return)
2553 error ("SSE register return with SSE disabled");
2554 else
2555 error ("SSE register argument with SSE disabled");
2556 }
2557 return NULL;
2558 }
2559
2560 /* First construct simple cases. Avoid SCmode, since we want to use
2561 single register to pass this type. */
2562 if (n == 1 && mode != SCmode)
2563 switch (class[0])
2564 {
2565 case X86_64_INTEGER_CLASS:
2566 case X86_64_INTEGERSI_CLASS:
2567 return gen_rtx_REG (mode, intreg[0]);
2568 case X86_64_SSE_CLASS:
2569 case X86_64_SSESF_CLASS:
2570 case X86_64_SSEDF_CLASS:
2571 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
2572 case X86_64_X87_CLASS:
2573 case X86_64_COMPLEX_X87_CLASS:
2574 return gen_rtx_REG (mode, FIRST_STACK_REG);
2575 case X86_64_NO_CLASS:
2576 /* Zero sized array, struct or class. */
2577 return NULL;
2578 default:
2579 abort ();
2580 }
2581 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
2582 && mode != BLKmode)
2583 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2584 if (n == 2
2585 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2586 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
2587 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2588 && class[1] == X86_64_INTEGER_CLASS
2589 && (mode == CDImode || mode == TImode || mode == TFmode)
2590 && intreg[0] + 1 == intreg[1])
2591 return gen_rtx_REG (mode, intreg[0]);
2592
2593 /* Otherwise figure out the entries of the PARALLEL. */
2594 for (i = 0; i < n; i++)
2595 {
2596 switch (class[i])
2597 {
2598 case X86_64_NO_CLASS:
2599 break;
2600 case X86_64_INTEGER_CLASS:
2601 case X86_64_INTEGERSI_CLASS:
2602 /* Merge TImodes on aligned occasions here too. */
2603 if (i * 8 + 8 > bytes)
2604 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2605 else if (class[i] == X86_64_INTEGERSI_CLASS)
2606 tmpmode = SImode;
2607 else
2608 tmpmode = DImode;
2609 /* We've requested 24 bytes we don't have mode for. Use DImode. */
2610 if (tmpmode == BLKmode)
2611 tmpmode = DImode;
2612 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2613 gen_rtx_REG (tmpmode, *intreg),
2614 GEN_INT (i*8));
2615 intreg++;
2616 break;
2617 case X86_64_SSESF_CLASS:
2618 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2619 gen_rtx_REG (SFmode,
2620 SSE_REGNO (sse_regno)),
2621 GEN_INT (i*8));
2622 sse_regno++;
2623 break;
2624 case X86_64_SSEDF_CLASS:
2625 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2626 gen_rtx_REG (DFmode,
2627 SSE_REGNO (sse_regno)),
2628 GEN_INT (i*8));
2629 sse_regno++;
2630 break;
2631 case X86_64_SSE_CLASS:
2632 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2633 tmpmode = TImode;
2634 else
2635 tmpmode = DImode;
2636 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2637 gen_rtx_REG (tmpmode,
2638 SSE_REGNO (sse_regno)),
2639 GEN_INT (i*8));
2640 if (tmpmode == TImode)
2641 i++;
2642 sse_regno++;
2643 break;
2644 default:
2645 abort ();
2646 }
2647 }
2648 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2649 for (i = 0; i < nexps; i++)
2650 XVECEXP (ret, 0, i) = exp [i];
2651 return ret;
2652 }
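/* Continuing the struct { double d; long l; } sketch from
   classify_argument: with classes SSEDF and INTEGER, and this being the
   first argument, the loop above builds roughly

       (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                  (expr_list (reg:DI di)   (const_int 8))])

   i.e. each eightbyte is paired with its register and its byte offset.  */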
2653
2654 /* Update the data in CUM to advance over an argument
2655 of mode MODE and data type TYPE.
2656 (TYPE is null for libcalls where that information may not be available.) */
2657
2658 void
2659 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
2660 tree type, int named)
2661 {
2662 int bytes =
2663 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2664 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2665
2666 if (TARGET_DEBUG_ARG)
2667 fprintf (stderr, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
2668 "mode=%s, named=%d)\n\n",
2669 words, cum->words, cum->nregs, cum->sse_nregs,
2670 GET_MODE_NAME (mode), named);
2671 if (TARGET_64BIT)
2672 {
2673 int int_nregs, sse_nregs;
2674 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2675 cum->words += words;
2676 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2677 {
2678 cum->nregs -= int_nregs;
2679 cum->sse_nregs -= sse_nregs;
2680 cum->regno += int_nregs;
2681 cum->sse_regno += sse_nregs;
2682 }
2683 else
2684 cum->words += words;
2685 }
2686 else
2687 {
2688 if (TARGET_SSE && SSE_REG_MODE_P (mode)
2689 && (!type || !AGGREGATE_TYPE_P (type)))
2690 {
2691 cum->sse_words += words;
2692 cum->sse_nregs -= 1;
2693 cum->sse_regno += 1;
2694 if (cum->sse_nregs <= 0)
2695 {
2696 cum->sse_nregs = 0;
2697 cum->sse_regno = 0;
2698 }
2699 }
2700 else if (TARGET_MMX && MMX_REG_MODE_P (mode)
2701 && (!type || !AGGREGATE_TYPE_P (type)))
2702 {
2703 cum->mmx_words += words;
2704 cum->mmx_nregs -= 1;
2705 cum->mmx_regno += 1;
2706 if (cum->mmx_nregs <= 0)
2707 {
2708 cum->mmx_nregs = 0;
2709 cum->mmx_regno = 0;
2710 }
2711 }
2712 else
2713 {
2714 cum->words += words;
2715 cum->nregs -= words;
2716 cum->regno += words;
2717
2718 if (cum->nregs <= 0)
2719 {
2720 cum->nregs = 0;
2721 cum->regno = 0;
2722 }
2723 }
2724 }
2725 return;
2726 }
2727
2728 /* Define where to put the arguments to a function.
2729 Value is zero to push the argument on the stack,
2730 or a hard register in which to store the argument.
2731
2732 MODE is the argument's machine mode.
2733 TYPE is the data type of the argument (as a tree).
2734 This is null for libcalls where that information may
2735 not be available.
2736 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2737 the preceding args and about the function being called.
2738 NAMED is nonzero if this argument is a named parameter
2739 (otherwise it is an extra parameter matching an ellipsis). */
2740
2741 rtx
2742 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode orig_mode,
2743 tree type, int named)
2744 {
2745 enum machine_mode mode = orig_mode;
2746 rtx ret = NULL_RTX;
2747 int bytes =
2748 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2749 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2750 static bool warnedsse, warnedmmx;
2751
2752 /* To simplify the code below, represent vector types with a vector mode
2753 even if MMX/SSE are not active. */
2754 if (type && TREE_CODE (type) == VECTOR_TYPE)
2755 mode = type_natural_mode (type);
2756
2757 /* Handle a hidden AL argument containing number of registers for varargs
2758 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
2759 any AL settings. */
2760 if (mode == VOIDmode)
2761 {
2762 if (TARGET_64BIT)
2763 return GEN_INT (cum->maybe_vaarg
2764 ? (cum->sse_nregs < 0
2765 ? SSE_REGPARM_MAX
2766 : cum->sse_regno)
2767 : -1);
2768 else
2769 return constm1_rtx;
2770 }
2771 if (TARGET_64BIT)
2772 ret = construct_container (mode, orig_mode, type, 0, cum->nregs,
2773 cum->sse_nregs,
2774 &x86_64_int_parameter_registers [cum->regno],
2775 cum->sse_regno);
2776 else
2777 switch (mode)
2778 {
2779 /* For now, pass fp/complex values on the stack. */
2780 default:
2781 break;
2782
2783 case BLKmode:
2784 if (bytes < 0)
2785 break;
2786 /* FALLTHRU */
2787 case DImode:
2788 case SImode:
2789 case HImode:
2790 case QImode:
2791 if (words <= cum->nregs)
2792 {
2793 int regno = cum->regno;
2794
2795 /* Fastcall allocates the first two DWORD (SImode) or
2796 smaller arguments to ECX and EDX. */
2797 if (cum->fastcall)
2798 {
2799 if (mode == BLKmode || mode == DImode)
2800 break;
2801
2802 /* ECX, not EAX, is the first allocated register. */
2803 if (regno == 0)
2804 regno = 2;
2805 }
2806 ret = gen_rtx_REG (mode, regno);
2807 }
2808 break;
2809 case TImode:
2810 case V16QImode:
2811 case V8HImode:
2812 case V4SImode:
2813 case V2DImode:
2814 case V4SFmode:
2815 case V2DFmode:
2816 if (!type || !AGGREGATE_TYPE_P (type))
2817 {
2818 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
2819 {
2820 warnedsse = true;
2821 warning ("SSE vector argument without SSE enabled "
2822 "changes the ABI");
2823 }
2824 if (cum->sse_nregs)
2825 ret = gen_reg_or_parallel (mode, orig_mode,
2826 cum->sse_regno + FIRST_SSE_REG);
2827 }
2828 break;
2829 case V8QImode:
2830 case V4HImode:
2831 case V2SImode:
2832 case V2SFmode:
2833 if (!type || !AGGREGATE_TYPE_P (type))
2834 {
2835 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
2836 {
2837 warnedmmx = true;
2838 warning ("MMX vector argument without MMX enabled "
2839 "changes the ABI");
2840 }
2841 if (cum->mmx_nregs)
2842 ret = gen_reg_or_parallel (mode, orig_mode,
2843 cum->mmx_regno + FIRST_MMX_REG);
2844 }
2845 break;
2846 }
2847
2848 if (TARGET_DEBUG_ARG)
2849 {
2850 fprintf (stderr,
2851 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2852 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2853
2854 if (ret)
2855 print_simple_rtl (stderr, ret);
2856 else
2857 fprintf (stderr, ", stack");
2858
2859 fprintf (stderr, " )\n");
2860 }
2861
2862 return ret;
2863 }
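/* A 32-bit example of the fastcall path above (hypothetical prototype):

       void __attribute__ ((fastcall)) f (int a, int b, int c);

   A is assigned ECX (regno 0 is remapped to 2), B is assigned EDX, and C,
   the two fastcall registers being exhausted, is passed on the stack.
   DImode and BLKmode arguments always go to the stack for fastcall.  */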
2864
2865 /* A C expression that indicates when an argument must be passed by
2866 reference. If nonzero for an argument, a copy of that argument is
2867 made in memory and a pointer to the argument is passed instead of
2868 the argument itself. The pointer is passed in whatever way is
2869 appropriate for passing a pointer to that type. */
2870
2871 static bool
2872 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
2873 enum machine_mode mode ATTRIBUTE_UNUSED,
2874 tree type, bool named ATTRIBUTE_UNUSED)
2875 {
2876 if (!TARGET_64BIT)
2877 return 0;
2878
2879 if (type && int_size_in_bytes (type) == -1)
2880 {
2881 if (TARGET_DEBUG_ARG)
2882 fprintf (stderr, "function_arg_pass_by_reference\n");
2883 return 1;
2884 }
2885
2886 return 0;
2887 }
2888
2889 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
2890 ABI. Only called if TARGET_SSE. */
2891 static bool
2892 contains_128bit_aligned_vector_p (tree type)
2893 {
2894 enum machine_mode mode = TYPE_MODE (type);
2895 if (SSE_REG_MODE_P (mode)
2896 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
2897 return true;
2898 if (TYPE_ALIGN (type) < 128)
2899 return false;
2900
2901 if (AGGREGATE_TYPE_P (type))
2902 {
2903 /* Walk the aggregates recursively. */
2904 if (TREE_CODE (type) == RECORD_TYPE
2905 || TREE_CODE (type) == UNION_TYPE
2906 || TREE_CODE (type) == QUAL_UNION_TYPE)
2907 {
2908 tree field;
2909
2910 if (TYPE_BINFO (type))
2911 {
2912 tree binfo, base_binfo;
2913 int i;
2914
2915 for (binfo = TYPE_BINFO (type), i = 0;
2916 BINFO_BASE_ITERATE (binfo, i, base_binfo); i++)
2917 if (contains_128bit_aligned_vector_p (BINFO_TYPE (base_binfo)))
2918 return true;
2919 }
2920 /* And now merge the fields of structure. */
2921 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2922 {
2923 if (TREE_CODE (field) == FIELD_DECL
2924 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
2925 return true;
2926 }
2927 }
2928 /* Just for use if some language passes arrays by value. */
2929 else if (TREE_CODE (type) == ARRAY_TYPE)
2930 {
2931 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
2932 return true;
2933 }
2934 else
2935 abort ();
2936 }
2937 return false;
2938 }
2939
2940 /* Gives the alignment boundary, in bits, of an argument with the
2941 specified mode and type. */
2942
2943 int
2944 ix86_function_arg_boundary (enum machine_mode mode, tree type)
2945 {
2946 int align;
2947 if (type)
2948 align = TYPE_ALIGN (type);
2949 else
2950 align = GET_MODE_ALIGNMENT (mode);
2951 if (align < PARM_BOUNDARY)
2952 align = PARM_BOUNDARY;
2953 if (!TARGET_64BIT)
2954 {
2955 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
2956 make an exception for SSE modes since these require 128bit
2957 alignment.
2958
2959 The handling here differs from field_alignment. ICC aligns MMX
2960 arguments to 4 byte boundaries, while structure fields are aligned
2961 to 8 byte boundaries. */
2962 if (!TARGET_SSE)
2963 align = PARM_BOUNDARY;
2964 else if (!type)
2965 {
2966 if (!SSE_REG_MODE_P (mode))
2967 align = PARM_BOUNDARY;
2968 }
2969 else
2970 {
2971 if (!contains_128bit_aligned_vector_p (type))
2972 align = PARM_BOUNDARY;
2973 }
2974 }
2975 if (align > 128)
2976 align = 128;
2977 return align;
2978 }
2979
2980 /* Return true if N is a possible register number of function value. */
2981 bool
2982 ix86_function_value_regno_p (int regno)
2983 {
2984 if (!TARGET_64BIT)
2985 {
2986 return ((regno) == 0
2987 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2988 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2989 }
2990 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2991 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2992 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2993 }
2994
2995 /* Define how to find the value returned by a function.
2996 VALTYPE is the data type of the value (as a tree).
2997 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2998 otherwise, FUNC is 0. */
2999 rtx
3000 ix86_function_value (tree valtype)
3001 {
3002 if (TARGET_64BIT)
3003 {
3004 rtx ret = construct_container (type_natural_mode (valtype),
3005 TYPE_MODE (valtype), valtype,
3006 1, REGPARM_MAX, SSE_REGPARM_MAX,
3007 x86_64_int_return_registers, 0);
3008 /* For zero sized structures, construct_container returns NULL, but we
3009 need to keep the rest of the compiler happy by returning a meaningful value. */
3010 if (!ret)
3011 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
3012 return ret;
3013 }
3014 else
3015 return gen_rtx_REG (TYPE_MODE (valtype),
3016 ix86_value_regno (TYPE_MODE (valtype)));
3017 }
3018
3019 /* Return nonzero iff TYPE is returned in memory. */
3020 int
3021 ix86_return_in_memory (tree type)
3022 {
3023 int needed_intregs, needed_sseregs, size;
3024 enum machine_mode mode = TYPE_MODE (type);
3025
3026 if (TARGET_64BIT)
3027 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
3028
3029 if (mode == BLKmode)
3030 return 1;
3031
3032 size = int_size_in_bytes (type);
3033
3034 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
3035 return 0;
3036
3037 if (VECTOR_MODE_P (mode) || mode == TImode)
3038 {
3039 /* User-created vectors small enough to fit in EAX. */
3040 if (size < 8)
3041 return 0;
3042
3043 /* MMX/3dNow values are returned on the stack, since we've
3044 got to EMMS/FEMMS before returning. */
3045 if (size == 8)
3046 return 1;
3047
3048 /* SSE values are returned in XMM0, except when it doesn't exist. */
3049 if (size == 16)
3050 return (TARGET_SSE ? 0 : 1);
3051 }
3052
3053 if (mode == XFmode)
3054 return 0;
3055
3056 if (size > 12)
3057 return 1;
3058 return 0;
3059 }
3060
3061 /* When returning SSE vector types, we have a choice of either
3062 (1) being abi incompatible with a -march switch, or
3063 (2) generating an error.
3064 Given no good solution, I think the safest thing is one warning.
3065 The user won't be able to use -Werror, but....
3066
3067 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
3068 called in response to actually generating a caller or callee that
3069 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
3070 via aggregate_value_p for general type probing from tree-ssa. */
3071
3072 static rtx
3073 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
3074 {
3075 static bool warned;
3076
3077 if (!TARGET_SSE && type && !warned)
3078 {
3079 /* Look at the return type of the function, not the function type. */
3080 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
3081
3082 if (mode == TImode
3083 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3084 {
3085 warned = true;
3086 warning ("SSE vector return without SSE enabled changes the ABI");
3087 }
3088 }
3089
3090 return NULL;
3091 }
3092
3093 /* Define how to find the value returned by a library function
3094 assuming the value has mode MODE. */
3095 rtx
3096 ix86_libcall_value (enum machine_mode mode)
3097 {
3098 if (TARGET_64BIT)
3099 {
3100 switch (mode)
3101 {
3102 case SFmode:
3103 case SCmode:
3104 case DFmode:
3105 case DCmode:
3106 case TFmode:
3107 return gen_rtx_REG (mode, FIRST_SSE_REG);
3108 case XFmode:
3109 case XCmode:
3110 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
3111 case TCmode:
3112 return NULL;
3113 default:
3114 return gen_rtx_REG (mode, 0);
3115 }
3116 }
3117 else
3118 return gen_rtx_REG (mode, ix86_value_regno (mode));
3119 }
3120
3121 /* Given a mode, return the register to use for a return value. */
3122
3123 static int
3124 ix86_value_regno (enum machine_mode mode)
3125 {
3126 /* Floating point return values in %st(0). */
3127 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
3128 return FIRST_FLOAT_REG;
3129 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
3130 we prevent this case when sse is not available. */
3131 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3132 return FIRST_SSE_REG;
3133 /* Everything else in %eax. */
3134 return 0;
3135 }
3136 \f
3137 /* Create the va_list data type. */
3138
3139 static tree
3140 ix86_build_builtin_va_list (void)
3141 {
3142 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
3143
3144 /* For i386 we use plain pointer to argument area. */
3145 if (!TARGET_64BIT)
3146 return build_pointer_type (char_type_node);
3147
3148 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
3149 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
3150
3151 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
3152 unsigned_type_node);
3153 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
3154 unsigned_type_node);
3155 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
3156 ptr_type_node);
3157 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
3158 ptr_type_node);
3159
3160 DECL_FIELD_CONTEXT (f_gpr) = record;
3161 DECL_FIELD_CONTEXT (f_fpr) = record;
3162 DECL_FIELD_CONTEXT (f_ovf) = record;
3163 DECL_FIELD_CONTEXT (f_sav) = record;
3164
3165 TREE_CHAIN (record) = type_decl;
3166 TYPE_NAME (record) = type_decl;
3167 TYPE_FIELDS (record) = f_gpr;
3168 TREE_CHAIN (f_gpr) = f_fpr;
3169 TREE_CHAIN (f_fpr) = f_ovf;
3170 TREE_CHAIN (f_ovf) = f_sav;
3171
3172 layout_type (record);
3173
3174 /* The correct type is an array type of one element. */
3175 return build_array_type (record, build_index_type (size_zero_node));
3176 }
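/* The record built above matches the x86-64 ABI va_list layout; in
   user-level C it is equivalent to the following sketch (field names as
   created above):

       typedef struct __va_list_tag {
         unsigned int gp_offset;
         unsigned int fp_offset;
         void *overflow_arg_area;
         void *reg_save_area;
       } __va_list_tag;
       typedef __va_list_tag va_list[1];

   while 32-bit targets simply use a char * into the argument area.  */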
3177
3178 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
3179
3180 static void
3181 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3182 tree type, int *pretend_size ATTRIBUTE_UNUSED,
3183 int no_rtl)
3184 {
3185 CUMULATIVE_ARGS next_cum;
3186 rtx save_area = NULL_RTX, mem;
3187 rtx label;
3188 rtx label_ref;
3189 rtx tmp_reg;
3190 rtx nsse_reg;
3191 int set;
3192 tree fntype;
3193 int stdarg_p;
3194 int i;
3195
3196 if (!TARGET_64BIT)
3197 return;
3198
3199 /* Indicate to allocate space on the stack for varargs save area. */
3200 ix86_save_varrargs_registers = 1;
3201
3202 cfun->stack_alignment_needed = 128;
3203
3204 fntype = TREE_TYPE (current_function_decl);
3205 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
3206 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
3207 != void_type_node));
3208
3209 /* For varargs, we do not want to skip the dummy va_dcl argument.
3210 For stdargs, we do want to skip the last named argument. */
3211 next_cum = *cum;
3212 if (stdarg_p)
3213 function_arg_advance (&next_cum, mode, type, 1);
3214
3215 if (!no_rtl)
3216 save_area = frame_pointer_rtx;
3217
3218 set = get_varargs_alias_set ();
3219
3220 for (i = next_cum.regno; i < ix86_regparm; i++)
3221 {
3222 mem = gen_rtx_MEM (Pmode,
3223 plus_constant (save_area, i * UNITS_PER_WORD));
3224 set_mem_alias_set (mem, set);
3225 emit_move_insn (mem, gen_rtx_REG (Pmode,
3226 x86_64_int_parameter_registers[i]));
3227 }
3228
3229 if (next_cum.sse_nregs)
3230 {
3231 /* Now emit code to save SSE registers. The AX parameter contains number
3232 of SSE parameter registers used to call this function. We use
3233 sse_prologue_save insn template that produces computed jump across
3234 SSE saves. We need some preparation work to get this working. */
3235
3236 label = gen_label_rtx ();
3237 label_ref = gen_rtx_LABEL_REF (Pmode, label);
3238
3239 /* Compute address to jump to :
3240 label - eax*4 + nnamed_sse_arguments*4 */
3241 tmp_reg = gen_reg_rtx (Pmode);
3242 nsse_reg = gen_reg_rtx (Pmode);
3243 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
3244 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3245 gen_rtx_MULT (Pmode, nsse_reg,
3246 GEN_INT (4))));
3247 if (next_cum.sse_regno)
3248 emit_move_insn
3249 (nsse_reg,
3250 gen_rtx_CONST (DImode,
3251 gen_rtx_PLUS (DImode,
3252 label_ref,
3253 GEN_INT (next_cum.sse_regno * 4))));
3254 else
3255 emit_move_insn (nsse_reg, label_ref);
3256 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
3257
3258 /* Compute the address of the memory block we save into. We always use a
3259 pointer pointing 127 bytes after the first byte to store to - this is
3260 needed to keep the instruction size limited to 4 bytes. */
3261 tmp_reg = gen_reg_rtx (Pmode);
3262 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3263 plus_constant (save_area,
3264 8 * REGPARM_MAX + 127)));
3265 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
3266 set_mem_alias_set (mem, set);
3267 set_mem_align (mem, BITS_PER_WORD);
3268
3269 /* And finally do the dirty job! */
3270 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
3271 GEN_INT (next_cum.sse_regno), label));
3272 }
3273
3274 }
3275
3276 /* Implement va_start. */
3277
3278 void
3279 ix86_va_start (tree valist, rtx nextarg)
3280 {
3281 HOST_WIDE_INT words, n_gpr, n_fpr;
3282 tree f_gpr, f_fpr, f_ovf, f_sav;
3283 tree gpr, fpr, ovf, sav, t;
3284
3285 /* Only 64bit target needs something special. */
3286 if (!TARGET_64BIT)
3287 {
3288 std_expand_builtin_va_start (valist, nextarg);
3289 return;
3290 }
3291
3292 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3293 f_fpr = TREE_CHAIN (f_gpr);
3294 f_ovf = TREE_CHAIN (f_fpr);
3295 f_sav = TREE_CHAIN (f_ovf);
3296
3297 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3298 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
3299 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
3300 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
3301 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
3302
3303 /* Count number of gp and fp argument registers used. */
3304 words = current_function_args_info.words;
3305 n_gpr = current_function_args_info.regno;
3306 n_fpr = current_function_args_info.sse_regno;
3307
3308 if (TARGET_DEBUG_ARG)
3309 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
3310 (int) words, (int) n_gpr, (int) n_fpr);
3311
3312 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
3313 build_int_cst (NULL_TREE, n_gpr * 8));
3314 TREE_SIDE_EFFECTS (t) = 1;
3315 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3316
3317 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
3318 build_int_cst (NULL_TREE, n_fpr * 16 + 8*REGPARM_MAX));
3319 TREE_SIDE_EFFECTS (t) = 1;
3320 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3321
3322 /* Find the overflow area. */
3323 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
3324 if (words != 0)
3325 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
3326 build_int_cst (NULL_TREE, words * UNITS_PER_WORD));
3327 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3328 TREE_SIDE_EFFECTS (t) = 1;
3329 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3330
3331 /* Find the register save area.
3332 The function prologue saves it right above the stack frame. */
3333 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
3334 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
3335 TREE_SIDE_EFFECTS (t) = 1;
3336 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3337 }
3338
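/* For reference, the va_list object initialized above has the layout
   defined by the x86-64 psABI; roughly (a sketch, not the exact record
   type built elsewhere in this file):

	typedef struct {
	  unsigned int gp_offset;        set to n_gpr * 8 above
	  unsigned int fp_offset;        set to REGPARM_MAX * 8 + n_fpr * 16
	  void *overflow_arg_area;       first stack-passed argument
	  void *reg_save_area;           base of the register save area
	} __va_list[1];  */
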
3339 /* Implement va_arg. */
3340
3341 tree
3342 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
3343 {
3344 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
3345 tree f_gpr, f_fpr, f_ovf, f_sav;
3346 tree gpr, fpr, ovf, sav, t;
3347 int size, rsize;
3348 tree lab_false, lab_over = NULL_TREE;
3349 tree addr, t2;
3350 rtx container;
3351 int indirect_p = 0;
3352 tree ptrtype;
3353 enum machine_mode nat_mode;
3354
3355 /* Only 64bit target needs something special. */
3356 if (!TARGET_64BIT)
3357 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
3358
3359 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3360 f_fpr = TREE_CHAIN (f_gpr);
3361 f_ovf = TREE_CHAIN (f_fpr);
3362 f_sav = TREE_CHAIN (f_ovf);
3363
3364 valist = build_va_arg_indirect_ref (valist);
3365 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
3366 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
3367 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
3368 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
3369
3370 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
3371 if (indirect_p)
3372 type = build_pointer_type (type);
3373 size = int_size_in_bytes (type);
3374 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3375
3376 nat_mode = type_natural_mode (type);
3377 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
3378 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
3379
3380 /* Pull the value out of the saved registers. */
3381
3382 addr = create_tmp_var (ptr_type_node, "addr");
3383 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
3384
3385 if (container)
3386 {
3387 int needed_intregs, needed_sseregs;
3388 bool need_temp;
3389 tree int_addr, sse_addr;
3390
3391 lab_false = create_artificial_label ();
3392 lab_over = create_artificial_label ();
3393
3394 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
3395
3396 need_temp = (!REG_P (container)
3397 && ((needed_intregs && TYPE_ALIGN (type) > 64)
3398 || TYPE_ALIGN (type) > 128));
3399
3400 /* In case we are passing a structure, verify that it is a consecutive
3401 block in the register save area. If not, we need to do moves. */
3402 if (!need_temp && !REG_P (container))
3403 {
3404 /* Verify that all registers are strictly consecutive */
3405 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
3406 {
3407 int i;
3408
3409 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3410 {
3411 rtx slot = XVECEXP (container, 0, i);
3412 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
3413 || INTVAL (XEXP (slot, 1)) != i * 16)
3414 need_temp = 1;
3415 }
3416 }
3417 else
3418 {
3419 int i;
3420
3421 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3422 {
3423 rtx slot = XVECEXP (container, 0, i);
3424 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
3425 || INTVAL (XEXP (slot, 1)) != i * 8)
3426 need_temp = 1;
3427 }
3428 }
3429 }
3430 if (!need_temp)
3431 {
3432 int_addr = addr;
3433 sse_addr = addr;
3434 }
3435 else
3436 {
3437 int_addr = create_tmp_var (ptr_type_node, "int_addr");
3438 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
3439 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
3440 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
3441 }
3442
3443 /* First ensure that we fit completely in registers. */
3444 if (needed_intregs)
3445 {
3446 t = build_int_cst (TREE_TYPE (gpr),
3447 (REGPARM_MAX - needed_intregs + 1) * 8);
3448 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
3449 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
3450 t = build (COND_EXPR, void_type_node, t, t2, NULL_TREE);
3451 gimplify_and_add (t, pre_p);
3452 }
3453 if (needed_sseregs)
3454 {
3455 t = build_int_cst (TREE_TYPE (fpr),
3456 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
3457 + REGPARM_MAX * 8);
3458 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
3459 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
3460 t = build (COND_EXPR, void_type_node, t, t2, NULL_TREE);
3461 gimplify_and_add (t, pre_p);
3462 }
3463
3464 /* Compute index to start of area used for integer regs. */
3465 if (needed_intregs)
3466 {
3467 /* int_addr = gpr + sav; */
3468 t = fold_convert (ptr_type_node, gpr);
3469 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
3470 t = build2 (MODIFY_EXPR, void_type_node, int_addr, t);
3471 gimplify_and_add (t, pre_p);
3472 }
3473 if (needed_sseregs)
3474 {
3475 /* sse_addr = fpr + sav; */
3476 t = fold_convert (ptr_type_node, fpr);
3477 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
3478 t = build2 (MODIFY_EXPR, void_type_node, sse_addr, t);
3479 gimplify_and_add (t, pre_p);
3480 }
3481 if (need_temp)
3482 {
3483 int i;
3484 tree temp = create_tmp_var (type, "va_arg_tmp");
3485
3486 /* addr = &temp; */
3487 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
3488 t = build2 (MODIFY_EXPR, void_type_node, addr, t);
3489 gimplify_and_add (t, pre_p);
3490
3491 for (i = 0; i < XVECLEN (container, 0); i++)
3492 {
3493 rtx slot = XVECEXP (container, 0, i);
3494 rtx reg = XEXP (slot, 0);
3495 enum machine_mode mode = GET_MODE (reg);
3496 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
3497 tree addr_type = build_pointer_type (piece_type);
3498 tree src_addr, src;
3499 int src_offset;
3500 tree dest_addr, dest;
3501
3502 if (SSE_REGNO_P (REGNO (reg)))
3503 {
3504 src_addr = sse_addr;
3505 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
3506 }
3507 else
3508 {
3509 src_addr = int_addr;
3510 src_offset = REGNO (reg) * 8;
3511 }
3512 src_addr = fold_convert (addr_type, src_addr);
3513 src_addr = fold (build2 (PLUS_EXPR, addr_type, src_addr,
3514 size_int (src_offset)));
3515 src = build_va_arg_indirect_ref (src_addr);
3516
3517 dest_addr = fold_convert (addr_type, addr);
3518 dest_addr = fold (build2 (PLUS_EXPR, addr_type, dest_addr,
3519 size_int (INTVAL (XEXP (slot, 1)))));
3520 dest = build_va_arg_indirect_ref (dest_addr);
3521
3522 t = build2 (MODIFY_EXPR, void_type_node, dest, src);
3523 gimplify_and_add (t, pre_p);
3524 }
3525 }
3526
3527 if (needed_intregs)
3528 {
3529 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
3530 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
3531 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
3532 gimplify_and_add (t, pre_p);
3533 }
3534 if (needed_sseregs)
3535 {
3536 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
3537 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
3538 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
3539 gimplify_and_add (t, pre_p);
3540 }
3541
3542 t = build1 (GOTO_EXPR, void_type_node, lab_over);
3543 gimplify_and_add (t, pre_p);
3544
3545 t = build1 (LABEL_EXPR, void_type_node, lab_false);
3546 append_to_statement_list (t, pre_p);
3547 }
3548
3549 /* ... otherwise out of the overflow area. */
3550
3551 /* Care for on-stack alignment if needed. */
3552 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
3553 t = ovf;
3554 else
3555 {
3556 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
3557 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf,
3558 build_int_cst (TREE_TYPE (ovf), align - 1));
3559 t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
3560 build_int_cst (TREE_TYPE (t), -align));
3561 }
3562 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
3563
3564 t2 = build2 (MODIFY_EXPR, void_type_node, addr, t);
3565 gimplify_and_add (t2, pre_p);
3566
3567 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
3568 build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD));
3569 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3570 gimplify_and_add (t, pre_p);
3571
3572 if (container)
3573 {
3574 t = build1 (LABEL_EXPR, void_type_node, lab_over);
3575 append_to_statement_list (t, pre_p);
3576 }
3577
3578 ptrtype = build_pointer_type (type);
3579 addr = fold_convert (ptrtype, addr);
3580
3581 if (indirect_p)
3582 addr = build_va_arg_indirect_ref (addr);
3583 return build_va_arg_indirect_ref (addr);
3584 }
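
/* A sketch of what the code above gimplifies va_arg into for a single
   integer argument (names refer to the temporaries created above; the
   constant 48 assumes REGPARM_MAX == 6):

	if (ap.gp_offset >= 48) goto lab_false;
	addr = ap.reg_save_area + ap.gp_offset;
	ap.gp_offset += 8;
	goto lab_over;
     lab_false:
	addr = ap.overflow_arg_area;      (aligned first if the type needs it)
	ap.overflow_arg_area = addr + 8;
     lab_over:
	result = *(int *) addr;

   Aggregates that need both integer and SSE registers, or that are not
   contiguous in the save area, go through the va_arg_tmp copy loop.  */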
3585 \f
3586 /* Return nonzero if OPNUM's MEM should be matched
3587 in movabs* patterns. */
3588
3589 int
3590 ix86_check_movabs (rtx insn, int opnum)
3591 {
3592 rtx set, mem;
3593
3594 set = PATTERN (insn);
3595 if (GET_CODE (set) == PARALLEL)
3596 set = XVECEXP (set, 0, 0);
3597 if (GET_CODE (set) != SET)
3598 abort ();
3599 mem = XEXP (set, opnum);
3600 while (GET_CODE (mem) == SUBREG)
3601 mem = SUBREG_REG (mem);
3602 if (GET_CODE (mem) != MEM)
3603 abort ();
3604 return (volatile_ok || !MEM_VOLATILE_P (mem));
3605 }
3606 \f
3607 /* Initialize the table of extra 80387 mathematical constants. */
3608
3609 static void
3610 init_ext_80387_constants (void)
3611 {
3612 static const char * cst[5] =
3613 {
3614 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
3615 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
3616 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
3617 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
3618 "3.1415926535897932385128089594061862044", /* 4: fldpi */
3619 };
3620 int i;
3621
3622 for (i = 0; i < 5; i++)
3623 {
3624 real_from_string (&ext_80387_constants_table[i], cst[i]);
3625 /* Ensure each constant is rounded to XFmode precision. */
3626 real_convert (&ext_80387_constants_table[i],
3627 XFmode, &ext_80387_constants_table[i]);
3628 }
3629
3630 ext_80387_constants_init = 1;
3631 }
3632
3633 /* Return true if the constant is something that can be loaded with
3634 a special instruction. */
3635
3636 int
3637 standard_80387_constant_p (rtx x)
3638 {
3639 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
3640 return -1;
3641
3642 if (x == CONST0_RTX (GET_MODE (x)))
3643 return 1;
3644 if (x == CONST1_RTX (GET_MODE (x)))
3645 return 2;
3646
3647 /* For XFmode constants, try to find a special 80387 instruction when
3648 optimizing for size or on those CPUs that benefit from them. */
3649 if (GET_MODE (x) == XFmode
3650 && (optimize_size || x86_ext_80387_constants & TUNEMASK))
3651 {
3652 REAL_VALUE_TYPE r;
3653 int i;
3654
3655 if (! ext_80387_constants_init)
3656 init_ext_80387_constants ();
3657
3658 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3659 for (i = 0; i < 5; i++)
3660 if (real_identical (&r, &ext_80387_constants_table[i]))
3661 return i + 3;
3662 }
3663
3664 return 0;
3665 }
3666
3667 /* Return the opcode of the special instruction to be used to load
3668 the constant X. */
3669
3670 const char *
3671 standard_80387_constant_opcode (rtx x)
3672 {
3673 switch (standard_80387_constant_p (x))
3674 {
3675 case 1:
3676 return "fldz";
3677 case 2:
3678 return "fld1";
3679 case 3:
3680 return "fldlg2";
3681 case 4:
3682 return "fldln2";
3683 case 5:
3684 return "fldl2e";
3685 case 6:
3686 return "fldl2t";
3687 case 7:
3688 return "fldpi";
3689 }
3690 abort ();
3691 }
3692
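/* Summary of the mapping implemented by the two functions above (the
   values come from the cst[] table in init_ext_80387_constants):

	standard_80387_constant_p   opcode    value
		1                   fldz      +0.0
		2                   fld1      +1.0
		3                   fldlg2    log10(2)
		4                   fldln2    ln(2)
		5                   fldl2e    log2(e)
		6                   fldl2t    log2(10)
		7                   fldpi     pi  */
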
3693 /* Return the CONST_DOUBLE representing the 80387 constant that is
3694 loaded by the specified special instruction. The argument IDX
3695 matches the return value from standard_80387_constant_p. */
3696
3697 rtx
3698 standard_80387_constant_rtx (int idx)
3699 {
3700 int i;
3701
3702 if (! ext_80387_constants_init)
3703 init_ext_80387_constants ();
3704
3705 switch (idx)
3706 {
3707 case 3:
3708 case 4:
3709 case 5:
3710 case 6:
3711 case 7:
3712 i = idx - 3;
3713 break;
3714
3715 default:
3716 abort ();
3717 }
3718
3719 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
3720 XFmode);
3721 }
3722
3723 /* Return 1 if X is an FP constant that we can load into an SSE register
3724 without using memory. */
3725 int
3726 standard_sse_constant_p (rtx x)
3727 {
3728 if (x == const0_rtx)
3729 return 1;
3730 return (x == CONST0_RTX (GET_MODE (x)));
3731 }
3732
3733 /* Return 1 if OP contains a symbol reference. */
3734
3735 int
3736 symbolic_reference_mentioned_p (rtx op)
3737 {
3738 const char *fmt;
3739 int i;
3740
3741 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
3742 return 1;
3743
3744 fmt = GET_RTX_FORMAT (GET_CODE (op));
3745 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3746 {
3747 if (fmt[i] == 'E')
3748 {
3749 int j;
3750
3751 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3752 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3753 return 1;
3754 }
3755
3756 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3757 return 1;
3758 }
3759
3760 return 0;
3761 }
3762
3763 /* Return 1 if it is appropriate to emit `ret' instructions in the
3764 body of a function. Do this only if the epilogue is simple, needing a
3765 couple of insns. Prior to reloading, we can't tell how many registers
3766 must be saved, so return 0 then. Return 0 if there is no frame
3767 marker to de-allocate. */
3768
3769 int
3770 ix86_can_use_return_insn_p (void)
3771 {
3772 struct ix86_frame frame;
3773
3774 if (! reload_completed || frame_pointer_needed)
3775 return 0;
3776
3777 /* Don't allow more than 32768 bytes of arguments to be popped, since
3778 that's all we can do with one instruction. */
3779 if (current_function_pops_args
3780 && current_function_args_size >= 32768)
3781 return 0;
3782
3783 ix86_compute_frame_layout (&frame);
3784 return frame.to_allocate == 0 && frame.nregs == 0;
3785 }
3786 \f
3787 /* Value should be nonzero if functions must have frame pointers.
3788 Zero means the frame pointer need not be set up (and parms may
3789 be accessed via the stack pointer) in functions that seem suitable. */
3790
3791 int
3792 ix86_frame_pointer_required (void)
3793 {
3794 /* If we accessed previous frames, then the generated code expects
3795 to be able to access the saved ebp value in our frame. */
3796 if (cfun->machine->accesses_prev_frame)
3797 return 1;
3798
3799 /* Several x86 OSes need a frame pointer for other reasons,
3800 usually pertaining to setjmp. */
3801 if (SUBTARGET_FRAME_POINTER_REQUIRED)
3802 return 1;
3803
3804 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
3805 the frame pointer by default. Turn it back on now if we've not
3806 got a leaf function. */
3807 if (TARGET_OMIT_LEAF_FRAME_POINTER
3808 && (!current_function_is_leaf))
3809 return 1;
3810
3811 if (current_function_profile)
3812 return 1;
3813
3814 return 0;
3815 }
3816
3817 /* Record that the current function accesses previous call frames. */
3818
3819 void
3820 ix86_setup_frame_addresses (void)
3821 {
3822 cfun->machine->accesses_prev_frame = 1;
3823 }
3824 \f
3825 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
3826 # define USE_HIDDEN_LINKONCE 1
3827 #else
3828 # define USE_HIDDEN_LINKONCE 0
3829 #endif
3830
3831 static int pic_labels_used;
3832
3833 /* Fills in the label name that should be used for a pc thunk for
3834 the given register. */
3835
3836 static void
3837 get_pc_thunk_name (char name[32], unsigned int regno)
3838 {
3839 if (USE_HIDDEN_LINKONCE)
3840 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
3841 else
3842 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
3843 }
3844
3845
3846 /* At the end of the file, output the pc thunks used for -fpic; each one
3847 loads its register with the return address of the caller and then returns. */
3848
3849 void
3850 ix86_file_end (void)
3851 {
3852 rtx xops[2];
3853 int regno;
3854
3855 for (regno = 0; regno < 8; ++regno)
3856 {
3857 char name[32];
3858
3859 if (! ((pic_labels_used >> regno) & 1))
3860 continue;
3861
3862 get_pc_thunk_name (name, regno);
3863
3864 if (USE_HIDDEN_LINKONCE)
3865 {
3866 tree decl;
3867
3868 decl = build_decl (FUNCTION_DECL, get_identifier (name),
3869 error_mark_node);
3870 TREE_PUBLIC (decl) = 1;
3871 TREE_STATIC (decl) = 1;
3872 DECL_ONE_ONLY (decl) = 1;
3873
3874 (*targetm.asm_out.unique_section) (decl, 0);
3875 named_section (decl, NULL, 0);
3876
3877 (*targetm.asm_out.globalize_label) (asm_out_file, name);
3878 fputs ("\t.hidden\t", asm_out_file);
3879 assemble_name (asm_out_file, name);
3880 fputc ('\n', asm_out_file);
3881 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
3882 }
3883 else
3884 {
3885 text_section ();
3886 ASM_OUTPUT_LABEL (asm_out_file, name);
3887 }
3888
3889 xops[0] = gen_rtx_REG (SImode, regno);
3890 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
3891 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
3892 output_asm_insn ("ret", xops);
3893 }
3894
3895 if (NEED_INDICATE_EXEC_STACK)
3896 file_end_indicate_exec_stack ();
3897 }
3898
3899 /* Emit code for the SET_GOT patterns. */
3900
3901 const char *
3902 output_set_got (rtx dest)
3903 {
3904 rtx xops[3];
3905
3906 xops[0] = dest;
3907 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
3908
3909 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
3910 {
3911 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
3912
3913 if (!flag_pic)
3914 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
3915 else
3916 output_asm_insn ("call\t%a2", xops);
3917
3918 #if TARGET_MACHO
3919 /* Output the "canonical" label name ("Lxx$pb") here too. This
3920 is what will be referred to by the Mach-O PIC subsystem. */
3921 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
3922 #endif
3923 (*targetm.asm_out.internal_label) (asm_out_file, "L",
3924 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
3925
3926 if (flag_pic)
3927 output_asm_insn ("pop{l}\t%0", xops);
3928 }
3929 else
3930 {
3931 char name[32];
3932 get_pc_thunk_name (name, REGNO (dest));
3933 pic_labels_used |= 1 << REGNO (dest);
3934
3935 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
3936 xops[2] = gen_rtx_MEM (QImode, xops[2]);
3937 output_asm_insn ("call\t%X2", xops);
3938 }
3939
3940 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
3941 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
3942 else if (!TARGET_MACHO)
3943 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
3944
3945 return "";
3946 }
3947
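/* For illustration, the assembly output_set_got typically produces for a
   32-bit PIC prologue (a sketch; it assumes the GOT symbol is the usual
   _GLOBAL_OFFSET_TABLE_ and the destination is %ebx):

   with TARGET_DEEP_BRANCH_PREDICTION (thunk emitted by ix86_file_end):
	call	__i686.get_pc_thunk.bx
	addl	$_GLOBAL_OFFSET_TABLE_, %ebx

   without it (inline call/pop):
	call	.L2
     .L2:
	popl	%ebx
	addl	$_GLOBAL_OFFSET_TABLE_+[.-.L2], %ebx  */
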
3948 /* Generate a "push" pattern for input ARG. */
3949
3950 static rtx
3951 gen_push (rtx arg)
3952 {
3953 return gen_rtx_SET (VOIDmode,
3954 gen_rtx_MEM (Pmode,
3955 gen_rtx_PRE_DEC (Pmode,
3956 stack_pointer_rtx)),
3957 arg);
3958 }
3959
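/* For example, the prologue's gen_push (hard_frame_pointer_rtx) builds
   (a sketch of the generated RTL in 32-bit mode)

	(set (mem:SI (pre_dec:SI (reg:SI sp))) (reg:SI bp))

   which the move patterns emit as "push %ebp"; in 64-bit mode the modes
   above are DImode and the insn becomes "push %rbp".  */
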
3960 /* Return >= 0 if there is an unused call-clobbered register available
3961 for the entire function. */
3962
3963 static unsigned int
3964 ix86_select_alt_pic_regnum (void)
3965 {
3966 if (current_function_is_leaf && !current_function_profile)
3967 {
3968 int i;
3969 for (i = 2; i >= 0; --i)
3970 if (!regs_ever_live[i])
3971 return i;
3972 }
3973
3974 return INVALID_REGNUM;
3975 }
3976
3977 /* Return 1 if we need to save REGNO. */
3978 static int
3979 ix86_save_reg (unsigned int regno, int maybe_eh_return)
3980 {
3981 if (pic_offset_table_rtx
3982 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
3983 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
3984 || current_function_profile
3985 || current_function_calls_eh_return
3986 || current_function_uses_const_pool))
3987 {
3988 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
3989 return 0;
3990 return 1;
3991 }
3992
3993 if (current_function_calls_eh_return && maybe_eh_return)
3994 {
3995 unsigned i;
3996 for (i = 0; ; i++)
3997 {
3998 unsigned test = EH_RETURN_DATA_REGNO (i);
3999 if (test == INVALID_REGNUM)
4000 break;
4001 if (test == regno)
4002 return 1;
4003 }
4004 }
4005
4006 return (regs_ever_live[regno]
4007 && !call_used_regs[regno]
4008 && !fixed_regs[regno]
4009 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4010 }
4011
4012 /* Return number of registers to be saved on the stack. */
4013
4014 static int
4015 ix86_nsaved_regs (void)
4016 {
4017 int nregs = 0;
4018 int regno;
4019
4020 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4021 if (ix86_save_reg (regno, true))
4022 nregs++;
4023 return nregs;
4024 }
4025
4026 /* Return the offset between two registers, one to be eliminated, and the other
4027 its replacement, at the start of a routine. */
4028
4029 HOST_WIDE_INT
4030 ix86_initial_elimination_offset (int from, int to)
4031 {
4032 struct ix86_frame frame;
4033 ix86_compute_frame_layout (&frame);
4034
4035 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4036 return frame.hard_frame_pointer_offset;
4037 else if (from == FRAME_POINTER_REGNUM
4038 && to == HARD_FRAME_POINTER_REGNUM)
4039 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
4040 else
4041 {
4042 if (to != STACK_POINTER_REGNUM)
4043 abort ();
4044 else if (from == ARG_POINTER_REGNUM)
4045 return frame.stack_pointer_offset;
4046 else if (from != FRAME_POINTER_REGNUM)
4047 abort ();
4048 else
4049 return frame.stack_pointer_offset - frame.frame_pointer_offset;
4050 }
4051 }
4052
4053 /* Fill the structure ix86_frame describing the frame of the currently compiled function. */
4054
4055 static void
4056 ix86_compute_frame_layout (struct ix86_frame *frame)
4057 {
4058 HOST_WIDE_INT total_size;
4059 unsigned int stack_alignment_needed;
4060 HOST_WIDE_INT offset;
4061 unsigned int preferred_alignment;
4062 HOST_WIDE_INT size = get_frame_size ();
4063
4064 frame->nregs = ix86_nsaved_regs ();
4065 total_size = size;
4066
4067 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
4068 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4069
4070 /* During reload iterations the number of registers saved can change.
4071 Recompute the value as needed. Do not recompute when the number of
4072 registers did not change, as reload calls this function multiple times
4073 and does not expect the decision to change within a single iteration. */
4074 if (!optimize_size
4075 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
4076 {
4077 int count = frame->nregs;
4078
4079 cfun->machine->use_fast_prologue_epilogue_nregs = count;
4080 /* The fast prologue uses moves instead of pushes to save registers. This
4081 is significantly longer, but it also executes faster, as modern hardware
4082 can execute the moves in parallel but cannot do so for push/pop.
4083
4084 Be careful about choosing which prologue to emit: when a function takes
4085 many instructions to execute we may as well use the slow version, and
4086 likewise when the function is known to be outside a hot spot (known with
4087 feedback only). Weight the size of the function by the number of
4088 registers to save, as it is cheap to use one or two push instructions
4089 but very slow to use many of them. */
4090 if (count)
4091 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
4092 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
4093 || (flag_branch_probabilities
4094 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
4095 cfun->machine->use_fast_prologue_epilogue = false;
4096 else
4097 cfun->machine->use_fast_prologue_epilogue
4098 = !expensive_function_p (count);
4099 }
4100 if (TARGET_PROLOGUE_USING_MOVE
4101 && cfun->machine->use_fast_prologue_epilogue)
4102 frame->save_regs_using_mov = true;
4103 else
4104 frame->save_regs_using_mov = false;
4105
4106
4107 /* Skip return address and saved base pointer. */
4108 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4109
4110 frame->hard_frame_pointer_offset = offset;
4111
4112 /* Do some sanity checking of stack_alignment_needed and
4113 preferred_alignment, since the i386 port is the only one using these
4114 features and they may break easily. */
4115
4116 if (size && !stack_alignment_needed)
4117 abort ();
4118 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
4119 abort ();
4120 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4121 abort ();
4122 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4123 abort ();
4124
4125 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
4126 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
4127
4128 /* Register save area */
4129 offset += frame->nregs * UNITS_PER_WORD;
4130
4131 /* Va-arg area */
4132 if (ix86_save_varrargs_registers)
4133 {
4134 offset += X86_64_VARARGS_SIZE;
4135 frame->va_arg_size = X86_64_VARARGS_SIZE;
4136 }
4137 else
4138 frame->va_arg_size = 0;
4139
4140 /* Align start of frame for local function. */
4141 frame->padding1 = ((offset + stack_alignment_needed - 1)
4142 & -stack_alignment_needed) - offset;
4143
4144 offset += frame->padding1;
4145
4146 /* Frame pointer points here. */
4147 frame->frame_pointer_offset = offset;
4148
4149 offset += size;
4150
4151 /* Add outgoing arguments area. Can be skipped if we eliminated
4152 all the function calls as dead code.
4153 Skipping is however impossible when the function calls alloca. The alloca
4154 expander assumes that the last current_function_outgoing_args_size bytes
4155 of the stack frame are unused. */
4156 if (ACCUMULATE_OUTGOING_ARGS
4157 && (!current_function_is_leaf || current_function_calls_alloca))
4158 {
4159 offset += current_function_outgoing_args_size;
4160 frame->outgoing_arguments_size = current_function_outgoing_args_size;
4161 }
4162 else
4163 frame->outgoing_arguments_size = 0;
4164
4165 /* Align stack boundary. Only needed if we're calling another function
4166 or using alloca. */
4167 if (!current_function_is_leaf || current_function_calls_alloca)
4168 frame->padding2 = ((offset + preferred_alignment - 1)
4169 & -preferred_alignment) - offset;
4170 else
4171 frame->padding2 = 0;
4172
4173 offset += frame->padding2;
4174
4175 /* We've reached end of stack frame. */
4176 frame->stack_pointer_offset = offset;
4177
4178 /* Size prologue needs to allocate. */
4179 frame->to_allocate =
4180 (size + frame->padding1 + frame->padding2
4181 + frame->outgoing_arguments_size + frame->va_arg_size);
4182
4183 if ((!frame->to_allocate && frame->nregs <= 1)
4184 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
4185 frame->save_regs_using_mov = false;
4186
4187 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
4188 && current_function_is_leaf)
4189 {
4190 frame->red_zone_size = frame->to_allocate;
4191 if (frame->save_regs_using_mov)
4192 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
4193 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
4194 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
4195 }
4196 else
4197 frame->red_zone_size = 0;
4198 frame->to_allocate -= frame->red_zone_size;
4199 frame->stack_pointer_offset -= frame->red_zone_size;
4200 #if 0
4201 fprintf (stderr, "nregs: %i\n", frame->nregs);
4202 fprintf (stderr, "size: %i\n", size);
4203 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
4204 fprintf (stderr, "padding1: %i\n", frame->padding1);
4205 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
4206 fprintf (stderr, "padding2: %i\n", frame->padding2);
4207 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
4208 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
4209 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
4210 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
4211 frame->hard_frame_pointer_offset);
4212 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
4213 #endif
4214 }
4215
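/* A sketch of the stack frame described by ix86_compute_frame_layout,
   from higher to lower addresses (the fields follow from the code above;
   offsets are measured from the start of the frame, which begins with the
   return address):

	return address                      <- incoming stack pointer
	saved frame pointer (if needed)     <- hard_frame_pointer_offset
	callee-saved registers              frame->nregs * UNITS_PER_WORD
	va-arg register save area           frame->va_arg_size
	padding1                            aligns the local frame
	local variables                     <- frame_pointer_offset
	outgoing argument area              frame->outgoing_arguments_size
	padding2                            keeps calls aligned
	                                    <- stack_pointer_offset  */
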
4216 /* Emit code to save registers in the prologue. */
4217
4218 static void
4219 ix86_emit_save_regs (void)
4220 {
4221 int regno;
4222 rtx insn;
4223
4224 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4225 if (ix86_save_reg (regno, true))
4226 {
4227 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
4228 RTX_FRAME_RELATED_P (insn) = 1;
4229 }
4230 }
4231
4232 /* Emit code to save registers using MOV insns. The first register
4233 is saved at POINTER + OFFSET. */
4234 static void
4235 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
4236 {
4237 int regno;
4238 rtx insn;
4239
4240 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4241 if (ix86_save_reg (regno, true))
4242 {
4243 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
4244 Pmode, offset),
4245 gen_rtx_REG (Pmode, regno));
4246 RTX_FRAME_RELATED_P (insn) = 1;
4247 offset += UNITS_PER_WORD;
4248 }
4249 }
4250
4251 /* Expand a prologue or epilogue stack adjustment.
4252 The pattern exists to put a dependency on all ebp-based memory accesses.
4253 STYLE should be negative if instructions should be marked as frame related,
4254 zero if the %r11 register is live and cannot be freely used, and positive
4255 otherwise. */
4256
4257 static void
4258 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
4259 {
4260 rtx insn;
4261
4262 if (! TARGET_64BIT)
4263 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
4264 else if (x86_64_immediate_operand (offset, DImode))
4265 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
4266 else
4267 {
4268 rtx r11;
4269 /* r11 is used by indirect sibcall return as well: it is set before the
4270 epilogue and used after the epilogue. At the moment an indirect sibcall
4271 shouldn't be used together with huge frame sizes in one
4272 function because of the frame_size check in sibcall.c. */
4273 if (style == 0)
4274 abort ();
4275 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
4276 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
4277 if (style < 0)
4278 RTX_FRAME_RELATED_P (insn) = 1;
4279 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
4280 offset));
4281 }
4282 if (style < 0)
4283 RTX_FRAME_RELATED_P (insn) = 1;
4284 }
4285
4286 /* Expand the prologue into a bunch of separate insns. */
4287
4288 void
4289 ix86_expand_prologue (void)
4290 {
4291 rtx insn;
4292 bool pic_reg_used;
4293 struct ix86_frame frame;
4294 HOST_WIDE_INT allocate;
4295
4296 ix86_compute_frame_layout (&frame);
4297
4298 /* Note: AT&T enter does NOT have reversed args. Enter is probably
4299 slower on all targets. Also sdb doesn't like it. */
4300
4301 if (frame_pointer_needed)
4302 {
4303 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
4304 RTX_FRAME_RELATED_P (insn) = 1;
4305
4306 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
4307 RTX_FRAME_RELATED_P (insn) = 1;
4308 }
4309
4310 allocate = frame.to_allocate;
4311
4312 if (!frame.save_regs_using_mov)
4313 ix86_emit_save_regs ();
4314 else
4315 allocate += frame.nregs * UNITS_PER_WORD;
4316
4317 /* When using the red zone we may start saving registers before allocating
4318 the stack frame, saving one cycle of the prologue. */
4319 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
4320 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
4321 : stack_pointer_rtx,
4322 -frame.nregs * UNITS_PER_WORD);
4323
4324 if (allocate == 0)
4325 ;
4326 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
4327 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
4328 GEN_INT (-allocate), -1);
4329 else
4330 {
4331 /* Only valid for Win32. */
4332 rtx eax = gen_rtx_REG (SImode, 0);
4333 bool eax_live = ix86_eax_live_at_start_p ();
4334 rtx t;
4335
4336 if (TARGET_64BIT)
4337 abort ();
4338
4339 if (eax_live)
4340 {
4341 emit_insn (gen_push (eax));
4342 allocate -= 4;
4343 }
4344
4345 emit_move_insn (eax, GEN_INT (allocate));
4346
4347 insn = emit_insn (gen_allocate_stack_worker (eax));
4348 RTX_FRAME_RELATED_P (insn) = 1;
4349 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
4350 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
4351 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
4352 t, REG_NOTES (insn));
4353
4354 if (eax_live)
4355 {
4356 if (frame_pointer_needed)
4357 t = plus_constant (hard_frame_pointer_rtx,
4358 allocate
4359 - frame.to_allocate
4360 - frame.nregs * UNITS_PER_WORD);
4361 else
4362 t = plus_constant (stack_pointer_rtx, allocate);
4363 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
4364 }
4365 }
4366
4367 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
4368 {
4369 if (!frame_pointer_needed || !frame.to_allocate)
4370 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
4371 else
4372 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
4373 -frame.nregs * UNITS_PER_WORD);
4374 }
4375
4376 pic_reg_used = false;
4377 if (pic_offset_table_rtx
4378 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4379 || current_function_profile))
4380 {
4381 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
4382
4383 if (alt_pic_reg_used != INVALID_REGNUM)
4384 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
4385
4386 pic_reg_used = true;
4387 }
4388
4389 if (pic_reg_used)
4390 {
4391 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
4392
4393 /* Even with accurate pre-reload life analysis, we can wind up
4394 deleting all references to the pic register after reload.
4395 Consider if cross-jumping unifies two sides of a branch
4396 controlled by a comparison vs the only read from a global.
4397 In which case, allow the set_got to be deleted, though we're
4398 too late to do anything about the ebx save in the prologue. */
4399 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
4400 }
4401
4402 /* Prevent function calls from being scheduled before the call to mcount.
4403 In the pic_reg_used case, make sure that the got load isn't deleted. */
4404 if (current_function_profile)
4405 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
4406 }
4407
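/* For illustration, a typical 32-bit prologue expanded by the code above
   for a function that needs a frame pointer, saves %ebx and allocates N
   bytes of frame (a sketch only; moves into the frame replace the pushes
   when frame.save_regs_using_mov is set):

	pushl	%ebp
	movl	%esp, %ebp
	pushl	%ebx                          one push per callee-saved register
	subl	$N, %esp
	call	__i686.get_pc_thunk.bx        only when the PIC register is used
	addl	$_GLOBAL_OFFSET_TABLE_, %ebx  */
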
4408 /* Emit code to restore saved registers using MOV insns. First register
4409 is restored from POINTER + OFFSET. */
4410 static void
4411 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
4412 int maybe_eh_return)
4413 {
4414 int regno;
4415 rtx base_address = gen_rtx_MEM (Pmode, pointer);
4416
4417 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4418 if (ix86_save_reg (regno, maybe_eh_return))
4419 {
4420 /* Ensure that adjust_address won't be forced to produce a pointer
4421 outside the range allowed by the x86-64 instruction set. */
4422 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
4423 {
4424 rtx r11;
4425
4426 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
4427 emit_move_insn (r11, GEN_INT (offset));
4428 emit_insn (gen_adddi3 (r11, r11, pointer));
4429 base_address = gen_rtx_MEM (Pmode, r11);
4430 offset = 0;
4431 }
4432 emit_move_insn (gen_rtx_REG (Pmode, regno),
4433 adjust_address (base_address, Pmode, offset));
4434 offset += UNITS_PER_WORD;
4435 }
4436 }
4437
4438 /* Restore function stack, frame, and registers. */
4439
4440 void
4441 ix86_expand_epilogue (int style)
4442 {
4443 int regno;
4444 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
4445 struct ix86_frame frame;
4446 HOST_WIDE_INT offset;
4447
4448 ix86_compute_frame_layout (&frame);
4449
4450 /* Calculate start of saved registers relative to ebp. Special care
4451 must be taken for the normal return case of a function using
4452 eh_return: the eax and edx registers are marked as saved, but not
4453 restored along this path. */
4454 offset = frame.nregs;
4455 if (current_function_calls_eh_return && style != 2)
4456 offset -= 2;
4457 offset *= -UNITS_PER_WORD;
4458
4459 /* If we're only restoring one register and sp is not valid then
4460 use a move instruction to restore the register, since it's
4461 less work than reloading sp and popping the register.
4462
4463 The default code results in a stack adjustment using an add/lea
4464 instruction, while this code results in a LEAVE instruction (or discrete
4465 equivalent), so it is profitable in some other cases as well, especially
4466 when there are no registers to restore. We also use this code when
4467 TARGET_USE_LEAVE is set and there is exactly one register to pop. This
4468 heuristic may need some tuning in future. */
4469 if ((!sp_valid && frame.nregs <= 1)
4470 || (TARGET_EPILOGUE_USING_MOVE
4471 && cfun->machine->use_fast_prologue_epilogue
4472 && (frame.nregs > 1 || frame.to_allocate))
4473 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
4474 || (frame_pointer_needed && TARGET_USE_LEAVE
4475 && cfun->machine->use_fast_prologue_epilogue
4476 && frame.nregs == 1)
4477 || current_function_calls_eh_return)
4478 {
4479 /* Restore registers. We can use ebp or esp to address the memory
4480 locations. If both are available, default to ebp, since offsets
4481 are known to be small. The only exception is esp pointing directly
4482 to the end of the block of saved registers, where we may simplify
4483 the addressing mode. */
4484
4485 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
4486 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
4487 frame.to_allocate, style == 2);
4488 else
4489 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
4490 offset, style == 2);
4491
4492 /* eh_return epilogues need %ecx added to the stack pointer. */
4493 if (style == 2)
4494 {
4495 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
4496
4497 if (frame_pointer_needed)
4498 {
4499 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
4500 tmp = plus_constant (tmp, UNITS_PER_WORD);
4501 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
4502
4503 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
4504 emit_move_insn (hard_frame_pointer_rtx, tmp);
4505
4506 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
4507 const0_rtx, style);
4508 }
4509 else
4510 {
4511 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
4512 tmp = plus_constant (tmp, (frame.to_allocate
4513 + frame.nregs * UNITS_PER_WORD));
4514 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
4515 }
4516 }
4517 else if (!frame_pointer_needed)
4518 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
4519 GEN_INT (frame.to_allocate
4520 + frame.nregs * UNITS_PER_WORD),
4521 style);
4522 /* If not an i386, mov & pop is faster than "leave". */
4523 else if (TARGET_USE_LEAVE || optimize_size
4524 || !cfun->machine->use_fast_prologue_epilogue)
4525 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4526 else
4527 {
4528 pro_epilogue_adjust_stack (stack_pointer_rtx,
4529 hard_frame_pointer_rtx,
4530 const0_rtx, style);
4531 if (TARGET_64BIT)
4532 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4533 else
4534 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4535 }
4536 }
4537 else
4538 {
4539 /* First step is to deallocate the stack frame so that we can
4540 pop the registers. */
4541 if (!sp_valid)
4542 {
4543 if (!frame_pointer_needed)
4544 abort ();
4545 pro_epilogue_adjust_stack (stack_pointer_rtx,
4546 hard_frame_pointer_rtx,
4547 GEN_INT (offset), style);
4548 }
4549 else if (frame.to_allocate)
4550 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
4551 GEN_INT (frame.to_allocate), style);
4552
4553 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4554 if (ix86_save_reg (regno, false))
4555 {
4556 if (TARGET_64BIT)
4557 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
4558 else
4559 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
4560 }
4561 if (frame_pointer_needed)
4562 {
4563 /* The `leave' instruction results in shorter dependency chains on CPUs
4564 that are able to grok it fast. */
4565 if (TARGET_USE_LEAVE)
4566 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4567 else if (TARGET_64BIT)
4568 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4569 else
4570 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4571 }
4572 }
4573
4574 /* Sibcall epilogues don't want a return instruction. */
4575 if (style == 0)
4576 return;
4577
4578 if (current_function_pops_args && current_function_args_size)
4579 {
4580 rtx popc = GEN_INT (current_function_pops_args);
4581
4582 /* i386 can only pop 64K bytes. If asked to pop more, pop
4583 return address, do explicit add, and jump indirectly to the
4584 caller. */
4585
4586 if (current_function_pops_args >= 65536)
4587 {
4588 rtx ecx = gen_rtx_REG (SImode, 2);
4589
4590 /* There is no "pascal" calling convention in 64bit ABI. */
4591 if (TARGET_64BIT)
4592 abort ();
4593
4594 emit_insn (gen_popsi1 (ecx));
4595 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
4596 emit_jump_insn (gen_return_indirect_internal (ecx));
4597 }
4598 else
4599 emit_jump_insn (gen_return_pop_internal (popc));
4600 }
4601 else
4602 emit_jump_insn (gen_return_internal ());
4603 }
4604
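/* For illustration, two common epilogue shapes expanded by the code above
   (sketches only; the exact form depends on TARGET_USE_LEAVE, the frame
   layout and use_fast_prologue_epilogue):

	frame pointer in use:            no frame pointer:
		leal	-K(%ebp), %esp           addl	$N, %esp
		popl	%ebx                     popl	%ebx
		leave                            ret
		ret

   When current_function_pops_args is set, the final "ret" becomes "ret $N"
   (or, for pops of 64K or more, a pop of the return address, an explicit
   add and an indirect jump, as coded above).  */
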
4605 /* Reset from the function's potential modifications. */
4606
4607 static void
4608 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
4609 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
4610 {
4611 if (pic_offset_table_rtx)
4612 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
4613 }
4614 \f
4615 /* Extract the parts of an RTL expression that is a valid memory address
4616 for an instruction. Return 0 if the structure of the address is
4617 grossly off. Return -1 if the address contains ASHIFT, so it is not
4618 strictly valid but is still used for computing the length of the lea instruction. */
4619
4620 int
4621 ix86_decompose_address (rtx addr, struct ix86_address *out)
4622 {
4623 rtx base = NULL_RTX;
4624 rtx index = NULL_RTX;
4625 rtx disp = NULL_RTX;
4626 HOST_WIDE_INT scale = 1;
4627 rtx scale_rtx = NULL_RTX;
4628 int retval = 1;
4629 enum ix86_address_seg seg = SEG_DEFAULT;
4630
4631 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
4632 base = addr;
4633 else if (GET_CODE (addr) == PLUS)
4634 {
4635 rtx addends[4], op;
4636 int n = 0, i;
4637
4638 op = addr;
4639 do
4640 {
4641 if (n >= 4)
4642 return 0;
4643 addends[n++] = XEXP (op, 1);
4644 op = XEXP (op, 0);
4645 }
4646 while (GET_CODE (op) == PLUS);
4647 if (n >= 4)
4648 return 0;
4649 addends[n] = op;
4650
4651 for (i = n; i >= 0; --i)
4652 {
4653 op = addends[i];
4654 switch (GET_CODE (op))
4655 {
4656 case MULT:
4657 if (index)
4658 return 0;
4659 index = XEXP (op, 0);
4660 scale_rtx = XEXP (op, 1);
4661 break;
4662
4663 case UNSPEC:
4664 if (XINT (op, 1) == UNSPEC_TP
4665 && TARGET_TLS_DIRECT_SEG_REFS
4666 && seg == SEG_DEFAULT)
4667 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
4668 else
4669 return 0;
4670 break;
4671
4672 case REG:
4673 case SUBREG:
4674 if (!base)
4675 base = op;
4676 else if (!index)
4677 index = op;
4678 else
4679 return 0;
4680 break;
4681
4682 case CONST:
4683 case CONST_INT:
4684 case SYMBOL_REF:
4685 case LABEL_REF:
4686 if (disp)
4687 return 0;
4688 disp = op;
4689 break;
4690
4691 default:
4692 return 0;
4693 }
4694 }
4695 }
4696 else if (GET_CODE (addr) == MULT)
4697 {
4698 index = XEXP (addr, 0); /* index*scale */
4699 scale_rtx = XEXP (addr, 1);
4700 }
4701 else if (GET_CODE (addr) == ASHIFT)
4702 {
4703 rtx tmp;
4704
4705 /* We're called for lea too, which implements ashift on occasion. */
4706 index = XEXP (addr, 0);
4707 tmp = XEXP (addr, 1);
4708 if (GET_CODE (tmp) != CONST_INT)
4709 return 0;
4710 scale = INTVAL (tmp);
4711 if ((unsigned HOST_WIDE_INT) scale > 3)
4712 return 0;
4713 scale = 1 << scale;
4714 retval = -1;
4715 }
4716 else
4717 disp = addr; /* displacement */
4718
4719 /* Extract the integral value of scale. */
4720 if (scale_rtx)
4721 {
4722 if (GET_CODE (scale_rtx) != CONST_INT)
4723 return 0;
4724 scale = INTVAL (scale_rtx);
4725 }
4726
4727 /* Allow the arg pointer and stack pointer as an index if there is no scaling. */
4728 if (base && index && scale == 1
4729 && (index == arg_pointer_rtx
4730 || index == frame_pointer_rtx
4731 || (REG_P (index) && REGNO (index) == STACK_POINTER_REGNUM)))
4732 {
4733 rtx tmp = base;
4734 base = index;
4735 index = tmp;
4736 }
4737
4738 /* Special case: %ebp cannot be encoded as a base without a displacement. */
4739 if ((base == hard_frame_pointer_rtx
4740 || base == frame_pointer_rtx
4741 || base == arg_pointer_rtx) && !disp)
4742 disp = const0_rtx;
4743
4744 /* Special case: on K6, [%esi] forces the instruction to be vector decoded.
4745 Avoid this by transforming it to [%esi+0]. */
4746 if (ix86_tune == PROCESSOR_K6 && !optimize_size
4747 && base && !index && !disp
4748 && REG_P (base)
4749 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
4750 disp = const0_rtx;
4751
4752 /* Special case: encode reg+reg instead of reg*2. */
4753 if (!base && index && scale && scale == 2)
4754 base = index, scale = 1;
4755
4756 /* Special case: scaling cannot be encoded without base or displacement. */
4757 if (!base && !disp && index && scale != 1)
4758 disp = const0_rtx;
4759
4760 out->base = base;
4761 out->index = index;
4762 out->disp = disp;
4763 out->scale = scale;
4764 out->seg = seg;
4765
4766 return retval;
4767 }
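
/* For illustration, how a few common addresses decompose into the fields
   of struct ix86_address (AT&T forms shown for reference):

	(reg %ebx)                              base=%ebx
	                                        -> (%ebx)
	(plus (reg %ebx) (const_int 12))        base=%ebx disp=12
	                                        -> 12(%ebx)
	(plus (plus (mult (reg %ecx) (const_int 4))
	            (reg %ebx))
	      (const_int 12))                   base=%ebx index=%ecx scale=4 disp=12
	                                        -> 12(%ebx,%ecx,4)
	(ashift (reg %ecx) (const_int 2))       index=%ecx scale=4, returns -1
	                                        (lea only; not a valid address)  */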
4768 \f
4769 /* Return the cost of the memory address X.
4770 For i386, it is better to use a complex address than let gcc copy
4771 the address into a reg and make a new pseudo. But not if the address
4772 requires two regs - that would mean more pseudos with longer
4773 lifetimes. */
4774 static int
4775 ix86_address_cost (rtx x)
4776 {
4777 struct ix86_address parts;
4778 int cost = 1;
4779
4780 if (!ix86_decompose_address (x, &parts))
4781 abort ();
4782
4783 /* More complex memory references are better. */
4784 if (parts.disp && parts.disp != const0_rtx)
4785 cost--;
4786 if (parts.seg != SEG_DEFAULT)
4787 cost--;
4788
4789 /* Attempt to minimize number of registers in the address. */
4790 if ((parts.base
4791 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
4792 || (parts.index
4793 && (!REG_P (parts.index)
4794 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
4795 cost++;
4796
4797 if (parts.base
4798 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
4799 && parts.index
4800 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
4801 && parts.base != parts.index)
4802 cost++;
4803
4804 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
4805 since its predecode logic can't detect the length of such instructions
4806 and they degenerate to vector decoding. Increase the cost of such
4807 addresses here. The penalty is at least 2 cycles. It may be worthwhile
4808 to split such addresses or even refuse them altogether.
4809
4810 The following addressing modes are affected:
4811 [base+scale*index]
4812 [scale*index+disp]
4813 [base+index]
4814
4815 The first and last cases may be avoidable by explicitly coding the zero
4816 in the memory address, but I don't have an AMD-K6 machine handy to check
4817 this theory. */
4818
4819 if (TARGET_K6
4820 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
4821 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
4822 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
4823 cost += 10;
4824
4825 return cost;
4826 }
4827 \f
4828 /* If X is a machine specific address (i.e. a symbol or label being
4829 referenced as a displacement from the GOT implemented using an
4830 UNSPEC), then return the base term. Otherwise return X. */
4831
4832 rtx
4833 ix86_find_base_term (rtx x)
4834 {
4835 rtx term;
4836
4837 if (TARGET_64BIT)
4838 {
4839 if (GET_CODE (x) != CONST)
4840 return x;
4841 term = XEXP (x, 0);
4842 if (GET_CODE (term) == PLUS
4843 && (GET_CODE (XEXP (term, 1)) == CONST_INT
4844 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
4845 term = XEXP (term, 0);
4846 if (GET_CODE (term) != UNSPEC
4847 || XINT (term, 1) != UNSPEC_GOTPCREL)
4848 return x;
4849
4850 term = XVECEXP (term, 0, 0);
4851
4852 if (GET_CODE (term) != SYMBOL_REF
4853 && GET_CODE (term) != LABEL_REF)
4854 return x;
4855
4856 return term;
4857 }
4858
4859 term = ix86_delegitimize_address (x);
4860
4861 if (GET_CODE (term) != SYMBOL_REF
4862 && GET_CODE (term) != LABEL_REF)
4863 return x;
4864
4865 return term;
4866 }
4867
4868 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O, as
4869 this is used to form addresses to local data when -fPIC is in
4870 use. */
4871
4872 static bool
4873 darwin_local_data_pic (rtx disp)
4874 {
4875 if (GET_CODE (disp) == MINUS)
4876 {
4877 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
4878 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
4879 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
4880 {
4881 const char *sym_name = XSTR (XEXP (disp, 1), 0);
4882 if (! strcmp (sym_name, "<pic base>"))
4883 return true;
4884 }
4885 }
4886
4887 return false;
4888 }
4889 \f
4890 /* Determine if a given RTX is a valid constant. We already know this
4891 satisfies CONSTANT_P. */
4892
4893 bool
4894 legitimate_constant_p (rtx x)
4895 {
4896 switch (GET_CODE (x))
4897 {
4898 case CONST:
4899 x = XEXP (x, 0);
4900
4901 if (GET_CODE (x) == PLUS)
4902 {
4903 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
4904 return false;
4905 x = XEXP (x, 0);
4906 }
4907
4908 if (TARGET_MACHO && darwin_local_data_pic (x))
4909 return true;
4910
4911 /* Only some unspecs are valid as "constants". */
4912 if (GET_CODE (x) == UNSPEC)
4913 switch (XINT (x, 1))
4914 {
4915 case UNSPEC_TPOFF:
4916 case UNSPEC_NTPOFF:
4917 return local_exec_symbolic_operand (XVECEXP (x, 0, 0), Pmode);
4918 case UNSPEC_DTPOFF:
4919 return local_dynamic_symbolic_operand (XVECEXP (x, 0, 0), Pmode);
4920 default:
4921 return false;
4922 }
4923
4924 /* We must have drilled down to a symbol. */
4925 if (!symbolic_operand (x, Pmode))
4926 return false;
4927 /* FALLTHRU */
4928
4929 case SYMBOL_REF:
4930 /* TLS symbols are never valid. */
4931 if (tls_symbolic_operand (x, Pmode))
4932 return false;
4933 break;
4934
4935 default:
4936 break;
4937 }
4938
4939 /* Otherwise we handle everything else in the move patterns. */
4940 return true;
4941 }
4942
4943 /* Determine if it's legal to put X into the constant pool. This
4944 is not possible for the address of thread-local symbols, which
4945 is checked above. */
4946
4947 static bool
4948 ix86_cannot_force_const_mem (rtx x)
4949 {
4950 return !legitimate_constant_p (x);
4951 }
4952
4953 /* Determine if a given RTX is a valid constant address. */
4954
4955 bool
4956 constant_address_p (rtx x)
4957 {
4958 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
4959 }
4960
4961 /* Nonzero if the constant value X is a legitimate general operand
4962 when generating PIC code. It is given that flag_pic is on and
4963 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
4964
4965 bool
4966 legitimate_pic_operand_p (rtx x)
4967 {
4968 rtx inner;
4969
4970 switch (GET_CODE (x))
4971 {
4972 case CONST:
4973 inner = XEXP (x, 0);
4974
4975 /* Only some unspecs are valid as "constants". */
4976 if (GET_CODE (inner) == UNSPEC)
4977 switch (XINT (inner, 1))
4978 {
4979 case UNSPEC_TPOFF:
4980 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
4981 default:
4982 return false;
4983 }
4984 /* FALLTHRU */
4985
4986 case SYMBOL_REF:
4987 case LABEL_REF:
4988 return legitimate_pic_address_disp_p (x);
4989
4990 default:
4991 return true;
4992 }
4993 }
4994
4995 /* Determine if a given CONST RTX is a valid memory displacement
4996 in PIC mode. */
4997
4998 int
4999 legitimate_pic_address_disp_p (rtx disp)
5000 {
5001 bool saw_plus;
5002
5003 /* In 64bit mode we can allow direct addresses of symbols and labels
5004 when they are not dynamic symbols. */
5005 if (TARGET_64BIT)
5006 {
5007 /* TLS references should always be enclosed in UNSPEC. */
5008 if (tls_symbolic_operand (disp, GET_MODE (disp)))
5009 return 0;
5010 if (GET_CODE (disp) == SYMBOL_REF
5011 && ix86_cmodel == CM_SMALL_PIC
5012 && SYMBOL_REF_LOCAL_P (disp))
5013 return 1;
5014 if (GET_CODE (disp) == LABEL_REF)
5015 return 1;
5016 if (GET_CODE (disp) == CONST
5017 && GET_CODE (XEXP (disp, 0)) == PLUS)
5018 {
5019 rtx op0 = XEXP (XEXP (disp, 0), 0);
5020 rtx op1 = XEXP (XEXP (disp, 0), 1);
5021
5022 /* TLS references should always be enclosed in UNSPEC. */
5023 if (tls_symbolic_operand (op0, GET_MODE (op0)))
5024 return 0;
5025 if (((GET_CODE (op0) == SYMBOL_REF
5026 && ix86_cmodel == CM_SMALL_PIC
5027 && SYMBOL_REF_LOCAL_P (op0))
5028 || GET_CODE (op0) == LABEL_REF)
5029 && GET_CODE (op1) == CONST_INT
5030 && INTVAL (op1) < 16*1024*1024
5031 && INTVAL (op1) >= -16*1024*1024)
5032 return 1;
5033 }
5034 }
5035 if (GET_CODE (disp) != CONST)
5036 return 0;
5037 disp = XEXP (disp, 0);
5038
5039 if (TARGET_64BIT)
5040 {
5041 /* It is unsafe to allow PLUS expressions; this would limit the allowed
5042 distance of GOT tables. We should not need these anyway. */
5043 if (GET_CODE (disp) != UNSPEC
5044 || XINT (disp, 1) != UNSPEC_GOTPCREL)
5045 return 0;
5046
5047 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
5048 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
5049 return 0;
5050 return 1;
5051 }
5052
5053 saw_plus = false;
5054 if (GET_CODE (disp) == PLUS)
5055 {
5056 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
5057 return 0;
5058 disp = XEXP (disp, 0);
5059 saw_plus = true;
5060 }
5061
5062 if (TARGET_MACHO && darwin_local_data_pic (disp))
5063 return 1;
5064
5065 if (GET_CODE (disp) != UNSPEC)
5066 return 0;
5067
5068 switch (XINT (disp, 1))
5069 {
5070 case UNSPEC_GOT:
5071 if (saw_plus)
5072 return false;
5073 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
5074 case UNSPEC_GOTOFF:
5075 if (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
5076 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
5077 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5078 return false;
5079 case UNSPEC_GOTTPOFF:
5080 case UNSPEC_GOTNTPOFF:
5081 case UNSPEC_INDNTPOFF:
5082 if (saw_plus)
5083 return false;
5084 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5085 case UNSPEC_NTPOFF:
5086 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5087 case UNSPEC_DTPOFF:
5088 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5089 }
5090
5091 return 0;
5092 }
5093
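/* For illustration, the UNSPEC wrappers accepted above correspond to the
   familiar assembler relocation operators (a rough mapping):

	UNSPEC_GOT         foo@GOT        address of the GOT slot for foo
	UNSPEC_GOTOFF      foo@GOTOFF     offset of local foo from the GOT
	UNSPEC_GOTPCREL    foo@GOTPCREL   64-bit RIP-relative GOT slot
	UNSPEC_GOTTPOFF    foo@GOTTPOFF   TLS initial-exec GOT entry
	UNSPEC_NTPOFF      foo@NTPOFF     TLS local-exec offset
	UNSPEC_DTPOFF      foo@DTPOFF     TLS local-dynamic offset  */
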
5094 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5095 memory address for an instruction. The MODE argument is the machine mode
5096 for the MEM expression that wants to use this address.
5097
5098 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
5099 convert common non-canonical forms to canonical form so that they will
5100 be recognized. */
5101
5102 int
5103 legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
5104 {
5105 struct ix86_address parts;
5106 rtx base, index, disp;
5107 HOST_WIDE_INT scale;
5108 const char *reason = NULL;
5109 rtx reason_rtx = NULL_RTX;
5110
5111 if (TARGET_DEBUG_ADDR)
5112 {
5113 fprintf (stderr,
5114 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
5115 GET_MODE_NAME (mode), strict);
5116 debug_rtx (addr);
5117 }
5118
5119 if (ix86_decompose_address (addr, &parts) <= 0)
5120 {
5121 reason = "decomposition failed";
5122 goto report_error;
5123 }
5124
5125 base = parts.base;
5126 index = parts.index;
5127 disp = parts.disp;
5128 scale = parts.scale;
5129
5130 /* Validate base register.
5131
5132 Don't allow SUBREGs here; it can lead to spill failures when the base
5133 is one word out of a two word structure, which is represented internally
5134 as a DImode int. */
5135
5136 if (base)
5137 {
5138 reason_rtx = base;
5139
5140 if (GET_CODE (base) != REG)
5141 {
5142 reason = "base is not a register";
5143 goto report_error;
5144 }
5145
5146 if (GET_MODE (base) != Pmode)
5147 {
5148 reason = "base is not in Pmode";
5149 goto report_error;
5150 }
5151
5152 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
5153 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
5154 {
5155 reason = "base is not valid";
5156 goto report_error;
5157 }
5158 }
5159
5160 /* Validate index register.
5161
5162 Don't allow SUBREGs here; it can lead to spill failures when the index
5163 is one word out of a two word structure, which is represented internally
5164 as a DImode int. */
5165
5166 if (index)
5167 {
5168 reason_rtx = index;
5169
5170 if (GET_CODE (index) != REG)
5171 {
5172 reason = "index is not a register";
5173 goto report_error;
5174 }
5175
5176 if (GET_MODE (index) != Pmode)
5177 {
5178 reason = "index is not in Pmode";
5179 goto report_error;
5180 }
5181
5182 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
5183 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
5184 {
5185 reason = "index is not valid";
5186 goto report_error;
5187 }
5188 }
5189
5190 /* Validate scale factor. */
5191 if (scale != 1)
5192 {
5193 reason_rtx = GEN_INT (scale);
5194 if (!index)
5195 {
5196 reason = "scale without index";
5197 goto report_error;
5198 }
5199
5200 if (scale != 2 && scale != 4 && scale != 8)
5201 {
5202 reason = "scale is not a valid multiplier";
5203 goto report_error;
5204 }
5205 }
5206
5207 /* Validate displacement. */
5208 if (disp)
5209 {
5210 reason_rtx = disp;
5211
5212 if (GET_CODE (disp) == CONST
5213 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
5214 switch (XINT (XEXP (disp, 0), 1))
5215 {
5216 case UNSPEC_GOT:
5217 case UNSPEC_GOTOFF:
5218 case UNSPEC_GOTPCREL:
5219 if (!flag_pic)
5220 abort ();
5221 goto is_legitimate_pic;
5222
5223 case UNSPEC_GOTTPOFF:
5224 case UNSPEC_GOTNTPOFF:
5225 case UNSPEC_INDNTPOFF:
5226 case UNSPEC_NTPOFF:
5227 case UNSPEC_DTPOFF:
5228 break;
5229
5230 default:
5231 reason = "invalid address unspec";
5232 goto report_error;
5233 }
5234
5235 else if (flag_pic && (SYMBOLIC_CONST (disp)
5236 #if TARGET_MACHO
5237 && !machopic_operand_p (disp)
5238 #endif
5239 ))
5240 {
5241 is_legitimate_pic:
5242 if (TARGET_64BIT && (index || base))
5243 {
5244 /* foo@dtpoff(%rX) is ok. */
5245 if (GET_CODE (disp) != CONST
5246 || GET_CODE (XEXP (disp, 0)) != PLUS
5247 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
5248 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
5249 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
5250 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
5251 {
5252 reason = "non-constant pic memory reference";
5253 goto report_error;
5254 }
5255 }
5256 else if (! legitimate_pic_address_disp_p (disp))
5257 {
5258 reason = "displacement is an invalid pic construct";
5259 goto report_error;
5260 }
5261
5262 /* This code used to verify that a symbolic pic displacement
5263 includes the pic_offset_table_rtx register.
5264
5265 While this is a good idea, unfortunately these constructs may
5266 be created by the "adds using lea" optimization for incorrect
5267 code like:
5268
5269 int a;
5270 int foo(int i)
5271 {
5272 return *(&a+i);
5273 }
5274
5275 This code is nonsensical, but results in addressing the
5276 GOT table with a pic_offset_table_rtx base. We can't
5277 easily reject it, since it gets matched by the
5278 "addsi3" pattern, which is later split to lea when the
5279 output register differs from the input. While this
5280 could be handled by a separate addsi pattern for this case
5281 that never results in lea, disabling this check seems to be
5282 the simpler and correct fix for the crash. */
5283 }
5284 else if (GET_CODE (disp) != LABEL_REF
5285 && GET_CODE (disp) != CONST_INT
5286 && (GET_CODE (disp) != CONST
5287 || !legitimate_constant_p (disp))
5288 && (GET_CODE (disp) != SYMBOL_REF
5289 || !legitimate_constant_p (disp)))
5290 {
5291 reason = "displacement is not constant";
5292 goto report_error;
5293 }
5294 else if (TARGET_64BIT
5295 && !x86_64_immediate_operand (disp, VOIDmode))
5296 {
5297 reason = "displacement is out of range";
5298 goto report_error;
5299 }
5300 }
5301
5302 /* Everything looks valid. */
5303 if (TARGET_DEBUG_ADDR)
5304 fprintf (stderr, "Success.\n");
5305 return TRUE;
5306
5307 report_error:
5308 if (TARGET_DEBUG_ADDR)
5309 {
5310 fprintf (stderr, "Error: %s\n", reason);
5311 debug_rtx (reason_rtx);
5312 }
5313 return FALSE;
5314 }
5315 \f
5316 /* Return a unique alias set for the GOT. */
5317
5318 static HOST_WIDE_INT
5319 ix86_GOT_alias_set (void)
5320 {
5321 static HOST_WIDE_INT set = -1;
5322 if (set == -1)
5323 set = new_alias_set ();
5324 return set;
5325 }
5326
5327 /* Return a legitimate reference for ORIG (an address) using the
5328 register REG. If REG is 0, a new pseudo is generated.
5329
5330 There are two types of references that must be handled:
5331
5332 1. Global data references must load the address from the GOT, via
5333 the PIC reg. An insn is emitted to do this load, and the reg is
5334 returned.
5335
5336 2. Static data references, constant pool addresses, and code labels
5337 compute the address as an offset from the GOT, whose base is in
5338 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
5339 differentiate them from global data objects. The returned
5340 address is the PIC reg + an unspec constant.
5341
5342 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
5343 reg also appears in the address. */
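
/* For illustration (editor's sketch of the two cases above, ia32 -fpic):

     extern int g;    the address is loaded from the GOT:
                        movl  g@GOT(%ebx), %reg
     static int s;    the address is an offset from the GOT base:
                        leal  s@GOTOFF(%ebx), %reg

   i.e. global data goes through UNSPEC_GOT (UNSPEC_GOTPCREL on x86-64)
   and a memory load, while local data uses UNSPEC_GOTOFF added to
   pic_offset_table_rtx.  */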
5344
5345 static rtx
5346 legitimize_pic_address (rtx orig, rtx reg)
5347 {
5348 rtx addr = orig;
5349 rtx new = orig;
5350 rtx base;
5351
5352 #if TARGET_MACHO
5353 if (reg == 0)
5354 reg = gen_reg_rtx (Pmode);
5355 /* Use the generic Mach-O PIC machinery. */
5356 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
5357 #endif
5358
5359 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
5360 new = addr;
5361 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
5362 {
5363 /* This symbol may be referenced via a displacement from the PIC
5364 base address (@GOTOFF). */
5365
5366 if (reload_in_progress)
5367 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5368 if (GET_CODE (addr) == CONST)
5369 addr = XEXP (addr, 0);
5370 if (GET_CODE (addr) == PLUS)
5371 {
5372 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
5373 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
5374 }
5375 else
5376 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
5377 new = gen_rtx_CONST (Pmode, new);
5378 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5379
5380 if (reg != 0)
5381 {
5382 emit_move_insn (reg, new);
5383 new = reg;
5384 }
5385 }
5386 else if (GET_CODE (addr) == SYMBOL_REF)
5387 {
5388 if (TARGET_64BIT)
5389 {
5390 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
5391 new = gen_rtx_CONST (Pmode, new);
5392 new = gen_const_mem (Pmode, new);
5393 set_mem_alias_set (new, ix86_GOT_alias_set ());
5394
5395 if (reg == 0)
5396 reg = gen_reg_rtx (Pmode);
5397 /* Use gen_movsi directly; otherwise the address is loaded
5398 into a register for CSE. We don't want to CSE this address itself;
5399 instead we CSE addresses loaded from the GOT table, so skip this. */
5400 emit_insn (gen_movsi (reg, new));
5401 new = reg;
5402 }
5403 else
5404 {
5405 /* This symbol must be referenced via a load from the
5406 Global Offset Table (@GOT). */
5407
5408 if (reload_in_progress)
5409 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5410 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
5411 new = gen_rtx_CONST (Pmode, new);
5412 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5413 new = gen_const_mem (Pmode, new);
5414 set_mem_alias_set (new, ix86_GOT_alias_set ());
5415
5416 if (reg == 0)
5417 reg = gen_reg_rtx (Pmode);
5418 emit_move_insn (reg, new);
5419 new = reg;
5420 }
5421 }
5422 else
5423 {
5424 if (GET_CODE (addr) == CONST)
5425 {
5426 addr = XEXP (addr, 0);
5427
5428 /* We must match the stuff we generated before. Assume the only
5429 unspecs that can get here are ours. Not that we could do
5430 anything with them anyway.... */
5431 if (GET_CODE (addr) == UNSPEC
5432 || (GET_CODE (addr) == PLUS
5433 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
5434 return orig;
5435 if (GET_CODE (addr) != PLUS)
5436 abort ();
5437 }
5438 if (GET_CODE (addr) == PLUS)
5439 {
5440 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
5441
5442 /* Check first to see if this is a constant offset from a @GOTOFF
5443 symbol reference. */
5444 if (local_symbolic_operand (op0, Pmode)
5445 && GET_CODE (op1) == CONST_INT)
5446 {
5447 if (!TARGET_64BIT)
5448 {
5449 if (reload_in_progress)
5450 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5451 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
5452 UNSPEC_GOTOFF);
5453 new = gen_rtx_PLUS (Pmode, new, op1);
5454 new = gen_rtx_CONST (Pmode, new);
5455 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5456
5457 if (reg != 0)
5458 {
5459 emit_move_insn (reg, new);
5460 new = reg;
5461 }
5462 }
5463 else
5464 {
5465 if (INTVAL (op1) < -16*1024*1024
5466 || INTVAL (op1) >= 16*1024*1024)
5467 new = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
5468 }
5469 }
5470 else
5471 {
5472 base = legitimize_pic_address (XEXP (addr, 0), reg);
5473 new = legitimize_pic_address (XEXP (addr, 1),
5474 base == reg ? NULL_RTX : reg);
5475
5476 if (GET_CODE (new) == CONST_INT)
5477 new = plus_constant (base, INTVAL (new));
5478 else
5479 {
5480 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
5481 {
5482 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
5483 new = XEXP (new, 1);
5484 }
5485 new = gen_rtx_PLUS (Pmode, base, new);
5486 }
5487 }
5488 }
5489 }
5490 return new;
5491 }
5492 \f
5493 /* Load the thread pointer. If TO_REG is true, force it into a register. */
5494
5495 static rtx
5496 get_thread_pointer (int to_reg)
5497 {
5498 rtx tp, reg, insn;
5499
5500 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
5501 if (!to_reg)
5502 return tp;
5503
5504 reg = gen_reg_rtx (Pmode);
5505 insn = gen_rtx_SET (VOIDmode, reg, tp);
5506 insn = emit_insn (insn);
5507
5508 return reg;
5509 }
5510
5511 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
5512 false if we expect this to be used for a memory address and true if
5513 we expect to load the address into a register. */
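
/* Editor's summary of the models handled below (a sketch, derived from
   the code rather than from additional documentation):

     global dynamic   call the tls_global_dynamic_{32,64} pattern
                      (i.e. __tls_get_addr) and use its result.
     local dynamic    call tls_local_dynamic_base_{32,64} once, then add
                      a constant @DTPOFF offset per variable.
     initial exec     load the offset from the GOT (@GOTTPOFF /
                      @GOTNTPOFF / @INDNTPOFF) and combine it with the
                      thread pointer.
     local exec       combine a constant @TPOFF / @NTPOFF offset
                      directly with the thread pointer.  */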
5514
5515 static rtx
5516 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
5517 {
5518 rtx dest, base, off, pic;
5519 int type;
5520
5521 switch (model)
5522 {
5523 case TLS_MODEL_GLOBAL_DYNAMIC:
5524 dest = gen_reg_rtx (Pmode);
5525 if (TARGET_64BIT)
5526 {
5527 rtx rax = gen_rtx_REG (Pmode, 0), insns;
5528
5529 start_sequence ();
5530 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
5531 insns = get_insns ();
5532 end_sequence ();
5533
5534 emit_libcall_block (insns, dest, rax, x);
5535 }
5536 else
5537 emit_insn (gen_tls_global_dynamic_32 (dest, x));
5538 break;
5539
5540 case TLS_MODEL_LOCAL_DYNAMIC:
5541 base = gen_reg_rtx (Pmode);
5542 if (TARGET_64BIT)
5543 {
5544 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
5545
5546 start_sequence ();
5547 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
5548 insns = get_insns ();
5549 end_sequence ();
5550
5551 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
5552 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
5553 emit_libcall_block (insns, base, rax, note);
5554 }
5555 else
5556 emit_insn (gen_tls_local_dynamic_base_32 (base));
5557
5558 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
5559 off = gen_rtx_CONST (Pmode, off);
5560
5561 return gen_rtx_PLUS (Pmode, base, off);
5562
5563 case TLS_MODEL_INITIAL_EXEC:
5564 if (TARGET_64BIT)
5565 {
5566 pic = NULL;
5567 type = UNSPEC_GOTNTPOFF;
5568 }
5569 else if (flag_pic)
5570 {
5571 if (reload_in_progress)
5572 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5573 pic = pic_offset_table_rtx;
5574 type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
5575 }
5576 else if (!TARGET_GNU_TLS)
5577 {
5578 pic = gen_reg_rtx (Pmode);
5579 emit_insn (gen_set_got (pic));
5580 type = UNSPEC_GOTTPOFF;
5581 }
5582 else
5583 {
5584 pic = NULL;
5585 type = UNSPEC_INDNTPOFF;
5586 }
5587
5588 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
5589 off = gen_rtx_CONST (Pmode, off);
5590 if (pic)
5591 off = gen_rtx_PLUS (Pmode, pic, off);
5592 off = gen_const_mem (Pmode, off);
5593 set_mem_alias_set (off, ix86_GOT_alias_set ());
5594
5595 if (TARGET_64BIT || TARGET_GNU_TLS)
5596 {
5597 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
5598 off = force_reg (Pmode, off);
5599 return gen_rtx_PLUS (Pmode, base, off);
5600 }
5601 else
5602 {
5603 base = get_thread_pointer (true);
5604 dest = gen_reg_rtx (Pmode);
5605 emit_insn (gen_subsi3 (dest, base, off));
5606 }
5607 break;
5608
5609 case TLS_MODEL_LOCAL_EXEC:
5610 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
5611 (TARGET_64BIT || TARGET_GNU_TLS)
5612 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
5613 off = gen_rtx_CONST (Pmode, off);
5614
5615 if (TARGET_64BIT || TARGET_GNU_TLS)
5616 {
5617 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
5618 return gen_rtx_PLUS (Pmode, base, off);
5619 }
5620 else
5621 {
5622 base = get_thread_pointer (true);
5623 dest = gen_reg_rtx (Pmode);
5624 emit_insn (gen_subsi3 (dest, base, off));
5625 }
5626 break;
5627
5628 default:
5629 abort ();
5630 }
5631
5632 return dest;
5633 }
5634
5635 /* Try machine-dependent ways of modifying an illegitimate address
5636 to be legitimate. If we find one, return the new, valid address.
5637 This macro is used in only one place: `memory_address' in explow.c.
5638
5639 OLDX is the address as it was before break_out_memory_refs was called.
5640 In some cases it is useful to look at this to decide what needs to be done.
5641
5642 MODE and WIN are passed so that this macro can use
5643 GO_IF_LEGITIMATE_ADDRESS.
5644
5645 It is always safe for this macro to do nothing. It exists to recognize
5646 opportunities to optimize the output.
5647
5648 For the 80386, we handle X+REG by loading X into a register R and
5649 using R+REG. R will go in a general reg and indexing will be used.
5650 However, if REG is a broken-out memory address or multiplication,
5651 nothing needs to be done because REG can certainly go in a general reg.
5652
5653 When -fpic is used, special handling is needed for symbolic references.
5654 See comments by legitimize_pic_address in i386.c for details. */
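
/* For illustration (editor's sketch): one canonicalization performed
   below rewrites a shift inside an address into a multiply, e.g.

       (plus (ashift (reg) (const_int 3)) (reg))
     becomes
       (plus (mult (reg) (const_int 8)) (reg))

   so that the shifted term can be matched as an index with scale 8.  */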
5655
5656 rtx
5657 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
5658 {
5659 int changed = 0;
5660 unsigned log;
5661
5662 if (TARGET_DEBUG_ADDR)
5663 {
5664 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
5665 GET_MODE_NAME (mode));
5666 debug_rtx (x);
5667 }
5668
5669 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
5670 if (log)
5671 return legitimize_tls_address (x, log, false);
5672 if (GET_CODE (x) == CONST
5673 && GET_CODE (XEXP (x, 0)) == PLUS
5674 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
5675 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
5676 {
5677 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
5678 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
5679 }
5680
5681 if (flag_pic && SYMBOLIC_CONST (x))
5682 return legitimize_pic_address (x, 0);
5683
5684 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
5685 if (GET_CODE (x) == ASHIFT
5686 && GET_CODE (XEXP (x, 1)) == CONST_INT
5687 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
5688 {
5689 changed = 1;
5690 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
5691 GEN_INT (1 << log));
5692 }
5693
5694 if (GET_CODE (x) == PLUS)
5695 {
5696 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
5697
5698 if (GET_CODE (XEXP (x, 0)) == ASHIFT
5699 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
5700 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
5701 {
5702 changed = 1;
5703 XEXP (x, 0) = gen_rtx_MULT (Pmode,
5704 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
5705 GEN_INT (1 << log));
5706 }
5707
5708 if (GET_CODE (XEXP (x, 1)) == ASHIFT
5709 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
5710 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
5711 {
5712 changed = 1;
5713 XEXP (x, 1) = gen_rtx_MULT (Pmode,
5714 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
5715 GEN_INT (1 << log));
5716 }
5717
5718 /* Put multiply first if it isn't already. */
5719 if (GET_CODE (XEXP (x, 1)) == MULT)
5720 {
5721 rtx tmp = XEXP (x, 0);
5722 XEXP (x, 0) = XEXP (x, 1);
5723 XEXP (x, 1) = tmp;
5724 changed = 1;
5725 }
5726
5727 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
5728 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
5729 created by virtual register instantiation, register elimination, and
5730 similar optimizations. */
5731 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
5732 {
5733 changed = 1;
5734 x = gen_rtx_PLUS (Pmode,
5735 gen_rtx_PLUS (Pmode, XEXP (x, 0),
5736 XEXP (XEXP (x, 1), 0)),
5737 XEXP (XEXP (x, 1), 1));
5738 }
5739
5740 /* Canonicalize
5741 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
5742 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
5743 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
5744 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5745 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
5746 && CONSTANT_P (XEXP (x, 1)))
5747 {
5748 rtx constant;
5749 rtx other = NULL_RTX;
5750
5751 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5752 {
5753 constant = XEXP (x, 1);
5754 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
5755 }
5756 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
5757 {
5758 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
5759 other = XEXP (x, 1);
5760 }
5761 else
5762 constant = 0;
5763
5764 if (constant)
5765 {
5766 changed = 1;
5767 x = gen_rtx_PLUS (Pmode,
5768 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
5769 XEXP (XEXP (XEXP (x, 0), 1), 0)),
5770 plus_constant (other, INTVAL (constant)));
5771 }
5772 }
5773
5774 if (changed && legitimate_address_p (mode, x, FALSE))
5775 return x;
5776
5777 if (GET_CODE (XEXP (x, 0)) == MULT)
5778 {
5779 changed = 1;
5780 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
5781 }
5782
5783 if (GET_CODE (XEXP (x, 1)) == MULT)
5784 {
5785 changed = 1;
5786 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
5787 }
5788
5789 if (changed
5790 && GET_CODE (XEXP (x, 1)) == REG
5791 && GET_CODE (XEXP (x, 0)) == REG)
5792 return x;
5793
5794 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
5795 {
5796 changed = 1;
5797 x = legitimize_pic_address (x, 0);
5798 }
5799
5800 if (changed && legitimate_address_p (mode, x, FALSE))
5801 return x;
5802
5803 if (GET_CODE (XEXP (x, 0)) == REG)
5804 {
5805 rtx temp = gen_reg_rtx (Pmode);
5806 rtx val = force_operand (XEXP (x, 1), temp);
5807 if (val != temp)
5808 emit_move_insn (temp, val);
5809
5810 XEXP (x, 1) = temp;
5811 return x;
5812 }
5813
5814 else if (GET_CODE (XEXP (x, 1)) == REG)
5815 {
5816 rtx temp = gen_reg_rtx (Pmode);
5817 rtx val = force_operand (XEXP (x, 0), temp);
5818 if (val != temp)
5819 emit_move_insn (temp, val);
5820
5821 XEXP (x, 0) = temp;
5822 return x;
5823 }
5824 }
5825
5826 return x;
5827 }
5828 \f
5829 /* Print an integer constant expression in assembler syntax. Addition
5830 and subtraction are the only arithmetic that may appear in these
5831 expressions. FILE is the stdio stream to write to, X is the rtx, and
5832 CODE is the operand print code from the output string. */
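
/* For illustration (editor's note): for an operand such as
     (const (unspec [(symbol_ref "x")] UNSPEC_GOTOFF))
   this routine emits `x@GOTOFF'; the other UNSPEC_* cases below map to
   the corresponding @GOT, @GOTPCREL, @TPOFF, ... assembler suffixes.  */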
5833
5834 static void
5835 output_pic_addr_const (FILE *file, rtx x, int code)
5836 {
5837 char buf[256];
5838
5839 switch (GET_CODE (x))
5840 {
5841 case PC:
5842 if (flag_pic)
5843 putc ('.', file);
5844 else
5845 abort ();
5846 break;
5847
5848 case SYMBOL_REF:
5849 /* Mark the decl as referenced so that cgraph will output the function. */
5850 if (SYMBOL_REF_DECL (x))
5851 mark_decl_referenced (SYMBOL_REF_DECL (x));
5852
5853 assemble_name (file, XSTR (x, 0));
5854 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
5855 fputs ("@PLT", file);
5856 break;
5857
5858 case LABEL_REF:
5859 x = XEXP (x, 0);
5860 /* FALLTHRU */
5861 case CODE_LABEL:
5862 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
5863 assemble_name (asm_out_file, buf);
5864 break;
5865
5866 case CONST_INT:
5867 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
5868 break;
5869
5870 case CONST:
5871 /* This used to output parentheses around the expression,
5872 but that does not work on the 386 (either ATT or BSD assembler). */
5873 output_pic_addr_const (file, XEXP (x, 0), code);
5874 break;
5875
5876 case CONST_DOUBLE:
5877 if (GET_MODE (x) == VOIDmode)
5878 {
5879 /* We can use %d if the number is <32 bits and positive. */
5880 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
5881 fprintf (file, "0x%lx%08lx",
5882 (unsigned long) CONST_DOUBLE_HIGH (x),
5883 (unsigned long) CONST_DOUBLE_LOW (x));
5884 else
5885 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
5886 }
5887 else
5888 /* We can't handle floating point constants;
5889 PRINT_OPERAND must handle them. */
5890 output_operand_lossage ("floating constant misused");
5891 break;
5892
5893 case PLUS:
5894 /* Some assemblers need integer constants to appear first. */
5895 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
5896 {
5897 output_pic_addr_const (file, XEXP (x, 0), code);
5898 putc ('+', file);
5899 output_pic_addr_const (file, XEXP (x, 1), code);
5900 }
5901 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5902 {
5903 output_pic_addr_const (file, XEXP (x, 1), code);
5904 putc ('+', file);
5905 output_pic_addr_const (file, XEXP (x, 0), code);
5906 }
5907 else
5908 abort ();
5909 break;
5910
5911 case MINUS:
5912 if (!TARGET_MACHO)
5913 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
5914 output_pic_addr_const (file, XEXP (x, 0), code);
5915 putc ('-', file);
5916 output_pic_addr_const (file, XEXP (x, 1), code);
5917 if (!TARGET_MACHO)
5918 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
5919 break;
5920
5921 case UNSPEC:
5922 if (XVECLEN (x, 0) != 1)
5923 abort ();
5924 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
5925 switch (XINT (x, 1))
5926 {
5927 case UNSPEC_GOT:
5928 fputs ("@GOT", file);
5929 break;
5930 case UNSPEC_GOTOFF:
5931 fputs ("@GOTOFF", file);
5932 break;
5933 case UNSPEC_GOTPCREL:
5934 fputs ("@GOTPCREL(%rip)", file);
5935 break;
5936 case UNSPEC_GOTTPOFF:
5937 /* FIXME: This might be @TPOFF in Sun ld too. */
5938 fputs ("@GOTTPOFF", file);
5939 break;
5940 case UNSPEC_TPOFF:
5941 fputs ("@TPOFF", file);
5942 break;
5943 case UNSPEC_NTPOFF:
5944 if (TARGET_64BIT)
5945 fputs ("@TPOFF", file);
5946 else
5947 fputs ("@NTPOFF", file);
5948 break;
5949 case UNSPEC_DTPOFF:
5950 fputs ("@DTPOFF", file);
5951 break;
5952 case UNSPEC_GOTNTPOFF:
5953 if (TARGET_64BIT)
5954 fputs ("@GOTTPOFF(%rip)", file);
5955 else
5956 fputs ("@GOTNTPOFF", file);
5957 break;
5958 case UNSPEC_INDNTPOFF:
5959 fputs ("@INDNTPOFF", file);
5960 break;
5961 default:
5962 output_operand_lossage ("invalid UNSPEC as operand");
5963 break;
5964 }
5965 break;
5966
5967 default:
5968 output_operand_lossage ("invalid expression as operand");
5969 }
5970 }
5971
5972 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
5973 We need to emit DTP-relative relocations. */
5974
5975 void
5976 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
5977 {
5978 fputs (ASM_LONG, file);
5979 output_addr_const (file, x);
5980 fputs ("@DTPOFF", file);
5981 switch (size)
5982 {
5983 case 4:
5984 break;
5985 case 8:
5986 fputs (", 0", file);
5987 break;
5988 default:
5989 abort ();
5990 }
5991 }
5992
5993 /* In the name of slightly smaller debug output, and to cater to
5994 general assembler lossage, recognize PIC+GOTOFF and turn it back
5995 into a direct symbol reference. */
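
/* For illustration (editor's sketch): an address of the form
     (plus (reg:SI %ebx) (const (unspec [(symbol_ref "x")] UNSPEC_GOTOFF)))
   is turned back into plain (symbol_ref "x"), which keeps the debug
   output free of the GOT arithmetic.  */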
5996
5997 static rtx
5998 ix86_delegitimize_address (rtx orig_x)
5999 {
6000 rtx x = orig_x, y;
6001
6002 if (GET_CODE (x) == MEM)
6003 x = XEXP (x, 0);
6004
6005 if (TARGET_64BIT)
6006 {
6007 if (GET_CODE (x) != CONST
6008 || GET_CODE (XEXP (x, 0)) != UNSPEC
6009 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
6010 || GET_CODE (orig_x) != MEM)
6011 return orig_x;
6012 return XVECEXP (XEXP (x, 0), 0, 0);
6013 }
6014
6015 if (GET_CODE (x) != PLUS
6016 || GET_CODE (XEXP (x, 1)) != CONST)
6017 return orig_x;
6018
6019 if (GET_CODE (XEXP (x, 0)) == REG
6020 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6021 /* %ebx + GOT/GOTOFF */
6022 y = NULL;
6023 else if (GET_CODE (XEXP (x, 0)) == PLUS)
6024 {
6025 /* %ebx + %reg * scale + GOT/GOTOFF */
6026 y = XEXP (x, 0);
6027 if (GET_CODE (XEXP (y, 0)) == REG
6028 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
6029 y = XEXP (y, 1);
6030 else if (GET_CODE (XEXP (y, 1)) == REG
6031 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
6032 y = XEXP (y, 0);
6033 else
6034 return orig_x;
6035 if (GET_CODE (y) != REG
6036 && GET_CODE (y) != MULT
6037 && GET_CODE (y) != ASHIFT)
6038 return orig_x;
6039 }
6040 else
6041 return orig_x;
6042
6043 x = XEXP (XEXP (x, 1), 0);
6044 if (GET_CODE (x) == UNSPEC
6045 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6046 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
6047 {
6048 if (y)
6049 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
6050 return XVECEXP (x, 0, 0);
6051 }
6052
6053 if (GET_CODE (x) == PLUS
6054 && GET_CODE (XEXP (x, 0)) == UNSPEC
6055 && GET_CODE (XEXP (x, 1)) == CONST_INT
6056 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6057 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
6058 && GET_CODE (orig_x) != MEM)))
6059 {
6060 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
6061 if (y)
6062 return gen_rtx_PLUS (Pmode, y, x);
6063 return x;
6064 }
6065
6066 return orig_x;
6067 }
6068 \f
6069 static void
6070 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
6071 int fp, FILE *file)
6072 {
6073 const char *suffix;
6074
6075 if (mode == CCFPmode || mode == CCFPUmode)
6076 {
6077 enum rtx_code second_code, bypass_code;
6078 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
6079 if (bypass_code != UNKNOWN || second_code != UNKNOWN)
6080 abort ();
6081 code = ix86_fp_compare_code_to_integer (code);
6082 mode = CCmode;
6083 }
6084 if (reverse)
6085 code = reverse_condition (code);
6086
6087 switch (code)
6088 {
6089 case EQ:
6090 suffix = "e";
6091 break;
6092 case NE:
6093 suffix = "ne";
6094 break;
6095 case GT:
6096 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
6097 abort ();
6098 suffix = "g";
6099 break;
6100 case GTU:
6101 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
6102 Those same assemblers have the same but opposite lossage on cmov. */
6103 if (mode != CCmode)
6104 abort ();
6105 suffix = fp ? "nbe" : "a";
6106 break;
6107 case LT:
6108 if (mode == CCNOmode || mode == CCGOCmode)
6109 suffix = "s";
6110 else if (mode == CCmode || mode == CCGCmode)
6111 suffix = "l";
6112 else
6113 abort ();
6114 break;
6115 case LTU:
6116 if (mode != CCmode)
6117 abort ();
6118 suffix = "b";
6119 break;
6120 case GE:
6121 if (mode == CCNOmode || mode == CCGOCmode)
6122 suffix = "ns";
6123 else if (mode == CCmode || mode == CCGCmode)
6124 suffix = "ge";
6125 else
6126 abort ();
6127 break;
6128 case GEU:
6129 /* ??? As above. */
6130 if (mode != CCmode)
6131 abort ();
6132 suffix = fp ? "nb" : "ae";
6133 break;
6134 case LE:
6135 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
6136 abort ();
6137 suffix = "le";
6138 break;
6139 case LEU:
6140 if (mode != CCmode)
6141 abort ();
6142 suffix = "be";
6143 break;
6144 case UNORDERED:
6145 suffix = fp ? "u" : "p";
6146 break;
6147 case ORDERED:
6148 suffix = fp ? "nu" : "np";
6149 break;
6150 default:
6151 abort ();
6152 }
6153 fputs (suffix, file);
6154 }
6155
6156 /* Print the name of register X to FILE based on its machine mode and number.
6157 If CODE is 'w', pretend the mode is HImode.
6158 If CODE is 'b', pretend the mode is QImode.
6159 If CODE is 'k', pretend the mode is SImode.
6160 If CODE is 'q', pretend the mode is DImode.
6161 If CODE is 'h', pretend the reg is the `high' byte register.
6162 If CODE is 'y', print "st(0)" instead of "st" if the reg is a stack reg. */
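
/* For illustration (editor's note): for hard register 0 this prints
   "%al" for code 'b', "%ah" for 'h', "%ax" for 'w', "%eax" for 'k' and
   "%rax" for 'q' (64-bit only); 'y' prints "st(0)" for the top of the
   FP stack.  */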
6163
6164 void
6165 print_reg (rtx x, int code, FILE *file)
6166 {
6167 if (REGNO (x) == ARG_POINTER_REGNUM
6168 || REGNO (x) == FRAME_POINTER_REGNUM
6169 || REGNO (x) == FLAGS_REG
6170 || REGNO (x) == FPSR_REG)
6171 abort ();
6172
6173 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
6174 putc ('%', file);
6175
6176 if (code == 'w' || MMX_REG_P (x))
6177 code = 2;
6178 else if (code == 'b')
6179 code = 1;
6180 else if (code == 'k')
6181 code = 4;
6182 else if (code == 'q')
6183 code = 8;
6184 else if (code == 'y')
6185 code = 3;
6186 else if (code == 'h')
6187 code = 0;
6188 else
6189 code = GET_MODE_SIZE (GET_MODE (x));
6190
6191 /* Irritatingly, the AMD extended registers use a different naming
6192 convention from the normal registers. */
6193 if (REX_INT_REG_P (x))
6194 {
6195 if (!TARGET_64BIT)
6196 abort ();
6197 switch (code)
6198 {
6199 case 0:
6200 error ("extended registers have no high halves");
6201 break;
6202 case 1:
6203 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
6204 break;
6205 case 2:
6206 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
6207 break;
6208 case 4:
6209 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
6210 break;
6211 case 8:
6212 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
6213 break;
6214 default:
6215 error ("unsupported operand size for extended register");
6216 break;
6217 }
6218 return;
6219 }
6220 switch (code)
6221 {
6222 case 3:
6223 if (STACK_TOP_P (x))
6224 {
6225 fputs ("st(0)", file);
6226 break;
6227 }
6228 /* FALLTHRU */
6229 case 8:
6230 case 4:
6231 case 12:
6232 if (! ANY_FP_REG_P (x))
6233 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
6234 /* FALLTHRU */
6235 case 16:
6236 case 2:
6237 normal:
6238 fputs (hi_reg_name[REGNO (x)], file);
6239 break;
6240 case 1:
6241 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
6242 goto normal;
6243 fputs (qi_reg_name[REGNO (x)], file);
6244 break;
6245 case 0:
6246 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
6247 goto normal;
6248 fputs (qi_high_reg_name[REGNO (x)], file);
6249 break;
6250 default:
6251 abort ();
6252 }
6253 }
6254
6255 /* Locate some local-dynamic symbol still in use by this function
6256 so that we can print its name in some tls_local_dynamic_base
6257 pattern. */
6258
6259 static const char *
6260 get_some_local_dynamic_name (void)
6261 {
6262 rtx insn;
6263
6264 if (cfun->machine->some_ld_name)
6265 return cfun->machine->some_ld_name;
6266
6267 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
6268 if (INSN_P (insn)
6269 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
6270 return cfun->machine->some_ld_name;
6271
6272 abort ();
6273 }
6274
6275 static int
6276 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
6277 {
6278 rtx x = *px;
6279
6280 if (GET_CODE (x) == SYMBOL_REF
6281 && local_dynamic_symbolic_operand (x, Pmode))
6282 {
6283 cfun->machine->some_ld_name = XSTR (x, 0);
6284 return 1;
6285 }
6286
6287 return 0;
6288 }
6289
6290 /* Meaning of CODE:
6291 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
6292 C -- print opcode suffix for set/cmov insn.
6293 c -- like C, but print reversed condition
6294 F,f -- likewise, but for floating-point.
6295 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
6296 otherwise nothing
6297 R -- print the prefix for register names.
6298 z -- print the opcode suffix for the size of the current operand.
6299 * -- print a star (in certain assembler syntax)
6300 A -- print an absolute memory reference.
6301 w -- print the operand as if it's a "word" (HImode) even if it isn't.
6302 s -- print a shift double count, followed by the assembler's argument
6303 delimiter.
6304 b -- print the QImode name of the register for the indicated operand.
6305 %b0 would print %al if operands[0] is reg 0.
6306 w -- likewise, print the HImode name of the register.
6307 k -- likewise, print the SImode name of the register.
6308 q -- likewise, print the DImode name of the register.
6309 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
6310 y -- print "st(0)" instead of "st" as a register.
6311 D -- print condition for SSE cmp instruction.
6312 P -- if PIC, print an @PLT suffix.
6313 X -- don't print any sort of PIC '@' suffix for a symbol.
6314 & -- print some in-use local-dynamic symbol name.
6315 */
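
/* For illustration (editor's sketch; the real templates live in
   i386.md): with operands[0] = (reg:SI 0), "%k0" prints "%eax", "%w0"
   prints "%ax" and "%b0" prints "%al", while "%z1" appends the size
   suffix derived from the mode of operands[1] (e.g. 'l' for SImode).  */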
6316
6317 void
6318 print_operand (FILE *file, rtx x, int code)
6319 {
6320 if (code)
6321 {
6322 switch (code)
6323 {
6324 case '*':
6325 if (ASSEMBLER_DIALECT == ASM_ATT)
6326 putc ('*', file);
6327 return;
6328
6329 case '&':
6330 assemble_name (file, get_some_local_dynamic_name ());
6331 return;
6332
6333 case 'A':
6334 if (ASSEMBLER_DIALECT == ASM_ATT)
6335 putc ('*', file);
6336 else if (ASSEMBLER_DIALECT == ASM_INTEL)
6337 {
6338 /* Intel syntax. For absolute addresses, registers should not
6339 be surrounded by brackets. */
6340 if (GET_CODE (x) != REG)
6341 {
6342 putc ('[', file);
6343 PRINT_OPERAND (file, x, 0);
6344 putc (']', file);
6345 return;
6346 }
6347 }
6348 else
6349 abort ();
6350
6351 PRINT_OPERAND (file, x, 0);
6352 return;
6353
6354
6355 case 'L':
6356 if (ASSEMBLER_DIALECT == ASM_ATT)
6357 putc ('l', file);
6358 return;
6359
6360 case 'W':
6361 if (ASSEMBLER_DIALECT == ASM_ATT)
6362 putc ('w', file);
6363 return;
6364
6365 case 'B':
6366 if (ASSEMBLER_DIALECT == ASM_ATT)
6367 putc ('b', file);
6368 return;
6369
6370 case 'Q':
6371 if (ASSEMBLER_DIALECT == ASM_ATT)
6372 putc ('l', file);
6373 return;
6374
6375 case 'S':
6376 if (ASSEMBLER_DIALECT == ASM_ATT)
6377 putc ('s', file);
6378 return;
6379
6380 case 'T':
6381 if (ASSEMBLER_DIALECT == ASM_ATT)
6382 putc ('t', file);
6383 return;
6384
6385 case 'z':
6386 /* 387 opcodes don't get size suffixes if the operands are
6387 registers. */
6388 if (STACK_REG_P (x))
6389 return;
6390
6391 /* Likewise if using Intel opcodes. */
6392 if (ASSEMBLER_DIALECT == ASM_INTEL)
6393 return;
6394
6395 /* This is the size of op from size of operand. */
6396 switch (GET_MODE_SIZE (GET_MODE (x)))
6397 {
6398 case 2:
6399 #ifdef HAVE_GAS_FILDS_FISTS
6400 putc ('s', file);
6401 #endif
6402 return;
6403
6404 case 4:
6405 if (GET_MODE (x) == SFmode)
6406 {
6407 putc ('s', file);
6408 return;
6409 }
6410 else
6411 putc ('l', file);
6412 return;
6413
6414 case 12:
6415 case 16:
6416 putc ('t', file);
6417 return;
6418
6419 case 8:
6420 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
6421 {
6422 #ifdef GAS_MNEMONICS
6423 putc ('q', file);
6424 #else
6425 putc ('l', file);
6426 putc ('l', file);
6427 #endif
6428 }
6429 else
6430 putc ('l', file);
6431 return;
6432
6433 default:
6434 abort ();
6435 }
6436
6437 case 'b':
6438 case 'w':
6439 case 'k':
6440 case 'q':
6441 case 'h':
6442 case 'y':
6443 case 'X':
6444 case 'P':
6445 break;
6446
6447 case 's':
6448 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
6449 {
6450 PRINT_OPERAND (file, x, 0);
6451 putc (',', file);
6452 }
6453 return;
6454
6455 case 'D':
6456 /* A little bit of brain damage here. The SSE compare instructions
6457 use completely different names for the comparisons than the
6458 fp conditional moves do. */
6459 switch (GET_CODE (x))
6460 {
6461 case EQ:
6462 case UNEQ:
6463 fputs ("eq", file);
6464 break;
6465 case LT:
6466 case UNLT:
6467 fputs ("lt", file);
6468 break;
6469 case LE:
6470 case UNLE:
6471 fputs ("le", file);
6472 break;
6473 case UNORDERED:
6474 fputs ("unord", file);
6475 break;
6476 case NE:
6477 case LTGT:
6478 fputs ("neq", file);
6479 break;
6480 case UNGE:
6481 case GE:
6482 fputs ("nlt", file);
6483 break;
6484 case UNGT:
6485 case GT:
6486 fputs ("nle", file);
6487 break;
6488 case ORDERED:
6489 fputs ("ord", file);
6490 break;
6491 default:
6492 abort ();
6493 break;
6494 }
6495 return;
6496 case 'O':
6497 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
6498 if (ASSEMBLER_DIALECT == ASM_ATT)
6499 {
6500 switch (GET_MODE (x))
6501 {
6502 case HImode: putc ('w', file); break;
6503 case SImode:
6504 case SFmode: putc ('l', file); break;
6505 case DImode:
6506 case DFmode: putc ('q', file); break;
6507 default: abort ();
6508 }
6509 putc ('.', file);
6510 }
6511 #endif
6512 return;
6513 case 'C':
6514 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
6515 return;
6516 case 'F':
6517 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
6518 if (ASSEMBLER_DIALECT == ASM_ATT)
6519 putc ('.', file);
6520 #endif
6521 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
6522 return;
6523
6524 /* Like above, but reverse condition */
6525 case 'c':
6526 /* Check to see if argument to %c is really a constant
6527 and not a condition code which needs to be reversed. */
6528 if (!COMPARISON_P (x))
6529 {
6530 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
6531 return;
6532 }
6533 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
6534 return;
6535 case 'f':
6536 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
6537 if (ASSEMBLER_DIALECT == ASM_ATT)
6538 putc ('.', file);
6539 #endif
6540 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
6541 return;
6542 case '+':
6543 {
6544 rtx x;
6545
6546 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
6547 return;
6548
6549 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
6550 if (x)
6551 {
6552 int pred_val = INTVAL (XEXP (x, 0));
6553
6554 if (pred_val < REG_BR_PROB_BASE * 45 / 100
6555 || pred_val > REG_BR_PROB_BASE * 55 / 100)
6556 {
6557 int taken = pred_val > REG_BR_PROB_BASE / 2;
6558 int cputaken = final_forward_branch_p (current_output_insn) == 0;
6559
6560 /* Emit hints only in the case default branch prediction
6561 heuristics would fail. */
6562 if (taken != cputaken)
6563 {
6564 /* We use 3e (DS) prefix for taken branches and
6565 2e (CS) prefix for not taken branches. */
6566 if (taken)
6567 fputs ("ds ; ", file);
6568 else
6569 fputs ("cs ; ", file);
6570 }
6571 }
6572 }
6573 return;
6574 }
6575 default:
6576 output_operand_lossage ("invalid operand code '%c'", code);
6577 }
6578 }
6579
6580 if (GET_CODE (x) == REG)
6581 print_reg (x, code, file);
6582
6583 else if (GET_CODE (x) == MEM)
6584 {
6585 /* No `byte ptr' prefix for call instructions. */
6586 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
6587 {
6588 const char * size;
6589 switch (GET_MODE_SIZE (GET_MODE (x)))
6590 {
6591 case 1: size = "BYTE"; break;
6592 case 2: size = "WORD"; break;
6593 case 4: size = "DWORD"; break;
6594 case 8: size = "QWORD"; break;
6595 case 12: size = "XWORD"; break;
6596 case 16: size = "XMMWORD"; break;
6597 default:
6598 abort ();
6599 }
6600
6601 /* Check for explicit size override (codes 'b', 'w' and 'k') */
6602 if (code == 'b')
6603 size = "BYTE";
6604 else if (code == 'w')
6605 size = "WORD";
6606 else if (code == 'k')
6607 size = "DWORD";
6608
6609 fputs (size, file);
6610 fputs (" PTR ", file);
6611 }
6612
6613 x = XEXP (x, 0);
6614 /* Avoid (%rip) for call operands. */
6615 if (CONSTANT_ADDRESS_P (x) && code == 'P'
6616 && GET_CODE (x) != CONST_INT)
6617 output_addr_const (file, x);
6618 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
6619 output_operand_lossage ("invalid constraints for operand");
6620 else
6621 output_address (x);
6622 }
6623
6624 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
6625 {
6626 REAL_VALUE_TYPE r;
6627 long l;
6628
6629 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6630 REAL_VALUE_TO_TARGET_SINGLE (r, l);
6631
6632 if (ASSEMBLER_DIALECT == ASM_ATT)
6633 putc ('$', file);
6634 fprintf (file, "0x%08lx", l);
6635 }
6636
6637 /* These float cases don't actually occur as immediate operands. */
6638 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
6639 {
6640 char dstr[30];
6641
6642 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
6643 fprintf (file, "%s", dstr);
6644 }
6645
6646 else if (GET_CODE (x) == CONST_DOUBLE
6647 && GET_MODE (x) == XFmode)
6648 {
6649 char dstr[30];
6650
6651 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
6652 fprintf (file, "%s", dstr);
6653 }
6654
6655 else
6656 {
6657 if (code != 'P')
6658 {
6659 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
6660 {
6661 if (ASSEMBLER_DIALECT == ASM_ATT)
6662 putc ('$', file);
6663 }
6664 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
6665 || GET_CODE (x) == LABEL_REF)
6666 {
6667 if (ASSEMBLER_DIALECT == ASM_ATT)
6668 putc ('$', file);
6669 else
6670 fputs ("OFFSET FLAT:", file);
6671 }
6672 }
6673 if (GET_CODE (x) == CONST_INT)
6674 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6675 else if (flag_pic)
6676 output_pic_addr_const (file, x, code);
6677 else
6678 output_addr_const (file, x);
6679 }
6680 }
6681 \f
6682 /* Print a memory operand whose address is ADDR. */
6683
6684 void
6685 print_operand_address (FILE *file, rtx addr)
6686 {
6687 struct ix86_address parts;
6688 rtx base, index, disp;
6689 int scale;
6690
6691 if (! ix86_decompose_address (addr, &parts))
6692 abort ();
6693
6694 base = parts.base;
6695 index = parts.index;
6696 disp = parts.disp;
6697 scale = parts.scale;
6698
6699 switch (parts.seg)
6700 {
6701 case SEG_DEFAULT:
6702 break;
6703 case SEG_FS:
6704 case SEG_GS:
6705 if (USER_LABEL_PREFIX[0] == 0)
6706 putc ('%', file);
6707 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
6708 break;
6709 default:
6710 abort ();
6711 }
6712
6713 if (!base && !index)
6714 {
6715 /* A displacement-only address requires special attention. */
6716
6717 if (GET_CODE (disp) == CONST_INT)
6718 {
6719 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
6720 {
6721 if (USER_LABEL_PREFIX[0] == 0)
6722 putc ('%', file);
6723 fputs ("ds:", file);
6724 }
6725 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
6726 }
6727 else if (flag_pic)
6728 output_pic_addr_const (file, disp, 0);
6729 else
6730 output_addr_const (file, disp);
6731
6732 /* In 64-bit mode, use the one byte shorter RIP-relative addressing. */
6733 if (TARGET_64BIT
6734 && ((GET_CODE (disp) == SYMBOL_REF
6735 && ! tls_symbolic_operand (disp, GET_MODE (disp)))
6736 || GET_CODE (disp) == LABEL_REF
6737 || (GET_CODE (disp) == CONST
6738 && GET_CODE (XEXP (disp, 0)) == PLUS
6739 && (GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
6740 || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
6741 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)))
6742 fputs ("(%rip)", file);
6743 }
6744 else
6745 {
6746 if (ASSEMBLER_DIALECT == ASM_ATT)
6747 {
6748 if (disp)
6749 {
6750 if (flag_pic)
6751 output_pic_addr_const (file, disp, 0);
6752 else if (GET_CODE (disp) == LABEL_REF)
6753 output_asm_label (disp);
6754 else
6755 output_addr_const (file, disp);
6756 }
6757
6758 putc ('(', file);
6759 if (base)
6760 print_reg (base, 0, file);
6761 if (index)
6762 {
6763 putc (',', file);
6764 print_reg (index, 0, file);
6765 if (scale != 1)
6766 fprintf (file, ",%d", scale);
6767 }
6768 putc (')', file);
6769 }
6770 else
6771 {
6772 rtx offset = NULL_RTX;
6773
6774 if (disp)
6775 {
6776 /* Pull out the offset of a symbol; print any symbol itself. */
6777 if (GET_CODE (disp) == CONST
6778 && GET_CODE (XEXP (disp, 0)) == PLUS
6779 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
6780 {
6781 offset = XEXP (XEXP (disp, 0), 1);
6782 disp = gen_rtx_CONST (VOIDmode,
6783 XEXP (XEXP (disp, 0), 0));
6784 }
6785
6786 if (flag_pic)
6787 output_pic_addr_const (file, disp, 0);
6788 else if (GET_CODE (disp) == LABEL_REF)
6789 output_asm_label (disp);
6790 else if (GET_CODE (disp) == CONST_INT)
6791 offset = disp;
6792 else
6793 output_addr_const (file, disp);
6794 }
6795
6796 putc ('[', file);
6797 if (base)
6798 {
6799 print_reg (base, 0, file);
6800 if (offset)
6801 {
6802 if (INTVAL (offset) >= 0)
6803 putc ('+', file);
6804 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6805 }
6806 }
6807 else if (offset)
6808 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6809 else
6810 putc ('0', file);
6811
6812 if (index)
6813 {
6814 putc ('+', file);
6815 print_reg (index, 0, file);
6816 if (scale != 1)
6817 fprintf (file, "*%d", scale);
6818 }
6819 putc (']', file);
6820 }
6821 }
6822 }
6823
6824 bool
6825 output_addr_const_extra (FILE *file, rtx x)
6826 {
6827 rtx op;
6828
6829 if (GET_CODE (x) != UNSPEC)
6830 return false;
6831
6832 op = XVECEXP (x, 0, 0);
6833 switch (XINT (x, 1))
6834 {
6835 case UNSPEC_GOTTPOFF:
6836 output_addr_const (file, op);
6837 /* FIXME: This might be @TPOFF in Sun ld. */
6838 fputs ("@GOTTPOFF", file);
6839 break;
6840 case UNSPEC_TPOFF:
6841 output_addr_const (file, op);
6842 fputs ("@TPOFF", file);
6843 break;
6844 case UNSPEC_NTPOFF:
6845 output_addr_const (file, op);
6846 if (TARGET_64BIT)
6847 fputs ("@TPOFF", file);
6848 else
6849 fputs ("@NTPOFF", file);
6850 break;
6851 case UNSPEC_DTPOFF:
6852 output_addr_const (file, op);
6853 fputs ("@DTPOFF", file);
6854 break;
6855 case UNSPEC_GOTNTPOFF:
6856 output_addr_const (file, op);
6857 if (TARGET_64BIT)
6858 fputs ("@GOTTPOFF(%rip)", file);
6859 else
6860 fputs ("@GOTNTPOFF", file);
6861 break;
6862 case UNSPEC_INDNTPOFF:
6863 output_addr_const (file, op);
6864 fputs ("@INDNTPOFF", file);
6865 break;
6866
6867 default:
6868 return false;
6869 }
6870
6871 return true;
6872 }
6873 \f
6874 /* Split one or more DImode RTL references into pairs of SImode
6875 references. The RTL can be REG, offsettable MEM, integer constant, or
6876 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
6877 split and "num" is its length. lo_half and hi_half are output arrays
6878 that parallel "operands". */
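
/* Editor's sketch (not from the original sources): the same low/high
   split for a constant 64-bit value, showing the little-endian layout
   that the offsets 0 and 4 below rely on.  */
#if 0
#include <stdint.h>
#include <stdio.h>

int
main (void)
{
  uint64_t di = 0x0123456789abcdefULL;
  uint32_t lo = (uint32_t) di;          /* the word at offset 0 */
  uint32_t hi = (uint32_t) (di >> 32);  /* the word at offset 4 */
  /* prints lo=89abcdef hi=01234567 */
  printf ("lo=%08x hi=%08x\n", (unsigned) lo, (unsigned) hi);
  return 0;
}
#endif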
6879
6880 void
6881 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
6882 {
6883 while (num--)
6884 {
6885 rtx op = operands[num];
6886
6887 /* simplify_subreg refuses to split volatile memory references,
6888 but we still have to handle them. */
6889 if (GET_CODE (op) == MEM)
6890 {
6891 lo_half[num] = adjust_address (op, SImode, 0);
6892 hi_half[num] = adjust_address (op, SImode, 4);
6893 }
6894 else
6895 {
6896 lo_half[num] = simplify_gen_subreg (SImode, op,
6897 GET_MODE (op) == VOIDmode
6898 ? DImode : GET_MODE (op), 0);
6899 hi_half[num] = simplify_gen_subreg (SImode, op,
6900 GET_MODE (op) == VOIDmode
6901 ? DImode : GET_MODE (op), 4);
6902 }
6903 }
6904 }
6905 /* Split one or more TImode RTL references into pairs of DImode
6906 references. The RTL can be REG, offsettable MEM, integer constant, or
6907 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
6908 split and "num" is its length. lo_half and hi_half are output arrays
6909 that parallel "operands". */
6910
6911 void
6912 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
6913 {
6914 while (num--)
6915 {
6916 rtx op = operands[num];
6917
6918 /* simplify_subreg refuses to split volatile memory references, but we
6919 still have to handle them. */
6920 if (GET_CODE (op) == MEM)
6921 {
6922 lo_half[num] = adjust_address (op, DImode, 0);
6923 hi_half[num] = adjust_address (op, DImode, 8);
6924 }
6925 else
6926 {
6927 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
6928 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
6929 }
6930 }
6931 }
6932 \f
6933 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
6934 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
6935 is the expression of the binary operation. The output may either be
6936 emitted here, or returned to the caller, like all output_* functions.
6937
6938 There is no guarantee that the operands are the same mode, as they
6939 might be within FLOAT or FLOAT_EXTEND expressions. */
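
/* For illustration (editor's note): for an SSE SFmode add this returns
   "addss\t{%2, %0|%0, %2}"; for a 387 add with st(0) as destination and
   a live stack register as the other operand it returns
   "fadd\t{%y2, %0|%0, %y2}".  The SYSV386_COMPAT business below only
   affects the reversed fsub/fdiv spellings.  */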
6940
6941 #ifndef SYSV386_COMPAT
6942 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
6943 wants to fix the assemblers because that causes incompatibility
6944 with gcc. No-one wants to fix gcc because that causes
6945 incompatibility with assemblers... You can use the option of
6946 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
6947 #define SYSV386_COMPAT 1
6948 #endif
6949
6950 const char *
6951 output_387_binary_op (rtx insn, rtx *operands)
6952 {
6953 static char buf[30];
6954 const char *p;
6955 const char *ssep;
6956 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
6957
6958 #ifdef ENABLE_CHECKING
6959 /* Even if we do not want to check the inputs, this documents the input
6960 constraints, which helps in understanding the following code. */
6961 if (STACK_REG_P (operands[0])
6962 && ((REG_P (operands[1])
6963 && REGNO (operands[0]) == REGNO (operands[1])
6964 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
6965 || (REG_P (operands[2])
6966 && REGNO (operands[0]) == REGNO (operands[2])
6967 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
6968 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
6969 ; /* ok */
6970 else if (!is_sse)
6971 abort ();
6972 #endif
6973
6974 switch (GET_CODE (operands[3]))
6975 {
6976 case PLUS:
6977 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6978 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6979 p = "fiadd";
6980 else
6981 p = "fadd";
6982 ssep = "add";
6983 break;
6984
6985 case MINUS:
6986 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6987 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6988 p = "fisub";
6989 else
6990 p = "fsub";
6991 ssep = "sub";
6992 break;
6993
6994 case MULT:
6995 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6996 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6997 p = "fimul";
6998 else
6999 p = "fmul";
7000 ssep = "mul";
7001 break;
7002
7003 case DIV:
7004 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7005 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7006 p = "fidiv";
7007 else
7008 p = "fdiv";
7009 ssep = "div";
7010 break;
7011
7012 default:
7013 abort ();
7014 }
7015
7016 if (is_sse)
7017 {
7018 strcpy (buf, ssep);
7019 if (GET_MODE (operands[0]) == SFmode)
7020 strcat (buf, "ss\t{%2, %0|%0, %2}");
7021 else
7022 strcat (buf, "sd\t{%2, %0|%0, %2}");
7023 return buf;
7024 }
7025 strcpy (buf, p);
7026
7027 switch (GET_CODE (operands[3]))
7028 {
7029 case MULT:
7030 case PLUS:
7031 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
7032 {
7033 rtx temp = operands[2];
7034 operands[2] = operands[1];
7035 operands[1] = temp;
7036 }
7037
7038 /* We now know operands[0] == operands[1]. */
7039
7040 if (GET_CODE (operands[2]) == MEM)
7041 {
7042 p = "%z2\t%2";
7043 break;
7044 }
7045
7046 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7047 {
7048 if (STACK_TOP_P (operands[0]))
7049 /* How is it that we are storing to a dead operand[2]?
7050 Well, presumably operands[1] is dead too. We can't
7051 store the result to st(0) as st(0) gets popped on this
7052 instruction. Instead store to operands[2] (which I
7053 think has to be st(1)). st(1) will be popped later.
7054 gcc <= 2.8.1 didn't have this check and generated
7055 assembly code that the Unixware assembler rejected. */
7056 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7057 else
7058 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7059 break;
7060 }
7061
7062 if (STACK_TOP_P (operands[0]))
7063 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7064 else
7065 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7066 break;
7067
7068 case MINUS:
7069 case DIV:
7070 if (GET_CODE (operands[1]) == MEM)
7071 {
7072 p = "r%z1\t%1";
7073 break;
7074 }
7075
7076 if (GET_CODE (operands[2]) == MEM)
7077 {
7078 p = "%z2\t%2";
7079 break;
7080 }
7081
7082 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7083 {
7084 #if SYSV386_COMPAT
7085 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7086 derived assemblers, confusingly reverse the direction of
7087 the operation for fsub{r} and fdiv{r} when the
7088 destination register is not st(0). The Intel assembler
7089 doesn't have this brain damage. Read !SYSV386_COMPAT to
7090 figure out what the hardware really does. */
7091 if (STACK_TOP_P (operands[0]))
7092 p = "{p\t%0, %2|rp\t%2, %0}";
7093 else
7094 p = "{rp\t%2, %0|p\t%0, %2}";
7095 #else
7096 if (STACK_TOP_P (operands[0]))
7097 /* As above for fmul/fadd, we can't store to st(0). */
7098 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7099 else
7100 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7101 #endif
7102 break;
7103 }
7104
7105 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
7106 {
7107 #if SYSV386_COMPAT
7108 if (STACK_TOP_P (operands[0]))
7109 p = "{rp\t%0, %1|p\t%1, %0}";
7110 else
7111 p = "{p\t%1, %0|rp\t%0, %1}";
7112 #else
7113 if (STACK_TOP_P (operands[0]))
7114 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7115 else
7116 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7117 #endif
7118 break;
7119 }
7120
7121 if (STACK_TOP_P (operands[0]))
7122 {
7123 if (STACK_TOP_P (operands[1]))
7124 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7125 else
7126 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
7127 break;
7128 }
7129 else if (STACK_TOP_P (operands[1]))
7130 {
7131 #if SYSV386_COMPAT
7132 p = "{\t%1, %0|r\t%0, %1}";
7133 #else
7134 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
7135 #endif
7136 }
7137 else
7138 {
7139 #if SYSV386_COMPAT
7140 p = "{r\t%2, %0|\t%0, %2}";
7141 #else
7142 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7143 #endif
7144 }
7145 break;
7146
7147 default:
7148 abort ();
7149 }
7150
7151 strcat (buf, p);
7152 return buf;
7153 }
7154
7155 /* Output code to initialize control word copies used by trunc?f?i and
7156 rounding patterns. CURRENT_MODE is set to the current control word,
7157 while NEW_MODE is set to the new control word. */
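
/* Editor's sketch (not from the original sources): the masks used below
   manipulate the x87 control word, whose bits 10-11 select the rounding
   mode and whose bit 5 masks the precision exception.  */
#if 0
#include <stdio.h>

int
main (void)
{
  unsigned short cw = 0x037f;                         /* typical default control word */
  unsigned short floor_cw = (cw & ~0x0c00) | 0x0400;  /* round toward -oo  */
  unsigned short ceil_cw  = (cw & ~0x0c00) | 0x0800;  /* round toward +oo  */
  unsigned short trunc_cw = cw | 0x0c00;              /* round toward zero */
  unsigned short pm_cw    = cw | 0x0020;              /* mask precision exception */
  printf ("%04x %04x %04x %04x\n",
	  (unsigned) floor_cw, (unsigned) ceil_cw,
	  (unsigned) trunc_cw, (unsigned) pm_cw);
  return 0;
}
#endif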
7158
7159 void
7160 emit_i387_cw_initialization (rtx current_mode, rtx new_mode, int mode)
7161 {
7162 rtx reg = gen_reg_rtx (HImode);
7163
7164 emit_insn (gen_x86_fnstcw_1 (current_mode));
7165 emit_move_insn (reg, current_mode);
7166
7167 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
7168 && !TARGET_64BIT)
7169 {
7170 switch (mode)
7171 {
7172 case I387_CW_FLOOR:
7173 /* round down toward -oo */
7174 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
7175 break;
7176
7177 case I387_CW_CEIL:
7178 /* round up toward +oo */
7179 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
7180 break;
7181
7182 case I387_CW_TRUNC:
7183 /* round toward zero (truncate) */
7184 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
7185 break;
7186
7187 case I387_CW_MASK_PM:
7188 /* mask precision exception for nearbyint() */
7189 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
7190 break;
7191
7192 default:
7193 abort();
7194 }
7195 }
7196 else
7197 {
7198 switch (mode)
7199 {
7200 case I387_CW_FLOOR:
7201 /* round down toward -oo */
7202 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
7203 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
7204 break;
7205
7206 case I387_CW_CEIL:
7207 /* round up toward +oo */
7208 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
7209 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
7210 break;
7211
7212 case I387_CW_TRUNC:
7213 /* round toward zero (truncate) */
7214 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
7215 break;
7216
7217 case I387_CW_MASK_PM:
7218 /* mask precision exception for nearbyint() */
7219 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
7220 break;
7221
7222 default:
7223 abort();
7224 }
7225 }
7226
7227 emit_move_insn (new_mode, reg);
7228 }
7229
7230 /* Output code for INSN to convert a float to a signed int. OPERANDS
7231 are the insn operands. The output may be [HSD]Imode and the input
7232 operand may be [SDX]Fmode. */
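
/* For illustration (editor's note): the typical sequence emitted below is

       fldcw   %3         load the truncating control word
       fistp%z0  %0       store the truncated integer (fist%z0 if no pop)
       fldcw   %2         restore the original control word

   with an extra "fld %y1" up front in the DImode case when the stack
   top does not die, so the popping fistp leaves the original value
   intact.  */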
7233
7234 const char *
7235 output_fix_trunc (rtx insn, rtx *operands)
7236 {
7237 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7238 int dimode_p = GET_MODE (operands[0]) == DImode;
7239
7240 /* Jump through a hoop or two for DImode, since the hardware has no
7241 non-popping instruction. We used to do this a different way, but
7242 that was somewhat fragile and broke with post-reload splitters. */
7243 if (dimode_p && !stack_top_dies)
7244 output_asm_insn ("fld\t%y1", operands);
7245
7246 if (!STACK_TOP_P (operands[1]))
7247 abort ();
7248
7249 if (GET_CODE (operands[0]) != MEM)
7250 abort ();
7251
7252 output_asm_insn ("fldcw\t%3", operands);
7253 if (stack_top_dies || dimode_p)
7254 output_asm_insn ("fistp%z0\t%0", operands);
7255 else
7256 output_asm_insn ("fist%z0\t%0", operands);
7257 output_asm_insn ("fldcw\t%2", operands);
7258
7259 return "";
7260 }
7261
7262 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
7263 should be used. UNORDERED_P is true when fucom should be used. */
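
/* For illustration (editor's note): with EFLAGS_P and UNORDERED_P set,
   SSE operands produce "ucomiss"/"ucomisd", while 387 stack operands
   that both die produce "fucomip" followed by a pop; without EFLAGS_P
   the result goes through the FPU status word instead, e.g.
   "fucompp\n\tfnstsw\t%0".  */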
7264
7265 const char *
7266 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
7267 {
7268 int stack_top_dies;
7269 rtx cmp_op0, cmp_op1;
7270 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
7271
7272 if (eflags_p)
7273 {
7274 cmp_op0 = operands[0];
7275 cmp_op1 = operands[1];
7276 }
7277 else
7278 {
7279 cmp_op0 = operands[1];
7280 cmp_op1 = operands[2];
7281 }
7282
7283 if (is_sse)
7284 {
7285 if (GET_MODE (operands[0]) == SFmode)
7286 if (unordered_p)
7287 return "ucomiss\t{%1, %0|%0, %1}";
7288 else
7289 return "comiss\t{%1, %0|%0, %1}";
7290 else
7291 if (unordered_p)
7292 return "ucomisd\t{%1, %0|%0, %1}";
7293 else
7294 return "comisd\t{%1, %0|%0, %1}";
7295 }
7296
7297 if (! STACK_TOP_P (cmp_op0))
7298 abort ();
7299
7300 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7301
7302 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
7303 {
7304 if (stack_top_dies)
7305 {
7306 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
7307 return TARGET_USE_FFREEP ? "ffreep\t%y1" : "fstp\t%y1";
7308 }
7309 else
7310 return "ftst\n\tfnstsw\t%0";
7311 }
7312
7313 if (STACK_REG_P (cmp_op1)
7314 && stack_top_dies
7315 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
7316 && REGNO (cmp_op1) != FIRST_STACK_REG)
7317 {
7318 /* If both the top of the 387 stack and the other operand (itself a
7319 stack register) die here, then this must be a `fcompp' float
7320 compare.  */
7321
7322 if (eflags_p)
7323 {
7324 /* There is no double popping fcomi variant. Fortunately,
7325 eflags is immune from the fstp's cc clobbering. */
7326 if (unordered_p)
7327 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
7328 else
7329 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
7330 return TARGET_USE_FFREEP ? "ffreep\t%y0" : "fstp\t%y0";
7331 }
7332 else
7333 {
7334 if (unordered_p)
7335 return "fucompp\n\tfnstsw\t%0";
7336 else
7337 return "fcompp\n\tfnstsw\t%0";
7338 }
7339 }
7340 else
7341 {
7342 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
7343
7344 static const char * const alt[16] =
7345 {
7346 "fcom%z2\t%y2\n\tfnstsw\t%0",
7347 "fcomp%z2\t%y2\n\tfnstsw\t%0",
7348 "fucom%z2\t%y2\n\tfnstsw\t%0",
7349 "fucomp%z2\t%y2\n\tfnstsw\t%0",
7350
7351 "ficom%z2\t%y2\n\tfnstsw\t%0",
7352 "ficomp%z2\t%y2\n\tfnstsw\t%0",
7353 NULL,
7354 NULL,
7355
7356 "fcomi\t{%y1, %0|%0, %y1}",
7357 "fcomip\t{%y1, %0|%0, %y1}",
7358 "fucomi\t{%y1, %0|%0, %y1}",
7359 "fucomip\t{%y1, %0|%0, %y1}",
7360
7361 NULL,
7362 NULL,
7363 NULL,
7364 NULL
7365 };
7366
7367 int mask;
7368 const char *ret;
7369
7370 mask = eflags_p << 3;
7371 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
7372 mask |= unordered_p << 1;
7373 mask |= stack_top_dies;
7374
7375 if (mask >= 16)
7376 abort ();
7377 ret = alt[mask];
7378 if (ret == NULL)
7379 abort ();
7380
7381 return ret;
7382 }
7383 }
7384
7385 void
7386 ix86_output_addr_vec_elt (FILE *file, int value)
7387 {
7388 const char *directive = ASM_LONG;
7389
7390 if (TARGET_64BIT)
7391 {
7392 #ifdef ASM_QUAD
7393 directive = ASM_QUAD;
7394 #else
7395 abort ();
7396 #endif
7397 }
7398
7399 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
7400 }
7401
7402 void
7403 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
7404 {
7405 if (TARGET_64BIT)
7406 fprintf (file, "%s%s%d-%s%d\n",
7407 ASM_LONG, LPREFIX, value, LPREFIX, rel);
7408 else if (HAVE_AS_GOTOFF_IN_DATA)
7409 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
7410 #if TARGET_MACHO
7411 else if (TARGET_MACHO)
7412 {
7413 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
7414 machopic_output_function_base_name (file);
7415 fprintf(file, "\n");
7416 }
7417 #endif
7418 else
7419 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
7420 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
7421 }
7422 \f
7423 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
7424 for the target. */
7425
7426 void
7427 ix86_expand_clear (rtx dest)
7428 {
7429 rtx tmp;
7430
7431 /* We play register width games, which are only valid after reload. */
7432 if (!reload_completed)
7433 abort ();
7434
7435 /* Avoid HImode and its attendant prefix byte. */
7436 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
7437 dest = gen_rtx_REG (SImode, REGNO (dest));
7438
7439 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
7440
7441 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
7442 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
7443 {
7444 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
7445 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
7446 }
7447
7448 emit_insn (tmp);
7449 }
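/* Illustrative sketch of the RTL produced (added, not original): after
   reload, and unless TARGET_USE_MOV0 applies without -Os, this emits

       (parallel [(set (reg:SI 0 ax) (const_int 0))
                  (clobber (reg:CC 17))])

   which the movsi_xor / movdi_xor_rex64 patterns render as a register
   self-xor; otherwise the plain set remains and becomes a mov $0.  */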
7450
7451 /* X is an unchanging MEM. If it is a constant pool reference, return
7452 the constant pool rtx, else NULL. */
7453
7454 rtx
7455 maybe_get_pool_constant (rtx x)
7456 {
7457 x = ix86_delegitimize_address (XEXP (x, 0));
7458
7459 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7460 return get_pool_constant (x);
7461
7462 return NULL_RTX;
7463 }
7464
7465 void
7466 ix86_expand_move (enum machine_mode mode, rtx operands[])
7467 {
7468 int strict = (reload_in_progress || reload_completed);
7469 rtx op0, op1;
7470 enum tls_model model;
7471
7472 op0 = operands[0];
7473 op1 = operands[1];
7474
7475 model = GET_CODE (op1) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (op1) : 0;
7476 if (model)
7477 {
7478 op1 = legitimize_tls_address (op1, model, true);
7479 op1 = force_operand (op1, op0);
7480 if (op1 == op0)
7481 return;
7482 }
7483
7484 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
7485 {
7486 #if TARGET_MACHO
7487 if (MACHOPIC_PURE)
7488 {
7489 rtx temp = ((reload_in_progress
7490 || ((op0 && GET_CODE (op0) == REG)
7491 && mode == Pmode))
7492 ? op0 : gen_reg_rtx (Pmode));
7493 op1 = machopic_indirect_data_reference (op1, temp);
7494 op1 = machopic_legitimize_pic_address (op1, mode,
7495 temp == op1 ? 0 : temp);
7496 }
7497 else if (MACHOPIC_INDIRECT)
7498 op1 = machopic_indirect_data_reference (op1, 0);
7499 if (op0 == op1)
7500 return;
7501 #else
7502 if (GET_CODE (op0) == MEM)
7503 op1 = force_reg (Pmode, op1);
7504 else
7505 op1 = legitimize_address (op1, op1, Pmode);
7506 #endif /* TARGET_MACHO */
7507 }
7508 else
7509 {
7510 if (GET_CODE (op0) == MEM
7511 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
7512 || !push_operand (op0, mode))
7513 && GET_CODE (op1) == MEM)
7514 op1 = force_reg (mode, op1);
7515
7516 if (push_operand (op0, mode)
7517 && ! general_no_elim_operand (op1, mode))
7518 op1 = copy_to_mode_reg (mode, op1);
7519
7520 /* Force large constants in 64-bit compilation into a register
7521 so that they get CSEd. */
7522 if (TARGET_64BIT && mode == DImode
7523 && immediate_operand (op1, mode)
7524 && !x86_64_zext_immediate_operand (op1, VOIDmode)
7525 && !register_operand (op0, mode)
7526 && optimize && !reload_completed && !reload_in_progress)
7527 op1 = copy_to_mode_reg (mode, op1);
7528
7529 if (FLOAT_MODE_P (mode))
7530 {
7531 /* If we are loading a floating point constant into a register,
7532 force the value to memory now, since we'll get better code
7533 out of the back end. */
7534
7535 if (strict)
7536 ;
7537 else if (GET_CODE (op1) == CONST_DOUBLE)
7538 {
7539 op1 = validize_mem (force_const_mem (mode, op1));
7540 if (!register_operand (op0, mode))
7541 {
7542 rtx temp = gen_reg_rtx (mode);
7543 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
7544 emit_move_insn (op0, temp);
7545 return;
7546 }
7547 }
7548 }
7549 }
7550
7551 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
7552 }
7553
7554 void
7555 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
7556 {
7557 rtx op0 = operands[0], op1 = operands[1];
7558
7559 /* Force constants other than zero into memory.  We do not know how
7560 the instructions used to build constants modify the upper 64 bits
7561 of the register; once we have that information we may be able
7562 to handle some of them more efficiently. */
7563 if ((reload_in_progress | reload_completed) == 0
7564 && register_operand (op0, mode)
7565 && CONSTANT_P (op1) && op1 != CONST0_RTX (mode))
7566 op1 = validize_mem (force_const_mem (mode, op1));
7567
7568 /* If neither operand is already a register, force operand 1 into one. */
7569 if (!no_new_pseudos
7570 && !register_operand (op0, mode)
7571 && !register_operand (op1, mode))
7572 {
7573 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
7574 return;
7575 }
7576
7577 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
7578 }
7579
7580 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
7581 straight to ix86_expand_vector_move. */
7582
7583 void
7584 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
7585 {
7586 rtx op0, op1, m;
7587
7588 op0 = operands[0];
7589 op1 = operands[1];
7590
7591 if (MEM_P (op1))
7592 {
7593 /* If we're optimizing for size, movups is the smallest. */
7594 if (optimize_size)
7595 {
7596 op0 = gen_lowpart (V4SFmode, op0);
7597 op1 = gen_lowpart (V4SFmode, op1);
7598 emit_insn (gen_sse_movups (op0, op1));
7599 return;
7600 }
7601
7602 /* ??? If we have typed data, then it would appear that using
7603 movdqu is the only way to get unaligned data loaded with
7604 integer type. */
7605 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
7606 {
7607 op0 = gen_lowpart (V16QImode, op0);
7608 op1 = gen_lowpart (V16QImode, op1);
7609 emit_insn (gen_sse2_movdqu (op0, op1));
7610 return;
7611 }
7612
7613 if (TARGET_SSE2 && mode == V2DFmode)
7614 {
7615 /* When SSE registers are split into halves, we can avoid
7616 writing to the top half twice. */
7617 if (TARGET_SSE_SPLIT_REGS)
7618 {
7619 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
7620 m = adjust_address (op1, DFmode, 0);
7621 emit_insn (gen_sse2_loadlpd (op0, op0, m));
7622 m = adjust_address (op1, DFmode, 8);
7623 emit_insn (gen_sse2_loadhpd (op0, op0, m));
7624 }
7625 else
7626 {
7627 /* ??? Not sure about the best option for the Intel chips.
7628 The following would seem to satisfy; the register is
7629 entirely cleared, breaking the dependency chain. We
7630 then store to the upper half, with a dependency depth
7631 of one. A rumor has it that Intel recommends two movsd
7632 followed by an unpacklpd, but this is unconfirmed. And
7633 given that the dependency depth of the unpacklpd would
7634 still be one, I'm not sure why this would be better. */
7635 m = adjust_address (op1, DFmode, 0);
7636 emit_insn (gen_sse2_loadsd (op0, m));
7637 m = adjust_address (op1, DFmode, 8);
7638 emit_insn (gen_sse2_loadhpd (op0, op0, m));
7639 }
7640 }
7641 else
7642 {
7643 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
7644 emit_move_insn (op0, CONST0_RTX (mode));
7645 else
7646 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
7647
7648 op0 = gen_lowpart (V4SFmode, op0);
7649 m = adjust_address (op1, V4SFmode, 0);
7650 emit_insn (gen_sse_movlps (op0, op0, m));
7651 m = adjust_address (op1, V4SFmode, 8);
7652 emit_insn (gen_sse_movhps (op0, op0, m));
7653 }
7654 }
7655 else if (MEM_P (op0))
7656 {
7657 /* If we're optimizing for size, movups is the smallest. */
7658 if (optimize_size)
7659 {
7660 op0 = gen_lowpart (V4SFmode, op0);
7661 op1 = gen_lowpart (V4SFmode, op1);
7662 emit_insn (gen_sse_movups (op0, op1));
7663 return;
7664 }
7665
7666 /* ??? Similar to the above, only less clear because of the
7667 so-called "typeless stores". */
7668 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
7669 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
7670 {
7671 op0 = gen_lowpart (V16QImode, op0);
7672 op1 = gen_lowpart (V16QImode, op1);
7673 emit_insn (gen_sse2_movdqu (op0, op1));
7674 return;
7675 }
7676
7677 if (TARGET_SSE2 && mode == V2DFmode)
7678 {
7679 m = adjust_address (op0, DFmode, 0);
7680 emit_insn (gen_sse2_storelpd (m, op1));
7681 m = adjust_address (op0, DFmode, 8);
7682 emit_insn (gen_sse2_storehpd (m, op1));
7683 return;
7684 }
7685 else
7686 {
7687 op1 = gen_lowpart (V4SFmode, op1);
7688 m = adjust_address (op0, V4SFmode, 0);
7689 emit_insn (gen_sse_movlps (m, m, op1));
7690 m = adjust_address (op0, V4SFmode, 8);
7691 emit_insn (gen_sse_movhps (m, m, op1));
7692 return;
7693 }
7694 }
7695 else
7696 gcc_unreachable ();
7697 }
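/* Summary added for clarity (not original commentary): when optimizing for
   size everything above funnels into a single movups; integer vector modes
   prefer movdqu on SSE2 so the data keeps its integer "type"; otherwise an
   unaligned access is split into two half-width moves (loadlpd/loadhpd or
   movlps/movhps), and on the load side the destination register is cleared
   or clobbered first to break the false dependency on its previous
   contents.  */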
7698
7699
7700 /* Attempt to expand a binary operator.  Make the expansion closer to the
7701 actual machine than just general_operand, which would allow three separate
7702 memory references (one output, two input) in a single insn. */
7703
7704 void
7705 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
7706 rtx operands[])
7707 {
7708 int matching_memory;
7709 rtx src1, src2, dst, op, clob;
7710
7711 dst = operands[0];
7712 src1 = operands[1];
7713 src2 = operands[2];
7714
7715 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
7716 if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
7717 && (rtx_equal_p (dst, src2)
7718 || immediate_operand (src1, mode)))
7719 {
7720 rtx temp = src1;
7721 src1 = src2;
7722 src2 = temp;
7723 }
7724
7725 /* If the destination is memory, and we do not have matching source
7726 operands, do things in registers. */
7727 matching_memory = 0;
7728 if (GET_CODE (dst) == MEM)
7729 {
7730 if (rtx_equal_p (dst, src1))
7731 matching_memory = 1;
7732 else if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
7733 && rtx_equal_p (dst, src2))
7734 matching_memory = 2;
7735 else
7736 dst = gen_reg_rtx (mode);
7737 }
7738
7739 /* Both source operands cannot be in memory. */
7740 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
7741 {
7742 if (matching_memory != 2)
7743 src2 = force_reg (mode, src2);
7744 else
7745 src1 = force_reg (mode, src1);
7746 }
7747
7748 /* If the operation is not commutative, source 1 cannot be a constant
7749 or non-matching memory. */
7750 if ((CONSTANT_P (src1)
7751 || (!matching_memory && GET_CODE (src1) == MEM))
7752 && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
7753 src1 = force_reg (mode, src1);
7754
7755 /* If optimizing, copy to regs to improve CSE */
7756 if (optimize && ! no_new_pseudos)
7757 {
7758 if (GET_CODE (dst) == MEM)
7759 dst = gen_reg_rtx (mode);
7760 if (GET_CODE (src1) == MEM)
7761 src1 = force_reg (mode, src1);
7762 if (GET_CODE (src2) == MEM)
7763 src2 = force_reg (mode, src2);
7764 }
7765
7766 /* Emit the instruction. */
7767
7768 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
7769 if (reload_in_progress)
7770 {
7771 /* Reload doesn't know about the flags register, and doesn't know that
7772 it doesn't want to clobber it. We can only do this with PLUS. */
7773 if (code != PLUS)
7774 abort ();
7775 emit_insn (op);
7776 }
7777 else
7778 {
7779 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
7780 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
7781 }
7782
7783 /* Fix up the destination if needed. */
7784 if (dst != operands[0])
7785 emit_move_insn (operands[0], dst);
7786 }
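/* Typical use (illustrative only, not part of the original source): the
   arithmetic expanders in i386.md delegate to this helper, along the lines
   of

       (define_expand "addsi3"
         [(set (match_operand:SI 0 "nonimmediate_operand" "")
               (plus:SI (match_operand:SI 1 "nonimmediate_operand" "")
                        (match_operand:SI 2 "general_operand" "")))]
         ""
         "ix86_expand_binary_operator (PLUS, SImode, operands); DONE;")

   so that by the time the real insn is generated at most one operand is
   still a memory reference.  */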
7787
7788 /* Return TRUE or FALSE depending on whether the binary operator meets the
7789 appropriate constraints. */
7790
7791 int
7792 ix86_binary_operator_ok (enum rtx_code code,
7793 enum machine_mode mode ATTRIBUTE_UNUSED,
7794 rtx operands[3])
7795 {
7796 /* Both source operands cannot be in memory. */
7797 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
7798 return 0;
7799 /* If the operation is not commutative, source 1 cannot be a constant. */
7800 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
7801 return 0;
7802 /* If the destination is memory, we must have a matching source operand. */
7803 if (GET_CODE (operands[0]) == MEM
7804 && ! (rtx_equal_p (operands[0], operands[1])
7805 || (GET_RTX_CLASS (code) == RTX_COMM_ARITH
7806 && rtx_equal_p (operands[0], operands[2]))))
7807 return 0;
7808 /* If the operation is not commutative and source 1 is memory, we must
7809 have a matching destination. */
7810 if (GET_CODE (operands[1]) == MEM
7811 && GET_RTX_CLASS (code) != RTX_COMM_ARITH
7812 && ! rtx_equal_p (operands[0], operands[1]))
7813 return 0;
7814 return 1;
7815 }
7816
7817 /* Attempt to expand a unary operator.  Make the expansion closer to the
7818 actual machine than just general_operand, which would allow two separate
7819 memory references (one output, one input) in a single insn. */
7820
7821 void
7822 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
7823 rtx operands[])
7824 {
7825 int matching_memory;
7826 rtx src, dst, op, clob;
7827
7828 dst = operands[0];
7829 src = operands[1];
7830
7831 /* If the destination is memory, and we do not have matching source
7832 operands, do things in registers. */
7833 matching_memory = 0;
7834 if (MEM_P (dst))
7835 {
7836 if (rtx_equal_p (dst, src))
7837 matching_memory = 1;
7838 else
7839 dst = gen_reg_rtx (mode);
7840 }
7841
7842 /* When source operand is memory, destination must match. */
7843 if (MEM_P (src) && !matching_memory)
7844 src = force_reg (mode, src);
7845
7846 /* If optimizing, copy to regs to improve CSE. */
7847 if (optimize && ! no_new_pseudos)
7848 {
7849 if (GET_CODE (dst) == MEM)
7850 dst = gen_reg_rtx (mode);
7851 if (GET_CODE (src) == MEM)
7852 src = force_reg (mode, src);
7853 }
7854
7855 /* Emit the instruction. */
7856
7857 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
7858 if (reload_in_progress || code == NOT)
7859 {
7860 /* Reload doesn't know about the flags register, and doesn't know that
7861 it doesn't want to clobber it. */
7862 if (code != NOT)
7863 abort ();
7864 emit_insn (op);
7865 }
7866 else
7867 {
7868 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
7869 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
7870 }
7871
7872 /* Fix up the destination if needed. */
7873 if (dst != operands[0])
7874 emit_move_insn (operands[0], dst);
7875 }
7876
7877 /* Return TRUE or FALSE depending on whether the unary operator meets the
7878 appropriate constraints. */
7879
7880 int
7881 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
7882 enum machine_mode mode ATTRIBUTE_UNUSED,
7883 rtx operands[2] ATTRIBUTE_UNUSED)
7884 {
7885 /* If one of operands is memory, source and destination must match. */
7886 if ((GET_CODE (operands[0]) == MEM
7887 || GET_CODE (operands[1]) == MEM)
7888 && ! rtx_equal_p (operands[0], operands[1]))
7889 return FALSE;
7890 return TRUE;
7891 }
7892
7893 /* Generate code for floating point ABS or NEG. */
7894
7895 void
7896 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
7897 rtx operands[])
7898 {
7899 rtx mask, set, use, clob, dst, src;
7900 bool matching_memory;
7901 bool use_sse = false;
7902
7903 if (TARGET_SSE_MATH)
7904 {
7905 if (mode == SFmode)
7906 use_sse = true;
7907 else if (mode == DFmode && TARGET_SSE2)
7908 use_sse = true;
7909 }
7910
7911 /* NEG and ABS performed with SSE use bitwise mask operations.
7912 Create the appropriate mask now. */
7913 if (use_sse)
7914 {
7915 HOST_WIDE_INT hi, lo;
7916 int shift = 63;
7917
7918 /* Find the sign bit, sign extended to 2*HWI. */
7919 if (mode == SFmode)
7920 lo = 0x80000000, hi = lo < 0;
7921 else if (HOST_BITS_PER_WIDE_INT >= 64)
7922 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
7923 else
7924 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
7925
7926 /* If we're looking for the absolute value, then we want
7927 the complement. */
7928 if (code == ABS)
7929 lo = ~lo, hi = ~hi;
7930
7931 /* Force this value into the low part of a fp vector constant. */
7932 mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
7933 mask = gen_lowpart (mode, mask);
7934 if (mode == SFmode)
7935 mask = gen_rtx_CONST_VECTOR (V4SFmode,
7936 gen_rtvec (4, mask, CONST0_RTX (SFmode),
7937 CONST0_RTX (SFmode),
7938 CONST0_RTX (SFmode)));
7939 else
7940 mask = gen_rtx_CONST_VECTOR (V2DFmode,
7941 gen_rtvec (2, mask, CONST0_RTX (DFmode)));
7942 mask = force_reg (GET_MODE (mask), mask);
7943 }
7944 else
7945 {
7946 /* When not using SSE, we don't use the mask, but prefer to keep the
7947 same general form of the insn pattern to reduce duplication when
7948 it comes time to split. */
7949 mask = const0_rtx;
7950 }
7951
7952 dst = operands[0];
7953 src = operands[1];
7954
7955 /* If the destination is memory, and we don't have matching source
7956 operands, do things in registers. */
7957 matching_memory = false;
7958 if (MEM_P (dst))
7959 {
7960 if (rtx_equal_p (dst, src) && (!optimize || no_new_pseudos))
7961 matching_memory = true;
7962 else
7963 dst = gen_reg_rtx (mode);
7964 }
7965 if (MEM_P (src) && !matching_memory)
7966 src = force_reg (mode, src);
7967
7968 set = gen_rtx_fmt_e (code, mode, src);
7969 set = gen_rtx_SET (VOIDmode, dst, set);
7970 use = gen_rtx_USE (VOIDmode, mask);
7971 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
7972 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (3, set, use, clob)));
7973
7974 if (dst != operands[0])
7975 emit_move_insn (operands[0], dst);
7976 }
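/* Added note (not original): the SSE path above builds the usual sign-bit
   masks -- bit 31 for SFmode, bit 63 for DFmode -- for NEG, and their
   complements for ABS, so the later split can implement NEG as an XOR with
   the mask and ABS as an AND with the complemented mask, each a single
   vector bitwise operation.  */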
7977
7978 /* Return TRUE or FALSE depending on whether the first SET in INSN
7979 has source and destination with matching CC modes, and that the
7980 CC mode is at least as constrained as REQ_MODE. */
7981
7982 int
7983 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
7984 {
7985 rtx set;
7986 enum machine_mode set_mode;
7987
7988 set = PATTERN (insn);
7989 if (GET_CODE (set) == PARALLEL)
7990 set = XVECEXP (set, 0, 0);
7991 if (GET_CODE (set) != SET)
7992 abort ();
7993 if (GET_CODE (SET_SRC (set)) != COMPARE)
7994 abort ();
7995
7996 set_mode = GET_MODE (SET_DEST (set));
7997 switch (set_mode)
7998 {
7999 case CCNOmode:
8000 if (req_mode != CCNOmode
8001 && (req_mode != CCmode
8002 || XEXP (SET_SRC (set), 1) != const0_rtx))
8003 return 0;
8004 break;
8005 case CCmode:
8006 if (req_mode == CCGCmode)
8007 return 0;
8008 /* FALLTHRU */
8009 case CCGCmode:
8010 if (req_mode == CCGOCmode || req_mode == CCNOmode)
8011 return 0;
8012 /* FALLTHRU */
8013 case CCGOCmode:
8014 if (req_mode == CCZmode)
8015 return 0;
8016 /* FALLTHRU */
8017 case CCZmode:
8018 break;
8019
8020 default:
8021 abort ();
8022 }
8023
8024 return (GET_MODE (SET_SRC (set)) == set_mode);
8025 }
8026
8027 /* Generate insn patterns to do an integer compare of OPERANDS. */
8028
8029 static rtx
8030 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
8031 {
8032 enum machine_mode cmpmode;
8033 rtx tmp, flags;
8034
8035 cmpmode = SELECT_CC_MODE (code, op0, op1);
8036 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
8037
8038 /* This is very simple, but making the interface the same as in the
8039 FP case makes the rest of the code easier. */
8040 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
8041 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
8042
8043 /* Return the test that should be put into the flags user, i.e.
8044 the bcc, scc, or cmov instruction. */
8045 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
8046 }
8047
8048 /* Figure out whether to use ordered or unordered fp comparisons.
8049 Return the appropriate mode to use. */
8050
8051 enum machine_mode
8052 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
8053 {
8054 /* ??? In order to make all comparisons reversible, we do all comparisons
8055 non-trapping when compiling for IEEE.  Once gcc is able to distinguish
8056 all forms of trapping and nontrapping comparisons, we can make inequality
8057 comparisons trapping again, since that results in better code when using
8058 FCOM based compares. */
8059 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
8060 }
8061
8062 enum machine_mode
8063 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
8064 {
8065 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8066 return ix86_fp_compare_mode (code);
8067 switch (code)
8068 {
8069 /* Only zero flag is needed. */
8070 case EQ: /* ZF=0 */
8071 case NE: /* ZF!=0 */
8072 return CCZmode;
8073 /* Codes needing carry flag. */
8074 case GEU: /* CF=0 */
8075 case GTU: /* CF=0 & ZF=0 */
8076 case LTU: /* CF=1 */
8077 case LEU: /* CF=1 | ZF=1 */
8078 return CCmode;
8079 /* Codes possibly doable only with sign flag when
8080 comparing against zero. */
8081 case GE: /* SF=OF or SF=0 */
8082 case LT: /* SF<>OF or SF=1 */
8083 if (op1 == const0_rtx)
8084 return CCGOCmode;
8085 else
8086 /* For other cases Carry flag is not required. */
8087 return CCGCmode;
8088 /* Codes doable with only the sign flag when comparing
8089 against zero, but for which we lack a jump instruction,
8090 so we need to use relational tests against overflow,
8091 which thus needs to be zero. */
8092 case GT: /* ZF=0 & SF=OF */
8093 case LE: /* ZF=1 | SF<>OF */
8094 if (op1 == const0_rtx)
8095 return CCNOmode;
8096 else
8097 return CCGCmode;
8098 /* The strcmp pattern does (use flags), and combine may ask us for a
8099 proper mode. */
8100 case USE:
8101 return CCmode;
8102 default:
8103 abort ();
8104 }
8105 }
8106
8107 /* Return the fixed registers used for condition codes. */
8108
8109 static bool
8110 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
8111 {
8112 *p1 = FLAGS_REG;
8113 *p2 = FPSR_REG;
8114 return true;
8115 }
8116
8117 /* If two condition code modes are compatible, return a condition code
8118 mode which is compatible with both. Otherwise, return
8119 VOIDmode. */
8120
8121 static enum machine_mode
8122 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
8123 {
8124 if (m1 == m2)
8125 return m1;
8126
8127 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
8128 return VOIDmode;
8129
8130 if ((m1 == CCGCmode && m2 == CCGOCmode)
8131 || (m1 == CCGOCmode && m2 == CCGCmode))
8132 return CCGCmode;
8133
8134 switch (m1)
8135 {
8136 default:
8137 abort ();
8138
8139 case CCmode:
8140 case CCGCmode:
8141 case CCGOCmode:
8142 case CCNOmode:
8143 case CCZmode:
8144 switch (m2)
8145 {
8146 default:
8147 return VOIDmode;
8148
8149 case CCmode:
8150 case CCGCmode:
8151 case CCGOCmode:
8152 case CCNOmode:
8153 case CCZmode:
8154 return CCmode;
8155 }
8156
8157 case CCFPmode:
8158 case CCFPUmode:
8159 /* These are only compatible with themselves, which we already
8160 checked above. */
8161 return VOIDmode;
8162 }
8163 }
8164
8165 /* Return true if we should use an FCOMI instruction for this fp comparison. */
8166
8167 int
8168 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
8169 {
8170 enum rtx_code swapped_code = swap_condition (code);
8171 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
8172 || (ix86_fp_comparison_cost (swapped_code)
8173 == ix86_fp_comparison_fcomi_cost (swapped_code)));
8174 }
8175
8176 /* Swap, force into registers, or otherwise massage the two operands
8177 to a fp comparison. The operands are updated in place; the new
8178 comparison code is returned. */
8179
8180 static enum rtx_code
8181 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
8182 {
8183 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
8184 rtx op0 = *pop0, op1 = *pop1;
8185 enum machine_mode op_mode = GET_MODE (op0);
8186 int is_sse = SSE_REG_P (op0) || SSE_REG_P (op1);
8187
8188 /* All of the unordered compare instructions only work on registers.
8189 The same is true of the fcomi compare instructions. The same is
8190 true of the XFmode compare instructions if not comparing with
8191 zero (ftst insn is used in this case). */
8192
8193 if (!is_sse
8194 && (fpcmp_mode == CCFPUmode
8195 || (op_mode == XFmode
8196 && ! (standard_80387_constant_p (op0) == 1
8197 || standard_80387_constant_p (op1) == 1))
8198 || ix86_use_fcomi_compare (code)))
8199 {
8200 op0 = force_reg (op_mode, op0);
8201 op1 = force_reg (op_mode, op1);
8202 }
8203 else
8204 {
8205 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
8206 things around if they appear profitable, otherwise force op0
8207 into a register. */
8208
8209 if (standard_80387_constant_p (op0) == 0
8210 || (GET_CODE (op0) == MEM
8211 && ! (standard_80387_constant_p (op1) == 0
8212 || GET_CODE (op1) == MEM)))
8213 {
8214 rtx tmp;
8215 tmp = op0, op0 = op1, op1 = tmp;
8216 code = swap_condition (code);
8217 }
8218
8219 if (GET_CODE (op0) != REG)
8220 op0 = force_reg (op_mode, op0);
8221
8222 if (CONSTANT_P (op1))
8223 {
8224 int tmp = standard_80387_constant_p (op1);
8225 if (tmp == 0)
8226 op1 = validize_mem (force_const_mem (op_mode, op1));
8227 else if (tmp == 1)
8228 {
8229 if (TARGET_CMOVE)
8230 op1 = force_reg (op_mode, op1);
8231 }
8232 else
8233 op1 = force_reg (op_mode, op1);
8234 }
8235 }
8236
8237 /* Try to rearrange the comparison to make it cheaper. */
8238 if (ix86_fp_comparison_cost (code)
8239 > ix86_fp_comparison_cost (swap_condition (code))
8240 && (GET_CODE (op1) == REG || !no_new_pseudos))
8241 {
8242 rtx tmp;
8243 tmp = op0, op0 = op1, op1 = tmp;
8244 code = swap_condition (code);
8245 if (GET_CODE (op0) != REG)
8246 op0 = force_reg (op_mode, op0);
8247 }
8248
8249 *pop0 = op0;
8250 *pop1 = op1;
8251 return code;
8252 }
8253
8254 /* Convert the comparison codes we use to represent FP comparisons to the
8255 integer code that will result in a proper branch.  Return UNKNOWN if no
8256 such code is available. */
8257
8258 enum rtx_code
8259 ix86_fp_compare_code_to_integer (enum rtx_code code)
8260 {
8261 switch (code)
8262 {
8263 case GT:
8264 return GTU;
8265 case GE:
8266 return GEU;
8267 case ORDERED:
8268 case UNORDERED:
8269 return code;
8270 break;
8271 case UNEQ:
8272 return EQ;
8273 break;
8274 case UNLT:
8275 return LTU;
8276 break;
8277 case UNLE:
8278 return LEU;
8279 break;
8280 case LTGT:
8281 return NE;
8282 break;
8283 default:
8284 return UNKNOWN;
8285 }
8286 }
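/* Added note (not original): this mapping works because fcomi/fucomi and
   comiss/comisd set ZF and CF the way an unsigned integer compare would
   (see the flag table in ix86_fp_comparison_codes below), so GT maps to
   GTU, GE to GEU, and the UN* forms collapse onto their plain unsigned
   counterparts.  */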
8287
8288 /* Split comparison code CODE into comparisons we can do using branch
8289 instructions.  BYPASS_CODE is the comparison code for the branch that
8290 will branch around FIRST_CODE and SECOND_CODE.  If one of the branches
8291 is not required, its value is set to UNKNOWN.
8292 We never require more than two branches. */
8293
8294 void
8295 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
8296 enum rtx_code *first_code,
8297 enum rtx_code *second_code)
8298 {
8299 *first_code = code;
8300 *bypass_code = UNKNOWN;
8301 *second_code = UNKNOWN;
8302
8303 /* The fcomi comparison sets flags as follows:
8304
8305 cmp ZF PF CF
8306 > 0 0 0
8307 < 0 0 1
8308 = 1 0 0
8309 un 1 1 1 */
8310
8311 switch (code)
8312 {
8313 case GT: /* GTU - CF=0 & ZF=0 */
8314 case GE: /* GEU - CF=0 */
8315 case ORDERED: /* PF=0 */
8316 case UNORDERED: /* PF=1 */
8317 case UNEQ: /* EQ - ZF=1 */
8318 case UNLT: /* LTU - CF=1 */
8319 case UNLE: /* LEU - CF=1 | ZF=1 */
8320 case LTGT: /* EQ - ZF=0 */
8321 break;
8322 case LT: /* LTU - CF=1 - fails on unordered */
8323 *first_code = UNLT;
8324 *bypass_code = UNORDERED;
8325 break;
8326 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
8327 *first_code = UNLE;
8328 *bypass_code = UNORDERED;
8329 break;
8330 case EQ: /* EQ - ZF=1 - fails on unordered */
8331 *first_code = UNEQ;
8332 *bypass_code = UNORDERED;
8333 break;
8334 case NE: /* NE - ZF=0 - fails on unordered */
8335 *first_code = LTGT;
8336 *second_code = UNORDERED;
8337 break;
8338 case UNGE: /* GEU - CF=0 - fails on unordered */
8339 *first_code = GE;
8340 *second_code = UNORDERED;
8341 break;
8342 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
8343 *first_code = GT;
8344 *second_code = UNORDERED;
8345 break;
8346 default:
8347 abort ();
8348 }
8349 if (!TARGET_IEEE_FP)
8350 {
8351 *second_code = UNKNOWN;
8352 *bypass_code = UNKNOWN;
8353 }
8354 }
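/* Worked example (added, not original): for a strict IEEE "a < b" this
   returns *first_code = UNLT with *bypass_code = UNORDERED, so the caller
   ends up emitting roughly

       fucomi  %st(1), %st
       jp      .Lskip          ; unordered (PF set), bypass the real test
       jb      .Ltarget        ; CF set, i.e. a < b
   .Lskip:

   whereas without TARGET_IEEE_FP the bypass is dropped and a single jb
   suffices.  */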
8355
8356 /* Return the cost of a comparison done with fcom + arithmetic operations on AX.
8357 All the following functions use the number of instructions as a cost metric.
8358 In the future this should be tweaked to compute bytes for optimize_size and
8359 take into account the performance of various instructions on various CPUs. */
8360 static int
8361 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
8362 {
8363 if (!TARGET_IEEE_FP)
8364 return 4;
8365 /* The cost of code output by ix86_expand_fp_compare. */
8366 switch (code)
8367 {
8368 case UNLE:
8369 case UNLT:
8370 case LTGT:
8371 case GT:
8372 case GE:
8373 case UNORDERED:
8374 case ORDERED:
8375 case UNEQ:
8376 return 4;
8377 break;
8378 case LT:
8379 case NE:
8380 case EQ:
8381 case UNGE:
8382 return 5;
8383 break;
8384 case LE:
8385 case UNGT:
8386 return 6;
8387 break;
8388 default:
8389 abort ();
8390 }
8391 }
8392
8393 /* Return cost of comparison done using fcomi operation.
8394 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8395 static int
8396 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
8397 {
8398 enum rtx_code bypass_code, first_code, second_code;
8399 /* Return an arbitrarily high cost when the instruction is not supported - this
8400 prevents gcc from using it. */
8401 if (!TARGET_CMOVE)
8402 return 1024;
8403 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8404 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
8405 }
8406
8407 /* Return cost of comparison done using sahf operation.
8408 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8409 static int
8410 ix86_fp_comparison_sahf_cost (enum rtx_code code)
8411 {
8412 enum rtx_code bypass_code, first_code, second_code;
8413 /* Return an arbitrarily high cost when the instruction is not preferred - this
8414 keeps gcc from using it. */
8415 if (!TARGET_USE_SAHF && !optimize_size)
8416 return 1024;
8417 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8418 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
8419 }
8420
8421 /* Compute cost of the comparison done using any method.
8422 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8423 static int
8424 ix86_fp_comparison_cost (enum rtx_code code)
8425 {
8426 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
8427 int min;
8428
8429 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
8430 sahf_cost = ix86_fp_comparison_sahf_cost (code);
8431
8432 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
8433 if (min > sahf_cost)
8434 min = sahf_cost;
8435 if (min > fcomi_cost)
8436 min = fcomi_cost;
8437 return min;
8438 }
8439
8440 /* Generate insn patterns to do a floating point compare of OPERANDS. */
8441
8442 static rtx
8443 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
8444 rtx *second_test, rtx *bypass_test)
8445 {
8446 enum machine_mode fpcmp_mode, intcmp_mode;
8447 rtx tmp, tmp2;
8448 int cost = ix86_fp_comparison_cost (code);
8449 enum rtx_code bypass_code, first_code, second_code;
8450
8451 fpcmp_mode = ix86_fp_compare_mode (code);
8452 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
8453
8454 if (second_test)
8455 *second_test = NULL_RTX;
8456 if (bypass_test)
8457 *bypass_test = NULL_RTX;
8458
8459 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8460
8461 /* Do fcomi/sahf based test when profitable. */
8462 if ((bypass_code == UNKNOWN || bypass_test)
8463 && (second_code == UNKNOWN || second_test)
8464 && ix86_fp_comparison_arithmetics_cost (code) > cost)
8465 {
8466 if (TARGET_CMOVE)
8467 {
8468 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8469 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
8470 tmp);
8471 emit_insn (tmp);
8472 }
8473 else
8474 {
8475 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8476 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8477 if (!scratch)
8478 scratch = gen_reg_rtx (HImode);
8479 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8480 emit_insn (gen_x86_sahf_1 (scratch));
8481 }
8482
8483 /* The FP codes work out to act like unsigned. */
8484 intcmp_mode = fpcmp_mode;
8485 code = first_code;
8486 if (bypass_code != UNKNOWN)
8487 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
8488 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8489 const0_rtx);
8490 if (second_code != UNKNOWN)
8491 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
8492 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8493 const0_rtx);
8494 }
8495 else
8496 {
8497 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
8498 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8499 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8500 if (!scratch)
8501 scratch = gen_reg_rtx (HImode);
8502 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8503
8504 /* In the unordered case, we have to check C2 for NaN's, which
8505 doesn't happen to work out to anything nice combination-wise.
8506 So do some bit twiddling on the value we've got in AH to come
8507 up with an appropriate set of condition codes. */
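/* Reference for the masks below (added, not original): after fnstsw the
   i387 condition bits land in AH as C0 = 0x01, C2 = 0x04 and C3 = 0x40,
   so 0x45 tests all three, 0x44 tests C3|C2, 0x05 tests C2|C0, and for
   fcom-style compares C0/C2/C3 play roughly the roles of CF/PF/ZF from
   the fcomi table earlier in this file.  */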
8508
8509 intcmp_mode = CCNOmode;
8510 switch (code)
8511 {
8512 case GT:
8513 case UNGT:
8514 if (code == GT || !TARGET_IEEE_FP)
8515 {
8516 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8517 code = EQ;
8518 }
8519 else
8520 {
8521 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8522 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8523 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
8524 intcmp_mode = CCmode;
8525 code = GEU;
8526 }
8527 break;
8528 case LT:
8529 case UNLT:
8530 if (code == LT && TARGET_IEEE_FP)
8531 {
8532 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8533 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
8534 intcmp_mode = CCmode;
8535 code = EQ;
8536 }
8537 else
8538 {
8539 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
8540 code = NE;
8541 }
8542 break;
8543 case GE:
8544 case UNGE:
8545 if (code == GE || !TARGET_IEEE_FP)
8546 {
8547 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
8548 code = EQ;
8549 }
8550 else
8551 {
8552 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8553 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8554 GEN_INT (0x01)));
8555 code = NE;
8556 }
8557 break;
8558 case LE:
8559 case UNLE:
8560 if (code == LE && TARGET_IEEE_FP)
8561 {
8562 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8563 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8564 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8565 intcmp_mode = CCmode;
8566 code = LTU;
8567 }
8568 else
8569 {
8570 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8571 code = NE;
8572 }
8573 break;
8574 case EQ:
8575 case UNEQ:
8576 if (code == EQ && TARGET_IEEE_FP)
8577 {
8578 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8579 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8580 intcmp_mode = CCmode;
8581 code = EQ;
8582 }
8583 else
8584 {
8585 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8586 code = NE;
8587 break;
8588 }
8589 break;
8590 case NE:
8591 case LTGT:
8592 if (code == NE && TARGET_IEEE_FP)
8593 {
8594 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8595 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8596 GEN_INT (0x40)));
8597 code = NE;
8598 }
8599 else
8600 {
8601 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8602 code = EQ;
8603 }
8604 break;
8605
8606 case UNORDERED:
8607 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8608 code = NE;
8609 break;
8610 case ORDERED:
8611 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8612 code = EQ;
8613 break;
8614
8615 default:
8616 abort ();
8617 }
8618 }
8619
8620 /* Return the test that should be put into the flags user, i.e.
8621 the bcc, scc, or cmov instruction. */
8622 return gen_rtx_fmt_ee (code, VOIDmode,
8623 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8624 const0_rtx);
8625 }
8626
8627 rtx
8628 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
8629 {
8630 rtx op0, op1, ret;
8631 op0 = ix86_compare_op0;
8632 op1 = ix86_compare_op1;
8633
8634 if (second_test)
8635 *second_test = NULL_RTX;
8636 if (bypass_test)
8637 *bypass_test = NULL_RTX;
8638
8639 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8640 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
8641 second_test, bypass_test);
8642 else
8643 ret = ix86_expand_int_compare (code, op0, op1);
8644
8645 return ret;
8646 }
8647
8648 /* Return true if CODE will result in a nontrivial jump sequence. */
8649 bool
8650 ix86_fp_jump_nontrivial_p (enum rtx_code code)
8651 {
8652 enum rtx_code bypass_code, first_code, second_code;
8653 if (!TARGET_CMOVE)
8654 return true;
8655 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8656 return bypass_code != UNKNOWN || second_code != UNKNOWN;
8657 }
8658
8659 void
8660 ix86_expand_branch (enum rtx_code code, rtx label)
8661 {
8662 rtx tmp;
8663
8664 switch (GET_MODE (ix86_compare_op0))
8665 {
8666 case QImode:
8667 case HImode:
8668 case SImode:
8669 simple:
8670 tmp = ix86_expand_compare (code, NULL, NULL);
8671 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8672 gen_rtx_LABEL_REF (VOIDmode, label),
8673 pc_rtx);
8674 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
8675 return;
8676
8677 case SFmode:
8678 case DFmode:
8679 case XFmode:
8680 {
8681 rtvec vec;
8682 int use_fcomi;
8683 enum rtx_code bypass_code, first_code, second_code;
8684
8685 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
8686 &ix86_compare_op1);
8687
8688 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8689
8690 /* Check whether we will use the natural sequence with one jump.  If
8691 so, we can expand the jump early.  Otherwise delay expansion by
8692 creating a compound insn so as not to confuse the optimizers. */
8693 if (bypass_code == UNKNOWN && second_code == UNKNOWN
8694 && TARGET_CMOVE)
8695 {
8696 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
8697 gen_rtx_LABEL_REF (VOIDmode, label),
8698 pc_rtx, NULL_RTX, NULL_RTX);
8699 }
8700 else
8701 {
8702 tmp = gen_rtx_fmt_ee (code, VOIDmode,
8703 ix86_compare_op0, ix86_compare_op1);
8704 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8705 gen_rtx_LABEL_REF (VOIDmode, label),
8706 pc_rtx);
8707 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
8708
8709 use_fcomi = ix86_use_fcomi_compare (code);
8710 vec = rtvec_alloc (3 + !use_fcomi);
8711 RTVEC_ELT (vec, 0) = tmp;
8712 RTVEC_ELT (vec, 1)
8713 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
8714 RTVEC_ELT (vec, 2)
8715 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
8716 if (! use_fcomi)
8717 RTVEC_ELT (vec, 3)
8718 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
8719
8720 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
8721 }
8722 return;
8723 }
8724
8725 case DImode:
8726 if (TARGET_64BIT)
8727 goto simple;
8728 /* Expand DImode branch into multiple compare+branch. */
8729 {
8730 rtx lo[2], hi[2], label2;
8731 enum rtx_code code1, code2, code3;
8732
8733 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
8734 {
8735 tmp = ix86_compare_op0;
8736 ix86_compare_op0 = ix86_compare_op1;
8737 ix86_compare_op1 = tmp;
8738 code = swap_condition (code);
8739 }
8740 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
8741 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
8742
8743 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
8744 avoid two branches. This costs one extra insn, so disable when
8745 optimizing for size. */
8746
8747 if ((code == EQ || code == NE)
8748 && (!optimize_size
8749 || hi[1] == const0_rtx || lo[1] == const0_rtx))
8750 {
8751 rtx xor0, xor1;
8752
8753 xor1 = hi[0];
8754 if (hi[1] != const0_rtx)
8755 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
8756 NULL_RTX, 0, OPTAB_WIDEN);
8757
8758 xor0 = lo[0];
8759 if (lo[1] != const0_rtx)
8760 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
8761 NULL_RTX, 0, OPTAB_WIDEN);
8762
8763 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
8764 NULL_RTX, 0, OPTAB_WIDEN);
8765
8766 ix86_compare_op0 = tmp;
8767 ix86_compare_op1 = const0_rtx;
8768 ix86_expand_branch (code, label);
8769 return;
8770 }
8771
8772 /* Otherwise, if we are doing a less-than or greater-or-equal comparison,
8773 op1 is a constant and its low word is zero, then we can just
8774 examine the high word. */
8775
8776 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
8777 switch (code)
8778 {
8779 case LT: case LTU: case GE: case GEU:
8780 ix86_compare_op0 = hi[0];
8781 ix86_compare_op1 = hi[1];
8782 ix86_expand_branch (code, label);
8783 return;
8784 default:
8785 break;
8786 }
8787
8788 /* Otherwise, we need two or three jumps. */
8789
8790 label2 = gen_label_rtx ();
8791
8792 code1 = code;
8793 code2 = swap_condition (code);
8794 code3 = unsigned_condition (code);
8795
8796 switch (code)
8797 {
8798 case LT: case GT: case LTU: case GTU:
8799 break;
8800
8801 case LE: code1 = LT; code2 = GT; break;
8802 case GE: code1 = GT; code2 = LT; break;
8803 case LEU: code1 = LTU; code2 = GTU; break;
8804 case GEU: code1 = GTU; code2 = LTU; break;
8805
8806 case EQ: code1 = UNKNOWN; code2 = NE; break;
8807 case NE: code2 = UNKNOWN; break;
8808
8809 default:
8810 abort ();
8811 }
8812
8813 /*
8814 * a < b =>
8815 * if (hi(a) < hi(b)) goto true;
8816 * if (hi(a) > hi(b)) goto false;
8817 * if (lo(a) < lo(b)) goto true;
8818 * false:
8819 */
8820
8821 ix86_compare_op0 = hi[0];
8822 ix86_compare_op1 = hi[1];
8823
8824 if (code1 != UNKNOWN)
8825 ix86_expand_branch (code1, label);
8826 if (code2 != UNKNOWN)
8827 ix86_expand_branch (code2, label2);
8828
8829 ix86_compare_op0 = lo[0];
8830 ix86_compare_op1 = lo[1];
8831 ix86_expand_branch (code3, label);
8832
8833 if (code2 != UNKNOWN)
8834 emit_label (label2);
8835 return;
8836 }
8837
8838 default:
8839 abort ();
8840 }
8841 }
8842
8843 /* Split branch based on floating point condition. */
8844 void
8845 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
8846 rtx target1, rtx target2, rtx tmp, rtx pushed)
8847 {
8848 rtx second, bypass;
8849 rtx label = NULL_RTX;
8850 rtx condition;
8851 int bypass_probability = -1, second_probability = -1, probability = -1;
8852 rtx i;
8853
8854 if (target2 != pc_rtx)
8855 {
8856 rtx tmp = target2;
8857 code = reverse_condition_maybe_unordered (code);
8858 target2 = target1;
8859 target1 = tmp;
8860 }
8861
8862 condition = ix86_expand_fp_compare (code, op1, op2,
8863 tmp, &second, &bypass);
8864
8865 /* Remove pushed operand from stack. */
8866 if (pushed)
8867 ix86_free_from_memory (GET_MODE (pushed));
8868
8869 if (split_branch_probability >= 0)
8870 {
8871 /* Distribute the probabilities across the jumps.
8872 Assume that BYPASS and SECOND always test
8873 for UNORDERED. */
8874 probability = split_branch_probability;
8875
8876 /* A value of 1 is low enough that the probability does not need
8877 to be updated.  Later we may run some experiments and see
8878 whether unordered values are more frequent in practice. */
8879 if (bypass)
8880 bypass_probability = 1;
8881 if (second)
8882 second_probability = 1;
8883 }
8884 if (bypass != NULL_RTX)
8885 {
8886 label = gen_label_rtx ();
8887 i = emit_jump_insn (gen_rtx_SET
8888 (VOIDmode, pc_rtx,
8889 gen_rtx_IF_THEN_ELSE (VOIDmode,
8890 bypass,
8891 gen_rtx_LABEL_REF (VOIDmode,
8892 label),
8893 pc_rtx)));
8894 if (bypass_probability >= 0)
8895 REG_NOTES (i)
8896 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8897 GEN_INT (bypass_probability),
8898 REG_NOTES (i));
8899 }
8900 i = emit_jump_insn (gen_rtx_SET
8901 (VOIDmode, pc_rtx,
8902 gen_rtx_IF_THEN_ELSE (VOIDmode,
8903 condition, target1, target2)));
8904 if (probability >= 0)
8905 REG_NOTES (i)
8906 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8907 GEN_INT (probability),
8908 REG_NOTES (i));
8909 if (second != NULL_RTX)
8910 {
8911 i = emit_jump_insn (gen_rtx_SET
8912 (VOIDmode, pc_rtx,
8913 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
8914 target2)));
8915 if (second_probability >= 0)
8916 REG_NOTES (i)
8917 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8918 GEN_INT (second_probability),
8919 REG_NOTES (i));
8920 }
8921 if (label != NULL_RTX)
8922 emit_label (label);
8923 }
8924
8925 int
8926 ix86_expand_setcc (enum rtx_code code, rtx dest)
8927 {
8928 rtx ret, tmp, tmpreg, equiv;
8929 rtx second_test, bypass_test;
8930
8931 if (GET_MODE (ix86_compare_op0) == DImode
8932 && !TARGET_64BIT)
8933 return 0; /* FAIL */
8934
8935 if (GET_MODE (dest) != QImode)
8936 abort ();
8937
8938 ret = ix86_expand_compare (code, &second_test, &bypass_test);
8939 PUT_MODE (ret, QImode);
8940
8941 tmp = dest;
8942 tmpreg = dest;
8943
8944 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
8945 if (bypass_test || second_test)
8946 {
8947 rtx test = second_test;
8948 int bypass = 0;
8949 rtx tmp2 = gen_reg_rtx (QImode);
8950 if (bypass_test)
8951 {
8952 if (second_test)
8953 abort ();
8954 test = bypass_test;
8955 bypass = 1;
8956 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
8957 }
8958 PUT_MODE (test, QImode);
8959 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
8960
8961 if (bypass)
8962 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
8963 else
8964 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
8965 }
8966
8967 /* Attach a REG_EQUAL note describing the comparison result. */
8968 equiv = simplify_gen_relational (code, QImode,
8969 GET_MODE (ix86_compare_op0),
8970 ix86_compare_op0, ix86_compare_op1);
8971 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
8972
8973 return 1; /* DONE */
8974 }
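/* Added note (not original): when an IEEE FP compare needs two flag tests
   (see ix86_fp_comparison_codes), the extra test is materialized into a
   second QImode register above and the two results are combined -- OR for
   a SECOND test, AND of the reversed condition for a BYPASS test -- which
   has the same effect as branching around the main test would.  */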
8975
8976 /* Expand a comparison setting or clearing the carry flag.  Return true when
8977 successful and set *POP to the comparison operation. */
8978 static bool
8979 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
8980 {
8981 enum machine_mode mode =
8982 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
8983
8984 /* Do not handle DImode compares that go through the special path.  FP
8985 compares are handled below only when they map onto a carry-flag test. */
8986 if ((mode == DImode && !TARGET_64BIT))
8987 return false;
8988 if (FLOAT_MODE_P (mode))
8989 {
8990 rtx second_test = NULL, bypass_test = NULL;
8991 rtx compare_op, compare_seq;
8992
8993 /* Shortcut: the following common codes never translate into carry flag compares. */
8994 if (code == EQ || code == NE || code == UNEQ || code == LTGT
8995 || code == ORDERED || code == UNORDERED)
8996 return false;
8997
8998 /* These comparisons require the zero flag; swap the operands so they no longer do. */
8999 if ((code == GT || code == UNLE || code == LE || code == UNGT)
9000 && !TARGET_IEEE_FP)
9001 {
9002 rtx tmp = op0;
9003 op0 = op1;
9004 op1 = tmp;
9005 code = swap_condition (code);
9006 }
9007
9008 /* Try to expand the comparison and verify that we end up with a carry flag
9009 based comparison.  This fails to be true only when we decide to expand the
9010 comparison using arithmetic, which is not a common scenario. */
9011 start_sequence ();
9012 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9013 &second_test, &bypass_test);
9014 compare_seq = get_insns ();
9015 end_sequence ();
9016
9017 if (second_test || bypass_test)
9018 return false;
9019 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9020 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9021 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
9022 else
9023 code = GET_CODE (compare_op);
9024 if (code != LTU && code != GEU)
9025 return false;
9026 emit_insn (compare_seq);
9027 *pop = compare_op;
9028 return true;
9029 }
9030 if (!INTEGRAL_MODE_P (mode))
9031 return false;
9032 switch (code)
9033 {
9034 case LTU:
9035 case GEU:
9036 break;
9037
9038 /* Convert a==0 into (unsigned)a<1. */
9039 case EQ:
9040 case NE:
9041 if (op1 != const0_rtx)
9042 return false;
9043 op1 = const1_rtx;
9044 code = (code == EQ ? LTU : GEU);
9045 break;
9046
9047 /* Convert a>b into b<a or a>=b+1. */
9048 case GTU:
9049 case LEU:
9050 if (GET_CODE (op1) == CONST_INT)
9051 {
9052 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
9053 /* Bail out on overflow.  We could still swap the operands, but that
9054 would force loading the constant into a register. */
9055 if (op1 == const0_rtx
9056 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
9057 return false;
9058 code = (code == GTU ? GEU : LTU);
9059 }
9060 else
9061 {
9062 rtx tmp = op1;
9063 op1 = op0;
9064 op0 = tmp;
9065 code = (code == GTU ? LTU : GEU);
9066 }
9067 break;
9068
9069 /* Convert a>=0 into (unsigned)a<0x80000000. */
9070 case LT:
9071 case GE:
9072 if (mode == DImode || op1 != const0_rtx)
9073 return false;
9074 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
9075 code = (code == LT ? GEU : LTU);
9076 break;
9077 case LE:
9078 case GT:
9079 if (mode == DImode || op1 != constm1_rtx)
9080 return false;
9081 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
9082 code = (code == LE ? GEU : LTU);
9083 break;
9084
9085 default:
9086 return false;
9087 }
9088 /* Swapping operands may cause constant to appear as first operand. */
9089 if (!nonimmediate_operand (op0, VOIDmode))
9090 {
9091 if (no_new_pseudos)
9092 return false;
9093 op0 = force_reg (mode, op0);
9094 }
9095 ix86_compare_op0 = op0;
9096 ix86_compare_op1 = op1;
9097 *pop = ix86_expand_compare (code, NULL, NULL);
9098 if (GET_CODE (*pop) != LTU && GET_CODE (*pop) != GEU)
9099 abort ();
9100 return true;
9101 }
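/* Concrete examples of the rewrites above (added, not original): "x == 0"
   becomes "(unsigned) x < 1" (an LTU test) and "x != 0" becomes
   "(unsigned) x >= 1" (GEU), so the whole result lives in the carry flag;
   that is what lets ix86_expand_int_movcc below replace the conditional
   move with a branch-free cmp/sbb sequence.  */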
9102
9103 int
9104 ix86_expand_int_movcc (rtx operands[])
9105 {
9106 enum rtx_code code = GET_CODE (operands[1]), compare_code;
9107 rtx compare_seq, compare_op;
9108 rtx second_test, bypass_test;
9109 enum machine_mode mode = GET_MODE (operands[0]);
9110 bool sign_bit_compare_p = false;
9111
9112 start_sequence ();
9113 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9114 compare_seq = get_insns ();
9115 end_sequence ();
9116
9117 compare_code = GET_CODE (compare_op);
9118
9119 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
9120 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
9121 sign_bit_compare_p = true;
9122
9123 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
9124 HImode insns, we'd be swallowed in word prefix ops. */
9125
9126 if ((mode != HImode || TARGET_FAST_PREFIX)
9127 && (mode != DImode || TARGET_64BIT)
9128 && GET_CODE (operands[2]) == CONST_INT
9129 && GET_CODE (operands[3]) == CONST_INT)
9130 {
9131 rtx out = operands[0];
9132 HOST_WIDE_INT ct = INTVAL (operands[2]);
9133 HOST_WIDE_INT cf = INTVAL (operands[3]);
9134 HOST_WIDE_INT diff;
9135
9136 diff = ct - cf;
9137 /* Sign bit compares are better done using shifts than by using
9138 sbb. */
9139 if (sign_bit_compare_p
9140 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
9141 ix86_compare_op1, &compare_op))
9142 {
9143 /* Detect overlap between destination and compare sources. */
9144 rtx tmp = out;
9145
9146 if (!sign_bit_compare_p)
9147 {
9148 bool fpcmp = false;
9149
9150 compare_code = GET_CODE (compare_op);
9151
9152 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9153 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9154 {
9155 fpcmp = true;
9156 compare_code = ix86_fp_compare_code_to_integer (compare_code);
9157 }
9158
9159 /* To simplify rest of code, restrict to the GEU case. */
9160 if (compare_code == LTU)
9161 {
9162 HOST_WIDE_INT tmp = ct;
9163 ct = cf;
9164 cf = tmp;
9165 compare_code = reverse_condition (compare_code);
9166 code = reverse_condition (code);
9167 }
9168 else
9169 {
9170 if (fpcmp)
9171 PUT_CODE (compare_op,
9172 reverse_condition_maybe_unordered
9173 (GET_CODE (compare_op)));
9174 else
9175 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
9176 }
9177 diff = ct - cf;
9178
9179 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
9180 || reg_overlap_mentioned_p (out, ix86_compare_op1))
9181 tmp = gen_reg_rtx (mode);
9182
9183 if (mode == DImode)
9184 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
9185 else
9186 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
9187 }
9188 else
9189 {
9190 if (code == GT || code == GE)
9191 code = reverse_condition (code);
9192 else
9193 {
9194 HOST_WIDE_INT tmp = ct;
9195 ct = cf;
9196 cf = tmp;
9197 diff = ct - cf;
9198 }
9199 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
9200 ix86_compare_op1, VOIDmode, 0, -1);
9201 }
9202
9203 if (diff == 1)
9204 {
9205 /*
9206 * cmpl op0,op1
9207 * sbbl dest,dest
9208 * [addl dest, ct]
9209 *
9210 * Size 5 - 8.
9211 */
9212 if (ct)
9213 tmp = expand_simple_binop (mode, PLUS,
9214 tmp, GEN_INT (ct),
9215 copy_rtx (tmp), 1, OPTAB_DIRECT);
9216 }
9217 else if (cf == -1)
9218 {
9219 /*
9220 * cmpl op0,op1
9221 * sbbl dest,dest
9222 * orl $ct, dest
9223 *
9224 * Size 8.
9225 */
9226 tmp = expand_simple_binop (mode, IOR,
9227 tmp, GEN_INT (ct),
9228 copy_rtx (tmp), 1, OPTAB_DIRECT);
9229 }
9230 else if (diff == -1 && ct)
9231 {
9232 /*
9233 * cmpl op0,op1
9234 * sbbl dest,dest
9235 * notl dest
9236 * [addl dest, cf]
9237 *
9238 * Size 8 - 11.
9239 */
9240 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9241 if (cf)
9242 tmp = expand_simple_binop (mode, PLUS,
9243 copy_rtx (tmp), GEN_INT (cf),
9244 copy_rtx (tmp), 1, OPTAB_DIRECT);
9245 }
9246 else
9247 {
9248 /*
9249 * cmpl op0,op1
9250 * sbbl dest,dest
9251 * [notl dest]
9252 * andl cf - ct, dest
9253 * [addl dest, ct]
9254 *
9255 * Size 8 - 11.
9256 */
9257
9258 if (cf == 0)
9259 {
9260 cf = ct;
9261 ct = 0;
9262 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9263 }
9264
9265 tmp = expand_simple_binop (mode, AND,
9266 copy_rtx (tmp),
9267 gen_int_mode (cf - ct, mode),
9268 copy_rtx (tmp), 1, OPTAB_DIRECT);
9269 if (ct)
9270 tmp = expand_simple_binop (mode, PLUS,
9271 copy_rtx (tmp), GEN_INT (ct),
9272 copy_rtx (tmp), 1, OPTAB_DIRECT);
9273 }
9274
9275 if (!rtx_equal_p (tmp, out))
9276 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
9277
9278 return 1; /* DONE */
9279 }
9280
9281 if (diff < 0)
9282 {
9283 HOST_WIDE_INT tmp;
9284 tmp = ct, ct = cf, cf = tmp;
9285 diff = -diff;
9286 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9287 {
9288 /* We may be reversing an unordered compare to a normal compare, which
9289 is not valid in general (we may convert a non-trapping condition
9290 to a trapping one); however, on i386 we currently emit all
9291 comparisons unordered. */
9292 compare_code = reverse_condition_maybe_unordered (compare_code);
9293 code = reverse_condition_maybe_unordered (code);
9294 }
9295 else
9296 {
9297 compare_code = reverse_condition (compare_code);
9298 code = reverse_condition (code);
9299 }
9300 }
9301
9302 compare_code = UNKNOWN;
9303 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
9304 && GET_CODE (ix86_compare_op1) == CONST_INT)
9305 {
9306 if (ix86_compare_op1 == const0_rtx
9307 && (code == LT || code == GE))
9308 compare_code = code;
9309 else if (ix86_compare_op1 == constm1_rtx)
9310 {
9311 if (code == LE)
9312 compare_code = LT;
9313 else if (code == GT)
9314 compare_code = GE;
9315 }
9316 }
9317
9318 /* Optimize dest = (op0 < 0) ? -1 : cf. */
9319 if (compare_code != UNKNOWN
9320 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
9321 && (cf == -1 || ct == -1))
9322 {
9323 /* If lea code below could be used, only optimize
9324 if it results in a 2 insn sequence. */
9325
9326 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
9327 || diff == 3 || diff == 5 || diff == 9)
9328 || (compare_code == LT && ct == -1)
9329 || (compare_code == GE && cf == -1))
9330 {
9331 /*
9332 * notl op1 (if necessary)
9333 * sarl $31, op1
9334 * orl cf, op1
9335 */
9336 if (ct != -1)
9337 {
9338 cf = ct;
9339 ct = -1;
9340 code = reverse_condition (code);
9341 }
9342
9343 out = emit_store_flag (out, code, ix86_compare_op0,
9344 ix86_compare_op1, VOIDmode, 0, -1);
9345
9346 out = expand_simple_binop (mode, IOR,
9347 out, GEN_INT (cf),
9348 out, 1, OPTAB_DIRECT);
9349 if (out != operands[0])
9350 emit_move_insn (operands[0], out);
9351
9352 return 1; /* DONE */
9353 }
9354 }
9355
9356
9357 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
9358 || diff == 3 || diff == 5 || diff == 9)
9359 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
9360 && (mode != DImode
9361 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
9362 {
9363 /*
9364 * xorl dest,dest
9365 * cmpl op1,op2
9366 * setcc dest
9367 * lea cf(dest*(ct-cf)),dest
9368 *
9369 * Size 14.
9370 *
9371 * This also catches the degenerate setcc-only case.
9372 */
9373
9374 rtx tmp;
9375 int nops;
9376
9377 out = emit_store_flag (out, code, ix86_compare_op0,
9378 ix86_compare_op1, VOIDmode, 0, 1);
9379
9380 nops = 0;
9381 /* On x86_64 the lea instruction operates on Pmode, so we need
9382 to get the arithmetic done in the proper mode to match. */
9383 if (diff == 1)
9384 tmp = copy_rtx (out);
9385 else
9386 {
9387 rtx out1;
9388 out1 = copy_rtx (out);
9389 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
9390 nops++;
9391 if (diff & 1)
9392 {
9393 tmp = gen_rtx_PLUS (mode, tmp, out1);
9394 nops++;
9395 }
9396 }
9397 if (cf != 0)
9398 {
9399 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
9400 nops++;
9401 }
9402 if (!rtx_equal_p (tmp, out))
9403 {
9404 if (nops == 1)
9405 out = force_operand (tmp, copy_rtx (out));
9406 else
9407 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
9408 }
9409 if (!rtx_equal_p (out, operands[0]))
9410 emit_move_insn (operands[0], copy_rtx (out));
9411
9412 return 1; /* DONE */
9413 }
9414
9415 /*
9416 * General case: Jumpful:
9417 * xorl dest,dest cmpl op1, op2
9418 * cmpl op1, op2 movl ct, dest
9419 * setcc dest jcc 1f
9420 * decl dest movl cf, dest
9421 * andl (cf-ct),dest 1:
9422 * addl ct,dest
9423 *
9424 * Size 20. Size 14.
9425 *
9426 * This is reasonably steep, but branch mispredict costs are
9427 * high on modern cpus, so consider failing only if optimizing
9428 * for space.
9429 */
9430
9431 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
9432 && BRANCH_COST >= 2)
9433 {
9434 if (cf == 0)
9435 {
9436 cf = ct;
9437 ct = 0;
9438 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9439 /* We may be reversing an unordered compare to a normal compare,
9440 which is not valid in general (we may convert a non-trapping
9441 condition to a trapping one); however, on i386 we currently
9442 emit all comparisons unordered. */
9443 code = reverse_condition_maybe_unordered (code);
9444 else
9445 {
9446 code = reverse_condition (code);
9447 if (compare_code != UNKNOWN)
9448 compare_code = reverse_condition (compare_code);
9449 }
9450 }
9451
9452 if (compare_code != UNKNOWN)
9453 {
9454 /* notl op1 (if needed)
9455 sarl $31, op1
9456 andl (cf-ct), op1
9457 addl ct, op1
9458
9459 For x < 0 (resp. x <= -1) there will be no notl,
9460 so if possible swap the constants to get rid of the
9461 complement.
9462 True/false will be -1/0 while code below (store flag
9463 followed by decrement) is 0/-1, so the constants need
9464 to be exchanged once more. */
9465
9466 if (compare_code == GE || !cf)
9467 {
9468 code = reverse_condition (code);
9469 compare_code = LT;
9470 }
9471 else
9472 {
9473 HOST_WIDE_INT tmp = cf;
9474 cf = ct;
9475 ct = tmp;
9476 }
9477
9478 out = emit_store_flag (out, code, ix86_compare_op0,
9479 ix86_compare_op1, VOIDmode, 0, -1);
9480 }
9481 else
9482 {
9483 out = emit_store_flag (out, code, ix86_compare_op0,
9484 ix86_compare_op1, VOIDmode, 0, 1);
9485
9486 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
9487 copy_rtx (out), 1, OPTAB_DIRECT);
9488 }
9489
9490 out = expand_simple_binop (mode, AND, copy_rtx (out),
9491 gen_int_mode (cf - ct, mode),
9492 copy_rtx (out), 1, OPTAB_DIRECT);
9493 if (ct)
9494 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
9495 copy_rtx (out), 1, OPTAB_DIRECT);
9496 if (!rtx_equal_p (out, operands[0]))
9497 emit_move_insn (operands[0], copy_rtx (out));
9498
9499 return 1; /* DONE */
9500 }
9501 }
9502
9503 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
9504 {
9505 /* Try a few things more with specific constants and a variable. */
9506
9507 optab op;
9508 rtx var, orig_out, out, tmp;
9509
9510 if (BRANCH_COST <= 2)
9511 return 0; /* FAIL */
9512
9513 /* If one of the two operands is an interesting constant, load a
9514 constant by recursing below and mask the variable in with a logical operation. */
9515
9516 if (GET_CODE (operands[2]) == CONST_INT)
9517 {
9518 var = operands[3];
9519 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
9520 operands[3] = constm1_rtx, op = and_optab;
9521 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
9522 operands[3] = const0_rtx, op = ior_optab;
9523 else
9524 return 0; /* FAIL */
9525 }
9526 else if (GET_CODE (operands[3]) == CONST_INT)
9527 {
9528 var = operands[2];
9529 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
9530 operands[2] = constm1_rtx, op = and_optab;
9531 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
9532 operands[2] = const0_rtx, op = ior_optab;
9533 else
9534 return 0; /* FAIL */
9535 }
9536 else
9537 return 0; /* FAIL */
9538
9539 orig_out = operands[0];
9540 tmp = gen_reg_rtx (mode);
9541 operands[0] = tmp;
9542
9543 /* Recurse to get the constant loaded. */
9544 if (ix86_expand_int_movcc (operands) == 0)
9545 return 0; /* FAIL */
9546
9547 /* Mask in the interesting variable. */
9548 out = expand_binop (mode, op, var, tmp, orig_out, 0,
9549 OPTAB_WIDEN);
9550 if (!rtx_equal_p (out, orig_out))
9551 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
9552
9553 return 1; /* DONE */
9554 }
9555
9556 /*
9557 * For comparison with above,
9558 *
9559 * movl cf,dest
9560 * movl ct,tmp
9561 * cmpl op1,op2
9562 * cmovcc tmp,dest
9563 *
9564 * Size 15.
9565 */
9566
9567 if (! nonimmediate_operand (operands[2], mode))
9568 operands[2] = force_reg (mode, operands[2]);
9569 if (! nonimmediate_operand (operands[3], mode))
9570 operands[3] = force_reg (mode, operands[3]);
9571
9572 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9573 {
9574 rtx tmp = gen_reg_rtx (mode);
9575 emit_move_insn (tmp, operands[3]);
9576 operands[3] = tmp;
9577 }
9578 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9579 {
9580 rtx tmp = gen_reg_rtx (mode);
9581 emit_move_insn (tmp, operands[2]);
9582 operands[2] = tmp;
9583 }
9584
9585 if (! register_operand (operands[2], VOIDmode)
9586 && (mode == QImode
9587 || ! register_operand (operands[3], VOIDmode)))
9588 operands[2] = force_reg (mode, operands[2]);
9589
9590 if (mode == QImode
9591 && ! register_operand (operands[3], VOIDmode))
9592 operands[3] = force_reg (mode, operands[3]);
9593
9594 emit_insn (compare_seq);
9595 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9596 gen_rtx_IF_THEN_ELSE (mode,
9597 compare_op, operands[2],
9598 operands[3])));
9599 if (bypass_test)
9600 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
9601 gen_rtx_IF_THEN_ELSE (mode,
9602 bypass_test,
9603 copy_rtx (operands[3]),
9604 copy_rtx (operands[0]))));
9605 if (second_test)
9606 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
9607 gen_rtx_IF_THEN_ELSE (mode,
9608 second_test,
9609 copy_rtx (operands[2]),
9610 copy_rtx (operands[0]))));
9611
9612 return 1; /* DONE */
9613 }
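
/* Editor's sketch (not part of i386.c): a C model of the branchless select
   that the sbb- and setcc-based sequences described above compute.  The
   function and variable names below are illustrative only.  */
#include <stdio.h>

static long
branchless_select (int cond, long ct, long cf)
{
  long flag = cond ? 1 : 0;        /* xorl dest,dest ; setcc dest  -> 0 or 1    */
  long mask = flag - 1;            /* decl dest  -> 0 when true, -1 when false  */
  return (mask & (cf - ct)) + ct;  /* andl (cf-ct),dest ; addl ct,dest          */
}

int
main (void)
{
  /* Prints "10 20": ct is selected when the condition holds, cf otherwise.  */
  printf ("%ld %ld\n", branchless_select (1, 10, 20),
          branchless_select (0, 10, 20));
  return 0;
}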
9614
9615 int
9616 ix86_expand_fp_movcc (rtx operands[])
9617 {
9618 enum rtx_code code;
9619 rtx tmp;
9620 rtx compare_op, second_test, bypass_test;
9621
9622 /* For SF/DFmode conditional moves based on comparisons
9623 in the same mode, we may want to use SSE min/max instructions. */
9624 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
9625 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
9626 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
9627 /* The SSE comparisons do not support the LTGT/UNEQ pair. */
9628 && (!TARGET_IEEE_FP
9629 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
9630 /* We may be called from the post-reload splitter. */
9631 && (!REG_P (operands[0])
9632 || SSE_REG_P (operands[0])
9633 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
9634 {
9635 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
9636 code = GET_CODE (operands[1]);
9637
9638 /* See if we have a (cross) match between the comparison operands and
9639 the conditional move operands. */
9640 if (rtx_equal_p (operands[2], op1))
9641 {
9642 rtx tmp = op0;
9643 op0 = op1;
9644 op1 = tmp;
9645 code = reverse_condition_maybe_unordered (code);
9646 }
9647 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
9648 {
9649 /* Check for min operation. */
9650 if (code == LT || code == UNLE)
9651 {
9652 if (code == UNLE)
9653 {
9654 rtx tmp = op0;
9655 op0 = op1;
9656 op1 = tmp;
9657 }
9658 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9659 if (memory_operand (op0, VOIDmode))
9660 op0 = force_reg (GET_MODE (operands[0]), op0);
9661 if (GET_MODE (operands[0]) == SFmode)
9662 emit_insn (gen_minsf3 (operands[0], op0, op1));
9663 else
9664 emit_insn (gen_mindf3 (operands[0], op0, op1));
9665 return 1;
9666 }
9667 /* Check for max operation. */
9668 if (code == GT || code == UNGE)
9669 {
9670 if (code == UNGE)
9671 {
9672 rtx tmp = op0;
9673 op0 = op1;
9674 op1 = tmp;
9675 }
9676 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9677 if (memory_operand (op0, VOIDmode))
9678 op0 = force_reg (GET_MODE (operands[0]), op0);
9679 if (GET_MODE (operands[0]) == SFmode)
9680 emit_insn (gen_maxsf3 (operands[0], op0, op1));
9681 else
9682 emit_insn (gen_maxdf3 (operands[0], op0, op1));
9683 return 1;
9684 }
9685 }
9686 /* Massage the condition into an sse_comparison_operator. When not in
9687 IEEE mode, try to canonicalize the destination operand
9688 to be first in the comparison - this helps reload to avoid extra
9689 moves. */
9690 if (!sse_comparison_operator (operands[1], VOIDmode)
9691 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
9692 {
9693 rtx tmp = ix86_compare_op0;
9694 ix86_compare_op0 = ix86_compare_op1;
9695 ix86_compare_op1 = tmp;
9696 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
9697 VOIDmode, ix86_compare_op0,
9698 ix86_compare_op1);
9699 }
9700 /* Similarly, try to make the result the first operand of the conditional
9701 move. We also don't support the NE comparison on SSE, so try to
9702 avoid it. */
9703 if ((rtx_equal_p (operands[0], operands[3])
9704 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
9705 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
9706 {
9707 rtx tmp = operands[2];
9708 operands[2] = operands[3];
9709 operands[3] = tmp;
9710 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
9711 (GET_CODE (operands[1])),
9712 VOIDmode, ix86_compare_op0,
9713 ix86_compare_op1);
9714 }
9715 if (GET_MODE (operands[0]) == SFmode)
9716 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
9717 operands[2], operands[3],
9718 ix86_compare_op0, ix86_compare_op1));
9719 else
9720 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
9721 operands[2], operands[3],
9722 ix86_compare_op0, ix86_compare_op1));
9723 return 1;
9724 }
9725
9726 /* The floating point conditional move instructions don't directly
9727 support conditions resulting from a signed integer comparison. */
9728
9729 code = GET_CODE (operands[1]);
9730 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9731
9732 /* If the comparison is not directly representable as an fcmov condition,
9733 compute a QImode flag with setcc and test that flag against zero instead. */
9734
9735 if (!fcmov_comparison_operator (compare_op, VOIDmode))
9736 {
9737 if (second_test != NULL || bypass_test != NULL)
9738 abort ();
9739 tmp = gen_reg_rtx (QImode);
9740 ix86_expand_setcc (code, tmp);
9741 code = NE;
9742 ix86_compare_op0 = tmp;
9743 ix86_compare_op1 = const0_rtx;
9744 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9745 }
9746 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9747 {
9748 tmp = gen_reg_rtx (GET_MODE (operands[0]));
9749 emit_move_insn (tmp, operands[3]);
9750 operands[3] = tmp;
9751 }
9752 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9753 {
9754 tmp = gen_reg_rtx (GET_MODE (operands[0]));
9755 emit_move_insn (tmp, operands[2]);
9756 operands[2] = tmp;
9757 }
9758
9759 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9760 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9761 compare_op,
9762 operands[2],
9763 operands[3])));
9764 if (bypass_test)
9765 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9766 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9767 bypass_test,
9768 operands[3],
9769 operands[0])));
9770 if (second_test)
9771 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9772 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9773 second_test,
9774 operands[2],
9775 operands[0])));
9776
9777 return 1;
9778 }
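
/* Editor's sketch (not GCC code): source-level shapes that the SSE path of
   ix86_expand_fp_movcc above can turn into minss/minsd or maxss/maxsd
   instead of a compare plus conditional move.  Note that the operand order
   matters for IEEE: minsd/maxsd return their second operand when the
   comparison is unordered, so the expander is careful about which operand
   ends up where.  Names below are illustrative only.  */
#include <stdio.h>

static double sse_min_shape (double a, double b) { return a < b ? a : b; }
static double sse_max_shape (double a, double b) { return a > b ? a : b; }

int
main (void)
{
  printf ("%g %g\n", sse_min_shape (1.5, 2.5), sse_max_shape (1.5, 2.5));
  return 0;
}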
9779
9780 /* Expand conditional increment or decrement using adc/sbb instructions.
9781 The default case using setcc followed by the conditional move can be
9782 done by generic code. */
9783 int
9784 ix86_expand_int_addcc (rtx operands[])
9785 {
9786 enum rtx_code code = GET_CODE (operands[1]);
9787 rtx compare_op;
9788 rtx val = const0_rtx;
9789 bool fpcmp = false;
9790 enum machine_mode mode = GET_MODE (operands[0]);
9791
9792 if (operands[3] != const1_rtx
9793 && operands[3] != constm1_rtx)
9794 return 0;
9795 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
9796 ix86_compare_op1, &compare_op))
9797 return 0;
9798 code = GET_CODE (compare_op);
9799
9800 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9801 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9802 {
9803 fpcmp = true;
9804 code = ix86_fp_compare_code_to_integer (code);
9805 }
9806
9807 if (code != LTU)
9808 {
9809 val = constm1_rtx;
9810 if (fpcmp)
9811 PUT_CODE (compare_op,
9812 reverse_condition_maybe_unordered
9813 (GET_CODE (compare_op)));
9814 else
9815 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
9816 }
9817 PUT_MODE (compare_op, mode);
9818
9819 /* Construct either adc or sbb insn. */
9820 if ((code == LTU) == (operands[3] == constm1_rtx))
9821 {
9822 switch (GET_MODE (operands[0]))
9823 {
9824 case QImode:
9825 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
9826 break;
9827 case HImode:
9828 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
9829 break;
9830 case SImode:
9831 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
9832 break;
9833 case DImode:
9834 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
9835 break;
9836 default:
9837 abort ();
9838 }
9839 }
9840 else
9841 {
9842 switch (GET_MODE (operands[0]))
9843 {
9844 case QImode:
9845 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
9846 break;
9847 case HImode:
9848 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
9849 break;
9850 case SImode:
9851 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
9852 break;
9853 case DImode:
9854 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
9855 break;
9856 default:
9857 abort ();
9858 }
9859 }
9860 return 1; /* DONE */
9861 }
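
/* Editor's sketch (not GCC code): the kind of source pattern that
   ix86_expand_int_addcc above handles without a branch or setcc, by reusing
   the carry flag (cmp followed by adc/sbb) - a conditional increment or
   decrement by 1.  Function and variable names here are illustrative only.  */
#include <stdio.h>

static unsigned int
count_below (const unsigned int *v, int n, unsigned int bound)
{
  unsigned int count = 0;
  int i;

  for (i = 0; i < n; i++)
    count += (v[i] < bound) ? 1 : 0;   /* the operands[3] == const1_rtx case */
  return count;
}

int
main (void)
{
  unsigned int v[4] = { 1, 5, 2, 9 };
  printf ("%u\n", count_below (v, 4, 4));   /* prints 2 */
  return 0;
}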
9862
9863
9864 /* Split OPERAND into SImode parts. Similar to split_di, but
9865 works for floating point values and non-offsettable memories.
9866 For pushes, it returns just stack offsets; the values will be saved
9867 in the right order. At most three parts are generated. */
9868
9869 static int
9870 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
9871 {
9872 int size;
9873
9874 if (!TARGET_64BIT)
9875 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
9876 else
9877 size = (GET_MODE_SIZE (mode) + 4) / 8;
9878
9879 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
9880 abort ();
9881 if (size < 2 || size > 3)
9882 abort ();
9883
9884 /* Optimize constant pool references into immediates. This is used by fp
9885 moves, which force all constants to memory to allow combining. */
9886 if (GET_CODE (operand) == MEM && MEM_READONLY_P (operand))
9887 {
9888 rtx tmp = maybe_get_pool_constant (operand);
9889 if (tmp)
9890 operand = tmp;
9891 }
9892
9893 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
9894 {
9895 /* The only non-offsettable memories we handle are pushes. */
9896 if (! push_operand (operand, VOIDmode))
9897 abort ();
9898
9899 operand = copy_rtx (operand);
9900 PUT_MODE (operand, Pmode);
9901 parts[0] = parts[1] = parts[2] = operand;
9902 }
9903 else if (!TARGET_64BIT)
9904 {
9905 if (mode == DImode)
9906 split_di (&operand, 1, &parts[0], &parts[1]);
9907 else
9908 {
9909 if (REG_P (operand))
9910 {
9911 if (!reload_completed)
9912 abort ();
9913 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
9914 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
9915 if (size == 3)
9916 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
9917 }
9918 else if (offsettable_memref_p (operand))
9919 {
9920 operand = adjust_address (operand, SImode, 0);
9921 parts[0] = operand;
9922 parts[1] = adjust_address (operand, SImode, 4);
9923 if (size == 3)
9924 parts[2] = adjust_address (operand, SImode, 8);
9925 }
9926 else if (GET_CODE (operand) == CONST_DOUBLE)
9927 {
9928 REAL_VALUE_TYPE r;
9929 long l[4];
9930
9931 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
9932 switch (mode)
9933 {
9934 case XFmode:
9935 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
9936 parts[2] = gen_int_mode (l[2], SImode);
9937 break;
9938 case DFmode:
9939 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
9940 break;
9941 default:
9942 abort ();
9943 }
9944 parts[1] = gen_int_mode (l[1], SImode);
9945 parts[0] = gen_int_mode (l[0], SImode);
9946 }
9947 else
9948 abort ();
9949 }
9950 }
9951 else
9952 {
9953 if (mode == TImode)
9954 split_ti (&operand, 1, &parts[0], &parts[1]);
9955 if (mode == XFmode || mode == TFmode)
9956 {
9957 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
9958 if (REG_P (operand))
9959 {
9960 if (!reload_completed)
9961 abort ();
9962 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
9963 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
9964 }
9965 else if (offsettable_memref_p (operand))
9966 {
9967 operand = adjust_address (operand, DImode, 0);
9968 parts[0] = operand;
9969 parts[1] = adjust_address (operand, upper_mode, 8);
9970 }
9971 else if (GET_CODE (operand) == CONST_DOUBLE)
9972 {
9973 REAL_VALUE_TYPE r;
9974 long l[4];
9975
9976 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
9977 real_to_target (l, &r, mode);
9978
9979 /* Do not use shift by 32 to avoid warning on 32bit systems. */
9980 if (HOST_BITS_PER_WIDE_INT >= 64)
9981 parts[0]
9982 = gen_int_mode
9983 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
9984 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
9985 DImode);
9986 else
9987 parts[0] = immed_double_const (l[0], l[1], DImode);
9988
9989 if (upper_mode == SImode)
9990 parts[1] = gen_int_mode (l[2], SImode);
9991 else if (HOST_BITS_PER_WIDE_INT >= 64)
9992 parts[1]
9993 = gen_int_mode
9994 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
9995 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
9996 DImode);
9997 else
9998 parts[1] = immed_double_const (l[2], l[3], DImode);
9999 }
10000 else
10001 abort ();
10002 }
10003 }
10004
10005 return size;
10006 }
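
/* Editor's sketch (not GCC code): the word splitting that the routine above
   performs for a constant on a 32-bit target - a 64-bit value becomes a low
   and a high SImode word (parts[0] and parts[1]).  */
#include <inttypes.h>
#include <stdio.h>

static void
split_di_value (uint64_t value, uint32_t parts[2])
{
  parts[0] = (uint32_t) (value & 0xffffffffu);  /* low word  */
  parts[1] = (uint32_t) (value >> 32);          /* high word */
}

int
main (void)
{
  uint32_t p[2];

  split_di_value (0x0123456789abcdefULL, p);
  printf ("%08" PRIx32 " %08" PRIx32 "\n", p[0], p[1]);  /* "89abcdef 01234567" */
  return 0;
}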
10007
10008 /* Emit insns to perform a move or push of DI, DF, and XF values.
10009 All required insns are emitted here; nothing is left for normal moves.
10010 Operands 2-4 receive the destination parts in the correct order;
10011 operands 5-7 receive the corresponding source parts. */
10012
10013 void
10014 ix86_split_long_move (rtx operands[])
10015 {
10016 rtx part[2][3];
10017 int nparts;
10018 int push = 0;
10019 int collisions = 0;
10020 enum machine_mode mode = GET_MODE (operands[0]);
10021
10022 /* The DFmode expanders may ask us to move a double.
10023 For a 64-bit target this is a single move. By hiding the fact
10024 here we simplify the i386.md splitters. */
10025 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
10026 {
10027 /* Optimize constant pool references into immediates. This is used by
10028 fp moves, which force all constants to memory to allow combining. */
10029
10030 if (GET_CODE (operands[1]) == MEM
10031 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
10032 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
10033 operands[1] = get_pool_constant (XEXP (operands[1], 0));
10034 if (push_operand (operands[0], VOIDmode))
10035 {
10036 operands[0] = copy_rtx (operands[0]);
10037 PUT_MODE (operands[0], Pmode);
10038 }
10039 else
10040 operands[0] = gen_lowpart (DImode, operands[0]);
10041 operands[1] = gen_lowpart (DImode, operands[1]);
10042 emit_move_insn (operands[0], operands[1]);
10043 return;
10044 }
10045
10046 /* The only non-offsettable memory we handle is push. */
10047 if (push_operand (operands[0], VOIDmode))
10048 push = 1;
10049 else if (GET_CODE (operands[0]) == MEM
10050 && ! offsettable_memref_p (operands[0]))
10051 abort ();
10052
10053 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
10054 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
10055
10056 /* When emitting a push, take care of source operands that are on the stack. */
10057 if (push && GET_CODE (operands[1]) == MEM
10058 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
10059 {
10060 if (nparts == 3)
10061 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
10062 XEXP (part[1][2], 0));
10063 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
10064 XEXP (part[1][1], 0));
10065 }
10066
10067 /* We need to do the copy in the right order in case an address register
10068 of the source overlaps the destination. */
10069 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
10070 {
10071 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
10072 collisions++;
10073 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10074 collisions++;
10075 if (nparts == 3
10076 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
10077 collisions++;
10078
10079 /* Collision in the middle part can be handled by reordering. */
10080 if (collisions == 1 && nparts == 3
10081 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10082 {
10083 rtx tmp;
10084 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
10085 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
10086 }
10087
10088 /* If there are more collisions, we can't handle it by reordering.
10089 Do an lea to the last part and use only one colliding move. */
10090 else if (collisions > 1)
10091 {
10092 rtx base;
10093
10094 collisions = 1;
10095
10096 base = part[0][nparts - 1];
10097
10098 /* Handle the case when the last part isn't valid for lea.
10099 Happens in 64-bit mode storing the 12-byte XFmode. */
10100 if (GET_MODE (base) != Pmode)
10101 base = gen_rtx_REG (Pmode, REGNO (base));
10102
10103 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
10104 part[1][0] = replace_equiv_address (part[1][0], base);
10105 part[1][1] = replace_equiv_address (part[1][1],
10106 plus_constant (base, UNITS_PER_WORD));
10107 if (nparts == 3)
10108 part[1][2] = replace_equiv_address (part[1][2],
10109 plus_constant (base, 8));
10110 }
10111 }
10112
10113 if (push)
10114 {
10115 if (!TARGET_64BIT)
10116 {
10117 if (nparts == 3)
10118 {
10119 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
10120 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
10121 emit_move_insn (part[0][2], part[1][2]);
10122 }
10123 }
10124 else
10125 {
10126 /* In 64-bit mode we don't have a 32-bit push available. If this is a
10127 register, that is OK - we will just use the larger counterpart. We also
10128 retype the memory - these references come from an attempt to avoid the
10129 REX prefix on moving the second half of a TFmode value. */
10130 if (GET_MODE (part[1][1]) == SImode)
10131 {
10132 if (GET_CODE (part[1][1]) == MEM)
10133 part[1][1] = adjust_address (part[1][1], DImode, 0);
10134 else if (REG_P (part[1][1]))
10135 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
10136 else
10137 abort ();
10138 if (GET_MODE (part[1][0]) == SImode)
10139 part[1][0] = part[1][1];
10140 }
10141 }
10142 emit_move_insn (part[0][1], part[1][1]);
10143 emit_move_insn (part[0][0], part[1][0]);
10144 return;
10145 }
10146
10147 /* Choose the correct order so as not to overwrite the source before it is copied. */
10148 if ((REG_P (part[0][0])
10149 && REG_P (part[1][1])
10150 && (REGNO (part[0][0]) == REGNO (part[1][1])
10151 || (nparts == 3
10152 && REGNO (part[0][0]) == REGNO (part[1][2]))))
10153 || (collisions > 0
10154 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
10155 {
10156 if (nparts == 3)
10157 {
10158 operands[2] = part[0][2];
10159 operands[3] = part[0][1];
10160 operands[4] = part[0][0];
10161 operands[5] = part[1][2];
10162 operands[6] = part[1][1];
10163 operands[7] = part[1][0];
10164 }
10165 else
10166 {
10167 operands[2] = part[0][1];
10168 operands[3] = part[0][0];
10169 operands[5] = part[1][1];
10170 operands[6] = part[1][0];
10171 }
10172 }
10173 else
10174 {
10175 if (nparts == 3)
10176 {
10177 operands[2] = part[0][0];
10178 operands[3] = part[0][1];
10179 operands[4] = part[0][2];
10180 operands[5] = part[1][0];
10181 operands[6] = part[1][1];
10182 operands[7] = part[1][2];
10183 }
10184 else
10185 {
10186 operands[2] = part[0][0];
10187 operands[3] = part[0][1];
10188 operands[5] = part[1][0];
10189 operands[6] = part[1][1];
10190 }
10191 }
10192
10193 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
10194 if (optimize_size)
10195 {
10196 if (GET_CODE (operands[5]) == CONST_INT
10197 && operands[5] != const0_rtx
10198 && REG_P (operands[2]))
10199 {
10200 if (GET_CODE (operands[6]) == CONST_INT
10201 && INTVAL (operands[6]) == INTVAL (operands[5]))
10202 operands[6] = operands[2];
10203
10204 if (nparts == 3
10205 && GET_CODE (operands[7]) == CONST_INT
10206 && INTVAL (operands[7]) == INTVAL (operands[5]))
10207 operands[7] = operands[2];
10208 }
10209
10210 if (nparts == 3
10211 && GET_CODE (operands[6]) == CONST_INT
10212 && operands[6] != const0_rtx
10213 && REG_P (operands[3])
10214 && GET_CODE (operands[7]) == CONST_INT
10215 && INTVAL (operands[7]) == INTVAL (operands[6]))
10216 operands[7] = operands[3];
10217 }
10218
10219 emit_move_insn (operands[2], operands[5]);
10220 emit_move_insn (operands[3], operands[6]);
10221 if (nparts == 3)
10222 emit_move_insn (operands[4], operands[7]);
10223
10224 return;
10225 }
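
/* Editor's sketch (not GCC code): the ordering concern the code above deals
   with.  When a multi-word value moves between overlapping register pairs,
   the words must be copied in an order that does not clobber a source word
   before it has been read.  The sketch assumes the locations never form a
   full swap, which cannot arise for parts of consecutive hard registers.  */
#include <stdio.h>

static void
move_two_words (long *dst_lo, long *dst_hi, long *src_lo, long *src_hi)
{
  if (dst_lo == src_hi)
    {
      /* Writing the low destination first would destroy the high source,
         so emit the high move first.  */
      *dst_hi = *src_hi;
      *dst_lo = *src_lo;
    }
  else
    {
      *dst_lo = *src_lo;
      *dst_hi = *src_hi;
    }
}

int
main (void)
{
  /* Simulate dst = (r1, r2), src = (r0, r1): dst_lo overlaps src_hi.  */
  long r0 = 11, r1 = 22, r2 = 33;

  move_two_words (&r1, &r2, &r0, &r1);
  printf ("%ld %ld\n", r1, r2);   /* prints "11 22" */
  return 0;
}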
10226
10227 /* Helper function of ix86_split_ashldi used to generate an SImode
10228 left shift by a constant, either using a single shift or
10229 a sequence of add instructions. */
10230
10231 static void
10232 ix86_expand_ashlsi3_const (rtx operand, int count)
10233 {
10234 if (count == 1)
10235 emit_insn (gen_addsi3 (operand, operand, operand));
10236 else if (!optimize_size
10237 && count * ix86_cost->add <= ix86_cost->shift_const)
10238 {
10239 int i;
10240 for (i=0; i<count; i++)
10241 emit_insn (gen_addsi3 (operand, operand, operand));
10242 }
10243 else
10244 emit_insn (gen_ashlsi3 (operand, operand, GEN_INT (count)));
10245 }
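
/* Editor's sketch (not GCC code): the identity the helper above relies on.
   A left shift by COUNT is the same as doubling COUNT times, and for a very
   small COUNT a couple of "addl reg,reg" instructions can be cheaper than a
   shift on some processors (hence the ix86_cost comparison above).  */
#include <stdio.h>

static unsigned int
shl_by_adds (unsigned int x, int count)
{
  int i;

  for (i = 0; i < count; i++)
    x += x;             /* addl x,x is equivalent to shll $1,x */
  return x;
}

int
main (void)
{
  printf ("%u %u\n", shl_by_adds (5, 3), 5u << 3);   /* prints "40 40" */
  return 0;
}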
10246
10247 void
10248 ix86_split_ashldi (rtx *operands, rtx scratch)
10249 {
10250 rtx low[2], high[2];
10251 int count;
10252
10253 if (GET_CODE (operands[2]) == CONST_INT)
10254 {
10255 split_di (operands, 2, low, high);
10256 count = INTVAL (operands[2]) & 63;
10257
10258 if (count >= 32)
10259 {
10260 emit_move_insn (high[0], low[1]);
10261 emit_move_insn (low[0], const0_rtx);
10262
10263 if (count > 32)
10264 ix86_expand_ashlsi3_const (high[0], count - 32);
10265 }
10266 else
10267 {
10268 if (!rtx_equal_p (operands[0], operands[1]))
10269 emit_move_insn (operands[0], operands[1]);
10270 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
10271 ix86_expand_ashlsi3_const (low[0], count);
10272 }
10273 return;
10274 }
10275
10276 split_di (operands, 1, low, high);
10277
10278 if (operands[1] == const1_rtx)
10279 {
10280 /* Assuming we've chosen QImode-capable registers, 1LL << N
10281 can be done with two 32-bit shifts, no branches, no cmoves. */
10282 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
10283 {
10284 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
10285
10286 ix86_expand_clear (low[0]);
10287 ix86_expand_clear (high[0]);
10288 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (32)));
10289
10290 d = gen_lowpart (QImode, low[0]);
10291 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
10292 s = gen_rtx_EQ (QImode, flags, const0_rtx);
10293 emit_insn (gen_rtx_SET (VOIDmode, d, s));
10294
10295 d = gen_lowpart (QImode, high[0]);
10296 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
10297 s = gen_rtx_NE (QImode, flags, const0_rtx);
10298 emit_insn (gen_rtx_SET (VOIDmode, d, s));
10299 }
10300
10301 /* Otherwise, we can get the same results by manually performing
10302 a bit extract operation on bit 5, and then performing the two
10303 shifts. The two methods of getting 0/1 into low/high are exactly
10304 the same size. Avoiding the shift in the bit extract case helps
10305 pentium4 a bit; no one else seems to care much either way. */
10306 else
10307 {
10308 rtx x;
10309
10310 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
10311 x = gen_rtx_ZERO_EXTEND (SImode, operands[2]);
10312 else
10313 x = gen_lowpart (SImode, operands[2]);
10314 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
10315
10316 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (5)));
10317 emit_insn (gen_andsi3 (high[0], high[0], GEN_INT (1)));
10318 emit_move_insn (low[0], high[0]);
10319 emit_insn (gen_xorsi3 (low[0], low[0], GEN_INT (1)));
10320 }
10321
10322 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
10323 emit_insn (gen_ashlsi3 (high[0], high[0], operands[2]));
10324 return;
10325 }
10326
10327 if (operands[1] == constm1_rtx)
10328 {
10329 /* For -1LL << N, we can avoid the shld instruction, because we
10330 know that we're shifting 0...31 ones into a -1. */
10331 emit_move_insn (low[0], constm1_rtx);
10332 if (optimize_size)
10333 emit_move_insn (high[0], low[0]);
10334 else
10335 emit_move_insn (high[0], constm1_rtx);
10336 }
10337 else
10338 {
10339 if (!rtx_equal_p (operands[0], operands[1]))
10340 emit_move_insn (operands[0], operands[1]);
10341
10342 split_di (operands, 1, low, high);
10343 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
10344 }
10345
10346 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
10347
10348 if (TARGET_CMOVE && scratch)
10349 {
10350 ix86_expand_clear (scratch);
10351 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
10352 }
10353 else
10354 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
10355 }
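
/* Editor's sketch (not GCC code): the two cases ix86_split_ashldi above
   distinguishes for a constant shift count on a 32-bit target, written out
   in C on explicit low/high words.  */
#include <inttypes.h>
#include <stdio.h>

static void
shl64_const (uint32_t *lo, uint32_t *hi, unsigned int count)
{
  count &= 63;
  if (count >= 32)
    {
      *hi = *lo << (count - 32);   /* the low word moves into the high word */
      *lo = 0;
    }
  else if (count > 0)
    {
      *hi = (*hi << count) | (*lo >> (32 - count));   /* shld */
      *lo <<= count;                                  /* shl  */
    }
}

int
main (void)
{
  uint32_t lo = 0x80000001u, hi = 0;

  shl64_const (&lo, &hi, 1);
  printf ("%08" PRIx32 " %08" PRIx32 "\n", hi, lo);   /* "00000001 00000002" */
  return 0;
}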
10356
10357 void
10358 ix86_split_ashrdi (rtx *operands, rtx scratch)
10359 {
10360 rtx low[2], high[2];
10361 int count;
10362
10363 if (GET_CODE (operands[2]) == CONST_INT)
10364 {
10365 split_di (operands, 2, low, high);
10366 count = INTVAL (operands[2]) & 63;
10367
10368 if (count == 63)
10369 {
10370 emit_move_insn (high[0], high[1]);
10371 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10372 emit_move_insn (low[0], high[0]);
10373
10374 }
10375 else if (count >= 32)
10376 {
10377 emit_move_insn (low[0], high[1]);
10378 emit_move_insn (high[0], low[0]);
10379 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10380 if (count > 32)
10381 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
10382 }
10383 else
10384 {
10385 if (!rtx_equal_p (operands[0], operands[1]))
10386 emit_move_insn (operands[0], operands[1]);
10387 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10388 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
10389 }
10390 }
10391 else
10392 {
10393 if (!rtx_equal_p (operands[0], operands[1]))
10394 emit_move_insn (operands[0], operands[1]);
10395
10396 split_di (operands, 1, low, high);
10397
10398 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10399 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
10400
10401 if (TARGET_CMOVE && scratch)
10402 {
10403 emit_move_insn (scratch, high[0]);
10404 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
10405 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10406 scratch));
10407 }
10408 else
10409 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
10410 }
10411 }
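
/* Editor's sketch (not GCC code): what the count >= 32 path of
   ix86_split_ashrdi above computes.  The old high word, shifted
   arithmetically, becomes the new low word, and the new high word is the
   sign replicated (the sarl $31).  Right-shifting a negative int is
   implementation-defined in ISO C; this sketch assumes the usual
   arithmetic-shift behavior, which is what the i386 insns provide.  */
#include <inttypes.h>
#include <stdio.h>

static void
ashr64_const_ge32 (uint32_t *lo, uint32_t *hi, unsigned int count /* 32..63 */)
{
  int32_t old_hi = (int32_t) *hi;

  *lo = (uint32_t) (old_hi >> (count - 32));   /* arithmetic shift of old high */
  *hi = (uint32_t) (old_hi >> 31);             /* 0 or 0xffffffff: the sign    */
}

int
main (void)
{
  uint32_t lo = 0, hi = 0xffff0000u;   /* the value 0xffff000000000000 */

  ashr64_const_ge32 (&lo, &hi, 48);
  printf ("%08" PRIx32 " %08" PRIx32 "\n", hi, lo);   /* "ffffffff ffffffff" */
  return 0;
}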
10412
10413 void
10414 ix86_split_lshrdi (rtx *operands, rtx scratch)
10415 {
10416 rtx low[2], high[2];
10417 int count;
10418
10419 if (GET_CODE (operands[2]) == CONST_INT)
10420 {
10421 split_di (operands, 2, low, high);
10422 count = INTVAL (operands[2]) & 63;
10423
10424 if (count >= 32)
10425 {
10426 emit_move_insn (low[0], high[1]);
10427 ix86_expand_clear (high[0]);
10428
10429 if (count > 32)
10430 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
10431 }
10432 else
10433 {
10434 if (!rtx_equal_p (operands[0], operands[1]))
10435 emit_move_insn (operands[0], operands[1]);
10436 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10437 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
10438 }
10439 }
10440 else
10441 {
10442 if (!rtx_equal_p (operands[0], operands[1]))
10443 emit_move_insn (operands[0], operands[1]);
10444
10445 split_di (operands, 1, low, high);
10446
10447 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10448 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
10449
10450 /* Heh. By reversing the arguments, we can reuse this pattern. */
10451 if (TARGET_CMOVE && scratch)
10452 {
10453 ix86_expand_clear (scratch);
10454 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10455 scratch));
10456 }
10457 else
10458 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
10459 }
10460 }
10461
10462 /* Helper function for the string operations below. Test whether VARIABLE
10463 has the bits selected by VALUE clear (i.e. is aligned); if so, jump to the returned label. */
10464 static rtx
10465 ix86_expand_aligntest (rtx variable, int value)
10466 {
10467 rtx label = gen_label_rtx ();
10468 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
10469 if (GET_MODE (variable) == DImode)
10470 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
10471 else
10472 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
10473 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
10474 1, label);
10475 return label;
10476 }
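
/* Editor's sketch (not GCC code): the run-time test the RTL emitted above
   performs.  The jump to the returned label is taken when the bits selected
   by VALUE are all zero, i.e. the pointer is already aligned, so the small
   fix-up copy placed between the call and emit_label is skipped.  */
#include <stdint.h>
#include <stdio.h>

static int
aligned_for_mask (uintptr_t address, int value)
{
  return (address & (uintptr_t) value) == 0;
}

int
main (void)
{
  printf ("%d %d\n", aligned_for_mask (0x1000, 3), aligned_for_mask (0x1001, 1));
  /* prints "1 0" */
  return 0;
}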
10477
10478 /* Decrease COUNTREG by VALUE. */
10479 static void
10480 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
10481 {
10482 if (GET_MODE (countreg) == DImode)
10483 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
10484 else
10485 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
10486 }
10487
10488 /* Zero extend EXP, which may be in SImode, into a Pmode register. */
10489 rtx
10490 ix86_zero_extend_to_Pmode (rtx exp)
10491 {
10492 rtx r;
10493 if (GET_MODE (exp) == VOIDmode)
10494 return force_reg (Pmode, exp);
10495 if (GET_MODE (exp) == Pmode)
10496 return copy_to_mode_reg (Pmode, exp);
10497 r = gen_reg_rtx (Pmode);
10498 emit_insn (gen_zero_extendsidi2 (r, exp));
10499 return r;
10500 }
10501
10502 /* Expand string move (memcpy) operation. Use i386 string operations when
10503 profitable. expand_clrmem contains similar code. */
10504 int
10505 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp)
10506 {
10507 rtx srcreg, destreg, countreg, srcexp, destexp;
10508 enum machine_mode counter_mode;
10509 HOST_WIDE_INT align = 0;
10510 unsigned HOST_WIDE_INT count = 0;
10511
10512 if (GET_CODE (align_exp) == CONST_INT)
10513 align = INTVAL (align_exp);
10514
10515 /* Can't use any of this if the user has appropriated esi or edi. */
10516 if (global_regs[4] || global_regs[5])
10517 return 0;
10518
10519 /* This simple hack avoids all inlining code and simplifies code below. */
10520 if (!TARGET_ALIGN_STRINGOPS)
10521 align = 64;
10522
10523 if (GET_CODE (count_exp) == CONST_INT)
10524 {
10525 count = INTVAL (count_exp);
10526 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
10527 return 0;
10528 }
10529
10530 /* Figure out the proper mode for the counter. For 32 bits it is always
10531 SImode; for 64 bits use SImode when possible, otherwise DImode.
10532 COUNT was set above to the number of bytes copied when known at compile time. */
10533 if (!TARGET_64BIT
10534 || GET_MODE (count_exp) == SImode
10535 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
10536 counter_mode = SImode;
10537 else
10538 counter_mode = DImode;
10539
10540 if (counter_mode != SImode && counter_mode != DImode)
10541 abort ();
10542
10543 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
10544 if (destreg != XEXP (dst, 0))
10545 dst = replace_equiv_address_nv (dst, destreg);
10546 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
10547 if (srcreg != XEXP (src, 0))
10548 src = replace_equiv_address_nv (src, srcreg);
10549
10550 /* When optimizing for size, emit a simple rep ; movsb instruction for
10551 counts not divisible by 4. */
10552
10553 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
10554 {
10555 emit_insn (gen_cld ());
10556 countreg = ix86_zero_extend_to_Pmode (count_exp);
10557 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
10558 srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
10559 emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
10560 destexp, srcexp));
10561 }
10562
10563 /* For constant aligned (or small unaligned) copies use rep movsl
10564 followed by code copying the rest. For PentiumPro ensure 8 byte
10565 alignment to allow rep movsl acceleration. */
10566
10567 else if (count != 0
10568 && (align >= 8
10569 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
10570 || optimize_size || count < (unsigned int) 64))
10571 {
10572 unsigned HOST_WIDE_INT offset = 0;
10573 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10574 rtx srcmem, dstmem;
10575
10576 emit_insn (gen_cld ());
10577 if (count & ~(size - 1))
10578 {
10579 countreg = copy_to_mode_reg (counter_mode,
10580 GEN_INT ((count >> (size == 4 ? 2 : 3))
10581 & (TARGET_64BIT ? -1 : 0x3fffffff)));
10582 countreg = ix86_zero_extend_to_Pmode (countreg);
10583
10584 destexp = gen_rtx_ASHIFT (Pmode, countreg,
10585 GEN_INT (size == 4 ? 2 : 3));
10586 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
10587 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
10588
10589 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
10590 countreg, destexp, srcexp));
10591 offset = count & ~(size - 1);
10592 }
10593 if (size == 8 && (count & 0x04))
10594 {
10595 srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
10596 offset);
10597 dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
10598 offset);
10599 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10600 offset += 4;
10601 }
10602 if (count & 0x02)
10603 {
10604 srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
10605 offset);
10606 dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
10607 offset);
10608 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10609 offset += 2;
10610 }
10611 if (count & 0x01)
10612 {
10613 srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
10614 offset);
10615 dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
10616 offset);
10617 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10618 }
10619 }
10620 /* The generic code based on the glibc implementation:
10621 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
10622 allowing accelerated copying there)
10623 - copy the data using rep movsl
10624 - copy the rest. */
10625 else
10626 {
10627 rtx countreg2;
10628 rtx label = NULL;
10629 rtx srcmem, dstmem;
10630 int desired_alignment = (TARGET_PENTIUMPRO
10631 && (count == 0 || count >= (unsigned int) 260)
10632 ? 8 : UNITS_PER_WORD);
10633 /* Get rid of MEM_OFFSETs, they won't be accurate. */
10634 dst = change_address (dst, BLKmode, destreg);
10635 src = change_address (src, BLKmode, srcreg);
10636
10637 /* In case we don't know anything about the alignment, default to the
10638 library version, since it is usually equally fast and results in
10639 shorter code.
10640
10641 Also emit the call when we know that the count is large and call overhead
10642 will not be important. */
10643 if (!TARGET_INLINE_ALL_STRINGOPS
10644 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
10645 return 0;
10646
10647 if (TARGET_SINGLE_STRINGOP)
10648 emit_insn (gen_cld ());
10649
10650 countreg2 = gen_reg_rtx (Pmode);
10651 countreg = copy_to_mode_reg (counter_mode, count_exp);
10652
10653 /* We don't use loops to align the destination or to copy parts smaller
10654 than 4 bytes, because gcc is able to optimize such code better (in
10655 the case the destination or the count really is aligned, gcc is often
10656 able to predict the branches) and because it is friendlier to
10657 hardware branch prediction.
10658
10659 Using loops would be beneficial for the generic case, because we could
10660 handle small counts with them; many CPUs (such as the Athlon)
10661 have large REP prefix setup costs.
10662
10663 This is quite costly. Maybe we can revisit this decision later or
10664 add some customizability to this code. */
10665
10666 if (count == 0 && align < desired_alignment)
10667 {
10668 label = gen_label_rtx ();
10669 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
10670 LEU, 0, counter_mode, 1, label);
10671 }
10672 if (align <= 1)
10673 {
10674 rtx label = ix86_expand_aligntest (destreg, 1);
10675 srcmem = change_address (src, QImode, srcreg);
10676 dstmem = change_address (dst, QImode, destreg);
10677 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10678 ix86_adjust_counter (countreg, 1);
10679 emit_label (label);
10680 LABEL_NUSES (label) = 1;
10681 }
10682 if (align <= 2)
10683 {
10684 rtx label = ix86_expand_aligntest (destreg, 2);
10685 srcmem = change_address (src, HImode, srcreg);
10686 dstmem = change_address (dst, HImode, destreg);
10687 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10688 ix86_adjust_counter (countreg, 2);
10689 emit_label (label);
10690 LABEL_NUSES (label) = 1;
10691 }
10692 if (align <= 4 && desired_alignment > 4)
10693 {
10694 rtx label = ix86_expand_aligntest (destreg, 4);
10695 srcmem = change_address (src, SImode, srcreg);
10696 dstmem = change_address (dst, SImode, destreg);
10697 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10698 ix86_adjust_counter (countreg, 4);
10699 emit_label (label);
10700 LABEL_NUSES (label) = 1;
10701 }
10702
10703 if (label && desired_alignment > 4 && !TARGET_64BIT)
10704 {
10705 emit_label (label);
10706 LABEL_NUSES (label) = 1;
10707 label = NULL_RTX;
10708 }
10709 if (!TARGET_SINGLE_STRINGOP)
10710 emit_insn (gen_cld ());
10711 if (TARGET_64BIT)
10712 {
10713 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10714 GEN_INT (3)));
10715 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
10716 }
10717 else
10718 {
10719 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
10720 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
10721 }
10722 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
10723 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
10724 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
10725 countreg2, destexp, srcexp));
10726
10727 if (label)
10728 {
10729 emit_label (label);
10730 LABEL_NUSES (label) = 1;
10731 }
10732 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10733 {
10734 srcmem = change_address (src, SImode, srcreg);
10735 dstmem = change_address (dst, SImode, destreg);
10736 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10737 }
10738 if ((align <= 4 || count == 0) && TARGET_64BIT)
10739 {
10740 rtx label = ix86_expand_aligntest (countreg, 4);
10741 srcmem = change_address (src, SImode, srcreg);
10742 dstmem = change_address (dst, SImode, destreg);
10743 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10744 emit_label (label);
10745 LABEL_NUSES (label) = 1;
10746 }
10747 if (align > 2 && count != 0 && (count & 2))
10748 {
10749 srcmem = change_address (src, HImode, srcreg);
10750 dstmem = change_address (dst, HImode, destreg);
10751 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10752 }
10753 if (align <= 2 || count == 0)
10754 {
10755 rtx label = ix86_expand_aligntest (countreg, 2);
10756 srcmem = change_address (src, HImode, srcreg);
10757 dstmem = change_address (dst, HImode, destreg);
10758 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10759 emit_label (label);
10760 LABEL_NUSES (label) = 1;
10761 }
10762 if (align > 1 && count != 0 && (count & 1))
10763 {
10764 srcmem = change_address (src, QImode, srcreg);
10765 dstmem = change_address (dst, QImode, destreg);
10766 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10767 }
10768 if (align <= 1 || count == 0)
10769 {
10770 rtx label = ix86_expand_aligntest (countreg, 1);
10771 srcmem = change_address (src, QImode, srcreg);
10772 dstmem = change_address (dst, QImode, destreg);
10773 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
10774 emit_label (label);
10775 LABEL_NUSES (label) = 1;
10776 }
10777 }
10778
10779 return 1;
10780 }
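
/* Editor's rough C model (not the emitted RTL) of the inline copy strategy
   above for the unknown-alignment case: align the destination first, copy
   the bulk four bytes at a time (standing in for the rep movsl part), then
   mop up the remaining tail bytes.  Names are illustrative only.  */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static void
inline_copy_model (unsigned char *dst, const unsigned char *src, size_t count)
{
  /* Align the destination to 4 bytes, one byte at a time.  */
  while (count > 0 && ((uintptr_t) dst & 3) != 0)
    {
      *dst++ = *src++;
      count--;
    }

  /* Bulk copy, 4 bytes per iteration - this loop stands in for rep movsl.  */
  while (count >= 4)
    {
      uint32_t word;
      memcpy (&word, src, 4);
      memcpy (dst, &word, 4);
      dst += 4;
      src += 4;
      count -= 4;
    }

  /* Tail of 0..3 bytes.  */
  while (count-- > 0)
    *dst++ = *src++;
}

int
main (void)
{
  char out[16];

  inline_copy_model ((unsigned char *) out,
                     (const unsigned char *) "hello, world", 13);
  printf ("%s\n", out);   /* prints "hello, world" */
  return 0;
}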
10781
10782 /* Expand string clear operation (bzero). Use i386 string operations when
10783 profitable. expand_movmem contains similar code. */
10784 int
10785 ix86_expand_clrmem (rtx dst, rtx count_exp, rtx align_exp)
10786 {
10787 rtx destreg, zeroreg, countreg, destexp;
10788 enum machine_mode counter_mode;
10789 HOST_WIDE_INT align = 0;
10790 unsigned HOST_WIDE_INT count = 0;
10791
10792 if (GET_CODE (align_exp) == CONST_INT)
10793 align = INTVAL (align_exp);
10794
10795 /* Can't use any of this if the user has appropriated esi. */
10796 if (global_regs[4])
10797 return 0;
10798
10799 /* This simple hack avoids all inlining code and simplifies code below. */
10800 if (!TARGET_ALIGN_STRINGOPS)
10801 align = 32;
10802
10803 if (GET_CODE (count_exp) == CONST_INT)
10804 {
10805 count = INTVAL (count_exp);
10806 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
10807 return 0;
10808 }
10809 /* Figure out the proper mode for the counter. For 32 bits it is always
10810 SImode; for 64 bits use SImode when possible, otherwise DImode.
10811 COUNT was set above to the number of bytes cleared when known at compile time. */
10812 if (!TARGET_64BIT
10813 || GET_MODE (count_exp) == SImode
10814 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
10815 counter_mode = SImode;
10816 else
10817 counter_mode = DImode;
10818
10819 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
10820 if (destreg != XEXP (dst, 0))
10821 dst = replace_equiv_address_nv (dst, destreg);
10822
10823
10824 /* When optimizing for size, emit a simple rep ; stosb instruction for
10825 counts not divisible by 4. The movl $N, %ecx; rep; stosb
10826 sequence is 7 bytes long, so if optimizing for size and the count is
10827 small enough that some stosl, stosw and stosb instructions without
10828 rep are shorter, fall through into the next if. */
10829
10830 if ((!optimize || optimize_size)
10831 && (count == 0
10832 || ((count & 0x03)
10833 && (!optimize_size || (count & 0x03) + (count >> 2) > 7))))
10834 {
10835 emit_insn (gen_cld ());
10836
10837 countreg = ix86_zero_extend_to_Pmode (count_exp);
10838 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
10839 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
10840 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
10841 }
10842 else if (count != 0
10843 && (align >= 8
10844 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
10845 || optimize_size || count < (unsigned int) 64))
10846 {
10847 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10848 unsigned HOST_WIDE_INT offset = 0;
10849
10850 emit_insn (gen_cld ());
10851
10852 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
10853 if (count & ~(size - 1))
10854 {
10855 unsigned HOST_WIDE_INT repcount;
10856 unsigned int max_nonrep;
10857
10858 repcount = count >> (size == 4 ? 2 : 3);
10859 if (!TARGET_64BIT)
10860 repcount &= 0x3fffffff;
10861
10862 /* movl $N, %ecx; rep; stosl is 7 bytes, while N x stosl is N bytes.
10863 movl $N, %ecx; rep; stosq is 8 bytes, while N x stosq is 2xN
10864 bytes. In both cases the latter seems to be faster for small
10865 values of N. */
10866 max_nonrep = size == 4 ? 7 : 4;
10867 if (!optimize_size)
10868 switch (ix86_tune)
10869 {
10870 case PROCESSOR_PENTIUM4:
10871 case PROCESSOR_NOCONA:
10872 max_nonrep = 3;
10873 break;
10874 default:
10875 break;
10876 }
10877
10878 if (repcount <= max_nonrep)
10879 while (repcount-- > 0)
10880 {
10881 rtx mem = adjust_automodify_address_nv (dst,
10882 GET_MODE (zeroreg),
10883 destreg, offset);
10884 emit_insn (gen_strset (destreg, mem, zeroreg));
10885 offset += size;
10886 }
10887 else
10888 {
10889 countreg = copy_to_mode_reg (counter_mode, GEN_INT (repcount));
10890 countreg = ix86_zero_extend_to_Pmode (countreg);
10891 destexp = gen_rtx_ASHIFT (Pmode, countreg,
10892 GEN_INT (size == 4 ? 2 : 3));
10893 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
10894 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg,
10895 destexp));
10896 offset = count & ~(size - 1);
10897 }
10898 }
10899 if (size == 8 && (count & 0x04))
10900 {
10901 rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
10902 offset);
10903 emit_insn (gen_strset (destreg, mem,
10904 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10905 offset += 4;
10906 }
10907 if (count & 0x02)
10908 {
10909 rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
10910 offset);
10911 emit_insn (gen_strset (destreg, mem,
10912 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10913 offset += 2;
10914 }
10915 if (count & 0x01)
10916 {
10917 rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
10918 offset);
10919 emit_insn (gen_strset (destreg, mem,
10920 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10921 }
10922 }
10923 else
10924 {
10925 rtx countreg2;
10926 rtx label = NULL;
10927 /* Compute desired alignment of the string operation. */
10928 int desired_alignment = (TARGET_PENTIUMPRO
10929 && (count == 0 || count >= (unsigned int) 260)
10930 ? 8 : UNITS_PER_WORD);
10931
10932 /* In case we don't know anything about the alignment, default to the
10933 library version, since it is usually equally fast and results in
10934 shorter code.
10935
10936 Also emit the call when we know that the count is large and call overhead
10937 will not be important. */
10938 if (!TARGET_INLINE_ALL_STRINGOPS
10939 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
10940 return 0;
10941
10942 if (TARGET_SINGLE_STRINGOP)
10943 emit_insn (gen_cld ());
10944
10945 countreg2 = gen_reg_rtx (Pmode);
10946 countreg = copy_to_mode_reg (counter_mode, count_exp);
10947 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
10948 /* Get rid of MEM_OFFSET, it won't be accurate. */
10949 dst = change_address (dst, BLKmode, destreg);
10950
10951 if (count == 0 && align < desired_alignment)
10952 {
10953 label = gen_label_rtx ();
10954 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
10955 LEU, 0, counter_mode, 1, label);
10956 }
10957 if (align <= 1)
10958 {
10959 rtx label = ix86_expand_aligntest (destreg, 1);
10960 emit_insn (gen_strset (destreg, dst,
10961 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10962 ix86_adjust_counter (countreg, 1);
10963 emit_label (label);
10964 LABEL_NUSES (label) = 1;
10965 }
10966 if (align <= 2)
10967 {
10968 rtx label = ix86_expand_aligntest (destreg, 2);
10969 emit_insn (gen_strset (destreg, dst,
10970 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10971 ix86_adjust_counter (countreg, 2);
10972 emit_label (label);
10973 LABEL_NUSES (label) = 1;
10974 }
10975 if (align <= 4 && desired_alignment > 4)
10976 {
10977 rtx label = ix86_expand_aligntest (destreg, 4);
10978 emit_insn (gen_strset (destreg, dst,
10979 (TARGET_64BIT
10980 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
10981 : zeroreg)));
10982 ix86_adjust_counter (countreg, 4);
10983 emit_label (label);
10984 LABEL_NUSES (label) = 1;
10985 }
10986
10987 if (label && desired_alignment > 4 && !TARGET_64BIT)
10988 {
10989 emit_label (label);
10990 LABEL_NUSES (label) = 1;
10991 label = NULL_RTX;
10992 }
10993
10994 if (!TARGET_SINGLE_STRINGOP)
10995 emit_insn (gen_cld ());
10996 if (TARGET_64BIT)
10997 {
10998 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10999 GEN_INT (3)));
11000 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
11001 }
11002 else
11003 {
11004 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
11005 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
11006 }
11007 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11008 emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));
11009
11010 if (label)
11011 {
11012 emit_label (label);
11013 LABEL_NUSES (label) = 1;
11014 }
11015
11016 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11017 emit_insn (gen_strset (destreg, dst,
11018 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11019 if (TARGET_64BIT && (align <= 4 || count == 0))
11020 {
11021 rtx label = ix86_expand_aligntest (countreg, 4);
11022 emit_insn (gen_strset (destreg, dst,
11023 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11024 emit_label (label);
11025 LABEL_NUSES (label) = 1;
11026 }
11027 if (align > 2 && count != 0 && (count & 2))
11028 emit_insn (gen_strset (destreg, dst,
11029 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11030 if (align <= 2 || count == 0)
11031 {
11032 rtx label = ix86_expand_aligntest (countreg, 2);
11033 emit_insn (gen_strset (destreg, dst,
11034 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11035 emit_label (label);
11036 LABEL_NUSES (label) = 1;
11037 }
11038 if (align > 1 && count != 0 && (count & 1))
11039 emit_insn (gen_strset (destreg, dst,
11040 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11041 if (align <= 1 || count == 0)
11042 {
11043 rtx label = ix86_expand_aligntest (countreg, 1);
11044 emit_insn (gen_strset (destreg, dst,
11045 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11046 emit_label (label);
11047 LABEL_NUSES (label) = 1;
11048 }
11049 }
11050 return 1;
11051 }
11052
11053 /* Expand strlen. */
11054 int
11055 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
11056 {
11057 rtx addr, scratch1, scratch2, scratch3, scratch4;
11058
11059 /* The generic case of the strlen expander is long. Avoid expanding it
11060 unless TARGET_INLINE_ALL_STRINGOPS. */
11061
11062 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11063 && !TARGET_INLINE_ALL_STRINGOPS
11064 && !optimize_size
11065 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
11066 return 0;
11067
11068 addr = force_reg (Pmode, XEXP (src, 0));
11069 scratch1 = gen_reg_rtx (Pmode);
11070
11071 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11072 && !optimize_size)
11073 {
11074 /* Well it seems that some optimizer does not combine a call like
11075 foo(strlen(bar), strlen(bar));
11076 when the move and the subtraction are done here. It does calculate
11077 the length just once when these instructions are done inside
11078 output_strlen_unroll(). But I think that since &bar[strlen(bar)] is
11079 often used, and this uses one fewer register for the lifetime of
11080 output_strlen_unroll(), this is better. */
11081
11082 emit_move_insn (out, addr);
11083
11084 ix86_expand_strlensi_unroll_1 (out, src, align);
11085
11086 /* strlensi_unroll_1 returns the address of the zero at the end of
11087 the string, like memchr(), so compute the length by subtracting
11088 the start address. */
11089 if (TARGET_64BIT)
11090 emit_insn (gen_subdi3 (out, out, addr));
11091 else
11092 emit_insn (gen_subsi3 (out, out, addr));
11093 }
11094 else
11095 {
11096 rtx unspec;
11097 scratch2 = gen_reg_rtx (Pmode);
11098 scratch3 = gen_reg_rtx (Pmode);
11099 scratch4 = force_reg (Pmode, constm1_rtx);
11100
11101 emit_move_insn (scratch3, addr);
11102 eoschar = force_reg (QImode, eoschar);
11103
11104 emit_insn (gen_cld ());
11105 src = replace_equiv_address_nv (src, scratch3);
11106
11107 /* If .md starts supporting :P, this can be done in .md. */
11108 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
11109 scratch4), UNSPEC_SCAS);
11110 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
11111 if (TARGET_64BIT)
11112 {
11113 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
11114 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
11115 }
11116 else
11117 {
11118 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
11119 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
11120 }
11121 }
11122 return 1;
11123 }
11124
11125 /* Expand the appropriate insns for doing strlen if not just doing
11126 repnz; scasb
11127
11128 out = result, initialized with the start address
11129 align_rtx = alignment of the address.
11130 scratch = scratch register, initialized with the start address when
11131 not aligned, otherwise undefined
11132
11133 This is just the body. It needs the initializations mentioned above and
11134 some address computing at the end. These things are done in i386.md. */
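/* For reference (summary added for clarity; the code below is authoritative),
   the expansion proceeds in three steps:
     1.  Compare up to three leading bytes one at a time until OUT is
         4-byte aligned, jumping to end_0_label as soon as a zero is seen.
     2.  Loop over the string one 32-bit word at a time, using the
         (x - 0x01010101) & ~x & 0x80808080 test to detect a zero byte.
     3.  Once a word containing a zero byte is found, back OUT up so that
         it points at the terminating zero (via cmov or a short branch,
         plus a final subtract-with-borrow adjustment).  */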
11135
11136 static void
11137 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
11138 {
11139 int align;
11140 rtx tmp;
11141 rtx align_2_label = NULL_RTX;
11142 rtx align_3_label = NULL_RTX;
11143 rtx align_4_label = gen_label_rtx ();
11144 rtx end_0_label = gen_label_rtx ();
11145 rtx mem;
11146 rtx tmpreg = gen_reg_rtx (SImode);
11147 rtx scratch = gen_reg_rtx (SImode);
11148 rtx cmp;
11149
11150 align = 0;
11151 if (GET_CODE (align_rtx) == CONST_INT)
11152 align = INTVAL (align_rtx);
11153
11154 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
11155
11156 /* Is there a known alignment and is it less than 4? */
11157 if (align < 4)
11158 {
11159 rtx scratch1 = gen_reg_rtx (Pmode);
11160 emit_move_insn (scratch1, out);
11161 /* Is there a known alignment and is it not 2? */
11162 if (align != 2)
11163 {
11164 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
11165 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
11166
11167 /* Leave just the 3 lower bits. */
11168 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
11169 NULL_RTX, 0, OPTAB_WIDEN);
11170
11171 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11172 Pmode, 1, align_4_label);
11173 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
11174 Pmode, 1, align_2_label);
11175 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
11176 Pmode, 1, align_3_label);
11177 }
11178 else
11179 {
11180 /* Since the alignment is 2, we have to check 2 or 0 bytes;
11181 check whether it is aligned to a 4-byte boundary. */
11182
11183 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
11184 NULL_RTX, 0, OPTAB_WIDEN);
11185
11186 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11187 Pmode, 1, align_4_label);
11188 }
11189
11190 mem = change_address (src, QImode, out);
11191
11192 /* Now compare the bytes. */
11193
11194 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
11195 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
11196 QImode, 1, end_0_label);
11197
11198 /* Increment the address. */
11199 if (TARGET_64BIT)
11200 emit_insn (gen_adddi3 (out, out, const1_rtx));
11201 else
11202 emit_insn (gen_addsi3 (out, out, const1_rtx));
11203
11204 /* Not needed with an alignment of 2 */
11205 if (align != 2)
11206 {
11207 emit_label (align_2_label);
11208
11209 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11210 end_0_label);
11211
11212 if (TARGET_64BIT)
11213 emit_insn (gen_adddi3 (out, out, const1_rtx));
11214 else
11215 emit_insn (gen_addsi3 (out, out, const1_rtx));
11216
11217 emit_label (align_3_label);
11218 }
11219
11220 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11221 end_0_label);
11222
11223 if (TARGET_64BIT)
11224 emit_insn (gen_adddi3 (out, out, const1_rtx));
11225 else
11226 emit_insn (gen_addsi3 (out, out, const1_rtx));
11227 }
11228
11229 /* Generate a loop to check 4 bytes at a time.  It is not a good idea
11230 to align this loop; doing so only makes the program larger and does
11231 not speed it up. */
11232 emit_label (align_4_label);
11233
11234 mem = change_address (src, SImode, out);
11235 emit_move_insn (scratch, mem);
11236 if (TARGET_64BIT)
11237 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
11238 else
11239 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
11240
11241 /* This formula yields a nonzero result iff one of the bytes is zero.
11242 This saves three branches inside the loop and many cycles. */
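/* Illustrative note (added; not in the original sources): the insns below
   compute  (x - 0x01010101) & ~x & 0x80808080  for the word x in SCRATCH.
   A zero byte of x becomes 0xFF after the subtraction, and ~x also has
   bit 7 set there, so that byte contributes an 0x80 to the result; a
   nonzero byte never does (bytes >= 0x80 are masked off by ~x, and bytes
   below 0x80 cannot gain bit 7 unless a zero byte below them caused a
   borrow).  Example: x = 0x41004242 gives
   0x3FFF4141 & 0xBEFFBDBD & 0x80808080 = 0x00800000.  */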
11243
11244 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
11245 emit_insn (gen_one_cmplsi2 (scratch, scratch));
11246 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
11247 emit_insn (gen_andsi3 (tmpreg, tmpreg,
11248 gen_int_mode (0x80808080, SImode)));
11249 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
11250 align_4_label);
11251
11252 if (TARGET_CMOVE)
11253 {
11254 rtx reg = gen_reg_rtx (SImode);
11255 rtx reg2 = gen_reg_rtx (Pmode);
11256 emit_move_insn (reg, tmpreg);
11257 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
11258
11259 /* If zero is not in the first two bytes, move two bytes forward. */
11260 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11261 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11262 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11263 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
11264 gen_rtx_IF_THEN_ELSE (SImode, tmp,
11265 reg,
11266 tmpreg)));
11267 /* Emit lea manually to avoid clobbering of flags. */
11268 emit_insn (gen_rtx_SET (SImode, reg2,
11269 gen_rtx_PLUS (Pmode, out, const2_rtx)));
11270
11271 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11272 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11273 emit_insn (gen_rtx_SET (VOIDmode, out,
11274 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
11275 reg2,
11276 out)));
11277
11278 }
11279 else
11280 {
11281 rtx end_2_label = gen_label_rtx ();
11282 /* Is zero in the first two bytes? */
11283
11284 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11285 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11286 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
11287 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11288 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
11289 pc_rtx);
11290 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11291 JUMP_LABEL (tmp) = end_2_label;
11292
11293 /* Not in the first two. Move two bytes forward. */
11294 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
11295 if (TARGET_64BIT)
11296 emit_insn (gen_adddi3 (out, out, const2_rtx));
11297 else
11298 emit_insn (gen_addsi3 (out, out, const2_rtx));
11299
11300 emit_label (end_2_label);
11301
11302 }
11303
11304 /* Avoid a branch when fixing up the final byte position. */
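/* Illustrative note (added): at this point the terminating zero is at
   either OUT - 4 or OUT - 3, and the low byte of TMPREG is 0x80 exactly
   in the OUT - 4 case.  Adding that byte to itself moves bit 7 into the
   carry flag, so the subtract-with-borrow below subtracts 4 or 3 from
   OUT, leaving it pointing at the zero byte.  */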
11305 tmpreg = gen_lowpart (QImode, tmpreg);
11306 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
11307 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
11308 if (TARGET_64BIT)
11309 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
11310 else
11311 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
11312
11313 emit_label (end_0_label);
11314 }
11315
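/* Expand a call.  A summary of the operands, inferred from the uses below
   (comment added for clarity): RETVAL is where the return value goes, or
   NULL for a call with no value; FNADDR is a MEM giving the function
   address; CALLARG1 is the argument-size rtx stored in the CALL; CALLARG2,
   on x86-64, is the number of SSE registers used by a varargs call and is
   loaded into %al; POP is the number of bytes the callee pops, or
   const0_rtx/NULL; SIBCALL is nonzero for a sibling call.  */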
11316 void
11317 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
11318 rtx callarg2 ATTRIBUTE_UNUSED,
11319 rtx pop, int sibcall)
11320 {
11321 rtx use = NULL, call;
11322
11323 if (pop == const0_rtx)
11324 pop = NULL;
11325 if (TARGET_64BIT && pop)
11326 abort ();
11327
11328 #if TARGET_MACHO
11329 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
11330 fnaddr = machopic_indirect_call_target (fnaddr);
11331 #else
11332 /* Static functions and indirect calls don't need the pic register. */
11333 if (! TARGET_64BIT && flag_pic
11334 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
11335 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
11336 use_reg (&use, pic_offset_table_rtx);
11337
11338 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
11339 {
11340 rtx al = gen_rtx_REG (QImode, 0);
11341 emit_move_insn (al, callarg2);
11342 use_reg (&use, al);
11343 }
11344 #endif /* TARGET_MACHO */
11345
11346 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
11347 {
11348 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11349 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11350 }
11351 if (sibcall && TARGET_64BIT
11352 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
11353 {
11354 rtx addr;
11355 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11356 fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
11357 emit_move_insn (fnaddr, addr);
11358 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11359 }
11360
11361 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
11362 if (retval)
11363 call = gen_rtx_SET (VOIDmode, retval, call);
11364 if (pop)
11365 {
11366 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
11367 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
11368 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
11369 }
11370
11371 call = emit_call_insn (call);
11372 if (use)
11373 CALL_INSN_FUNCTION_USAGE (call) = use;
11374 }
11375
11376 \f
11377 /* Clear stack slot assignments remembered from previous functions.
11378 This is called from INIT_EXPANDERS once before RTL is emitted for each
11379 function. */
11380
11381 static struct machine_function *
11382 ix86_init_machine_status (void)
11383 {
11384 struct machine_function *f;
11385
11386 f = ggc_alloc_cleared (sizeof (struct machine_function));
11387 f->use_fast_prologue_epilogue_nregs = -1;
11388
11389 return f;
11390 }
11391
11392 /* Return a MEM corresponding to a stack slot with mode MODE.
11393 Allocate a new slot if necessary.
11394
11395 The RTL for a function can have several slots available: N is
11396 which slot to use. */
11397
11398 rtx
11399 assign_386_stack_local (enum machine_mode mode, int n)
11400 {
11401 struct stack_local_entry *s;
11402
11403 if (n < 0 || n >= MAX_386_STACK_LOCALS)
11404 abort ();
11405
11406 for (s = ix86_stack_locals; s; s = s->next)
11407 if (s->mode == mode && s->n == n)
11408 return s->rtl;
11409
11410 s = (struct stack_local_entry *)
11411 ggc_alloc (sizeof (struct stack_local_entry));
11412 s->n = n;
11413 s->mode = mode;
11414 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
11415
11416 s->next = ix86_stack_locals;
11417 ix86_stack_locals = s;
11418 return s->rtl;
11419 }
11420
11421 /* Construct the SYMBOL_REF for the tls_get_addr function. */
11422
11423 static GTY(()) rtx ix86_tls_symbol;
11424 rtx
11425 ix86_tls_get_addr (void)
11426 {
11427
11428 if (!ix86_tls_symbol)
11429 {
11430 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
11431 (TARGET_GNU_TLS && !TARGET_64BIT)
11432 ? "___tls_get_addr"
11433 : "__tls_get_addr");
11434 }
11435
11436 return ix86_tls_symbol;
11437 }
11438 \f
11439 /* Calculate the length of the memory address in the instruction
11440 encoding. Does not include the one-byte modrm, opcode, or prefix. */
11441
11442 int
11443 memory_address_length (rtx addr)
11444 {
11445 struct ix86_address parts;
11446 rtx base, index, disp;
11447 int len;
11448
11449 if (GET_CODE (addr) == PRE_DEC
11450 || GET_CODE (addr) == POST_INC
11451 || GET_CODE (addr) == PRE_MODIFY
11452 || GET_CODE (addr) == POST_MODIFY)
11453 return 0;
11454
11455 if (! ix86_decompose_address (addr, &parts))
11456 abort ();
11457
11458 base = parts.base;
11459 index = parts.index;
11460 disp = parts.disp;
11461 len = 0;
11462
11463 /* Rule of thumb:
11464 - esp as the base always wants an index,
11465 - ebp as the base always wants a displacement. */
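/* Some worked examples of the value returned (added for illustration;
   the lengths exclude the opcode, prefixes and the modrm byte itself):
       (%eax)            -> 0
       (%esp)            -> 1   (SIB byte)
       8(%ebp)           -> 1   (disp8)
       foo               -> 4   (disp32)
       8(%ebx,%esi,4)    -> 2   (SIB byte + disp8)  */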
11466
11467 /* Register Indirect. */
11468 if (base && !index && !disp)
11469 {
11470 /* esp (for its index) and ebp (for its displacement) need
11471 the two-byte modrm form. */
11472 if (addr == stack_pointer_rtx
11473 || addr == arg_pointer_rtx
11474 || addr == frame_pointer_rtx
11475 || addr == hard_frame_pointer_rtx)
11476 len = 1;
11477 }
11478
11479 /* Direct Addressing. */
11480 else if (disp && !base && !index)
11481 len = 4;
11482
11483 else
11484 {
11485 /* Find the length of the displacement constant. */
11486 if (disp)
11487 {
11488 if (GET_CODE (disp) == CONST_INT
11489 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K')
11490 && base)
11491 len = 1;
11492 else
11493 len = 4;
11494 }
11495 /* ebp always wants a displacement. */
11496 else if (base == hard_frame_pointer_rtx)
11497 len = 1;
11498
11499 /* An index requires the two-byte modrm form.... */
11500 if (index
11501 /* ...like esp, which always wants an index. */
11502 || base == stack_pointer_rtx
11503 || base == arg_pointer_rtx
11504 || base == frame_pointer_rtx)
11505 len += 1;
11506 }
11507
11508 return len;
11509 }
11510
11511 /* Compute the default value for the "length_immediate" attribute.  When
11512 SHORTFORM is set, expect that the insn has an 8-bit immediate alternative. */
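/* For example (illustrative): "addl $100, %eax" can use the sign-extended
   8-bit immediate form of ADD (opcode 0x83), so its immediate contributes
   1 byte, whereas "addl $1000, %eax" needs the full 32-bit immediate and
   contributes 4 bytes.  */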
11513 int
11514 ix86_attr_length_immediate_default (rtx insn, int shortform)
11515 {
11516 int len = 0;
11517 int i;
11518 extract_insn_cached (insn);
11519 for (i = recog_data.n_operands - 1; i >= 0; --i)
11520 if (CONSTANT_P (recog_data.operand[i]))
11521 {
11522 if (len)
11523 abort ();
11524 if (shortform
11525 && GET_CODE (recog_data.operand[i]) == CONST_INT
11526 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
11527 len = 1;
11528 else
11529 {
11530 switch (get_attr_mode (insn))
11531 {
11532 case MODE_QI:
11533 len += 1;
11534 break;
11535 case MODE_HI:
11536 len += 2;
11537 break;
11538 case MODE_SI:
11539 len += 4;
11540 break;
11541 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
11542 case MODE_DI:
11543 len += 4;
11544 break;
11545 default:
11546 fatal_insn ("unknown insn mode", insn);
11547 }
11548 }
11549 }
11550 return len;
11551 }
11552 /* Compute default value for "length_address" attribute. */
11553 int
11554 ix86_attr_length_address_default (rtx insn)
11555 {
11556 int i;
11557
11558 if (get_attr_type (insn) == TYPE_LEA)
11559 {
11560 rtx set = PATTERN (insn);
11561 if (GET_CODE (set) == SET)
11562 ;
11563 else if (GET_CODE (set) == PARALLEL
11564 && GET_CODE (XVECEXP (set, 0, 0)) == SET)
11565 set = XVECEXP (set, 0, 0);
11566 else
11567 {
11568 #ifdef ENABLE_CHECKING
11569 abort ();
11570 #endif
11571 return 0;
11572 }
11573
11574 return memory_address_length (SET_SRC (set));
11575 }
11576
11577 extract_insn_cached (insn);
11578 for (i = recog_data.n_operands - 1; i >= 0; --i)
11579 if (GET_CODE (recog_data.operand[i]) == MEM)
11580 {
11581 return memory_address_length (XEXP (recog_data.operand[i], 0));
11583 }
11584 return 0;
11585 }
11586 \f
11587 /* Return the maximum number of instructions a cpu can issue. */
11588
11589 static int
11590 ix86_issue_rate (void)
11591 {
11592 switch (ix86_tune)
11593 {
11594 case PROCESSOR_PENTIUM:
11595 case PROCESSOR_K6:
11596 return 2;
11597
11598 case PROCESSOR_PENTIUMPRO:
11599 case PROCESSOR_PENTIUM4:
11600 case PROCESSOR_ATHLON:
11601 case PROCESSOR_K8:
11602 case PROCESSOR_NOCONA:
11603 return 3;
11604
11605 default:
11606 return 1;
11607 }
11608 }
11609
11610 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads the flags
11611 set by DEP_INSN and nothing else set by DEP_INSN. */
11612
11613 static int
11614 ix86_flags_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
11615 {
11616 rtx set, set2;
11617
11618 /* Simplify the test for uninteresting insns. */
11619 if (insn_type != TYPE_SETCC
11620 && insn_type != TYPE_ICMOV
11621 && insn_type != TYPE_FCMOV
11622 && insn_type != TYPE_IBR)
11623 return 0;
11624
11625 if ((set = single_set (dep_insn)) != 0)
11626 {
11627 set = SET_DEST (set);
11628 set2 = NULL_RTX;
11629 }
11630 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
11631 && XVECLEN (PATTERN (dep_insn), 0) == 2
11632 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
11633 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
11634 {
11635 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
11636 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
11637 }
11638 else
11639 return 0;
11640
11641 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
11642 return 0;
11643
11644 /* This test is true if the dependent insn reads the flags but
11645 not any other potentially set register. */
11646 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
11647 return 0;
11648
11649 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
11650 return 0;
11651
11652 return 1;
11653 }
11654
11655 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
11656 address with operands set by DEP_INSN. */
11657
11658 static int
11659 ix86_agi_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
11660 {
11661 rtx addr;
11662
11663 if (insn_type == TYPE_LEA
11664 && TARGET_PENTIUM)
11665 {
11666 addr = PATTERN (insn);
11667 if (GET_CODE (addr) == SET)
11668 ;
11669 else if (GET_CODE (addr) == PARALLEL
11670 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
11671 addr = XVECEXP (addr, 0, 0);
11672 else
11673 abort ();
11674 addr = SET_SRC (addr);
11675 }
11676 else
11677 {
11678 int i;
11679 extract_insn_cached (insn);
11680 for (i = recog_data.n_operands - 1; i >= 0; --i)
11681 if (GET_CODE (recog_data.operand[i]) == MEM)
11682 {
11683 addr = XEXP (recog_data.operand[i], 0);
11684 goto found;
11685 }
11686 return 0;
11687 found:;
11688 }
11689
11690 return modified_in_p (addr, dep_insn);
11691 }
11692
11693 static int
11694 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
11695 {
11696 enum attr_type insn_type, dep_insn_type;
11697 enum attr_memory memory;
11698 rtx set, set2;
11699 int dep_insn_code_number;
11700
11701 /* Anti and output dependencies have zero cost on all CPUs. */
11702 if (REG_NOTE_KIND (link) != 0)
11703 return 0;
11704
11705 dep_insn_code_number = recog_memoized (dep_insn);
11706
11707 /* If we can't recognize the insns, we can't really do anything. */
11708 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
11709 return cost;
11710
11711 insn_type = get_attr_type (insn);
11712 dep_insn_type = get_attr_type (dep_insn);
11713
11714 switch (ix86_tune)
11715 {
11716 case PROCESSOR_PENTIUM:
11717 /* Address Generation Interlock adds a cycle of latency. */
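/* For example (illustrative), on the original Pentium a sequence such as
   "addl $4, %ebx" immediately followed by "movl (%ebx), %eax" stalls for
   a cycle because %ebx is needed for address generation.  */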
11718 if (ix86_agi_dependant (insn, dep_insn, insn_type))
11719 cost += 1;
11720
11721 /* ??? Compares pair with jump/setcc. */
11722 if (ix86_flags_dependant (insn, dep_insn, insn_type))
11723 cost = 0;
11724
11725 /* Floating point stores require value to be ready one cycle earlier. */
11726 if (insn_type == TYPE_FMOV
11727 && get_attr_memory (insn) == MEMORY_STORE
11728 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11729 cost += 1;
11730 break;
11731
11732 case PROCESSOR_PENTIUMPRO:
11733 memory = get_attr_memory (insn);
11734
11735 /* INT->FP conversion is expensive. */
11736 if (get_attr_fp_int_src (dep_insn))
11737 cost += 5;
11738
11739 /* There is one cycle extra latency between an FP op and a store. */
11740 if (insn_type == TYPE_FMOV
11741 && (set = single_set (dep_insn)) != NULL_RTX
11742 && (set2 = single_set (insn)) != NULL_RTX
11743 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
11744 && GET_CODE (SET_DEST (set2)) == MEM)
11745 cost += 1;
11746
11747 /* Model the ability of the reorder buffer to hide the latency of a load
11748 by executing it in parallel with the previous instruction, provided the
11749 previous instruction is not needed to compute the address. */
11750 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11751 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11752 {
11753 /* Claim moves to take one cycle, as the core can issue one load
11754 at a time and the next load can start a cycle later. */
11755 if (dep_insn_type == TYPE_IMOV
11756 || dep_insn_type == TYPE_FMOV)
11757 cost = 1;
11758 else if (cost > 1)
11759 cost--;
11760 }
11761 break;
11762
11763 case PROCESSOR_K6:
11764 memory = get_attr_memory (insn);
11765
11766 /* The esp dependency is resolved before the instruction is really
11767 finished. */
11768 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
11769 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
11770 return 1;
11771
11772 /* INT->FP conversion is expensive. */
11773 if (get_attr_fp_int_src (dep_insn))
11774 cost += 5;
11775
11776 /* Model the ability of the reorder buffer to hide the latency of a load
11777 by executing it in parallel with the previous instruction, provided the
11778 previous instruction is not needed to compute the address. */
11779 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11780 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11781 {
11782 /* Claim moves to take one cycle, as the core can issue one load
11783 at a time and the next load can start a cycle later. */
11784 if (dep_insn_type == TYPE_IMOV
11785 || dep_insn_type == TYPE_FMOV)
11786 cost = 1;
11787 else if (cost > 2)
11788 cost -= 2;
11789 else
11790 cost = 1;
11791 }
11792 break;
11793
11794 case PROCESSOR_ATHLON:
11795 case PROCESSOR_K8:
11796 memory = get_attr_memory (insn);
11797
11798 /* Model the ability of the reorder buffer to hide the latency of a load
11799 by executing it in parallel with the previous instruction, provided the
11800 previous instruction is not needed to compute the address. */
11801 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11802 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11803 {
11804 enum attr_unit unit = get_attr_unit (insn);
11805 int loadcost = 3;
11806
11807 /* Because of the difference between the length of integer and
11808 floating unit pipeline preparation stages, the memory operands
11809 for floating point are cheaper.
11810
11811 ??? For Athlon the difference is most probably 2. */
11812 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
11813 loadcost = 3;
11814 else
11815 loadcost = TARGET_ATHLON ? 2 : 0;
11816
11817 if (cost >= loadcost)
11818 cost -= loadcost;
11819 else
11820 cost = 0;
11821 }
11822
11823 default:
11824 break;
11825 }
11826
11827 return cost;
11828 }
11829
11830 /* How many alternative schedules to try. This should be as wide as the
11831 scheduling freedom in the DFA, but no wider. Making this value too
11832 large results in extra work for the scheduler. */
11833
11834 static int
11835 ia32_multipass_dfa_lookahead (void)
11836 {
11837 if (ix86_tune == PROCESSOR_PENTIUM)
11838 return 2;
11839
11840 if (ix86_tune == PROCESSOR_PENTIUMPRO
11841 || ix86_tune == PROCESSOR_K6)
11842 return 1;
11843
11844 else
11845 return 0;
11846 }
11847
11848 \f
11849 /* Compute the alignment given to a constant that is being placed in memory.
11850 EXP is the constant and ALIGN is the alignment that the object would
11851 ordinarily have.
11852 The value of this function is used instead of that alignment to align
11853 the object. */
11854
11855 int
11856 ix86_constant_alignment (tree exp, int align)
11857 {
11858 if (TREE_CODE (exp) == REAL_CST)
11859 {
11860 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
11861 return 64;
11862 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
11863 return 128;
11864 }
11865 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
11866 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
11867 return BITS_PER_WORD;
11868
11869 return align;
11870 }
11871
11872 /* Compute the alignment for a static variable.
11873 TYPE is the data type, and ALIGN is the alignment that
11874 the object would ordinarily have. The value of this function is used
11875 instead of that alignment to align the object. */
11876
11877 int
11878 ix86_data_alignment (tree type, int align)
11879 {
11880 if (AGGREGATE_TYPE_P (type)
11881 && TYPE_SIZE (type)
11882 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11883 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
11884 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
11885 return 256;
11886
11887 /* The x86-64 ABI requires arrays larger than 16 bytes to be aligned
11888 to a 16-byte boundary. */
11889 if (TARGET_64BIT)
11890 {
11891 if (AGGREGATE_TYPE_P (type)
11892 && TYPE_SIZE (type)
11893 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11894 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
11895 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
11896 return 128;
11897 }
11898
11899 if (TREE_CODE (type) == ARRAY_TYPE)
11900 {
11901 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
11902 return 64;
11903 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
11904 return 128;
11905 }
11906 else if (TREE_CODE (type) == COMPLEX_TYPE)
11907 {
11908
11909 if (TYPE_MODE (type) == DCmode && align < 64)
11910 return 64;
11911 if (TYPE_MODE (type) == XCmode && align < 128)
11912 return 128;
11913 }
11914 else if ((TREE_CODE (type) == RECORD_TYPE
11915 || TREE_CODE (type) == UNION_TYPE
11916 || TREE_CODE (type) == QUAL_UNION_TYPE)
11917 && TYPE_FIELDS (type))
11918 {
11919 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
11920 return 64;
11921 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
11922 return 128;
11923 }
11924 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
11925 || TREE_CODE (type) == INTEGER_TYPE)
11926 {
11927 if (TYPE_MODE (type) == DFmode && align < 64)
11928 return 64;
11929 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
11930 return 128;
11931 }
11932
11933 return align;
11934 }
11935
11936 /* Compute the alignment for a local variable.
11937 TYPE is the data type, and ALIGN is the alignment that
11938 the object would ordinarily have. The value of this macro is used
11939 instead of that alignment to align the object. */
11940
11941 int
11942 ix86_local_alignment (tree type, int align)
11943 {
11944 /* The x86-64 ABI requires arrays larger than 16 bytes to be aligned
11945 to a 16-byte boundary. */
11946 if (TARGET_64BIT)
11947 {
11948 if (AGGREGATE_TYPE_P (type)
11949 && TYPE_SIZE (type)
11950 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11951 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
11952 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
11953 return 128;
11954 }
11955 if (TREE_CODE (type) == ARRAY_TYPE)
11956 {
11957 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
11958 return 64;
11959 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
11960 return 128;
11961 }
11962 else if (TREE_CODE (type) == COMPLEX_TYPE)
11963 {
11964 if (TYPE_MODE (type) == DCmode && align < 64)
11965 return 64;
11966 if (TYPE_MODE (type) == XCmode && align < 128)
11967 return 128;
11968 }
11969 else if ((TREE_CODE (type) == RECORD_TYPE
11970 || TREE_CODE (type) == UNION_TYPE
11971 || TREE_CODE (type) == QUAL_UNION_TYPE)
11972 && TYPE_FIELDS (type))
11973 {
11974 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
11975 return 64;
11976 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
11977 return 128;
11978 }
11979 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
11980 || TREE_CODE (type) == INTEGER_TYPE)
11981 {
11982
11983 if (TYPE_MODE (type) == DFmode && align < 64)
11984 return 64;
11985 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
11986 return 128;
11987 }
11988 return align;
11989 }
11990 \f
11991 /* Emit RTL insns to initialize the variable parts of a trampoline.
11992 FNADDR is an RTX for the address of the function's pure code.
11993 CXT is an RTX for the static chain value for the function. */
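/* For reference (added comment), the bytes stored below assemble to:

     32-bit:   b9 <cxt>               movl   $CXT, %ecx
               e9 <rel32>             jmp    FNADDR
     64-bit:   49 bb <imm64>          movabs $FNADDR, %r11
                 (or 41 bb <imm32>    movl   $FNADDR, %r11d  when it fits)
               49 ba <imm64>          movabs $CXT, %r10
               49 ff e3               jmp    *%r11  */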
11994 void
11995 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
11996 {
11997 if (!TARGET_64BIT)
11998 {
11999 /* Compute offset from the end of the jmp to the target function. */
12000 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
12001 plus_constant (tramp, 10),
12002 NULL_RTX, 1, OPTAB_DIRECT);
12003 emit_move_insn (gen_rtx_MEM (QImode, tramp),
12004 gen_int_mode (0xb9, QImode));
12005 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
12006 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
12007 gen_int_mode (0xe9, QImode));
12008 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
12009 }
12010 else
12011 {
12012 int offset = 0;
12013 /* Try to load the address using the shorter movl instead of movabs.
12014 We may want to support movq for kernel mode, but the kernel does not use
12015 trampolines at the moment. */
12016 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
12017 {
12018 fnaddr = copy_to_mode_reg (DImode, fnaddr);
12019 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12020 gen_int_mode (0xbb41, HImode));
12021 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
12022 gen_lowpart (SImode, fnaddr));
12023 offset += 6;
12024 }
12025 else
12026 {
12027 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12028 gen_int_mode (0xbb49, HImode));
12029 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12030 fnaddr);
12031 offset += 10;
12032 }
12033 /* Load static chain using movabs to r10. */
12034 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12035 gen_int_mode (0xba49, HImode));
12036 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12037 cxt);
12038 offset += 10;
12039 /* Jump to r11. */
12040 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12041 gen_int_mode (0xff49, HImode));
12042 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
12043 gen_int_mode (0xe3, QImode));
12044 offset += 3;
12045 if (offset > TRAMPOLINE_SIZE)
12046 abort ();
12047 }
12048
12049 #ifdef ENABLE_EXECUTE_STACK
12050 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
12051 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
12052 #endif
12053 }
12054 \f
12055 #define def_builtin(MASK, NAME, TYPE, CODE) \
12056 do { \
12057 if ((MASK) & target_flags \
12058 && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
12059 lang_hooks.builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
12060 NULL, NULL_TREE); \
12061 } while (0)
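/* A typical use of def_builtin later in this file looks roughly like
   (illustrative; the exact type node names are built in
   ix86_init_mmx_sse_builtins):

     def_builtin (MASK_SSE, "__builtin_ia32_addps",
                  v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ADDPS);

   which registers the builtin only when SSE is enabled for the target.  */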
12062
12063 struct builtin_description
12064 {
12065 const unsigned int mask;
12066 const enum insn_code icode;
12067 const char *const name;
12068 const enum ix86_builtins code;
12069 const enum rtx_code comparison;
12070 const unsigned int flag;
12071 };
12072
12073 static const struct builtin_description bdesc_comi[] =
12074 {
12075 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
12076 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
12077 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
12078 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
12079 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
12080 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
12081 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
12082 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
12083 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
12084 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
12085 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
12086 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
12087 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
12088 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
12089 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
12090 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
12091 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
12092 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
12093 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
12094 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
12095 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
12096 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
12097 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
12098 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
12099 };
12100
12101 static const struct builtin_description bdesc_2arg[] =
12102 {
12103 /* SSE */
12104 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
12105 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
12106 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
12107 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
12108 { MASK_SSE, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
12109 { MASK_SSE, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
12110 { MASK_SSE, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
12111 { MASK_SSE, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
12112
12113 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
12114 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
12115 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
12116 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
12117 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
12118 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
12119 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
12120 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
12121 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
12122 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
12123 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
12124 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
12125 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
12126 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
12127 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
12128 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
12129 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
12130 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
12131 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
12132 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
12133
12134 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
12135 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
12136 { MASK_SSE, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
12137 { MASK_SSE, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
12138
12139 { MASK_SSE, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
12140 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
12141 { MASK_SSE, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
12142 { MASK_SSE, CODE_FOR_sse_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
12143
12144 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
12145 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
12146 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
12147 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
12148 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
12149
12150 /* MMX */
12151 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
12152 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
12153 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
12154 { MASK_MMX, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
12155 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
12156 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
12157 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
12158 { MASK_MMX, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
12159
12160 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
12161 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
12162 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
12163 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
12164 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
12165 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
12166 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
12167 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
12168
12169 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
12170 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
12171 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
12172
12173 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
12174 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
12175 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
12176 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
12177
12178 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
12179 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
12180
12181 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
12182 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
12183 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
12184 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
12185 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
12186 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
12187
12188 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
12189 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
12190 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
12191 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
12192
12193 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
12194 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
12195 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
12196 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
12197 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
12198 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
12199
12200 /* Special. */
12201 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
12202 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
12203 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
12204
12205 { MASK_SSE, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
12206 { MASK_SSE, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
12207 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
12208
12209 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
12210 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
12211 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
12212 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
12213 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
12214 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
12215
12216 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
12217 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
12218 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
12219 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
12220 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
12221 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
12222
12223 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
12224 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
12225 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
12226 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
12227
12228 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
12229 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
12230
12231 /* SSE2 */
12232 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
12233 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
12234 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
12235 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
12236 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
12237 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
12238 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
12239 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
12240
12241 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
12242 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
12243 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
12244 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
12245 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
12246 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
12247 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
12248 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
12249 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
12250 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
12251 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
12252 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
12253 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
12254 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
12255 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
12256 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
12257 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
12258 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
12259 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
12260 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
12261
12262 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
12263 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
12264 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
12265 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
12266
12267 { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
12268 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
12269 { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
12270 { MASK_SSE2, CODE_FOR_sse2_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
12271
12272 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
12273 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
12274 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
12275
12276 /* SSE2 MMX */
12277 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
12278 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
12279 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
12280 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
12281 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
12282 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
12283 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
12284 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
12285
12286 { MASK_MMX, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
12287 { MASK_MMX, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
12288 { MASK_MMX, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
12289 { MASK_MMX, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
12290 { MASK_MMX, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
12291 { MASK_MMX, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
12292 { MASK_MMX, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
12293 { MASK_MMX, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
12294
12295 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
12296 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
12297
12298 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
12299 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
12300 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
12301 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
12302
12303 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
12304 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
12305
12306 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
12307 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
12308 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
12309 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
12310 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
12311 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
12312
12313 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
12314 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
12315 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
12316 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
12317
12318 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
12319 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
12320 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
12321 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
12322 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
12323 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
12324 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
12325 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
12326
12327 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
12328 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
12329 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
12330
12331 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
12332 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
12333
12334 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
12335 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },
12336
12337 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
12338 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
12339 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
12340 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
12341 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
12342 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
12343
12344 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
12345 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
12346 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
12347 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
12348 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
12349 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
12350
12351 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
12352 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
12353 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
12354 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
12355
12356 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
12357
12358 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
12359 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
12360 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
12361 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
12362
12363 /* SSE3 MMX */
12364 { MASK_SSE3, CODE_FOR_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
12365 { MASK_SSE3, CODE_FOR_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
12366 { MASK_SSE3, CODE_FOR_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
12367 { MASK_SSE3, CODE_FOR_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
12368 { MASK_SSE3, CODE_FOR_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
12369 { MASK_SSE3, CODE_FOR_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
12370 };
12371
12372 static const struct builtin_description bdesc_1arg[] =
12373 {
12374 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
12375 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
12376
12377 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
12378 { MASK_SSE, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
12379 { MASK_SSE, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
12380
12381 { MASK_SSE, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
12382 { MASK_SSE, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
12383 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
12384 { MASK_SSE, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
12385 { MASK_SSE, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
12386 { MASK_SSE | MASK_64BIT, CODE_FOR_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
12387
12388 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
12389 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
12390 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
12391 { MASK_SSE2, CODE_FOR_sse2_movdq2q, 0, IX86_BUILTIN_MOVDQ2Q, 0, 0 },
12392
12393 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
12394
12395 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
12396 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
12397
12398 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
12399 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
12400 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
12401 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
12402 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
12403
12404 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
12405
12406 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
12407 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
12408 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
12409 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
12410
12411 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
12412 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
12413 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
12414
12415 { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 },
12416
12417 /* SSE3 */
12418 { MASK_SSE3, CODE_FOR_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
12419 { MASK_SSE3, CODE_FOR_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
12420 { MASK_SSE3, CODE_FOR_movddup, 0, IX86_BUILTIN_MOVDDUP, 0, 0 }
12421 };
12422
12423 void
12424 ix86_init_builtins (void)
12425 {
12426 if (TARGET_MMX)
12427 ix86_init_mmx_sse_builtins ();
12428 }
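/* Illustrative sketch, assuming -msse3: the tables above are what
   user-level intrinsic calls resolve to.  A translation unit such as

       typedef float __v4sf __attribute__ ((__vector_size__ (16)));

       __v4sf
       addsub (__v4sf a, __v4sf b)
       {
         return __builtin_ia32_addsubps (a, b);
       }

   uses IX86_BUILTIN_ADDSUBPS, defined from the bdesc_2arg entry for
   CODE_FOR_addsubv4sf3 above by ix86_init_mmx_sse_builtins below, and
   is expanded later in this file by ix86_expand_binop_builtin.  The
   __v4sf typedef normally comes from <xmmintrin.h>; it is spelled out
   here only to keep the example self-contained.  */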
12429
12430 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
12431 is zero. Otherwise, if TARGET_SSE is not set, only the MMX
12432 builtins are defined. */
12433 static void
12434 ix86_init_mmx_sse_builtins (void)
12435 {
12436 const struct builtin_description * d;
12437 size_t i;
12438
12439 tree V16QI_type_node = build_vector_type_for_mode (intQI_type_node, V16QImode);
12440 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
12441 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
12442 tree V2DI_type_node
12443 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
12444 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
12445 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
12446 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
12447 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
12448 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
12449 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
12450
12451 tree pchar_type_node = build_pointer_type (char_type_node);
12452 tree pcchar_type_node = build_pointer_type (
12453 build_type_variant (char_type_node, 1, 0));
12454 tree pfloat_type_node = build_pointer_type (float_type_node);
12455 tree pcfloat_type_node = build_pointer_type (
12456 build_type_variant (float_type_node, 1, 0));
12457 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
12458 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
12459 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
12460
12461 /* Comparisons. */
12462 tree int_ftype_v4sf_v4sf
12463 = build_function_type_list (integer_type_node,
12464 V4SF_type_node, V4SF_type_node, NULL_TREE);
12465 tree v4si_ftype_v4sf_v4sf
12466 = build_function_type_list (V4SI_type_node,
12467 V4SF_type_node, V4SF_type_node, NULL_TREE);
12468 /* MMX/SSE/integer conversions. */
12469 tree int_ftype_v4sf
12470 = build_function_type_list (integer_type_node,
12471 V4SF_type_node, NULL_TREE);
12472 tree int64_ftype_v4sf
12473 = build_function_type_list (long_long_integer_type_node,
12474 V4SF_type_node, NULL_TREE);
12475 tree int_ftype_v8qi
12476 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
12477 tree v4sf_ftype_v4sf_int
12478 = build_function_type_list (V4SF_type_node,
12479 V4SF_type_node, integer_type_node, NULL_TREE);
12480 tree v4sf_ftype_v4sf_int64
12481 = build_function_type_list (V4SF_type_node,
12482 V4SF_type_node, long_long_integer_type_node,
12483 NULL_TREE);
12484 tree v4sf_ftype_v4sf_v2si
12485 = build_function_type_list (V4SF_type_node,
12486 V4SF_type_node, V2SI_type_node, NULL_TREE);
12487 tree int_ftype_v4hi_int
12488 = build_function_type_list (integer_type_node,
12489 V4HI_type_node, integer_type_node, NULL_TREE);
12490 tree v4hi_ftype_v4hi_int_int
12491 = build_function_type_list (V4HI_type_node, V4HI_type_node,
12492 integer_type_node, integer_type_node,
12493 NULL_TREE);
12494 /* Miscellaneous. */
12495 tree v8qi_ftype_v4hi_v4hi
12496 = build_function_type_list (V8QI_type_node,
12497 V4HI_type_node, V4HI_type_node, NULL_TREE);
12498 tree v4hi_ftype_v2si_v2si
12499 = build_function_type_list (V4HI_type_node,
12500 V2SI_type_node, V2SI_type_node, NULL_TREE);
12501 tree v4sf_ftype_v4sf_v4sf_int
12502 = build_function_type_list (V4SF_type_node,
12503 V4SF_type_node, V4SF_type_node,
12504 integer_type_node, NULL_TREE);
12505 tree v2si_ftype_v4hi_v4hi
12506 = build_function_type_list (V2SI_type_node,
12507 V4HI_type_node, V4HI_type_node, NULL_TREE);
12508 tree v4hi_ftype_v4hi_int
12509 = build_function_type_list (V4HI_type_node,
12510 V4HI_type_node, integer_type_node, NULL_TREE);
12511 tree v4hi_ftype_v4hi_di
12512 = build_function_type_list (V4HI_type_node,
12513 V4HI_type_node, long_long_unsigned_type_node,
12514 NULL_TREE);
12515 tree v2si_ftype_v2si_di
12516 = build_function_type_list (V2SI_type_node,
12517 V2SI_type_node, long_long_unsigned_type_node,
12518 NULL_TREE);
12519 tree void_ftype_void
12520 = build_function_type (void_type_node, void_list_node);
12521 tree void_ftype_unsigned
12522 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
12523 tree void_ftype_unsigned_unsigned
12524 = build_function_type_list (void_type_node, unsigned_type_node,
12525 unsigned_type_node, NULL_TREE);
12526 tree void_ftype_pcvoid_unsigned_unsigned
12527 = build_function_type_list (void_type_node, const_ptr_type_node,
12528 unsigned_type_node, unsigned_type_node,
12529 NULL_TREE);
12530 tree unsigned_ftype_void
12531 = build_function_type (unsigned_type_node, void_list_node);
12532 tree di_ftype_void
12533 = build_function_type (long_long_unsigned_type_node, void_list_node);
12534 tree v4sf_ftype_void
12535 = build_function_type (V4SF_type_node, void_list_node);
12536 tree v2si_ftype_v4sf
12537 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
12538 /* Loads/stores. */
12539 tree void_ftype_v8qi_v8qi_pchar
12540 = build_function_type_list (void_type_node,
12541 V8QI_type_node, V8QI_type_node,
12542 pchar_type_node, NULL_TREE);
12543 tree v4sf_ftype_pcfloat
12544 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
12545 /* @@@ the type is bogus */
12546 tree v4sf_ftype_v4sf_pv2si
12547 = build_function_type_list (V4SF_type_node,
12548 V4SF_type_node, pv2si_type_node, NULL_TREE);
12549 tree void_ftype_pv2si_v4sf
12550 = build_function_type_list (void_type_node,
12551 pv2si_type_node, V4SF_type_node, NULL_TREE);
12552 tree void_ftype_pfloat_v4sf
12553 = build_function_type_list (void_type_node,
12554 pfloat_type_node, V4SF_type_node, NULL_TREE);
12555 tree void_ftype_pdi_di
12556 = build_function_type_list (void_type_node,
12557 pdi_type_node, long_long_unsigned_type_node,
12558 NULL_TREE);
12559 tree void_ftype_pv2di_v2di
12560 = build_function_type_list (void_type_node,
12561 pv2di_type_node, V2DI_type_node, NULL_TREE);
12562 /* Normal vector unops. */
12563 tree v4sf_ftype_v4sf
12564 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
12565
12566 /* Normal vector binops. */
12567 tree v4sf_ftype_v4sf_v4sf
12568 = build_function_type_list (V4SF_type_node,
12569 V4SF_type_node, V4SF_type_node, NULL_TREE);
12570 tree v8qi_ftype_v8qi_v8qi
12571 = build_function_type_list (V8QI_type_node,
12572 V8QI_type_node, V8QI_type_node, NULL_TREE);
12573 tree v4hi_ftype_v4hi_v4hi
12574 = build_function_type_list (V4HI_type_node,
12575 V4HI_type_node, V4HI_type_node, NULL_TREE);
12576 tree v2si_ftype_v2si_v2si
12577 = build_function_type_list (V2SI_type_node,
12578 V2SI_type_node, V2SI_type_node, NULL_TREE);
12579 tree di_ftype_di_di
12580 = build_function_type_list (long_long_unsigned_type_node,
12581 long_long_unsigned_type_node,
12582 long_long_unsigned_type_node, NULL_TREE);
12583
12584 tree v2si_ftype_v2sf
12585 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
12586 tree v2sf_ftype_v2si
12587 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
12588 tree v2si_ftype_v2si
12589 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
12590 tree v2sf_ftype_v2sf
12591 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
12592 tree v2sf_ftype_v2sf_v2sf
12593 = build_function_type_list (V2SF_type_node,
12594 V2SF_type_node, V2SF_type_node, NULL_TREE);
12595 tree v2si_ftype_v2sf_v2sf
12596 = build_function_type_list (V2SI_type_node,
12597 V2SF_type_node, V2SF_type_node, NULL_TREE);
12598 tree pint_type_node = build_pointer_type (integer_type_node);
12599 tree pcint_type_node = build_pointer_type (
12600 build_type_variant (integer_type_node, 1, 0));
12601 tree pdouble_type_node = build_pointer_type (double_type_node);
12602 tree pcdouble_type_node = build_pointer_type (
12603 build_type_variant (double_type_node, 1, 0));
12604 tree int_ftype_v2df_v2df
12605 = build_function_type_list (integer_type_node,
12606 V2DF_type_node, V2DF_type_node, NULL_TREE);
12607
12608 tree ti_ftype_void
12609 = build_function_type (intTI_type_node, void_list_node);
12610 tree v2di_ftype_void
12611 = build_function_type (V2DI_type_node, void_list_node);
12612 tree ti_ftype_ti_ti
12613 = build_function_type_list (intTI_type_node,
12614 intTI_type_node, intTI_type_node, NULL_TREE);
12615 tree void_ftype_pcvoid
12616 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
12617 tree v2di_ftype_di
12618 = build_function_type_list (V2DI_type_node,
12619 long_long_unsigned_type_node, NULL_TREE);
12620 tree di_ftype_v2di
12621 = build_function_type_list (long_long_unsigned_type_node,
12622 V2DI_type_node, NULL_TREE);
12623 tree v4sf_ftype_v4si
12624 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
12625 tree v4si_ftype_v4sf
12626 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
12627 tree v2df_ftype_v4si
12628 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
12629 tree v4si_ftype_v2df
12630 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
12631 tree v2si_ftype_v2df
12632 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
12633 tree v4sf_ftype_v2df
12634 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
12635 tree v2df_ftype_v2si
12636 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
12637 tree v2df_ftype_v4sf
12638 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
12639 tree int_ftype_v2df
12640 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
12641 tree int64_ftype_v2df
12642 = build_function_type_list (long_long_integer_type_node,
12643 V2DF_type_node, NULL_TREE);
12644 tree v2df_ftype_v2df_int
12645 = build_function_type_list (V2DF_type_node,
12646 V2DF_type_node, integer_type_node, NULL_TREE);
12647 tree v2df_ftype_v2df_int64
12648 = build_function_type_list (V2DF_type_node,
12649 V2DF_type_node, long_long_integer_type_node,
12650 NULL_TREE);
12651 tree v4sf_ftype_v4sf_v2df
12652 = build_function_type_list (V4SF_type_node,
12653 V4SF_type_node, V2DF_type_node, NULL_TREE);
12654 tree v2df_ftype_v2df_v4sf
12655 = build_function_type_list (V2DF_type_node,
12656 V2DF_type_node, V4SF_type_node, NULL_TREE);
12657 tree v2df_ftype_v2df_v2df_int
12658 = build_function_type_list (V2DF_type_node,
12659 V2DF_type_node, V2DF_type_node,
12660 integer_type_node,
12661 NULL_TREE);
12662 tree v2df_ftype_v2df_pcdouble
12663 = build_function_type_list (V2DF_type_node,
12664 V2DF_type_node, pcdouble_type_node, NULL_TREE);
12665 tree void_ftype_pdouble_v2df
12666 = build_function_type_list (void_type_node,
12667 pdouble_type_node, V2DF_type_node, NULL_TREE);
12668 tree void_ftype_pint_int
12669 = build_function_type_list (void_type_node,
12670 pint_type_node, integer_type_node, NULL_TREE);
12671 tree void_ftype_v16qi_v16qi_pchar
12672 = build_function_type_list (void_type_node,
12673 V16QI_type_node, V16QI_type_node,
12674 pchar_type_node, NULL_TREE);
12675 tree v2df_ftype_pcdouble
12676 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
12677 tree v2df_ftype_v2df_v2df
12678 = build_function_type_list (V2DF_type_node,
12679 V2DF_type_node, V2DF_type_node, NULL_TREE);
12680 tree v16qi_ftype_v16qi_v16qi
12681 = build_function_type_list (V16QI_type_node,
12682 V16QI_type_node, V16QI_type_node, NULL_TREE);
12683 tree v8hi_ftype_v8hi_v8hi
12684 = build_function_type_list (V8HI_type_node,
12685 V8HI_type_node, V8HI_type_node, NULL_TREE);
12686 tree v4si_ftype_v4si_v4si
12687 = build_function_type_list (V4SI_type_node,
12688 V4SI_type_node, V4SI_type_node, NULL_TREE);
12689 tree v2di_ftype_v2di_v2di
12690 = build_function_type_list (V2DI_type_node,
12691 V2DI_type_node, V2DI_type_node, NULL_TREE);
12692 tree v2di_ftype_v2df_v2df
12693 = build_function_type_list (V2DI_type_node,
12694 V2DF_type_node, V2DF_type_node, NULL_TREE);
12695 tree v2df_ftype_v2df
12696 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
12697 tree v2df_ftype_double
12698 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
12699 tree v2df_ftype_double_double
12700 = build_function_type_list (V2DF_type_node,
12701 double_type_node, double_type_node, NULL_TREE);
12702 tree int_ftype_v8hi_int
12703 = build_function_type_list (integer_type_node,
12704 V8HI_type_node, integer_type_node, NULL_TREE);
12705 tree v8hi_ftype_v8hi_int_int
12706 = build_function_type_list (V8HI_type_node,
12707 V8HI_type_node, integer_type_node,
12708 integer_type_node, NULL_TREE);
12709 tree v2di_ftype_v2di_int
12710 = build_function_type_list (V2DI_type_node,
12711 V2DI_type_node, integer_type_node, NULL_TREE);
12712 tree v4si_ftype_v4si_int
12713 = build_function_type_list (V4SI_type_node,
12714 V4SI_type_node, integer_type_node, NULL_TREE);
12715 tree v8hi_ftype_v8hi_int
12716 = build_function_type_list (V8HI_type_node,
12717 V8HI_type_node, integer_type_node, NULL_TREE);
12718 tree v8hi_ftype_v8hi_v2di
12719 = build_function_type_list (V8HI_type_node,
12720 V8HI_type_node, V2DI_type_node, NULL_TREE);
12721 tree v4si_ftype_v4si_v2di
12722 = build_function_type_list (V4SI_type_node,
12723 V4SI_type_node, V2DI_type_node, NULL_TREE);
12724 tree v4si_ftype_v8hi_v8hi
12725 = build_function_type_list (V4SI_type_node,
12726 V8HI_type_node, V8HI_type_node, NULL_TREE);
12727 tree di_ftype_v8qi_v8qi
12728 = build_function_type_list (long_long_unsigned_type_node,
12729 V8QI_type_node, V8QI_type_node, NULL_TREE);
12730 tree di_ftype_v2si_v2si
12731 = build_function_type_list (long_long_unsigned_type_node,
12732 V2SI_type_node, V2SI_type_node, NULL_TREE);
12733 tree v2di_ftype_v16qi_v16qi
12734 = build_function_type_list (V2DI_type_node,
12735 V16QI_type_node, V16QI_type_node, NULL_TREE);
12736 tree v2di_ftype_v4si_v4si
12737 = build_function_type_list (V2DI_type_node,
12738 V4SI_type_node, V4SI_type_node, NULL_TREE);
12739 tree int_ftype_v16qi
12740 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
12741 tree v16qi_ftype_pcchar
12742 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
12743 tree void_ftype_pchar_v16qi
12744 = build_function_type_list (void_type_node,
12745 pchar_type_node, V16QI_type_node, NULL_TREE);
12746 tree v4si_ftype_pcint
12747 = build_function_type_list (V4SI_type_node, pcint_type_node, NULL_TREE);
12748 tree void_ftype_pcint_v4si
12749 = build_function_type_list (void_type_node,
12750 pcint_type_node, V4SI_type_node, NULL_TREE);
12751 tree v2di_ftype_v2di
12752 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
12753
12754 tree float80_type;
12755 tree float128_type;
12756
12757 /* The __float80 type. */
12758 if (TYPE_MODE (long_double_type_node) == XFmode)
12759 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
12760 "__float80");
12761 else
12762 {
12763 /* The __float80 type. */
12764 float80_type = make_node (REAL_TYPE);
12765 TYPE_PRECISION (float80_type) = 80;
12766 layout_type (float80_type);
12767 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
12768 }
12769
12770 float128_type = make_node (REAL_TYPE);
12771 TYPE_PRECISION (float128_type) = 128;
12772 layout_type (float128_type);
12773 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
12774
12775 /* Add all builtins that are more or less simple operations on two
12776 operands. */
12777 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
12778 {
12779 /* Use one of the operands; the target can have a different mode for
12780 mask-generating compares. */
12781 enum machine_mode mode;
12782 tree type;
12783
12784 if (d->name == 0)
12785 continue;
12786 mode = insn_data[d->icode].operand[1].mode;
12787
12788 switch (mode)
12789 {
12790 case V16QImode:
12791 type = v16qi_ftype_v16qi_v16qi;
12792 break;
12793 case V8HImode:
12794 type = v8hi_ftype_v8hi_v8hi;
12795 break;
12796 case V4SImode:
12797 type = v4si_ftype_v4si_v4si;
12798 break;
12799 case V2DImode:
12800 type = v2di_ftype_v2di_v2di;
12801 break;
12802 case V2DFmode:
12803 type = v2df_ftype_v2df_v2df;
12804 break;
12805 case TImode:
12806 type = ti_ftype_ti_ti;
12807 break;
12808 case V4SFmode:
12809 type = v4sf_ftype_v4sf_v4sf;
12810 break;
12811 case V8QImode:
12812 type = v8qi_ftype_v8qi_v8qi;
12813 break;
12814 case V4HImode:
12815 type = v4hi_ftype_v4hi_v4hi;
12816 break;
12817 case V2SImode:
12818 type = v2si_ftype_v2si_v2si;
12819 break;
12820 case DImode:
12821 type = di_ftype_di_di;
12822 break;
12823
12824 default:
12825 abort ();
12826 }
12827
12828 /* Override for comparisons. */
12829 if (d->icode == CODE_FOR_maskcmpv4sf3
12830 || d->icode == CODE_FOR_maskncmpv4sf3
12831 || d->icode == CODE_FOR_vmmaskcmpv4sf3
12832 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
12833 type = v4si_ftype_v4sf_v4sf;
12834
12835 if (d->icode == CODE_FOR_maskcmpv2df3
12836 || d->icode == CODE_FOR_maskncmpv2df3
12837 || d->icode == CODE_FOR_vmmaskcmpv2df3
12838 || d->icode == CODE_FOR_vmmaskncmpv2df3)
12839 type = v2di_ftype_v2df_v2df;
12840
12841 def_builtin (d->mask, d->name, type, d->code);
12842 }
12843
12844 /* Add the remaining MMX insns with somewhat more complicated types. */
12845 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
12846 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
12847 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
12848 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
12849 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
12850
12851 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
12852 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
12853 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
12854
12855 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
12856 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
12857
12858 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
12859 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
12860
12861 /* comi/ucomi insns. */
12862 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
12863 if (d->mask == MASK_SSE2)
12864 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
12865 else
12866 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
12867
12868 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
12869 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
12870 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
12871
12872 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
12873 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
12874 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
12875 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
12876 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
12877 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
12878 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
12879 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
12880 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
12881 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
12882 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
12883
12884 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
12885 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
12886
12887 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
12888
12889 def_builtin (MASK_SSE, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADAPS);
12890 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
12891 def_builtin (MASK_SSE, "__builtin_ia32_loadss", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADSS);
12892 def_builtin (MASK_SSE, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
12893 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
12894 def_builtin (MASK_SSE, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
12895
12896 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
12897 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
12898 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
12899 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
12900
12901 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
12902 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
12903 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
12904 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
12905
12906 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
12907
12908 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
12909
12910 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
12911 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
12912 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
12913 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
12914 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
12915 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
12916
12917 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
12918
12919 /* Original 3DNow! */
12920 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
12921 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
12922 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
12923 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
12924 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
12925 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
12926 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
12927 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
12928 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
12929 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
12930 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
12931 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
12932 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
12933 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
12934 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
12935 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
12936 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
12937 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
12938 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
12939 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
12940
12941 /* 3DNow! extension as used in the Athlon CPU. */
12942 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
12943 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
12944 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
12945 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
12946 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
12947 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
12948
12949 def_builtin (MASK_SSE, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
12950
12951 /* SSE2 */
12952 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
12953 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
12954
12955 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
12956 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
12957 def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q);
12958
12959 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADAPD);
12960 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
12961 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADSD);
12962 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
12963 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
12964 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
12965
12966 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
12967 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
12968 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREHPD);
12969 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORELPD);
12970
12971 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
12972 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
12973 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
12974 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
12975 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
12976
12977 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
12978 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
12979 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
12980 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
12981
12982 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
12983 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
12984
12985 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
12986
12987 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
12988 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
12989
12990 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
12991 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
12992 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
12993 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
12994 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
12995
12996 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
12997
12998 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
12999 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
13000 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
13001 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
13002
13003 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
13004 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
13005 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
13006
13007 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
13008 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
13009 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
13010 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
13011
13012 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
13013 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
13014 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
13015 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pcdouble, IX86_BUILTIN_LOADPD1);
13016 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADRPD);
13017 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
13018 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
13019
13020 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
13021 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
13022 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
13023
13024 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQA);
13025 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
13026 def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pcint, IX86_BUILTIN_LOADD);
13027 def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA);
13028 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
13029 def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pcint_v4si, IX86_BUILTIN_STORED);
13030 def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ);
13031
13032 def_builtin (MASK_SSE, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI);
13033
13034 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
13035 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
13036
13037 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
13038 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
13039 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
13040
13041 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
13042 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
13043 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
13044
13045 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
13046 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
13047
13048 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
13049 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
13050 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
13051 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
13052
13053 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
13054 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
13055 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
13056 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
13057
13058 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
13059 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
13060
13061 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
13062
13063 /* Prescott New Instructions. */
13064 def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
13065 void_ftype_pcvoid_unsigned_unsigned,
13066 IX86_BUILTIN_MONITOR);
13067 def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
13068 void_ftype_unsigned_unsigned,
13069 IX86_BUILTIN_MWAIT);
13070 def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
13071 v4sf_ftype_v4sf,
13072 IX86_BUILTIN_MOVSHDUP);
13073 def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
13074 v4sf_ftype_v4sf,
13075 IX86_BUILTIN_MOVSLDUP);
13076 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
13077 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
13078 def_builtin (MASK_SSE3, "__builtin_ia32_loadddup",
13079 v2df_ftype_pcdouble, IX86_BUILTIN_LOADDDUP);
13080 def_builtin (MASK_SSE3, "__builtin_ia32_movddup",
13081 v2df_ftype_v2df, IX86_BUILTIN_MOVDDUP);
13082 }
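/* Usage sketch, assuming -msse: the pcfloat/pfloat signatures registered
   above give the unaligned load/store builtins prototypes equivalent to

       __v4sf __builtin_ia32_loadups (const float *);
       void   __builtin_ia32_storeups (float *, __v4sf);

   so copying four floats through an XMM register can be written as

       typedef float __v4sf __attribute__ ((__vector_size__ (16)));

       void
       copy4 (float *dst, const float *src)
       {
         __builtin_ia32_storeups (dst, __builtin_ia32_loadups (src));
       }

   The _mm_loadu_ps and _mm_storeu_ps intrinsics in <xmmintrin.h> are
   thin wrappers around these builtins.  */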
13083
13084 /* Errors in the source file can cause expand_expr to return const0_rtx
13085 where we expect a vector. To avoid crashing, use one of the vector
13086 clear instructions. */
13087 static rtx
13088 safe_vector_operand (rtx x, enum machine_mode mode)
13089 {
13090 if (x != const0_rtx)
13091 return x;
13092 x = gen_reg_rtx (mode);
13093
13094 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
13095 emit_insn (gen_mmx_clrdi (mode == DImode ? x
13096 : gen_rtx_SUBREG (DImode, x, 0)));
13097 else
13098 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
13099 : gen_rtx_SUBREG (V4SFmode, x, 0),
13100 CONST0_RTX (V4SFmode)));
13101 return x;
13102 }
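/* For example (illustrative): if a caller writes
   __builtin_ia32_haddps (a, b) and `b' refers to an undeclared variable,
   expand_expr hands back const0_rtx for it; substituting a zeroed vector
   register here lets the expanders below keep going instead of crashing
   on a scalar zero.  */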
13103
13104 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
13105
13106 static rtx
13107 ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
13108 {
13109 rtx pat;
13110 tree arg0 = TREE_VALUE (arglist);
13111 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13112 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13113 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13114 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13115 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13116 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
13117
13118 if (VECTOR_MODE_P (mode0))
13119 op0 = safe_vector_operand (op0, mode0);
13120 if (VECTOR_MODE_P (mode1))
13121 op1 = safe_vector_operand (op1, mode1);
13122
13123 if (! target
13124 || GET_MODE (target) != tmode
13125 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13126 target = gen_reg_rtx (tmode);
13127
13128 if (GET_MODE (op1) == SImode && mode1 == TImode)
13129 {
13130 rtx x = gen_reg_rtx (V4SImode);
13131 emit_insn (gen_sse2_loadd (x, op1));
13132 op1 = gen_lowpart (TImode, x);
13133 }
13134
13135 /* If the insn wants input operands in modes different from
13136 the result, abort. */
13137 if ((GET_MODE (op0) != mode0 && GET_MODE (op0) != VOIDmode)
13138 || (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode))
13139 abort ();
13140
13141 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13142 op0 = copy_to_mode_reg (mode0, op0);
13143 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13144 op1 = copy_to_mode_reg (mode1, op1);
13145
13146 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
13147 yet one of the two must not be a memory. This is normally enforced
13148 by expanders, but we didn't bother to create one here. */
13149 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
13150 op0 = copy_to_mode_reg (mode0, op0);
13151
13152 pat = GEN_FCN (icode) (target, op0, op1);
13153 if (! pat)
13154 return 0;
13155 emit_insn (pat);
13156 return target;
13157 }
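/* Expansion sketch (illustrative): a bdesc_2arg entry such as the
   CODE_FOR_haddv4sf3 / "__builtin_ia32_haddps" row above reaches this
   routine with ICODE == CODE_FOR_haddv4sf3.  Both arguments are
   expanded to rtx, copied into V4SF registers whenever the insn's
   operand predicates reject them, and a single haddv4sf3 insn is
   emitted; the destination register of that insn is returned as the
   value of the builtin call.  */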
13158
13159 /* Subroutine of ix86_expand_builtin to take care of stores. */
13160
13161 static rtx
13162 ix86_expand_store_builtin (enum insn_code icode, tree arglist)
13163 {
13164 rtx pat;
13165 tree arg0 = TREE_VALUE (arglist);
13166 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13167 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13168 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13169 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
13170 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
13171
13172 if (VECTOR_MODE_P (mode1))
13173 op1 = safe_vector_operand (op1, mode1);
13174
13175 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13176 op1 = copy_to_mode_reg (mode1, op1);
13177
13178 pat = GEN_FCN (icode) (op0, op1);
13179 if (pat)
13180 emit_insn (pat);
13181 return 0;
13182 }
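/* Store sketch (illustrative): IX86_BUILTIN_STOREUPS below is handled as
   ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist).  The first
   argument (a float *) becomes the MEM destination, the vector value is
   copied into a register, the movups pattern is emitted, and 0 is
   returned because a store produces no value.  */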
13183
13184 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
13185
13186 static rtx
13187 ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
13188 rtx target, int do_load)
13189 {
13190 rtx pat;
13191 tree arg0 = TREE_VALUE (arglist);
13192 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13193 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13194 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13195
13196 if (! target
13197 || GET_MODE (target) != tmode
13198 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13199 target = gen_reg_rtx (tmode);
13200 if (do_load)
13201 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13202 else
13203 {
13204 if (VECTOR_MODE_P (mode0))
13205 op0 = safe_vector_operand (op0, mode0);
13206
13207 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13208 op0 = copy_to_mode_reg (mode0, op0);
13209 }
13210
13211 pat = GEN_FCN (icode) (target, op0);
13212 if (! pat)
13213 return 0;
13214 emit_insn (pat);
13215 return target;
13216 }
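/* The DO_LOAD flag (illustrative): IX86_BUILTIN_LOADUPS passes
   do_load == 1, so its single pointer argument is wrapped in a MEM and
   the whole expansion becomes a load, while IX86_BUILTIN_PF2ID passes
   do_load == 0 and simply applies the pf2id pattern to a register
   operand.  Both uses appear in ix86_expand_builtin below.  */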
13217
13218 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
13219 sqrtss, rsqrtss, rcpss. */
13220
13221 static rtx
13222 ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
13223 {
13224 rtx pat;
13225 tree arg0 = TREE_VALUE (arglist);
13226 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13227 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13228 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13229
13230 if (! target
13231 || GET_MODE (target) != tmode
13232 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13233 target = gen_reg_rtx (tmode);
13234
13235 if (VECTOR_MODE_P (mode0))
13236 op0 = safe_vector_operand (op0, mode0);
13237
13238 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13239 op0 = copy_to_mode_reg (mode0, op0);
13240
13241 op1 = op0;
13242 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
13243 op1 = copy_to_mode_reg (mode0, op1);
13244
13245 pat = GEN_FCN (icode) (target, op0, op1);
13246 if (! pat)
13247 return 0;
13248 emit_insn (pat);
13249 return target;
13250 }
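/* Scalar-unop sketch (illustrative): IX86_BUILTIN_SQRTSS below passes
   CODE_FOR_vmsqrtv4sf2 here.  The vm* patterns take two V4SF operands,
   one supplying the low element being operated on and one supplying the
   upper elements that pass through unchanged, so the single builtin
   argument is duplicated into both operand slots; this matches the
   _mm_sqrt_ss behaviour of preserving the argument's upper lanes.  */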
13251
13252 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
13253
13254 static rtx
13255 ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
13256 rtx target)
13257 {
13258 rtx pat;
13259 tree arg0 = TREE_VALUE (arglist);
13260 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13261 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13262 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13263 rtx op2;
13264 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
13265 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
13266 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
13267 enum rtx_code comparison = d->comparison;
13268
13269 if (VECTOR_MODE_P (mode0))
13270 op0 = safe_vector_operand (op0, mode0);
13271 if (VECTOR_MODE_P (mode1))
13272 op1 = safe_vector_operand (op1, mode1);
13273
13274 /* Swap operands if we have a comparison that isn't available in
13275 hardware. */
13276 if (d->flag)
13277 {
13278 rtx tmp = gen_reg_rtx (mode1);
13279 emit_move_insn (tmp, op1);
13280 op1 = op0;
13281 op0 = tmp;
13282 }
13283
13284 if (! target
13285 || GET_MODE (target) != tmode
13286 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
13287 target = gen_reg_rtx (tmode);
13288
13289 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
13290 op0 = copy_to_mode_reg (mode0, op0);
13291 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
13292 op1 = copy_to_mode_reg (mode1, op1);
13293
13294 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13295 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
13296 if (! pat)
13297 return 0;
13298 emit_insn (pat);
13299 return target;
13300 }
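/* Comparison sketch (illustrative): the D->FLAG swap above is how
   comparisons the hardware lacks are synthesised.  CMPPS has no
   greater-than predicate, so the greater-than builtins (e.g.
   __builtin_ia32_cmpgtps) are listed with the LT comparison code and
   the swap flag set; the operands are exchanged here and the ordinary
   maskcmpv4sf3 pattern is emitted, producing the usual
   all-ones/all-zeros element mask.  */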
13301
13302 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
13303
13304 static rtx
13305 ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
13306 rtx target)
13307 {
13308 rtx pat;
13309 tree arg0 = TREE_VALUE (arglist);
13310 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13311 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13312 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13313 rtx op2;
13314 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
13315 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
13316 enum rtx_code comparison = d->comparison;
13317
13318 if (VECTOR_MODE_P (mode0))
13319 op0 = safe_vector_operand (op0, mode0);
13320 if (VECTOR_MODE_P (mode1))
13321 op1 = safe_vector_operand (op1, mode1);
13322
13323 /* Swap operands if we have a comparison that isn't available in
13324 hardware. */
13325 if (d->flag)
13326 {
13327 rtx tmp = op1;
13328 op1 = op0;
13329 op0 = tmp;
13330 }
13331
13332 target = gen_reg_rtx (SImode);
13333 emit_move_insn (target, const0_rtx);
13334 target = gen_rtx_SUBREG (QImode, target, 0);
13335
13336 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
13337 op0 = copy_to_mode_reg (mode0, op0);
13338 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
13339 op1 = copy_to_mode_reg (mode1, op1);
13340
13341 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13342 pat = GEN_FCN (d->icode) (op0, op1);
13343 if (! pat)
13344 return 0;
13345 emit_insn (pat);
13346 emit_insn (gen_rtx_SET (VOIDmode,
13347 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
13348 gen_rtx_fmt_ee (comparison, QImode,
13349 SET_DEST (pat),
13350 const0_rtx)));
13351
13352 return SUBREG_REG (target);
13353 }
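/* COMI sketch (illustrative): for the comi/ucomi builtins the pattern
   emitted by GEN_FCN only sets the flags from a scalar compare.  The
   STRICT_LOW_PART SET above then materialises the requested condition
   as 0 or 1 in the low byte of a fresh SImode pseudo, which is the int
   value these builtins return.  */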
13354
13355 /* Expand an expression EXP that calls a built-in function,
13356 with result going to TARGET if that's convenient
13357 (and in mode MODE if that's convenient).
13358 SUBTARGET may be used as the target for computing one of EXP's operands.
13359 IGNORE is nonzero if the value is to be ignored. */
13360
13361 rtx
13362 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
13363 enum machine_mode mode ATTRIBUTE_UNUSED,
13364 int ignore ATTRIBUTE_UNUSED)
13365 {
13366 const struct builtin_description *d;
13367 size_t i;
13368 enum insn_code icode;
13369 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
13370 tree arglist = TREE_OPERAND (exp, 1);
13371 tree arg0, arg1, arg2;
13372 rtx op0, op1, op2, pat;
13373 enum machine_mode tmode, mode0, mode1, mode2;
13374 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
13375
13376 switch (fcode)
13377 {
13378 case IX86_BUILTIN_EMMS:
13379 emit_insn (gen_emms ());
13380 return 0;
13381
13382 case IX86_BUILTIN_SFENCE:
13383 emit_insn (gen_sfence ());
13384 return 0;
13385
13386 case IX86_BUILTIN_PEXTRW:
13387 case IX86_BUILTIN_PEXTRW128:
13388 icode = (fcode == IX86_BUILTIN_PEXTRW
13389 ? CODE_FOR_mmx_pextrw
13390 : CODE_FOR_sse2_pextrw);
13391 arg0 = TREE_VALUE (arglist);
13392 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13393 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13394 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13395 tmode = insn_data[icode].operand[0].mode;
13396 mode0 = insn_data[icode].operand[1].mode;
13397 mode1 = insn_data[icode].operand[2].mode;
13398
13399 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13400 op0 = copy_to_mode_reg (mode0, op0);
13401 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13402 {
13403 error ("selector must be an integer constant in the range 0..%i",
13404 fcode == IX86_BUILTIN_PEXTRW ? 3:7);
13405 return gen_reg_rtx (tmode);
13406 }
13407 if (target == 0
13408 || GET_MODE (target) != tmode
13409 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13410 target = gen_reg_rtx (tmode);
13411 pat = GEN_FCN (icode) (target, op0, op1);
13412 if (! pat)
13413 return 0;
13414 emit_insn (pat);
13415 return target;
13416
13417 case IX86_BUILTIN_PINSRW:
13418 case IX86_BUILTIN_PINSRW128:
13419 icode = (fcode == IX86_BUILTIN_PINSRW
13420 ? CODE_FOR_mmx_pinsrw
13421 : CODE_FOR_sse2_pinsrw);
13422 arg0 = TREE_VALUE (arglist);
13423 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13424 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13425 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13426 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13427 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13428 tmode = insn_data[icode].operand[0].mode;
13429 mode0 = insn_data[icode].operand[1].mode;
13430 mode1 = insn_data[icode].operand[2].mode;
13431 mode2 = insn_data[icode].operand[3].mode;
13432
13433 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13434 op0 = copy_to_mode_reg (mode0, op0);
13435 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13436 op1 = copy_to_mode_reg (mode1, op1);
13437 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13438 {
13439 error ("selector must be an integer constant in the range 0..%i",
13440 fcode == IX86_BUILTIN_PINSRW ? 3:7);
13441 return const0_rtx;
13442 }
13443 if (target == 0
13444 || GET_MODE (target) != tmode
13445 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13446 target = gen_reg_rtx (tmode);
13447 pat = GEN_FCN (icode) (target, op0, op1, op2);
13448 if (! pat)
13449 return 0;
13450 emit_insn (pat);
13451 return target;
13452
13453 case IX86_BUILTIN_MASKMOVQ:
13454 case IX86_BUILTIN_MASKMOVDQU:
13455 icode = (fcode == IX86_BUILTIN_MASKMOVQ
13456 ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
13457 : (TARGET_64BIT ? CODE_FOR_sse2_maskmovdqu_rex64
13458 : CODE_FOR_sse2_maskmovdqu));
13459 /* Note the arg order is different from the operand order. */
13460 arg1 = TREE_VALUE (arglist);
13461 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
13462 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13463 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13464 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13465 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13466 mode0 = insn_data[icode].operand[0].mode;
13467 mode1 = insn_data[icode].operand[1].mode;
13468 mode2 = insn_data[icode].operand[2].mode;
13469
13470 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13471 op0 = copy_to_mode_reg (mode0, op0);
13472 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
13473 op1 = copy_to_mode_reg (mode1, op1);
13474 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
13475 op2 = copy_to_mode_reg (mode2, op2);
13476 pat = GEN_FCN (icode) (op0, op1, op2);
13477 if (! pat)
13478 return 0;
13479 emit_insn (pat);
13480 return 0;
13481
13482 case IX86_BUILTIN_SQRTSS:
13483 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
13484 case IX86_BUILTIN_RSQRTSS:
13485 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
13486 case IX86_BUILTIN_RCPSS:
13487 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
13488
13489 case IX86_BUILTIN_LOADAPS:
13490 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
13491
13492 case IX86_BUILTIN_LOADUPS:
13493 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
13494
13495 case IX86_BUILTIN_STOREAPS:
13496 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
13497
13498 case IX86_BUILTIN_STOREUPS:
13499 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
13500
13501 case IX86_BUILTIN_LOADSS:
13502 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
13503
13504 case IX86_BUILTIN_STORESS:
13505 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
13506
13507 case IX86_BUILTIN_LOADHPS:
13508 case IX86_BUILTIN_LOADLPS:
13509 case IX86_BUILTIN_LOADHPD:
13510 case IX86_BUILTIN_LOADLPD:
13511 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
13512 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
13513 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
13514 : CODE_FOR_sse2_loadlpd);
13515 arg0 = TREE_VALUE (arglist);
13516 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13517 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13518 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13519 tmode = insn_data[icode].operand[0].mode;
13520 mode0 = insn_data[icode].operand[1].mode;
13521 mode1 = insn_data[icode].operand[2].mode;
13522
13523 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13524 op0 = copy_to_mode_reg (mode0, op0);
13525 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
13526 if (target == 0
13527 || GET_MODE (target) != tmode
13528 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13529 target = gen_reg_rtx (tmode);
13530 pat = GEN_FCN (icode) (target, op0, op1);
13531 if (! pat)
13532 return 0;
13533 emit_insn (pat);
13534 return target;
13535
13536 case IX86_BUILTIN_STOREHPS:
13537 case IX86_BUILTIN_STORELPS:
13538 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
13539 : CODE_FOR_sse_movlps);
13540 arg0 = TREE_VALUE (arglist);
13541 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13542 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13543 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13544 mode0 = insn_data[icode].operand[1].mode;
13545 mode1 = insn_data[icode].operand[2].mode;
13546
13547 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13548 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13549 op1 = copy_to_mode_reg (mode1, op1);
13550
13551 pat = GEN_FCN (icode) (op0, op0, op1);
13552 if (! pat)
13553 return 0;
13554 emit_insn (pat);
13555 return const0_rtx;
13556
13557 case IX86_BUILTIN_STOREHPD:
13558 case IX86_BUILTIN_STORELPD:
13559 icode = (fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_storehpd
13560 : CODE_FOR_sse2_storelpd);
13561 arg0 = TREE_VALUE (arglist);
13562 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13563 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13564 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13565 mode0 = insn_data[icode].operand[0].mode;
13566 mode1 = insn_data[icode].operand[1].mode;
13567
13568 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13569 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
13570 op1 = copy_to_mode_reg (mode1, op1);
13571
13572 pat = GEN_FCN (icode) (op0, op1);
13573 if (! pat)
13574 return 0;
13575 emit_insn (pat);
13576 return const0_rtx;
13577
13578 case IX86_BUILTIN_MOVNTPS:
13579 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
13580 case IX86_BUILTIN_MOVNTQ:
13581 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
13582
13583 case IX86_BUILTIN_LDMXCSR:
13584 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
13585 target = assign_386_stack_local (SImode, 0);
13586 emit_move_insn (target, op0);
13587 emit_insn (gen_ldmxcsr (target));
13588 return 0;
13589
13590 case IX86_BUILTIN_STMXCSR:
13591 target = assign_386_stack_local (SImode, 0);
13592 emit_insn (gen_stmxcsr (target));
13593 return copy_to_mode_reg (SImode, target);
13594
13595 case IX86_BUILTIN_SHUFPS:
13596 case IX86_BUILTIN_SHUFPD:
13597 icode = (fcode == IX86_BUILTIN_SHUFPS
13598 ? CODE_FOR_sse_shufps
13599 : CODE_FOR_sse2_shufpd);
13600 arg0 = TREE_VALUE (arglist);
13601 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13602 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13603 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13604 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13605 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13606 tmode = insn_data[icode].operand[0].mode;
13607 mode0 = insn_data[icode].operand[1].mode;
13608 mode1 = insn_data[icode].operand[2].mode;
13609 mode2 = insn_data[icode].operand[3].mode;
13610
13611 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13612 op0 = copy_to_mode_reg (mode0, op0);
13613 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13614 op1 = copy_to_mode_reg (mode1, op1);
13615 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13616 {
13617 /* @@@ better error message */
13618 error ("mask must be an immediate");
13619 return gen_reg_rtx (tmode);
13620 }
13621 if (target == 0
13622 || GET_MODE (target) != tmode
13623 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13624 target = gen_reg_rtx (tmode);
13625 pat = GEN_FCN (icode) (target, op0, op1, op2);
13626 if (! pat)
13627 return 0;
13628 emit_insn (pat);
13629 return target;
13630
13631 case IX86_BUILTIN_PSHUFW:
13632 case IX86_BUILTIN_PSHUFD:
13633 case IX86_BUILTIN_PSHUFHW:
13634 case IX86_BUILTIN_PSHUFLW:
13635 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
13636 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
13637 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
13638 : CODE_FOR_mmx_pshufw);
13639 arg0 = TREE_VALUE (arglist);
13640 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13641 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13642 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13643 tmode = insn_data[icode].operand[0].mode;
13644 mode1 = insn_data[icode].operand[1].mode;
13645 mode2 = insn_data[icode].operand[2].mode;
13646
13647 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
13648 op0 = copy_to_mode_reg (mode1, op0);
13649 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
13650 {
13651 /* @@@ better error message */
13652 error ("mask must be an immediate");
13653 return const0_rtx;
13654 }
13655 if (target == 0
13656 || GET_MODE (target) != tmode
13657 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13658 target = gen_reg_rtx (tmode);
13659 pat = GEN_FCN (icode) (target, op0, op1);
13660 if (! pat)
13661 return 0;
13662 emit_insn (pat);
13663 return target;
13664
13665 case IX86_BUILTIN_PSLLDQI128:
13666 case IX86_BUILTIN_PSRLDQI128:
13667 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
13668 : CODE_FOR_sse2_lshrti3);
13669 arg0 = TREE_VALUE (arglist);
13670 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13671 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13672 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13673 tmode = insn_data[icode].operand[0].mode;
13674 mode1 = insn_data[icode].operand[1].mode;
13675 mode2 = insn_data[icode].operand[2].mode;
13676
13677 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
13678 {
13679 op0 = copy_to_reg (op0);
13680 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
13681 }
13682 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
13683 {
13684 error ("shift must be an immediate");
13685 return const0_rtx;
13686 }
13687 target = gen_reg_rtx (V2DImode);
13688 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
13689 if (! pat)
13690 return 0;
13691 emit_insn (pat);
13692 return target;
13693
13694 case IX86_BUILTIN_FEMMS:
13695 emit_insn (gen_femms ());
13696 return NULL_RTX;
13697
13698 case IX86_BUILTIN_PAVGUSB:
13699 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
13700
13701 case IX86_BUILTIN_PF2ID:
13702 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
13703
13704 case IX86_BUILTIN_PFACC:
13705 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
13706
13707 case IX86_BUILTIN_PFADD:
13708 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
13709
13710 case IX86_BUILTIN_PFCMPEQ:
13711 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
13712
13713 case IX86_BUILTIN_PFCMPGE:
13714 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
13715
13716 case IX86_BUILTIN_PFCMPGT:
13717 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
13718
13719 case IX86_BUILTIN_PFMAX:
13720 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
13721
13722 case IX86_BUILTIN_PFMIN:
13723 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
13724
13725 case IX86_BUILTIN_PFMUL:
13726 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
13727
13728 case IX86_BUILTIN_PFRCP:
13729 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
13730
13731 case IX86_BUILTIN_PFRCPIT1:
13732 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
13733
13734 case IX86_BUILTIN_PFRCPIT2:
13735 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
13736
13737 case IX86_BUILTIN_PFRSQIT1:
13738 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
13739
13740 case IX86_BUILTIN_PFRSQRT:
13741 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
13742
13743 case IX86_BUILTIN_PFSUB:
13744 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
13745
13746 case IX86_BUILTIN_PFSUBR:
13747 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
13748
13749 case IX86_BUILTIN_PI2FD:
13750 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
13751
13752 case IX86_BUILTIN_PMULHRW:
13753 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
13754
13755 case IX86_BUILTIN_PF2IW:
13756 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
13757
13758 case IX86_BUILTIN_PFNACC:
13759 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
13760
13761 case IX86_BUILTIN_PFPNACC:
13762 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
13763
13764 case IX86_BUILTIN_PI2FW:
13765 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
13766
13767 case IX86_BUILTIN_PSWAPDSI:
13768 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
13769
13770 case IX86_BUILTIN_PSWAPDSF:
13771 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
13772
13773 case IX86_BUILTIN_SSE_ZERO:
13774 target = gen_reg_rtx (V4SFmode);
13775 emit_insn (gen_sse_clrv4sf (target, CONST0_RTX (V4SFmode)));
13776 return target;
13777
13778 case IX86_BUILTIN_MMX_ZERO:
13779 target = gen_reg_rtx (DImode);
13780 emit_insn (gen_mmx_clrdi (target));
13781 return target;
13782
13783 case IX86_BUILTIN_CLRTI:
13784 target = gen_reg_rtx (V2DImode);
13785 emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode, target, V2DImode, 0)));
13786 return target;
13787
13788
13789 case IX86_BUILTIN_SQRTSD:
13790 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
13791 case IX86_BUILTIN_LOADAPD:
13792 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
13793 case IX86_BUILTIN_LOADUPD:
13794 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
13795
13796 case IX86_BUILTIN_STOREAPD:
13797 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13798 case IX86_BUILTIN_STOREUPD:
13799 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
13800
13801 case IX86_BUILTIN_LOADSD:
13802 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
13803
13804 case IX86_BUILTIN_STORESD:
13805 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
13806
13807 case IX86_BUILTIN_SETPD1:
13808 target = assign_386_stack_local (DFmode, 0);
13809 arg0 = TREE_VALUE (arglist);
13810 emit_move_insn (adjust_address (target, DFmode, 0),
13811 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
13812 op0 = gen_reg_rtx (V2DFmode);
13813 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
13814 emit_insn (gen_sse2_shufpd (op0, op0, op0, const0_rtx));
13815 return op0;
13816
13817 case IX86_BUILTIN_SETPD:
13818 target = assign_386_stack_local (V2DFmode, 0);
13819 arg0 = TREE_VALUE (arglist);
13820 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13821 emit_move_insn (adjust_address (target, DFmode, 0),
13822 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
13823 emit_move_insn (adjust_address (target, DFmode, 8),
13824 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
13825 op0 = gen_reg_rtx (V2DFmode);
13826 emit_insn (gen_sse2_movapd (op0, target));
13827 return op0;
13828
13829 case IX86_BUILTIN_LOADRPD:
13830 target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
13831 gen_reg_rtx (V2DFmode), 1);
13832 emit_insn (gen_sse2_shufpd (target, target, target, const1_rtx));
13833 return target;
13834
13835 case IX86_BUILTIN_LOADPD1:
13836 target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
13837 gen_reg_rtx (V2DFmode), 1);
13838 emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
13839 return target;
13840
13841 case IX86_BUILTIN_STOREPD1:
13842 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13843 case IX86_BUILTIN_STORERPD:
13844 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13845
13846 case IX86_BUILTIN_CLRPD:
13847 target = gen_reg_rtx (V2DFmode);
13848 emit_insn (gen_sse_clrv2df (target));
13849 return target;
13850
13851 case IX86_BUILTIN_MFENCE:
13852 emit_insn (gen_sse2_mfence ());
13853 return 0;
13854 case IX86_BUILTIN_LFENCE:
13855 emit_insn (gen_sse2_lfence ());
13856 return 0;
13857
13858 case IX86_BUILTIN_CLFLUSH:
13859 arg0 = TREE_VALUE (arglist);
13860 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13861 icode = CODE_FOR_sse2_clflush;
13862 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
13863 op0 = copy_to_mode_reg (Pmode, op0);
13864
13865 emit_insn (gen_sse2_clflush (op0));
13866 return 0;
13867
13868 case IX86_BUILTIN_MOVNTPD:
13869 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
13870 case IX86_BUILTIN_MOVNTDQ:
13871 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
13872 case IX86_BUILTIN_MOVNTI:
13873 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
13874
13875 case IX86_BUILTIN_LOADDQA:
13876 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa, arglist, target, 1);
13877 case IX86_BUILTIN_LOADDQU:
13878 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
13879 case IX86_BUILTIN_LOADD:
13880 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd, arglist, target, 1);
13881
13882 case IX86_BUILTIN_STOREDQA:
13883 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa, arglist);
13884 case IX86_BUILTIN_STOREDQU:
13885 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
13886 case IX86_BUILTIN_STORED:
13887 return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist);
13888
13889 case IX86_BUILTIN_MONITOR:
13890 arg0 = TREE_VALUE (arglist);
13891 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13892 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13893 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13894 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13895 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13896 if (!REG_P (op0))
13897 op0 = copy_to_mode_reg (SImode, op0);
13898 if (!REG_P (op1))
13899 op1 = copy_to_mode_reg (SImode, op1);
13900 if (!REG_P (op2))
13901 op2 = copy_to_mode_reg (SImode, op2);
13902 emit_insn (gen_monitor (op0, op1, op2));
13903 return 0;
13904
13905 case IX86_BUILTIN_MWAIT:
13906 arg0 = TREE_VALUE (arglist);
13907 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13908 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13909 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13910 if (!REG_P (op0))
13911 op0 = copy_to_mode_reg (SImode, op0);
13912 if (!REG_P (op1))
13913 op1 = copy_to_mode_reg (SImode, op1);
13914 emit_insn (gen_mwait (op0, op1));
13915 return 0;
13916
13917 case IX86_BUILTIN_LOADDDUP:
13918 return ix86_expand_unop_builtin (CODE_FOR_loadddup, arglist, target, 1);
13919
13920 case IX86_BUILTIN_LDDQU:
13921 return ix86_expand_unop_builtin (CODE_FOR_lddqu, arglist, target,
13922 1);
13923
13924 default:
13925 break;
13926 }
13927
13928 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
13929 if (d->code == fcode)
13930 {
13931 /* Compares are treated specially. */
13932 if (d->icode == CODE_FOR_maskcmpv4sf3
13933 || d->icode == CODE_FOR_vmmaskcmpv4sf3
13934 || d->icode == CODE_FOR_maskncmpv4sf3
13935 || d->icode == CODE_FOR_vmmaskncmpv4sf3
13936 || d->icode == CODE_FOR_maskcmpv2df3
13937 || d->icode == CODE_FOR_vmmaskcmpv2df3
13938 || d->icode == CODE_FOR_maskncmpv2df3
13939 || d->icode == CODE_FOR_vmmaskncmpv2df3)
13940 return ix86_expand_sse_compare (d, arglist, target);
13941
13942 return ix86_expand_binop_builtin (d->icode, arglist, target);
13943 }
13944
13945 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
13946 if (d->code == fcode)
13947 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
13948
13949 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
13950 if (d->code == fcode)
13951 return ix86_expand_sse_comi (d, arglist, target);
13952
13953 /* @@@ Should really do something sensible here. */
13954 return 0;
13955 }
13956
13957 /* Store OPERAND to memory after reload is completed.  This means
13958 that we can't easily use assign_stack_local. */
13959 rtx
13960 ix86_force_to_memory (enum machine_mode mode, rtx operand)
13961 {
13962 rtx result;
13963 if (!reload_completed)
13964 abort ();
13965 if (TARGET_RED_ZONE)
13966 {
13967 result = gen_rtx_MEM (mode,
13968 gen_rtx_PLUS (Pmode,
13969 stack_pointer_rtx,
13970 GEN_INT (-RED_ZONE_SIZE)));
13971 emit_move_insn (result, operand);
13972 }
13973 else if (!TARGET_RED_ZONE && TARGET_64BIT)
13974 {
13975 switch (mode)
13976 {
13977 case HImode:
13978 case SImode:
13979 operand = gen_lowpart (DImode, operand);
13980 /* FALLTHRU */
13981 case DImode:
13982 emit_insn (
13983 gen_rtx_SET (VOIDmode,
13984 gen_rtx_MEM (DImode,
13985 gen_rtx_PRE_DEC (DImode,
13986 stack_pointer_rtx)),
13987 operand));
13988 break;
13989 default:
13990 abort ();
13991 }
13992 result = gen_rtx_MEM (mode, stack_pointer_rtx);
13993 }
13994 else
13995 {
13996 switch (mode)
13997 {
13998 case DImode:
13999 {
14000 rtx operands[2];
14001 split_di (&operand, 1, operands, operands + 1);
14002 emit_insn (
14003 gen_rtx_SET (VOIDmode,
14004 gen_rtx_MEM (SImode,
14005 gen_rtx_PRE_DEC (Pmode,
14006 stack_pointer_rtx)),
14007 operands[1]));
14008 emit_insn (
14009 gen_rtx_SET (VOIDmode,
14010 gen_rtx_MEM (SImode,
14011 gen_rtx_PRE_DEC (Pmode,
14012 stack_pointer_rtx)),
14013 operands[0]));
14014 }
14015 break;
14016 case HImode:
14017 /* It is better to store HImodes as SImodes. */
14018 if (!TARGET_PARTIAL_REG_STALL)
14019 operand = gen_lowpart (SImode, operand);
14020 /* FALLTHRU */
14021 case SImode:
14022 emit_insn (
14023 gen_rtx_SET (VOIDmode,
14024 gen_rtx_MEM (GET_MODE (operand),
14025 gen_rtx_PRE_DEC (SImode,
14026 stack_pointer_rtx)),
14027 operand));
14028 break;
14029 default:
14030 abort ();
14031 }
14032 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14033 }
14034 return result;
14035 }
14036
14037 /* Free the operand from memory.  */
14038 void
14039 ix86_free_from_memory (enum machine_mode mode)
14040 {
14041 if (!TARGET_RED_ZONE)
14042 {
14043 int size;
14044
14045 if (mode == DImode || TARGET_64BIT)
14046 size = 8;
14047 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
14048 size = 2;
14049 else
14050 size = 4;
14051 /* Use LEA to deallocate stack space. In peephole2 it will be converted
14052      to a pop or add instruction if registers are available.  */
14053 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
14054 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14055 GEN_INT (size))));
14056 }
14057 }
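/* A sketch of the intended pairing of the two helpers above (hypothetical
   caller, shown for illustration only):

     rtx slot = ix86_force_to_memory (SImode, operand);
     ... emit insns that use SLOT as a memory operand ...
     ix86_free_from_memory (SImode);

   The slot lives either in the red zone or just below the stack pointer,
   so it is valid only until the matching ix86_free_from_memory call.  */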
14058
14059 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
14060 QImode must go into class Q_REGS.
14061 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
14062 movdf to do mem-to-mem moves through integer regs. */
14063 enum reg_class
14064 ix86_preferred_reload_class (rtx x, enum reg_class class)
14065 {
14066 if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x)))
14067 return NO_REGS;
14068 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
14069 {
14070 /* SSE can't load any constant directly yet. */
14071 if (SSE_CLASS_P (class))
14072 return NO_REGS;
14073 /* Floats can load 0 and 1. */
14074 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
14075 {
14076 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
14077 if (MAYBE_SSE_CLASS_P (class))
14078 return (reg_class_subset_p (class, GENERAL_REGS)
14079 ? GENERAL_REGS : FLOAT_REGS);
14080 else
14081 return class;
14082 }
14083 /* General regs can load everything. */
14084 if (reg_class_subset_p (class, GENERAL_REGS))
14085 return GENERAL_REGS;
14086 /* In case we haven't resolved FLOAT or SSE yet, give up. */
14087 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
14088 return NO_REGS;
14089 }
14090 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
14091 return NO_REGS;
14092 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
14093 return Q_REGS;
14094 return class;
14095 }
14096
14097 /* If we are copying between general and FP registers, we need a memory
14098 location. The same is true for SSE and MMX registers.
14099
14100    The macro can't work reliably when one of the CLASSES is a class containing
14101    registers from multiple units (SSE, MMX, integer).  We avoid this by never
14102    combining those units in a single alternative in the machine description.
14103 Ensure that this constraint holds to avoid unexpected surprises.
14104
14105 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
14106 enforce these sanity checks. */
14107 int
14108 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
14109 enum machine_mode mode, int strict)
14110 {
14111 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
14112 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
14113 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
14114 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
14115 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
14116 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
14117 {
14118 if (strict)
14119 abort ();
14120 else
14121 return 1;
14122 }
14123 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
14124 || ((SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
14125 || MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
14126 && ((mode != SImode && (mode != DImode || !TARGET_64BIT))
14127 || (!TARGET_INTER_UNIT_MOVES && !optimize_size))));
14128 }
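/* An example of how the predicate above behaves (a sketch, assuming default
   tuning): an SImode copy between GENERAL_REGS and SSE_REGS needs no
   intermediate memory when inter-unit moves are enabled, while a V4SFmode
   copy between SSE and MMX classes always goes through memory because the
   mode is neither SImode nor DImode-on-64-bit.  */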
14129 /* Return the cost of moving data from a register in class CLASS1 to
14130 one in class CLASS2.
14131
14132 It is not required that the cost always equal 2 when FROM is the same as TO;
14133 on some machines it is expensive to move between registers if they are not
14134 general registers. */
14135 int
14136 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
14137 enum reg_class class2)
14138 {
14139   /* In case we require secondary memory, compute the cost of the store
14140      followed by the load.  To avoid bad register allocation choices, we need
14141      this to be *at least* as high as the symmetric MEMORY_MOVE_COST.  */
14142
14143 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
14144 {
14145 int cost = 1;
14146
14147 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
14148 MEMORY_MOVE_COST (mode, class1, 1));
14149 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
14150 MEMORY_MOVE_COST (mode, class2, 1));
14151
14152       /* When copying from a general purpose register we may emit multiple
14153          stores followed by a single load, causing a memory size mismatch stall.
14154          Count this as an arbitrarily high cost of 20.  */
14155 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
14156 cost += 20;
14157
14158 /* In the case of FP/MMX moves, the registers actually overlap, and we
14159 have to switch modes in order to treat them differently. */
14160 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
14161 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
14162 cost += 20;
14163
14164 return cost;
14165 }
14166
14167 /* Moves between SSE/MMX and integer unit are expensive. */
14168 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
14169 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
14170 return ix86_cost->mmxsse_to_integer;
14171 if (MAYBE_FLOAT_CLASS_P (class1))
14172 return ix86_cost->fp_move;
14173 if (MAYBE_SSE_CLASS_P (class1))
14174 return ix86_cost->sse_move;
14175 if (MAYBE_MMX_CLASS_P (class1))
14176 return ix86_cost->mmx_move;
14177 return 2;
14178 }
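/* Worked example for the secondary-memory path above (a sketch with made-up
   MEMORY_MOVE_COST values): if both classes cost 4 to load and to store, the
   move is costed as 1 + 4 + 4 = 9, plus 20 when CLASS1 needs more hard
   registers than CLASS2 for the mode, and another 20 for FP/MMX overlap.  */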
14179
14180 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
14181 int
14182 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
14183 {
14184   /* Flags, and only flags, can hold CCmode values.  */
14185 if (CC_REGNO_P (regno))
14186 return GET_MODE_CLASS (mode) == MODE_CC;
14187 if (GET_MODE_CLASS (mode) == MODE_CC
14188 || GET_MODE_CLASS (mode) == MODE_RANDOM
14189 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
14190 return 0;
14191 if (FP_REGNO_P (regno))
14192 return VALID_FP_MODE_P (mode);
14193 if (SSE_REGNO_P (regno))
14194 {
14195 /* We implement the move patterns for all vector modes into and
14196 out of SSE registers, even when no operation instructions
14197 are available. */
14198 return (VALID_SSE_REG_MODE (mode)
14199 || VALID_SSE2_REG_MODE (mode)
14200 || VALID_MMX_REG_MODE (mode)
14201 || VALID_MMX_REG_MODE_3DNOW (mode));
14202 }
14203 if (MMX_REGNO_P (regno))
14204 {
14205 /* We implement the move patterns for 3DNOW modes even in MMX mode,
14206 so if the register is available at all, then we can move data of
14207 the given mode into or out of it. */
14208 return (VALID_MMX_REG_MODE (mode)
14209 || VALID_MMX_REG_MODE_3DNOW (mode));
14210 }
14211   /* We handle both integers and floats in the general purpose registers.
14212      In the future we should be able to handle vector modes as well.  */
14213 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
14214 return 0;
14215   /* Take care with QImode values - they can be in non-QI regs, but then
14216 they do cause partial register stalls. */
14217 if (regno < 4 || mode != QImode || TARGET_64BIT)
14218 return 1;
14219 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
14220 }
14221
14222 /* Return the cost of moving data of mode M between a
14223 register and memory. A value of 2 is the default; this cost is
14224 relative to those in `REGISTER_MOVE_COST'.
14225
14226 If moving between registers and memory is more expensive than
14227 between two registers, you should define this macro to express the
14228 relative cost.
14229
14230    Also model the increased cost of moving QImode registers in
14231    non-Q_REGS classes.
14232 */
14233 int
14234 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
14235 {
14236 if (FLOAT_CLASS_P (class))
14237 {
14238 int index;
14239 switch (mode)
14240 {
14241 case SFmode:
14242 index = 0;
14243 break;
14244 case DFmode:
14245 index = 1;
14246 break;
14247 case XFmode:
14248 index = 2;
14249 break;
14250 default:
14251 return 100;
14252 }
14253 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
14254 }
14255 if (SSE_CLASS_P (class))
14256 {
14257 int index;
14258 switch (GET_MODE_SIZE (mode))
14259 {
14260 case 4:
14261 index = 0;
14262 break;
14263 case 8:
14264 index = 1;
14265 break;
14266 case 16:
14267 index = 2;
14268 break;
14269 default:
14270 return 100;
14271 }
14272 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
14273 }
14274 if (MMX_CLASS_P (class))
14275 {
14276 int index;
14277 switch (GET_MODE_SIZE (mode))
14278 {
14279 case 4:
14280 index = 0;
14281 break;
14282 case 8:
14283 index = 1;
14284 break;
14285 default:
14286 return 100;
14287 }
14288 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
14289 }
14290 switch (GET_MODE_SIZE (mode))
14291 {
14292 case 1:
14293 if (in)
14294 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
14295 : ix86_cost->movzbl_load);
14296 else
14297 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
14298 : ix86_cost->int_store[0] + 4);
14299 break;
14300 case 2:
14301 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
14302 default:
14303 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
14304 if (mode == TFmode)
14305 mode = XFmode;
14306 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
14307 * (((int) GET_MODE_SIZE (mode)
14308 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
14309 }
14310 }
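/* Worked example for the default integer path above (a sketch): on 32-bit
   targets an 8-byte DImode value falls through to the last case and costs
   int_load[2] (or int_store[2]) times ceil (8 / UNITS_PER_WORD) = 2, i.e.
   two word-sized memory moves.  */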
14311
14312 /* Compute a (partial) cost for rtx X. Return true if the complete
14313 cost has been computed, and false if subexpressions should be
14314 scanned. In either case, *TOTAL contains the cost result. */
14315
14316 static bool
14317 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
14318 {
14319 enum machine_mode mode = GET_MODE (x);
14320
14321 switch (code)
14322 {
14323 case CONST_INT:
14324 case CONST:
14325 case LABEL_REF:
14326 case SYMBOL_REF:
14327 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
14328 *total = 3;
14329 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
14330 *total = 2;
14331 else if (flag_pic && SYMBOLIC_CONST (x)
14332 && (!TARGET_64BIT
14333                    || (GET_CODE (x) != LABEL_REF
14334 && (GET_CODE (x) != SYMBOL_REF
14335 || !SYMBOL_REF_LOCAL_P (x)))))
14336 *total = 1;
14337 else
14338 *total = 0;
14339 return true;
14340
14341 case CONST_DOUBLE:
14342 if (mode == VOIDmode)
14343 *total = 0;
14344 else
14345 switch (standard_80387_constant_p (x))
14346 {
14347 case 1: /* 0.0 */
14348 *total = 1;
14349 break;
14350 default: /* Other constants */
14351 *total = 2;
14352 break;
14353 case 0:
14354 case -1:
14355 /* Start with (MEM (SYMBOL_REF)), since that's where
14356 it'll probably end up. Add a penalty for size. */
14357 *total = (COSTS_N_INSNS (1)
14358 + (flag_pic != 0 && !TARGET_64BIT)
14359 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
14360 break;
14361 }
14362 return true;
14363
14364 case ZERO_EXTEND:
14365       /* The zero extension is often completely free on x86_64, so make
14366 it as cheap as possible. */
14367 if (TARGET_64BIT && mode == DImode
14368 && GET_MODE (XEXP (x, 0)) == SImode)
14369 *total = 1;
14370 else if (TARGET_ZERO_EXTEND_WITH_AND)
14371 *total = COSTS_N_INSNS (ix86_cost->add);
14372 else
14373 *total = COSTS_N_INSNS (ix86_cost->movzx);
14374 return false;
14375
14376 case SIGN_EXTEND:
14377 *total = COSTS_N_INSNS (ix86_cost->movsx);
14378 return false;
14379
14380 case ASHIFT:
14381 if (GET_CODE (XEXP (x, 1)) == CONST_INT
14382 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
14383 {
14384 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
14385 if (value == 1)
14386 {
14387 *total = COSTS_N_INSNS (ix86_cost->add);
14388 return false;
14389 }
14390 if ((value == 2 || value == 3)
14391 && ix86_cost->lea <= ix86_cost->shift_const)
14392 {
14393 *total = COSTS_N_INSNS (ix86_cost->lea);
14394 return false;
14395 }
14396 }
14397 /* FALLTHRU */
14398
14399 case ROTATE:
14400 case ASHIFTRT:
14401 case LSHIFTRT:
14402 case ROTATERT:
14403 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
14404 {
14405 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14406 {
14407 if (INTVAL (XEXP (x, 1)) > 32)
14408 *total = COSTS_N_INSNS(ix86_cost->shift_const + 2);
14409 else
14410 *total = COSTS_N_INSNS(ix86_cost->shift_const * 2);
14411 }
14412 else
14413 {
14414 if (GET_CODE (XEXP (x, 1)) == AND)
14415 *total = COSTS_N_INSNS(ix86_cost->shift_var * 2);
14416 else
14417 *total = COSTS_N_INSNS(ix86_cost->shift_var * 6 + 2);
14418 }
14419 }
14420 else
14421 {
14422 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14423 *total = COSTS_N_INSNS (ix86_cost->shift_const);
14424 else
14425 *total = COSTS_N_INSNS (ix86_cost->shift_var);
14426 }
14427 return false;
14428
14429 case MULT:
14430 if (FLOAT_MODE_P (mode))
14431 {
14432 *total = COSTS_N_INSNS (ix86_cost->fmul);
14433 return false;
14434 }
14435 else
14436 {
14437 rtx op0 = XEXP (x, 0);
14438 rtx op1 = XEXP (x, 1);
14439 int nbits;
14440 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14441 {
14442 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
14443 for (nbits = 0; value != 0; value &= value - 1)
14444 nbits++;
14445 }
14446 else
14447 /* This is arbitrary. */
14448 nbits = 7;
14449
14450 /* Compute costs correctly for widening multiplication. */
14451 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op1) == ZERO_EXTEND)
14452 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
14453 == GET_MODE_SIZE (mode))
14454 {
14455 int is_mulwiden = 0;
14456 enum machine_mode inner_mode = GET_MODE (op0);
14457
14458 if (GET_CODE (op0) == GET_CODE (op1))
14459 is_mulwiden = 1, op1 = XEXP (op1, 0);
14460 else if (GET_CODE (op1) == CONST_INT)
14461 {
14462 if (GET_CODE (op0) == SIGN_EXTEND)
14463 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
14464 == INTVAL (op1);
14465 else
14466 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
14467 }
14468
14469 if (is_mulwiden)
14470 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
14471 }
14472
14473 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
14474 + nbits * ix86_cost->mult_bit)
14475 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code);
14476
14477 return true;
14478 }
14479
14480 case DIV:
14481 case UDIV:
14482 case MOD:
14483 case UMOD:
14484 if (FLOAT_MODE_P (mode))
14485 *total = COSTS_N_INSNS (ix86_cost->fdiv);
14486 else
14487 *total = COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)]);
14488 return false;
14489
14490 case PLUS:
14491 if (FLOAT_MODE_P (mode))
14492 *total = COSTS_N_INSNS (ix86_cost->fadd);
14493 else if (GET_MODE_CLASS (mode) == MODE_INT
14494 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
14495 {
14496 if (GET_CODE (XEXP (x, 0)) == PLUS
14497 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
14498 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
14499 && CONSTANT_P (XEXP (x, 1)))
14500 {
14501 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
14502 if (val == 2 || val == 4 || val == 8)
14503 {
14504 *total = COSTS_N_INSNS (ix86_cost->lea);
14505 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
14506 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
14507 outer_code);
14508 *total += rtx_cost (XEXP (x, 1), outer_code);
14509 return true;
14510 }
14511 }
14512 else if (GET_CODE (XEXP (x, 0)) == MULT
14513 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
14514 {
14515 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
14516 if (val == 2 || val == 4 || val == 8)
14517 {
14518 *total = COSTS_N_INSNS (ix86_cost->lea);
14519 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
14520 *total += rtx_cost (XEXP (x, 1), outer_code);
14521 return true;
14522 }
14523 }
14524 else if (GET_CODE (XEXP (x, 0)) == PLUS)
14525 {
14526 *total = COSTS_N_INSNS (ix86_cost->lea);
14527 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
14528 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
14529 *total += rtx_cost (XEXP (x, 1), outer_code);
14530 return true;
14531 }
14532 }
14533 /* FALLTHRU */
14534
14535 case MINUS:
14536 if (FLOAT_MODE_P (mode))
14537 {
14538 *total = COSTS_N_INSNS (ix86_cost->fadd);
14539 return false;
14540 }
14541 /* FALLTHRU */
14542
14543 case AND:
14544 case IOR:
14545 case XOR:
14546 if (!TARGET_64BIT && mode == DImode)
14547 {
14548 *total = (COSTS_N_INSNS (ix86_cost->add) * 2
14549 + (rtx_cost (XEXP (x, 0), outer_code)
14550 << (GET_MODE (XEXP (x, 0)) != DImode))
14551 + (rtx_cost (XEXP (x, 1), outer_code)
14552 << (GET_MODE (XEXP (x, 1)) != DImode)));
14553 return true;
14554 }
14555 /* FALLTHRU */
14556
14557 case NEG:
14558 if (FLOAT_MODE_P (mode))
14559 {
14560 *total = COSTS_N_INSNS (ix86_cost->fchs);
14561 return false;
14562 }
14563 /* FALLTHRU */
14564
14565 case NOT:
14566 if (!TARGET_64BIT && mode == DImode)
14567 *total = COSTS_N_INSNS (ix86_cost->add * 2);
14568 else
14569 *total = COSTS_N_INSNS (ix86_cost->add);
14570 return false;
14571
14572 case COMPARE:
14573 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
14574 && XEXP (XEXP (x, 0), 1) == const1_rtx
14575 && GET_CODE (XEXP (XEXP (x, 0), 2)) == CONST_INT
14576 && XEXP (x, 1) == const0_rtx)
14577 {
14578 /* This kind of construct is implemented using test[bwl].
14579 Treat it as if we had an AND. */
14580 *total = (COSTS_N_INSNS (ix86_cost->add)
14581 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
14582 + rtx_cost (const1_rtx, outer_code));
14583 return true;
14584 }
14585 return false;
14586
14587 case FLOAT_EXTEND:
14588 if (!TARGET_SSE_MATH
14589 || mode == XFmode
14590 || (mode == DFmode && !TARGET_SSE2))
14591 *total = 0;
14592 return false;
14593
14594 case ABS:
14595 if (FLOAT_MODE_P (mode))
14596 *total = COSTS_N_INSNS (ix86_cost->fabs);
14597 return false;
14598
14599 case SQRT:
14600 if (FLOAT_MODE_P (mode))
14601 *total = COSTS_N_INSNS (ix86_cost->fsqrt);
14602 return false;
14603
14604 case UNSPEC:
14605 if (XINT (x, 1) == UNSPEC_TP)
14606 *total = 0;
14607 return false;
14608
14609 default:
14610 return false;
14611 }
14612 }
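/* Worked example for the MULT case above (a sketch): a multiplication by the
   constant 10 (binary 1010, two bits set) in SImode is costed as
   mult_init[MODE_INDEX (SImode)] + 2 * mult_bit plus the operand costs,
   while a non-constant multiplier uses the arbitrary estimate of 7 bits.  */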
14613
14614 #if TARGET_MACHO
14615
14616 static int current_machopic_label_num;
14617
14618 /* Given a symbol name and its associated stub, write out the
14619 definition of the stub. */
14620
14621 void
14622 machopic_output_stub (FILE *file, const char *symb, const char *stub)
14623 {
14624 unsigned int length;
14625 char *binder_name, *symbol_name, lazy_ptr_name[32];
14626 int label = ++current_machopic_label_num;
14627
14628 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
14629 symb = (*targetm.strip_name_encoding) (symb);
14630
14631 length = strlen (stub);
14632 binder_name = alloca (length + 32);
14633 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
14634
14635 length = strlen (symb);
14636 symbol_name = alloca (length + 32);
14637 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
14638
14639 sprintf (lazy_ptr_name, "L%d$lz", label);
14640
14641 if (MACHOPIC_PURE)
14642 machopic_picsymbol_stub_section ();
14643 else
14644 machopic_symbol_stub_section ();
14645
14646 fprintf (file, "%s:\n", stub);
14647 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
14648
14649 if (MACHOPIC_PURE)
14650 {
14651 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
14652 fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
14653 fprintf (file, "\tjmp %%edx\n");
14654 }
14655 else
14656 fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
14657
14658 fprintf (file, "%s:\n", binder_name);
14659
14660 if (MACHOPIC_PURE)
14661 {
14662 fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
14663 fprintf (file, "\tpushl %%eax\n");
14664 }
14665 else
14666 fprintf (file, "\t pushl $%s\n", lazy_ptr_name);
14667
14668 fprintf (file, "\tjmp dyld_stub_binding_helper\n");
14669
14670 machopic_lazy_symbol_ptr_section ();
14671 fprintf (file, "%s:\n", lazy_ptr_name);
14672 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
14673 fprintf (file, "\t.long %s\n", binder_name);
14674 }
14675 #endif /* TARGET_MACHO */
14676
14677 /* Order the registers for register allocator. */
14678
14679 void
14680 x86_order_regs_for_local_alloc (void)
14681 {
14682 int pos = 0;
14683 int i;
14684
14685 /* First allocate the local general purpose registers. */
14686 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
14687 if (GENERAL_REGNO_P (i) && call_used_regs[i])
14688 reg_alloc_order [pos++] = i;
14689
14690 /* Global general purpose registers. */
14691 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
14692 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
14693 reg_alloc_order [pos++] = i;
14694
14695 /* x87 registers come first in case we are doing FP math
14696 using them. */
14697 if (!TARGET_SSE_MATH)
14698 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
14699 reg_alloc_order [pos++] = i;
14700
14701 /* SSE registers. */
14702 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
14703 reg_alloc_order [pos++] = i;
14704 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
14705 reg_alloc_order [pos++] = i;
14706
14707 /* x87 registers. */
14708 if (TARGET_SSE_MATH)
14709 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
14710 reg_alloc_order [pos++] = i;
14711
14712 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
14713 reg_alloc_order [pos++] = i;
14714
14715   /* Initialize the rest of the array as we do not allocate some registers
14716 at all. */
14717 while (pos < FIRST_PSEUDO_REGISTER)
14718 reg_alloc_order [pos++] = 0;
14719 }
14720
14721 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
14722 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
14723 #endif
14724
14725 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
14726 struct attribute_spec.handler. */
14727 static tree
14728 ix86_handle_struct_attribute (tree *node, tree name,
14729 tree args ATTRIBUTE_UNUSED,
14730 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
14731 {
14732 tree *type = NULL;
14733 if (DECL_P (*node))
14734 {
14735 if (TREE_CODE (*node) == TYPE_DECL)
14736 type = &TREE_TYPE (*node);
14737 }
14738 else
14739 type = node;
14740
14741 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
14742 || TREE_CODE (*type) == UNION_TYPE)))
14743 {
14744 warning ("%qs attribute ignored", IDENTIFIER_POINTER (name));
14745 *no_add_attrs = true;
14746 }
14747
14748 else if ((is_attribute_p ("ms_struct", name)
14749 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
14750 || ((is_attribute_p ("gcc_struct", name)
14751 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
14752 {
14753 warning ("%qs incompatible attribute ignored",
14754 IDENTIFIER_POINTER (name));
14755 *no_add_attrs = true;
14756 }
14757
14758 return NULL_TREE;
14759 }
14760
14761 static bool
14762 ix86_ms_bitfield_layout_p (tree record_type)
14763 {
14764 return (TARGET_USE_MS_BITFIELD_LAYOUT &&
14765 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
14766 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
14767 }
14768
14769 /* Returns an expression indicating where the this parameter is
14770 located on entry to the FUNCTION. */
14771
14772 static rtx
14773 x86_this_parameter (tree function)
14774 {
14775 tree type = TREE_TYPE (function);
14776
14777 if (TARGET_64BIT)
14778 {
14779 int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
14780 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
14781 }
14782
14783 if (ix86_function_regparm (type, function) > 0)
14784 {
14785 tree parm;
14786
14787 parm = TYPE_ARG_TYPES (type);
14788 /* Figure out whether or not the function has a variable number of
14789 arguments. */
14790 for (; parm; parm = TREE_CHAIN (parm))
14791 if (TREE_VALUE (parm) == void_type_node)
14792 break;
14793 /* If not, the this parameter is in the first argument. */
14794 if (parm)
14795 {
14796 int regno = 0;
14797 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
14798 regno = 2;
14799 return gen_rtx_REG (SImode, regno);
14800 }
14801 }
14802
14803 if (aggregate_value_p (TREE_TYPE (type), type))
14804 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
14805 else
14806 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
14807 }
14808
14809 /* Determine whether x86_output_mi_thunk can succeed. */
14810
14811 static bool
14812 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
14813 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
14814 HOST_WIDE_INT vcall_offset, tree function)
14815 {
14816 /* 64-bit can handle anything. */
14817 if (TARGET_64BIT)
14818 return true;
14819
14820 /* For 32-bit, everything's fine if we have one free register. */
14821 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
14822 return true;
14823
14824 /* Need a free register for vcall_offset. */
14825 if (vcall_offset)
14826 return false;
14827
14828 /* Need a free register for GOT references. */
14829 if (flag_pic && !(*targetm.binds_local_p) (function))
14830 return false;
14831
14832 /* Otherwise ok. */
14833 return true;
14834 }
14835
14836 /* Output the assembler code for a thunk function. THUNK_DECL is the
14837 declaration for the thunk function itself, FUNCTION is the decl for
14838 the target function. DELTA is an immediate constant offset to be
14839 added to THIS. If VCALL_OFFSET is nonzero, the word at
14840 *(*this + vcall_offset) should be added to THIS. */
14841
14842 static void
14843 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
14844 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
14845 HOST_WIDE_INT vcall_offset, tree function)
14846 {
14847 rtx xops[3];
14848 rtx this = x86_this_parameter (function);
14849 rtx this_reg, tmp;
14850
14851 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
14852 pull it in now and let DELTA benefit. */
14853 if (REG_P (this))
14854 this_reg = this;
14855 else if (vcall_offset)
14856 {
14857 /* Put the this parameter into %eax. */
14858 xops[0] = this;
14859 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
14860 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
14861 }
14862 else
14863 this_reg = NULL_RTX;
14864
14865 /* Adjust the this parameter by a fixed constant. */
14866 if (delta)
14867 {
14868 xops[0] = GEN_INT (delta);
14869 xops[1] = this_reg ? this_reg : this;
14870 if (TARGET_64BIT)
14871 {
14872 if (!x86_64_general_operand (xops[0], DImode))
14873 {
14874 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
14875 xops[1] = tmp;
14876 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
14877 xops[0] = tmp;
14878 xops[1] = this;
14879 }
14880 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
14881 }
14882 else
14883 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
14884 }
14885
14886 /* Adjust the this parameter by a value stored in the vtable. */
14887 if (vcall_offset)
14888 {
14889 if (TARGET_64BIT)
14890 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
14891 else
14892 {
14893 int tmp_regno = 2 /* ECX */;
14894 if (lookup_attribute ("fastcall",
14895 TYPE_ATTRIBUTES (TREE_TYPE (function))))
14896 tmp_regno = 0 /* EAX */;
14897 tmp = gen_rtx_REG (SImode, tmp_regno);
14898 }
14899
14900 xops[0] = gen_rtx_MEM (Pmode, this_reg);
14901 xops[1] = tmp;
14902 if (TARGET_64BIT)
14903 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
14904 else
14905 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
14906
14907 /* Adjust the this parameter. */
14908 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
14909 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
14910 {
14911 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
14912 xops[0] = GEN_INT (vcall_offset);
14913 xops[1] = tmp2;
14914 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
14915 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
14916 }
14917 xops[1] = this_reg;
14918 if (TARGET_64BIT)
14919 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
14920 else
14921 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
14922 }
14923
14924 /* If necessary, drop THIS back to its stack slot. */
14925 if (this_reg && this_reg != this)
14926 {
14927 xops[0] = this_reg;
14928 xops[1] = this;
14929 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
14930 }
14931
14932 xops[0] = XEXP (DECL_RTL (function), 0);
14933 if (TARGET_64BIT)
14934 {
14935 if (!flag_pic || (*targetm.binds_local_p) (function))
14936 output_asm_insn ("jmp\t%P0", xops);
14937 else
14938 {
14939 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
14940 tmp = gen_rtx_CONST (Pmode, tmp);
14941 tmp = gen_rtx_MEM (QImode, tmp);
14942 xops[0] = tmp;
14943 output_asm_insn ("jmp\t%A0", xops);
14944 }
14945 }
14946 else
14947 {
14948 if (!flag_pic || (*targetm.binds_local_p) (function))
14949 output_asm_insn ("jmp\t%P0", xops);
14950 else
14951 #if TARGET_MACHO
14952 if (TARGET_MACHO)
14953 {
14954 rtx sym_ref = XEXP (DECL_RTL (function), 0);
14955 tmp = (gen_rtx_SYMBOL_REF
14956 (Pmode,
14957 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
14958 tmp = gen_rtx_MEM (QImode, tmp);
14959 xops[0] = tmp;
14960 output_asm_insn ("jmp\t%0", xops);
14961 }
14962 else
14963 #endif /* TARGET_MACHO */
14964 {
14965 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
14966 output_set_got (tmp);
14967
14968 xops[1] = tmp;
14969 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
14970 output_asm_insn ("jmp\t{*}%1", xops);
14971 }
14972 }
14973 }
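/* A sketch of the assembly the thunk emitter above produces in the simplest
   32-bit case (stack-passed `this', DELTA adjustment only, non-PIC):

       addl $DELTA, 4(%esp)
       jmp  FUNCTION

   The register-parameter and PIC variants only differ in loading `this'
   into %eax or %ecx first and in indirecting the final jump.  */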
14974
14975 static void
14976 x86_file_start (void)
14977 {
14978 default_file_start ();
14979 if (X86_FILE_START_VERSION_DIRECTIVE)
14980 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
14981 if (X86_FILE_START_FLTUSED)
14982 fputs ("\t.global\t__fltused\n", asm_out_file);
14983 if (ix86_asm_dialect == ASM_INTEL)
14984 fputs ("\t.intel_syntax\n", asm_out_file);
14985 }
14986
14987 int
14988 x86_field_alignment (tree field, int computed)
14989 {
14990 enum machine_mode mode;
14991 tree type = TREE_TYPE (field);
14992
14993 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
14994 return computed;
14995 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
14996 ? get_inner_array_type (type) : type);
14997 if (mode == DFmode || mode == DCmode
14998 || GET_MODE_CLASS (mode) == MODE_INT
14999 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
15000 return MIN (32, computed);
15001 return computed;
15002 }
15003
15004 /* Output assembler code to FILE to increment profiler label # LABELNO
15005 for profiling a function entry. */
15006 void
15007 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
15008 {
15009 if (TARGET_64BIT)
15010 if (flag_pic)
15011 {
15012 #ifndef NO_PROFILE_COUNTERS
15013 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
15014 #endif
15015 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
15016 }
15017 else
15018 {
15019 #ifndef NO_PROFILE_COUNTERS
15020 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
15021 #endif
15022 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15023 }
15024 else if (flag_pic)
15025 {
15026 #ifndef NO_PROFILE_COUNTERS
15027 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
15028 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
15029 #endif
15030 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
15031 }
15032 else
15033 {
15034 #ifndef NO_PROFILE_COUNTERS
15035 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
15036 PROFILE_COUNT_REGISTER);
15037 #endif
15038 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15039 }
15040 }
15041
15042 /* We don't have exact information about the insn sizes, but we may assume
15043    quite safely that we are informed about all 1-byte insns and memory
15044 address sizes. This is enough to eliminate unnecessary padding in
15045 99% of cases. */
15046
15047 static int
15048 min_insn_size (rtx insn)
15049 {
15050 int l = 0;
15051
15052 if (!INSN_P (insn) || !active_insn_p (insn))
15053 return 0;
15054
15055   /* Discard alignments we've emitted, and jump instructions.  */
15056 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
15057 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
15058 return 0;
15059 if (GET_CODE (insn) == JUMP_INSN
15060 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
15061 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
15062 return 0;
15063
15064 /* Important case - calls are always 5 bytes.
15065      It is common to have many calls in a row.  */
15066 if (GET_CODE (insn) == CALL_INSN
15067 && symbolic_reference_mentioned_p (PATTERN (insn))
15068 && !SIBLING_CALL_P (insn))
15069 return 5;
15070 if (get_attr_length (insn) <= 1)
15071 return 1;
15072
15073 /* For normal instructions we may rely on the sizes of addresses
15074      and the presence of a symbol to require 4 bytes of encoding.  This
15075      is not the case for jumps, where references are PC relative.  */
15076 if (GET_CODE (insn) != JUMP_INSN)
15077 {
15078 l = get_attr_length_address (insn);
15079 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
15080 l = 4;
15081 }
15082 if (l)
15083 return 1+l;
15084 else
15085 return 2;
15086 }
15087
15088 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in a 16-byte
15089    window.  */
15090
15091 static void
15092 ix86_avoid_jump_misspredicts (void)
15093 {
15094 rtx insn, start = get_insns ();
15095 int nbytes = 0, njumps = 0;
15096 int isjump = 0;
15097
15098 /* Look for all minimal intervals of instructions containing 4 jumps.
15099 The intervals are bounded by START and INSN. NBYTES is the total
15100 size of instructions in the interval including INSN and not including
15101      START.  When NBYTES is smaller than 16 bytes, it is possible
15102      that the ends of START and INSN land in the same 16-byte page.
15103
15104      The smallest offset in the page at which INSN can start is the case where
15105      START ends at offset 0.  The offset of INSN is then NBYTES - sizeof (INSN).
15106      We add a p2align to the 16-byte window with maxskip 17 - NBYTES + sizeof (INSN).
15107      */
15108 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
15109 {
15110
15111 nbytes += min_insn_size (insn);
15112 if (dump_file)
15113 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
15114 INSN_UID (insn), min_insn_size (insn));
15115 if ((GET_CODE (insn) == JUMP_INSN
15116 && GET_CODE (PATTERN (insn)) != ADDR_VEC
15117 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
15118 || GET_CODE (insn) == CALL_INSN)
15119 njumps++;
15120 else
15121 continue;
15122
15123 while (njumps > 3)
15124 {
15125 start = NEXT_INSN (start);
15126 if ((GET_CODE (start) == JUMP_INSN
15127 && GET_CODE (PATTERN (start)) != ADDR_VEC
15128 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
15129 || GET_CODE (start) == CALL_INSN)
15130 njumps--, isjump = 1;
15131 else
15132 isjump = 0;
15133 nbytes -= min_insn_size (start);
15134 }
15135 if (njumps < 0)
15136 abort ();
15137 if (dump_file)
15138 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
15139 INSN_UID (start), INSN_UID (insn), nbytes);
15140
15141 if (njumps == 3 && isjump && nbytes < 16)
15142 {
15143 int padsize = 15 - nbytes + min_insn_size (insn);
15144
15145 if (dump_file)
15146 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
15147 INSN_UID (insn), padsize);
15148 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
15149 }
15150 }
15151 }
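/* Worked example of the padding computed above (a sketch): if the fourth
   branch is detected while the interval holds nbytes = 12 bytes (including
   the 2-byte jump itself), the pass emits an align of 15 - 12 + 2 = 5 bytes,
   so the four branches can no longer share a single 16-byte window.  */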
15152
15153 /* AMD Athlon works faster
15154    when RET is not the destination of a conditional jump or directly preceded
15155    by another jump instruction.  We avoid the penalty by inserting a NOP just
15156    before the RET instruction in such cases.  */
15157 static void
15158 ix86_pad_returns (void)
15159 {
15160 edge e;
15161 edge_iterator ei;
15162
15163 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
15164 {
15165 basic_block bb = e->src;
15166 rtx ret = BB_END (bb);
15167 rtx prev;
15168 bool replace = false;
15169
15170 if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
15171 || !maybe_hot_bb_p (bb))
15172 continue;
15173 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
15174 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
15175 break;
15176 if (prev && GET_CODE (prev) == CODE_LABEL)
15177 {
15178 edge e;
15179 edge_iterator ei;
15180
15181 FOR_EACH_EDGE (e, ei, bb->preds)
15182 if (EDGE_FREQUENCY (e) && e->src->index >= 0
15183 && !(e->flags & EDGE_FALLTHRU))
15184 replace = true;
15185 }
15186 if (!replace)
15187 {
15188 prev = prev_active_insn (ret);
15189 if (prev
15190 && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
15191 || GET_CODE (prev) == CALL_INSN))
15192 replace = true;
15193           /* Empty functions get a branch mispredict even when the jump destination
15194 is not visible to us. */
15195 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
15196 replace = true;
15197 }
15198 if (replace)
15199 {
15200 emit_insn_before (gen_return_internal_long (), ret);
15201 delete_insn (ret);
15202 }
15203 }
15204 }
15205
15206 /* Implement machine specific optimizations. We implement padding of returns
15207    for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window.  */
15208 static void
15209 ix86_reorg (void)
15210 {
15211 if (TARGET_ATHLON_K8 && optimize && !optimize_size)
15212 ix86_pad_returns ();
15213 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
15214 ix86_avoid_jump_misspredicts ();
15215 }
15216
15217 /* Return nonzero when a QImode register that must be represented via a REX
15218    prefix is used.  */
15219 bool
15220 x86_extended_QIreg_mentioned_p (rtx insn)
15221 {
15222 int i;
15223 extract_insn_cached (insn);
15224 for (i = 0; i < recog_data.n_operands; i++)
15225 if (REG_P (recog_data.operand[i])
15226 && REGNO (recog_data.operand[i]) >= 4)
15227 return true;
15228 return false;
15229 }
15230
15231 /* Return nonzero when P points to a register encoded via a REX prefix.
15232 Called via for_each_rtx. */
15233 static int
15234 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
15235 {
15236 unsigned int regno;
15237 if (!REG_P (*p))
15238 return 0;
15239 regno = REGNO (*p);
15240 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
15241 }
15242
15243 /* Return true when INSN mentions a register that must be encoded using a
15244    REX prefix.  */
15245 bool
15246 x86_extended_reg_mentioned_p (rtx insn)
15247 {
15248 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
15249 }
15250
15251 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
15252 optabs would emit if we didn't have TFmode patterns. */
15253
15254 void
15255 x86_emit_floatuns (rtx operands[2])
15256 {
15257 rtx neglab, donelab, i0, i1, f0, in, out;
15258 enum machine_mode mode, inmode;
15259
15260 inmode = GET_MODE (operands[1]);
15261 if (inmode != SImode
15262 && inmode != DImode)
15263 abort ();
15264
15265 out = operands[0];
15266 in = force_reg (inmode, operands[1]);
15267 mode = GET_MODE (out);
15268 neglab = gen_label_rtx ();
15269 donelab = gen_label_rtx ();
15270 i1 = gen_reg_rtx (Pmode);
15271 f0 = gen_reg_rtx (mode);
15272
15273 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
15274
15275 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
15276 emit_jump_insn (gen_jump (donelab));
15277 emit_barrier ();
15278
15279 emit_label (neglab);
15280
15281 i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15282 i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15283 i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
15284 expand_float (f0, i0, 0);
15285 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
15286
15287 emit_label (donelab);
15288 }
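/* The sequence above implements the usual unsigned-to-float trick.  A scalar
   C sketch of the same idea (hypothetical helper, not compiler code):

     double
     uns_to_double (unsigned long long x)
     {
       if ((long long) x >= 0)
         return (double) (long long) x;           /* Fits the signed range.  */
       /* Halve the value, folding the low bit back in so rounding is
          unaffected, convert as signed, then double the result.  */
       unsigned long long half = (x >> 1) | (x & 1);
       return (double) (long long) half * 2.0;
     }  */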
15289
15290 /* Initialize vector TARGET via VALS. */
15291 void
15292 ix86_expand_vector_init (rtx target, rtx vals)
15293 {
15294 enum machine_mode mode = GET_MODE (target);
15295 int elt_size = GET_MODE_SIZE (GET_MODE_INNER (mode));
15296 int n_elts = (GET_MODE_SIZE (mode) / elt_size);
15297 int i;
15298
15299 for (i = n_elts - 1; i >= 0; i--)
15300 if (GET_CODE (XVECEXP (vals, 0, i)) != CONST_INT
15301 && GET_CODE (XVECEXP (vals, 0, i)) != CONST_DOUBLE)
15302 break;
15303
15304   /* A few special cases first...
15305      ... constants are best loaded from the constant pool.  */
15306 if (i < 0)
15307 {
15308 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
15309 return;
15310 }
15311
15312   /* ... values where only the first field is non-constant are best loaded
15313      from the pool and overwritten via a move later.  */
15314 if (i == 0)
15315 {
15316 XVECEXP (vals, 0, 0) = CONST0_RTX (GET_MODE_INNER (mode));
15317 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
15318
15319 switch (GET_MODE (target))
15320 {
15321 case V2DFmode:
15322 emit_insn (gen_sse2_loadlpd (target, target, XVECEXP (vals, 0, 0)));
15323 break;
15324
15325 case V4SFmode:
15326 {
15327 /* ??? We can represent this better. */
15328 rtx op = simplify_gen_subreg (mode, XVECEXP (vals, 0, 0),
15329 GET_MODE_INNER (mode), 0);
15330 op = force_reg (mode, op);
15331 emit_insn (gen_sse_movss (target, target, op));
15332 }
15333 break;
15334
15335 default:
15336 break;
15337 }
15338 return;
15339 }
15340
15341   /* And the general case, built up with interleave (unpck) instructions.  */
15342 switch (GET_MODE (target))
15343 {
15344 case V2DFmode:
15345 {
15346 rtx vecop0 =
15347 simplify_gen_subreg (V2DFmode, XVECEXP (vals, 0, 0), DFmode, 0);
15348 rtx vecop1 =
15349 simplify_gen_subreg (V2DFmode, XVECEXP (vals, 0, 1), DFmode, 0);
15350
15351 vecop0 = force_reg (V2DFmode, vecop0);
15352 vecop1 = force_reg (V2DFmode, vecop1);
15353 emit_insn (gen_sse2_unpcklpd (target, vecop0, vecop1));
15354 }
15355 break;
15356 case V4SFmode:
15357 {
15358 rtx vecop0 =
15359 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 0), SFmode, 0);
15360 rtx vecop1 =
15361 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 1), SFmode, 0);
15362 rtx vecop2 =
15363 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 2), SFmode, 0);
15364 rtx vecop3 =
15365 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 3), SFmode, 0);
15366 rtx tmp1 = gen_reg_rtx (V4SFmode);
15367 rtx tmp2 = gen_reg_rtx (V4SFmode);
15368
15369 vecop0 = force_reg (V4SFmode, vecop0);
15370 vecop1 = force_reg (V4SFmode, vecop1);
15371 vecop2 = force_reg (V4SFmode, vecop2);
15372 vecop3 = force_reg (V4SFmode, vecop3);
15373 emit_insn (gen_sse_unpcklps (tmp1, vecop1, vecop3));
15374 emit_insn (gen_sse_unpcklps (tmp2, vecop0, vecop2));
15375 emit_insn (gen_sse_unpcklps (target, tmp2, tmp1));
15376 }
15377 break;
15378 default:
15379 abort ();
15380 }
15381 }
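/* Worked example for the V4SFmode path above (a sketch, using the usual
   unpcklps semantics): to build { a, b, c, d }, each scalar is first widened
   into the low element of its own V4SF register, then
     tmp1   = unpcklps (b, d)       -> { b, d, x, x }
     tmp2   = unpcklps (a, c)       -> { a, c, x, x }
     target = unpcklps (tmp2, tmp1) -> { a, b, c, d }.  */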
15382
15383 /* Implements target hook vector_mode_supported_p. */
15384 static bool
15385 ix86_vector_mode_supported_p (enum machine_mode mode)
15386 {
15387 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
15388 return true;
15389 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
15390 return true;
15391 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
15392 return true;
15393 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
15394 return true;
15395 return false;
15396 }
15397
15398 /* Worker function for TARGET_MD_ASM_CLOBBERS.
15399
15400 We do this in the new i386 backend to maintain source compatibility
15401 with the old cc0-based compiler. */
15402
15403 static tree
15404 ix86_md_asm_clobbers (tree clobbers)
15405 {
15406 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
15407 clobbers);
15408 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
15409 clobbers);
15410 clobbers = tree_cons (NULL_TREE, build_string (7, "dirflag"),
15411 clobbers);
15412 return clobbers;
15413 }
15414
15415 /* Worker function for REVERSE_CONDITION. */
15416
15417 enum rtx_code
15418 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
15419 {
15420 return (mode != CCFPmode && mode != CCFPUmode
15421 ? reverse_condition (code)
15422 : reverse_condition_maybe_unordered (code));
15423 }
15424
15425 /* Output code to perform an x87 FP register move, from OPERANDS[1]
15426 to OPERANDS[0]. */
15427
15428 const char *
15429 output_387_reg_move (rtx insn, rtx *operands)
15430 {
15431 if (REG_P (operands[1])
15432 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
15433 {
15434 if (REGNO (operands[0]) == FIRST_STACK_REG
15435 && TARGET_USE_FFREEP)
15436 return "ffreep\t%y0";
15437 return "fstp\t%y0";
15438 }
15439 if (STACK_TOP_P (operands[0]))
15440 return "fld%z1\t%y1";
15441 return "fst\t%y0";
15442 }
15443
15444 /* Output code to perform a conditional jump to LABEL, if C2 flag in
15445 FP status register is set. */
15446
15447 void
15448 ix86_emit_fp_unordered_jump (rtx label)
15449 {
15450 rtx reg = gen_reg_rtx (HImode);
15451 rtx temp;
15452
15453 emit_insn (gen_x86_fnstsw_1 (reg));
15454
15455 if (TARGET_USE_SAHF)
15456 {
15457 emit_insn (gen_x86_sahf_1 (reg));
15458
15459 temp = gen_rtx_REG (CCmode, FLAGS_REG);
15460 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
15461 }
15462 else
15463 {
15464 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
15465
15466 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
15467 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
15468 }
15469
15470 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
15471 gen_rtx_LABEL_REF (VOIDmode, label),
15472 pc_rtx);
15473 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
15474 emit_jump_insn (temp);
15475 }
15476
15477 /* Output code to perform a log1p XFmode calculation. */
15478
15479 void ix86_emit_i387_log1p (rtx op0, rtx op1)
15480 {
15481 rtx label1 = gen_label_rtx ();
15482 rtx label2 = gen_label_rtx ();
15483
15484 rtx tmp = gen_reg_rtx (XFmode);
15485 rtx tmp2 = gen_reg_rtx (XFmode);
15486
15487 emit_insn (gen_absxf2 (tmp, op1));
15488 emit_insn (gen_cmpxf (tmp,
15489 CONST_DOUBLE_FROM_REAL_VALUE (
15490 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
15491 XFmode)));
15492 emit_jump_insn (gen_bge (label1));
15493
15494 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
15495 emit_insn (gen_fyl2xp1_xf3 (op0, tmp2, op1));
15496 emit_jump (label2);
15497
15498 emit_label (label1);
15499 emit_move_insn (tmp, CONST1_RTX (XFmode));
15500 emit_insn (gen_addxf3 (tmp, op1, tmp));
15501 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
15502 emit_insn (gen_fyl2x_xf3 (op0, tmp2, tmp));
15503
15504 emit_label (label2);
15505 }
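/* A sketch of the math behind the split above: fyl2xp1 computes
   y * log2 (x + 1) but is accurate only for small |x| (roughly
   |x| < 1 - sqrt (2) / 2, the 0.2928... threshold tested above), so larger
   inputs fall back to the identity log1p (x) = ln (2) * log2 (1 + x)
   computed with fyl2x.  */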
15506
15507 /* Solaris named-section hook. Parameters are as for
15508 named_section_real. */
15509
15510 static void
15511 i386_solaris_elf_named_section (const char *name, unsigned int flags,
15512 tree decl)
15513 {
15514 /* With Binutils 2.15, the "@unwind" marker must be specified on
15515 every occurrence of the ".eh_frame" section, not just the first
15516 one. */
15517 if (TARGET_64BIT
15518 && strcmp (name, ".eh_frame") == 0)
15519 {
15520 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
15521 flags & SECTION_WRITE ? "aw" : "a");
15522 return;
15523 }
15524 default_elf_asm_named_section (name, flags, decl);
15525 }
15526
15527 #include "gt-i386.h"