1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "tm_p.h"
29 #include "regs.h"
30 #include "hard-reg-set.h"
31 #include "real.h"
32 #include "insn-config.h"
33 #include "conditions.h"
34 #include "output.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
37 #include "flags.h"
38 #include "except.h"
39 #include "function.h"
40 #include "recog.h"
41 #include "expr.h"
42 #include "optabs.h"
43 #include "toplev.h"
44 #include "basic-block.h"
45 #include "ggc.h"
46 #include "target.h"
47 #include "target-def.h"
48 #include "langhooks.h"
49 #include "cgraph.h"
50 #include "tree-gimple.h"
51
52 #ifndef CHECK_STACK_LIMIT
53 #define CHECK_STACK_LIMIT (-1)
54 #endif
55
56 /* Return the index of the given mode in the multiply and division cost tables. */
57 #define MODE_INDEX(mode) \
58 ((mode) == QImode ? 0 \
59 : (mode) == HImode ? 1 \
60 : (mode) == SImode ? 2 \
61 : (mode) == DImode ? 3 \
62 : 4)
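/* As a rough usage sketch (see ix86_rtx_costs below): the per-mode cost
   arrays in struct processor_costs are indexed with this macro, along the
   lines of ix86_cost->mult_init[MODE_INDEX (mode)] or
   ix86_cost->divide[MODE_INDEX (mode)], so SImode picks the third entry
   and any other mode (e.g. TImode) falls back to the last one.  */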
63
64 /* Processor costs (relative to an add) */
65 static const
66 struct processor_costs size_cost = { /* costs for tuning for size */
67 2, /* cost of an add instruction */
68 3, /* cost of a lea instruction */
69 2, /* variable shift costs */
70 3, /* constant shift costs */
71 {3, 3, 3, 3, 5}, /* cost of starting a multiply */
72 0, /* cost of multiply per each bit set */
73 {3, 3, 3, 3, 5}, /* cost of a divide/mod */
74 3, /* cost of movsx */
75 3, /* cost of movzx */
76 0, /* "large" insn */
77 2, /* MOVE_RATIO */
78 2, /* cost for loading QImode using movzbl */
79 {2, 2, 2}, /* cost of loading integer registers
80 in QImode, HImode and SImode.
81 Relative to reg-reg move (2). */
82 {2, 2, 2}, /* cost of storing integer registers */
83 2, /* cost of reg,reg fld/fst */
84 {2, 2, 2}, /* cost of loading fp registers
85 in SFmode, DFmode and XFmode */
86 {2, 2, 2}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
87 3, /* cost of moving MMX register */
88 {3, 3}, /* cost of loading MMX registers
89 in SImode and DImode */
90 {3, 3}, /* cost of storing MMX registers
91 in SImode and DImode */
92 3, /* cost of moving SSE register */
93 {3, 3, 3}, /* cost of loading SSE registers
94 in SImode, DImode and TImode */
95 {3, 3, 3}, /* cost of storing SSE registers
96 in SImode, DImode and TImode */
97 3, /* MMX or SSE register to integer */
98 0, /* size of prefetch block */
99 0, /* number of parallel prefetches */
100 1, /* Branch cost */
101 2, /* cost of FADD and FSUB insns. */
102 2, /* cost of FMUL instruction. */
103 2, /* cost of FDIV instruction. */
104 2, /* cost of FABS instruction. */
105 2, /* cost of FCHS instruction. */
106 2, /* cost of FSQRT instruction. */
107 };
108
109 /* Processor costs (relative to an add) */
110 static const
111 struct processor_costs i386_cost = { /* 386 specific costs */
112 1, /* cost of an add instruction */
113 1, /* cost of a lea instruction */
114 3, /* variable shift costs */
115 2, /* constant shift costs */
116 {6, 6, 6, 6, 6}, /* cost of starting a multiply */
117 1, /* cost of multiply per each bit set */
118 {23, 23, 23, 23, 23}, /* cost of a divide/mod */
119 3, /* cost of movsx */
120 2, /* cost of movzx */
121 15, /* "large" insn */
122 3, /* MOVE_RATIO */
123 4, /* cost for loading QImode using movzbl */
124 {2, 4, 2}, /* cost of loading integer registers
125 in QImode, HImode and SImode.
126 Relative to reg-reg move (2). */
127 {2, 4, 2}, /* cost of storing integer registers */
128 2, /* cost of reg,reg fld/fst */
129 {8, 8, 8}, /* cost of loading fp registers
130 in SFmode, DFmode and XFmode */
131 {8, 8, 8}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
132 2, /* cost of moving MMX register */
133 {4, 8}, /* cost of loading MMX registers
134 in SImode and DImode */
135 {4, 8}, /* cost of storing MMX registers
136 in SImode and DImode */
137 2, /* cost of moving SSE register */
138 {4, 8, 16}, /* cost of loading SSE registers
139 in SImode, DImode and TImode */
140 {4, 8, 16}, /* cost of storing SSE registers
141 in SImode, DImode and TImode */
142 3, /* MMX or SSE register to integer */
143 0, /* size of prefetch block */
144 0, /* number of parallel prefetches */
145 1, /* Branch cost */
146 23, /* cost of FADD and FSUB insns. */
147 27, /* cost of FMUL instruction. */
148 88, /* cost of FDIV instruction. */
149 22, /* cost of FABS instruction. */
150 24, /* cost of FCHS instruction. */
151 122, /* cost of FSQRT instruction. */
152 };
153
154 static const
155 struct processor_costs i486_cost = { /* 486 specific costs */
156 1, /* cost of an add instruction */
157 1, /* cost of a lea instruction */
158 3, /* variable shift costs */
159 2, /* constant shift costs */
160 {12, 12, 12, 12, 12}, /* cost of starting a multiply */
161 1, /* cost of multiply per each bit set */
162 {40, 40, 40, 40, 40}, /* cost of a divide/mod */
163 3, /* cost of movsx */
164 2, /* cost of movzx */
165 15, /* "large" insn */
166 3, /* MOVE_RATIO */
167 4, /* cost for loading QImode using movzbl */
168 {2, 4, 2}, /* cost of loading integer registers
169 in QImode, HImode and SImode.
170 Relative to reg-reg move (2). */
171 {2, 4, 2}, /* cost of storing integer registers */
172 2, /* cost of reg,reg fld/fst */
173 {8, 8, 8}, /* cost of loading fp registers
174 in SFmode, DFmode and XFmode */
175 {8, 8, 8}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
176 2, /* cost of moving MMX register */
177 {4, 8}, /* cost of loading MMX registers
178 in SImode and DImode */
179 {4, 8}, /* cost of storing MMX registers
180 in SImode and DImode */
181 2, /* cost of moving SSE register */
182 {4, 8, 16}, /* cost of loading SSE registers
183 in SImode, DImode and TImode */
184 {4, 8, 16}, /* cost of storing SSE registers
185 in SImode, DImode and TImode */
186 3, /* MMX or SSE register to integer */
187 0, /* size of prefetch block */
188 0, /* number of parallel prefetches */
189 1, /* Branch cost */
190 8, /* cost of FADD and FSUB insns. */
191 16, /* cost of FMUL instruction. */
192 73, /* cost of FDIV instruction. */
193 3, /* cost of FABS instruction. */
194 3, /* cost of FCHS instruction. */
195 83, /* cost of FSQRT instruction. */
196 };
197
198 static const
199 struct processor_costs pentium_cost = {
200 1, /* cost of an add instruction */
201 1, /* cost of a lea instruction */
202 4, /* variable shift costs */
203 1, /* constant shift costs */
204 {11, 11, 11, 11, 11}, /* cost of starting a multiply */
205 0, /* cost of multiply per each bit set */
206 {25, 25, 25, 25, 25}, /* cost of a divide/mod */
207 3, /* cost of movsx */
208 2, /* cost of movzx */
209 8, /* "large" insn */
210 6, /* MOVE_RATIO */
211 6, /* cost for loading QImode using movzbl */
212 {2, 4, 2}, /* cost of loading integer registers
213 in QImode, HImode and SImode.
214 Relative to reg-reg move (2). */
215 {2, 4, 2}, /* cost of storing integer registers */
216 2, /* cost of reg,reg fld/fst */
217 {2, 2, 6}, /* cost of loading fp registers
218 in SFmode, DFmode and XFmode */
219 {4, 4, 6}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
220 8, /* cost of moving MMX register */
221 {8, 8}, /* cost of loading MMX registers
222 in SImode and DImode */
223 {8, 8}, /* cost of storing MMX registers
224 in SImode and DImode */
225 2, /* cost of moving SSE register */
226 {4, 8, 16}, /* cost of loading SSE registers
227 in SImode, DImode and TImode */
228 {4, 8, 16}, /* cost of storing SSE registers
229 in SImode, DImode and TImode */
230 3, /* MMX or SSE register to integer */
231 0, /* size of prefetch block */
232 0, /* number of parallel prefetches */
233 2, /* Branch cost */
234 3, /* cost of FADD and FSUB insns. */
235 3, /* cost of FMUL instruction. */
236 39, /* cost of FDIV instruction. */
237 1, /* cost of FABS instruction. */
238 1, /* cost of FCHS instruction. */
239 70, /* cost of FSQRT instruction. */
240 };
241
242 static const
243 struct processor_costs pentiumpro_cost = {
244 1, /* cost of an add instruction */
245 1, /* cost of a lea instruction */
246 1, /* variable shift costs */
247 1, /* constant shift costs */
248 {4, 4, 4, 4, 4}, /* cost of starting a multiply */
249 0, /* cost of multiply per each bit set */
250 {17, 17, 17, 17, 17}, /* cost of a divide/mod */
251 1, /* cost of movsx */
252 1, /* cost of movzx */
253 8, /* "large" insn */
254 6, /* MOVE_RATIO */
255 2, /* cost for loading QImode using movzbl */
256 {4, 4, 4}, /* cost of loading integer registers
257 in QImode, HImode and SImode.
258 Relative to reg-reg move (2). */
259 {2, 2, 2}, /* cost of storing integer registers */
260 2, /* cost of reg,reg fld/fst */
261 {2, 2, 6}, /* cost of loading fp registers
262 in SFmode, DFmode and XFmode */
263 {4, 4, 6}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
264 2, /* cost of moving MMX register */
265 {2, 2}, /* cost of loading MMX registers
266 in SImode and DImode */
267 {2, 2}, /* cost of storing MMX registers
268 in SImode and DImode */
269 2, /* cost of moving SSE register */
270 {2, 2, 8}, /* cost of loading SSE registers
271 in SImode, DImode and TImode */
272 {2, 2, 8}, /* cost of storing SSE registers
273 in SImode, DImode and TImode */
274 3, /* MMX or SSE register to integer */
275 32, /* size of prefetch block */
276 6, /* number of parallel prefetches */
277 2, /* Branch cost */
278 3, /* cost of FADD and FSUB insns. */
279 5, /* cost of FMUL instruction. */
280 56, /* cost of FDIV instruction. */
281 2, /* cost of FABS instruction. */
282 2, /* cost of FCHS instruction. */
283 56, /* cost of FSQRT instruction. */
284 };
285
286 static const
287 struct processor_costs k6_cost = {
288 1, /* cost of an add instruction */
289 2, /* cost of a lea instruction */
290 1, /* variable shift costs */
291 1, /* constant shift costs */
292 {3, 3, 3, 3, 3}, /* cost of starting a multiply */
293 0, /* cost of multiply per each bit set */
294 {18, 18, 18, 18, 18}, /* cost of a divide/mod */
295 2, /* cost of movsx */
296 2, /* cost of movzx */
297 8, /* "large" insn */
298 4, /* MOVE_RATIO */
299 3, /* cost for loading QImode using movzbl */
300 {4, 5, 4}, /* cost of loading integer registers
301 in QImode, HImode and SImode.
302 Relative to reg-reg move (2). */
303 {2, 3, 2}, /* cost of storing integer registers */
304 4, /* cost of reg,reg fld/fst */
305 {6, 6, 6}, /* cost of loading fp registers
306 in SFmode, DFmode and XFmode */
307 {4, 4, 4}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
308 2, /* cost of moving MMX register */
309 {2, 2}, /* cost of loading MMX registers
310 in SImode and DImode */
311 {2, 2}, /* cost of storing MMX registers
312 in SImode and DImode */
313 2, /* cost of moving SSE register */
314 {2, 2, 8}, /* cost of loading SSE registers
315 in SImode, DImode and TImode */
316 {2, 2, 8}, /* cost of storing SSE registers
317 in SImode, DImode and TImode */
318 6, /* MMX or SSE register to integer */
319 32, /* size of prefetch block */
320 1, /* number of parallel prefetches */
321 1, /* Branch cost */
322 2, /* cost of FADD and FSUB insns. */
323 2, /* cost of FMUL instruction. */
324 56, /* cost of FDIV instruction. */
325 2, /* cost of FABS instruction. */
326 2, /* cost of FCHS instruction. */
327 56, /* cost of FSQRT instruction. */
328 };
329
330 static const
331 struct processor_costs athlon_cost = {
332 1, /* cost of an add instruction */
333 2, /* cost of a lea instruction */
334 1, /* variable shift costs */
335 1, /* constant shift costs */
336 {5, 5, 5, 5, 5}, /* cost of starting a multiply */
337 0, /* cost of multiply per each bit set */
338 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
339 1, /* cost of movsx */
340 1, /* cost of movzx */
341 8, /* "large" insn */
342 9, /* MOVE_RATIO */
343 4, /* cost for loading QImode using movzbl */
344 {3, 4, 3}, /* cost of loading integer registers
345 in QImode, HImode and SImode.
346 Relative to reg-reg move (2). */
347 {3, 4, 3}, /* cost of storing integer registers */
348 4, /* cost of reg,reg fld/fst */
349 {4, 4, 12}, /* cost of loading fp registers
350 in SFmode, DFmode and XFmode */
351 {6, 6, 8}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
352 2, /* cost of moving MMX register */
353 {4, 4}, /* cost of loading MMX registers
354 in SImode and DImode */
355 {4, 4}, /* cost of storing MMX registers
356 in SImode and DImode */
357 2, /* cost of moving SSE register */
358 {4, 4, 6}, /* cost of loading SSE registers
359 in SImode, DImode and TImode */
360 {4, 4, 5}, /* cost of storing SSE registers
361 in SImode, DImode and TImode */
362 5, /* MMX or SSE register to integer */
363 64, /* size of prefetch block */
364 6, /* number of parallel prefetches */
365 5, /* Branch cost */
366 4, /* cost of FADD and FSUB insns. */
367 4, /* cost of FMUL instruction. */
368 24, /* cost of FDIV instruction. */
369 2, /* cost of FABS instruction. */
370 2, /* cost of FCHS instruction. */
371 35, /* cost of FSQRT instruction. */
372 };
373
374 static const
375 struct processor_costs k8_cost = {
376 1, /* cost of an add instruction */
377 2, /* cost of a lea instruction */
378 1, /* variable shift costs */
379 1, /* constant shift costs */
380 {3, 4, 3, 4, 5}, /* cost of starting a multiply */
381 0, /* cost of multiply per each bit set */
382 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
383 1, /* cost of movsx */
384 1, /* cost of movzx */
385 8, /* "large" insn */
386 9, /* MOVE_RATIO */
387 4, /* cost for loading QImode using movzbl */
388 {3, 4, 3}, /* cost of loading integer registers
389 in QImode, HImode and SImode.
390 Relative to reg-reg move (2). */
391 {3, 4, 3}, /* cost of storing integer registers */
392 4, /* cost of reg,reg fld/fst */
393 {4, 4, 12}, /* cost of loading fp registers
394 in SFmode, DFmode and XFmode */
395 {6, 6, 8}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
396 2, /* cost of moving MMX register */
397 {3, 3}, /* cost of loading MMX registers
398 in SImode and DImode */
399 {4, 4}, /* cost of storing MMX registers
400 in SImode and DImode */
401 2, /* cost of moving SSE register */
402 {4, 3, 6}, /* cost of loading SSE registers
403 in SImode, DImode and TImode */
404 {4, 4, 5}, /* cost of storing SSE registers
405 in SImode, DImode and TImode */
406 5, /* MMX or SSE register to integer */
407 64, /* size of prefetch block */
408 6, /* number of parallel prefetches */
409 5, /* Branch cost */
410 4, /* cost of FADD and FSUB insns. */
411 4, /* cost of FMUL instruction. */
412 19, /* cost of FDIV instruction. */
413 2, /* cost of FABS instruction. */
414 2, /* cost of FCHS instruction. */
415 35, /* cost of FSQRT instruction. */
416 };
417
418 static const
419 struct processor_costs pentium4_cost = {
420 1, /* cost of an add instruction */
421 3, /* cost of a lea instruction */
422 4, /* variable shift costs */
423 4, /* constant shift costs */
424 {15, 15, 15, 15, 15}, /* cost of starting a multiply */
425 0, /* cost of multiply per each bit set */
426 {56, 56, 56, 56, 56}, /* cost of a divide/mod */
427 1, /* cost of movsx */
428 1, /* cost of movzx */
429 16, /* "large" insn */
430 6, /* MOVE_RATIO */
431 2, /* cost for loading QImode using movzbl */
432 {4, 5, 4}, /* cost of loading integer registers
433 in QImode, HImode and SImode.
434 Relative to reg-reg move (2). */
435 {2, 3, 2}, /* cost of storing integer registers */
436 2, /* cost of reg,reg fld/fst */
437 {2, 2, 6}, /* cost of loading fp registers
438 in SFmode, DFmode and XFmode */
439 {4, 4, 6}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
440 2, /* cost of moving MMX register */
441 {2, 2}, /* cost of loading MMX registers
442 in SImode and DImode */
443 {2, 2}, /* cost of storing MMX registers
444 in SImode and DImode */
445 12, /* cost of moving SSE register */
446 {12, 12, 12}, /* cost of loading SSE registers
447 in SImode, DImode and TImode */
448 {2, 2, 8}, /* cost of storing SSE registers
449 in SImode, DImode and TImode */
450 10, /* MMX or SSE register to integer */
451 64, /* size of prefetch block */
452 6, /* number of parallel prefetches */
453 2, /* Branch cost */
454 5, /* cost of FADD and FSUB insns. */
455 7, /* cost of FMUL instruction. */
456 43, /* cost of FDIV instruction. */
457 2, /* cost of FABS instruction. */
458 2, /* cost of FCHS instruction. */
459 43, /* cost of FSQRT instruction. */
460 };
461
462 static const
463 struct processor_costs nocona_cost = {
464 1, /* cost of an add instruction */
465 1, /* cost of a lea instruction */
466 1, /* variable shift costs */
467 1, /* constant shift costs */
468 {10, 10, 10, 10, 10}, /* cost of starting a multiply */
469 0, /* cost of multiply per each bit set */
470 {66, 66, 66, 66, 66}, /* cost of a divide/mod */
471 1, /* cost of movsx */
472 1, /* cost of movzx */
473 16, /* "large" insn */
474 9, /* MOVE_RATIO */
475 4, /* cost for loading QImode using movzbl */
476 {4, 4, 4}, /* cost of loading integer registers
477 in QImode, HImode and SImode.
478 Relative to reg-reg move (2). */
479 {4, 4, 4}, /* cost of storing integer registers */
480 3, /* cost of reg,reg fld/fst */
481 {12, 12, 12}, /* cost of loading fp registers
482 in SFmode, DFmode and XFmode */
483 {4, 4, 4}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
484 6, /* cost of moving MMX register */
485 {12, 12}, /* cost of loading MMX registers
486 in SImode and DImode */
487 {12, 12}, /* cost of storing MMX registers
488 in SImode and DImode */
489 6, /* cost of moving SSE register */
490 {12, 12, 12}, /* cost of loading SSE registers
491 in SImode, DImode and TImode */
492 {12, 12, 12}, /* cost of storing SSE registers
493 in SImode, DImode and TImode */
494 8, /* MMX or SSE register to integer */
495 128, /* size of prefetch block */
496 8, /* number of parallel prefetches */
497 1, /* Branch cost */
498 6, /* cost of FADD and FSUB insns. */
499 8, /* cost of FMUL instruction. */
500 40, /* cost of FDIV instruction. */
501 3, /* cost of FABS instruction. */
502 3, /* cost of FCHS instruction. */
503 44, /* cost of FSQRT instruction. */
504 };
505
506 const struct processor_costs *ix86_cost = &pentium_cost;
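/* override_options below points ix86_cost at the table matching -mtune
   (or at size_cost when optimizing for size), and the cost hooks such as
   ix86_rtx_costs then consult it, roughly as
   *total = COSTS_N_INSNS (ix86_cost->add) for an add, or
   ix86_cost->divide[MODE_INDEX (mode)] for a division.  */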
507
508 /* Processor feature/optimization bitmasks. */
509 #define m_386 (1<<PROCESSOR_I386)
510 #define m_486 (1<<PROCESSOR_I486)
511 #define m_PENT (1<<PROCESSOR_PENTIUM)
512 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
513 #define m_K6 (1<<PROCESSOR_K6)
514 #define m_ATHLON (1<<PROCESSOR_ATHLON)
515 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
516 #define m_K8 (1<<PROCESSOR_K8)
517 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
518 #define m_NOCONA (1<<PROCESSOR_NOCONA)
519
520 const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
521 const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
522 const int x86_zero_extend_with_and = m_486 | m_PENT;
523 const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA /* m_386 | m_K6 */;
524 const int x86_double_with_add = ~m_386;
525 const int x86_use_bit_test = m_386;
526 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
527 const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
528 const int x86_fisttp = m_NOCONA;
529 const int x86_3dnow_a = m_ATHLON_K8;
530 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
531 /* Branch hints were put in P4 based on simulation results, but
532 after P4 was made, no performance benefit was observed with
533 branch hints; they also increase code size. As a result,
534 icc never generates branch hints. */
535 const int x86_branch_hints = 0;
536 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4 | m_NOCONA;
537 const int x86_partial_reg_stall = m_PPRO;
538 const int x86_use_loop = m_K6;
539 const int x86_use_himode_fiop = m_386 | m_486 | m_K6;
540 const int x86_use_simode_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
541 const int x86_use_mov0 = m_K6;
542 const int x86_use_cltd = ~(m_PENT | m_K6);
543 const int x86_read_modify_write = ~m_PENT;
544 const int x86_read_modify = ~(m_PENT | m_PPRO);
545 const int x86_split_long_moves = m_PPRO;
546 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
547 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
548 const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
549 const int x86_qimode_math = ~(0);
550 const int x86_promote_qi_regs = 0;
551 const int x86_himode_math = ~(m_PPRO);
552 const int x86_promote_hi_regs = m_PPRO;
553 const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA;
554 const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA;
555 const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4 | m_NOCONA;
556 const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4 | m_NOCONA;
557 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO);
558 const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA;
559 const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA;
560 const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO;
561 const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO;
562 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO;
563 const int x86_decompose_lea = m_PENT4 | m_NOCONA;
564 const int x86_shift1 = ~m_486;
565 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
566 const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO;
567 /* Set for machines where the type and dependencies are resolved on SSE
568 register parts instead of whole registers, so we may maintain just the
569 lower part of scalar values in the proper format, leaving the upper part
570 undefined. */
571 const int x86_sse_split_regs = m_ATHLON_K8;
572 const int x86_sse_typeless_stores = m_ATHLON_K8;
573 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
574 const int x86_use_ffreep = m_ATHLON_K8;
575 const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
576
577 /* ??? Allowing interunit moves makes it all too easy for the compiler to put
578 integer data in xmm registers, which results in pretty abysmal code. */
579 const int x86_inter_unit_moves = 0 /* ~(m_ATHLON_K8) */;
580
581 const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_NOCONA | m_PPRO;
582 /* Some CPU cores are not able to predict more than 4 branch instructions in
583 the 16 byte window. */
584 const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
585 const int x86_schedule = m_PPRO | m_ATHLON_K8 | m_K6 | m_PENT;
586 const int x86_use_bt = m_ATHLON_K8;
587 /* Compare and exchange was added for 80486. */
588 const int x86_cmpxchg = ~m_386;
589 /* Exchange and add was added for 80486. */
590 const int x86_xadd = ~m_386;
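/* i386.h wraps each of these masks in a TARGET_* predicate keyed on the
   active tuning, roughly along the lines of
     #define TARGET_USE_LEAVE (x86_use_leave & (1 << ix86_tune))
   so adding a new processor only requires editing the tables above.  */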
591
592 /* If the average insn count for a single function invocation is
593 lower than this constant, emit fast (but longer) prologue and
594 epilogue code. */
595 #define FAST_PROLOGUE_INSN_COUNT 20
596
597 /* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
598 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
599 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
600 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
601
602 /* Array of the smallest class containing reg number REGNO, indexed by
603 REGNO. Used by REGNO_REG_CLASS in i386.h. */
604
605 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
606 {
607 /* ax, dx, cx, bx */
608 AREG, DREG, CREG, BREG,
609 /* si, di, bp, sp */
610 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
611 /* FP registers */
612 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
613 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
614 /* arg pointer */
615 NON_Q_REGS,
616 /* flags, fpsr, dirflag, frame */
617 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
618 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
619 SSE_REGS, SSE_REGS,
620 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
621 MMX_REGS, MMX_REGS,
622 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
623 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
624 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
625 SSE_REGS, SSE_REGS,
626 };
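/* REGNO_REG_CLASS simply indexes this array, so for instance
   REGNO_REG_CLASS (1) is DREG (%edx), while REGNO_REG_CLASS (7) is
   NON_Q_REGS because %esp has no addressable QImode part.  */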
627
628 /* The "default" register map used in 32bit mode. */
629
630 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
631 {
632 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
633 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
634 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
635 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
636 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
637 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
638 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
639 };
640
641 static int const x86_64_int_parameter_registers[6] =
642 {
643 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
644 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
645 };
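/* That is, the first integer argument goes in %rdi (gcc regno 5), the
   second in %rsi (regno 4), and so on through %r9, following the x86-64
   psABI parameter-passing sequence.  */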
646
647 static int const x86_64_int_return_registers[4] =
648 {
649 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
650 };
651
652 /* The "default" register map used in 64bit mode. */
653 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
654 {
655 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
656 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
657 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
658 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
659 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
660 8,9,10,11,12,13,14,15, /* extended integer registers */
661 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
662 };
663
664 /* Define the register numbers to be used in Dwarf debugging information.
665 The SVR4 reference port C compiler uses the following register numbers
666 in its Dwarf output code:
667 0 for %eax (gcc regno = 0)
668 1 for %ecx (gcc regno = 2)
669 2 for %edx (gcc regno = 1)
670 3 for %ebx (gcc regno = 3)
671 4 for %esp (gcc regno = 7)
672 5 for %ebp (gcc regno = 6)
673 6 for %esi (gcc regno = 4)
674 7 for %edi (gcc regno = 5)
675 The following three DWARF register numbers are never generated by
676 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
677 believes these numbers have these meanings.
678 8 for %eip (no gcc equivalent)
679 9 for %eflags (gcc regno = 17)
680 10 for %trapno (no gcc equivalent)
681 It is not at all clear how we should number the FP stack registers
682 for the x86 architecture. If the version of SDB on x86/svr4 were
683 a bit less brain dead with respect to floating-point then we would
684 have a precedent to follow with respect to DWARF register numbers
685 for x86 FP registers, but the SDB on x86/svr4 is so completely
686 broken with respect to FP registers that it is hardly worth thinking
687 of it as something to strive for compatibility with.
688 The version of x86/svr4 SDB I have at the moment does (partially)
689 seem to believe that DWARF register number 11 is associated with
690 the x86 register %st(0), but that's about all. Higher DWARF
691 register numbers don't seem to be associated with anything in
692 particular, and even for DWARF regno 11, SDB only seems to under-
693 stand that it should say that a variable lives in %st(0) (when
694 asked via an `=' command) if we said it was in DWARF regno 11,
695 but SDB still prints garbage when asked for the value of the
696 variable in question (via a `/' command).
697 (Also note that the labels SDB prints for various FP stack regs
698 when doing an `x' command are all wrong.)
699 Note that these problems generally don't affect the native SVR4
700 C compiler because it doesn't allow the use of -O with -g and
701 because when it is *not* optimizing, it allocates a memory
702 location for each floating-point variable, and the memory
703 location is what gets described in the DWARF AT_location
704 attribute for the variable in question.
705 Regardless of the severe mental illness of the x86/svr4 SDB, we
706 do something sensible here and we use the following DWARF
707 register numbers. Note that these are all stack-top-relative
708 numbers.
709 11 for %st(0) (gcc regno = 8)
710 12 for %st(1) (gcc regno = 9)
711 13 for %st(2) (gcc regno = 10)
712 14 for %st(3) (gcc regno = 11)
713 15 for %st(4) (gcc regno = 12)
714 16 for %st(5) (gcc regno = 13)
715 17 for %st(6) (gcc regno = 14)
716 18 for %st(7) (gcc regno = 15)
717 */
718 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
719 {
720 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
721 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
722 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
723 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
724 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
725 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
726 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
727 };
728
729 /* Test and compare insns in i386.md store the information needed to
730 generate branch and scc insns here. */
731
732 rtx ix86_compare_op0 = NULL_RTX;
733 rtx ix86_compare_op1 = NULL_RTX;
734 rtx ix86_compare_emitted = NULL_RTX;
735
736 #define MAX_386_STACK_LOCALS 3
737 /* Size of the register save area. */
738 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
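/* With the usual x86-64 values REGPARM_MAX == 6 and SSE_REGPARM_MAX == 8,
   this works out to 6*8 + 8*16 = 176 bytes of register save area for
   va_start to spill the argument registers into.  */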
739
740 /* Define the structure for the machine field in struct function. */
741
742 struct stack_local_entry GTY(())
743 {
744 unsigned short mode;
745 unsigned short n;
746 rtx rtl;
747 struct stack_local_entry *next;
748 };
749
750 /* Structure describing stack frame layout.
751 Stack grows downward:
752
753 [arguments]
754 <- ARG_POINTER
755 saved pc
756
757 saved frame pointer if frame_pointer_needed
758 <- HARD_FRAME_POINTER
759 [saved regs]
760
761 [padding1] \
762 )
763 [va_arg registers] (
764 > to_allocate <- FRAME_POINTER
765 [frame] (
766 )
767 [padding2] /
768 */
769 struct ix86_frame
770 {
771 int nregs;
772 int padding1;
773 int va_arg_size;
774 HOST_WIDE_INT frame;
775 int padding2;
776 int outgoing_arguments_size;
777 int red_zone_size;
778
779 HOST_WIDE_INT to_allocate;
780 /* The offsets relative to ARG_POINTER. */
781 HOST_WIDE_INT frame_pointer_offset;
782 HOST_WIDE_INT hard_frame_pointer_offset;
783 HOST_WIDE_INT stack_pointer_offset;
784
785 /* When save_regs_using_mov is set, emit prologue using
786 move instead of push instructions. */
787 bool save_regs_using_mov;
788 };
789
790 /* Code model option as passed by user. */
791 static const char *ix86_cmodel_string;
792 /* Parsed value. */
793 enum cmodel ix86_cmodel;
794 /* Asm dialect. */
795 static const char *ix86_asm_string;
796 enum asm_dialect ix86_asm_dialect = ASM_ATT;
797 /* TLS dialect. */
798 static const char *ix86_tls_dialect_string;
799 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
800
801 /* Which unit we are generating floating point math for. */
802 enum fpmath_unit ix86_fpmath;
803
804 /* Which CPU we are scheduling for. */
805 enum processor_type ix86_tune;
806 /* Which instruction set architecture to use. */
807 enum processor_type ix86_arch;
808
809 /* Strings to hold which cpu and instruction set architecture to use. */
810 const char *ix86_tune_string; /* for -mtune=<xxx> */
811 const char *ix86_arch_string; /* for -march=<xxx> */
812 static const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
813
814 /* # of registers to use to pass arguments. */
815 static const char *ix86_regparm_string;
816
817 /* True if the SSE prefetch instruction is not a NOP. */
818 int x86_prefetch_sse;
819
820 /* ix86_regparm_string as a number */
821 static int ix86_regparm;
822
823 /* Alignment to use for loops and jumps: */
824
825 /* Power of two alignment for loops. */
826 static const char *ix86_align_loops_string;
827
828 /* Power of two alignment for non-loop jumps. */
829 static const char *ix86_align_jumps_string;
830
831 /* Power of two alignment for stack boundary in bytes. */
832 static const char *ix86_preferred_stack_boundary_string;
833
834 /* Preferred alignment for stack boundary in bits. */
835 unsigned int ix86_preferred_stack_boundary;
836
837 /* Values 1-5: see jump.c */
838 int ix86_branch_cost;
839 static const char *ix86_branch_cost_string;
840
841 /* Power of two alignment for functions. */
842 static const char *ix86_align_funcs_string;
843
844 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
845 char internal_label_prefix[16];
846 int internal_label_prefix_len;
847 \f
848 static bool ix86_handle_option (size_t, const char *, int);
849 static void output_pic_addr_const (FILE *, rtx, int);
850 static void put_condition_code (enum rtx_code, enum machine_mode,
851 int, int, FILE *);
852 static const char *get_some_local_dynamic_name (void);
853 static int get_some_local_dynamic_name_1 (rtx *, void *);
854 static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
855 static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
856 rtx *);
857 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
858 static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
859 enum machine_mode);
860 static rtx get_thread_pointer (int);
861 static rtx legitimize_tls_address (rtx, enum tls_model, int);
862 static void get_pc_thunk_name (char [32], unsigned int);
863 static rtx gen_push (rtx);
864 static int ix86_flags_dependant (rtx, rtx, enum attr_type);
865 static int ix86_agi_dependant (rtx, rtx, enum attr_type);
866 static struct machine_function * ix86_init_machine_status (void);
867 static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
868 static int ix86_nsaved_regs (void);
869 static void ix86_emit_save_regs (void);
870 static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
871 static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
872 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
873 static HOST_WIDE_INT ix86_GOT_alias_set (void);
874 static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
875 static rtx ix86_expand_aligntest (rtx, int);
876 static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
877 static int ix86_issue_rate (void);
878 static int ix86_adjust_cost (rtx, rtx, rtx, int);
879 static int ia32_multipass_dfa_lookahead (void);
880 static void ix86_init_mmx_sse_builtins (void);
881 static rtx x86_this_parameter (tree);
882 static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
883 HOST_WIDE_INT, tree);
884 static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
885 static void x86_file_start (void);
886 static void ix86_reorg (void);
887 static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
888 static tree ix86_build_builtin_va_list (void);
889 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
890 tree, int *, int);
891 static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
892 static bool ix86_vector_mode_supported_p (enum machine_mode);
893
894 static int ix86_address_cost (rtx);
895 static bool ix86_cannot_force_const_mem (rtx);
896 static rtx ix86_delegitimize_address (rtx);
897
898 struct builtin_description;
899 static rtx ix86_expand_sse_comi (const struct builtin_description *,
900 tree, rtx);
901 static rtx ix86_expand_sse_compare (const struct builtin_description *,
902 tree, rtx);
903 static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
904 static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
905 static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
906 static rtx ix86_expand_store_builtin (enum insn_code, tree);
907 static rtx safe_vector_operand (rtx, enum machine_mode);
908 static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
909 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
910 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
911 static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
912 static int ix86_fp_comparison_cost (enum rtx_code code);
913 static unsigned int ix86_select_alt_pic_regnum (void);
914 static int ix86_save_reg (unsigned int, int);
915 static void ix86_compute_frame_layout (struct ix86_frame *);
916 static int ix86_comp_type_attributes (tree, tree);
917 static int ix86_function_regparm (tree, tree);
918 const struct attribute_spec ix86_attribute_table[];
919 static bool ix86_function_ok_for_sibcall (tree, tree);
920 static tree ix86_handle_cdecl_attribute (tree *, tree, tree, int, bool *);
921 static tree ix86_handle_regparm_attribute (tree *, tree, tree, int, bool *);
922 static int ix86_value_regno (enum machine_mode, tree);
923 static bool contains_128bit_aligned_vector_p (tree);
924 static rtx ix86_struct_value_rtx (tree, int);
925 static bool ix86_ms_bitfield_layout_p (tree);
926 static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
927 static int extended_reg_mentioned_1 (rtx *, void *);
928 static bool ix86_rtx_costs (rtx, int, int, int *);
929 static int min_insn_size (rtx);
930 static tree ix86_md_asm_clobbers (tree outputs, tree inputs, tree clobbers);
931 static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type);
932 static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
933 tree, bool);
934 static void ix86_init_builtins (void);
935 static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
936
937 /* This function is only used on Solaris. */
938 static void i386_solaris_elf_named_section (const char *, unsigned int, tree)
939 ATTRIBUTE_UNUSED;
940
941 /* Register class used for passing a given 64bit part of the argument.
942 These represent classes as documented by the PS ABI, with the exception
943 of the SSESF and SSEDF classes, which are basically the SSE class, except
944 that gcc uses an SFmode or DFmode move instead of DImode to avoid
945 reformatting penalties.
946
947 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
948 whenever possible (i.e. when the upper half contains only padding). */
949 enum x86_64_reg_class
950 {
951 X86_64_NO_CLASS,
952 X86_64_INTEGER_CLASS,
953 X86_64_INTEGERSI_CLASS,
954 X86_64_SSE_CLASS,
955 X86_64_SSESF_CLASS,
956 X86_64_SSEDF_CLASS,
957 X86_64_SSEUP_CLASS,
958 X86_64_X87_CLASS,
959 X86_64_X87UP_CLASS,
960 X86_64_COMPLEX_X87_CLASS,
961 X86_64_MEMORY_CLASS
962 };
963 static const char * const x86_64_reg_class_name[] = {
964 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
965 "sseup", "x87", "x87up", "cplx87", "no"
966 };
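/* As a rough example of the classification: a struct { double d; int i; }
   argument spans two eightbytes, classified approximately as
   X86_64_SSEDF_CLASS followed by X86_64_INTEGERSI_CLASS, so the double is
   passed in an SSE register and the int in a general register.  */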
967
968 #define MAX_CLASSES 4
969
970 /* Table of constants used by fldpi, fldln2, etc.... */
971 static REAL_VALUE_TYPE ext_80387_constants_table [5];
972 static bool ext_80387_constants_init = 0;
973 static void init_ext_80387_constants (void);
974 \f
975 /* Initialize the GCC target structure. */
976 #undef TARGET_ATTRIBUTE_TABLE
977 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
978 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
979 # undef TARGET_MERGE_DECL_ATTRIBUTES
980 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
981 #endif
982
983 #undef TARGET_COMP_TYPE_ATTRIBUTES
984 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
985
986 #undef TARGET_INIT_BUILTINS
987 #define TARGET_INIT_BUILTINS ix86_init_builtins
988 #undef TARGET_EXPAND_BUILTIN
989 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
990
991 #undef TARGET_ASM_FUNCTION_EPILOGUE
992 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
993
994 #undef TARGET_ASM_OPEN_PAREN
995 #define TARGET_ASM_OPEN_PAREN ""
996 #undef TARGET_ASM_CLOSE_PAREN
997 #define TARGET_ASM_CLOSE_PAREN ""
998
999 #undef TARGET_ASM_ALIGNED_HI_OP
1000 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
1001 #undef TARGET_ASM_ALIGNED_SI_OP
1002 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
1003 #ifdef ASM_QUAD
1004 #undef TARGET_ASM_ALIGNED_DI_OP
1005 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
1006 #endif
1007
1008 #undef TARGET_ASM_UNALIGNED_HI_OP
1009 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
1010 #undef TARGET_ASM_UNALIGNED_SI_OP
1011 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1012 #undef TARGET_ASM_UNALIGNED_DI_OP
1013 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1014
1015 #undef TARGET_SCHED_ADJUST_COST
1016 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1017 #undef TARGET_SCHED_ISSUE_RATE
1018 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1019 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1020 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1021 ia32_multipass_dfa_lookahead
1022
1023 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1024 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1025
1026 #ifdef HAVE_AS_TLS
1027 #undef TARGET_HAVE_TLS
1028 #define TARGET_HAVE_TLS true
1029 #endif
1030 #undef TARGET_CANNOT_FORCE_CONST_MEM
1031 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1032
1033 #undef TARGET_DELEGITIMIZE_ADDRESS
1034 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1035
1036 #undef TARGET_MS_BITFIELD_LAYOUT_P
1037 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1038
1039 #undef TARGET_ASM_OUTPUT_MI_THUNK
1040 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1041 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1042 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1043
1044 #undef TARGET_ASM_FILE_START
1045 #define TARGET_ASM_FILE_START x86_file_start
1046
1047 #undef TARGET_DEFAULT_TARGET_FLAGS
1048 #define TARGET_DEFAULT_TARGET_FLAGS \
1049 (TARGET_DEFAULT \
1050 | TARGET_64BIT_DEFAULT \
1051 | TARGET_SUBTARGET_DEFAULT \
1052 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
1053
1054 #undef TARGET_HANDLE_OPTION
1055 #define TARGET_HANDLE_OPTION ix86_handle_option
1056
1057 #undef TARGET_RTX_COSTS
1058 #define TARGET_RTX_COSTS ix86_rtx_costs
1059 #undef TARGET_ADDRESS_COST
1060 #define TARGET_ADDRESS_COST ix86_address_cost
1061
1062 #undef TARGET_FIXED_CONDITION_CODE_REGS
1063 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1064 #undef TARGET_CC_MODES_COMPATIBLE
1065 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1066
1067 #undef TARGET_MACHINE_DEPENDENT_REORG
1068 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1069
1070 #undef TARGET_BUILD_BUILTIN_VA_LIST
1071 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1072
1073 #undef TARGET_MD_ASM_CLOBBERS
1074 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1075
1076 #undef TARGET_PROMOTE_PROTOTYPES
1077 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1078 #undef TARGET_STRUCT_VALUE_RTX
1079 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1080 #undef TARGET_SETUP_INCOMING_VARARGS
1081 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1082 #undef TARGET_MUST_PASS_IN_STACK
1083 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
1084 #undef TARGET_PASS_BY_REFERENCE
1085 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
1086
1087 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1088 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1089
1090 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1091 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
1092
1093 #ifdef SUBTARGET_INSERT_ATTRIBUTES
1094 #undef TARGET_INSERT_ATTRIBUTES
1095 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
1096 #endif
1097
1098 struct gcc_target targetm = TARGET_INITIALIZER;
1099
1100 \f
1101 /* The svr4 ABI for the i386 says that records and unions are returned
1102 in memory. */
1103 #ifndef DEFAULT_PCC_STRUCT_RETURN
1104 #define DEFAULT_PCC_STRUCT_RETURN 1
1105 #endif
1106
1107 /* Implement TARGET_HANDLE_OPTION. */
1108
1109 static bool
1110 ix86_handle_option (size_t code, const char *arg, int value)
1111 {
1112 switch (code)
1113 {
1114 case OPT_m3dnow:
1115 if (!value)
1116 {
1117 target_flags &= ~MASK_3DNOW_A;
1118 target_flags_explicit |= MASK_3DNOW_A;
1119 }
1120 return true;
1121
1122 case OPT_malign_functions_:
1123 ix86_align_funcs_string = arg;
1124 return true;
1125
1126 case OPT_malign_jumps_:
1127 ix86_align_jumps_string = arg;
1128 return true;
1129
1130 case OPT_malign_loops_:
1131 ix86_align_loops_string = arg;
1132 return true;
1133
1134 case OPT_march_:
1135 ix86_arch_string = arg;
1136 return true;
1137
1138 case OPT_masm_:
1139 ix86_asm_string = arg;
1140 return true;
1141
1142 case OPT_mbranch_cost_:
1143 ix86_branch_cost_string = arg;
1144 return true;
1145
1146 case OPT_mcmodel_:
1147 ix86_cmodel_string = arg;
1148 return true;
1149
1150 case OPT_mfpmath_:
1151 ix86_fpmath_string = arg;
1152 return true;
1153
1154 case OPT_mmmx:
1155 if (!value)
1156 {
1157 target_flags &= ~(MASK_3DNOW | MASK_3DNOW_A);
1158 target_flags_explicit |= MASK_3DNOW | MASK_3DNOW_A;
1159 }
1160 return true;
1161
1162 case OPT_mpreferred_stack_boundary_:
1163 ix86_preferred_stack_boundary_string = arg;
1164 return true;
1165
1166 case OPT_mregparm_:
1167 ix86_regparm_string = arg;
1168 return true;
1169
1170 case OPT_msse:
1171 if (!value)
1172 {
1173 target_flags &= ~(MASK_SSE2 | MASK_SSE3);
1174 target_flags_explicit |= MASK_SSE2 | MASK_SSE3;
1175 }
1176 return true;
1177
1178 case OPT_msse2:
1179 if (!value)
1180 {
1181 target_flags &= ~MASK_SSE3;
1182 target_flags_explicit |= MASK_SSE3;
1183 }
1184 return true;
1185
1186 case OPT_mtls_dialect_:
1187 ix86_tls_dialect_string = arg;
1188 return true;
1189
1190 case OPT_mtune_:
1191 ix86_tune_string = arg;
1192 return true;
1193
1194 default:
1195 return true;
1196 }
1197 }
1198
1199 /* Sometimes certain combinations of command options do not make
1200 sense on a particular target machine. You can define a macro
1201 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1202 defined, is executed once just after all the command options have
1203 been parsed.
1204
1205 Don't use this macro to turn on various extra optimizations for
1206 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1207
1208 void
1209 override_options (void)
1210 {
1211 int i;
1212 int ix86_tune_defaulted = 0;
1213
1214 /* Comes from final.c -- no real reason to change it. */
1215 #define MAX_CODE_ALIGN 16
1216
1217 static struct ptt
1218 {
1219 const struct processor_costs *cost; /* Processor costs */
1220 const int target_enable; /* Target flags to enable. */
1221 const int target_disable; /* Target flags to disable. */
1222 const int align_loop; /* Default alignments. */
1223 const int align_loop_max_skip;
1224 const int align_jump;
1225 const int align_jump_max_skip;
1226 const int align_func;
1227 }
1228 const processor_target_table[PROCESSOR_max] =
1229 {
1230 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1231 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1232 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1233 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1234 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1235 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1236 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1237 {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
1238 {&nocona_cost, 0, 0, 0, 0, 0, 0, 0}
1239 };
1240
1241 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1242 static struct pta
1243 {
1244 const char *const name; /* processor name or nickname. */
1245 const enum processor_type processor;
1246 const enum pta_flags
1247 {
1248 PTA_SSE = 1,
1249 PTA_SSE2 = 2,
1250 PTA_SSE3 = 4,
1251 PTA_MMX = 8,
1252 PTA_PREFETCH_SSE = 16,
1253 PTA_3DNOW = 32,
1254 PTA_3DNOW_A = 64,
1255 PTA_64BIT = 128
1256 } flags;
1257 }
1258 const processor_alias_table[] =
1259 {
1260 {"i386", PROCESSOR_I386, 0},
1261 {"i486", PROCESSOR_I486, 0},
1262 {"i586", PROCESSOR_PENTIUM, 0},
1263 {"pentium", PROCESSOR_PENTIUM, 0},
1264 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1265 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1266 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1267 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1268 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1269 {"i686", PROCESSOR_PENTIUMPRO, 0},
1270 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1271 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1272 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1273 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1274 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1275 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1276 | PTA_MMX | PTA_PREFETCH_SSE},
1277 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1278 | PTA_MMX | PTA_PREFETCH_SSE},
1279 {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
1280 | PTA_MMX | PTA_PREFETCH_SSE},
1281 {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1282 | PTA_MMX | PTA_PREFETCH_SSE},
1283 {"k6", PROCESSOR_K6, PTA_MMX},
1284 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1285 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1286 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1287 | PTA_3DNOW_A},
1288 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1289 | PTA_3DNOW | PTA_3DNOW_A},
1290 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1291 | PTA_3DNOW_A | PTA_SSE},
1292 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1293 | PTA_3DNOW_A | PTA_SSE},
1294 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1295 | PTA_3DNOW_A | PTA_SSE},
1296 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1297 | PTA_SSE | PTA_SSE2 },
1298 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1299 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1300 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1301 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1302 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1303 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1304 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1305 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1306 };
1307
1308 int const pta_size = ARRAY_SIZE (processor_alias_table);
1309
1310 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1311 SUBTARGET_OVERRIDE_OPTIONS;
1312 #endif
1313
1314 /* Set the default values for switches whose default depends on TARGET_64BIT
1315 in case they weren't overwritten by command line options. */
1316 if (TARGET_64BIT)
1317 {
1318 if (flag_omit_frame_pointer == 2)
1319 flag_omit_frame_pointer = 1;
1320 if (flag_asynchronous_unwind_tables == 2)
1321 flag_asynchronous_unwind_tables = 1;
1322 if (flag_pcc_struct_return == 2)
1323 flag_pcc_struct_return = 0;
1324 }
1325 else
1326 {
1327 if (flag_omit_frame_pointer == 2)
1328 flag_omit_frame_pointer = 0;
1329 if (flag_asynchronous_unwind_tables == 2)
1330 flag_asynchronous_unwind_tables = 0;
1331 if (flag_pcc_struct_return == 2)
1332 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1333 }
1334
1335 if (!ix86_tune_string && ix86_arch_string)
1336 ix86_tune_string = ix86_arch_string;
1337 if (!ix86_tune_string)
1338 {
1339 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1340 ix86_tune_defaulted = 1;
1341 }
1342 if (!ix86_arch_string)
1343 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
1344
1345 if (ix86_cmodel_string != 0)
1346 {
1347 if (!strcmp (ix86_cmodel_string, "small"))
1348 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1349 else if (flag_pic)
1350 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1351 else if (!strcmp (ix86_cmodel_string, "32"))
1352 ix86_cmodel = CM_32;
1353 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1354 ix86_cmodel = CM_KERNEL;
1355 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
1356 ix86_cmodel = CM_MEDIUM;
1357 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1358 ix86_cmodel = CM_LARGE;
1359 else
1360 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1361 }
1362 else
1363 {
1364 ix86_cmodel = CM_32;
1365 if (TARGET_64BIT)
1366 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1367 }
1368 if (ix86_asm_string != 0)
1369 {
1370 if (!strcmp (ix86_asm_string, "intel"))
1371 ix86_asm_dialect = ASM_INTEL;
1372 else if (!strcmp (ix86_asm_string, "att"))
1373 ix86_asm_dialect = ASM_ATT;
1374 else
1375 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1376 }
1377 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1378 error ("code model %qs not supported in the %s bit mode",
1379 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1380 if (ix86_cmodel == CM_LARGE)
1381 sorry ("code model %<large%> not supported yet");
1382 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1383 sorry ("%i-bit mode not compiled in",
1384 (target_flags & MASK_64BIT) ? 64 : 32);
1385
1386 for (i = 0; i < pta_size; i++)
1387 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1388 {
1389 ix86_arch = processor_alias_table[i].processor;
1390 /* Default cpu tuning to the architecture. */
1391 ix86_tune = ix86_arch;
1392 if (processor_alias_table[i].flags & PTA_MMX
1393 && !(target_flags_explicit & MASK_MMX))
1394 target_flags |= MASK_MMX;
1395 if (processor_alias_table[i].flags & PTA_3DNOW
1396 && !(target_flags_explicit & MASK_3DNOW))
1397 target_flags |= MASK_3DNOW;
1398 if (processor_alias_table[i].flags & PTA_3DNOW_A
1399 && !(target_flags_explicit & MASK_3DNOW_A))
1400 target_flags |= MASK_3DNOW_A;
1401 if (processor_alias_table[i].flags & PTA_SSE
1402 && !(target_flags_explicit & MASK_SSE))
1403 target_flags |= MASK_SSE;
1404 if (processor_alias_table[i].flags & PTA_SSE2
1405 && !(target_flags_explicit & MASK_SSE2))
1406 target_flags |= MASK_SSE2;
1407 if (processor_alias_table[i].flags & PTA_SSE3
1408 && !(target_flags_explicit & MASK_SSE3))
1409 target_flags |= MASK_SSE3;
1410 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1411 x86_prefetch_sse = true;
1412 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1413 error ("CPU you selected does not support x86-64 "
1414 "instruction set");
1415 break;
1416 }
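  /* For example, -march=pentium3 selects PROCESSOR_PENTIUMPRO above and
     turns on MASK_MMX and MASK_SSE (plus SSE prefetch) unless the user set
     those flags explicitly; ix86_tune then defaults to the same processor
     unless -mtune overrides it below.  */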
1417
1418 if (i == pta_size)
1419 error ("bad value (%s) for -march= switch", ix86_arch_string);
1420
1421 for (i = 0; i < pta_size; i++)
1422 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1423 {
1424 ix86_tune = processor_alias_table[i].processor;
1425 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1426 {
1427 if (ix86_tune_defaulted)
1428 {
1429 ix86_tune_string = "x86-64";
1430 for (i = 0; i < pta_size; i++)
1431 if (! strcmp (ix86_tune_string,
1432 processor_alias_table[i].name))
1433 break;
1434 ix86_tune = processor_alias_table[i].processor;
1435 }
1436 else
1437 error ("CPU you selected does not support x86-64 "
1438 "instruction set");
1439 }
1440 /* Intel CPUs have always interpreted SSE prefetch instructions as
1441 NOPs; so, we can enable SSE prefetch instructions even when
1442 -mtune (rather than -march) points us to a processor that has them.
1443 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
1444 higher processors. */
1445 if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
1446 x86_prefetch_sse = true;
1447 break;
1448 }
1449 if (i == pta_size)
1450 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1451
1452 if (optimize_size)
1453 ix86_cost = &size_cost;
1454 else
1455 ix86_cost = processor_target_table[ix86_tune].cost;
1456 target_flags |= processor_target_table[ix86_tune].target_enable;
1457 target_flags &= ~processor_target_table[ix86_tune].target_disable;
1458
1459 /* Arrange to set up i386_stack_locals for all functions. */
1460 init_machine_status = ix86_init_machine_status;
1461
1462 /* Validate -mregparm= value. */
1463 if (ix86_regparm_string)
1464 {
1465 i = atoi (ix86_regparm_string);
1466 if (i < 0 || i > REGPARM_MAX)
1467 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1468 else
1469 ix86_regparm = i;
1470 }
1471 else
1472 if (TARGET_64BIT)
1473 ix86_regparm = REGPARM_MAX;
1474
1475 /* If the user has provided any of the -malign-* options,
1476 warn and use that value only if -falign-* is not set.
1477 Remove this code in GCC 3.2 or later. */
1478 if (ix86_align_loops_string)
1479 {
1480 warning (0, "-malign-loops is obsolete, use -falign-loops");
1481 if (align_loops == 0)
1482 {
1483 i = atoi (ix86_align_loops_string);
1484 if (i < 0 || i > MAX_CODE_ALIGN)
1485 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1486 else
1487 align_loops = 1 << i;
1488 }
1489 }
1490
1491 if (ix86_align_jumps_string)
1492 {
1493 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
1494 if (align_jumps == 0)
1495 {
1496 i = atoi (ix86_align_jumps_string);
1497 if (i < 0 || i > MAX_CODE_ALIGN)
1498 error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1499 else
1500 align_jumps = 1 << i;
1501 }
1502 }
1503
1504 if (ix86_align_funcs_string)
1505 {
1506 warning (0, "-malign-functions is obsolete, use -falign-functions");
1507 if (align_functions == 0)
1508 {
1509 i = atoi (ix86_align_funcs_string);
1510 if (i < 0 || i > MAX_CODE_ALIGN)
1511 error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1512 else
1513 align_functions = 1 << i;
1514 }
1515 }
1516
1517 /* Default align_* from the processor table. */
1518 if (align_loops == 0)
1519 {
1520 align_loops = processor_target_table[ix86_tune].align_loop;
1521 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
1522 }
1523 if (align_jumps == 0)
1524 {
1525 align_jumps = processor_target_table[ix86_tune].align_jump;
1526 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
1527 }
1528 if (align_functions == 0)
1529 {
1530 align_functions = processor_target_table[ix86_tune].align_func;
1531 }
1532
1533 /* Validate -mpreferred-stack-boundary= value, or provide default.
1534 The default of 128 bits is for Pentium III's SSE __m128, but we
1535 don't want additional code to keep the stack aligned when
1536 optimizing for code size. */
1537 ix86_preferred_stack_boundary = (optimize_size
1538 ? TARGET_64BIT ? 128 : 32
1539 : 128);
1540 if (ix86_preferred_stack_boundary_string)
1541 {
1542 i = atoi (ix86_preferred_stack_boundary_string);
1543 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1544 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1545 TARGET_64BIT ? 4 : 2);
1546 else
1547 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1548 }
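 /* For illustration: the option value is the log2 of the boundary in bytes,
 so with a hypothetical -mpreferred-stack-boundary=4 the computation above
 yields (1 << 4) * BITS_PER_UNIT = 128 bits, i.e. a 16-byte aligned stack,
 while the 32-bit minimum value of 2 gives the plain 4-byte ABI alignment.  */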
1549
1550 /* Validate -mbranch-cost= value, or provide default. */
1551 ix86_branch_cost = processor_target_table[ix86_tune].cost->branch_cost;
1552 if (ix86_branch_cost_string)
1553 {
1554 i = atoi (ix86_branch_cost_string);
1555 if (i < 0 || i > 5)
1556 error ("-mbranch-cost=%d is not between 0 and 5", i);
1557 else
1558 ix86_branch_cost = i;
1559 }
1560
1561 if (ix86_tls_dialect_string)
1562 {
1563 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1564 ix86_tls_dialect = TLS_DIALECT_GNU;
1565 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1566 ix86_tls_dialect = TLS_DIALECT_SUN;
1567 else
1568 error ("bad value (%s) for -mtls-dialect= switch",
1569 ix86_tls_dialect_string);
1570 }
1571
1572 /* Keep nonleaf frame pointers. */
1573 if (flag_omit_frame_pointer)
1574 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
1575 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
1576 flag_omit_frame_pointer = 1;
1577
1578 /* If we're doing fast math, we don't care about comparison order
1579 wrt NaNs. This lets us use a shorter comparison sequence. */
1580 if (flag_unsafe_math_optimizations)
1581 target_flags &= ~MASK_IEEE_FP;
1582
1583 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1584 since the insns won't need emulation. */
1585 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1586 target_flags &= ~MASK_NO_FANCY_MATH_387;
1587
1588 /* Likewise, if the target doesn't have a 387, or we've specified
1589 software floating point, don't use 387 inline intrinsics. */
1590 if (!TARGET_80387)
1591 target_flags |= MASK_NO_FANCY_MATH_387;
1592
1593 /* Turn on SSE2 builtins for -msse3. */
1594 if (TARGET_SSE3)
1595 target_flags |= MASK_SSE2;
1596
1597 /* Turn on SSE builtins for -msse2. */
1598 if (TARGET_SSE2)
1599 target_flags |= MASK_SSE;
1600
1601 /* Turn on MMX builtins for -msse. */
1602 if (TARGET_SSE)
1603 {
1604 target_flags |= MASK_MMX & ~target_flags_explicit;
1605 x86_prefetch_sse = true;
1606 }
1607
1608 /* Turn on MMX builtins for 3Dnow. */
1609 if (TARGET_3DNOW)
1610 target_flags |= MASK_MMX;
1611
1612 if (TARGET_64BIT)
1613 {
1614 if (TARGET_ALIGN_DOUBLE)
1615 error ("-malign-double makes no sense in 64-bit mode");
1616 if (TARGET_RTD)
1617 error ("-mrtd calling convention not supported in 64-bit mode");
1618
1619 /* Enable by default the SSE and MMX builtins. Do allow the user to
1620 explicitly disable any of these. In particular, disabling SSE and
1621 MMX for kernel code is extremely useful. */
1622 target_flags
1623 |= ((MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE)
1624 & ~target_flags_explicit);
1625 }
1626 else
1627 {
1628 /* The i386 ABI does not specify a red zone. It still makes sense to use one
1629 when the programmer takes care to keep the stack from being destroyed. */
1630 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
1631 target_flags |= MASK_NO_RED_ZONE;
1632 }
1633
1634 ix86_fpmath = TARGET_FPMATH_DEFAULT;
1635
1636 if (ix86_fpmath_string != 0)
1637 {
1638 if (! strcmp (ix86_fpmath_string, "387"))
1639 ix86_fpmath = FPMATH_387;
1640 else if (! strcmp (ix86_fpmath_string, "sse"))
1641 {
1642 if (!TARGET_SSE)
1643 {
1644 warning (0, "SSE instruction set disabled, using 387 arithmetics");
1645 ix86_fpmath = FPMATH_387;
1646 }
1647 else
1648 ix86_fpmath = FPMATH_SSE;
1649 }
1650 else if (! strcmp (ix86_fpmath_string, "387,sse")
1651 || ! strcmp (ix86_fpmath_string, "sse,387"))
1652 {
1653 if (!TARGET_SSE)
1654 {
1655 warning (0, "SSE instruction set disabled, using 387 arithmetics");
1656 ix86_fpmath = FPMATH_387;
1657 }
1658 else if (!TARGET_80387)
1659 {
1660 warning (0, "387 instruction set disabled, using SSE arithmetics");
1661 ix86_fpmath = FPMATH_SSE;
1662 }
1663 else
1664 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1665 }
1666 else
1667 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1668 }
1669
1670 /* If the i387 is disabled, then do not return values in it. */
1671 if (!TARGET_80387)
1672 target_flags &= ~MASK_FLOAT_RETURNS;
1673
1674 if ((x86_accumulate_outgoing_args & TUNEMASK)
1675 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1676 && !optimize_size)
1677 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1678
1679 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1680 {
1681 char *p;
1682 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1683 p = strchr (internal_label_prefix, 'X');
1684 internal_label_prefix_len = p - internal_label_prefix;
1685 *p = '\0';
1686 }
1687
1688 /* When scheduling description is not available, disable scheduler pass
1689 so it won't slow down the compilation and make x87 code slower. */
1690 if (!TARGET_SCHEDULE)
1691 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
1692 }
1693 \f
1694 void
1695 optimization_options (int level, int size ATTRIBUTE_UNUSED)
1696 {
1697 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1698 make the problem with not enough registers even worse. */
1699 #ifdef INSN_SCHEDULING
1700 if (level > 1)
1701 flag_schedule_insns = 0;
1702 #endif
1703
1704 /* The default values of these switches depend on TARGET_64BIT,
1705 which is not known at this moment. Mark these values with 2 and
1706 let the user override them. If there is no command line option
1707 specifying them, we will set the defaults in override_options. */
1708 if (optimize >= 1)
1709 flag_omit_frame_pointer = 2;
1710 flag_pcc_struct_return = 2;
1711 flag_asynchronous_unwind_tables = 2;
1712 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
1713 SUBTARGET_OPTIMIZATION_OPTIONS;
1714 #endif
1715 }
1716 \f
1717 /* Table of valid machine attributes. */
1718 const struct attribute_spec ix86_attribute_table[] =
1719 {
1720 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1721 /* Stdcall attribute says callee is responsible for popping arguments
1722 if they are not variable. */
1723 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1724 /* Fastcall attribute says callee is responsible for popping arguments
1725 if they are not variable. */
1726 { "fastcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1727 /* Cdecl attribute says the callee is a normal C declaration */
1728 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1729 /* Regparm attribute specifies how many integer arguments are to be
1730 passed in registers. */
1731 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1732 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1733 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
1734 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
1735 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
1736 #endif
1737 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1738 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1739 #ifdef SUBTARGET_ATTRIBUTE_TABLE
1740 SUBTARGET_ATTRIBUTE_TABLE,
1741 #endif
1742 { NULL, 0, 0, false, false, false, NULL }
1743 };
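 /* A brief illustration of how these attributes appear in user code (the
 function names below are made up for the example):

 int __attribute__ ((stdcall))  s_func (int a, int b);
 int __attribute__ ((fastcall)) f_func (int a, int b);
 int __attribute__ ((regparm (3))) r_func (int a, int b, int c);

 stdcall and fastcall make the callee pop its stack arguments, while
 regparm (N) asks for up to N integer arguments in registers; the handlers
 referenced above reject invalid combinations such as fastcall together
 with regparm.  */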
1744
1745 /* Decide whether we can make a sibling call to a function. DECL is the
1746 declaration of the function being targeted by the call and EXP is the
1747 CALL_EXPR representing the call. */
1748
1749 static bool
1750 ix86_function_ok_for_sibcall (tree decl, tree exp)
1751 {
1752 tree func;
1753
1754 /* If we are generating position-independent code, we cannot sibcall
1755 optimize any indirect call, or a direct call to a global function,
1756 as the PLT requires %ebx be live. */
1757 if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
1758 return false;
1759
1760 if (decl)
1761 func = decl;
1762 else
1763 func = NULL;
1764
1765 /* If we are returning floats on the 80387 register stack, we cannot
1766 make a sibcall from a function that doesn't return a float to a
1767 function that does or, conversely, from a function that does return
1768 a float to a function that doesn't; the necessary stack adjustment
1769 would not be executed. */
1770 if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp), func))
1771 != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
1772 cfun->decl)))
1773 return false;
1774
1775 /* If this call is indirect, we'll need to be able to use a call-clobbered
1776 register for the address of the target function. Make sure that all
1777 such registers are not used for passing parameters. */
1778 if (!decl && !TARGET_64BIT)
1779 {
1780 tree type;
1781
1782 /* We're looking at the CALL_EXPR, we need the type of the function. */
1783 type = TREE_OPERAND (exp, 0); /* pointer expression */
1784 type = TREE_TYPE (type); /* pointer type */
1785 type = TREE_TYPE (type); /* function type */
1786
1787 if (ix86_function_regparm (type, NULL) >= 3)
1788 {
1789 /* ??? Need to count the actual number of registers to be used,
1790 not the possible number of registers. Fix later. */
1791 return false;
1792 }
1793 }
1794
1795 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1796 /* Dllimport'd functions are also called indirectly. */
1797 if (decl && lookup_attribute ("dllimport", DECL_ATTRIBUTES (decl))
1798 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
1799 return false;
1800 #endif
1801
1802 /* Otherwise okay. That also includes certain types of indirect calls. */
1803 return true;
1804 }
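 /* Illustrative example of the PIC restriction above (names made up): with
 -fpic on a 32-bit target,

 extern int g (int);
 static int h (int x) { return x; }
 int caller1 (int x) { return g (x); }  // no sibcall: g goes via the PLT
 int caller2 (int x) { return h (x); }  // sibcall possible: h is local

 the tail call to g cannot become a sibcall because its PLT entry needs
 %ebx, whereas the file-local, non-public h remains a candidate.  */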
1805
1806 /* Handle a "cdecl", "stdcall", or "fastcall" attribute;
1807 arguments as in struct attribute_spec.handler. */
1808 static tree
1809 ix86_handle_cdecl_attribute (tree *node, tree name,
1810 tree args ATTRIBUTE_UNUSED,
1811 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1812 {
1813 if (TREE_CODE (*node) != FUNCTION_TYPE
1814 && TREE_CODE (*node) != METHOD_TYPE
1815 && TREE_CODE (*node) != FIELD_DECL
1816 && TREE_CODE (*node) != TYPE_DECL)
1817 {
1818 warning (0, "%qs attribute only applies to functions",
1819 IDENTIFIER_POINTER (name));
1820 *no_add_attrs = true;
1821 }
1822 else
1823 {
1824 if (is_attribute_p ("fastcall", name))
1825 {
1826 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
1827 {
1828 error ("fastcall and stdcall attributes are not compatible");
1829 }
1830 else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
1831 {
1832 error ("fastcall and regparm attributes are not compatible");
1833 }
1834 }
1835 else if (is_attribute_p ("stdcall", name))
1836 {
1837 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1838 {
1839 error ("fastcall and stdcall attributes are not compatible");
1840 }
1841 }
1842 }
1843
1844 if (TARGET_64BIT)
1845 {
1846 warning (0, "%qs attribute ignored", IDENTIFIER_POINTER (name));
1847 *no_add_attrs = true;
1848 }
1849
1850 return NULL_TREE;
1851 }
1852
1853 /* Handle a "regparm" attribute;
1854 arguments as in struct attribute_spec.handler. */
1855 static tree
1856 ix86_handle_regparm_attribute (tree *node, tree name, tree args,
1857 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1858 {
1859 if (TREE_CODE (*node) != FUNCTION_TYPE
1860 && TREE_CODE (*node) != METHOD_TYPE
1861 && TREE_CODE (*node) != FIELD_DECL
1862 && TREE_CODE (*node) != TYPE_DECL)
1863 {
1864 warning (0, "%qs attribute only applies to functions",
1865 IDENTIFIER_POINTER (name));
1866 *no_add_attrs = true;
1867 }
1868 else
1869 {
1870 tree cst;
1871
1872 cst = TREE_VALUE (args);
1873 if (TREE_CODE (cst) != INTEGER_CST)
1874 {
1875 warning (0, "%qs attribute requires an integer constant argument",
1876 IDENTIFIER_POINTER (name));
1877 *no_add_attrs = true;
1878 }
1879 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1880 {
1881 warning (0, "argument to %qs attribute larger than %d",
1882 IDENTIFIER_POINTER (name), REGPARM_MAX);
1883 *no_add_attrs = true;
1884 }
1885
1886 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1887 {
1888 error ("fastcall and regparm attributes are not compatible");
1889 }
1890 }
1891
1892 return NULL_TREE;
1893 }
1894
1895 /* Return 0 if the attributes for two types are incompatible, 1 if they
1896 are compatible, and 2 if they are nearly compatible (which causes a
1897 warning to be generated). */
1898
1899 static int
1900 ix86_comp_type_attributes (tree type1, tree type2)
1901 {
1902 /* Check for mismatch of non-default calling convention. */
1903 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1904
1905 if (TREE_CODE (type1) != FUNCTION_TYPE)
1906 return 1;
1907
1908 /* Check for mismatched fastcall types */
1909 if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
1910 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
1911 return 0;
1912
1913 /* Check for mismatched return types (cdecl vs stdcall). */
1914 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1915 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1916 return 0;
1917 if (ix86_function_regparm (type1, NULL)
1918 != ix86_function_regparm (type2, NULL))
1919 return 0;
1920 return 1;
1921 }
1922 \f
1923 /* Return the regparm value for a function with the indicated TYPE and DECL.
1924 DECL may be NULL when calling function indirectly
1925 or considering a libcall. */
1926
1927 static int
1928 ix86_function_regparm (tree type, tree decl)
1929 {
1930 tree attr;
1931 int regparm = ix86_regparm;
1932 bool user_convention = false;
1933
1934 if (!TARGET_64BIT)
1935 {
1936 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1937 if (attr)
1938 {
1939 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1940 user_convention = true;
1941 }
1942
1943 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
1944 {
1945 regparm = 2;
1946 user_convention = true;
1947 }
1948
1949 /* Use register calling convention for local functions when possible. */
1950 if (!TARGET_64BIT && !user_convention && decl
1951 && flag_unit_at_a_time && !profile_flag)
1952 {
1953 struct cgraph_local_info *i = cgraph_local_info (decl);
1954 if (i && i->local)
1955 {
1956 /* We can't use regparm(3) for nested functions as these use
1957 static chain pointer in third argument. */
1958 if (DECL_CONTEXT (decl) && !DECL_NO_STATIC_CHAIN (decl))
1959 regparm = 2;
1960 else
1961 regparm = 3;
1962 }
1963 }
1964 }
1965 return regparm;
1966 }
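 /* Sketch of the effect above (the function name is hypothetical): with
 -funit-at-a-time and no profiling, a file-local function such as

 static int accumulate (int a, int b, int c) { return a + b + c; }

 whose address is never taken is typically marked local by cgraph, so its
 three arguments are passed in registers as if it had been declared with
 __attribute__ ((regparm (3))); a nested function needing the static chain
 is limited to regparm (2) instead.  */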
1967
1968 /* Return true if EAX is live at the start of the function. Used by
1969 ix86_expand_prologue to determine if we need special help before
1970 calling allocate_stack_worker. */
1971
1972 static bool
1973 ix86_eax_live_at_start_p (void)
1974 {
1975 /* Cheat. Don't bother working forward from ix86_function_regparm
1976 to the function type to whether an actual argument is located in
1977 eax. Instead just look at cfg info, which is still close enough
1978 to correct at this point. This gives false positives for broken
1979 functions that might use uninitialized data that happens to be
1980 allocated in eax, but who cares? */
1981 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->global_live_at_end, 0);
1982 }
1983
1984 /* Value is the number of bytes of arguments automatically
1985 popped when returning from a subroutine call.
1986 FUNDECL is the declaration node of the function (as a tree),
1987 FUNTYPE is the data type of the function (as a tree),
1988 or for a library call it is an identifier node for the subroutine name.
1989 SIZE is the number of bytes of arguments passed on the stack.
1990
1991 On the 80386, the RTD insn may be used to pop them if the number
1992 of args is fixed, but if the number is variable then the caller
1993 must pop them all. RTD can't be used for library calls now
1994 because the library is compiled with the Unix compiler.
1995 Use of RTD is a selectable option, since it is incompatible with
1996 standard Unix calling sequences. If the option is not selected,
1997 the caller must always pop the args.
1998
1999 The attribute stdcall is equivalent to RTD on a per module basis. */
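 /* For illustration (hypothetical declaration): with

 int __attribute__ ((stdcall)) sum2 (int a, int b);

 the callee returns with "ret 8", popping its two stack arguments itself,
 which is what the SIZE return value computed below requests; a variadic
 prototype such as int f (int, ...) keeps the caller-pops convention
 regardless of -mrtd or the stdcall attribute.  */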
2000
2001 int
2002 ix86_return_pops_args (tree fundecl, tree funtype, int size)
2003 {
2004 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
2005
2006 /* Cdecl functions override -mrtd, and never pop the stack. */
2007 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
2008
2009 /* Stdcall and fastcall functions will pop the stack if they do not
2010 take variable args. */
2011 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
2012 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
2013 rtd = 1;
2014
2015 if (rtd
2016 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
2017 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
2018 == void_type_node)))
2019 return size;
2020 }
2021
2022 /* Lose any fake structure return argument if it is passed on the stack. */
2023 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
2024 && !TARGET_64BIT
2025 && !KEEP_AGGREGATE_RETURN_POINTER)
2026 {
2027 int nregs = ix86_function_regparm (funtype, fundecl);
2028
2029 if (!nregs)
2030 return GET_MODE_SIZE (Pmode);
2031 }
2032
2033 return 0;
2034 }
2035 \f
2036 /* Argument support functions. */
2037
2038 /* Return true when register may be used to pass function parameters. */
2039 bool
2040 ix86_function_arg_regno_p (int regno)
2041 {
2042 int i;
2043 if (!TARGET_64BIT)
2044 return (regno < REGPARM_MAX
2045 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
2046 if (SSE_REGNO_P (regno) && TARGET_SSE)
2047 return true;
2048 /* RAX is used as hidden argument to va_arg functions. */
2049 if (!regno)
2050 return true;
2051 for (i = 0; i < REGPARM_MAX; i++)
2052 if (regno == x86_64_int_parameter_registers[i])
2053 return true;
2054 return false;
2055 }
2056
2057 /* Return true if we do not know how to pass TYPE solely in registers. */
2058
2059 static bool
2060 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
2061 {
2062 if (must_pass_in_stack_var_size_or_pad (mode, type))
2063 return true;
2064
2065 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
2066 The layout_type routine is crafty and tries to trick us into passing
2067 currently unsupported vector types on the stack by using TImode. */
2068 return (!TARGET_64BIT && mode == TImode
2069 && type && TREE_CODE (type) != VECTOR_TYPE);
2070 }
2071
2072 /* Initialize a variable CUM of type CUMULATIVE_ARGS
2073 for a call to a function whose data type is FNTYPE.
2074 For a library call, FNTYPE is 0. */
2075
2076 void
2077 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
2078 tree fntype, /* tree ptr for function decl */
2079 rtx libname, /* SYMBOL_REF of library name or 0 */
2080 tree fndecl)
2081 {
2082 static CUMULATIVE_ARGS zero_cum;
2083 tree param, next_param;
2084
2085 if (TARGET_DEBUG_ARG)
2086 {
2087 fprintf (stderr, "\ninit_cumulative_args (");
2088 if (fntype)
2089 fprintf (stderr, "fntype code = %s, ret code = %s",
2090 tree_code_name[(int) TREE_CODE (fntype)],
2091 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
2092 else
2093 fprintf (stderr, "no fntype");
2094
2095 if (libname)
2096 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
2097 }
2098
2099 *cum = zero_cum;
2100
2101 /* Set up the number of registers to use for passing arguments. */
2102 if (fntype)
2103 cum->nregs = ix86_function_regparm (fntype, fndecl);
2104 else
2105 cum->nregs = ix86_regparm;
2106 if (TARGET_SSE)
2107 cum->sse_nregs = SSE_REGPARM_MAX;
2108 if (TARGET_MMX)
2109 cum->mmx_nregs = MMX_REGPARM_MAX;
2110 cum->warn_sse = true;
2111 cum->warn_mmx = true;
2112 cum->maybe_vaarg = false;
2113
2114 /* Use ecx and edx registers if function has fastcall attribute */
2115 if (fntype && !TARGET_64BIT)
2116 {
2117 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
2118 {
2119 cum->nregs = 2;
2120 cum->fastcall = 1;
2121 }
2122 }
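 /* For illustration (hypothetical prototype): for

 void __attribute__ ((fastcall)) draw (int x, int y);

 the settings above reserve two registers for the first two DWORD-or-smaller
 arguments (ECX and EDX, see function_arg), and any further arguments go on
 the stack.  */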
2123
2124 /* Determine if this function has variable arguments. This is
2125 indicated by the last argument being 'void_type_node' if there
2126 are no variable arguments. If there are variable arguments, then
2127 we won't pass anything in registers in 32-bit mode. */
2128
2129 if (cum->nregs || cum->mmx_nregs || cum->sse_nregs)
2130 {
2131 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
2132 param != 0; param = next_param)
2133 {
2134 next_param = TREE_CHAIN (param);
2135 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
2136 {
2137 if (!TARGET_64BIT)
2138 {
2139 cum->nregs = 0;
2140 cum->sse_nregs = 0;
2141 cum->mmx_nregs = 0;
2142 cum->warn_sse = 0;
2143 cum->warn_mmx = 0;
2144 cum->fastcall = 0;
2145 }
2146 cum->maybe_vaarg = true;
2147 }
2148 }
2149 }
2150 if ((!fntype && !libname)
2151 || (fntype && !TYPE_ARG_TYPES (fntype)))
2152 cum->maybe_vaarg = true;
2153
2154 /* For local functions, pass SFmode (and DFmode for SSE2) arguments
2155 in SSE registers even in 32-bit mode, and allow not just 3 but up
2156 to 8 SSE arguments in registers. */
2157 if (!TARGET_64BIT && !cum->maybe_vaarg && !cum->fastcall
2158 && cum->sse_nregs == SSE_REGPARM_MAX && fndecl
2159 && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
2160 {
2161 struct cgraph_local_info *i = cgraph_local_info (fndecl);
2162 if (i && i->local)
2163 {
2164 cum->sse_nregs = 8;
2165 cum->float_in_sse = true;
2166 }
2167 }
2168
2169 if (TARGET_DEBUG_ARG)
2170 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
2171
2172 return;
2173 }
2174
2175 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
2176 But in the case of vector types, it is some vector mode.
2177
2178 When we have only some of our vector isa extensions enabled, then there
2179 are some modes for which vector_mode_supported_p is false. For these
2180 modes, the generic vector support in gcc will choose some non-vector mode
2181 in order to implement the type. By computing the natural mode, we'll
2182 select the proper ABI location for the operand and not depend on whatever
2183 the middle-end decides to do with these vector types. */
2184
2185 static enum machine_mode
2186 type_natural_mode (tree type)
2187 {
2188 enum machine_mode mode = TYPE_MODE (type);
2189
2190 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
2191 {
2192 HOST_WIDE_INT size = int_size_in_bytes (type);
2193 if ((size == 8 || size == 16)
2194 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
2195 && TYPE_VECTOR_SUBPARTS (type) > 1)
2196 {
2197 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
2198
2199 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
2200 mode = MIN_MODE_VECTOR_FLOAT;
2201 else
2202 mode = MIN_MODE_VECTOR_INT;
2203
2204 /* Get the mode which has this inner mode and number of units. */
2205 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
2206 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
2207 && GET_MODE_INNER (mode) == innermode)
2208 return mode;
2209
2210 gcc_unreachable ();
2211 }
2212 }
2213
2214 return mode;
2215 }
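 /* Illustrative example (the typedef name is made up): for

 typedef int v4si __attribute__ ((vector_size (16)));

 type_natural_mode returns V4SImode even when -msse is not enabled and the
 middle end has therefore laid the type out in some non-vector mode; the
 ABI decisions below are thus independent of which ISA extensions happen
 to be switched on.  */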
2216
2217 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
2218 this may not agree with the mode that the type system has chosen for the
2219 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
2220 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
2221
2222 static rtx
2223 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
2224 unsigned int regno)
2225 {
2226 rtx tmp;
2227
2228 if (orig_mode != BLKmode)
2229 tmp = gen_rtx_REG (orig_mode, regno);
2230 else
2231 {
2232 tmp = gen_rtx_REG (mode, regno);
2233 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
2234 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
2235 }
2236
2237 return tmp;
2238 }
2239
2240 /* x86-64 register passing implementation. See the x86-64 ABI for details. The goal
2241 of this code is to classify each 8-byte chunk of an incoming argument by register
2242 class and assign registers accordingly. */
2243
2244 /* Return the union class of CLASS1 and CLASS2.
2245 See the x86-64 PS ABI for details. */
2246
2247 static enum x86_64_reg_class
2248 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
2249 {
2250 /* Rule #1: If both classes are equal, this is the resulting class. */
2251 if (class1 == class2)
2252 return class1;
2253
2254 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
2255 the other class. */
2256 if (class1 == X86_64_NO_CLASS)
2257 return class2;
2258 if (class2 == X86_64_NO_CLASS)
2259 return class1;
2260
2261 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
2262 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
2263 return X86_64_MEMORY_CLASS;
2264
2265 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
2266 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
2267 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
2268 return X86_64_INTEGERSI_CLASS;
2269 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
2270 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
2271 return X86_64_INTEGER_CLASS;
2272
2273 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
2274 MEMORY is used. */
2275 if (class1 == X86_64_X87_CLASS
2276 || class1 == X86_64_X87UP_CLASS
2277 || class1 == X86_64_COMPLEX_X87_CLASS
2278 || class2 == X86_64_X87_CLASS
2279 || class2 == X86_64_X87UP_CLASS
2280 || class2 == X86_64_COMPLEX_X87_CLASS)
2281 return X86_64_MEMORY_CLASS;
2282
2283 /* Rule #6: Otherwise class SSE is used. */
2284 return X86_64_SSE_CLASS;
2285 }
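 /* A few worked instances of the rules above:
 merge (NO_CLASS,  SSE)   -> SSE       (rule #2)
 merge (INTEGERSI, SSESF) -> INTEGERSI (rule #4)
 merge (SSE,       X87)   -> MEMORY    (rule #5)
 merge (SSESF,     SSEDF) -> SSE       (rule #6)  */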
2286
2287 /* Classify the argument of type TYPE and mode MODE.
2288 CLASSES will be filled by the register class used to pass each word
2289 of the operand. The number of words is returned. In case the parameter
2290 should be passed in memory, 0 is returned. As a special case for zero
2291 sized containers, classes[0] will be NO_CLASS and 1 is returned.
2292
2293 BIT_OFFSET is used internally for handling records; it specifies the
2294 offset, in bits modulo 256, to avoid overflow cases.
2295
2296 See the x86-64 PS ABI for details.
2297 */
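 /* For example (assuming the usual LP64 type sizes), a structure such as

 struct s { double d; long l; };

 occupies two eightbytes; the routine classifies the first as SSEDF and
 the second as INTEGER, so the value is passed in one SSE and one integer
 register rather than in memory.  */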
2298
2299 static int
2300 classify_argument (enum machine_mode mode, tree type,
2301 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
2302 {
2303 HOST_WIDE_INT bytes =
2304 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2305 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2306
2307 /* Variable sized entities are always passed/returned in memory. */
2308 if (bytes < 0)
2309 return 0;
2310
2311 if (mode != VOIDmode
2312 && targetm.calls.must_pass_in_stack (mode, type))
2313 return 0;
2314
2315 if (type && AGGREGATE_TYPE_P (type))
2316 {
2317 int i;
2318 tree field;
2319 enum x86_64_reg_class subclasses[MAX_CLASSES];
2320
2321 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
2322 if (bytes > 16)
2323 return 0;
2324
2325 for (i = 0; i < words; i++)
2326 classes[i] = X86_64_NO_CLASS;
2327
2328 /* Zero-sized arrays or structures are NO_CLASS. We return 0 to
2329 signal the memory class, so handle this as a special case. */
2330 if (!words)
2331 {
2332 classes[0] = X86_64_NO_CLASS;
2333 return 1;
2334 }
2335
2336 /* Classify each field of record and merge classes. */
2337 switch (TREE_CODE (type))
2338 {
2339 case RECORD_TYPE:
2340 /* For classes first merge in the field of the subclasses. */
2341 if (TYPE_BINFO (type))
2342 {
2343 tree binfo, base_binfo;
2344 int basenum;
2345
2346 for (binfo = TYPE_BINFO (type), basenum = 0;
2347 BINFO_BASE_ITERATE (binfo, basenum, base_binfo); basenum++)
2348 {
2349 int num;
2350 int offset = tree_low_cst (BINFO_OFFSET (base_binfo), 0) * 8;
2351 tree type = BINFO_TYPE (base_binfo);
2352
2353 num = classify_argument (TYPE_MODE (type),
2354 type, subclasses,
2355 (offset + bit_offset) % 256);
2356 if (!num)
2357 return 0;
2358 for (i = 0; i < num; i++)
2359 {
2360 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2361 classes[i + pos] =
2362 merge_classes (subclasses[i], classes[i + pos]);
2363 }
2364 }
2365 }
2366 /* And now merge the fields of structure. */
2367 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2368 {
2369 if (TREE_CODE (field) == FIELD_DECL)
2370 {
2371 int num;
2372
2373 /* Bitfields are always classified as integer. Handle them
2374 early, since later code would consider them to be
2375 misaligned integers. */
2376 if (DECL_BIT_FIELD (field))
2377 {
2378 for (i = int_bit_position (field) / 8 / 8;
2379 i < (int_bit_position (field)
2380 + tree_low_cst (DECL_SIZE (field), 0)
2381 + 63) / 8 / 8; i++)
2382 classes[i] =
2383 merge_classes (X86_64_INTEGER_CLASS,
2384 classes[i]);
2385 }
2386 else
2387 {
2388 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2389 TREE_TYPE (field), subclasses,
2390 (int_bit_position (field)
2391 + bit_offset) % 256);
2392 if (!num)
2393 return 0;
2394 for (i = 0; i < num; i++)
2395 {
2396 int pos =
2397 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
2398 classes[i + pos] =
2399 merge_classes (subclasses[i], classes[i + pos]);
2400 }
2401 }
2402 }
2403 }
2404 break;
2405
2406 case ARRAY_TYPE:
2407 /* Arrays are handled as small records. */
2408 {
2409 int num;
2410 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2411 TREE_TYPE (type), subclasses, bit_offset);
2412 if (!num)
2413 return 0;
2414
2415 /* The partial classes are now full classes. */
2416 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2417 subclasses[0] = X86_64_SSE_CLASS;
2418 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
2419 subclasses[0] = X86_64_INTEGER_CLASS;
2420
2421 for (i = 0; i < words; i++)
2422 classes[i] = subclasses[i % num];
2423
2424 break;
2425 }
2426 case UNION_TYPE:
2427 case QUAL_UNION_TYPE:
2428 /* Unions are similar to RECORD_TYPE but offset is always 0.
2429 */
2430
2431 /* Unions are not derived. */
2432 gcc_assert (!TYPE_BINFO (type)
2433 || !BINFO_N_BASE_BINFOS (TYPE_BINFO (type)));
2434 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2435 {
2436 if (TREE_CODE (field) == FIELD_DECL)
2437 {
2438 int num;
2439 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2440 TREE_TYPE (field), subclasses,
2441 bit_offset);
2442 if (!num)
2443 return 0;
2444 for (i = 0; i < num; i++)
2445 classes[i] = merge_classes (subclasses[i], classes[i]);
2446 }
2447 }
2448 break;
2449
2450 default:
2451 gcc_unreachable ();
2452 }
2453
2454 /* Final merger cleanup. */
2455 for (i = 0; i < words; i++)
2456 {
2457 /* If one class is MEMORY, everything should be passed in
2458 memory. */
2459 if (classes[i] == X86_64_MEMORY_CLASS)
2460 return 0;
2461
2462 /* The X86_64_SSEUP_CLASS should be always preceded by
2463 X86_64_SSE_CLASS. */
2464 if (classes[i] == X86_64_SSEUP_CLASS
2465 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
2466 classes[i] = X86_64_SSE_CLASS;
2467
2468 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
2469 if (classes[i] == X86_64_X87UP_CLASS
2470 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
2471 classes[i] = X86_64_SSE_CLASS;
2472 }
2473 return words;
2474 }
2475
2476 /* Compute the alignment needed. We align all types to natural boundaries,
2477 with the exception of XFmode, which is aligned to 64 bits. */
2478 if (mode != VOIDmode && mode != BLKmode)
2479 {
2480 int mode_alignment = GET_MODE_BITSIZE (mode);
2481
2482 if (mode == XFmode)
2483 mode_alignment = 128;
2484 else if (mode == XCmode)
2485 mode_alignment = 256;
2486 if (COMPLEX_MODE_P (mode))
2487 mode_alignment /= 2;
2488 /* Misaligned fields are always returned in memory. */
2489 if (bit_offset % mode_alignment)
2490 return 0;
2491 }
2492
2493 /* for V1xx modes, just use the base mode */
2494 if (VECTOR_MODE_P (mode)
2495 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
2496 mode = GET_MODE_INNER (mode);
2497
2498 /* Classification of atomic types. */
2499 switch (mode)
2500 {
2501 case DImode:
2502 case SImode:
2503 case HImode:
2504 case QImode:
2505 case CSImode:
2506 case CHImode:
2507 case CQImode:
2508 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2509 classes[0] = X86_64_INTEGERSI_CLASS;
2510 else
2511 classes[0] = X86_64_INTEGER_CLASS;
2512 return 1;
2513 case CDImode:
2514 case TImode:
2515 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2516 return 2;
2517 case CTImode:
2518 return 0;
2519 case SFmode:
2520 if (!(bit_offset % 64))
2521 classes[0] = X86_64_SSESF_CLASS;
2522 else
2523 classes[0] = X86_64_SSE_CLASS;
2524 return 1;
2525 case DFmode:
2526 classes[0] = X86_64_SSEDF_CLASS;
2527 return 1;
2528 case XFmode:
2529 classes[0] = X86_64_X87_CLASS;
2530 classes[1] = X86_64_X87UP_CLASS;
2531 return 2;
2532 case TFmode:
2533 classes[0] = X86_64_SSE_CLASS;
2534 classes[1] = X86_64_SSEUP_CLASS;
2535 return 2;
2536 case SCmode:
2537 classes[0] = X86_64_SSE_CLASS;
2538 return 1;
2539 case DCmode:
2540 classes[0] = X86_64_SSEDF_CLASS;
2541 classes[1] = X86_64_SSEDF_CLASS;
2542 return 2;
2543 case XCmode:
2544 classes[0] = X86_64_COMPLEX_X87_CLASS;
2545 return 1;
2546 case TCmode:
2547 /* This mode is larger than 16 bytes. */
2548 return 0;
2549 case V4SFmode:
2550 case V4SImode:
2551 case V16QImode:
2552 case V8HImode:
2553 case V2DFmode:
2554 case V2DImode:
2555 classes[0] = X86_64_SSE_CLASS;
2556 classes[1] = X86_64_SSEUP_CLASS;
2557 return 2;
2558 case V2SFmode:
2559 case V2SImode:
2560 case V4HImode:
2561 case V8QImode:
2562 classes[0] = X86_64_SSE_CLASS;
2563 return 1;
2564 case BLKmode:
2565 case VOIDmode:
2566 return 0;
2567 default:
2568 gcc_assert (VECTOR_MODE_P (mode));
2569
2570 if (bytes > 16)
2571 return 0;
2572
2573 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
2574
2575 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2576 classes[0] = X86_64_INTEGERSI_CLASS;
2577 else
2578 classes[0] = X86_64_INTEGER_CLASS;
2579 classes[1] = X86_64_INTEGER_CLASS;
2580 return 1 + (bytes > 8);
2581 }
2582 }
2583
2584 /* Examine the argument and set the number of registers required in each
2585 class. Return 0 iff the parameter should be passed in memory. */
2586 static int
2587 examine_argument (enum machine_mode mode, tree type, int in_return,
2588 int *int_nregs, int *sse_nregs)
2589 {
2590 enum x86_64_reg_class class[MAX_CLASSES];
2591 int n = classify_argument (mode, type, class, 0);
2592
2593 *int_nregs = 0;
2594 *sse_nregs = 0;
2595 if (!n)
2596 return 0;
2597 for (n--; n >= 0; n--)
2598 switch (class[n])
2599 {
2600 case X86_64_INTEGER_CLASS:
2601 case X86_64_INTEGERSI_CLASS:
2602 (*int_nregs)++;
2603 break;
2604 case X86_64_SSE_CLASS:
2605 case X86_64_SSESF_CLASS:
2606 case X86_64_SSEDF_CLASS:
2607 (*sse_nregs)++;
2608 break;
2609 case X86_64_NO_CLASS:
2610 case X86_64_SSEUP_CLASS:
2611 break;
2612 case X86_64_X87_CLASS:
2613 case X86_64_X87UP_CLASS:
2614 if (!in_return)
2615 return 0;
2616 break;
2617 case X86_64_COMPLEX_X87_CLASS:
2618 return in_return ? 2 : 0;
2619 case X86_64_MEMORY_CLASS:
2620 gcc_unreachable ();
2621 }
2622 return 1;
2623 }
2624
2625 /* Construct container for the argument used by GCC interface. See
2626 FUNCTION_ARG for the detailed description. */
2627
2628 static rtx
2629 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
2630 tree type, int in_return, int nintregs, int nsseregs,
2631 const int *intreg, int sse_regno)
2632 {
2633 enum machine_mode tmpmode;
2634 int bytes =
2635 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2636 enum x86_64_reg_class class[MAX_CLASSES];
2637 int n;
2638 int i;
2639 int nexps = 0;
2640 int needed_sseregs, needed_intregs;
2641 rtx exp[MAX_CLASSES];
2642 rtx ret;
2643
2644 n = classify_argument (mode, type, class, 0);
2645 if (TARGET_DEBUG_ARG)
2646 {
2647 if (!n)
2648 fprintf (stderr, "Memory class\n");
2649 else
2650 {
2651 fprintf (stderr, "Classes:");
2652 for (i = 0; i < n; i++)
2653 {
2654 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2655 }
2656 fprintf (stderr, "\n");
2657 }
2658 }
2659 if (!n)
2660 return NULL;
2661 if (!examine_argument (mode, type, in_return, &needed_intregs,
2662 &needed_sseregs))
2663 return NULL;
2664 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2665 return NULL;
2666
2667 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
2668 some less clueful developer tries to use floating-point anyway. */
2669 if (needed_sseregs && !TARGET_SSE)
2670 {
2671 static bool issued_error;
2672 if (!issued_error)
2673 {
2674 issued_error = true;
2675 if (in_return)
2676 error ("SSE register return with SSE disabled");
2677 else
2678 error ("SSE register argument with SSE disabled");
2679 }
2680 return NULL;
2681 }
2682
2683 /* First construct simple cases. Avoid SCmode, since we want to use
2684 single register to pass this type. */
2685 if (n == 1 && mode != SCmode)
2686 switch (class[0])
2687 {
2688 case X86_64_INTEGER_CLASS:
2689 case X86_64_INTEGERSI_CLASS:
2690 return gen_rtx_REG (mode, intreg[0]);
2691 case X86_64_SSE_CLASS:
2692 case X86_64_SSESF_CLASS:
2693 case X86_64_SSEDF_CLASS:
2694 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
2695 case X86_64_X87_CLASS:
2696 case X86_64_COMPLEX_X87_CLASS:
2697 return gen_rtx_REG (mode, FIRST_STACK_REG);
2698 case X86_64_NO_CLASS:
2699 /* Zero sized array, struct or class. */
2700 return NULL;
2701 default:
2702 gcc_unreachable ();
2703 }
2704 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
2705 && mode != BLKmode)
2706 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2707 if (n == 2
2708 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2709 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
2710 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2711 && class[1] == X86_64_INTEGER_CLASS
2712 && (mode == CDImode || mode == TImode || mode == TFmode)
2713 && intreg[0] + 1 == intreg[1])
2714 return gen_rtx_REG (mode, intreg[0]);
2715
2716 /* Otherwise figure out the entries of the PARALLEL. */
2717 for (i = 0; i < n; i++)
2718 {
2719 switch (class[i])
2720 {
2721 case X86_64_NO_CLASS:
2722 break;
2723 case X86_64_INTEGER_CLASS:
2724 case X86_64_INTEGERSI_CLASS:
2725 /* Merge TImodes on aligned occasions here too. */
2726 if (i * 8 + 8 > bytes)
2727 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2728 else if (class[i] == X86_64_INTEGERSI_CLASS)
2729 tmpmode = SImode;
2730 else
2731 tmpmode = DImode;
2732 /* We've requested 24 bytes we don't have mode for. Use DImode. */
2733 if (tmpmode == BLKmode)
2734 tmpmode = DImode;
2735 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2736 gen_rtx_REG (tmpmode, *intreg),
2737 GEN_INT (i*8));
2738 intreg++;
2739 break;
2740 case X86_64_SSESF_CLASS:
2741 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2742 gen_rtx_REG (SFmode,
2743 SSE_REGNO (sse_regno)),
2744 GEN_INT (i*8));
2745 sse_regno++;
2746 break;
2747 case X86_64_SSEDF_CLASS:
2748 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2749 gen_rtx_REG (DFmode,
2750 SSE_REGNO (sse_regno)),
2751 GEN_INT (i*8));
2752 sse_regno++;
2753 break;
2754 case X86_64_SSE_CLASS:
2755 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2756 tmpmode = TImode;
2757 else
2758 tmpmode = DImode;
2759 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2760 gen_rtx_REG (tmpmode,
2761 SSE_REGNO (sse_regno)),
2762 GEN_INT (i*8));
2763 if (tmpmode == TImode)
2764 i++;
2765 sse_regno++;
2766 break;
2767 default:
2768 gcc_unreachable ();
2769 }
2770 }
2771
2772 /* Empty aligned struct, union or class. */
2773 if (nexps == 0)
2774 return NULL;
2775
2776 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2777 for (i = 0; i < nexps; i++)
2778 XVECEXP (ret, 0, i) = exp [i];
2779 return ret;
2780 }
2781
2782 /* Update the data in CUM to advance over an argument
2783 of mode MODE and data type TYPE.
2784 (TYPE is null for libcalls where that information may not be available.) */
2785
2786 void
2787 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
2788 tree type, int named)
2789 {
2790 int bytes =
2791 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2792 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2793
2794 if (type)
2795 mode = type_natural_mode (type);
2796
2797 if (TARGET_DEBUG_ARG)
2798 fprintf (stderr, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
2799 "mode=%s, named=%d)\n\n",
2800 words, cum->words, cum->nregs, cum->sse_nregs,
2801 GET_MODE_NAME (mode), named);
2802
2803 if (TARGET_64BIT)
2804 {
2805 int int_nregs, sse_nregs;
2806 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2807 cum->words += words;
2808 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2809 {
2810 cum->nregs -= int_nregs;
2811 cum->sse_nregs -= sse_nregs;
2812 cum->regno += int_nregs;
2813 cum->sse_regno += sse_nregs;
2814 }
2815 else
2816 cum->words += words;
2817 }
2818 else
2819 {
2820 switch (mode)
2821 {
2822 default:
2823 break;
2824
2825 case BLKmode:
2826 if (bytes < 0)
2827 break;
2828 /* FALLTHRU */
2829
2830 case DImode:
2831 case SImode:
2832 case HImode:
2833 case QImode:
2834 cum->words += words;
2835 cum->nregs -= words;
2836 cum->regno += words;
2837
2838 if (cum->nregs <= 0)
2839 {
2840 cum->nregs = 0;
2841 cum->regno = 0;
2842 }
2843 break;
2844
2845 case DFmode:
2846 if (!TARGET_SSE2)
2847 break;
2848 case SFmode:
2849 if (!cum->float_in_sse)
2850 break;
2851 /* FALLTHRU */
2852
2853 case TImode:
2854 case V16QImode:
2855 case V8HImode:
2856 case V4SImode:
2857 case V2DImode:
2858 case V4SFmode:
2859 case V2DFmode:
2860 if (!type || !AGGREGATE_TYPE_P (type))
2861 {
2862 cum->sse_words += words;
2863 cum->sse_nregs -= 1;
2864 cum->sse_regno += 1;
2865 if (cum->sse_nregs <= 0)
2866 {
2867 cum->sse_nregs = 0;
2868 cum->sse_regno = 0;
2869 }
2870 }
2871 break;
2872
2873 case V8QImode:
2874 case V4HImode:
2875 case V2SImode:
2876 case V2SFmode:
2877 if (!type || !AGGREGATE_TYPE_P (type))
2878 {
2879 cum->mmx_words += words;
2880 cum->mmx_nregs -= 1;
2881 cum->mmx_regno += 1;
2882 if (cum->mmx_nregs <= 0)
2883 {
2884 cum->mmx_nregs = 0;
2885 cum->mmx_regno = 0;
2886 }
2887 }
2888 break;
2889 }
2890 }
2891 }
2892
2893 /* Define where to put the arguments to a function.
2894 Value is zero to push the argument on the stack,
2895 or a hard register in which to store the argument.
2896
2897 MODE is the argument's machine mode.
2898 TYPE is the data type of the argument (as a tree).
2899 This is null for libcalls where that information may
2900 not be available.
2901 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2902 the preceding args and about the function being called.
2903 NAMED is nonzero if this argument is a named parameter
2904 (otherwise it is an extra parameter matching an ellipsis). */
2905
2906 rtx
2907 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode orig_mode,
2908 tree type, int named)
2909 {
2910 enum machine_mode mode = orig_mode;
2911 rtx ret = NULL_RTX;
2912 int bytes =
2913 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2914 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2915 static bool warnedsse, warnedmmx;
2916
2917 /* To simplify the code below, represent vector types with a vector mode
2918 even if MMX/SSE are not active. */
2919 if (type && TREE_CODE (type) == VECTOR_TYPE)
2920 mode = type_natural_mode (type);
2921
2922 /* Handle a hidden AL argument containing the number of SSE registers used
2923 by a varargs x86-64 function. For the i386 ABI, just return constm1_rtx
2924 to avoid any AL settings. */
2925 if (mode == VOIDmode)
2926 {
2927 if (TARGET_64BIT)
2928 return GEN_INT (cum->maybe_vaarg
2929 ? (cum->sse_nregs < 0
2930 ? SSE_REGPARM_MAX
2931 : cum->sse_regno)
2932 : -1);
2933 else
2934 return constm1_rtx;
2935 }
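 /* Illustrative consequence: for an x86-64 call such as printf ("%f", d),
 the caller loads AL with the value computed here (the number of SSE
 registers carrying arguments, here 1 assuming the double is the only
 SSE argument) before the call, so that the callee's register-save
 prologue knows how many XMM registers to spill; 32-bit code has no
 such hidden argument.  */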
2936 if (TARGET_64BIT)
2937 ret = construct_container (mode, orig_mode, type, 0, cum->nregs,
2938 cum->sse_nregs,
2939 &x86_64_int_parameter_registers [cum->regno],
2940 cum->sse_regno);
2941 else
2942 switch (mode)
2943 {
2944 /* For now, pass fp/complex values on the stack. */
2945 default:
2946 break;
2947
2948 case BLKmode:
2949 if (bytes < 0)
2950 break;
2951 /* FALLTHRU */
2952 case DImode:
2953 case SImode:
2954 case HImode:
2955 case QImode:
2956 if (words <= cum->nregs)
2957 {
2958 int regno = cum->regno;
2959
2960 /* Fastcall allocates the first two DWORD (SImode) or
2961 smaller arguments to ECX and EDX. */
2962 if (cum->fastcall)
2963 {
2964 if (mode == BLKmode || mode == DImode)
2965 break;
2966
2967 /* ECX not EAX is the first allocated register. */
2968 if (regno == 0)
2969 regno = 2;
2970 }
2971 ret = gen_rtx_REG (mode, regno);
2972 }
2973 break;
2974 case DFmode:
2975 if (!TARGET_SSE2)
2976 break;
2977 case SFmode:
2978 if (!cum->float_in_sse)
2979 break;
2980 /* FALLTHRU */
2981 case TImode:
2982 case V16QImode:
2983 case V8HImode:
2984 case V4SImode:
2985 case V2DImode:
2986 case V4SFmode:
2987 case V2DFmode:
2988 if (!type || !AGGREGATE_TYPE_P (type))
2989 {
2990 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
2991 {
2992 warnedsse = true;
2993 warning (0, "SSE vector argument without SSE enabled "
2994 "changes the ABI");
2995 }
2996 if (cum->sse_nregs)
2997 ret = gen_reg_or_parallel (mode, orig_mode,
2998 cum->sse_regno + FIRST_SSE_REG);
2999 }
3000 break;
3001 case V8QImode:
3002 case V4HImode:
3003 case V2SImode:
3004 case V2SFmode:
3005 if (!type || !AGGREGATE_TYPE_P (type))
3006 {
3007 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
3008 {
3009 warnedmmx = true;
3010 warning (0, "MMX vector argument without MMX enabled "
3011 "changes the ABI");
3012 }
3013 if (cum->mmx_nregs)
3014 ret = gen_reg_or_parallel (mode, orig_mode,
3015 cum->mmx_regno + FIRST_MMX_REG);
3016 }
3017 break;
3018 }
3019
3020 if (TARGET_DEBUG_ARG)
3021 {
3022 fprintf (stderr,
3023 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
3024 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
3025
3026 if (ret)
3027 print_simple_rtl (stderr, ret);
3028 else
3029 fprintf (stderr, ", stack");
3030
3031 fprintf (stderr, " )\n");
3032 }
3033
3034 return ret;
3035 }
3036
3037 /* A C expression that indicates when an argument must be passed by
3038 reference. If nonzero for an argument, a copy of that argument is
3039 made in memory and a pointer to the argument is passed instead of
3040 the argument itself. The pointer is passed in whatever way is
3041 appropriate for passing a pointer to that type. */
3042
3043 static bool
3044 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
3045 enum machine_mode mode ATTRIBUTE_UNUSED,
3046 tree type, bool named ATTRIBUTE_UNUSED)
3047 {
3048 if (!TARGET_64BIT)
3049 return 0;
3050
3051 if (type && int_size_in_bytes (type) == -1)
3052 {
3053 if (TARGET_DEBUG_ARG)
3054 fprintf (stderr, "function_arg_pass_by_reference\n");
3055 return 1;
3056 }
3057
3058 return 0;
3059 }
3060
3061 /* Return true when TYPE should be 128-bit aligned for the 32-bit argument
3062 passing ABI. Only called if TARGET_SSE. */
3063 static bool
3064 contains_128bit_aligned_vector_p (tree type)
3065 {
3066 enum machine_mode mode = TYPE_MODE (type);
3067 if (SSE_REG_MODE_P (mode)
3068 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
3069 return true;
3070 if (TYPE_ALIGN (type) < 128)
3071 return false;
3072
3073 if (AGGREGATE_TYPE_P (type))
3074 {
3075 /* Walk the aggregates recursively. */
3076 switch (TREE_CODE (type))
3077 {
3078 case RECORD_TYPE:
3079 case UNION_TYPE:
3080 case QUAL_UNION_TYPE:
3081 {
3082 tree field;
3083
3084 if (TYPE_BINFO (type))
3085 {
3086 tree binfo, base_binfo;
3087 int i;
3088
3089 for (binfo = TYPE_BINFO (type), i = 0;
3090 BINFO_BASE_ITERATE (binfo, i, base_binfo); i++)
3091 if (contains_128bit_aligned_vector_p
3092 (BINFO_TYPE (base_binfo)))
3093 return true;
3094 }
3095 /* And now merge the fields of structure. */
3096 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3097 {
3098 if (TREE_CODE (field) == FIELD_DECL
3099 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
3100 return true;
3101 }
3102 break;
3103 }
3104
3105 case ARRAY_TYPE:
3106 /* Just for use if some languages pass arrays by value. */
3107 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
3108 return true;
3109
3110 default:
3111 gcc_unreachable ();
3112 }
3113 }
3114 return false;
3115 }
3116
3117 /* Gives the alignment boundary, in bits, of an argument with the
3118 specified mode and type. */
3119
3120 int
3121 ix86_function_arg_boundary (enum machine_mode mode, tree type)
3122 {
3123 int align;
3124 if (type)
3125 align = TYPE_ALIGN (type);
3126 else
3127 align = GET_MODE_ALIGNMENT (mode);
3128 if (align < PARM_BOUNDARY)
3129 align = PARM_BOUNDARY;
3130 if (!TARGET_64BIT)
3131 {
3132 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
3133 make an exception for SSE modes since these require 128bit
3134 alignment.
3135
3136 The handling here differs from field_alignment. ICC aligns MMX
3137 arguments to 4 byte boundaries, while structure fields are aligned
3138 to 8 byte boundaries. */
3139 if (!TARGET_SSE)
3140 align = PARM_BOUNDARY;
3141 else if (!type)
3142 {
3143 if (!SSE_REG_MODE_P (mode))
3144 align = PARM_BOUNDARY;
3145 }
3146 else
3147 {
3148 if (!contains_128bit_aligned_vector_p (type))
3149 align = PARM_BOUNDARY;
3150 }
3151 }
3152 if (align > 128)
3153 align = 128;
3154 return align;
3155 }
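 /* For illustration (hypothetical prototypes): on a 32-bit target,

 void plain (double x);   // boundary: PARM_BOUNDARY (32 bits)
 void vec (__m128 v);     // boundary: 128 bits when SSE is enabled

 only arguments whose type contains a 128-bit aligned vector are bumped to
 128-bit alignment; everything else keeps the 4-byte i386 ABI slot.  */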
3156
3157 /* Return true if N is a possible register number of function value. */
3158 bool
3159 ix86_function_value_regno_p (int regno)
3160 {
3161 if (!TARGET_64BIT)
3162 {
3163 return ((regno) == 0
3164 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
3165 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
3166 }
3167 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
3168 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
3169 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
3170 }
3171
3172 /* Define how to find the value returned by a function.
3173 VALTYPE is the data type of the value (as a tree).
3174 If the precise function being called is known, FUNC is its FUNCTION_DECL;
3175 otherwise, FUNC is 0. */
3176 rtx
3177 ix86_function_value (tree valtype, tree func)
3178 {
3179 enum machine_mode natmode = type_natural_mode (valtype);
3180
3181 if (TARGET_64BIT)
3182 {
3183 rtx ret = construct_container (natmode, TYPE_MODE (valtype), valtype,
3184 1, REGPARM_MAX, SSE_REGPARM_MAX,
3185 x86_64_int_return_registers, 0);
3186 /* For zero-sized structures, construct_container returns NULL, but we
3187 need to keep the rest of the compiler happy by returning a meaningful value. */
3188 if (!ret)
3189 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
3190 return ret;
3191 }
3192 else
3193 return gen_rtx_REG (TYPE_MODE (valtype), ix86_value_regno (natmode, func));
3194 }
3195
3196 /* Return nonzero iff TYPE must be returned in memory. */
3197 int
3198 ix86_return_in_memory (tree type)
3199 {
3200 int needed_intregs, needed_sseregs, size;
3201 enum machine_mode mode = type_natural_mode (type);
3202
3203 if (TARGET_64BIT)
3204 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
3205
3206 if (mode == BLKmode)
3207 return 1;
3208
3209 size = int_size_in_bytes (type);
3210
3211 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
3212 return 0;
3213
3214 if (VECTOR_MODE_P (mode) || mode == TImode)
3215 {
3216 /* User-created vectors small enough to fit in EAX. */
3217 if (size < 8)
3218 return 0;
3219
3220 /* MMX/3dNow values are returned on the stack, since we've
3221 got to EMMS/FEMMS before returning. */
3222 if (size == 8)
3223 return 1;
3224
3225 /* SSE values are returned in XMM0, except when it doesn't exist. */
3226 if (size == 16)
3227 return (TARGET_SSE ? 0 : 1);
3228 }
3229
3230 if (mode == XFmode)
3231 return 0;
3232
3233 if (size > 12)
3234 return 1;
3235 return 0;
3236 }
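 /* A few concrete 32-bit outcomes of the checks above (types named only for
 illustration): an 8-byte MMX vector (e.g. 2 x int with vector_size (8))
 is returned in memory, since returning it in an MMX register would force
 an EMMS before returning; a 16-byte SSE vector is returned in XMM0 when
 SSE is enabled and in memory otherwise; and long double (XFmode) is not
 returned in memory even though it is wider than 8 bytes.  */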
3237
3238 /* When returning SSE vector types, we have a choice of either
3239 (1) being abi incompatible with a -march switch, or
3240 (2) generating an error.
3241 Given no good solution, I think the safest thing is one warning.
3242 The user won't be able to use -Werror, but....
3243
3244 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
3245 called in response to actually generating a caller or callee that
3246 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
3247 via aggregate_value_p for general type probing from tree-ssa. */
3248
3249 static rtx
3250 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
3251 {
3252 static bool warned;
3253
3254 if (!TARGET_SSE && type && !warned)
3255 {
3256 /* Look at the return type of the function, not the function type. */
3257 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
3258
3259 if (mode == TImode
3260 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3261 {
3262 warned = true;
3263 warning (0, "SSE vector return without SSE enabled changes the ABI");
3264 }
3265 }
3266
3267 return NULL;
3268 }
3269
3270 /* Define how to find the value returned by a library function
3271 assuming the value has mode MODE. */
3272 rtx
3273 ix86_libcall_value (enum machine_mode mode)
3274 {
3275 if (TARGET_64BIT)
3276 {
3277 switch (mode)
3278 {
3279 case SFmode:
3280 case SCmode:
3281 case DFmode:
3282 case DCmode:
3283 case TFmode:
3284 return gen_rtx_REG (mode, FIRST_SSE_REG);
3285 case XFmode:
3286 case XCmode:
3287 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
3288 case TCmode:
3289 return NULL;
3290 default:
3291 return gen_rtx_REG (mode, 0);
3292 }
3293 }
3294 else
3295 return gen_rtx_REG (mode, ix86_value_regno (mode, NULL));
3296 }
3297
3298 /* Given a mode, return the register to use for a return value. */
3299
3300 static int
3301 ix86_value_regno (enum machine_mode mode, tree func)
3302 {
3303 gcc_assert (!TARGET_64BIT);
3304
3305 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
3306 we prevent this case when sse is not available. */
3307 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3308 return FIRST_SSE_REG;
3309
3310 /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values. */
3311 if (GET_MODE_CLASS (mode) != MODE_FLOAT || !TARGET_FLOAT_RETURNS_IN_80387)
3312 return 0;
3313
3314 /* Floating point return values in %st(0), except for local functions when
3315 SSE math is enabled. */
3316 if (func && SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH
3317 && flag_unit_at_a_time)
3318 {
3319 struct cgraph_local_info *i = cgraph_local_info (func);
3320 if (i && i->local)
3321 return FIRST_SSE_REG;
3322 }
3323
3324 return FIRST_FLOAT_REG;
3325 }
3326 \f
3327 /* Create the va_list data type. */
3328
3329 static tree
3330 ix86_build_builtin_va_list (void)
3331 {
3332 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
3333
3334 /* For i386 we use plain pointer to argument area. */
3335 if (!TARGET_64BIT)
3336 return build_pointer_type (char_type_node);
3337
3338 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
3339 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
3340
3341 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
3342 unsigned_type_node);
3343 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
3344 unsigned_type_node);
3345 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
3346 ptr_type_node);
3347 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
3348 ptr_type_node);
3349
3350 va_list_gpr_counter_field = f_gpr;
3351 va_list_fpr_counter_field = f_fpr;
3352
3353 DECL_FIELD_CONTEXT (f_gpr) = record;
3354 DECL_FIELD_CONTEXT (f_fpr) = record;
3355 DECL_FIELD_CONTEXT (f_ovf) = record;
3356 DECL_FIELD_CONTEXT (f_sav) = record;
3357
3358 TREE_CHAIN (record) = type_decl;
3359 TYPE_NAME (record) = type_decl;
3360 TYPE_FIELDS (record) = f_gpr;
3361 TREE_CHAIN (f_gpr) = f_fpr;
3362 TREE_CHAIN (f_fpr) = f_ovf;
3363 TREE_CHAIN (f_ovf) = f_sav;
3364
3365 layout_type (record);
3366
3367 /* The correct type is an array type of one element. */
3368 return build_array_type (record, build_index_type (size_zero_node));
3369 }
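/* For reference, the 64-bit record built above corresponds roughly to the
   following C-level declaration (a sketch; the field names match the
   FIELD_DECLs created above, and the array-of-one-element trick matches the
   build_array_type call):

       typedef struct __va_list_tag {
         unsigned int gp_offset;
         unsigned int fp_offset;
         void *overflow_arg_area;
         void *reg_save_area;
       } __va_list_tag;
       typedef __va_list_tag __builtin_va_list[1];
 */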
3370
3371 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
3372
3373 static void
3374 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3375 tree type, int *pretend_size ATTRIBUTE_UNUSED,
3376 int no_rtl)
3377 {
3378 CUMULATIVE_ARGS next_cum;
3379 rtx save_area = NULL_RTX, mem;
3380 rtx label;
3381 rtx label_ref;
3382 rtx tmp_reg;
3383 rtx nsse_reg;
3384 int set;
3385 tree fntype;
3386 int stdarg_p;
3387 int i;
3388
3389 if (!TARGET_64BIT)
3390 return;
3391
3392 if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
3393 return;
3394
3395 /* Indicate that we need to allocate space on the stack for the varargs save area. */
3396 ix86_save_varrargs_registers = 1;
3397
3398 cfun->stack_alignment_needed = 128;
3399
3400 fntype = TREE_TYPE (current_function_decl);
3401 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
3402 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
3403 != void_type_node));
3404
3405 /* For varargs, we do not want to skip the dummy va_dcl argument.
3406 For stdargs, we do want to skip the last named argument. */
3407 next_cum = *cum;
3408 if (stdarg_p)
3409 function_arg_advance (&next_cum, mode, type, 1);
3410
3411 if (!no_rtl)
3412 save_area = frame_pointer_rtx;
3413
3414 set = get_varargs_alias_set ();
3415
3416 for (i = next_cum.regno;
3417 i < ix86_regparm
3418 && i < next_cum.regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
3419 i++)
3420 {
3421 mem = gen_rtx_MEM (Pmode,
3422 plus_constant (save_area, i * UNITS_PER_WORD));
3423 set_mem_alias_set (mem, set);
3424 emit_move_insn (mem, gen_rtx_REG (Pmode,
3425 x86_64_int_parameter_registers[i]));
3426 }
3427
3428 if (next_cum.sse_nregs && cfun->va_list_fpr_size)
3429 {
3430 /* Now emit code to save SSE registers. The AX parameter contains the number
3431 of SSE parameter registers used to call this function. We use the
3432 sse_prologue_save insn template, which produces a computed jump across
3433 the SSE saves. We need some preparation work to get this working. */
3434
3435 label = gen_label_rtx ();
3436 label_ref = gen_rtx_LABEL_REF (Pmode, label);
3437
3438 /* Compute the address to jump to:
3439 label - 4*eax + nnamed_sse_arguments*4. */
3440 tmp_reg = gen_reg_rtx (Pmode);
3441 nsse_reg = gen_reg_rtx (Pmode);
3442 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
3443 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3444 gen_rtx_MULT (Pmode, nsse_reg,
3445 GEN_INT (4))));
3446 if (next_cum.sse_regno)
3447 emit_move_insn
3448 (nsse_reg,
3449 gen_rtx_CONST (DImode,
3450 gen_rtx_PLUS (DImode,
3451 label_ref,
3452 GEN_INT (next_cum.sse_regno * 4))));
3453 else
3454 emit_move_insn (nsse_reg, label_ref);
3455 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
3456
3457 /* Compute the address of the memory block we save into. We always use a
3458 pointer pointing 127 bytes past the first byte to store; this keeps the
3459 displacements in signed 8-bit range and the instruction size to 4 bytes. */
3460 tmp_reg = gen_reg_rtx (Pmode);
3461 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3462 plus_constant (save_area,
3463 8 * REGPARM_MAX + 127)));
3464 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
3465 set_mem_alias_set (mem, set);
3466 set_mem_align (mem, BITS_PER_WORD);
3467
3468 /* And finally do the dirty job! */
3469 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
3470 GEN_INT (next_cum.sse_regno), label));
3471 }
3472
3473 }
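/* A sketch of the varargs register save area set up above (offsets relative
   to the frame-pointer-based save_area; REGPARM_MAX is 6 in 64-bit mode):

       save_area + 0 .. 8*REGPARM_MAX - 1     incoming GP argument registers,
                                              one 8-byte word each
       save_area + 8*REGPARM_MAX ..           incoming SSE argument registers,
                                              16 bytes each

   The gp_offset and fp_offset fields initialized by ix86_va_start below are
   indices into this block.  */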
3474
3475 /* Implement va_start. */
3476
3477 void
3478 ix86_va_start (tree valist, rtx nextarg)
3479 {
3480 HOST_WIDE_INT words, n_gpr, n_fpr;
3481 tree f_gpr, f_fpr, f_ovf, f_sav;
3482 tree gpr, fpr, ovf, sav, t;
3483
3484 /* Only the 64-bit target needs something special. */
3485 if (!TARGET_64BIT)
3486 {
3487 std_expand_builtin_va_start (valist, nextarg);
3488 return;
3489 }
3490
3491 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3492 f_fpr = TREE_CHAIN (f_gpr);
3493 f_ovf = TREE_CHAIN (f_fpr);
3494 f_sav = TREE_CHAIN (f_ovf);
3495
3496 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3497 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
3498 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
3499 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
3500 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
3501
3502 /* Count number of gp and fp argument registers used. */
3503 words = current_function_args_info.words;
3504 n_gpr = current_function_args_info.regno;
3505 n_fpr = current_function_args_info.sse_regno;
3506
3507 if (TARGET_DEBUG_ARG)
3508 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
3509 (int) words, (int) n_gpr, (int) n_fpr);
3510
3511 if (cfun->va_list_gpr_size)
3512 {
3513 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
3514 build_int_cst (NULL_TREE, n_gpr * 8));
3515 TREE_SIDE_EFFECTS (t) = 1;
3516 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3517 }
3518
3519 if (cfun->va_list_fpr_size)
3520 {
3521 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
3522 build_int_cst (NULL_TREE, n_fpr * 16 + 8*REGPARM_MAX));
3523 TREE_SIDE_EFFECTS (t) = 1;
3524 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3525 }
3526
3527 /* Find the overflow area. */
3528 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
3529 if (words != 0)
3530 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
3531 build_int_cst (NULL_TREE, words * UNITS_PER_WORD));
3532 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3533 TREE_SIDE_EFFECTS (t) = 1;
3534 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3535
3536 if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
3537 {
3538 /* Find the register save area.
3539 The function prologue saves it right above the stack frame. */
3540 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
3541 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
3542 TREE_SIDE_EFFECTS (t) = 1;
3543 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3544 }
3545 }
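/* For example (an illustrative prototype only): for

       void f (int a, double b, ...);

   one GP and one SSE register are consumed by the named arguments, so the
   code above initializes gp_offset to 1*8 = 8 and fp_offset to
   8*REGPARM_MAX + 1*16 = 64, with overflow_arg_area pointing just past any
   named stack arguments and reg_save_area pointing at the block saved by
   ix86_setup_incoming_varargs.  */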
3546
3547 /* Implement va_arg. */
3548
3549 tree
3550 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
3551 {
3552 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
3553 tree f_gpr, f_fpr, f_ovf, f_sav;
3554 tree gpr, fpr, ovf, sav, t;
3555 int size, rsize;
3556 tree lab_false, lab_over = NULL_TREE;
3557 tree addr, t2;
3558 rtx container;
3559 int indirect_p = 0;
3560 tree ptrtype;
3561 enum machine_mode nat_mode;
3562
3563 /* Only the 64-bit target needs something special. */
3564 if (!TARGET_64BIT)
3565 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
3566
3567 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3568 f_fpr = TREE_CHAIN (f_gpr);
3569 f_ovf = TREE_CHAIN (f_fpr);
3570 f_sav = TREE_CHAIN (f_ovf);
3571
3572 valist = build_va_arg_indirect_ref (valist);
3573 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
3574 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
3575 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
3576 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
3577
3578 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
3579 if (indirect_p)
3580 type = build_pointer_type (type);
3581 size = int_size_in_bytes (type);
3582 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3583
3584 nat_mode = type_natural_mode (type);
3585 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
3586 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
3587
3588 /* Pull the value out of the saved registers. */
3589
3590 addr = create_tmp_var (ptr_type_node, "addr");
3591 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
3592
3593 if (container)
3594 {
3595 int needed_intregs, needed_sseregs;
3596 bool need_temp;
3597 tree int_addr, sse_addr;
3598
3599 lab_false = create_artificial_label ();
3600 lab_over = create_artificial_label ();
3601
3602 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
3603
3604 need_temp = (!REG_P (container)
3605 && ((needed_intregs && TYPE_ALIGN (type) > 64)
3606 || TYPE_ALIGN (type) > 128));
3607
3608 /* In case we are passing a structure, verify that it is a consecutive block
3609 in the register save area. If not, we need to do moves. */
3610 if (!need_temp && !REG_P (container))
3611 {
3612 /* Verify that all registers are strictly consecutive */
3613 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
3614 {
3615 int i;
3616
3617 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3618 {
3619 rtx slot = XVECEXP (container, 0, i);
3620 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
3621 || INTVAL (XEXP (slot, 1)) != i * 16)
3622 need_temp = 1;
3623 }
3624 }
3625 else
3626 {
3627 int i;
3628
3629 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3630 {
3631 rtx slot = XVECEXP (container, 0, i);
3632 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
3633 || INTVAL (XEXP (slot, 1)) != i * 8)
3634 need_temp = 1;
3635 }
3636 }
3637 }
3638 if (!need_temp)
3639 {
3640 int_addr = addr;
3641 sse_addr = addr;
3642 }
3643 else
3644 {
3645 int_addr = create_tmp_var (ptr_type_node, "int_addr");
3646 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
3647 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
3648 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
3649 }
3650
3651 /* First ensure that we fit completely in registers. */
3652 if (needed_intregs)
3653 {
3654 t = build_int_cst (TREE_TYPE (gpr),
3655 (REGPARM_MAX - needed_intregs + 1) * 8);
3656 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
3657 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
3658 t = build (COND_EXPR, void_type_node, t, t2, NULL_TREE);
3659 gimplify_and_add (t, pre_p);
3660 }
3661 if (needed_sseregs)
3662 {
3663 t = build_int_cst (TREE_TYPE (fpr),
3664 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
3665 + REGPARM_MAX * 8);
3666 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
3667 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
3668 t = build (COND_EXPR, void_type_node, t, t2, NULL_TREE);
3669 gimplify_and_add (t, pre_p);
3670 }
3671
3672 /* Compute index to start of area used for integer regs. */
3673 if (needed_intregs)
3674 {
3675 /* int_addr = gpr + sav; */
3676 t = fold_convert (ptr_type_node, gpr);
3677 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
3678 t = build2 (MODIFY_EXPR, void_type_node, int_addr, t);
3679 gimplify_and_add (t, pre_p);
3680 }
3681 if (needed_sseregs)
3682 {
3683 /* sse_addr = fpr + sav; */
3684 t = fold_convert (ptr_type_node, fpr);
3685 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
3686 t = build2 (MODIFY_EXPR, void_type_node, sse_addr, t);
3687 gimplify_and_add (t, pre_p);
3688 }
3689 if (need_temp)
3690 {
3691 int i;
3692 tree temp = create_tmp_var (type, "va_arg_tmp");
3693
3694 /* addr = &temp; */
3695 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
3696 t = build2 (MODIFY_EXPR, void_type_node, addr, t);
3697 gimplify_and_add (t, pre_p);
3698
3699 for (i = 0; i < XVECLEN (container, 0); i++)
3700 {
3701 rtx slot = XVECEXP (container, 0, i);
3702 rtx reg = XEXP (slot, 0);
3703 enum machine_mode mode = GET_MODE (reg);
3704 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
3705 tree addr_type = build_pointer_type (piece_type);
3706 tree src_addr, src;
3707 int src_offset;
3708 tree dest_addr, dest;
3709
3710 if (SSE_REGNO_P (REGNO (reg)))
3711 {
3712 src_addr = sse_addr;
3713 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
3714 }
3715 else
3716 {
3717 src_addr = int_addr;
3718 src_offset = REGNO (reg) * 8;
3719 }
3720 src_addr = fold_convert (addr_type, src_addr);
3721 src_addr = fold (build2 (PLUS_EXPR, addr_type, src_addr,
3722 size_int (src_offset)));
3723 src = build_va_arg_indirect_ref (src_addr);
3724
3725 dest_addr = fold_convert (addr_type, addr);
3726 dest_addr = fold (build2 (PLUS_EXPR, addr_type, dest_addr,
3727 size_int (INTVAL (XEXP (slot, 1)))));
3728 dest = build_va_arg_indirect_ref (dest_addr);
3729
3730 t = build2 (MODIFY_EXPR, void_type_node, dest, src);
3731 gimplify_and_add (t, pre_p);
3732 }
3733 }
3734
3735 if (needed_intregs)
3736 {
3737 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
3738 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
3739 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
3740 gimplify_and_add (t, pre_p);
3741 }
3742 if (needed_sseregs)
3743 {
3744 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
3745 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
3746 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
3747 gimplify_and_add (t, pre_p);
3748 }
3749
3750 t = build1 (GOTO_EXPR, void_type_node, lab_over);
3751 gimplify_and_add (t, pre_p);
3752
3753 t = build1 (LABEL_EXPR, void_type_node, lab_false);
3754 append_to_statement_list (t, pre_p);
3755 }
3756
3757 /* ... otherwise out of the overflow area. */
3758
3759 /* Care for on-stack alignment if needed. */
3760 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
3761 t = ovf;
3762 else
3763 {
3764 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
3765 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf,
3766 build_int_cst (TREE_TYPE (ovf), align - 1));
3767 t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
3768 build_int_cst (TREE_TYPE (t), -align));
3769 }
3770 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
3771
3772 t2 = build2 (MODIFY_EXPR, void_type_node, addr, t);
3773 gimplify_and_add (t2, pre_p);
3774
3775 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
3776 build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD));
3777 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3778 gimplify_and_add (t, pre_p);
3779
3780 if (container)
3781 {
3782 t = build1 (LABEL_EXPR, void_type_node, lab_over);
3783 append_to_statement_list (t, pre_p);
3784 }
3785
3786 ptrtype = build_pointer_type (type);
3787 addr = fold_convert (ptrtype, addr);
3788
3789 if (indirect_p)
3790 addr = build_va_arg_indirect_ref (addr);
3791 return build_va_arg_indirect_ref (addr);
3792 }
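/* A rough sketch (for illustration only) of what the code built above does at
   run time for a plain integer argument:

       if (ap->gp_offset >= 8 * REGPARM_MAX)
         goto overflow;
       addr = ap->reg_save_area + ap->gp_offset;
       ap->gp_offset += 8;
       goto done;
     overflow:
       addr = ap->overflow_arg_area;       (aligned first if required)
       ap->overflow_arg_area += 8;         (rsize * UNITS_PER_WORD)
     done:
       result = *(int *) addr;

   Aggregates whose register pieces are not consecutive in the save area
   (e.g. ones spanning both GP and SSE registers) additionally go through the
   need_temp path, which copies each piece into a temporary before its address
   is taken.  */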
3793 \f
3794 /* Return nonzero if OPNUM's MEM should be matched
3795 in movabs* patterns. */
3796
3797 int
3798 ix86_check_movabs (rtx insn, int opnum)
3799 {
3800 rtx set, mem;
3801
3802 set = PATTERN (insn);
3803 if (GET_CODE (set) == PARALLEL)
3804 set = XVECEXP (set, 0, 0);
3805 gcc_assert (GET_CODE (set) == SET);
3806 mem = XEXP (set, opnum);
3807 while (GET_CODE (mem) == SUBREG)
3808 mem = SUBREG_REG (mem);
3809 gcc_assert (GET_CODE (mem) == MEM);
3810 return (volatile_ok || !MEM_VOLATILE_P (mem));
3811 }
3812 \f
3813 /* Initialize the table of extra 80387 mathematical constants. */
3814
3815 static void
3816 init_ext_80387_constants (void)
3817 {
3818 static const char * cst[5] =
3819 {
3820 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
3821 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
3822 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
3823 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
3824 "3.1415926535897932385128089594061862044", /* 4: fldpi */
3825 };
3826 int i;
3827
3828 for (i = 0; i < 5; i++)
3829 {
3830 real_from_string (&ext_80387_constants_table[i], cst[i]);
3831 /* Ensure each constant is rounded to XFmode precision. */
3832 real_convert (&ext_80387_constants_table[i],
3833 XFmode, &ext_80387_constants_table[i]);
3834 }
3835
3836 ext_80387_constants_init = 1;
3837 }
3838
3839 /* Return true if the constant is something that can be loaded with
3840 a special instruction. */
3841
3842 int
3843 standard_80387_constant_p (rtx x)
3844 {
3845 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
3846 return -1;
3847
3848 if (x == CONST0_RTX (GET_MODE (x)))
3849 return 1;
3850 if (x == CONST1_RTX (GET_MODE (x)))
3851 return 2;
3852
3853 /* For XFmode constants, try to find a special 80387 instruction when
3854 optimizing for size or on those CPUs that benefit from them. */
3855 if (GET_MODE (x) == XFmode
3856 && (optimize_size || x86_ext_80387_constants & TUNEMASK))
3857 {
3858 REAL_VALUE_TYPE r;
3859 int i;
3860
3861 if (! ext_80387_constants_init)
3862 init_ext_80387_constants ();
3863
3864 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3865 for (i = 0; i < 5; i++)
3866 if (real_identical (&r, &ext_80387_constants_table[i]))
3867 return i + 3;
3868 }
3869
3870 return 0;
3871 }
3872
3873 /* Return the opcode of the special instruction to be used to load
3874 the constant X. */
3875
3876 const char *
3877 standard_80387_constant_opcode (rtx x)
3878 {
3879 switch (standard_80387_constant_p (x))
3880 {
3881 case 1:
3882 return "fldz";
3883 case 2:
3884 return "fld1";
3885 case 3:
3886 return "fldlg2";
3887 case 4:
3888 return "fldln2";
3889 case 5:
3890 return "fldl2e";
3891 case 6:
3892 return "fldl2t";
3893 case 7:
3894 return "fldpi";
3895 default:
3896 gcc_unreachable ();
3897 }
3898 }
3899
3900 /* Return the CONST_DOUBLE representing the 80387 constant that is
3901 loaded by the specified special instruction. The argument IDX
3902 matches the return value from standard_80387_constant_p. */
3903
3904 rtx
3905 standard_80387_constant_rtx (int idx)
3906 {
3907 int i;
3908
3909 if (! ext_80387_constants_init)
3910 init_ext_80387_constants ();
3911
3912 switch (idx)
3913 {
3914 case 3:
3915 case 4:
3916 case 5:
3917 case 6:
3918 case 7:
3919 i = idx - 3;
3920 break;
3921
3922 default:
3923 gcc_unreachable ();
3924 }
3925
3926 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
3927 XFmode);
3928 }
3929
3930 /* Return 1 if X is an FP constant we can load into an SSE register
3931 without using memory. */
3932 int
3933 standard_sse_constant_p (rtx x)
3934 {
3935 if (x == const0_rtx)
3936 return 1;
3937 return (x == CONST0_RTX (GET_MODE (x)));
3938 }
3939
3940 /* Returns 1 if OP contains a symbol reference */
3941
3942 int
3943 symbolic_reference_mentioned_p (rtx op)
3944 {
3945 const char *fmt;
3946 int i;
3947
3948 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
3949 return 1;
3950
3951 fmt = GET_RTX_FORMAT (GET_CODE (op));
3952 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3953 {
3954 if (fmt[i] == 'E')
3955 {
3956 int j;
3957
3958 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3959 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3960 return 1;
3961 }
3962
3963 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3964 return 1;
3965 }
3966
3967 return 0;
3968 }
3969
3970 /* Return 1 if it is appropriate to emit `ret' instructions in the
3971 body of a function. Do this only if the epilogue is simple, needing a
3972 couple of insns. Prior to reloading, we can't tell how many registers
3973 must be saved, so return 0 then. Return 0 if there is no frame
3974 marker to de-allocate. */
3975
3976 int
3977 ix86_can_use_return_insn_p (void)
3978 {
3979 struct ix86_frame frame;
3980
3981 if (! reload_completed || frame_pointer_needed)
3982 return 0;
3983
3984 /* Don't allow more than 32768 bytes to be popped, since that's all we
3985 can do with one instruction. */
3986 if (current_function_pops_args
3987 && current_function_args_size >= 32768)
3988 return 0;
3989
3990 ix86_compute_frame_layout (&frame);
3991 return frame.to_allocate == 0 && frame.nregs == 0;
3992 }
3993 \f
3994 /* Value should be nonzero if functions must have frame pointers.
3995 Zero means the frame pointer need not be set up (and parms may
3996 be accessed via the stack pointer) in functions that seem suitable. */
3997
3998 int
3999 ix86_frame_pointer_required (void)
4000 {
4001 /* If we accessed previous frames, then the generated code expects
4002 to be able to access the saved ebp value in our frame. */
4003 if (cfun->machine->accesses_prev_frame)
4004 return 1;
4005
4006 /* Several x86 OSes need a frame pointer for other reasons,
4007 usually pertaining to setjmp. */
4008 if (SUBTARGET_FRAME_POINTER_REQUIRED)
4009 return 1;
4010
4011 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4012 the frame pointer by default. Turn it back on now if we've not
4013 got a leaf function. */
4014 if (TARGET_OMIT_LEAF_FRAME_POINTER
4015 && (!current_function_is_leaf))
4016 return 1;
4017
4018 if (current_function_profile)
4019 return 1;
4020
4021 return 0;
4022 }
4023
4024 /* Record that the current function accesses previous call frames. */
4025
4026 void
4027 ix86_setup_frame_addresses (void)
4028 {
4029 cfun->machine->accesses_prev_frame = 1;
4030 }
4031 \f
4032 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
4033 # define USE_HIDDEN_LINKONCE 1
4034 #else
4035 # define USE_HIDDEN_LINKONCE 0
4036 #endif
4037
4038 static int pic_labels_used;
4039
4040 /* Fills in the label name that should be used for a pc thunk for
4041 the given register. */
4042
4043 static void
4044 get_pc_thunk_name (char name[32], unsigned int regno)
4045 {
4046 if (USE_HIDDEN_LINKONCE)
4047 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4048 else
4049 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
4050 }
4051
4052
4053 /* Output, at the end of the file, the pc thunks used by -fpic code: each
4054 thunk loads its register with the return address of the caller, then returns. */
4055
4056 void
4057 ix86_file_end (void)
4058 {
4059 rtx xops[2];
4060 int regno;
4061
4062 for (regno = 0; regno < 8; ++regno)
4063 {
4064 char name[32];
4065
4066 if (! ((pic_labels_used >> regno) & 1))
4067 continue;
4068
4069 get_pc_thunk_name (name, regno);
4070
4071 if (USE_HIDDEN_LINKONCE)
4072 {
4073 tree decl;
4074
4075 decl = build_decl (FUNCTION_DECL, get_identifier (name),
4076 error_mark_node);
4077 TREE_PUBLIC (decl) = 1;
4078 TREE_STATIC (decl) = 1;
4079 DECL_ONE_ONLY (decl) = 1;
4080
4081 (*targetm.asm_out.unique_section) (decl, 0);
4082 named_section (decl, NULL, 0);
4083
4084 (*targetm.asm_out.globalize_label) (asm_out_file, name);
4085 fputs ("\t.hidden\t", asm_out_file);
4086 assemble_name (asm_out_file, name);
4087 fputc ('\n', asm_out_file);
4088 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
4089 }
4090 else
4091 {
4092 text_section ();
4093 ASM_OUTPUT_LABEL (asm_out_file, name);
4094 }
4095
4096 xops[0] = gen_rtx_REG (SImode, regno);
4097 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4098 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4099 output_asm_insn ("ret", xops);
4100 }
4101
4102 if (NEED_INDICATE_EXEC_STACK)
4103 file_end_indicate_exec_stack ();
4104 }
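/* For reference, with USE_HIDDEN_LINKONCE the loop above emits, for each PIC
   register actually used, a thunk roughly like the following (illustrative
   assembly; the register name varies):

       .section .gnu.linkonce.t.__i686.get_pc_thunk.bx,"ax",@progbits
       .globl  __i686.get_pc_thunk.bx
       .hidden __i686.get_pc_thunk.bx
   __i686.get_pc_thunk.bx:
       movl    (%esp), %ebx
       ret
 */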
4105
4106 /* Emit code for the SET_GOT patterns. */
4107
4108 const char *
4109 output_set_got (rtx dest)
4110 {
4111 rtx xops[3];
4112
4113 xops[0] = dest;
4114 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
4115
4116 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
4117 {
4118 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
4119
4120 if (!flag_pic)
4121 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4122 else
4123 output_asm_insn ("call\t%a2", xops);
4124
4125 #if TARGET_MACHO
4126 /* Output the "canonical" label name ("Lxx$pb") here too. This
4127 is what will be referred to by the Mach-O PIC subsystem. */
4128 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4129 #endif
4130 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4131 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4132
4133 if (flag_pic)
4134 output_asm_insn ("pop{l}\t%0", xops);
4135 }
4136 else
4137 {
4138 char name[32];
4139 get_pc_thunk_name (name, REGNO (dest));
4140 pic_labels_used |= 1 << REGNO (dest);
4141
4142 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4143 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4144 output_asm_insn ("call\t%X2", xops);
4145 }
4146
4147 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4148 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4149 else if (!TARGET_MACHO)
4150 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
4151
4152 return "";
4153 }
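/* For reference, on an ELF target with TARGET_DEEP_BRANCH_PREDICTION the code
   above typically emits something like (illustrative only):

       call    __i686.get_pc_thunk.bx
       addl    $_GLOBAL_OFFSET_TABLE_, %ebx

   while without deep branch prediction it uses the call-to-local-label / popl
   sequence followed by an add of _GLOBAL_OFFSET_TABLE_ relative to that
   label.  */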
4154
4155 /* Generate a "push" pattern for input ARG. */
4156
4157 static rtx
4158 gen_push (rtx arg)
4159 {
4160 return gen_rtx_SET (VOIDmode,
4161 gen_rtx_MEM (Pmode,
4162 gen_rtx_PRE_DEC (Pmode,
4163 stack_pointer_rtx)),
4164 arg);
4165 }
4166
4167 /* Return the number of an unused call-clobbered register available for the
4168 entire function, or INVALID_REGNUM if there is none. */
4169
4170 static unsigned int
4171 ix86_select_alt_pic_regnum (void)
4172 {
4173 if (current_function_is_leaf && !current_function_profile)
4174 {
4175 int i;
4176 for (i = 2; i >= 0; --i)
4177 if (!regs_ever_live[i])
4178 return i;
4179 }
4180
4181 return INVALID_REGNUM;
4182 }
4183
4184 /* Return 1 if we need to save REGNO. */
4185 static int
4186 ix86_save_reg (unsigned int regno, int maybe_eh_return)
4187 {
4188 if (pic_offset_table_rtx
4189 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4190 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4191 || current_function_profile
4192 || current_function_calls_eh_return
4193 || current_function_uses_const_pool))
4194 {
4195 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4196 return 0;
4197 return 1;
4198 }
4199
4200 if (current_function_calls_eh_return && maybe_eh_return)
4201 {
4202 unsigned i;
4203 for (i = 0; ; i++)
4204 {
4205 unsigned test = EH_RETURN_DATA_REGNO (i);
4206 if (test == INVALID_REGNUM)
4207 break;
4208 if (test == regno)
4209 return 1;
4210 }
4211 }
4212
4213 return (regs_ever_live[regno]
4214 && !call_used_regs[regno]
4215 && !fixed_regs[regno]
4216 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4217 }
4218
4219 /* Return number of registers to be saved on the stack. */
4220
4221 static int
4222 ix86_nsaved_regs (void)
4223 {
4224 int nregs = 0;
4225 int regno;
4226
4227 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4228 if (ix86_save_reg (regno, true))
4229 nregs++;
4230 return nregs;
4231 }
4232
4233 /* Return the offset between two registers, one to be eliminated, and the other
4234 its replacement, at the start of a routine. */
4235
4236 HOST_WIDE_INT
4237 ix86_initial_elimination_offset (int from, int to)
4238 {
4239 struct ix86_frame frame;
4240 ix86_compute_frame_layout (&frame);
4241
4242 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4243 return frame.hard_frame_pointer_offset;
4244 else if (from == FRAME_POINTER_REGNUM
4245 && to == HARD_FRAME_POINTER_REGNUM)
4246 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
4247 else
4248 {
4249 gcc_assert (to == STACK_POINTER_REGNUM);
4250
4251 if (from == ARG_POINTER_REGNUM)
4252 return frame.stack_pointer_offset;
4253
4254 gcc_assert (from == FRAME_POINTER_REGNUM);
4255 return frame.stack_pointer_offset - frame.frame_pointer_offset;
4256 }
4257 }
4258
4259 /* Fill structure ix86_frame about frame of currently computed function. */
4260
4261 static void
4262 ix86_compute_frame_layout (struct ix86_frame *frame)
4263 {
4264 HOST_WIDE_INT total_size;
4265 unsigned int stack_alignment_needed;
4266 HOST_WIDE_INT offset;
4267 unsigned int preferred_alignment;
4268 HOST_WIDE_INT size = get_frame_size ();
4269
4270 frame->nregs = ix86_nsaved_regs ();
4271 total_size = size;
4272
4273 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
4274 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4275
4276 /* During reload iteration the number of registers saved can change.
4277 Recompute the value as needed. Do not recompute when the number of registers
4278 did not change, as reload makes multiple calls to this function and does not
4279 expect the decision to change within a single iteration. */
4280 if (!optimize_size
4281 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
4282 {
4283 int count = frame->nregs;
4284
4285 cfun->machine->use_fast_prologue_epilogue_nregs = count;
4286 /* The fast prologue uses move instead of push to save registers. This
4287 is significantly longer, but also executes faster as modern hardware
4288 can execute the moves in parallel, but can't do that for push/pop.
4289
4290 Be careful about choosing what prologue to emit: when the function takes
4291 many instructions to execute we may use the slow version, as we may when
4292 the function is known to be outside a hot spot (this is known only with
4293 profile feedback). Weight the size of the function by the number of
4294 registers to save, as it is cheap to use one or two push instructions but
4295 very slow to use many of them. */
4296 if (count)
4297 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
4298 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
4299 || (flag_branch_probabilities
4300 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
4301 cfun->machine->use_fast_prologue_epilogue = false;
4302 else
4303 cfun->machine->use_fast_prologue_epilogue
4304 = !expensive_function_p (count);
4305 }
4306 if (TARGET_PROLOGUE_USING_MOVE
4307 && cfun->machine->use_fast_prologue_epilogue)
4308 frame->save_regs_using_mov = true;
4309 else
4310 frame->save_regs_using_mov = false;
4311
4312
4313 /* Skip return address and saved base pointer. */
4314 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4315
4316 frame->hard_frame_pointer_offset = offset;
4317
4318 /* Do some sanity checking of stack_alignment_needed and
4319 preferred_alignment, since the i386 port is the only one using these
4320 features, and they may break easily. */
4321
4322 gcc_assert (!size || stack_alignment_needed);
4323 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
4324 gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
4325 gcc_assert (stack_alignment_needed
4326 <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
4327
4328 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
4329 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
4330
4331 /* Register save area */
4332 offset += frame->nregs * UNITS_PER_WORD;
4333
4334 /* Va-arg area */
4335 if (ix86_save_varrargs_registers)
4336 {
4337 offset += X86_64_VARARGS_SIZE;
4338 frame->va_arg_size = X86_64_VARARGS_SIZE;
4339 }
4340 else
4341 frame->va_arg_size = 0;
4342
4343 /* Align start of frame for local function. */
4344 frame->padding1 = ((offset + stack_alignment_needed - 1)
4345 & -stack_alignment_needed) - offset;
4346
4347 offset += frame->padding1;
4348
4349 /* Frame pointer points here. */
4350 frame->frame_pointer_offset = offset;
4351
4352 offset += size;
4353
4354 /* Add the outgoing arguments area. Can be skipped if we eliminated
4355 all the function calls as dead code.
4356 Skipping is however impossible when the function calls alloca, since the
4357 alloca expander assumes that the last current_function_outgoing_args_size
4358 bytes of the stack frame are unused. */
4359 if (ACCUMULATE_OUTGOING_ARGS
4360 && (!current_function_is_leaf || current_function_calls_alloca))
4361 {
4362 offset += current_function_outgoing_args_size;
4363 frame->outgoing_arguments_size = current_function_outgoing_args_size;
4364 }
4365 else
4366 frame->outgoing_arguments_size = 0;
4367
4368 /* Align stack boundary. Only needed if we're calling another function
4369 or using alloca. */
4370 if (!current_function_is_leaf || current_function_calls_alloca)
4371 frame->padding2 = ((offset + preferred_alignment - 1)
4372 & -preferred_alignment) - offset;
4373 else
4374 frame->padding2 = 0;
4375
4376 offset += frame->padding2;
4377
4378 /* We've reached end of stack frame. */
4379 frame->stack_pointer_offset = offset;
4380
4381 /* Size prologue needs to allocate. */
4382 frame->to_allocate =
4383 (size + frame->padding1 + frame->padding2
4384 + frame->outgoing_arguments_size + frame->va_arg_size);
4385
4386 if ((!frame->to_allocate && frame->nregs <= 1)
4387 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
4388 frame->save_regs_using_mov = false;
4389
4390 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
4391 && current_function_is_leaf)
4392 {
4393 frame->red_zone_size = frame->to_allocate;
4394 if (frame->save_regs_using_mov)
4395 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
4396 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
4397 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
4398 }
4399 else
4400 frame->red_zone_size = 0;
4401 frame->to_allocate -= frame->red_zone_size;
4402 frame->stack_pointer_offset -= frame->red_zone_size;
4403 #if 0
4404 fprintf (stderr, "nregs: %i\n", frame->nregs);
4405 fprintf (stderr, "size: %i\n", size);
4406 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
4407 fprintf (stderr, "padding1: %i\n", frame->padding1);
4408 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
4409 fprintf (stderr, "padding2: %i\n", frame->padding2);
4410 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
4411 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
4412 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
4413 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
4414 frame->hard_frame_pointer_offset);
4415 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
4416 #endif
4417 }
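/* A rough sketch of the layout computed above, from higher to lower addresses
   (illustrative only; the exact padding depends on the alignments checked
   above):

       return address (and saved %ebp if frame_pointer_needed)
                                            <- hard_frame_pointer_offset
       register save area (nregs words)
       va-arg register save area (if any)
       padding1                             <- frame_pointer_offset
       local variables (get_frame_size ())
       outgoing arguments area
       padding2                             <- stack_pointer_offset

   to_allocate covers everything below the register save area, minus the part
   that the x86-64 red zone lets a leaf function use without adjusting the
   stack pointer.  */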
4418
4419 /* Emit code to save registers in the prologue. */
4420
4421 static void
4422 ix86_emit_save_regs (void)
4423 {
4424 int regno;
4425 rtx insn;
4426
4427 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4428 if (ix86_save_reg (regno, true))
4429 {
4430 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
4431 RTX_FRAME_RELATED_P (insn) = 1;
4432 }
4433 }
4434
4435 /* Emit code to save registers using MOV insns. The first register
4436 is saved at POINTER + OFFSET. */
4437 static void
4438 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
4439 {
4440 int regno;
4441 rtx insn;
4442
4443 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4444 if (ix86_save_reg (regno, true))
4445 {
4446 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
4447 Pmode, offset),
4448 gen_rtx_REG (Pmode, regno));
4449 RTX_FRAME_RELATED_P (insn) = 1;
4450 offset += UNITS_PER_WORD;
4451 }
4452 }
4453
4454 /* Expand prologue or epilogue stack adjustment.
4455 The pattern exists to put a dependency on all ebp-based memory accesses.
4456 STYLE should be negative if the instructions should be marked as frame
4457 related, zero if the %r11 register is live and cannot be freely used, and
4458 positive otherwise. */
4459
4460 static void
4461 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
4462 {
4463 rtx insn;
4464
4465 if (! TARGET_64BIT)
4466 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
4467 else if (x86_64_immediate_operand (offset, DImode))
4468 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
4469 else
4470 {
4471 rtx r11;
4472 /* r11 is used by indirect sibcall return as well, set before the
4473 epilogue and used after the epilogue. ATM indirect sibcall
4474 shouldn't be used together with huge frame sizes in one
4475 function because of the frame_size check in sibcall.c. */
4476 gcc_assert (style);
4477 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
4478 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
4479 if (style < 0)
4480 RTX_FRAME_RELATED_P (insn) = 1;
4481 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
4482 offset));
4483 }
4484 if (style < 0)
4485 RTX_FRAME_RELATED_P (insn) = 1;
4486 }
4487
4488 /* Expand the prologue into a bunch of separate insns. */
4489
4490 void
4491 ix86_expand_prologue (void)
4492 {
4493 rtx insn;
4494 bool pic_reg_used;
4495 struct ix86_frame frame;
4496 HOST_WIDE_INT allocate;
4497
4498 ix86_compute_frame_layout (&frame);
4499
4500 /* Note: AT&T enter does NOT have reversed args. Enter is probably
4501 slower on all targets. Also sdb doesn't like it. */
4502
4503 if (frame_pointer_needed)
4504 {
4505 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
4506 RTX_FRAME_RELATED_P (insn) = 1;
4507
4508 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
4509 RTX_FRAME_RELATED_P (insn) = 1;
4510 }
4511
4512 allocate = frame.to_allocate;
4513
4514 if (!frame.save_regs_using_mov)
4515 ix86_emit_save_regs ();
4516 else
4517 allocate += frame.nregs * UNITS_PER_WORD;
4518
4519 /* When using the red zone we may start saving registers before allocating
4520 the stack frame, saving one cycle of the prologue. */
4521 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
4522 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
4523 : stack_pointer_rtx,
4524 -frame.nregs * UNITS_PER_WORD);
4525
4526 if (allocate == 0)
4527 ;
4528 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
4529 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
4530 GEN_INT (-allocate), -1);
4531 else
4532 {
4533 /* Only valid for Win32. */
4534 rtx eax = gen_rtx_REG (SImode, 0);
4535 bool eax_live = ix86_eax_live_at_start_p ();
4536 rtx t;
4537
4538 gcc_assert (!TARGET_64BIT);
4539
4540 if (eax_live)
4541 {
4542 emit_insn (gen_push (eax));
4543 allocate -= 4;
4544 }
4545
4546 emit_move_insn (eax, GEN_INT (allocate));
4547
4548 insn = emit_insn (gen_allocate_stack_worker (eax));
4549 RTX_FRAME_RELATED_P (insn) = 1;
4550 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
4551 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
4552 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
4553 t, REG_NOTES (insn));
4554
4555 if (eax_live)
4556 {
4557 if (frame_pointer_needed)
4558 t = plus_constant (hard_frame_pointer_rtx,
4559 allocate
4560 - frame.to_allocate
4561 - frame.nregs * UNITS_PER_WORD);
4562 else
4563 t = plus_constant (stack_pointer_rtx, allocate);
4564 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
4565 }
4566 }
4567
4568 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
4569 {
4570 if (!frame_pointer_needed || !frame.to_allocate)
4571 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
4572 else
4573 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
4574 -frame.nregs * UNITS_PER_WORD);
4575 }
4576
4577 pic_reg_used = false;
4578 if (pic_offset_table_rtx
4579 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4580 || current_function_profile))
4581 {
4582 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
4583
4584 if (alt_pic_reg_used != INVALID_REGNUM)
4585 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
4586
4587 pic_reg_used = true;
4588 }
4589
4590 if (pic_reg_used)
4591 {
4592 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
4593
4594 /* Even with accurate pre-reload life analysis, we can wind up
4595 deleting all references to the pic register after reload.
4596 Consider if cross-jumping unifies two sides of a branch
4597 controlled by a comparison vs the only read from a global.
4598 In which case, allow the set_got to be deleted, though we're
4599 too late to do anything about the ebx save in the prologue. */
4600 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
4601 }
4602
4603 /* Prevent function calls from being scheduled before the call to mcount.
4604 In the pic_reg_used case, make sure that the GOT load isn't deleted. */
4605 if (current_function_profile)
4606 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
4607 }
4608
4609 /* Emit code to restore saved registers using MOV insns. First register
4610 is restored from POINTER + OFFSET. */
4611 static void
4612 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
4613 int maybe_eh_return)
4614 {
4615 int regno;
4616 rtx base_address = gen_rtx_MEM (Pmode, pointer);
4617
4618 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4619 if (ix86_save_reg (regno, maybe_eh_return))
4620 {
4621 /* Ensure that adjust_address won't be forced to produce a pointer
4622 out of the range allowed by the x86-64 instruction set. */
4623 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
4624 {
4625 rtx r11;
4626
4627 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
4628 emit_move_insn (r11, GEN_INT (offset));
4629 emit_insn (gen_adddi3 (r11, r11, pointer));
4630 base_address = gen_rtx_MEM (Pmode, r11);
4631 offset = 0;
4632 }
4633 emit_move_insn (gen_rtx_REG (Pmode, regno),
4634 adjust_address (base_address, Pmode, offset));
4635 offset += UNITS_PER_WORD;
4636 }
4637 }
4638
4639 /* Restore function stack, frame, and registers. */
4640
4641 void
4642 ix86_expand_epilogue (int style)
4643 {
4644 int regno;
4645 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
4646 struct ix86_frame frame;
4647 HOST_WIDE_INT offset;
4648
4649 ix86_compute_frame_layout (&frame);
4650
4651 /* Calculate start of saved registers relative to ebp. Special care
4652 must be taken for the normal return case of a function using
4653 eh_return: the eax and edx registers are marked as saved, but not
4654 restored along this path. */
4655 offset = frame.nregs;
4656 if (current_function_calls_eh_return && style != 2)
4657 offset -= 2;
4658 offset *= -UNITS_PER_WORD;
4659
4660 /* If we're only restoring one register and sp is not valid then
4661 use a move instruction to restore the register, since it's
4662 less work than reloading sp and popping the register.
4663
4664 The default code results in a stack adjustment using an add/lea instruction,
4665 while this code results in a LEAVE instruction (or discrete equivalent),
4666 so it is profitable in some other cases as well, especially when there
4667 are no registers to restore. We also use this code when TARGET_USE_LEAVE
4668 is set and there is exactly one register to pop. This heuristic may need
4669 some tuning in the future. */
4670 if ((!sp_valid && frame.nregs <= 1)
4671 || (TARGET_EPILOGUE_USING_MOVE
4672 && cfun->machine->use_fast_prologue_epilogue
4673 && (frame.nregs > 1 || frame.to_allocate))
4674 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
4675 || (frame_pointer_needed && TARGET_USE_LEAVE
4676 && cfun->machine->use_fast_prologue_epilogue
4677 && frame.nregs == 1)
4678 || current_function_calls_eh_return)
4679 {
4680 /* Restore registers. We can use ebp or esp to address the memory
4681 locations. If both are available, default to ebp, since offsets
4682 are known to be small. The only exception is when esp points directly
4683 to the end of the block of saved registers, where we may simplify the
4684 addressing mode. */
4685
4686 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
4687 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
4688 frame.to_allocate, style == 2);
4689 else
4690 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
4691 offset, style == 2);
4692
4693 /* eh_return epilogues need %ecx added to the stack pointer. */
4694 if (style == 2)
4695 {
4696 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
4697
4698 if (frame_pointer_needed)
4699 {
4700 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
4701 tmp = plus_constant (tmp, UNITS_PER_WORD);
4702 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
4703
4704 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
4705 emit_move_insn (hard_frame_pointer_rtx, tmp);
4706
4707 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
4708 const0_rtx, style);
4709 }
4710 else
4711 {
4712 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
4713 tmp = plus_constant (tmp, (frame.to_allocate
4714 + frame.nregs * UNITS_PER_WORD));
4715 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
4716 }
4717 }
4718 else if (!frame_pointer_needed)
4719 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
4720 GEN_INT (frame.to_allocate
4721 + frame.nregs * UNITS_PER_WORD),
4722 style);
4723 /* If not an i386, mov & pop is faster than "leave". */
4724 else if (TARGET_USE_LEAVE || optimize_size
4725 || !cfun->machine->use_fast_prologue_epilogue)
4726 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4727 else
4728 {
4729 pro_epilogue_adjust_stack (stack_pointer_rtx,
4730 hard_frame_pointer_rtx,
4731 const0_rtx, style);
4732 if (TARGET_64BIT)
4733 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4734 else
4735 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4736 }
4737 }
4738 else
4739 {
4740 /* First step is to deallocate the stack frame so that we can
4741 pop the registers. */
4742 if (!sp_valid)
4743 {
4744 gcc_assert (frame_pointer_needed);
4745 pro_epilogue_adjust_stack (stack_pointer_rtx,
4746 hard_frame_pointer_rtx,
4747 GEN_INT (offset), style);
4748 }
4749 else if (frame.to_allocate)
4750 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
4751 GEN_INT (frame.to_allocate), style);
4752
4753 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4754 if (ix86_save_reg (regno, false))
4755 {
4756 if (TARGET_64BIT)
4757 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
4758 else
4759 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
4760 }
4761 if (frame_pointer_needed)
4762 {
4763 /* Leave results in shorter dependency chains on CPUs that are
4764 able to grok it fast. */
4765 if (TARGET_USE_LEAVE)
4766 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4767 else if (TARGET_64BIT)
4768 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4769 else
4770 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4771 }
4772 }
4773
4774 /* Sibcall epilogues don't want a return instruction. */
4775 if (style == 0)
4776 return;
4777
4778 if (current_function_pops_args && current_function_args_size)
4779 {
4780 rtx popc = GEN_INT (current_function_pops_args);
4781
4782 /* i386 can only pop 64K bytes. If asked to pop more, pop the
4783 return address, do an explicit add, and jump indirectly to the
4784 caller. */
4785
4786 if (current_function_pops_args >= 65536)
4787 {
4788 rtx ecx = gen_rtx_REG (SImode, 2);
4789
4790 /* There is no "pascal" calling convention in 64bit ABI. */
4791 gcc_assert (!TARGET_64BIT);
4792
4793 emit_insn (gen_popsi1 (ecx));
4794 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
4795 emit_jump_insn (gen_return_indirect_internal (ecx));
4796 }
4797 else
4798 emit_jump_insn (gen_return_pop_internal (popc));
4799 }
4800 else
4801 emit_jump_insn (gen_return_internal ());
4802 }
4803
4804 /* Reset state that the function may have modified; currently just the PIC register number. */
4805
4806 static void
4807 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
4808 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
4809 {
4810 if (pic_offset_table_rtx)
4811 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
4812 }
4813 \f
4814 /* Extract the parts of an RTL expression that is a valid memory address
4815 for an instruction. Return 0 if the structure of the address is
4816 grossly off. Return -1 if the address contains ASHIFT, so it is not
4817 strictly valid, but is still used for computing the length of a lea instruction. */
4818
4819 int
4820 ix86_decompose_address (rtx addr, struct ix86_address *out)
4821 {
4822 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
4823 rtx base_reg, index_reg;
4824 HOST_WIDE_INT scale = 1;
4825 rtx scale_rtx = NULL_RTX;
4826 int retval = 1;
4827 enum ix86_address_seg seg = SEG_DEFAULT;
4828
4829 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
4830 base = addr;
4831 else if (GET_CODE (addr) == PLUS)
4832 {
4833 rtx addends[4], op;
4834 int n = 0, i;
4835
4836 op = addr;
4837 do
4838 {
4839 if (n >= 4)
4840 return 0;
4841 addends[n++] = XEXP (op, 1);
4842 op = XEXP (op, 0);
4843 }
4844 while (GET_CODE (op) == PLUS);
4845 if (n >= 4)
4846 return 0;
4847 addends[n] = op;
4848
4849 for (i = n; i >= 0; --i)
4850 {
4851 op = addends[i];
4852 switch (GET_CODE (op))
4853 {
4854 case MULT:
4855 if (index)
4856 return 0;
4857 index = XEXP (op, 0);
4858 scale_rtx = XEXP (op, 1);
4859 break;
4860
4861 case UNSPEC:
4862 if (XINT (op, 1) == UNSPEC_TP
4863 && TARGET_TLS_DIRECT_SEG_REFS
4864 && seg == SEG_DEFAULT)
4865 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
4866 else
4867 return 0;
4868 break;
4869
4870 case REG:
4871 case SUBREG:
4872 if (!base)
4873 base = op;
4874 else if (!index)
4875 index = op;
4876 else
4877 return 0;
4878 break;
4879
4880 case CONST:
4881 case CONST_INT:
4882 case SYMBOL_REF:
4883 case LABEL_REF:
4884 if (disp)
4885 return 0;
4886 disp = op;
4887 break;
4888
4889 default:
4890 return 0;
4891 }
4892 }
4893 }
4894 else if (GET_CODE (addr) == MULT)
4895 {
4896 index = XEXP (addr, 0); /* index*scale */
4897 scale_rtx = XEXP (addr, 1);
4898 }
4899 else if (GET_CODE (addr) == ASHIFT)
4900 {
4901 rtx tmp;
4902
4903 /* We're called for lea too, which implements ashift on occasion. */
4904 index = XEXP (addr, 0);
4905 tmp = XEXP (addr, 1);
4906 if (GET_CODE (tmp) != CONST_INT)
4907 return 0;
4908 scale = INTVAL (tmp);
4909 if ((unsigned HOST_WIDE_INT) scale > 3)
4910 return 0;
4911 scale = 1 << scale;
4912 retval = -1;
4913 }
4914 else
4915 disp = addr; /* displacement */
4916
4917 /* Extract the integral value of scale. */
4918 if (scale_rtx)
4919 {
4920 if (GET_CODE (scale_rtx) != CONST_INT)
4921 return 0;
4922 scale = INTVAL (scale_rtx);
4923 }
4924
4925 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
4926 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
4927
4928 /* Allow the arg pointer and stack pointer as the index if there is no scaling. */
4929 if (base_reg && index_reg && scale == 1
4930 && (index_reg == arg_pointer_rtx
4931 || index_reg == frame_pointer_rtx
4932 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
4933 {
4934 rtx tmp;
4935 tmp = base, base = index, index = tmp;
4936 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
4937 }
4938
4939 /* Special case: %ebp cannot be encoded as a base without a displacement. */
4940 if ((base_reg == hard_frame_pointer_rtx
4941 || base_reg == frame_pointer_rtx
4942 || base_reg == arg_pointer_rtx) && !disp)
4943 disp = const0_rtx;
4944
4945 /* Special case: on K6, [%esi] causes the instruction to be vector decoded.
4946 Avoid this by transforming it to [%esi+0]. */
4947 if (ix86_tune == PROCESSOR_K6 && !optimize_size
4948 && base_reg && !index_reg && !disp
4949 && REG_P (base_reg)
4950 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
4951 disp = const0_rtx;
4952
4953 /* Special case: encode reg+reg instead of reg*2. */
4954 if (!base && index && scale && scale == 2)
4955 base = index, base_reg = index_reg, scale = 1;
4956
4957 /* Special case: scaling cannot be encoded without base or displacement. */
4958 if (!base && !disp && index && scale != 1)
4959 disp = const0_rtx;
4960
4961 out->base = base;
4962 out->index = index;
4963 out->disp = disp;
4964 out->scale = scale;
4965 out->seg = seg;
4966
4967 return retval;
4968 }
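/* For example (illustrative only), decomposing the canonical address

       (plus:SI (plus:SI (mult:SI (reg:SI %ecx) (const_int 4))
                         (reg:SI %ebx))
                (const_int 12))

   yields base = %ebx, index = %ecx, scale = 4, disp = (const_int 12),
   seg = SEG_DEFAULT, and a return value of 1, i.e. the operand of an
   instruction using the memory form 12(%ebx,%ecx,4).  */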
4969 \f
4970 /* Return the cost of the memory address X.
4971 For i386, it is better to use a complex address than to let gcc copy
4972 the address into a reg and make a new pseudo. But not if the address
4973 requires two regs; that would mean more pseudos with longer
4974 lifetimes. */
4975 static int
4976 ix86_address_cost (rtx x)
4977 {
4978 struct ix86_address parts;
4979 int cost = 1;
4980 int ok = ix86_decompose_address (x, &parts);
4981
4982 gcc_assert (ok);
4983
4984 if (parts.base && GET_CODE (parts.base) == SUBREG)
4985 parts.base = SUBREG_REG (parts.base);
4986 if (parts.index && GET_CODE (parts.index) == SUBREG)
4987 parts.index = SUBREG_REG (parts.index);
4988
4989 /* More complex memory references are better. */
4990 if (parts.disp && parts.disp != const0_rtx)
4991 cost--;
4992 if (parts.seg != SEG_DEFAULT)
4993 cost--;
4994
4995 /* Attempt to minimize number of registers in the address. */
4996 if ((parts.base
4997 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
4998 || (parts.index
4999 && (!REG_P (parts.index)
5000 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
5001 cost++;
5002
5003 if (parts.base
5004 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
5005 && parts.index
5006 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
5007 && parts.base != parts.index)
5008 cost++;
5009
5010 /* The AMD-K6 doesn't like addresses with the ModR/M byte set to 00_xxx_100b,
5011 since its predecode logic can't detect the length of such instructions
5012 and decoding degenerates to the vector decoder. Increase the cost of such
5013 addresses here. The penalty is at least 2 cycles. It may be worthwhile
5014 to split such addresses or even to refuse them entirely.
5015
5016 The following addressing modes are affected:
5017 [base+scale*index]
5018 [scale*index+disp]
5019 [base+index]
5020
5021 The first and last cases may be avoidable by explicitly coding the zero into
5022 the memory address, but I don't have an AMD-K6 machine handy to check this
5023 theory. */
5024
5025 if (TARGET_K6
5026 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
5027 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
5028 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
5029 cost += 10;
5030
5031 return cost;
5032 }
5033 \f
5034 /* If X is a machine specific address (i.e. a symbol or label being
5035 referenced as a displacement from the GOT implemented using an
5036 UNSPEC), then return the base term. Otherwise return X. */
5037
5038 rtx
5039 ix86_find_base_term (rtx x)
5040 {
5041 rtx term;
5042
5043 if (TARGET_64BIT)
5044 {
5045 if (GET_CODE (x) != CONST)
5046 return x;
5047 term = XEXP (x, 0);
5048 if (GET_CODE (term) == PLUS
5049 && (GET_CODE (XEXP (term, 1)) == CONST_INT
5050 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
5051 term = XEXP (term, 0);
5052 if (GET_CODE (term) != UNSPEC
5053 || XINT (term, 1) != UNSPEC_GOTPCREL)
5054 return x;
5055
5056 term = XVECEXP (term, 0, 0);
5057
5058 if (GET_CODE (term) != SYMBOL_REF
5059 && GET_CODE (term) != LABEL_REF)
5060 return x;
5061
5062 return term;
5063 }
5064
5065 term = ix86_delegitimize_address (x);
5066
5067 if (GET_CODE (term) != SYMBOL_REF
5068 && GET_CODE (term) != LABEL_REF)
5069 return x;
5070
5071 return term;
5072 }
5073
5074 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O, as
5075 this is used to form addresses of local data when -fPIC is in
5076 use. */
5077
5078 static bool
5079 darwin_local_data_pic (rtx disp)
5080 {
5081 if (GET_CODE (disp) == MINUS)
5082 {
5083 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
5084 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
5085 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
5086 {
5087 const char *sym_name = XSTR (XEXP (disp, 1), 0);
5088 if (! strcmp (sym_name, "<pic base>"))
5089 return true;
5090 }
5091 }
5092
5093 return false;
5094 }
5095 \f
5096 /* Determine if a given RTX is a valid constant. We already know this
5097 satisfies CONSTANT_P. */
5098
5099 bool
5100 legitimate_constant_p (rtx x)
5101 {
5102 switch (GET_CODE (x))
5103 {
5104 case CONST:
5105 x = XEXP (x, 0);
5106
5107 if (GET_CODE (x) == PLUS)
5108 {
5109 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
5110 return false;
5111 x = XEXP (x, 0);
5112 }
5113
5114 if (TARGET_MACHO && darwin_local_data_pic (x))
5115 return true;
5116
5117 /* Only some unspecs are valid as "constants". */
5118 if (GET_CODE (x) == UNSPEC)
5119 switch (XINT (x, 1))
5120 {
5121 case UNSPEC_TPOFF:
5122 case UNSPEC_NTPOFF:
5123 return local_exec_symbolic_operand (XVECEXP (x, 0, 0), Pmode);
5124 case UNSPEC_DTPOFF:
5125 return local_dynamic_symbolic_operand (XVECEXP (x, 0, 0), Pmode);
5126 default:
5127 return false;
5128 }
5129
5130 /* We must have drilled down to a symbol. */
5131 if (!symbolic_operand (x, Pmode))
5132 return false;
5133 /* FALLTHRU */
5134
5135 case SYMBOL_REF:
5136 /* TLS symbols are never valid. */
5137 if (tls_symbolic_operand (x, Pmode))
5138 return false;
5139 break;
5140
5141 default:
5142 break;
5143 }
5144
5145 /* Otherwise we handle everything else in the move patterns. */
5146 return true;
5147 }
5148
5149 /* Determine if it's legal to put X into the constant pool. This
5150 is not possible for the address of thread-local symbols, which
5151 is checked above. */
5152
5153 static bool
5154 ix86_cannot_force_const_mem (rtx x)
5155 {
5156 return !legitimate_constant_p (x);
5157 }
5158
5159 /* Determine if a given RTX is a valid constant address. */
5160
5161 bool
5162 constant_address_p (rtx x)
5163 {
5164 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
5165 }
5166
5167 /* Nonzero if the constant value X is a legitimate general operand
5168 when generating PIC code. It is given that flag_pic is on and
5169 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
5170
5171 bool
5172 legitimate_pic_operand_p (rtx x)
5173 {
5174 rtx inner;
5175
5176 switch (GET_CODE (x))
5177 {
5178 case CONST:
5179 inner = XEXP (x, 0);
5180
5181 /* Only some unspecs are valid as "constants". */
5182 if (GET_CODE (inner) == UNSPEC)
5183 switch (XINT (inner, 1))
5184 {
5185 case UNSPEC_TPOFF:
5186 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5187 default:
5188 return false;
5189 }
5190 /* FALLTHRU */
5191
5192 case SYMBOL_REF:
5193 case LABEL_REF:
5194 return legitimate_pic_address_disp_p (x);
5195
5196 default:
5197 return true;
5198 }
5199 }
5200
5201 /* Determine if a given CONST RTX is a valid memory displacement
5202 in PIC mode. */
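/* For example, in 32-bit PIC code a displacement such as
     (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF))
   or the same unspec wrapped in a (plus ... (const_int ...)) is accepted
   below, while a bare SYMBOL_REF is not ("foo" stands for any local
   symbol).  */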
5203
5204 int
5205 legitimate_pic_address_disp_p (rtx disp)
5206 {
5207 bool saw_plus;
5208
5209 /* In 64bit mode we can allow direct addresses of symbols and labels
5210 when they are not dynamic symbols. */
5211 if (TARGET_64BIT)
5212 {
5213 /* TLS references should always be enclosed in UNSPEC. */
5214 if (tls_symbolic_operand (disp, GET_MODE (disp)))
5215 return 0;
5216 if (GET_CODE (disp) == SYMBOL_REF
5217 && ix86_cmodel == CM_SMALL_PIC
5218 && SYMBOL_REF_LOCAL_P (disp))
5219 return 1;
5220 if (GET_CODE (disp) == LABEL_REF)
5221 return 1;
5222 if (GET_CODE (disp) == CONST
5223 && GET_CODE (XEXP (disp, 0)) == PLUS)
5224 {
5225 rtx op0 = XEXP (XEXP (disp, 0), 0);
5226 rtx op1 = XEXP (XEXP (disp, 0), 1);
5227
5228 /* TLS references should always be enclosed in UNSPEC. */
5229 if (tls_symbolic_operand (op0, GET_MODE (op0)))
5230 return 0;
5231 if (((GET_CODE (op0) == SYMBOL_REF
5232 && ix86_cmodel == CM_SMALL_PIC
5233 && SYMBOL_REF_LOCAL_P (op0))
5234 || GET_CODE (op0) == LABEL_REF)
5235 && GET_CODE (op1) == CONST_INT
5236 && INTVAL (op1) < 16*1024*1024
5237 && INTVAL (op1) >= -16*1024*1024)
5238 return 1;
5239 }
5240 }
5241 if (GET_CODE (disp) != CONST)
5242 return 0;
5243 disp = XEXP (disp, 0);
5244
5245 if (TARGET_64BIT)
5246 {
5247 /* It is unsafe to allow PLUS expressions here; this limits the allowed
5248 distance of GOT table references. We should not need these anyway. */
5249 if (GET_CODE (disp) != UNSPEC
5250 || XINT (disp, 1) != UNSPEC_GOTPCREL)
5251 return 0;
5252
5253 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
5254 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
5255 return 0;
5256 return 1;
5257 }
5258
5259 saw_plus = false;
5260 if (GET_CODE (disp) == PLUS)
5261 {
5262 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
5263 return 0;
5264 disp = XEXP (disp, 0);
5265 saw_plus = true;
5266 }
5267
5268 if (TARGET_MACHO && darwin_local_data_pic (disp))
5269 return 1;
5270
5271 if (GET_CODE (disp) != UNSPEC)
5272 return 0;
5273
5274 switch (XINT (disp, 1))
5275 {
5276 case UNSPEC_GOT:
5277 if (saw_plus)
5278 return false;
5279 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
5280 case UNSPEC_GOTOFF:
5281 if (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
5282 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
5283 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5284 return false;
5285 case UNSPEC_GOTTPOFF:
5286 case UNSPEC_GOTNTPOFF:
5287 case UNSPEC_INDNTPOFF:
5288 if (saw_plus)
5289 return false;
5290 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5291 case UNSPEC_NTPOFF:
5292 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5293 case UNSPEC_DTPOFF:
5294 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5295 }
5296
5297 return 0;
5298 }
5299
5300 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5301 memory address for an instruction. The MODE argument is the machine mode
5302 for the MEM expression that wants to use this address.
5303
5304 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
5305 convert common non-canonical forms to canonical form so that they will
5306 be recognized. */
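/* For example, scaled-index addresses are expected in the multiply form
     (plus (reg) (mult (reg) (const_int 4)))
   rather than the shift form (plus (reg) (ashift (reg) (const_int 2)));
   LEGITIMIZE_ADDRESS below converts such shifts into multiplies.  */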
5307
5308 int
5309 legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
5310 {
5311 struct ix86_address parts;
5312 rtx base, index, disp;
5313 HOST_WIDE_INT scale;
5314 const char *reason = NULL;
5315 rtx reason_rtx = NULL_RTX;
5316
5317 if (TARGET_DEBUG_ADDR)
5318 {
5319 fprintf (stderr,
5320 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
5321 GET_MODE_NAME (mode), strict);
5322 debug_rtx (addr);
5323 }
5324
5325 if (ix86_decompose_address (addr, &parts) <= 0)
5326 {
5327 reason = "decomposition failed";
5328 goto report_error;
5329 }
5330
5331 base = parts.base;
5332 index = parts.index;
5333 disp = parts.disp;
5334 scale = parts.scale;
5335
5336 /* Validate base register.
5337
5338 Don't allow SUBREGs that span more than a word here. They can lead to spill
5339 failures when the base is one word out of a two word structure, which is
5340 represented internally as a DImode int. */
5341
5342 if (base)
5343 {
5344 rtx reg;
5345 reason_rtx = base;
5346
5347 if (REG_P (base))
5348 reg = base;
5349 else if (GET_CODE (base) == SUBREG
5350 && REG_P (SUBREG_REG (base))
5351 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
5352 <= UNITS_PER_WORD)
5353 reg = SUBREG_REG (base);
5354 else
5355 {
5356 reason = "base is not a register";
5357 goto report_error;
5358 }
5359
5360 if (GET_MODE (base) != Pmode)
5361 {
5362 reason = "base is not in Pmode";
5363 goto report_error;
5364 }
5365
5366 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
5367 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
5368 {
5369 reason = "base is not valid";
5370 goto report_error;
5371 }
5372 }
5373
5374 /* Validate index register.
5375
5376 Don't allow SUBREGs that span more than a word here -- same as above. */
5377
5378 if (index)
5379 {
5380 rtx reg;
5381 reason_rtx = index;
5382
5383 if (REG_P (index))
5384 reg = index;
5385 else if (GET_CODE (index) == SUBREG
5386 && REG_P (SUBREG_REG (index))
5387 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
5388 <= UNITS_PER_WORD)
5389 reg = SUBREG_REG (index);
5390 else
5391 {
5392 reason = "index is not a register";
5393 goto report_error;
5394 }
5395
5396 if (GET_MODE (index) != Pmode)
5397 {
5398 reason = "index is not in Pmode";
5399 goto report_error;
5400 }
5401
5402 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
5403 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
5404 {
5405 reason = "index is not valid";
5406 goto report_error;
5407 }
5408 }
5409
5410 /* Validate scale factor. */
5411 if (scale != 1)
5412 {
5413 reason_rtx = GEN_INT (scale);
5414 if (!index)
5415 {
5416 reason = "scale without index";
5417 goto report_error;
5418 }
5419
5420 if (scale != 2 && scale != 4 && scale != 8)
5421 {
5422 reason = "scale is not a valid multiplier";
5423 goto report_error;
5424 }
5425 }
5426
5427 /* Validate displacement. */
5428 if (disp)
5429 {
5430 reason_rtx = disp;
5431
5432 if (GET_CODE (disp) == CONST
5433 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
5434 switch (XINT (XEXP (disp, 0), 1))
5435 {
5436 case UNSPEC_GOT:
5437 case UNSPEC_GOTOFF:
5438 case UNSPEC_GOTPCREL:
5439 gcc_assert (flag_pic);
5440 goto is_legitimate_pic;
5441
5442 case UNSPEC_GOTTPOFF:
5443 case UNSPEC_GOTNTPOFF:
5444 case UNSPEC_INDNTPOFF:
5445 case UNSPEC_NTPOFF:
5446 case UNSPEC_DTPOFF:
5447 break;
5448
5449 default:
5450 reason = "invalid address unspec";
5451 goto report_error;
5452 }
5453
5454 else if (flag_pic && (SYMBOLIC_CONST (disp)
5455 #if TARGET_MACHO
5456 && !machopic_operand_p (disp)
5457 #endif
5458 ))
5459 {
5460 is_legitimate_pic:
5461 if (TARGET_64BIT && (index || base))
5462 {
5463 /* foo@dtpoff(%rX) is ok. */
5464 if (GET_CODE (disp) != CONST
5465 || GET_CODE (XEXP (disp, 0)) != PLUS
5466 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
5467 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
5468 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
5469 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
5470 {
5471 reason = "non-constant pic memory reference";
5472 goto report_error;
5473 }
5474 }
5475 else if (! legitimate_pic_address_disp_p (disp))
5476 {
5477 reason = "displacement is an invalid pic construct";
5478 goto report_error;
5479 }
5480
5481 /* This code used to verify that a symbolic pic displacement
5482 includes the pic_offset_table_rtx register.
5483
5484 While this is a good idea, unfortunately these constructs may
5485 be created by the "adds using lea" optimization for incorrect
5486 code like:
5487
5488 int a;
5489 int foo(int i)
5490 {
5491 return *(&a+i);
5492 }
5493
5494 This code is nonsensical, but results in addressing the
5495 GOT table with a pic_offset_table_rtx base. We can't
5496 just reject it easily, since it gets matched by the
5497 "addsi3" pattern, which later gets split to lea when the
5498 output register differs from the input. While this
5499 could be handled by a separate addsi pattern for that case
5500 that never results in lea, disabling this test seems to be
5501 the easier and correct fix for the crash. */
5502 }
5503 else if (GET_CODE (disp) != LABEL_REF
5504 && GET_CODE (disp) != CONST_INT
5505 && (GET_CODE (disp) != CONST
5506 || !legitimate_constant_p (disp))
5507 && (GET_CODE (disp) != SYMBOL_REF
5508 || !legitimate_constant_p (disp)))
5509 {
5510 reason = "displacement is not constant";
5511 goto report_error;
5512 }
5513 else if (TARGET_64BIT
5514 && !x86_64_immediate_operand (disp, VOIDmode))
5515 {
5516 reason = "displacement is out of range";
5517 goto report_error;
5518 }
5519 }
5520
5521 /* Everything looks valid. */
5522 if (TARGET_DEBUG_ADDR)
5523 fprintf (stderr, "Success.\n");
5524 return TRUE;
5525
5526 report_error:
5527 if (TARGET_DEBUG_ADDR)
5528 {
5529 fprintf (stderr, "Error: %s\n", reason);
5530 debug_rtx (reason_rtx);
5531 }
5532 return FALSE;
5533 }
5534 \f
5535 /* Return a unique alias set for the GOT. */
5536
5537 static HOST_WIDE_INT
5538 ix86_GOT_alias_set (void)
5539 {
5540 static HOST_WIDE_INT set = -1;
5541 if (set == -1)
5542 set = new_alias_set ();
5543 return set;
5544 }
5545
5546 /* Return a legitimate reference for ORIG (an address) using the
5547 register REG. If REG is 0, a new pseudo is generated.
5548
5549 There are two types of references that must be handled:
5550
5551 1. Global data references must load the address from the GOT, via
5552 the PIC reg. An insn is emitted to do this load, and the reg is
5553 returned.
5554
5555 2. Static data references, constant pool addresses, and code labels
5556 compute the address as an offset from the GOT, whose base is in
5557 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
5558 differentiate them from global data objects. The returned
5559 address is the PIC reg + an unspec constant.
5560
5561 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
5562 reg also appears in the address. */
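/* Roughly, for a global symbol the code below emits a load such as
     (set (reg) (mem (plus pic_offset_table_rtx
                           (const (unspec [(symbol_ref "foo")] UNSPEC_GOT)))))
   and returns the register, while for a local symbol it returns an
   address of the form
     (plus pic_offset_table_rtx
           (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF)))
   ("foo" stands for any symbol; the 64-bit case uses @GOTPCREL instead).  */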
5563
5564 static rtx
5565 legitimize_pic_address (rtx orig, rtx reg)
5566 {
5567 rtx addr = orig;
5568 rtx new = orig;
5569 rtx base;
5570
5571 #if TARGET_MACHO
5572 if (reg == 0)
5573 reg = gen_reg_rtx (Pmode);
5574 /* Use the generic Mach-O PIC machinery. */
5575 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
5576 #endif
5577
5578 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
5579 new = addr;
5580 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
5581 {
5582 /* This symbol may be referenced via a displacement from the PIC
5583 base address (@GOTOFF). */
5584
5585 if (reload_in_progress)
5586 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5587 if (GET_CODE (addr) == CONST)
5588 addr = XEXP (addr, 0);
5589 if (GET_CODE (addr) == PLUS)
5590 {
5591 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
5592 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
5593 }
5594 else
5595 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
5596 new = gen_rtx_CONST (Pmode, new);
5597 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5598
5599 if (reg != 0)
5600 {
5601 emit_move_insn (reg, new);
5602 new = reg;
5603 }
5604 }
5605 else if (GET_CODE (addr) == SYMBOL_REF)
5606 {
5607 if (TARGET_64BIT)
5608 {
5609 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
5610 new = gen_rtx_CONST (Pmode, new);
5611 new = gen_const_mem (Pmode, new);
5612 set_mem_alias_set (new, ix86_GOT_alias_set ());
5613
5614 if (reg == 0)
5615 reg = gen_reg_rtx (Pmode);
5616 /* Use gen_movsi directly, otherwise the address is loaded
5617 into a register for CSE. We don't want to CSE these addresses;
5618 instead we CSE the addresses loaded from the GOT table, so skip this. */
5619 emit_insn (gen_movsi (reg, new));
5620 new = reg;
5621 }
5622 else
5623 {
5624 /* This symbol must be referenced via a load from the
5625 Global Offset Table (@GOT). */
5626
5627 if (reload_in_progress)
5628 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5629 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
5630 new = gen_rtx_CONST (Pmode, new);
5631 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5632 new = gen_const_mem (Pmode, new);
5633 set_mem_alias_set (new, ix86_GOT_alias_set ());
5634
5635 if (reg == 0)
5636 reg = gen_reg_rtx (Pmode);
5637 emit_move_insn (reg, new);
5638 new = reg;
5639 }
5640 }
5641 else
5642 {
5643 if (GET_CODE (addr) == CONST)
5644 {
5645 addr = XEXP (addr, 0);
5646
5647 /* We must match stuff we generate before. Assume the only
5648 unspecs that can get here are ours. Not that we could do
5649 anything with them anyway.... */
5650 if (GET_CODE (addr) == UNSPEC
5651 || (GET_CODE (addr) == PLUS
5652 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
5653 return orig;
5654 gcc_assert (GET_CODE (addr) == PLUS);
5655 }
5656 if (GET_CODE (addr) == PLUS)
5657 {
5658 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
5659
5660 /* Check first to see if this is a constant offset from a @GOTOFF
5661 symbol reference. */
5662 if (local_symbolic_operand (op0, Pmode)
5663 && GET_CODE (op1) == CONST_INT)
5664 {
5665 if (!TARGET_64BIT)
5666 {
5667 if (reload_in_progress)
5668 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5669 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
5670 UNSPEC_GOTOFF);
5671 new = gen_rtx_PLUS (Pmode, new, op1);
5672 new = gen_rtx_CONST (Pmode, new);
5673 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5674
5675 if (reg != 0)
5676 {
5677 emit_move_insn (reg, new);
5678 new = reg;
5679 }
5680 }
5681 else
5682 {
5683 if (INTVAL (op1) < -16*1024*1024
5684 || INTVAL (op1) >= 16*1024*1024)
5685 new = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
5686 }
5687 }
5688 else
5689 {
5690 base = legitimize_pic_address (XEXP (addr, 0), reg);
5691 new = legitimize_pic_address (XEXP (addr, 1),
5692 base == reg ? NULL_RTX : reg);
5693
5694 if (GET_CODE (new) == CONST_INT)
5695 new = plus_constant (base, INTVAL (new));
5696 else
5697 {
5698 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
5699 {
5700 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
5701 new = XEXP (new, 1);
5702 }
5703 new = gen_rtx_PLUS (Pmode, base, new);
5704 }
5705 }
5706 }
5707 }
5708 return new;
5709 }
5710 \f
5711 /* Load the thread pointer. If TO_REG is true, force it into a register. */
5712
5713 static rtx
5714 get_thread_pointer (int to_reg)
5715 {
5716 rtx tp, reg, insn;
5717
5718 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
5719 if (!to_reg)
5720 return tp;
5721
5722 reg = gen_reg_rtx (Pmode);
5723 insn = gen_rtx_SET (VOIDmode, reg, tp);
5724 insn = emit_insn (insn);
5725
5726 return reg;
5727 }
5728
5729 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
5730 false if we expect this to be used for a memory address and true if
5731 we expect to load the address into a register. */
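/* Very roughly, the forms produced below are:
     global dynamic: the result register of a tls_get_addr-style call;
     local dynamic:  the base returned by that call plus a @DTPOFF offset;
     initial exec:   the thread pointer plus a TP offset loaded via the GOT;
     local exec:     the thread pointer plus a constant @TPOFF/@NTPOFF offset;
   with the GNU vs. non-GNU TLS differences handled case by case.  */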
5732
5733 static rtx
5734 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
5735 {
5736 rtx dest, base, off, pic;
5737 int type;
5738
5739 switch (model)
5740 {
5741 case TLS_MODEL_GLOBAL_DYNAMIC:
5742 dest = gen_reg_rtx (Pmode);
5743 if (TARGET_64BIT)
5744 {
5745 rtx rax = gen_rtx_REG (Pmode, 0), insns;
5746
5747 start_sequence ();
5748 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
5749 insns = get_insns ();
5750 end_sequence ();
5751
5752 emit_libcall_block (insns, dest, rax, x);
5753 }
5754 else
5755 emit_insn (gen_tls_global_dynamic_32 (dest, x));
5756 break;
5757
5758 case TLS_MODEL_LOCAL_DYNAMIC:
5759 base = gen_reg_rtx (Pmode);
5760 if (TARGET_64BIT)
5761 {
5762 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
5763
5764 start_sequence ();
5765 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
5766 insns = get_insns ();
5767 end_sequence ();
5768
5769 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
5770 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
5771 emit_libcall_block (insns, base, rax, note);
5772 }
5773 else
5774 emit_insn (gen_tls_local_dynamic_base_32 (base));
5775
5776 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
5777 off = gen_rtx_CONST (Pmode, off);
5778
5779 return gen_rtx_PLUS (Pmode, base, off);
5780
5781 case TLS_MODEL_INITIAL_EXEC:
5782 if (TARGET_64BIT)
5783 {
5784 pic = NULL;
5785 type = UNSPEC_GOTNTPOFF;
5786 }
5787 else if (flag_pic)
5788 {
5789 if (reload_in_progress)
5790 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5791 pic = pic_offset_table_rtx;
5792 type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
5793 }
5794 else if (!TARGET_GNU_TLS)
5795 {
5796 pic = gen_reg_rtx (Pmode);
5797 emit_insn (gen_set_got (pic));
5798 type = UNSPEC_GOTTPOFF;
5799 }
5800 else
5801 {
5802 pic = NULL;
5803 type = UNSPEC_INDNTPOFF;
5804 }
5805
5806 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
5807 off = gen_rtx_CONST (Pmode, off);
5808 if (pic)
5809 off = gen_rtx_PLUS (Pmode, pic, off);
5810 off = gen_const_mem (Pmode, off);
5811 set_mem_alias_set (off, ix86_GOT_alias_set ());
5812
5813 if (TARGET_64BIT || TARGET_GNU_TLS)
5814 {
5815 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
5816 off = force_reg (Pmode, off);
5817 return gen_rtx_PLUS (Pmode, base, off);
5818 }
5819 else
5820 {
5821 base = get_thread_pointer (true);
5822 dest = gen_reg_rtx (Pmode);
5823 emit_insn (gen_subsi3 (dest, base, off));
5824 }
5825 break;
5826
5827 case TLS_MODEL_LOCAL_EXEC:
5828 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
5829 (TARGET_64BIT || TARGET_GNU_TLS)
5830 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
5831 off = gen_rtx_CONST (Pmode, off);
5832
5833 if (TARGET_64BIT || TARGET_GNU_TLS)
5834 {
5835 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
5836 return gen_rtx_PLUS (Pmode, base, off);
5837 }
5838 else
5839 {
5840 base = get_thread_pointer (true);
5841 dest = gen_reg_rtx (Pmode);
5842 emit_insn (gen_subsi3 (dest, base, off));
5843 }
5844 break;
5845
5846 default:
5847 gcc_unreachable ();
5848 }
5849
5850 return dest;
5851 }
5852
5853 /* Try machine-dependent ways of modifying an illegitimate address
5854 to be legitimate. If we find one, return the new, valid address.
5855 This macro is used in only one place: `memory_address' in explow.c.
5856
5857 OLDX is the address as it was before break_out_memory_refs was called.
5858 In some cases it is useful to look at this to decide what needs to be done.
5859
5860 MODE and WIN are passed so that this macro can use
5861 GO_IF_LEGITIMATE_ADDRESS.
5862
5863 It is always safe for this macro to do nothing. It exists to recognize
5864 opportunities to optimize the output.
5865
5866 For the 80386, we handle X+REG by loading X into a register R and
5867 using R+REG. R will go in a general reg and indexing will be used.
5868 However, if REG is a broken-out memory address or multiplication,
5869 nothing needs to be done because REG can certainly go in a general reg.
5870
5871 When -fpic is used, special handling is needed for symbolic references.
5872 See comments by legitimize_pic_address in i386.c for details. */
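/* For example, an address like (plus (reg) (ashift (reg) (const_int 2)))
   is canonicalized below into (plus (mult (reg) (const_int 4)) (reg)),
   which matches the hardware scaled-index addressing mode.  */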
5873
5874 rtx
5875 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
5876 {
5877 int changed = 0;
5878 unsigned log;
5879
5880 if (TARGET_DEBUG_ADDR)
5881 {
5882 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
5883 GET_MODE_NAME (mode));
5884 debug_rtx (x);
5885 }
5886
5887 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
5888 if (log)
5889 return legitimize_tls_address (x, log, false);
5890 if (GET_CODE (x) == CONST
5891 && GET_CODE (XEXP (x, 0)) == PLUS
5892 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
5893 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
5894 {
5895 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
5896 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
5897 }
5898
5899 if (flag_pic && SYMBOLIC_CONST (x))
5900 return legitimize_pic_address (x, 0);
5901
5902 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
5903 if (GET_CODE (x) == ASHIFT
5904 && GET_CODE (XEXP (x, 1)) == CONST_INT
5905 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
5906 {
5907 changed = 1;
5908 log = INTVAL (XEXP (x, 1));
5909 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
5910 GEN_INT (1 << log));
5911 }
5912
5913 if (GET_CODE (x) == PLUS)
5914 {
5915 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
5916
5917 if (GET_CODE (XEXP (x, 0)) == ASHIFT
5918 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
5919 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
5920 {
5921 changed = 1;
5922 log = INTVAL (XEXP (XEXP (x, 0), 1));
5923 XEXP (x, 0) = gen_rtx_MULT (Pmode,
5924 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
5925 GEN_INT (1 << log));
5926 }
5927
5928 if (GET_CODE (XEXP (x, 1)) == ASHIFT
5929 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
5930 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
5931 {
5932 changed = 1;
5933 log = INTVAL (XEXP (XEXP (x, 1), 1));
5934 XEXP (x, 1) = gen_rtx_MULT (Pmode,
5935 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
5936 GEN_INT (1 << log));
5937 }
5938
5939 /* Put multiply first if it isn't already. */
5940 if (GET_CODE (XEXP (x, 1)) == MULT)
5941 {
5942 rtx tmp = XEXP (x, 0);
5943 XEXP (x, 0) = XEXP (x, 1);
5944 XEXP (x, 1) = tmp;
5945 changed = 1;
5946 }
5947
5948 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
5949 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
5950 created by virtual register instantiation, register elimination, and
5951 similar optimizations. */
5952 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
5953 {
5954 changed = 1;
5955 x = gen_rtx_PLUS (Pmode,
5956 gen_rtx_PLUS (Pmode, XEXP (x, 0),
5957 XEXP (XEXP (x, 1), 0)),
5958 XEXP (XEXP (x, 1), 1));
5959 }
5960
5961 /* Canonicalize
5962 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
5963 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
5964 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
5965 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5966 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
5967 && CONSTANT_P (XEXP (x, 1)))
5968 {
5969 rtx constant;
5970 rtx other = NULL_RTX;
5971
5972 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5973 {
5974 constant = XEXP (x, 1);
5975 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
5976 }
5977 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
5978 {
5979 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
5980 other = XEXP (x, 1);
5981 }
5982 else
5983 constant = 0;
5984
5985 if (constant)
5986 {
5987 changed = 1;
5988 x = gen_rtx_PLUS (Pmode,
5989 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
5990 XEXP (XEXP (XEXP (x, 0), 1), 0)),
5991 plus_constant (other, INTVAL (constant)));
5992 }
5993 }
5994
5995 if (changed && legitimate_address_p (mode, x, FALSE))
5996 return x;
5997
5998 if (GET_CODE (XEXP (x, 0)) == MULT)
5999 {
6000 changed = 1;
6001 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
6002 }
6003
6004 if (GET_CODE (XEXP (x, 1)) == MULT)
6005 {
6006 changed = 1;
6007 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
6008 }
6009
6010 if (changed
6011 && GET_CODE (XEXP (x, 1)) == REG
6012 && GET_CODE (XEXP (x, 0)) == REG)
6013 return x;
6014
6015 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
6016 {
6017 changed = 1;
6018 x = legitimize_pic_address (x, 0);
6019 }
6020
6021 if (changed && legitimate_address_p (mode, x, FALSE))
6022 return x;
6023
6024 if (GET_CODE (XEXP (x, 0)) == REG)
6025 {
6026 rtx temp = gen_reg_rtx (Pmode);
6027 rtx val = force_operand (XEXP (x, 1), temp);
6028 if (val != temp)
6029 emit_move_insn (temp, val);
6030
6031 XEXP (x, 1) = temp;
6032 return x;
6033 }
6034
6035 else if (GET_CODE (XEXP (x, 1)) == REG)
6036 {
6037 rtx temp = gen_reg_rtx (Pmode);
6038 rtx val = force_operand (XEXP (x, 0), temp);
6039 if (val != temp)
6040 emit_move_insn (temp, val);
6041
6042 XEXP (x, 0) = temp;
6043 return x;
6044 }
6045 }
6046
6047 return x;
6048 }
6049 \f
6050 /* Print an integer constant expression in assembler syntax. Addition
6051 and subtraction are the only arithmetic that may appear in these
6052 expressions. FILE is the stdio stream to write to, X is the rtx, and
6053 CODE is the operand print code from the output string. */
6054
6055 static void
6056 output_pic_addr_const (FILE *file, rtx x, int code)
6057 {
6058 char buf[256];
6059
6060 switch (GET_CODE (x))
6061 {
6062 case PC:
6063 gcc_assert (flag_pic);
6064 putc ('.', file);
6065 break;
6066
6067 case SYMBOL_REF:
6068 assemble_name (file, XSTR (x, 0));
6069 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
6070 fputs ("@PLT", file);
6071 break;
6072
6073 case LABEL_REF:
6074 x = XEXP (x, 0);
6075 /* FALLTHRU */
6076 case CODE_LABEL:
6077 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
6078 assemble_name (asm_out_file, buf);
6079 break;
6080
6081 case CONST_INT:
6082 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6083 break;
6084
6085 case CONST:
6086 /* This used to output parentheses around the expression,
6087 but that does not work on the 386 (either ATT or BSD assembler). */
6088 output_pic_addr_const (file, XEXP (x, 0), code);
6089 break;
6090
6091 case CONST_DOUBLE:
6092 if (GET_MODE (x) == VOIDmode)
6093 {
6094 /* We can use %d if the number is <32 bits and positive. */
6095 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
6096 fprintf (file, "0x%lx%08lx",
6097 (unsigned long) CONST_DOUBLE_HIGH (x),
6098 (unsigned long) CONST_DOUBLE_LOW (x));
6099 else
6100 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
6101 }
6102 else
6103 /* We can't handle floating point constants;
6104 PRINT_OPERAND must handle them. */
6105 output_operand_lossage ("floating constant misused");
6106 break;
6107
6108 case PLUS:
6109 /* Some assemblers need integer constants to appear first. */
6110 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
6111 {
6112 output_pic_addr_const (file, XEXP (x, 0), code);
6113 putc ('+', file);
6114 output_pic_addr_const (file, XEXP (x, 1), code);
6115 }
6116 else
6117 {
6118 gcc_assert (GET_CODE (XEXP (x, 1)) == CONST_INT);
6119 output_pic_addr_const (file, XEXP (x, 1), code);
6120 putc ('+', file);
6121 output_pic_addr_const (file, XEXP (x, 0), code);
6122 }
6123 break;
6124
6125 case MINUS:
6126 if (!TARGET_MACHO)
6127 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
6128 output_pic_addr_const (file, XEXP (x, 0), code);
6129 putc ('-', file);
6130 output_pic_addr_const (file, XEXP (x, 1), code);
6131 if (!TARGET_MACHO)
6132 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
6133 break;
6134
6135 case UNSPEC:
6136 gcc_assert (XVECLEN (x, 0) == 1);
6137 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
6138 switch (XINT (x, 1))
6139 {
6140 case UNSPEC_GOT:
6141 fputs ("@GOT", file);
6142 break;
6143 case UNSPEC_GOTOFF:
6144 fputs ("@GOTOFF", file);
6145 break;
6146 case UNSPEC_GOTPCREL:
6147 fputs ("@GOTPCREL(%rip)", file);
6148 break;
6149 case UNSPEC_GOTTPOFF:
6150 /* FIXME: This might be @TPOFF in Sun ld too. */
6151 fputs ("@GOTTPOFF", file);
6152 break;
6153 case UNSPEC_TPOFF:
6154 fputs ("@TPOFF", file);
6155 break;
6156 case UNSPEC_NTPOFF:
6157 if (TARGET_64BIT)
6158 fputs ("@TPOFF", file);
6159 else
6160 fputs ("@NTPOFF", file);
6161 break;
6162 case UNSPEC_DTPOFF:
6163 fputs ("@DTPOFF", file);
6164 break;
6165 case UNSPEC_GOTNTPOFF:
6166 if (TARGET_64BIT)
6167 fputs ("@GOTTPOFF(%rip)", file);
6168 else
6169 fputs ("@GOTNTPOFF", file);
6170 break;
6171 case UNSPEC_INDNTPOFF:
6172 fputs ("@INDNTPOFF", file);
6173 break;
6174 default:
6175 output_operand_lossage ("invalid UNSPEC as operand");
6176 break;
6177 }
6178 break;
6179
6180 default:
6181 output_operand_lossage ("invalid expression as operand");
6182 }
6183 }
6184
6185 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
6186 We need to emit DTP-relative relocations. */
6187
6188 void
6189 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
6190 {
6191 fputs (ASM_LONG, file);
6192 output_addr_const (file, x);
6193 fputs ("@DTPOFF", file);
6194 switch (size)
6195 {
6196 case 4:
6197 break;
6198 case 8:
6199 fputs (", 0", file);
6200 break;
6201 default:
6202 gcc_unreachable ();
6203 }
6204 }
6205
6206 /* In the name of slightly smaller debug output, and to cater to
6207 general assembler lossage, recognize PIC+GOTOFF and turn it back
6208 into a direct symbol reference. */
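/* For example, (plus (reg) (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF))),
   with the register being the PIC register, is turned back into
   (symbol_ref "foo") ("foo" stands for any symbol).  */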
6209
6210 static rtx
6211 ix86_delegitimize_address (rtx orig_x)
6212 {
6213 rtx x = orig_x, y;
6214
6215 if (GET_CODE (x) == MEM)
6216 x = XEXP (x, 0);
6217
6218 if (TARGET_64BIT)
6219 {
6220 if (GET_CODE (x) != CONST
6221 || GET_CODE (XEXP (x, 0)) != UNSPEC
6222 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
6223 || GET_CODE (orig_x) != MEM)
6224 return orig_x;
6225 return XVECEXP (XEXP (x, 0), 0, 0);
6226 }
6227
6228 if (GET_CODE (x) != PLUS
6229 || GET_CODE (XEXP (x, 1)) != CONST)
6230 return orig_x;
6231
6232 if (GET_CODE (XEXP (x, 0)) == REG
6233 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6234 /* %ebx + GOT/GOTOFF */
6235 y = NULL;
6236 else if (GET_CODE (XEXP (x, 0)) == PLUS)
6237 {
6238 /* %ebx + %reg * scale + GOT/GOTOFF */
6239 y = XEXP (x, 0);
6240 if (GET_CODE (XEXP (y, 0)) == REG
6241 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
6242 y = XEXP (y, 1);
6243 else if (GET_CODE (XEXP (y, 1)) == REG
6244 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
6245 y = XEXP (y, 0);
6246 else
6247 return orig_x;
6248 if (GET_CODE (y) != REG
6249 && GET_CODE (y) != MULT
6250 && GET_CODE (y) != ASHIFT)
6251 return orig_x;
6252 }
6253 else
6254 return orig_x;
6255
6256 x = XEXP (XEXP (x, 1), 0);
6257 if (GET_CODE (x) == UNSPEC
6258 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6259 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
6260 {
6261 if (y)
6262 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
6263 return XVECEXP (x, 0, 0);
6264 }
6265
6266 if (GET_CODE (x) == PLUS
6267 && GET_CODE (XEXP (x, 0)) == UNSPEC
6268 && GET_CODE (XEXP (x, 1)) == CONST_INT
6269 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6270 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
6271 && GET_CODE (orig_x) != MEM)))
6272 {
6273 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
6274 if (y)
6275 return gen_rtx_PLUS (Pmode, y, x);
6276 return x;
6277 }
6278
6279 return orig_x;
6280 }
6281 \f
6282 static void
6283 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
6284 int fp, FILE *file)
6285 {
6286 const char *suffix;
6287
6288 if (mode == CCFPmode || mode == CCFPUmode)
6289 {
6290 enum rtx_code second_code, bypass_code;
6291 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
6292 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
6293 code = ix86_fp_compare_code_to_integer (code);
6294 mode = CCmode;
6295 }
6296 if (reverse)
6297 code = reverse_condition (code);
6298
6299 switch (code)
6300 {
6301 case EQ:
6302 suffix = "e";
6303 break;
6304 case NE:
6305 suffix = "ne";
6306 break;
6307 case GT:
6308 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
6309 suffix = "g";
6310 break;
6311 case GTU:
6312 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
6313 Those same assemblers have the same but opposite lossage on cmov. */
6314 gcc_assert (mode == CCmode);
6315 suffix = fp ? "nbe" : "a";
6316 break;
6317 case LT:
6318 switch (mode)
6319 {
6320 case CCNOmode:
6321 case CCGOCmode:
6322 suffix = "s";
6323 break;
6324
6325 case CCmode:
6326 case CCGCmode:
6327 suffix = "l";
6328 break;
6329
6330 default:
6331 gcc_unreachable ();
6332 }
6333 break;
6334 case LTU:
6335 gcc_assert (mode == CCmode);
6336 suffix = "b";
6337 break;
6338 case GE:
6339 switch (mode)
6340 {
6341 case CCNOmode:
6342 case CCGOCmode:
6343 suffix = "ns";
6344 break;
6345
6346 case CCmode:
6347 case CCGCmode:
6348 suffix = "ge";
6349 break;
6350
6351 default:
6352 gcc_unreachable ();
6353 }
6354 break;
6355 case GEU:
6356 /* ??? As above. */
6357 gcc_assert (mode == CCmode);
6358 suffix = fp ? "nb" : "ae";
6359 break;
6360 case LE:
6361 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
6362 suffix = "le";
6363 break;
6364 case LEU:
6365 gcc_assert (mode == CCmode);
6366 suffix = "be";
6367 break;
6368 case UNORDERED:
6369 suffix = fp ? "u" : "p";
6370 break;
6371 case ORDERED:
6372 suffix = fp ? "nu" : "np";
6373 break;
6374 default:
6375 gcc_unreachable ();
6376 }
6377 fputs (suffix, file);
6378 }
6379
6380 /* Print the name of register X to FILE based on its machine mode and number.
6381 If CODE is 'w', pretend the mode is HImode.
6382 If CODE is 'b', pretend the mode is QImode.
6383 If CODE is 'k', pretend the mode is SImode.
6384 If CODE is 'q', pretend the mode is DImode.
6385 If CODE is 'h', pretend the reg is the 'high' byte register.
6386 If CODE is 'y', print "st(0)" instead of "st" if the reg is a stack op. */
6387
6388 void
6389 print_reg (rtx x, int code, FILE *file)
6390 {
6391 gcc_assert (REGNO (x) != ARG_POINTER_REGNUM
6392 && REGNO (x) != FRAME_POINTER_REGNUM
6393 && REGNO (x) != FLAGS_REG
6394 && REGNO (x) != FPSR_REG);
6395
6396 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
6397 putc ('%', file);
6398
6399 if (code == 'w' || MMX_REG_P (x))
6400 code = 2;
6401 else if (code == 'b')
6402 code = 1;
6403 else if (code == 'k')
6404 code = 4;
6405 else if (code == 'q')
6406 code = 8;
6407 else if (code == 'y')
6408 code = 3;
6409 else if (code == 'h')
6410 code = 0;
6411 else
6412 code = GET_MODE_SIZE (GET_MODE (x));
6413
6414 /* Irritatingly, AMD extended registers use a different naming convention
6415 from the normal registers. */
6416 if (REX_INT_REG_P (x))
6417 {
6418 gcc_assert (TARGET_64BIT);
6419 switch (code)
6420 {
6421 case 0:
6422 error ("extended registers have no high halves");
6423 break;
6424 case 1:
6425 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
6426 break;
6427 case 2:
6428 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
6429 break;
6430 case 4:
6431 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
6432 break;
6433 case 8:
6434 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
6435 break;
6436 default:
6437 error ("unsupported operand size for extended register");
6438 break;
6439 }
6440 return;
6441 }
6442 switch (code)
6443 {
6444 case 3:
6445 if (STACK_TOP_P (x))
6446 {
6447 fputs ("st(0)", file);
6448 break;
6449 }
6450 /* FALLTHRU */
6451 case 8:
6452 case 4:
6453 case 12:
6454 if (! ANY_FP_REG_P (x))
6455 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
6456 /* FALLTHRU */
6457 case 16:
6458 case 2:
6459 normal:
6460 fputs (hi_reg_name[REGNO (x)], file);
6461 break;
6462 case 1:
6463 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
6464 goto normal;
6465 fputs (qi_reg_name[REGNO (x)], file);
6466 break;
6467 case 0:
6468 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
6469 goto normal;
6470 fputs (qi_high_reg_name[REGNO (x)], file);
6471 break;
6472 default:
6473 gcc_unreachable ();
6474 }
6475 }
6476
6477 /* Locate some local-dynamic symbol still in use by this function
6478 so that we can print its name in some tls_local_dynamic_base
6479 pattern. */
6480
6481 static const char *
6482 get_some_local_dynamic_name (void)
6483 {
6484 rtx insn;
6485
6486 if (cfun->machine->some_ld_name)
6487 return cfun->machine->some_ld_name;
6488
6489 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
6490 if (INSN_P (insn)
6491 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
6492 return cfun->machine->some_ld_name;
6493
6494 gcc_unreachable ();
6495 }
6496
6497 static int
6498 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
6499 {
6500 rtx x = *px;
6501
6502 if (GET_CODE (x) == SYMBOL_REF
6503 && local_dynamic_symbolic_operand (x, Pmode))
6504 {
6505 cfun->machine->some_ld_name = XSTR (x, 0);
6506 return 1;
6507 }
6508
6509 return 0;
6510 }
6511
6512 /* Meaning of CODE:
6513 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
6514 C -- print opcode suffix for set/cmov insn.
6515 c -- like C, but print reversed condition
6516 F,f -- likewise, but for floating-point.
6517 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
6518 otherwise nothing
6519 R -- print the prefix for register names.
6520 z -- print the opcode suffix for the size of the current operand.
6521 * -- print a star (in certain assembler syntax)
6522 A -- print an absolute memory reference.
6523 w -- print the operand as if it's a "word" (HImode) even if it isn't.
6524 s -- print a shift double count, followed by the assembler's argument
6525 delimiter.
6526 b -- print the QImode name of the register for the indicated operand.
6527 %b0 would print %al if operands[0] is reg 0.
6528 w -- likewise, print the HImode name of the register.
6529 k -- likewise, print the SImode name of the register.
6530 q -- likewise, print the DImode name of the register.
6531 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
6532 y -- print "st(0)" instead of "st" as a register.
6533 D -- print condition for SSE cmp instruction.
6534 P -- if PIC, print an @PLT suffix.
6535 X -- don't print any sort of PIC '@' suffix for a symbol.
6536 & -- print some in-use local-dynamic symbol name.
6537 H -- print a memory address offset by 8; used for sse high-parts
6538 */
6539
6540 void
6541 print_operand (FILE *file, rtx x, int code)
6542 {
6543 if (code)
6544 {
6545 switch (code)
6546 {
6547 case '*':
6548 if (ASSEMBLER_DIALECT == ASM_ATT)
6549 putc ('*', file);
6550 return;
6551
6552 case '&':
6553 assemble_name (file, get_some_local_dynamic_name ());
6554 return;
6555
6556 case 'A':
6557 switch (ASSEMBLER_DIALECT)
6558 {
6559 case ASM_ATT:
6560 putc ('*', file);
6561 break;
6562
6563 case ASM_INTEL:
6564 /* Intel syntax. For absolute addresses, registers should not
6565 be surrounded by brackets. */
6566 if (GET_CODE (x) != REG)
6567 {
6568 putc ('[', file);
6569 PRINT_OPERAND (file, x, 0);
6570 putc (']', file);
6571 return;
6572 }
6573 break;
6574
6575 default:
6576 gcc_unreachable ();
6577 }
6578
6579 PRINT_OPERAND (file, x, 0);
6580 return;
6581
6582
6583 case 'L':
6584 if (ASSEMBLER_DIALECT == ASM_ATT)
6585 putc ('l', file);
6586 return;
6587
6588 case 'W':
6589 if (ASSEMBLER_DIALECT == ASM_ATT)
6590 putc ('w', file);
6591 return;
6592
6593 case 'B':
6594 if (ASSEMBLER_DIALECT == ASM_ATT)
6595 putc ('b', file);
6596 return;
6597
6598 case 'Q':
6599 if (ASSEMBLER_DIALECT == ASM_ATT)
6600 putc ('l', file);
6601 return;
6602
6603 case 'S':
6604 if (ASSEMBLER_DIALECT == ASM_ATT)
6605 putc ('s', file);
6606 return;
6607
6608 case 'T':
6609 if (ASSEMBLER_DIALECT == ASM_ATT)
6610 putc ('t', file);
6611 return;
6612
6613 case 'z':
6614 /* 387 opcodes don't get size suffixes if the operands are
6615 registers. */
6616 if (STACK_REG_P (x))
6617 return;
6618
6619 /* Likewise if using Intel opcodes. */
6620 if (ASSEMBLER_DIALECT == ASM_INTEL)
6621 return;
6622
6623 /* Derive the opcode size suffix from the size of the operand. */
6624 switch (GET_MODE_SIZE (GET_MODE (x)))
6625 {
6626 case 2:
6627 #ifdef HAVE_GAS_FILDS_FISTS
6628 putc ('s', file);
6629 #endif
6630 return;
6631
6632 case 4:
6633 if (GET_MODE (x) == SFmode)
6634 {
6635 putc ('s', file);
6636 return;
6637 }
6638 else
6639 putc ('l', file);
6640 return;
6641
6642 case 12:
6643 case 16:
6644 putc ('t', file);
6645 return;
6646
6647 case 8:
6648 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
6649 {
6650 #ifdef GAS_MNEMONICS
6651 putc ('q', file);
6652 #else
6653 putc ('l', file);
6654 putc ('l', file);
6655 #endif
6656 }
6657 else
6658 putc ('l', file);
6659 return;
6660
6661 default:
6662 gcc_unreachable ();
6663 }
6664
6665 case 'b':
6666 case 'w':
6667 case 'k':
6668 case 'q':
6669 case 'h':
6670 case 'y':
6671 case 'X':
6672 case 'P':
6673 break;
6674
6675 case 's':
6676 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
6677 {
6678 PRINT_OPERAND (file, x, 0);
6679 putc (',', file);
6680 }
6681 return;
6682
6683 case 'D':
6684 /* A little bit of braindamage here. The SSE compare instructions
6685 use completely different names for the comparisons than the
6686 fp conditional moves do. */
6687 switch (GET_CODE (x))
6688 {
6689 case EQ:
6690 case UNEQ:
6691 fputs ("eq", file);
6692 break;
6693 case LT:
6694 case UNLT:
6695 fputs ("lt", file);
6696 break;
6697 case LE:
6698 case UNLE:
6699 fputs ("le", file);
6700 break;
6701 case UNORDERED:
6702 fputs ("unord", file);
6703 break;
6704 case NE:
6705 case LTGT:
6706 fputs ("neq", file);
6707 break;
6708 case UNGE:
6709 case GE:
6710 fputs ("nlt", file);
6711 break;
6712 case UNGT:
6713 case GT:
6714 fputs ("nle", file);
6715 break;
6716 case ORDERED:
6717 fputs ("ord", file);
6718 break;
6719 default:
6720 gcc_unreachable ();
6721 }
6722 return;
6723 case 'O':
6724 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
6725 if (ASSEMBLER_DIALECT == ASM_ATT)
6726 {
6727 switch (GET_MODE (x))
6728 {
6729 case HImode: putc ('w', file); break;
6730 case SImode:
6731 case SFmode: putc ('l', file); break;
6732 case DImode:
6733 case DFmode: putc ('q', file); break;
6734 default: gcc_unreachable ();
6735 }
6736 putc ('.', file);
6737 }
6738 #endif
6739 return;
6740 case 'C':
6741 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
6742 return;
6743 case 'F':
6744 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
6745 if (ASSEMBLER_DIALECT == ASM_ATT)
6746 putc ('.', file);
6747 #endif
6748 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
6749 return;
6750
6751 /* Like above, but reverse condition */
6752 case 'c':
6753 /* Check to see if argument to %c is really a constant
6754 and not a condition code which needs to be reversed. */
6755 if (!COMPARISON_P (x))
6756 {
6757 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
6758 return;
6759 }
6760 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
6761 return;
6762 case 'f':
6763 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
6764 if (ASSEMBLER_DIALECT == ASM_ATT)
6765 putc ('.', file);
6766 #endif
6767 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
6768 return;
6769
6770 case 'H':
6771 /* It doesn't actually matter what mode we use here, as we're
6772 only going to use this for printing. */
6773 x = adjust_address_nv (x, DImode, 8);
6774 break;
6775
6776 case '+':
6777 {
6778 rtx x;
6779
6780 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
6781 return;
6782
6783 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
6784 if (x)
6785 {
6786 int pred_val = INTVAL (XEXP (x, 0));
6787
6788 if (pred_val < REG_BR_PROB_BASE * 45 / 100
6789 || pred_val > REG_BR_PROB_BASE * 55 / 100)
6790 {
6791 int taken = pred_val > REG_BR_PROB_BASE / 2;
6792 int cputaken = final_forward_branch_p (current_output_insn) == 0;
6793
6794 /* Emit hints only when the default branch prediction
6795 heuristics would fail. */
6796 if (taken != cputaken)
6797 {
6798 /* We use 3e (DS) prefix for taken branches and
6799 2e (CS) prefix for not taken branches. */
6800 if (taken)
6801 fputs ("ds ; ", file);
6802 else
6803 fputs ("cs ; ", file);
6804 }
6805 }
6806 }
6807 return;
6808 }
6809 default:
6810 output_operand_lossage ("invalid operand code '%c'", code);
6811 }
6812 }
6813
6814 if (GET_CODE (x) == REG)
6815 print_reg (x, code, file);
6816
6817 else if (GET_CODE (x) == MEM)
6818 {
6819 /* No `byte ptr' prefix for call instructions. */
6820 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
6821 {
6822 const char * size;
6823 switch (GET_MODE_SIZE (GET_MODE (x)))
6824 {
6825 case 1: size = "BYTE"; break;
6826 case 2: size = "WORD"; break;
6827 case 4: size = "DWORD"; break;
6828 case 8: size = "QWORD"; break;
6829 case 12: size = "XWORD"; break;
6830 case 16: size = "XMMWORD"; break;
6831 default:
6832 gcc_unreachable ();
6833 }
6834
6835 /* Check for explicit size override (codes 'b', 'w' and 'k') */
6836 if (code == 'b')
6837 size = "BYTE";
6838 else if (code == 'w')
6839 size = "WORD";
6840 else if (code == 'k')
6841 size = "DWORD";
6842
6843 fputs (size, file);
6844 fputs (" PTR ", file);
6845 }
6846
6847 x = XEXP (x, 0);
6848 /* Avoid (%rip) for call operands. */
6849 if (CONSTANT_ADDRESS_P (x) && code == 'P'
6850 && GET_CODE (x) != CONST_INT)
6851 output_addr_const (file, x);
6852 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
6853 output_operand_lossage ("invalid constraints for operand");
6854 else
6855 output_address (x);
6856 }
6857
6858 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
6859 {
6860 REAL_VALUE_TYPE r;
6861 long l;
6862
6863 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6864 REAL_VALUE_TO_TARGET_SINGLE (r, l);
6865
6866 if (ASSEMBLER_DIALECT == ASM_ATT)
6867 putc ('$', file);
6868 fprintf (file, "0x%08lx", l);
6869 }
6870
6871 /* These float cases don't actually occur as immediate operands. */
6872 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
6873 {
6874 char dstr[30];
6875
6876 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
6877 fprintf (file, "%s", dstr);
6878 }
6879
6880 else if (GET_CODE (x) == CONST_DOUBLE
6881 && GET_MODE (x) == XFmode)
6882 {
6883 char dstr[30];
6884
6885 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
6886 fprintf (file, "%s", dstr);
6887 }
6888
6889 else
6890 {
6891 /* We have patterns that allow zero sets of memory, for instance.
6892 In 64-bit mode, we should probably support all 8-byte vectors,
6893 since we can in fact encode that into an immediate. */
6894 if (GET_CODE (x) == CONST_VECTOR)
6895 {
6896 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
6897 x = const0_rtx;
6898 }
6899
6900 if (code != 'P')
6901 {
6902 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
6903 {
6904 if (ASSEMBLER_DIALECT == ASM_ATT)
6905 putc ('$', file);
6906 }
6907 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
6908 || GET_CODE (x) == LABEL_REF)
6909 {
6910 if (ASSEMBLER_DIALECT == ASM_ATT)
6911 putc ('$', file);
6912 else
6913 fputs ("OFFSET FLAT:", file);
6914 }
6915 }
6916 if (GET_CODE (x) == CONST_INT)
6917 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6918 else if (flag_pic)
6919 output_pic_addr_const (file, x, code);
6920 else
6921 output_addr_const (file, x);
6922 }
6923 }
6924 \f
6925 /* Print a memory operand whose address is ADDR. */
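/* For a base/index/displacement address this prints, for example,
   "disp(%base,%index,scale)" in AT&T syntax and
   "[base+index*scale+disp]" in Intel syntax.  */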
6926
6927 void
6928 print_operand_address (FILE *file, rtx addr)
6929 {
6930 struct ix86_address parts;
6931 rtx base, index, disp;
6932 int scale;
6933 int ok = ix86_decompose_address (addr, &parts);
6934
6935 gcc_assert (ok);
6936
6937 base = parts.base;
6938 index = parts.index;
6939 disp = parts.disp;
6940 scale = parts.scale;
6941
6942 switch (parts.seg)
6943 {
6944 case SEG_DEFAULT:
6945 break;
6946 case SEG_FS:
6947 case SEG_GS:
6948 if (USER_LABEL_PREFIX[0] == 0)
6949 putc ('%', file);
6950 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
6951 break;
6952 default:
6953 gcc_unreachable ();
6954 }
6955
6956 if (!base && !index)
6957 {
6958 /* A displacement-only address requires special attention. */
6959
6960 if (GET_CODE (disp) == CONST_INT)
6961 {
6962 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
6963 {
6964 if (USER_LABEL_PREFIX[0] == 0)
6965 putc ('%', file);
6966 fputs ("ds:", file);
6967 }
6968 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
6969 }
6970 else if (flag_pic)
6971 output_pic_addr_const (file, disp, 0);
6972 else
6973 output_addr_const (file, disp);
6974
6975 /* Use one byte shorter RIP relative addressing for 64bit mode. */
6976 if (TARGET_64BIT
6977 && ((GET_CODE (disp) == SYMBOL_REF
6978 && ! tls_symbolic_operand (disp, GET_MODE (disp)))
6979 || GET_CODE (disp) == LABEL_REF
6980 || (GET_CODE (disp) == CONST
6981 && GET_CODE (XEXP (disp, 0)) == PLUS
6982 && (GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
6983 || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
6984 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)))
6985 fputs ("(%rip)", file);
6986 }
6987 else
6988 {
6989 if (ASSEMBLER_DIALECT == ASM_ATT)
6990 {
6991 if (disp)
6992 {
6993 if (flag_pic)
6994 output_pic_addr_const (file, disp, 0);
6995 else if (GET_CODE (disp) == LABEL_REF)
6996 output_asm_label (disp);
6997 else
6998 output_addr_const (file, disp);
6999 }
7000
7001 putc ('(', file);
7002 if (base)
7003 print_reg (base, 0, file);
7004 if (index)
7005 {
7006 putc (',', file);
7007 print_reg (index, 0, file);
7008 if (scale != 1)
7009 fprintf (file, ",%d", scale);
7010 }
7011 putc (')', file);
7012 }
7013 else
7014 {
7015 rtx offset = NULL_RTX;
7016
7017 if (disp)
7018 {
7019 /* Pull out the offset of a symbol; print any symbol itself. */
7020 if (GET_CODE (disp) == CONST
7021 && GET_CODE (XEXP (disp, 0)) == PLUS
7022 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
7023 {
7024 offset = XEXP (XEXP (disp, 0), 1);
7025 disp = gen_rtx_CONST (VOIDmode,
7026 XEXP (XEXP (disp, 0), 0));
7027 }
7028
7029 if (flag_pic)
7030 output_pic_addr_const (file, disp, 0);
7031 else if (GET_CODE (disp) == LABEL_REF)
7032 output_asm_label (disp);
7033 else if (GET_CODE (disp) == CONST_INT)
7034 offset = disp;
7035 else
7036 output_addr_const (file, disp);
7037 }
7038
7039 putc ('[', file);
7040 if (base)
7041 {
7042 print_reg (base, 0, file);
7043 if (offset)
7044 {
7045 if (INTVAL (offset) >= 0)
7046 putc ('+', file);
7047 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7048 }
7049 }
7050 else if (offset)
7051 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7052 else
7053 putc ('0', file);
7054
7055 if (index)
7056 {
7057 putc ('+', file);
7058 print_reg (index, 0, file);
7059 if (scale != 1)
7060 fprintf (file, "*%d", scale);
7061 }
7062 putc (']', file);
7063 }
7064 }
7065 }
7066
7067 bool
7068 output_addr_const_extra (FILE *file, rtx x)
7069 {
7070 rtx op;
7071
7072 if (GET_CODE (x) != UNSPEC)
7073 return false;
7074
7075 op = XVECEXP (x, 0, 0);
7076 switch (XINT (x, 1))
7077 {
7078 case UNSPEC_GOTTPOFF:
7079 output_addr_const (file, op);
7080 /* FIXME: This might be @TPOFF in Sun ld. */
7081 fputs ("@GOTTPOFF", file);
7082 break;
7083 case UNSPEC_TPOFF:
7084 output_addr_const (file, op);
7085 fputs ("@TPOFF", file);
7086 break;
7087 case UNSPEC_NTPOFF:
7088 output_addr_const (file, op);
7089 if (TARGET_64BIT)
7090 fputs ("@TPOFF", file);
7091 else
7092 fputs ("@NTPOFF", file);
7093 break;
7094 case UNSPEC_DTPOFF:
7095 output_addr_const (file, op);
7096 fputs ("@DTPOFF", file);
7097 break;
7098 case UNSPEC_GOTNTPOFF:
7099 output_addr_const (file, op);
7100 if (TARGET_64BIT)
7101 fputs ("@GOTTPOFF(%rip)", file);
7102 else
7103 fputs ("@GOTNTPOFF", file);
7104 break;
7105 case UNSPEC_INDNTPOFF:
7106 output_addr_const (file, op);
7107 fputs ("@INDNTPOFF", file);
7108 break;
7109
7110 default:
7111 return false;
7112 }
7113
7114 return true;
7115 }
7116 \f
7117 /* Split one or more DImode RTL references into pairs of SImode
7118 references. The RTL can be REG, offsettable MEM, integer constant, or
7119 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7120 split and "num" is its length. lo_half and hi_half are output arrays
7121 that parallel "operands". */
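/* For example, a DImode operand is split into an SImode low half (bits
   0-31, at byte offset 0) and an SImode high half (bits 32-63, at byte
   offset 4), matching the little-endian layout assumed here.  */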
7122
7123 void
7124 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
7125 {
7126 while (num--)
7127 {
7128 rtx op = operands[num];
7129
7130 /* simplify_subreg refuses to split volatile memory references,
7131 but we still have to handle them. */
7132 if (GET_CODE (op) == MEM)
7133 {
7134 lo_half[num] = adjust_address (op, SImode, 0);
7135 hi_half[num] = adjust_address (op, SImode, 4);
7136 }
7137 else
7138 {
7139 lo_half[num] = simplify_gen_subreg (SImode, op,
7140 GET_MODE (op) == VOIDmode
7141 ? DImode : GET_MODE (op), 0);
7142 hi_half[num] = simplify_gen_subreg (SImode, op,
7143 GET_MODE (op) == VOIDmode
7144 ? DImode : GET_MODE (op), 4);
7145 }
7146 }
7147 }
7148 /* Split one or more TImode RTL references into pairs of DImode
7149 references. The RTL can be REG, offsettable MEM, integer constant, or
7150 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
7151 split and "num" is its length. lo_half and hi_half are output arrays
7152 that parallel "operands". */
7153
7154 void
7155 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
7156 {
7157 while (num--)
7158 {
7159 rtx op = operands[num];
7160
7161 /* simplify_subreg refuses to split volatile memory references, but we
7162 still have to handle them. */
7163 if (GET_CODE (op) == MEM)
7164 {
7165 lo_half[num] = adjust_address (op, DImode, 0);
7166 hi_half[num] = adjust_address (op, DImode, 8);
7167 }
7168 else
7169 {
7170 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
7171 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
7172 }
7173 }
7174 }
7175 \f
7176 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
7177 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
7178 is the expression of the binary operation. The output may either be
7179 emitted here, or returned to the caller, like all output_* functions.
7180
7181 There is no guarantee that the operands are the same mode, as they
7182 might be within FLOAT or FLOAT_EXTEND expressions. */
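/* For example, an SSE scalar add is emitted as "addss\t{%2, %0|%0, %2}"
   (or "addsd" for DFmode), while the 387 forms below are built from
   "fadd", "fsub", "fmul" or "fdiv" plus an operand template.  */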
7183
7184 #ifndef SYSV386_COMPAT
7185 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
7186 wants to fix the assemblers because that causes incompatibility
7187 with gcc. No-one wants to fix gcc because that causes
7188 incompatibility with assemblers... You can use the option of
7189 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
7190 #define SYSV386_COMPAT 1
7191 #endif
7192
7193 const char *
7194 output_387_binary_op (rtx insn, rtx *operands)
7195 {
7196 static char buf[30];
7197 const char *p;
7198 const char *ssep;
7199 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
7200
7201 #ifdef ENABLE_CHECKING
7202 /* Even if we do not want to check the inputs, this documents the input
7203 constraints, which helps in understanding the following code. */
7204 if (STACK_REG_P (operands[0])
7205 && ((REG_P (operands[1])
7206 && REGNO (operands[0]) == REGNO (operands[1])
7207 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
7208 || (REG_P (operands[2])
7209 && REGNO (operands[0]) == REGNO (operands[2])
7210 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
7211 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
7212 ; /* ok */
7213 else
7214 gcc_assert (is_sse);
7215 #endif
7216
7217 switch (GET_CODE (operands[3]))
7218 {
7219 case PLUS:
7220 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7221 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7222 p = "fiadd";
7223 else
7224 p = "fadd";
7225 ssep = "add";
7226 break;
7227
7228 case MINUS:
7229 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7230 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7231 p = "fisub";
7232 else
7233 p = "fsub";
7234 ssep = "sub";
7235 break;
7236
7237 case MULT:
7238 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7239 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7240 p = "fimul";
7241 else
7242 p = "fmul";
7243 ssep = "mul";
7244 break;
7245
7246 case DIV:
7247 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7248 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7249 p = "fidiv";
7250 else
7251 p = "fdiv";
7252 ssep = "div";
7253 break;
7254
7255 default:
7256 gcc_unreachable ();
7257 }
7258
7259 if (is_sse)
7260 {
7261 strcpy (buf, ssep);
7262 if (GET_MODE (operands[0]) == SFmode)
7263 strcat (buf, "ss\t{%2, %0|%0, %2}");
7264 else
7265 strcat (buf, "sd\t{%2, %0|%0, %2}");
7266 return buf;
7267 }
7268 strcpy (buf, p);
7269
7270 switch (GET_CODE (operands[3]))
7271 {
7272 case MULT:
7273 case PLUS:
7274 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
7275 {
7276 rtx temp = operands[2];
7277 operands[2] = operands[1];
7278 operands[1] = temp;
7279 }
7280
7281 /* We know operands[0] == operands[1]. */
7282
7283 if (GET_CODE (operands[2]) == MEM)
7284 {
7285 p = "%z2\t%2";
7286 break;
7287 }
7288
7289 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7290 {
7291 if (STACK_TOP_P (operands[0]))
7292 /* How is it that we are storing to a dead operand[2]?
7293 Well, presumably operands[1] is dead too. We can't
7294 store the result to st(0) as st(0) gets popped on this
7295 instruction. Instead store to operands[2] (which I
7296 think has to be st(1)). st(1) will be popped later.
7297 gcc <= 2.8.1 didn't have this check and generated
7298 assembly code that the Unixware assembler rejected. */
7299 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7300 else
7301 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7302 break;
7303 }
7304
7305 if (STACK_TOP_P (operands[0]))
7306 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7307 else
7308 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7309 break;
7310
7311 case MINUS:
7312 case DIV:
7313 if (GET_CODE (operands[1]) == MEM)
7314 {
7315 p = "r%z1\t%1";
7316 break;
7317 }
7318
7319 if (GET_CODE (operands[2]) == MEM)
7320 {
7321 p = "%z2\t%2";
7322 break;
7323 }
7324
7325 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7326 {
7327 #if SYSV386_COMPAT
7328 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7329 derived assemblers, confusingly reverse the direction of
7330 the operation for fsub{r} and fdiv{r} when the
7331 destination register is not st(0). The Intel assembler
7332 doesn't have this brain damage. Read !SYSV386_COMPAT to
7333 figure out what the hardware really does. */
7334 if (STACK_TOP_P (operands[0]))
7335 p = "{p\t%0, %2|rp\t%2, %0}";
7336 else
7337 p = "{rp\t%2, %0|p\t%0, %2}";
7338 #else
7339 if (STACK_TOP_P (operands[0]))
7340 /* As above for fmul/fadd, we can't store to st(0). */
7341 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7342 else
7343 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7344 #endif
7345 break;
7346 }
7347
7348 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
7349 {
7350 #if SYSV386_COMPAT
7351 if (STACK_TOP_P (operands[0]))
7352 p = "{rp\t%0, %1|p\t%1, %0}";
7353 else
7354 p = "{p\t%1, %0|rp\t%0, %1}";
7355 #else
7356 if (STACK_TOP_P (operands[0]))
7357 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7358 else
7359 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7360 #endif
7361 break;
7362 }
7363
7364 if (STACK_TOP_P (operands[0]))
7365 {
7366 if (STACK_TOP_P (operands[1]))
7367 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7368 else
7369 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
7370 break;
7371 }
7372 else if (STACK_TOP_P (operands[1]))
7373 {
7374 #if SYSV386_COMPAT
7375 p = "{\t%1, %0|r\t%0, %1}";
7376 #else
7377 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
7378 #endif
7379 }
7380 else
7381 {
7382 #if SYSV386_COMPAT
7383 p = "{r\t%2, %0|\t%0, %2}";
7384 #else
7385 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7386 #endif
7387 }
7388 break;
7389
7390 default:
7391 gcc_unreachable ();
7392 }
7393
7394 strcat (buf, p);
7395 return buf;
7396 }
7397
7398 /* Output code to initialize the control word copies used by the trunc?f?i
7399 and rounding patterns. CURRENT_MODE receives the current control word,
7400 while NEW_MODE is set to the modified control word selected by MODE. */
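/* For reference, the i387 control word fields used here: bits 0-5 are the
exception masks (PM, the precision mask, is bit 5 = 0x0020), bits 8-9 are
precision control and bits 10-11 are rounding control (00 = to nearest,
01 = down toward -oo, 10 = up toward +oo, 11 = toward zero).  Hence the
0x0400/0x0800/0x0c00 constants below; the movsi_insv_1 path stores the
same values shifted into the high byte (0x4/0x8/0xc), overwriting the
precision-control bits as well, which is presumably harmless for these
patterns. */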
7401
7402 void
7403 emit_i387_cw_initialization (rtx current_mode, rtx new_mode, int mode)
7404 {
7405 rtx reg = gen_reg_rtx (HImode);
7406
7407 emit_insn (gen_x86_fnstcw_1 (current_mode));
7408 emit_move_insn (reg, current_mode);
7409
7410 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
7411 && !TARGET_64BIT)
7412 {
7413 switch (mode)
7414 {
7415 case I387_CW_FLOOR:
7416 /* round down toward -oo */
7417 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
7418 break;
7419
7420 case I387_CW_CEIL:
7421 /* round up toward +oo */
7422 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
7423 break;
7424
7425 case I387_CW_TRUNC:
7426 /* round toward zero (truncate) */
7427 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
7428 break;
7429
7430 case I387_CW_MASK_PM:
7431 /* mask precision exception for nearbyint() */
7432 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
7433 break;
7434
7435 default:
7436 gcc_unreachable ();
7437 }
7438 }
7439 else
7440 {
7441 switch (mode)
7442 {
7443 case I387_CW_FLOOR:
7444 /* round down toward -oo */
7445 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
7446 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
7447 break;
7448
7449 case I387_CW_CEIL:
7450 /* round up toward +oo */
7451 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
7452 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
7453 break;
7454
7455 case I387_CW_TRUNC:
7456 /* round toward zero (truncate) */
7457 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
7458 break;
7459
7460 case I387_CW_MASK_PM:
7461 /* mask precision exception for nearbyint() */
7462 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
7463 break;
7464
7465 default:
7466 gcc_unreachable ();
7467 }
7468 }
7469
7470 emit_move_insn (new_mode, reg);
7471 }
7472
7473 /* Output code for INSN to convert a float to a signed int. OPERANDS
7474 are the insn operands. The output may be [HSD]Imode and the input
7475 operand may be [SDX]Fmode. */
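/* Background: fist/fistp honor the current rounding mode, so unless the
SSE3 fisttp instruction (which always truncates) is available, the store
below is bracketed by fldcw instructions switching to the prepared
control word in operand 3 and back to the saved one in operand 2. */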
7476
7477 const char *
7478 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
7479 {
7480 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7481 int dimode_p = GET_MODE (operands[0]) == DImode;
7482 int round_mode = get_attr_i387_cw (insn);
7483
7484 /* Jump through a hoop or two for DImode, since the hardware has no
7485 non-popping instruction. We used to do this a different way, but
7486 that was somewhat fragile and broke with post-reload splitters. */
7487 if ((dimode_p || fisttp) && !stack_top_dies)
7488 output_asm_insn ("fld\t%y1", operands);
7489
7490 gcc_assert (STACK_TOP_P (operands[1]));
7491 gcc_assert (GET_CODE (operands[0]) == MEM);
7492
7493 if (fisttp)
7494 output_asm_insn ("fisttp%z0\t%0", operands);
7495 else
7496 {
7497 if (round_mode != I387_CW_ANY)
7498 output_asm_insn ("fldcw\t%3", operands);
7499 if (stack_top_dies || dimode_p)
7500 output_asm_insn ("fistp%z0\t%0", operands);
7501 else
7502 output_asm_insn ("fist%z0\t%0", operands);
7503 if (round_mode != I387_CW_ANY)
7504 output_asm_insn ("fldcw\t%2", operands);
7505 }
7506
7507 return "";
7508 }
7509
7510 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
7511 should be used. UNORDERED_P is true when fucom should be used. */
7512
7513 const char *
7514 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
7515 {
7516 int stack_top_dies;
7517 rtx cmp_op0, cmp_op1;
7518 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
7519
7520 if (eflags_p)
7521 {
7522 cmp_op0 = operands[0];
7523 cmp_op1 = operands[1];
7524 }
7525 else
7526 {
7527 cmp_op0 = operands[1];
7528 cmp_op1 = operands[2];
7529 }
7530
7531 if (is_sse)
7532 {
7533 if (GET_MODE (operands[0]) == SFmode)
7534 if (unordered_p)
7535 return "ucomiss\t{%1, %0|%0, %1}";
7536 else
7537 return "comiss\t{%1, %0|%0, %1}";
7538 else
7539 if (unordered_p)
7540 return "ucomisd\t{%1, %0|%0, %1}";
7541 else
7542 return "comisd\t{%1, %0|%0, %1}";
7543 }
7544
7545 gcc_assert (STACK_TOP_P (cmp_op0));
7546
7547 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7548
7549 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
7550 {
7551 if (stack_top_dies)
7552 {
7553 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
7554 return TARGET_USE_FFREEP ? "ffreep\t%y1" : "fstp\t%y1";
7555 }
7556 else
7557 return "ftst\n\tfnstsw\t%0";
7558 }
7559
7560 if (STACK_REG_P (cmp_op1)
7561 && stack_top_dies
7562 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
7563 && REGNO (cmp_op1) != FIRST_STACK_REG)
7564 {
7565 /* If the top of the 387 stack dies, and the other operand
7566 is also a stack register that dies, then this must be an
7567 `fcompp' float compare. */
7568
7569 if (eflags_p)
7570 {
7571 /* There is no double popping fcomi variant. Fortunately,
7572 eflags is immune from the fstp's cc clobbering. */
7573 if (unordered_p)
7574 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
7575 else
7576 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
7577 return TARGET_USE_FFREEP ? "ffreep\t%y0" : "fstp\t%y0";
7578 }
7579 else
7580 {
7581 if (unordered_p)
7582 return "fucompp\n\tfnstsw\t%0";
7583 else
7584 return "fcompp\n\tfnstsw\t%0";
7585 }
7586 }
7587 else
7588 {
7589 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
7590
7591 static const char * const alt[16] =
7592 {
7593 "fcom%z2\t%y2\n\tfnstsw\t%0",
7594 "fcomp%z2\t%y2\n\tfnstsw\t%0",
7595 "fucom%z2\t%y2\n\tfnstsw\t%0",
7596 "fucomp%z2\t%y2\n\tfnstsw\t%0",
7597
7598 "ficom%z2\t%y2\n\tfnstsw\t%0",
7599 "ficomp%z2\t%y2\n\tfnstsw\t%0",
7600 NULL,
7601 NULL,
7602
7603 "fcomi\t{%y1, %0|%0, %y1}",
7604 "fcomip\t{%y1, %0|%0, %y1}",
7605 "fucomi\t{%y1, %0|%0, %y1}",
7606 "fucomip\t{%y1, %0|%0, %y1}",
7607
7608 NULL,
7609 NULL,
7610 NULL,
7611 NULL
7612 };
7613
7614 int mask;
7615 const char *ret;
7616
7617 mask = eflags_p << 3;
7618 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
7619 mask |= unordered_p << 1;
7620 mask |= stack_top_dies;
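/* Worked example: an fcomi-style unordered compare where the stack top
dies gives mask = (1<<3) | (0<<2) | (1<<1) | 1 = 11, selecting
"fucomip\t{%y1, %0|%0, %y1}" from the table above. */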
7621
7622 gcc_assert (mask < 16);
7623 ret = alt[mask];
7624 gcc_assert (ret);
7625
7626 return ret;
7627 }
7628 }
7629
7630 void
7631 ix86_output_addr_vec_elt (FILE *file, int value)
7632 {
7633 const char *directive = ASM_LONG;
7634
7635 #ifdef ASM_QUAD
7636 if (TARGET_64BIT)
7637 directive = ASM_QUAD;
7638 #else
7639 gcc_assert (!TARGET_64BIT);
7640 #endif
7641
7642 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
7643 }
7644
7645 void
7646 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
7647 {
7648 if (TARGET_64BIT)
7649 fprintf (file, "%s%s%d-%s%d\n",
7650 ASM_LONG, LPREFIX, value, LPREFIX, rel);
7651 else if (HAVE_AS_GOTOFF_IN_DATA)
7652 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
7653 #if TARGET_MACHO
7654 else if (TARGET_MACHO)
7655 {
7656 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
7657 machopic_output_function_base_name (file);
7658 fprintf(file, "\n");
7659 }
7660 #endif
7661 else
7662 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
7663 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
7664 }
7665 \f
7666 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
7667 for the target. */
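/* Rationale (informal): "xor reg,reg" is the shortest way to clear a
register (2 bytes vs. 5 for a "mov $0" into a 32-bit register), but it
clobbers the condition codes, hence the explicit flags clobber added
below; "mov $0" is used only on targets where TARGET_USE_MOV0 prefers it
and we are not optimizing for size. */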
7668
7669 void
7670 ix86_expand_clear (rtx dest)
7671 {
7672 rtx tmp;
7673
7674 /* We play register width games, which are only valid after reload. */
7675 gcc_assert (reload_completed);
7676
7677 /* Avoid HImode and its attendant prefix byte. */
7678 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
7679 dest = gen_rtx_REG (SImode, REGNO (dest));
7680
7681 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
7682
7683 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
7684 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
7685 {
7686 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
7687 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
7688 }
7689
7690 emit_insn (tmp);
7691 }
7692
7693 /* X is an unchanging MEM. If it is a constant pool reference, return
7694 the constant pool rtx, else NULL. */
7695
7696 rtx
7697 maybe_get_pool_constant (rtx x)
7698 {
7699 x = ix86_delegitimize_address (XEXP (x, 0));
7700
7701 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7702 return get_pool_constant (x);
7703
7704 return NULL_RTX;
7705 }
7706
7707 void
7708 ix86_expand_move (enum machine_mode mode, rtx operands[])
7709 {
7710 int strict = (reload_in_progress || reload_completed);
7711 rtx op0, op1;
7712 enum tls_model model;
7713
7714 op0 = operands[0];
7715 op1 = operands[1];
7716
7717 if (GET_CODE (op1) == SYMBOL_REF)
7718 {
7719 model = SYMBOL_REF_TLS_MODEL (op1);
7720 if (model)
7721 {
7722 op1 = legitimize_tls_address (op1, model, true);
7723 op1 = force_operand (op1, op0);
7724 if (op1 == op0)
7725 return;
7726 }
7727 }
7728 else if (GET_CODE (op1) == CONST
7729 && GET_CODE (XEXP (op1, 0)) == PLUS
7730 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
7731 {
7732 model = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1, 0), 0));
7733 if (model)
7734 {
7735 rtx addend = XEXP (XEXP (op1, 0), 1);
7736 op1 = legitimize_tls_address (XEXP (XEXP (op1, 0), 0), model, true);
7737 op1 = force_operand (op1, NULL);
7738 op1 = expand_simple_binop (Pmode, PLUS, op1, addend,
7739 op0, 1, OPTAB_DIRECT);
7740 if (op1 == op0)
7741 return;
7742 }
7743 }
7744
7745 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
7746 {
7747 #if TARGET_MACHO
7748 if (MACHOPIC_PURE)
7749 {
7750 rtx temp = ((reload_in_progress
7751 || ((op0 && GET_CODE (op0) == REG)
7752 && mode == Pmode))
7753 ? op0 : gen_reg_rtx (Pmode));
7754 op1 = machopic_indirect_data_reference (op1, temp);
7755 op1 = machopic_legitimize_pic_address (op1, mode,
7756 temp == op1 ? 0 : temp);
7757 }
7758 else if (MACHOPIC_INDIRECT)
7759 op1 = machopic_indirect_data_reference (op1, 0);
7760 if (op0 == op1)
7761 return;
7762 #else
7763 if (GET_CODE (op0) == MEM)
7764 op1 = force_reg (Pmode, op1);
7765 else
7766 op1 = legitimize_address (op1, op1, Pmode);
7767 #endif /* TARGET_MACHO */
7768 }
7769 else
7770 {
7771 if (GET_CODE (op0) == MEM
7772 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
7773 || !push_operand (op0, mode))
7774 && GET_CODE (op1) == MEM)
7775 op1 = force_reg (mode, op1);
7776
7777 if (push_operand (op0, mode)
7778 && ! general_no_elim_operand (op1, mode))
7779 op1 = copy_to_mode_reg (mode, op1);
7780
7781 /* Force large constants in 64bit compilation into register
7782 to get them CSEed. */
7783 if (TARGET_64BIT && mode == DImode
7784 && immediate_operand (op1, mode)
7785 && !x86_64_zext_immediate_operand (op1, VOIDmode)
7786 && !register_operand (op0, mode)
7787 && optimize && !reload_completed && !reload_in_progress)
7788 op1 = copy_to_mode_reg (mode, op1);
7789
7790 if (FLOAT_MODE_P (mode))
7791 {
7792 /* If we are loading a floating point constant to a register,
7793 force the value to memory now, since we'll get better code
7794 out the back end. */
7795
7796 if (strict)
7797 ;
7798 else if (GET_CODE (op1) == CONST_DOUBLE)
7799 {
7800 op1 = validize_mem (force_const_mem (mode, op1));
7801 if (!register_operand (op0, mode))
7802 {
7803 rtx temp = gen_reg_rtx (mode);
7804 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
7805 emit_move_insn (op0, temp);
7806 return;
7807 }
7808 }
7809 }
7810 }
7811
7812 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
7813 }
7814
7815 void
7816 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
7817 {
7818 rtx op0 = operands[0], op1 = operands[1];
7819
7820 /* Force constants other than zero into memory. We do not know how
7821 the instructions used to build constants modify the upper 64 bits
7822 of the register; once we have that information we may be able
7823 to handle some of them more efficiently. */
7824 if ((reload_in_progress | reload_completed) == 0
7825 && register_operand (op0, mode)
7826 && CONSTANT_P (op1) && op1 != CONST0_RTX (mode))
7827 op1 = validize_mem (force_const_mem (mode, op1));
7828
7829 /* Make operand1 a register if it isn't already. */
7830 if (!no_new_pseudos
7831 && !register_operand (op0, mode)
7832 && !register_operand (op1, mode))
7833 {
7834 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
7835 return;
7836 }
7837
7838 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
7839 }
7840
7841 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
7842 straight to ix86_expand_vector_move. */
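/* In outline (a summary of the cases below, not a specification): when
optimizing for size everything goes through movups; integer vector modes
use movdqu on SSE2; V2DFmode accesses are split into low/high DFmode
halves via the loadlpd/loadhpd and storelpd/storehpd patterns (presumably
movlpd/movhpd); anything else is viewed as V4SFmode and split with the
loadlps/loadhps and storelps/storehps pairs. */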
7843
7844 void
7845 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
7846 {
7847 rtx op0, op1, m;
7848
7849 op0 = operands[0];
7850 op1 = operands[1];
7851
7852 if (MEM_P (op1))
7853 {
7854 /* If we're optimizing for size, movups is the smallest. */
7855 if (optimize_size)
7856 {
7857 op0 = gen_lowpart (V4SFmode, op0);
7858 op1 = gen_lowpart (V4SFmode, op1);
7859 emit_insn (gen_sse_movups (op0, op1));
7860 return;
7861 }
7862
7863 /* ??? If we have typed data, then it would appear that using
7864 movdqu is the only way to get unaligned data loaded with
7865 integer type. */
7866 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
7867 {
7868 op0 = gen_lowpart (V16QImode, op0);
7869 op1 = gen_lowpart (V16QImode, op1);
7870 emit_insn (gen_sse2_movdqu (op0, op1));
7871 return;
7872 }
7873
7874 if (TARGET_SSE2 && mode == V2DFmode)
7875 {
7876 rtx zero;
7877
7878 /* When SSE registers are split into halves, we can avoid
7879 writing to the top half twice. */
7880 if (TARGET_SSE_SPLIT_REGS)
7881 {
7882 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
7883 zero = op0;
7884 }
7885 else
7886 {
7887 /* ??? Not sure about the best option for the Intel chips.
7888 The following would seem to satisfy; the register is
7889 entirely cleared, breaking the dependency chain. We
7890 then store to the upper half, with a dependency depth
7891 of one. A rumor has it that Intel recommends two movsd
7892 followed by an unpacklpd, but this is unconfirmed. And
7893 given that the dependency depth of the unpacklpd would
7894 still be one, I'm not sure why this would be better. */
7895 zero = CONST0_RTX (V2DFmode);
7896 }
7897
7898 m = adjust_address (op1, DFmode, 0);
7899 emit_insn (gen_sse2_loadlpd (op0, zero, m));
7900 m = adjust_address (op1, DFmode, 8);
7901 emit_insn (gen_sse2_loadhpd (op0, op0, m));
7902 }
7903 else
7904 {
7905 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
7906 emit_move_insn (op0, CONST0_RTX (mode));
7907 else
7908 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
7909
7910 if (mode != V4SFmode)
7911 op0 = gen_lowpart (V4SFmode, op0);
7912 m = adjust_address (op1, V2SFmode, 0);
7913 emit_insn (gen_sse_loadlps (op0, op0, m));
7914 m = adjust_address (op1, V2SFmode, 8);
7915 emit_insn (gen_sse_loadhps (op0, op0, m));
7916 }
7917 }
7918 else if (MEM_P (op0))
7919 {
7920 /* If we're optimizing for size, movups is the smallest. */
7921 if (optimize_size)
7922 {
7923 op0 = gen_lowpart (V4SFmode, op0);
7924 op1 = gen_lowpart (V4SFmode, op1);
7925 emit_insn (gen_sse_movups (op0, op1));
7926 return;
7927 }
7928
7929 /* ??? Similar to above, only less clear because of quote
7930 typeless stores unquote. */
7931 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
7932 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
7933 {
7934 op0 = gen_lowpart (V16QImode, op0);
7935 op1 = gen_lowpart (V16QImode, op1);
7936 emit_insn (gen_sse2_movdqu (op0, op1));
7937 return;
7938 }
7939
7940 if (TARGET_SSE2 && mode == V2DFmode)
7941 {
7942 m = adjust_address (op0, DFmode, 0);
7943 emit_insn (gen_sse2_storelpd (m, op1));
7944 m = adjust_address (op0, DFmode, 8);
7945 emit_insn (gen_sse2_storehpd (m, op1));
7946 }
7947 else
7948 {
7949 if (mode != V4SFmode)
7950 op1 = gen_lowpart (V4SFmode, op1);
7951 m = adjust_address (op0, V2SFmode, 0);
7952 emit_insn (gen_sse_storelps (m, op1));
7953 m = adjust_address (op0, V2SFmode, 8);
7954 emit_insn (gen_sse_storehps (m, op1));
7955 }
7956 }
7957 else
7958 gcc_unreachable ();
7959 }
7960
7961 /* Expand a push in MODE. This is some mode for which we do not support
7962 proper push instructions, at least from the registers that we expect
7963 the value to live in. */
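/* Roughly, this expands to "sub $size, %esp" followed by a store of X
through the stack pointer, rather than a real push instruction. */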
7964
7965 void
7966 ix86_expand_push (enum machine_mode mode, rtx x)
7967 {
7968 rtx tmp;
7969
7970 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
7971 GEN_INT (-GET_MODE_SIZE (mode)),
7972 stack_pointer_rtx, 1, OPTAB_DIRECT);
7973 if (tmp != stack_pointer_rtx)
7974 emit_move_insn (stack_pointer_rtx, tmp);
7975
7976 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
7977 emit_move_insn (tmp, x);
7978 }
7979
7980 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
7981 destination to use for the operation. If different from the true
7982 destination in operands[0], a copy operation will be required. */
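/* For example, given (set (mem:SI D) (plus:SI (mem:SI A) (mem:SI B))),
at least one source is forced into a register and, because the
destination is a non-matching memory, the returned destination is a
fresh pseudo; the caller then emits the separate store mentioned
above. */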
7983
7984 rtx
7985 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
7986 rtx operands[])
7987 {
7988 int matching_memory;
7989 rtx src1, src2, dst;
7990
7991 dst = operands[0];
7992 src1 = operands[1];
7993 src2 = operands[2];
7994
7995 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
7996 if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
7997 && (rtx_equal_p (dst, src2)
7998 || immediate_operand (src1, mode)))
7999 {
8000 rtx temp = src1;
8001 src1 = src2;
8002 src2 = temp;
8003 }
8004
8005 /* If the destination is memory, and we do not have matching source
8006 operands, do things in registers. */
8007 matching_memory = 0;
8008 if (GET_CODE (dst) == MEM)
8009 {
8010 if (rtx_equal_p (dst, src1))
8011 matching_memory = 1;
8012 else if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
8013 && rtx_equal_p (dst, src2))
8014 matching_memory = 2;
8015 else
8016 dst = gen_reg_rtx (mode);
8017 }
8018
8019 /* The two source operands cannot both be in memory. */
8020 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
8021 {
8022 if (matching_memory != 2)
8023 src2 = force_reg (mode, src2);
8024 else
8025 src1 = force_reg (mode, src1);
8026 }
8027
8028 /* If the operation is not commutative, source 1 cannot be a constant
8029 or non-matching memory. */
8030 if ((CONSTANT_P (src1)
8031 || (!matching_memory && GET_CODE (src1) == MEM))
8032 && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
8033 src1 = force_reg (mode, src1);
8034
8035 /* If optimizing, copy to regs to improve CSE */
8036 if (optimize && ! no_new_pseudos)
8037 {
8038 if (GET_CODE (dst) == MEM)
8039 dst = gen_reg_rtx (mode);
8040 if (GET_CODE (src1) == MEM)
8041 src1 = force_reg (mode, src1);
8042 if (GET_CODE (src2) == MEM)
8043 src2 = force_reg (mode, src2);
8044 }
8045
8046 src1 = operands[1] = src1;
8047 src2 = operands[2] = src2;
8048 return dst;
8049 }
8050
8051 /* Similarly, but assume that the destination has already been
8052 set up properly. */
8053
8054 void
8055 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
8056 enum machine_mode mode, rtx operands[])
8057 {
8058 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
8059 gcc_assert (dst == operands[0]);
8060 }
8061
8062 /* Attempt to expand a binary operator. Make the expansion closer to the
8063 actual machine than just general_operand, which would allow 3 separate
8064 memory references (one output, two input) in a single insn. */
8065
8066 void
8067 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
8068 rtx operands[])
8069 {
8070 rtx src1, src2, dst, op, clob;
8071
8072 dst = ix86_fixup_binary_operands (code, mode, operands);
8073 src1 = operands[1];
8074 src2 = operands[2];
8075
8076 /* Emit the instruction. */
8077
8078 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
8079 if (reload_in_progress)
8080 {
8081 /* Reload doesn't know about the flags register, and doesn't know that
8082 it doesn't want to clobber it. We can only do this with PLUS. */
8083 gcc_assert (code == PLUS);
8084 emit_insn (op);
8085 }
8086 else
8087 {
8088 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8089 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8090 }
8091
8092 /* Fix up the destination if needed. */
8093 if (dst != operands[0])
8094 emit_move_insn (operands[0], dst);
8095 }
8096
8097 /* Return TRUE or FALSE depending on whether the binary operator meets the
8098 appropriate constraints. */
8099
8100 int
8101 ix86_binary_operator_ok (enum rtx_code code,
8102 enum machine_mode mode ATTRIBUTE_UNUSED,
8103 rtx operands[3])
8104 {
8105 /* The two source operands cannot both be in memory. */
8106 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
8107 return 0;
8108 /* If the operation is not commutative, source 1 cannot be a constant. */
8109 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
8110 return 0;
8111 /* If the destination is memory, we must have a matching source operand. */
8112 if (GET_CODE (operands[0]) == MEM
8113 && ! (rtx_equal_p (operands[0], operands[1])
8114 || (GET_RTX_CLASS (code) == RTX_COMM_ARITH
8115 && rtx_equal_p (operands[0], operands[2]))))
8116 return 0;
8117 /* If the operation is not commutative and source 1 is memory, we must
8118 have a matching destination. */
8119 if (GET_CODE (operands[1]) == MEM
8120 && GET_RTX_CLASS (code) != RTX_COMM_ARITH
8121 && ! rtx_equal_p (operands[0], operands[1]))
8122 return 0;
8123 return 1;
8124 }
8125
8126 /* Attempt to expand a unary operator. Make the expansion closer to the
8127 actual machine than just general_operand, which would allow 2 separate
8128 memory references (one output, one input) in a single insn. */
8129
8130 void
8131 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
8132 rtx operands[])
8133 {
8134 int matching_memory;
8135 rtx src, dst, op, clob;
8136
8137 dst = operands[0];
8138 src = operands[1];
8139
8140 /* If the destination is memory, and we do not have matching source
8141 operands, do things in registers. */
8142 matching_memory = 0;
8143 if (MEM_P (dst))
8144 {
8145 if (rtx_equal_p (dst, src))
8146 matching_memory = 1;
8147 else
8148 dst = gen_reg_rtx (mode);
8149 }
8150
8151 /* When source operand is memory, destination must match. */
8152 if (MEM_P (src) && !matching_memory)
8153 src = force_reg (mode, src);
8154
8155 /* If optimizing, copy to regs to improve CSE. */
8156 if (optimize && ! no_new_pseudos)
8157 {
8158 if (GET_CODE (dst) == MEM)
8159 dst = gen_reg_rtx (mode);
8160 if (GET_CODE (src) == MEM)
8161 src = force_reg (mode, src);
8162 }
8163
8164 /* Emit the instruction. */
8165
8166 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
8167 if (reload_in_progress || code == NOT)
8168 {
8169 /* Reload doesn't know about the flags register, and doesn't know that
8170 it doesn't want to clobber it. */
8171 gcc_assert (code == NOT);
8172 emit_insn (op);
8173 }
8174 else
8175 {
8176 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8177 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8178 }
8179
8180 /* Fix up the destination if needed. */
8181 if (dst != operands[0])
8182 emit_move_insn (operands[0], dst);
8183 }
8184
8185 /* Return TRUE or FALSE depending on whether the unary operator meets the
8186 appropriate constraints. */
8187
8188 int
8189 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
8190 enum machine_mode mode ATTRIBUTE_UNUSED,
8191 rtx operands[2] ATTRIBUTE_UNUSED)
8192 {
8193 /* If one of operands is memory, source and destination must match. */
8194 if ((GET_CODE (operands[0]) == MEM
8195 || GET_CODE (operands[1]) == MEM)
8196 && ! rtx_equal_p (operands[0], operands[1]))
8197 return FALSE;
8198 return TRUE;
8199 }
8200
8201 /* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
8202 Create a mask for the sign bit in MODE for an SSE register. If VECT is
8203 true, then replicate the mask for all elements of the vector register.
8204 If INVERT is true, then create a mask excluding the sign bit. */
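/* For example, DFmode with VECT and INVERT both false yields the V2DFmode
constant whose first element has only bit 63 set (the bit pattern of
-0.0) and whose second element is zero; with INVERT the first element
instead has every bit except the sign bit set. */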
8205
8206 rtx
8207 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
8208 {
8209 enum machine_mode vec_mode;
8210 HOST_WIDE_INT hi, lo;
8211 int shift = 63;
8212 rtvec v;
8213 rtx mask;
8214
8215 /* Find the sign bit, sign extended to 2*HWI. */
8216 if (mode == SFmode)
8217 lo = 0x80000000, hi = lo < 0;
8218 else if (HOST_BITS_PER_WIDE_INT >= 64)
8219 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
8220 else
8221 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
8222
8223 if (invert)
8224 lo = ~lo, hi = ~hi;
8225
8226 /* Force this value into the low part of a fp vector constant. */
8227 mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
8228 mask = gen_lowpart (mode, mask);
8229
8230 if (mode == SFmode)
8231 {
8232 if (vect)
8233 v = gen_rtvec (4, mask, mask, mask, mask);
8234 else
8235 v = gen_rtvec (4, mask, CONST0_RTX (SFmode),
8236 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
8237 vec_mode = V4SFmode;
8238 }
8239 else
8240 {
8241 if (vect)
8242 v = gen_rtvec (2, mask, mask);
8243 else
8244 v = gen_rtvec (2, mask, CONST0_RTX (DFmode));
8245 vec_mode = V2DFmode;
8246 }
8247
8248 return force_reg (vec_mode, gen_rtx_CONST_VECTOR (vec_mode, v));
8249 }
8250
8251 /* Generate code for floating point ABS or NEG. */
8252
8253 void
8254 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
8255 rtx operands[])
8256 {
8257 rtx mask, set, use, clob, dst, src;
8258 bool matching_memory;
8259 bool use_sse = false;
8260 bool vector_mode = VECTOR_MODE_P (mode);
8261 enum machine_mode elt_mode = mode;
8262
8263 if (vector_mode)
8264 {
8265 elt_mode = GET_MODE_INNER (mode);
8266 use_sse = true;
8267 }
8268 else if (TARGET_SSE_MATH)
8269 use_sse = SSE_FLOAT_MODE_P (mode);
8270
8271 /* NEG and ABS performed with SSE use bitwise mask operations.
8272 Create the appropriate mask now. */
8273 if (use_sse)
8274 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
8275 else
8276 {
8277 /* When not using SSE, we don't use the mask, but prefer to keep the
8278 same general form of the insn pattern to reduce duplication when
8279 it comes time to split. */
8280 mask = const0_rtx;
8281 }
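/* With the mask in hand, NEG is dst = src XOR sign-bit-mask and ABS is
dst = src AND inverted-mask -- which is why the inverted mask is
requested when CODE is ABS. */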
8282
8283 dst = operands[0];
8284 src = operands[1];
8285
8286 /* If the destination is memory, and we don't have matching source
8287 operands, do things in registers. */
8288 matching_memory = false;
8289 if (MEM_P (dst))
8290 {
8291 if (rtx_equal_p (dst, src) && (!optimize || no_new_pseudos))
8292 matching_memory = true;
8293 else
8294 dst = gen_reg_rtx (mode);
8295 }
8296 if (MEM_P (src) && !matching_memory)
8297 src = force_reg (mode, src);
8298
8299 if (vector_mode)
8300 {
8301 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
8302 set = gen_rtx_SET (VOIDmode, dst, set);
8303 emit_insn (set);
8304 }
8305 else
8306 {
8307 set = gen_rtx_fmt_e (code, mode, src);
8308 set = gen_rtx_SET (VOIDmode, dst, set);
8309 use = gen_rtx_USE (VOIDmode, mask);
8310 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8311 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (3, set, use, clob)));
8312 }
8313
8314 if (dst != operands[0])
8315 emit_move_insn (operands[0], dst);
8316 }
8317
8318 /* Expand a copysign operation. Special case operand 0 being a constant. */
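/* The underlying identity is copysign (x, y) = (x & ~sign-mask)
| (y & sign-mask).  When the magnitude X is a constant, |x| is
materialized as a vector constant up front, so only a single AND/OR
pair survives in the split patterns below. */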
8319
8320 void
8321 ix86_expand_copysign (rtx operands[])
8322 {
8323 enum machine_mode mode, vmode;
8324 rtx dest, op0, op1, mask, nmask;
8325
8326 dest = operands[0];
8327 op0 = operands[1];
8328 op1 = operands[2];
8329
8330 mode = GET_MODE (dest);
8331 vmode = mode == SFmode ? V4SFmode : V2DFmode;
8332
8333 if (GET_CODE (op0) == CONST_DOUBLE)
8334 {
8335 rtvec v;
8336
8337 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
8338 op0 = simplify_unary_operation (ABS, mode, op0, mode);
8339
8340 if (op0 == CONST0_RTX (mode))
8341 op0 = CONST0_RTX (vmode);
8342 else
8343 {
8344 if (mode == SFmode)
8345 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
8346 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
8347 else
8348 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
8349 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
8350 }
8351
8352 mask = ix86_build_signbit_mask (mode, 0, 0);
8353
8354 if (mode == SFmode)
8355 emit_insn (gen_copysignsf3_const (dest, op0, op1, mask));
8356 else
8357 emit_insn (gen_copysigndf3_const (dest, op0, op1, mask));
8358 }
8359 else
8360 {
8361 nmask = ix86_build_signbit_mask (mode, 0, 1);
8362 mask = ix86_build_signbit_mask (mode, 0, 0);
8363
8364 if (mode == SFmode)
8365 emit_insn (gen_copysignsf3_var (dest, NULL, op0, op1, nmask, mask));
8366 else
8367 emit_insn (gen_copysigndf3_var (dest, NULL, op0, op1, nmask, mask));
8368 }
8369 }
8370
8371 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
8372 be a constant, and so has already been expanded into a vector constant. */
8373
8374 void
8375 ix86_split_copysign_const (rtx operands[])
8376 {
8377 enum machine_mode mode, vmode;
8378 rtx dest, op0, op1, mask, x;
8379
8380 dest = operands[0];
8381 op0 = operands[1];
8382 op1 = operands[2];
8383 mask = operands[3];
8384
8385 mode = GET_MODE (dest);
8386 vmode = GET_MODE (mask);
8387
8388 dest = simplify_gen_subreg (vmode, dest, mode, 0);
8389 x = gen_rtx_AND (vmode, dest, mask);
8390 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
8391
8392 if (op0 != CONST0_RTX (vmode))
8393 {
8394 x = gen_rtx_IOR (vmode, dest, op0);
8395 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
8396 }
8397 }
8398
8399 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
8400 so we have to do two masks. */
8401
8402 void
8403 ix86_split_copysign_var (rtx operands[])
8404 {
8405 enum machine_mode mode, vmode;
8406 rtx dest, scratch, op0, op1, mask, nmask, x;
8407
8408 dest = operands[0];
8409 scratch = operands[1];
8410 op0 = operands[2];
8411 op1 = operands[3];
8412 nmask = operands[4];
8413 mask = operands[5];
8414
8415 mode = GET_MODE (dest);
8416 vmode = GET_MODE (mask);
8417
8418 if (rtx_equal_p (op0, op1))
8419 {
8420 /* Shouldn't happen often (it's useless, obviously), but when it does
8421 we'd generate incorrect code if we continue below. */
8422 emit_move_insn (dest, op0);
8423 return;
8424 }
8425
8426 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
8427 {
8428 gcc_assert (REGNO (op1) == REGNO (scratch));
8429
8430 x = gen_rtx_AND (vmode, scratch, mask);
8431 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
8432
8433 dest = mask;
8434 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
8435 x = gen_rtx_NOT (vmode, dest);
8436 x = gen_rtx_AND (vmode, x, op0);
8437 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
8438 }
8439 else
8440 {
8441 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
8442 {
8443 x = gen_rtx_AND (vmode, scratch, mask);
8444 }
8445 else /* alternative 2,4 */
8446 {
8447 gcc_assert (REGNO (mask) == REGNO (scratch));
8448 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
8449 x = gen_rtx_AND (vmode, scratch, op1);
8450 }
8451 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
8452
8453 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
8454 {
8455 dest = simplify_gen_subreg (vmode, op0, mode, 0);
8456 x = gen_rtx_AND (vmode, dest, nmask);
8457 }
8458 else /* alternative 3,4 */
8459 {
8460 gcc_assert (REGNO (nmask) == REGNO (dest));
8461 dest = nmask;
8462 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
8463 x = gen_rtx_AND (vmode, dest, op0);
8464 }
8465 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
8466 }
8467
8468 x = gen_rtx_IOR (vmode, dest, scratch);
8469 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
8470 }
8471
8472 /* Return TRUE or FALSE depending on whether the first SET in INSN
8473 has source and destination with matching CC modes, and that the
8474 CC mode is at least as constrained as REQ_MODE. */
8475
8476 int
8477 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
8478 {
8479 rtx set;
8480 enum machine_mode set_mode;
8481
8482 set = PATTERN (insn);
8483 if (GET_CODE (set) == PARALLEL)
8484 set = XVECEXP (set, 0, 0);
8485 gcc_assert (GET_CODE (set) == SET);
8486 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
8487
8488 set_mode = GET_MODE (SET_DEST (set));
8489 switch (set_mode)
8490 {
8491 case CCNOmode:
8492 if (req_mode != CCNOmode
8493 && (req_mode != CCmode
8494 || XEXP (SET_SRC (set), 1) != const0_rtx))
8495 return 0;
8496 break;
8497 case CCmode:
8498 if (req_mode == CCGCmode)
8499 return 0;
8500 /* FALLTHRU */
8501 case CCGCmode:
8502 if (req_mode == CCGOCmode || req_mode == CCNOmode)
8503 return 0;
8504 /* FALLTHRU */
8505 case CCGOCmode:
8506 if (req_mode == CCZmode)
8507 return 0;
8508 /* FALLTHRU */
8509 case CCZmode:
8510 break;
8511
8512 default:
8513 gcc_unreachable ();
8514 }
8515
8516 return (GET_MODE (SET_SRC (set)) == set_mode);
8517 }
8518
8519 /* Generate insn patterns to do an integer compare of OPERANDS. */
8520
8521 static rtx
8522 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
8523 {
8524 enum machine_mode cmpmode;
8525 rtx tmp, flags;
8526
8527 cmpmode = SELECT_CC_MODE (code, op0, op1);
8528 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
8529
8530 /* This is very simple, but making the interface the same as in the
8531 FP case makes the rest of the code easier. */
8532 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
8533 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
8534
8535 /* Return the test that should be put into the flags user, i.e.
8536 the bcc, scc, or cmov instruction. */
8537 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
8538 }
8539
8540 /* Figure out whether to use ordered or unordered fp comparisons.
8541 Return the appropriate mode to use. */
8542
8543 enum machine_mode
8544 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
8545 {
8546 /* ??? In order to make all comparisons reversible, we do all comparisons
8547 non-trapping when compiling for IEEE. Once gcc is able to distinguish
8548 between all forms of trapping and nontrapping comparisons, we can make
8549 inequality comparisons trapping again, since that results in better code
8550 when using FCOM based compares. */
8551 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
8552 }
8553
8554 enum machine_mode
8555 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
8556 {
8557 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8558 return ix86_fp_compare_mode (code);
8559 switch (code)
8560 {
8561 /* Only zero flag is needed. */
8562 case EQ: /* ZF=0 */
8563 case NE: /* ZF!=0 */
8564 return CCZmode;
8565 /* Codes needing carry flag. */
8566 case GEU: /* CF=0 */
8567 case GTU: /* CF=0 & ZF=0 */
8568 case LTU: /* CF=1 */
8569 case LEU: /* CF=1 | ZF=1 */
8570 return CCmode;
8571 /* Codes possibly doable only with sign flag when
8572 comparing against zero. */
8573 case GE: /* SF=OF or SF=0 */
8574 case LT: /* SF<>OF or SF=1 */
8575 if (op1 == const0_rtx)
8576 return CCGOCmode;
8577 else
8578 /* For other cases Carry flag is not required. */
8579 return CCGCmode;
8580 /* Codes doable only with sign flag when comparing
8581 against zero, but we miss jump instruction for it
8582 so we need to use relational tests against overflow
8583 that thus needs to be zero. */
8584 case GT: /* ZF=0 & SF=OF */
8585 case LE: /* ZF=1 | SF<>OF */
8586 if (op1 == const0_rtx)
8587 return CCNOmode;
8588 else
8589 return CCGCmode;
8590 /* The strcmp pattern does (use flags), and combine may ask us for the
8591 proper mode. */
8592 case USE:
8593 return CCmode;
8594 default:
8595 gcc_unreachable ();
8596 }
8597 }
8598
8599 /* Return the fixed registers used for condition codes. */
8600
8601 static bool
8602 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
8603 {
8604 *p1 = FLAGS_REG;
8605 *p2 = FPSR_REG;
8606 return true;
8607 }
8608
8609 /* If two condition code modes are compatible, return a condition code
8610 mode which is compatible with both. Otherwise, return
8611 VOIDmode. */
8612
8613 static enum machine_mode
8614 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
8615 {
8616 if (m1 == m2)
8617 return m1;
8618
8619 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
8620 return VOIDmode;
8621
8622 if ((m1 == CCGCmode && m2 == CCGOCmode)
8623 || (m1 == CCGOCmode && m2 == CCGCmode))
8624 return CCGCmode;
8625
8626 switch (m1)
8627 {
8628 default:
8629 gcc_unreachable ();
8630
8631 case CCmode:
8632 case CCGCmode:
8633 case CCGOCmode:
8634 case CCNOmode:
8635 case CCZmode:
8636 switch (m2)
8637 {
8638 default:
8639 return VOIDmode;
8640
8641 case CCmode:
8642 case CCGCmode:
8643 case CCGOCmode:
8644 case CCNOmode:
8645 case CCZmode:
8646 return CCmode;
8647 }
8648
8649 case CCFPmode:
8650 case CCFPUmode:
8651 /* These are only compatible with themselves, which we already
8652 checked above. */
8653 return VOIDmode;
8654 }
8655 }
8656
8657 /* Return true if we should use an FCOMI instruction for this fp comparison. */
8658
8659 int
8660 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
8661 {
8662 enum rtx_code swapped_code = swap_condition (code);
8663 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
8664 || (ix86_fp_comparison_cost (swapped_code)
8665 == ix86_fp_comparison_fcomi_cost (swapped_code)));
8666 }
8667
8668 /* Swap, force into registers, or otherwise massage the two operands
8669 to a fp comparison. The operands are updated in place; the new
8670 comparison code is returned. */
8671
8672 static enum rtx_code
8673 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
8674 {
8675 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
8676 rtx op0 = *pop0, op1 = *pop1;
8677 enum machine_mode op_mode = GET_MODE (op0);
8678 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
8679
8680 /* All of the unordered compare instructions only work on registers.
8681 The same is true of the fcomi compare instructions. The same is
8682 true of the XFmode compare instructions if not comparing with
8683 zero (ftst insn is used in this case). */
8684
8685 if (!is_sse
8686 && (fpcmp_mode == CCFPUmode
8687 || (op_mode == XFmode
8688 && ! (standard_80387_constant_p (op0) == 1
8689 || standard_80387_constant_p (op1) == 1))
8690 || ix86_use_fcomi_compare (code)))
8691 {
8692 op0 = force_reg (op_mode, op0);
8693 op1 = force_reg (op_mode, op1);
8694 }
8695 else
8696 {
8697 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
8698 things around if they appear profitable, otherwise force op0
8699 into a register. */
8700
8701 if (standard_80387_constant_p (op0) == 0
8702 || (GET_CODE (op0) == MEM
8703 && ! (standard_80387_constant_p (op1) == 0
8704 || GET_CODE (op1) == MEM)))
8705 {
8706 rtx tmp;
8707 tmp = op0, op0 = op1, op1 = tmp;
8708 code = swap_condition (code);
8709 }
8710
8711 if (GET_CODE (op0) != REG)
8712 op0 = force_reg (op_mode, op0);
8713
8714 if (CONSTANT_P (op1))
8715 {
8716 int tmp = standard_80387_constant_p (op1);
8717 if (tmp == 0)
8718 op1 = validize_mem (force_const_mem (op_mode, op1));
8719 else if (tmp == 1)
8720 {
8721 if (TARGET_CMOVE)
8722 op1 = force_reg (op_mode, op1);
8723 }
8724 else
8725 op1 = force_reg (op_mode, op1);
8726 }
8727 }
8728
8729 /* Try to rearrange the comparison to make it cheaper. */
8730 if (ix86_fp_comparison_cost (code)
8731 > ix86_fp_comparison_cost (swap_condition (code))
8732 && (GET_CODE (op1) == REG || !no_new_pseudos))
8733 {
8734 rtx tmp;
8735 tmp = op0, op0 = op1, op1 = tmp;
8736 code = swap_condition (code);
8737 if (GET_CODE (op0) != REG)
8738 op0 = force_reg (op_mode, op0);
8739 }
8740
8741 *pop0 = op0;
8742 *pop1 = op1;
8743 return code;
8744 }
8745
8746 /* Convert comparison codes we use to represent FP comparison to integer
8747 code that will result in proper branch. Return UNKNOWN if no such code
8748 is available. */
8749
8750 enum rtx_code
8751 ix86_fp_compare_code_to_integer (enum rtx_code code)
8752 {
8753 switch (code)
8754 {
8755 case GT:
8756 return GTU;
8757 case GE:
8758 return GEU;
8759 case ORDERED:
8760 case UNORDERED:
8761 return code;
8762 break;
8763 case UNEQ:
8764 return EQ;
8765 break;
8766 case UNLT:
8767 return LTU;
8768 break;
8769 case UNLE:
8770 return LEU;
8771 break;
8772 case LTGT:
8773 return NE;
8774 break;
8775 default:
8776 return UNKNOWN;
8777 }
8778 }
8779
8780 /* Split comparison code CODE into comparisons we can do using branch
8781 instructions. BYPASS_CODE is the comparison code for a branch that will
8782 branch around FIRST_CODE and SECOND_CODE. If one of the branches
8783 is not required, the corresponding code is set to UNKNOWN.
8784 We never require more than two branches. */
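/* For example, under IEEE math a plain LT cannot be tested with a single
branch because CF is also set for unordered operands; it becomes
FIRST_CODE = UNLT with BYPASS_CODE = UNORDERED, i.e. branch around the
test when the operands compare unordered.  NE instead needs two taken
branches: LTGT plus a second UNORDERED branch to the same target. */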
8785
8786 void
8787 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
8788 enum rtx_code *first_code,
8789 enum rtx_code *second_code)
8790 {
8791 *first_code = code;
8792 *bypass_code = UNKNOWN;
8793 *second_code = UNKNOWN;
8794
8795 /* The fcomi comparison sets flags as follows:
8796
8797 cmp ZF PF CF
8798 > 0 0 0
8799 < 0 0 1
8800 = 1 0 0
8801 un 1 1 1 */
8802
8803 switch (code)
8804 {
8805 case GT: /* GTU - CF=0 & ZF=0 */
8806 case GE: /* GEU - CF=0 */
8807 case ORDERED: /* PF=0 */
8808 case UNORDERED: /* PF=1 */
8809 case UNEQ: /* EQ - ZF=1 */
8810 case UNLT: /* LTU - CF=1 */
8811 case UNLE: /* LEU - CF=1 | ZF=1 */
8812 case LTGT: /* EQ - ZF=0 */
8813 break;
8814 case LT: /* LTU - CF=1 - fails on unordered */
8815 *first_code = UNLT;
8816 *bypass_code = UNORDERED;
8817 break;
8818 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
8819 *first_code = UNLE;
8820 *bypass_code = UNORDERED;
8821 break;
8822 case EQ: /* EQ - ZF=1 - fails on unordered */
8823 *first_code = UNEQ;
8824 *bypass_code = UNORDERED;
8825 break;
8826 case NE: /* NE - ZF=0 - fails on unordered */
8827 *first_code = LTGT;
8828 *second_code = UNORDERED;
8829 break;
8830 case UNGE: /* GEU - CF=0 - fails on unordered */
8831 *first_code = GE;
8832 *second_code = UNORDERED;
8833 break;
8834 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
8835 *first_code = GT;
8836 *second_code = UNORDERED;
8837 break;
8838 default:
8839 gcc_unreachable ();
8840 }
8841 if (!TARGET_IEEE_FP)
8842 {
8843 *second_code = UNKNOWN;
8844 *bypass_code = UNKNOWN;
8845 }
8846 }
8847
8848 /* Return the cost of a comparison done with fcom + arithmetic operations on AX.
8849 All of the following functions use the number of instructions as their cost metric.
8850 In the future this should be tweaked to compute bytes for optimize_size and to
8851 take into account the performance of various instructions on various CPUs. */
8852 static int
8853 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
8854 {
8855 if (!TARGET_IEEE_FP)
8856 return 4;
8857 /* The cost of code output by ix86_expand_fp_compare. */
8858 switch (code)
8859 {
8860 case UNLE:
8861 case UNLT:
8862 case LTGT:
8863 case GT:
8864 case GE:
8865 case UNORDERED:
8866 case ORDERED:
8867 case UNEQ:
8868 return 4;
8869 break;
8870 case LT:
8871 case NE:
8872 case EQ:
8873 case UNGE:
8874 return 5;
8875 break;
8876 case LE:
8877 case UNGT:
8878 return 6;
8879 break;
8880 default:
8881 gcc_unreachable ();
8882 }
8883 }
8884
8885 /* Return cost of comparison done using fcomi operation.
8886 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8887 static int
8888 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
8889 {
8890 enum rtx_code bypass_code, first_code, second_code;
8891 /* Return an arbitrarily high cost when the instruction is not supported - this
8892 prevents gcc from using it. */
8893 if (!TARGET_CMOVE)
8894 return 1024;
8895 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8896 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
8897 }
8898
8899 /* Return cost of comparison done using sahf operation.
8900 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8901 static int
8902 ix86_fp_comparison_sahf_cost (enum rtx_code code)
8903 {
8904 enum rtx_code bypass_code, first_code, second_code;
8905 /* Return an arbitrarily high cost when the instruction is not preferred - this
8906 prevents gcc from using it. */
8907 if (!TARGET_USE_SAHF && !optimize_size)
8908 return 1024;
8909 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8910 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
8911 }
8912
8913 /* Compute cost of the comparison done using any method.
8914 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8915 static int
8916 ix86_fp_comparison_cost (enum rtx_code code)
8917 {
8918 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
8919 int min;
8920
8921 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
8922 sahf_cost = ix86_fp_comparison_sahf_cost (code);
8923
8924 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
8925 if (min > sahf_cost)
8926 min = sahf_cost;
8927 if (min > fcomi_cost)
8928 min = fcomi_cost;
8929 return min;
8930 }
8931
8932 /* Generate insn patterns to do a floating point compare of OPERANDS. */
8933
8934 static rtx
8935 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
8936 rtx *second_test, rtx *bypass_test)
8937 {
8938 enum machine_mode fpcmp_mode, intcmp_mode;
8939 rtx tmp, tmp2;
8940 int cost = ix86_fp_comparison_cost (code);
8941 enum rtx_code bypass_code, first_code, second_code;
8942
8943 fpcmp_mode = ix86_fp_compare_mode (code);
8944 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
8945
8946 if (second_test)
8947 *second_test = NULL_RTX;
8948 if (bypass_test)
8949 *bypass_test = NULL_RTX;
8950
8951 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8952
8953 /* Do fcomi/sahf based test when profitable. */
8954 if ((bypass_code == UNKNOWN || bypass_test)
8955 && (second_code == UNKNOWN || second_test)
8956 && ix86_fp_comparison_arithmetics_cost (code) > cost)
8957 {
8958 if (TARGET_CMOVE)
8959 {
8960 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8961 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
8962 tmp);
8963 emit_insn (tmp);
8964 }
8965 else
8966 {
8967 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8968 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8969 if (!scratch)
8970 scratch = gen_reg_rtx (HImode);
8971 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8972 emit_insn (gen_x86_sahf_1 (scratch));
8973 }
8974
8975 /* The FP codes work out to act like unsigned. */
8976 intcmp_mode = fpcmp_mode;
8977 code = first_code;
8978 if (bypass_code != UNKNOWN)
8979 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
8980 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8981 const0_rtx);
8982 if (second_code != UNKNOWN)
8983 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
8984 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8985 const0_rtx);
8986 }
8987 else
8988 {
8989 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
8990 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8991 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8992 if (!scratch)
8993 scratch = gen_reg_rtx (HImode);
8994 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8995
8996 /* In the unordered case, we have to check C2 for NaN's, which
8997 doesn't happen to work out to anything nice combination-wise.
8998 So do some bit twiddling on the value we've got in AH to come
8999 up with an appropriate set of condition codes. */
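/* For reference: fnstsw leaves the status word in AX, so AH holds SW bits
8-15 and the masks below are C0 = 0x01, C2 = 0x04, C3 = 0x40 (0x45 tests
all three).  After a compare, '>' leaves all three clear, '<' sets C0,
'=' sets C3, and an unordered result sets all three -- consistent with
the ZF/PF/CF table earlier once sahf maps C0->CF, C2->PF, C3->ZF. */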
9000
9001 intcmp_mode = CCNOmode;
9002 switch (code)
9003 {
9004 case GT:
9005 case UNGT:
9006 if (code == GT || !TARGET_IEEE_FP)
9007 {
9008 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
9009 code = EQ;
9010 }
9011 else
9012 {
9013 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9014 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9015 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
9016 intcmp_mode = CCmode;
9017 code = GEU;
9018 }
9019 break;
9020 case LT:
9021 case UNLT:
9022 if (code == LT && TARGET_IEEE_FP)
9023 {
9024 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9025 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
9026 intcmp_mode = CCmode;
9027 code = EQ;
9028 }
9029 else
9030 {
9031 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
9032 code = NE;
9033 }
9034 break;
9035 case GE:
9036 case UNGE:
9037 if (code == GE || !TARGET_IEEE_FP)
9038 {
9039 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
9040 code = EQ;
9041 }
9042 else
9043 {
9044 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9045 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9046 GEN_INT (0x01)));
9047 code = NE;
9048 }
9049 break;
9050 case LE:
9051 case UNLE:
9052 if (code == LE && TARGET_IEEE_FP)
9053 {
9054 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9055 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9056 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
9057 intcmp_mode = CCmode;
9058 code = LTU;
9059 }
9060 else
9061 {
9062 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
9063 code = NE;
9064 }
9065 break;
9066 case EQ:
9067 case UNEQ:
9068 if (code == EQ && TARGET_IEEE_FP)
9069 {
9070 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9071 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
9072 intcmp_mode = CCmode;
9073 code = EQ;
9074 }
9075 else
9076 {
9077 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9078 code = NE;
9079 break;
9080 }
9081 break;
9082 case NE:
9083 case LTGT:
9084 if (code == NE && TARGET_IEEE_FP)
9085 {
9086 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9087 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9088 GEN_INT (0x40)));
9089 code = NE;
9090 }
9091 else
9092 {
9093 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9094 code = EQ;
9095 }
9096 break;
9097
9098 case UNORDERED:
9099 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9100 code = NE;
9101 break;
9102 case ORDERED:
9103 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9104 code = EQ;
9105 break;
9106
9107 default:
9108 gcc_unreachable ();
9109 }
9110 }
9111
9112 /* Return the test that should be put into the flags user, i.e.
9113 the bcc, scc, or cmov instruction. */
9114 return gen_rtx_fmt_ee (code, VOIDmode,
9115 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9116 const0_rtx);
9117 }
9118
9119 rtx
9120 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
9121 {
9122 rtx op0, op1, ret;
9123 op0 = ix86_compare_op0;
9124 op1 = ix86_compare_op1;
9125
9126 if (second_test)
9127 *second_test = NULL_RTX;
9128 if (bypass_test)
9129 *bypass_test = NULL_RTX;
9130
9131 if (ix86_compare_emitted)
9132 {
9133 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
9134 ix86_compare_emitted = NULL_RTX;
9135 }
9136 else if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
9137 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9138 second_test, bypass_test);
9139 else
9140 ret = ix86_expand_int_compare (code, op0, op1);
9141
9142 return ret;
9143 }
9144
9145 /* Return true if the CODE will result in nontrivial jump sequence. */
9146 bool
9147 ix86_fp_jump_nontrivial_p (enum rtx_code code)
9148 {
9149 enum rtx_code bypass_code, first_code, second_code;
9150 if (!TARGET_CMOVE)
9151 return true;
9152 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9153 return bypass_code != UNKNOWN || second_code != UNKNOWN;
9154 }
9155
9156 void
9157 ix86_expand_branch (enum rtx_code code, rtx label)
9158 {
9159 rtx tmp;
9160
9161 switch (GET_MODE (ix86_compare_op0))
9162 {
9163 case QImode:
9164 case HImode:
9165 case SImode:
9166 simple:
9167 tmp = ix86_expand_compare (code, NULL, NULL);
9168 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9169 gen_rtx_LABEL_REF (VOIDmode, label),
9170 pc_rtx);
9171 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
9172 return;
9173
9174 case SFmode:
9175 case DFmode:
9176 case XFmode:
9177 {
9178 rtvec vec;
9179 int use_fcomi;
9180 enum rtx_code bypass_code, first_code, second_code;
9181
9182 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
9183 &ix86_compare_op1);
9184
9185 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9186
9187 /* Check whether we will use the natural sequence with one jump. If
9188 so, we can expand the jump early. Otherwise delay expansion by
9189 creating a compound insn so as not to confuse the optimizers. */
9190 if (bypass_code == UNKNOWN && second_code == UNKNOWN
9191 && TARGET_CMOVE)
9192 {
9193 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
9194 gen_rtx_LABEL_REF (VOIDmode, label),
9195 pc_rtx, NULL_RTX, NULL_RTX);
9196 }
9197 else
9198 {
9199 tmp = gen_rtx_fmt_ee (code, VOIDmode,
9200 ix86_compare_op0, ix86_compare_op1);
9201 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9202 gen_rtx_LABEL_REF (VOIDmode, label),
9203 pc_rtx);
9204 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
9205
9206 use_fcomi = ix86_use_fcomi_compare (code);
9207 vec = rtvec_alloc (3 + !use_fcomi);
9208 RTVEC_ELT (vec, 0) = tmp;
9209 RTVEC_ELT (vec, 1)
9210 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
9211 RTVEC_ELT (vec, 2)
9212 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
9213 if (! use_fcomi)
9214 RTVEC_ELT (vec, 3)
9215 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
9216
9217 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
9218 }
9219 return;
9220 }
9221
9222 case DImode:
9223 if (TARGET_64BIT)
9224 goto simple;
9225 /* Expand DImode branch into multiple compare+branch. */
9226 {
9227 rtx lo[2], hi[2], label2;
9228 enum rtx_code code1, code2, code3;
9229
9230 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
9231 {
9232 tmp = ix86_compare_op0;
9233 ix86_compare_op0 = ix86_compare_op1;
9234 ix86_compare_op1 = tmp;
9235 code = swap_condition (code);
9236 }
9237 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
9238 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
9239
9240 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
9241 avoid two branches. This costs one extra insn, so disable when
9242 optimizing for size. */
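/* E.g. a DImode (a == b) becomes
t = (hi(a) ^ hi(b)) | (lo(a) ^ lo(b));
followed by a single branch on t == 0 (t != 0 for NE). */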
9243
9244 if ((code == EQ || code == NE)
9245 && (!optimize_size
9246 || hi[1] == const0_rtx || lo[1] == const0_rtx))
9247 {
9248 rtx xor0, xor1;
9249
9250 xor1 = hi[0];
9251 if (hi[1] != const0_rtx)
9252 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
9253 NULL_RTX, 0, OPTAB_WIDEN);
9254
9255 xor0 = lo[0];
9256 if (lo[1] != const0_rtx)
9257 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
9258 NULL_RTX, 0, OPTAB_WIDEN);
9259
9260 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
9261 NULL_RTX, 0, OPTAB_WIDEN);
9262
9263 ix86_compare_op0 = tmp;
9264 ix86_compare_op1 = const0_rtx;
9265 ix86_expand_branch (code, label);
9266 return;
9267 }
9268
9269 /* Otherwise, if we are doing a less-than or greater-or-equal comparison,
9270 op1 is a constant and the low word is zero, then we can just
9271 examine the high word. */
9272
9273 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
9274 switch (code)
9275 {
9276 case LT: case LTU: case GE: case GEU:
9277 ix86_compare_op0 = hi[0];
9278 ix86_compare_op1 = hi[1];
9279 ix86_expand_branch (code, label);
9280 return;
9281 default:
9282 break;
9283 }
9284
9285 /* Otherwise, we need two or three jumps. */
9286
9287 label2 = gen_label_rtx ();
9288
9289 code1 = code;
9290 code2 = swap_condition (code);
9291 code3 = unsigned_condition (code);
9292
9293 switch (code)
9294 {
9295 case LT: case GT: case LTU: case GTU:
9296 break;
9297
9298 case LE: code1 = LT; code2 = GT; break;
9299 case GE: code1 = GT; code2 = LT; break;
9300 case LEU: code1 = LTU; code2 = GTU; break;
9301 case GEU: code1 = GTU; code2 = LTU; break;
9302
9303 case EQ: code1 = UNKNOWN; code2 = NE; break;
9304 case NE: code2 = UNKNOWN; break;
9305
9306 default:
9307 gcc_unreachable ();
9308 }
9309
9310 /*
9311 * a < b =>
9312 * if (hi(a) < hi(b)) goto true;
9313 * if (hi(a) > hi(b)) goto false;
9314 * if (lo(a) < lo(b)) goto true;
9315 * false:
9316 */
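/* The same expansion as a minimal C sketch (illustrative only; the
   function name is hypothetical): compare the high words signed, and
   fall back to an unsigned compare of the low words on a tie.  */
#if 0
static int
di_less_than (int hi0, unsigned int lo0, int hi1, unsigned int lo1)
{
  if (hi0 < hi1)
    return 1;          /* if (hi(a) < hi(b)) goto true;  */
  if (hi0 > hi1)
    return 0;          /* if (hi(a) > hi(b)) goto false; */
  return lo0 < lo1;    /* if (lo(a) < lo(b)) goto true;  */
}
#endif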
9317
9318 ix86_compare_op0 = hi[0];
9319 ix86_compare_op1 = hi[1];
9320
9321 if (code1 != UNKNOWN)
9322 ix86_expand_branch (code1, label);
9323 if (code2 != UNKNOWN)
9324 ix86_expand_branch (code2, label2);
9325
9326 ix86_compare_op0 = lo[0];
9327 ix86_compare_op1 = lo[1];
9328 ix86_expand_branch (code3, label);
9329
9330 if (code2 != UNKNOWN)
9331 emit_label (label2);
9332 return;
9333 }
9334
9335 default:
9336 gcc_unreachable ();
9337 }
9338 }
9339
9340 /* Split branch based on floating point condition. */
9341 void
9342 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
9343 rtx target1, rtx target2, rtx tmp, rtx pushed)
9344 {
9345 rtx second, bypass;
9346 rtx label = NULL_RTX;
9347 rtx condition;
9348 int bypass_probability = -1, second_probability = -1, probability = -1;
9349 rtx i;
9350
9351 if (target2 != pc_rtx)
9352 {
9353 rtx tmp = target2;
9354 code = reverse_condition_maybe_unordered (code);
9355 target2 = target1;
9356 target1 = tmp;
9357 }
9358
9359 condition = ix86_expand_fp_compare (code, op1, op2,
9360 tmp, &second, &bypass);
9361
9362 /* Remove pushed operand from stack. */
9363 if (pushed)
9364 ix86_free_from_memory (GET_MODE (pushed));
9365
9366 if (split_branch_probability >= 0)
9367 {
9368 /* Distribute the probabilities across the jumps.
9369 Assume that BYPASS and SECOND always test
9370 for UNORDERED. */
9371 probability = split_branch_probability;
9372
9373 /* A value of 1 is low enough that the probability does not need
9374 to be updated. Later we may run some experiments and see
9375 whether unordered values are more frequent in practice. */
9376 if (bypass)
9377 bypass_probability = 1;
9378 if (second)
9379 second_probability = 1;
9380 }
9381 if (bypass != NULL_RTX)
9382 {
9383 label = gen_label_rtx ();
9384 i = emit_jump_insn (gen_rtx_SET
9385 (VOIDmode, pc_rtx,
9386 gen_rtx_IF_THEN_ELSE (VOIDmode,
9387 bypass,
9388 gen_rtx_LABEL_REF (VOIDmode,
9389 label),
9390 pc_rtx)));
9391 if (bypass_probability >= 0)
9392 REG_NOTES (i)
9393 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9394 GEN_INT (bypass_probability),
9395 REG_NOTES (i));
9396 }
9397 i = emit_jump_insn (gen_rtx_SET
9398 (VOIDmode, pc_rtx,
9399 gen_rtx_IF_THEN_ELSE (VOIDmode,
9400 condition, target1, target2)));
9401 if (probability >= 0)
9402 REG_NOTES (i)
9403 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9404 GEN_INT (probability),
9405 REG_NOTES (i));
9406 if (second != NULL_RTX)
9407 {
9408 i = emit_jump_insn (gen_rtx_SET
9409 (VOIDmode, pc_rtx,
9410 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
9411 target2)));
9412 if (second_probability >= 0)
9413 REG_NOTES (i)
9414 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9415 GEN_INT (second_probability),
9416 REG_NOTES (i));
9417 }
9418 if (label != NULL_RTX)
9419 emit_label (label);
9420 }
9421
9422 int
9423 ix86_expand_setcc (enum rtx_code code, rtx dest)
9424 {
9425 rtx ret, tmp, tmpreg, equiv;
9426 rtx second_test, bypass_test;
9427
9428 if (GET_MODE (ix86_compare_op0) == DImode
9429 && !TARGET_64BIT)
9430 return 0; /* FAIL */
9431
9432 gcc_assert (GET_MODE (dest) == QImode);
9433
9434 ret = ix86_expand_compare (code, &second_test, &bypass_test);
9435 PUT_MODE (ret, QImode);
9436
9437 tmp = dest;
9438 tmpreg = dest;
9439
9440 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
9441 if (bypass_test || second_test)
9442 {
9443 rtx test = second_test;
9444 int bypass = 0;
9445 rtx tmp2 = gen_reg_rtx (QImode);
9446 if (bypass_test)
9447 {
9448 gcc_assert (!second_test);
9449 test = bypass_test;
9450 bypass = 1;
9451 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
9452 }
9453 PUT_MODE (test, QImode);
9454 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
9455
9456 if (bypass)
9457 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
9458 else
9459 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
9460 }
9461
9462 /* Attach a REG_EQUAL note describing the comparison result. */
9463 if (ix86_compare_op0 && ix86_compare_op1)
9464 {
9465 equiv = simplify_gen_relational (code, QImode,
9466 GET_MODE (ix86_compare_op0),
9467 ix86_compare_op0, ix86_compare_op1);
9468 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
9469 }
9470
9471 return 1; /* DONE */
9472 }
9473
9474 /* Expand comparison setting or clearing carry flag. Return true when
9475 successful and set pop for the operation. */
9476 static bool
9477 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
9478 {
9479 enum machine_mode mode =
9480 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
9481
9482 /* Do not handle DImode compares that go through the special path. Also we
9483 can't deal with FP compares yet; it would be possible to add this. */
9484 if ((mode == DImode && !TARGET_64BIT))
9485 return false;
9486 if (FLOAT_MODE_P (mode))
9487 {
9488 rtx second_test = NULL, bypass_test = NULL;
9489 rtx compare_op, compare_seq;
9490
9491 /* Shortcut: the following common codes never translate into carry flag compares. */
9492 if (code == EQ || code == NE || code == UNEQ || code == LTGT
9493 || code == ORDERED || code == UNORDERED)
9494 return false;
9495
9496 /* These comparisons require the zero flag; swap the operands so that they don't. */
9497 if ((code == GT || code == UNLE || code == LE || code == UNGT)
9498 && !TARGET_IEEE_FP)
9499 {
9500 rtx tmp = op0;
9501 op0 = op1;
9502 op1 = tmp;
9503 code = swap_condition (code);
9504 }
9505
9506 /* Try to expand the comparison and verify that we end up with a carry flag
9507 based comparison. This fails to be true only when we decide to expand the
9508 comparison using arithmetic, which is not a common scenario. */
9509 start_sequence ();
9510 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9511 &second_test, &bypass_test);
9512 compare_seq = get_insns ();
9513 end_sequence ();
9514
9515 if (second_test || bypass_test)
9516 return false;
9517 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9518 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9519 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
9520 else
9521 code = GET_CODE (compare_op);
9522 if (code != LTU && code != GEU)
9523 return false;
9524 emit_insn (compare_seq);
9525 *pop = compare_op;
9526 return true;
9527 }
9528 if (!INTEGRAL_MODE_P (mode))
9529 return false;
9530 switch (code)
9531 {
9532 case LTU:
9533 case GEU:
9534 break;
9535
9536 /* Convert a==0 into (unsigned)a<1. */
9537 case EQ:
9538 case NE:
9539 if (op1 != const0_rtx)
9540 return false;
9541 op1 = const1_rtx;
9542 code = (code == EQ ? LTU : GEU);
9543 break;
9544
9545 /* Convert a>b into b<a or a>=b-1. */
9546 case GTU:
9547 case LEU:
9548 if (GET_CODE (op1) == CONST_INT)
9549 {
9550 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
9551 /* Bail out on overflow. We could still swap the operands, but that
9552 would force loading the constant into a register. */
9553 if (op1 == const0_rtx
9554 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
9555 return false;
9556 code = (code == GTU ? GEU : LTU);
9557 }
9558 else
9559 {
9560 rtx tmp = op1;
9561 op1 = op0;
9562 op0 = tmp;
9563 code = (code == GTU ? LTU : GEU);
9564 }
9565 break;
9566
9567 /* Convert a>=0 into (unsigned)a<0x80000000. */
9568 case LT:
9569 case GE:
9570 if (mode == DImode || op1 != const0_rtx)
9571 return false;
9572 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
9573 code = (code == LT ? GEU : LTU);
9574 break;
9575 case LE:
9576 case GT:
9577 if (mode == DImode || op1 != constm1_rtx)
9578 return false;
9579 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
9580 code = (code == LE ? GEU : LTU);
9581 break;
9582
9583 default:
9584 return false;
9585 }
9586 /* Swapping the operands may cause a constant to appear as the first operand. */
9587 if (!nonimmediate_operand (op0, VOIDmode))
9588 {
9589 if (no_new_pseudos)
9590 return false;
9591 op0 = force_reg (mode, op0);
9592 }
9593 ix86_compare_op0 = op0;
9594 ix86_compare_op1 = op1;
9595 *pop = ix86_expand_compare (code, NULL, NULL);
9596 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
9597 return true;
9598 }
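/* The conversions performed above rest on a few unsigned-arithmetic
   identities.  A minimal C sketch of them (illustrative only; the
   function name is hypothetical), assuming 32-bit ints:  */
#if 0
#include <assert.h>
static void
carry_flag_identities (unsigned int a, unsigned int b)
{
  /* a == 0  <=>  (unsigned) a < 1, so EQ becomes LTU.  */
  assert ((a == 0) == (a < 1U));
  /* a > b  <=>  a >= b + 1, provided b + 1 does not wrap to zero,
     so GTU becomes GEU against the incremented constant.  */
  if (b != ~0U)
    assert ((a > b) == (a >= b + 1));
  /* a >= 0 (signed)  <=>  (unsigned) a < 0x80000000, so GE becomes LTU.  */
  assert (((int) a >= 0) == (a < 0x80000000U));
}
#endif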
9599
9600 int
9601 ix86_expand_int_movcc (rtx operands[])
9602 {
9603 enum rtx_code code = GET_CODE (operands[1]), compare_code;
9604 rtx compare_seq, compare_op;
9605 rtx second_test, bypass_test;
9606 enum machine_mode mode = GET_MODE (operands[0]);
9607 bool sign_bit_compare_p = false;
9608
9609 start_sequence ();
9610 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9611 compare_seq = get_insns ();
9612 end_sequence ();
9613
9614 compare_code = GET_CODE (compare_op);
9615
9616 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
9617 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
9618 sign_bit_compare_p = true;
9619
9620 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
9621 HImode insns, we'd be swallowed in word prefix ops. */
9622
9623 if ((mode != HImode || TARGET_FAST_PREFIX)
9624 && (mode != DImode || TARGET_64BIT)
9625 && GET_CODE (operands[2]) == CONST_INT
9626 && GET_CODE (operands[3]) == CONST_INT)
9627 {
9628 rtx out = operands[0];
9629 HOST_WIDE_INT ct = INTVAL (operands[2]);
9630 HOST_WIDE_INT cf = INTVAL (operands[3]);
9631 HOST_WIDE_INT diff;
9632
9633 diff = ct - cf;
9634 /* Sign bit compares are better done using shifts than by using
9635 sbb. */
9636 if (sign_bit_compare_p
9637 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
9638 ix86_compare_op1, &compare_op))
9639 {
9640 /* Detect overlap between destination and compare sources. */
9641 rtx tmp = out;
9642
9643 if (!sign_bit_compare_p)
9644 {
9645 bool fpcmp = false;
9646
9647 compare_code = GET_CODE (compare_op);
9648
9649 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9650 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9651 {
9652 fpcmp = true;
9653 compare_code = ix86_fp_compare_code_to_integer (compare_code);
9654 }
9655
9656 /* To simplify rest of code, restrict to the GEU case. */
9657 if (compare_code == LTU)
9658 {
9659 HOST_WIDE_INT tmp = ct;
9660 ct = cf;
9661 cf = tmp;
9662 compare_code = reverse_condition (compare_code);
9663 code = reverse_condition (code);
9664 }
9665 else
9666 {
9667 if (fpcmp)
9668 PUT_CODE (compare_op,
9669 reverse_condition_maybe_unordered
9670 (GET_CODE (compare_op)));
9671 else
9672 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
9673 }
9674 diff = ct - cf;
9675
9676 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
9677 || reg_overlap_mentioned_p (out, ix86_compare_op1))
9678 tmp = gen_reg_rtx (mode);
9679
9680 if (mode == DImode)
9681 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
9682 else
9683 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
9684 }
9685 else
9686 {
9687 if (code == GT || code == GE)
9688 code = reverse_condition (code);
9689 else
9690 {
9691 HOST_WIDE_INT tmp = ct;
9692 ct = cf;
9693 cf = tmp;
9694 diff = ct - cf;
9695 }
9696 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
9697 ix86_compare_op1, VOIDmode, 0, -1);
9698 }
9699
9700 if (diff == 1)
9701 {
9702 /*
9703 * cmpl op0,op1
9704 * sbbl dest,dest
9705 * [addl dest, ct]
9706 *
9707 * Size 5 - 8.
9708 */
9709 if (ct)
9710 tmp = expand_simple_binop (mode, PLUS,
9711 tmp, GEN_INT (ct),
9712 copy_rtx (tmp), 1, OPTAB_DIRECT);
9713 }
9714 else if (cf == -1)
9715 {
9716 /*
9717 * cmpl op0,op1
9718 * sbbl dest,dest
9719 * orl $ct, dest
9720 *
9721 * Size 8.
9722 */
9723 tmp = expand_simple_binop (mode, IOR,
9724 tmp, GEN_INT (ct),
9725 copy_rtx (tmp), 1, OPTAB_DIRECT);
9726 }
9727 else if (diff == -1 && ct)
9728 {
9729 /*
9730 * cmpl op0,op1
9731 * sbbl dest,dest
9732 * notl dest
9733 * [addl dest, cf]
9734 *
9735 * Size 8 - 11.
9736 */
9737 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9738 if (cf)
9739 tmp = expand_simple_binop (mode, PLUS,
9740 copy_rtx (tmp), GEN_INT (cf),
9741 copy_rtx (tmp), 1, OPTAB_DIRECT);
9742 }
9743 else
9744 {
9745 /*
9746 * cmpl op0,op1
9747 * sbbl dest,dest
9748 * [notl dest]
9749 * andl cf - ct, dest
9750 * [addl dest, ct]
9751 *
9752 * Size 8 - 11.
9753 */
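/* A minimal C sketch of the identity behind the sbb sequences above
   (illustrative only; the function name is hypothetical): the carry
   flag is turned into an all-ones or all-zeros mask, which then picks
   one of the two constants without a branch.  */
#if 0
static int
sbb_select (int carry, int ct, int cf)
{
  int mask = -(carry != 0);        /* sbbl dest,dest: -1 when the carry is set */
  return (mask & (cf - ct)) + ct;  /* cf when the carry is set, ct otherwise */
}
#endif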
9754
9755 if (cf == 0)
9756 {
9757 cf = ct;
9758 ct = 0;
9759 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9760 }
9761
9762 tmp = expand_simple_binop (mode, AND,
9763 copy_rtx (tmp),
9764 gen_int_mode (cf - ct, mode),
9765 copy_rtx (tmp), 1, OPTAB_DIRECT);
9766 if (ct)
9767 tmp = expand_simple_binop (mode, PLUS,
9768 copy_rtx (tmp), GEN_INT (ct),
9769 copy_rtx (tmp), 1, OPTAB_DIRECT);
9770 }
9771
9772 if (!rtx_equal_p (tmp, out))
9773 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
9774
9775 return 1; /* DONE */
9776 }
9777
9778 if (diff < 0)
9779 {
9780 HOST_WIDE_INT tmp;
9781 tmp = ct, ct = cf, cf = tmp;
9782 diff = -diff;
9783 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9784 {
9785 /* We may be reversing an unordered compare to a normal compare, which
9786 is not valid in general (we may convert a non-trapping condition
9787 into a trapping one); however, on i386 we currently emit all
9788 comparisons unordered. */
9789 compare_code = reverse_condition_maybe_unordered (compare_code);
9790 code = reverse_condition_maybe_unordered (code);
9791 }
9792 else
9793 {
9794 compare_code = reverse_condition (compare_code);
9795 code = reverse_condition (code);
9796 }
9797 }
9798
9799 compare_code = UNKNOWN;
9800 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
9801 && GET_CODE (ix86_compare_op1) == CONST_INT)
9802 {
9803 if (ix86_compare_op1 == const0_rtx
9804 && (code == LT || code == GE))
9805 compare_code = code;
9806 else if (ix86_compare_op1 == constm1_rtx)
9807 {
9808 if (code == LE)
9809 compare_code = LT;
9810 else if (code == GT)
9811 compare_code = GE;
9812 }
9813 }
9814
9815 /* Optimize dest = (op0 < 0) ? -1 : cf. */
9816 if (compare_code != UNKNOWN
9817 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
9818 && (cf == -1 || ct == -1))
9819 {
9820 /* If the lea code below could be used, only optimize
9821 if it results in a 2-insn sequence. */
9822
9823 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
9824 || diff == 3 || diff == 5 || diff == 9)
9825 || (compare_code == LT && ct == -1)
9826 || (compare_code == GE && cf == -1))
9827 {
9828 /*
9829 * notl op1 (if necessary)
9830 * sarl $31, op1
9831 * orl cf, op1
9832 */
9833 if (ct != -1)
9834 {
9835 cf = ct;
9836 ct = -1;
9837 code = reverse_condition (code);
9838 }
9839
9840 out = emit_store_flag (out, code, ix86_compare_op0,
9841 ix86_compare_op1, VOIDmode, 0, -1);
9842
9843 out = expand_simple_binop (mode, IOR,
9844 out, GEN_INT (cf),
9845 out, 1, OPTAB_DIRECT);
9846 if (out != operands[0])
9847 emit_move_insn (operands[0], out);
9848
9849 return 1; /* DONE */
9850 }
9851 }
9852
9853
9854 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
9855 || diff == 3 || diff == 5 || diff == 9)
9856 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
9857 && (mode != DImode
9858 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
9859 {
9860 /*
9861 * xorl dest,dest
9862 * cmpl op1,op2
9863 * setcc dest
9864 * lea cf(dest*(ct-cf)),dest
9865 *
9866 * Size 14.
9867 *
9868 * This also catches the degenerate setcc-only case.
9869 */
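/* A minimal C sketch of the lea-based selection above (illustrative
   only; the function name is hypothetical): the 0/1 setcc result is
   scaled and offset so that the lea picks ct or cf directly.  */
#if 0
static int
setcc_lea_select (int cond, int ct, int cf)
{
  int bit = (cond != 0);        /* setcc: 0 or 1 */
  return cf + bit * (ct - cf);  /* lea cf(dest*(ct-cf)),dest */
}
#endif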
9870
9871 rtx tmp;
9872 int nops;
9873
9874 out = emit_store_flag (out, code, ix86_compare_op0,
9875 ix86_compare_op1, VOIDmode, 0, 1);
9876
9877 nops = 0;
9878 /* On x86_64 the lea instruction operates on Pmode, so we need
9879 to do the arithmetic in the proper mode to match. */
9880 if (diff == 1)
9881 tmp = copy_rtx (out);
9882 else
9883 {
9884 rtx out1;
9885 out1 = copy_rtx (out);
9886 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
9887 nops++;
9888 if (diff & 1)
9889 {
9890 tmp = gen_rtx_PLUS (mode, tmp, out1);
9891 nops++;
9892 }
9893 }
9894 if (cf != 0)
9895 {
9896 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
9897 nops++;
9898 }
9899 if (!rtx_equal_p (tmp, out))
9900 {
9901 if (nops == 1)
9902 out = force_operand (tmp, copy_rtx (out));
9903 else
9904 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
9905 }
9906 if (!rtx_equal_p (out, operands[0]))
9907 emit_move_insn (operands[0], copy_rtx (out));
9908
9909 return 1; /* DONE */
9910 }
9911
9912 /*
9913 * General case: Jumpful:
9914 * xorl dest,dest cmpl op1, op2
9915 * cmpl op1, op2 movl ct, dest
9916 * setcc dest jcc 1f
9917 * decl dest movl cf, dest
9918 * andl (cf-ct),dest 1:
9919 * addl ct,dest
9920 *
9921 * Size 20. Size 14.
9922 *
9923 * This is reasonably steep, but branch mispredict costs are
9924 * high on modern cpus, so consider failing only if optimizing
9925 * for space.
9926 */
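/* A minimal C sketch of the branchless sequence on the left
   (illustrative only; the function name is hypothetical): setcc
   produces 0/1, the decrement turns that into a 0/-1 mask, and the
   and/add pick between the constants, mirroring the sbb variant above.  */
#if 0
static int
setcc_mask_select (int cond, int ct, int cf)
{
  int mask = (cond != 0) - 1;      /* setcc ; decl: 0 when cond holds, -1 otherwise */
  return (mask & (cf - ct)) + ct;  /* ct when cond holds, cf otherwise */
}
#endif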
9927
9928 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
9929 && BRANCH_COST >= 2)
9930 {
9931 if (cf == 0)
9932 {
9933 cf = ct;
9934 ct = 0;
9935 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9936 /* We may be reversing an unordered compare to a normal compare,
9937 which is not valid in general (we may convert a non-trapping
9938 condition into a trapping one); however, on i386 we currently
9939 emit all comparisons unordered. */
9940 code = reverse_condition_maybe_unordered (code);
9941 else
9942 {
9943 code = reverse_condition (code);
9944 if (compare_code != UNKNOWN)
9945 compare_code = reverse_condition (compare_code);
9946 }
9947 }
9948
9949 if (compare_code != UNKNOWN)
9950 {
9951 /* notl op1 (if needed)
9952 sarl $31, op1
9953 andl (cf-ct), op1
9954 addl ct, op1
9955
9956 For x < 0 (resp. x <= -1) there will be no notl,
9957 so if possible swap the constants to get rid of the
9958 complement.
9959 True/false will be -1/0 while code below (store flag
9960 followed by decrement) is 0/-1, so the constants need
9961 to be exchanged once more. */
9962
9963 if (compare_code == GE || !cf)
9964 {
9965 code = reverse_condition (code);
9966 compare_code = LT;
9967 }
9968 else
9969 {
9970 HOST_WIDE_INT tmp = cf;
9971 cf = ct;
9972 ct = tmp;
9973 }
9974
9975 out = emit_store_flag (out, code, ix86_compare_op0,
9976 ix86_compare_op1, VOIDmode, 0, -1);
9977 }
9978 else
9979 {
9980 out = emit_store_flag (out, code, ix86_compare_op0,
9981 ix86_compare_op1, VOIDmode, 0, 1);
9982
9983 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
9984 copy_rtx (out), 1, OPTAB_DIRECT);
9985 }
9986
9987 out = expand_simple_binop (mode, AND, copy_rtx (out),
9988 gen_int_mode (cf - ct, mode),
9989 copy_rtx (out), 1, OPTAB_DIRECT);
9990 if (ct)
9991 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
9992 copy_rtx (out), 1, OPTAB_DIRECT);
9993 if (!rtx_equal_p (out, operands[0]))
9994 emit_move_insn (operands[0], copy_rtx (out));
9995
9996 return 1; /* DONE */
9997 }
9998 }
9999
10000 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
10001 {
10002 /* Try a few things more with specific constants and a variable. */
10003
10004 optab op;
10005 rtx var, orig_out, out, tmp;
10006
10007 if (BRANCH_COST <= 2)
10008 return 0; /* FAIL */
10009
10010 /* If one of the two operands is an interesting constant, load a
10011 constant with the above and mask it in with a logical operation. */
10012
10013 if (GET_CODE (operands[2]) == CONST_INT)
10014 {
10015 var = operands[3];
10016 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
10017 operands[3] = constm1_rtx, op = and_optab;
10018 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
10019 operands[3] = const0_rtx, op = ior_optab;
10020 else
10021 return 0; /* FAIL */
10022 }
10023 else if (GET_CODE (operands[3]) == CONST_INT)
10024 {
10025 var = operands[2];
10026 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
10027 operands[2] = constm1_rtx, op = and_optab;
10028 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
10029 operands[2] = const0_rtx, op = ior_optab;
10030 else
10031 return 0; /* FAIL */
10032 }
10033 else
10034 return 0; /* FAIL */
10035
10036 orig_out = operands[0];
10037 tmp = gen_reg_rtx (mode);
10038 operands[0] = tmp;
10039
10040 /* Recurse to get the constant loaded. */
10041 if (ix86_expand_int_movcc (operands) == 0)
10042 return 0; /* FAIL */
10043
10044 /* Mask in the interesting variable. */
10045 out = expand_binop (mode, op, var, tmp, orig_out, 0,
10046 OPTAB_WIDEN);
10047 if (!rtx_equal_p (out, orig_out))
10048 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
10049
10050 return 1; /* DONE */
10051 }
10052
10053 /*
10054 * For comparison with above,
10055 *
10056 * movl cf,dest
10057 * movl ct,tmp
10058 * cmpl op1,op2
10059 * cmovcc tmp,dest
10060 *
10061 * Size 15.
10062 */
10063
10064 if (! nonimmediate_operand (operands[2], mode))
10065 operands[2] = force_reg (mode, operands[2]);
10066 if (! nonimmediate_operand (operands[3], mode))
10067 operands[3] = force_reg (mode, operands[3]);
10068
10069 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10070 {
10071 rtx tmp = gen_reg_rtx (mode);
10072 emit_move_insn (tmp, operands[3]);
10073 operands[3] = tmp;
10074 }
10075 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10076 {
10077 rtx tmp = gen_reg_rtx (mode);
10078 emit_move_insn (tmp, operands[2]);
10079 operands[2] = tmp;
10080 }
10081
10082 if (! register_operand (operands[2], VOIDmode)
10083 && (mode == QImode
10084 || ! register_operand (operands[3], VOIDmode)))
10085 operands[2] = force_reg (mode, operands[2]);
10086
10087 if (mode == QImode
10088 && ! register_operand (operands[3], VOIDmode))
10089 operands[3] = force_reg (mode, operands[3]);
10090
10091 emit_insn (compare_seq);
10092 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10093 gen_rtx_IF_THEN_ELSE (mode,
10094 compare_op, operands[2],
10095 operands[3])));
10096 if (bypass_test)
10097 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
10098 gen_rtx_IF_THEN_ELSE (mode,
10099 bypass_test,
10100 copy_rtx (operands[3]),
10101 copy_rtx (operands[0]))));
10102 if (second_test)
10103 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
10104 gen_rtx_IF_THEN_ELSE (mode,
10105 second_test,
10106 copy_rtx (operands[2]),
10107 copy_rtx (operands[0]))));
10108
10109 return 1; /* DONE */
10110 }
10111
10112 /* Swap, force into registers, or otherwise massage the two operands
10113 to an sse comparison with a mask result. Thus we differ a bit from
10114 ix86_prepare_fp_compare_args which expects to produce a flags result.
10115
10116 The DEST operand exists to help determine whether to commute commutative
10117 operators. The POP0/POP1 operands are updated in place. The new
10118 comparison code is returned, or UNKNOWN if not implementable. */
10119
10120 static enum rtx_code
10121 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
10122 rtx *pop0, rtx *pop1)
10123 {
10124 rtx tmp;
10125
10126 switch (code)
10127 {
10128 case LTGT:
10129 case UNEQ:
10130 /* We have no LTGT as an operator. We could implement it with
10131 NE & ORDERED, but this requires an extra temporary. It's
10132 not clear that it's worth it. */
10133 return UNKNOWN;
10134
10135 case LT:
10136 case LE:
10137 case UNGT:
10138 case UNGE:
10139 /* These are supported directly. */
10140 break;
10141
10142 case EQ:
10143 case NE:
10144 case UNORDERED:
10145 case ORDERED:
10146 /* For commutative operators, try to canonicalize the destination
10147 operand to be first in the comparison - this helps reload to
10148 avoid extra moves. */
10149 if (!dest || !rtx_equal_p (dest, *pop1))
10150 break;
10151 /* FALLTHRU */
10152
10153 case GE:
10154 case GT:
10155 case UNLE:
10156 case UNLT:
10157 /* These are not supported directly. Swap the comparison operands
10158 to transform into something that is supported. */
10159 tmp = *pop0;
10160 *pop0 = *pop1;
10161 *pop1 = tmp;
10162 code = swap_condition (code);
10163 break;
10164
10165 default:
10166 gcc_unreachable ();
10167 }
10168
10169 return code;
10170 }
10171
10172 /* Detect conditional moves that exactly match min/max operational
10173 semantics. Note that this is IEEE safe, as long as we don't
10174 interchange the operands.
10175
10176 Returns FALSE if this conditional move doesn't match a MIN/MAX,
10177 and TRUE if the operation is successful and instructions are emitted. */
10178
10179 static bool
10180 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
10181 rtx cmp_op1, rtx if_true, rtx if_false)
10182 {
10183 enum machine_mode mode;
10184 bool is_min;
10185 rtx tmp;
10186
10187 if (code == LT)
10188 ;
10189 else if (code == UNGE)
10190 {
10191 tmp = if_true;
10192 if_true = if_false;
10193 if_false = tmp;
10194 }
10195 else
10196 return false;
10197
10198 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
10199 is_min = true;
10200 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
10201 is_min = false;
10202 else
10203 return false;
10204
10205 mode = GET_MODE (dest);
10206
10207 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
10208 but MODE may be a vector mode and thus not appropriate. */
10209 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
10210 {
10211 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
10212 rtvec v;
10213
10214 if_true = force_reg (mode, if_true);
10215 v = gen_rtvec (2, if_true, if_false);
10216 tmp = gen_rtx_UNSPEC (mode, v, u);
10217 }
10218 else
10219 {
10220 code = is_min ? SMIN : SMAX;
10221 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
10222 }
10223
10224 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
10225 return true;
10226 }
10227
10228 /* Expand an sse vector comparison. Return the register with the result. */
10229
10230 static rtx
10231 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
10232 rtx op_true, rtx op_false)
10233 {
10234 enum machine_mode mode = GET_MODE (dest);
10235 rtx x;
10236
10237 cmp_op0 = force_reg (mode, cmp_op0);
10238 if (!nonimmediate_operand (cmp_op1, mode))
10239 cmp_op1 = force_reg (mode, cmp_op1);
10240
10241 if (optimize
10242 || reg_overlap_mentioned_p (dest, op_true)
10243 || reg_overlap_mentioned_p (dest, op_false))
10244 dest = gen_reg_rtx (mode);
10245
10246 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
10247 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10248
10249 return dest;
10250 }
10251
10252 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
10253 operations. This is used for both scalar and vector conditional moves. */
10254
10255 static void
10256 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
10257 {
10258 enum machine_mode mode = GET_MODE (dest);
10259 rtx t2, t3, x;
10260
10261 if (op_false == CONST0_RTX (mode))
10262 {
10263 op_true = force_reg (mode, op_true);
10264 x = gen_rtx_AND (mode, cmp, op_true);
10265 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10266 }
10267 else if (op_true == CONST0_RTX (mode))
10268 {
10269 op_false = force_reg (mode, op_false);
10270 x = gen_rtx_NOT (mode, cmp);
10271 x = gen_rtx_AND (mode, x, op_false);
10272 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10273 }
10274 else
10275 {
10276 op_true = force_reg (mode, op_true);
10277 op_false = force_reg (mode, op_false);
10278
10279 t2 = gen_reg_rtx (mode);
10280 if (optimize)
10281 t3 = gen_reg_rtx (mode);
10282 else
10283 t3 = dest;
10284
10285 x = gen_rtx_AND (mode, op_true, cmp);
10286 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
10287
10288 x = gen_rtx_NOT (mode, cmp);
10289 x = gen_rtx_AND (mode, x, op_false);
10290 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
10291
10292 x = gen_rtx_IOR (mode, t3, t2);
10293 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10294 }
10295 }
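/* A minimal C sketch of the masking trick used above (illustrative
   only; the function name is hypothetical): CMP is all-ones or
   all-zeros per element, so AND/ANDN/IOR implement the select.  */
#if 0
static unsigned int
mask_blend (unsigned int cmp, unsigned int op_true, unsigned int op_false)
{
  return (cmp & op_true) | (~cmp & op_false);
}
#endif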
10296
10297 /* Expand a floating-point conditional move. Return true if successful. */
10298
10299 int
10300 ix86_expand_fp_movcc (rtx operands[])
10301 {
10302 enum machine_mode mode = GET_MODE (operands[0]);
10303 enum rtx_code code = GET_CODE (operands[1]);
10304 rtx tmp, compare_op, second_test, bypass_test;
10305
10306 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
10307 {
10308 enum machine_mode cmode;
10309
10310 /* Since we have no cmove for SSE registers, don't force bad register
10311 allocation just to gain access to one. Deny the movcc when the
10312 comparison mode doesn't match the move mode. */
10313 cmode = GET_MODE (ix86_compare_op0);
10314 if (cmode == VOIDmode)
10315 cmode = GET_MODE (ix86_compare_op1);
10316 if (cmode != mode)
10317 return 0;
10318
10319 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
10320 &ix86_compare_op0,
10321 &ix86_compare_op1);
10322 if (code == UNKNOWN)
10323 return 0;
10324
10325 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
10326 ix86_compare_op1, operands[2],
10327 operands[3]))
10328 return 1;
10329
10330 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
10331 ix86_compare_op1, operands[2], operands[3]);
10332 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
10333 return 1;
10334 }
10335
10336 /* The floating point conditional move instructions don't directly
10337 support conditions resulting from a signed integer comparison. */
10338
10339 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10340
10344 if (!fcmov_comparison_operator (compare_op, VOIDmode))
10345 {
10346 gcc_assert (!second_test && !bypass_test);
10347 tmp = gen_reg_rtx (QImode);
10348 ix86_expand_setcc (code, tmp);
10349 code = NE;
10350 ix86_compare_op0 = tmp;
10351 ix86_compare_op1 = const0_rtx;
10352 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10353 }
10354 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10355 {
10356 tmp = gen_reg_rtx (mode);
10357 emit_move_insn (tmp, operands[3]);
10358 operands[3] = tmp;
10359 }
10360 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10361 {
10362 tmp = gen_reg_rtx (mode);
10363 emit_move_insn (tmp, operands[2]);
10364 operands[2] = tmp;
10365 }
10366
10367 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10368 gen_rtx_IF_THEN_ELSE (mode, compare_op,
10369 operands[2], operands[3])));
10370 if (bypass_test)
10371 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10372 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
10373 operands[3], operands[0])));
10374 if (second_test)
10375 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10376 gen_rtx_IF_THEN_ELSE (mode, second_test,
10377 operands[2], operands[0])));
10378
10379 return 1;
10380 }
10381
10382 /* Expand a floating-point vector conditional move; a vcond operation
10383 rather than a movcc operation. */
10384
10385 bool
10386 ix86_expand_fp_vcond (rtx operands[])
10387 {
10388 enum rtx_code code = GET_CODE (operands[3]);
10389 rtx cmp;
10390
10391 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
10392 &operands[4], &operands[5]);
10393 if (code == UNKNOWN)
10394 return false;
10395
10396 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
10397 operands[5], operands[1], operands[2]))
10398 return true;
10399
10400 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
10401 operands[1], operands[2]);
10402 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
10403 return true;
10404 }
10405
10406 /* Expand a signed or unsigned integral vector conditional move. */
10407
10408 bool
10409 ix86_expand_int_vcond (rtx operands[], bool unsignedp)
10410 {
10411 enum machine_mode mode = GET_MODE (operands[0]);
10412 enum rtx_code code = GET_CODE (operands[3]);
10413 rtx cmp, x;
10414
10415 if (unsignedp)
10416 code = signed_condition (code);
10417 if (code == NE || code == LE || code == GE)
10418 {
10419 /* Inverse of a supported code. */
10420 x = operands[1];
10421 operands[1] = operands[2];
10422 operands[2] = x;
10423 code = reverse_condition (code);
10424 }
10425 if (code == LT)
10426 {
10427 /* Swap of a supported code. */
10428 x = operands[4];
10429 operands[4] = operands[5];
10430 operands[5] = x;
10431 code = swap_condition (code);
10432 }
10433 gcc_assert (code == EQ || code == GT);
10434
10435 /* Unlike floating-point, we can rely on the optimizers to have already
10436 converted to MIN/MAX expressions, so we don't have to handle that. */
10437
10438 /* Unsigned GT is not directly supported. We can zero-extend QI and
10439 HImode elements to the next wider element size, use a signed compare,
10440 then repack. For three extra instructions, this is definitely a win. */
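/* A minimal C sketch of the identity this relies on (illustrative only;
   the function name is hypothetical): once both elements are
   zero-extended into a wider signed type, a signed compare gives the
   unsigned ordering.  */
#if 0
#include <stdint.h>
static int
unsigned_gt_via_signed (uint8_t a, uint8_t b)
{
  return (int16_t) a > (int16_t) b;   /* same result as a > b on uint8_t */
}
#endif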
10441 if (code == GT && unsignedp)
10442 {
10443 rtx o0l, o0h, o1l, o1h, cl, ch, zero;
10444 enum machine_mode wider;
10445 rtx (*unpackl) (rtx, rtx, rtx);
10446 rtx (*unpackh) (rtx, rtx, rtx);
10447 rtx (*pack) (rtx, rtx, rtx);
10448
10449 switch (mode)
10450 {
10451 case V16QImode:
10452 wider = V8HImode;
10453 unpackl = gen_sse2_punpcklbw;
10454 unpackh = gen_sse2_punpckhbw;
10455 pack = gen_sse2_packsswb;
10456 break;
10457 case V8HImode:
10458 wider = V4SImode;
10459 unpackl = gen_sse2_punpcklwd;
10460 unpackh = gen_sse2_punpckhwd;
10461 pack = gen_sse2_packssdw;
10462 break;
10463 default:
10464 gcc_unreachable ();
10465 }
10466
10467 operands[4] = force_reg (mode, operands[4]);
10468 operands[5] = force_reg (mode, operands[5]);
10469
10470 o0l = gen_reg_rtx (wider);
10471 o0h = gen_reg_rtx (wider);
10472 o1l = gen_reg_rtx (wider);
10473 o1h = gen_reg_rtx (wider);
10474 cl = gen_reg_rtx (wider);
10475 ch = gen_reg_rtx (wider);
10476 cmp = gen_reg_rtx (mode);
10477 zero = force_reg (mode, CONST0_RTX (mode));
10478
10479 emit_insn (unpackl (gen_lowpart (mode, o0l), operands[4], zero));
10480 emit_insn (unpackh (gen_lowpart (mode, o0h), operands[4], zero));
10481 emit_insn (unpackl (gen_lowpart (mode, o1l), operands[5], zero));
10482 emit_insn (unpackh (gen_lowpart (mode, o1h), operands[5], zero));
10483
10484 x = gen_rtx_GT (wider, o0l, o1l);
10485 emit_insn (gen_rtx_SET (VOIDmode, cl, x));
10486
10487 x = gen_rtx_GT (wider, o0h, o1h);
10488 emit_insn (gen_rtx_SET (VOIDmode, ch, x));
10489
10490 emit_insn (pack (cmp, cl, ch));
10491 }
10492 else
10493 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
10494 operands[1], operands[2]);
10495
10496 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
10497 return true;
10498 }
10499
10500 /* Expand conditional increment or decrement using adc/sbb instructions.
10501 The default case using setcc followed by the conditional move can be
10502 done by generic code. */
10503 int
10504 ix86_expand_int_addcc (rtx operands[])
10505 {
10506 enum rtx_code code = GET_CODE (operands[1]);
10507 rtx compare_op;
10508 rtx val = const0_rtx;
10509 bool fpcmp = false;
10510 enum machine_mode mode = GET_MODE (operands[0]);
10511
10512 if (operands[3] != const1_rtx
10513 && operands[3] != constm1_rtx)
10514 return 0;
10515 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
10516 ix86_compare_op1, &compare_op))
10517 return 0;
10518 code = GET_CODE (compare_op);
10519
10520 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10521 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10522 {
10523 fpcmp = true;
10524 code = ix86_fp_compare_code_to_integer (code);
10525 }
10526
10527 if (code != LTU)
10528 {
10529 val = constm1_rtx;
10530 if (fpcmp)
10531 PUT_CODE (compare_op,
10532 reverse_condition_maybe_unordered
10533 (GET_CODE (compare_op)));
10534 else
10535 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
10536 }
10537 PUT_MODE (compare_op, mode);
10538
10539 /* Construct either adc or sbb insn. */
10540 if ((code == LTU) == (operands[3] == constm1_rtx))
10541 {
10542 switch (GET_MODE (operands[0]))
10543 {
10544 case QImode:
10545 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
10546 break;
10547 case HImode:
10548 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
10549 break;
10550 case SImode:
10551 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
10552 break;
10553 case DImode:
10554 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10555 break;
10556 default:
10557 gcc_unreachable ();
10558 }
10559 }
10560 else
10561 {
10562 switch (GET_MODE (operands[0]))
10563 {
10564 case QImode:
10565 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
10566 break;
10567 case HImode:
10568 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
10569 break;
10570 case SImode:
10571 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
10572 break;
10573 case DImode:
10574 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10575 break;
10576 default:
10577 gcc_unreachable ();
10578 }
10579 }
10580 return 1; /* DONE */
10581 }
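/* A minimal C sketch of what the adc/sbb forms above compute
   (illustrative only; the function names are hypothetical): the carry
   produced by an unsigned compare feeds directly into the add or
   subtract, giving a branchless conditional increment or decrement.  */
#if 0
static unsigned int
cond_increment (unsigned int a, unsigned int b, unsigned int x)
{
  return x + (a < b);   /* carry from the compare is added in via adc */
}

static unsigned int
cond_decrement (unsigned int a, unsigned int b, unsigned int x)
{
  return x - (a < b);   /* carry from the compare is subtracted via sbb */
}
#endif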
10582
10583
10584 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
10585 works for floating point parameters and non-offsettable memories.
10586 For pushes, it returns just stack offsets; the values will be saved
10587 in the right order. At most three parts are generated. */
10588
10589 static int
10590 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
10591 {
10592 int size;
10593
10594 if (!TARGET_64BIT)
10595 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
10596 else
10597 size = (GET_MODE_SIZE (mode) + 4) / 8;
10598
10599 gcc_assert (GET_CODE (operand) != REG || !MMX_REGNO_P (REGNO (operand)));
10600 gcc_assert (size >= 2 && size <= 3);
10601
10602 /* Optimize constant pool references to immediates. This is used by fp
10603 moves, which force all constants to memory to allow combining. */
10604 if (GET_CODE (operand) == MEM && MEM_READONLY_P (operand))
10605 {
10606 rtx tmp = maybe_get_pool_constant (operand);
10607 if (tmp)
10608 operand = tmp;
10609 }
10610
10611 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
10612 {
10613 /* The only non-offsettable memories we handle are pushes. */
10614 int ok = push_operand (operand, VOIDmode);
10615
10616 gcc_assert (ok);
10617
10618 operand = copy_rtx (operand);
10619 PUT_MODE (operand, Pmode);
10620 parts[0] = parts[1] = parts[2] = operand;
10621 return size;
10622 }
10623
10624 if (GET_CODE (operand) == CONST_VECTOR)
10625 {
10626 enum machine_mode imode = int_mode_for_mode (mode);
10627 /* Caution: if we looked through a constant pool memory above,
10628 the operand may actually have a different mode now. That's
10629 ok, since we want to pun this all the way back to an integer. */
10630 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
10631 gcc_assert (operand != NULL);
10632 mode = imode;
10633 }
10634
10635 if (!TARGET_64BIT)
10636 {
10637 if (mode == DImode)
10638 split_di (&operand, 1, &parts[0], &parts[1]);
10639 else
10640 {
10641 if (REG_P (operand))
10642 {
10643 gcc_assert (reload_completed);
10644 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
10645 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
10646 if (size == 3)
10647 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
10648 }
10649 else if (offsettable_memref_p (operand))
10650 {
10651 operand = adjust_address (operand, SImode, 0);
10652 parts[0] = operand;
10653 parts[1] = adjust_address (operand, SImode, 4);
10654 if (size == 3)
10655 parts[2] = adjust_address (operand, SImode, 8);
10656 }
10657 else if (GET_CODE (operand) == CONST_DOUBLE)
10658 {
10659 REAL_VALUE_TYPE r;
10660 long l[4];
10661
10662 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10663 switch (mode)
10664 {
10665 case XFmode:
10666 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10667 parts[2] = gen_int_mode (l[2], SImode);
10668 break;
10669 case DFmode:
10670 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
10671 break;
10672 default:
10673 gcc_unreachable ();
10674 }
10675 parts[1] = gen_int_mode (l[1], SImode);
10676 parts[0] = gen_int_mode (l[0], SImode);
10677 }
10678 else
10679 gcc_unreachable ();
10680 }
10681 }
10682 else
10683 {
10684 if (mode == TImode)
10685 split_ti (&operand, 1, &parts[0], &parts[1]);
10686 if (mode == XFmode || mode == TFmode)
10687 {
10688 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
10689 if (REG_P (operand))
10690 {
10691 gcc_assert (reload_completed);
10692 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
10693 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
10694 }
10695 else if (offsettable_memref_p (operand))
10696 {
10697 operand = adjust_address (operand, DImode, 0);
10698 parts[0] = operand;
10699 parts[1] = adjust_address (operand, upper_mode, 8);
10700 }
10701 else if (GET_CODE (operand) == CONST_DOUBLE)
10702 {
10703 REAL_VALUE_TYPE r;
10704 long l[4];
10705
10706 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10707 real_to_target (l, &r, mode);
10708
10709 /* Do not use shift by 32 to avoid warning on 32bit systems. */
10710 if (HOST_BITS_PER_WIDE_INT >= 64)
10711 parts[0]
10712 = gen_int_mode
10713 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
10714 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
10715 DImode);
10716 else
10717 parts[0] = immed_double_const (l[0], l[1], DImode);
10718
10719 if (upper_mode == SImode)
10720 parts[1] = gen_int_mode (l[2], SImode);
10721 else if (HOST_BITS_PER_WIDE_INT >= 64)
10722 parts[1]
10723 = gen_int_mode
10724 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
10725 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
10726 DImode);
10727 else
10728 parts[1] = immed_double_const (l[2], l[3], DImode);
10729 }
10730 else
10731 gcc_unreachable ();
10732 }
10733 }
10734
10735 return size;
10736 }
10737
10738 /* Emit insns to perform a move or push of DI, DF, and XF values.
10739 Return false when normal moves are needed; true when all required
10740 insns have been emitted. Operands 2-4 contain the input values
10741 in the correct order; operands 5-7 contain the output values. */
10742
10743 void
10744 ix86_split_long_move (rtx operands[])
10745 {
10746 rtx part[2][3];
10747 int nparts;
10748 int push = 0;
10749 int collisions = 0;
10750 enum machine_mode mode = GET_MODE (operands[0]);
10751
10752 /* The DFmode expanders may ask us to move a double.
10753 For a 64-bit target this is a single move. By hiding that fact
10754 here we simplify the i386.md splitters. */
10755 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
10756 {
10757 /* Optimize constant pool reference to immediates. This is used by
10758 fp moves, that force all constants to memory to allow combining. */
10759
10760 if (GET_CODE (operands[1]) == MEM
10761 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
10762 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
10763 operands[1] = get_pool_constant (XEXP (operands[1], 0));
10764 if (push_operand (operands[0], VOIDmode))
10765 {
10766 operands[0] = copy_rtx (operands[0]);
10767 PUT_MODE (operands[0], Pmode);
10768 }
10769 else
10770 operands[0] = gen_lowpart (DImode, operands[0]);
10771 operands[1] = gen_lowpart (DImode, operands[1]);
10772 emit_move_insn (operands[0], operands[1]);
10773 return;
10774 }
10775
10776 /* The only non-offsettable memory we handle is push. */
10777 if (push_operand (operands[0], VOIDmode))
10778 push = 1;
10779 else
10780 gcc_assert (GET_CODE (operands[0]) != MEM
10781 || offsettable_memref_p (operands[0]));
10782
10783 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
10784 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
10785
10786 /* When emitting a push, take care of source operands on the stack. */
10787 if (push && GET_CODE (operands[1]) == MEM
10788 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
10789 {
10790 if (nparts == 3)
10791 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
10792 XEXP (part[1][2], 0));
10793 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
10794 XEXP (part[1][1], 0));
10795 }
10796
10797 /* We need to do the copy in the right order in case an address register
10798 of the source overlaps the destination. */
10799 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
10800 {
10801 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
10802 collisions++;
10803 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10804 collisions++;
10805 if (nparts == 3
10806 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
10807 collisions++;
10808
10809 /* Collision in the middle part can be handled by reordering. */
10810 if (collisions == 1 && nparts == 3
10811 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10812 {
10813 rtx tmp;
10814 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
10815 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
10816 }
10817
10818 /* If there are more collisions, we can't handle it by reordering.
10819 Do an lea to the last part and use only one colliding move. */
10820 else if (collisions > 1)
10821 {
10822 rtx base;
10823
10824 collisions = 1;
10825
10826 base = part[0][nparts - 1];
10827
10828 /* Handle the case when the last part isn't valid for lea.
10829 Happens in 64-bit mode storing the 12-byte XFmode. */
10830 if (GET_MODE (base) != Pmode)
10831 base = gen_rtx_REG (Pmode, REGNO (base));
10832
10833 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
10834 part[1][0] = replace_equiv_address (part[1][0], base);
10835 part[1][1] = replace_equiv_address (part[1][1],
10836 plus_constant (base, UNITS_PER_WORD));
10837 if (nparts == 3)
10838 part[1][2] = replace_equiv_address (part[1][2],
10839 plus_constant (base, 8));
10840 }
10841 }
10842
10843 if (push)
10844 {
10845 if (!TARGET_64BIT)
10846 {
10847 if (nparts == 3)
10848 {
10849 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
10850 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
10851 emit_move_insn (part[0][2], part[1][2]);
10852 }
10853 }
10854 else
10855 {
10856 /* In 64-bit mode we don't have a 32-bit push available. In case this is
10857 a register, that is OK - we will just use the larger counterpart. We also
10858 retype the memory - this comes from an attempt to avoid the REX prefix
10859 when moving the second half of a TFmode value. */
10860 if (GET_MODE (part[1][1]) == SImode)
10861 {
10862 switch (GET_CODE (part[1][1]))
10863 {
10864 case MEM:
10865 part[1][1] = adjust_address (part[1][1], DImode, 0);
10866 break;
10867
10868 case REG:
10869 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
10870 break;
10871
10872 default:
10873 gcc_unreachable ();
10874 }
10875
10876 if (GET_MODE (part[1][0]) == SImode)
10877 part[1][0] = part[1][1];
10878 }
10879 }
10880 emit_move_insn (part[0][1], part[1][1]);
10881 emit_move_insn (part[0][0], part[1][0]);
10882 return;
10883 }
10884
10885 /* Choose the correct order so as not to overwrite the source before it is copied. */
10886 if ((REG_P (part[0][0])
10887 && REG_P (part[1][1])
10888 && (REGNO (part[0][0]) == REGNO (part[1][1])
10889 || (nparts == 3
10890 && REGNO (part[0][0]) == REGNO (part[1][2]))))
10891 || (collisions > 0
10892 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
10893 {
10894 if (nparts == 3)
10895 {
10896 operands[2] = part[0][2];
10897 operands[3] = part[0][1];
10898 operands[4] = part[0][0];
10899 operands[5] = part[1][2];
10900 operands[6] = part[1][1];
10901 operands[7] = part[1][0];
10902 }
10903 else
10904 {
10905 operands[2] = part[0][1];
10906 operands[3] = part[0][0];
10907 operands[5] = part[1][1];
10908 operands[6] = part[1][0];
10909 }
10910 }
10911 else
10912 {
10913 if (nparts == 3)
10914 {
10915 operands[2] = part[0][0];
10916 operands[3] = part[0][1];
10917 operands[4] = part[0][2];
10918 operands[5] = part[1][0];
10919 operands[6] = part[1][1];
10920 operands[7] = part[1][2];
10921 }
10922 else
10923 {
10924 operands[2] = part[0][0];
10925 operands[3] = part[0][1];
10926 operands[5] = part[1][0];
10927 operands[6] = part[1][1];
10928 }
10929 }
10930
10931 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
10932 if (optimize_size)
10933 {
10934 if (GET_CODE (operands[5]) == CONST_INT
10935 && operands[5] != const0_rtx
10936 && REG_P (operands[2]))
10937 {
10938 if (GET_CODE (operands[6]) == CONST_INT
10939 && INTVAL (operands[6]) == INTVAL (operands[5]))
10940 operands[6] = operands[2];
10941
10942 if (nparts == 3
10943 && GET_CODE (operands[7]) == CONST_INT
10944 && INTVAL (operands[7]) == INTVAL (operands[5]))
10945 operands[7] = operands[2];
10946 }
10947
10948 if (nparts == 3
10949 && GET_CODE (operands[6]) == CONST_INT
10950 && operands[6] != const0_rtx
10951 && REG_P (operands[3])
10952 && GET_CODE (operands[7]) == CONST_INT
10953 && INTVAL (operands[7]) == INTVAL (operands[6]))
10954 operands[7] = operands[3];
10955 }
10956
10957 emit_move_insn (operands[2], operands[5]);
10958 emit_move_insn (operands[3], operands[6]);
10959 if (nparts == 3)
10960 emit_move_insn (operands[4], operands[7]);
10961
10962 return;
10963 }
10964
10965 /* Helper function of ix86_split_ashldi used to generate an SImode
10966 left shift by a constant, either using a single shift or
10967 a sequence of add instructions. */
10968
10969 static void
10970 ix86_expand_ashlsi3_const (rtx operand, int count)
10971 {
10972 if (count == 1)
10973 emit_insn (gen_addsi3 (operand, operand, operand));
10974 else if (!optimize_size
10975 && count * ix86_cost->add <= ix86_cost->shift_const)
10976 {
10977 int i;
10978 for (i=0; i<count; i++)
10979 emit_insn (gen_addsi3 (operand, operand, operand));
10980 }
10981 else
10982 emit_insn (gen_ashlsi3 (operand, operand, GEN_INT (count)));
10983 }
10984
10985 void
10986 ix86_split_ashldi (rtx *operands, rtx scratch)
10987 {
10988 rtx low[2], high[2];
10989 int count;
10990
10991 if (GET_CODE (operands[2]) == CONST_INT)
10992 {
10993 split_di (operands, 2, low, high);
10994 count = INTVAL (operands[2]) & 63;
10995
10996 if (count >= 32)
10997 {
10998 emit_move_insn (high[0], low[1]);
10999 emit_move_insn (low[0], const0_rtx);
11000
11001 if (count > 32)
11002 ix86_expand_ashlsi3_const (high[0], count - 32);
11003 }
11004 else
11005 {
11006 if (!rtx_equal_p (operands[0], operands[1]))
11007 emit_move_insn (operands[0], operands[1]);
11008 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
11009 ix86_expand_ashlsi3_const (low[0], count);
11010 }
11011 return;
11012 }
11013
11014 split_di (operands, 1, low, high);
11015
11016 if (operands[1] == const1_rtx)
11017 {
11018 /* Assuming we've chosen QImode-capable registers, then 1LL << N
11019 can be done with two 32-bit shifts, no branches, no cmoves. */
11020 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
11021 {
11022 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
11023
11024 ix86_expand_clear (low[0]);
11025 ix86_expand_clear (high[0]);
11026 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (32)));
11027
11028 d = gen_lowpart (QImode, low[0]);
11029 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
11030 s = gen_rtx_EQ (QImode, flags, const0_rtx);
11031 emit_insn (gen_rtx_SET (VOIDmode, d, s));
11032
11033 d = gen_lowpart (QImode, high[0]);
11034 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
11035 s = gen_rtx_NE (QImode, flags, const0_rtx);
11036 emit_insn (gen_rtx_SET (VOIDmode, d, s));
11037 }
11038
11039 /* Otherwise, we can get the same results by manually performing
11040 a bit extract operation on bit 5, and then performing the two
11041 shifts. The two methods of getting 0/1 into low/high are exactly
11042 the same size. Avoiding the shift in the bit extract case helps
11043 pentium4 a bit; no one else seems to care much either way. */
11044 else
11045 {
11046 rtx x;
11047
11048 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
11049 x = gen_rtx_ZERO_EXTEND (SImode, operands[2]);
11050 else
11051 x = gen_lowpart (SImode, operands[2]);
11052 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
11053
11054 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (5)));
11055 emit_insn (gen_andsi3 (high[0], high[0], GEN_INT (1)));
11056 emit_move_insn (low[0], high[0]);
11057 emit_insn (gen_xorsi3 (low[0], low[0], GEN_INT (1)));
11058 }
11059
11060 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
11061 emit_insn (gen_ashlsi3 (high[0], high[0], operands[2]));
11062 return;
11063 }
11064
11065 if (operands[1] == constm1_rtx)
11066 {
11067 /* For -1LL << N, we can avoid the shld instruction, because we
11068 know that we're shifting 0...31 ones into a -1. */
11069 emit_move_insn (low[0], constm1_rtx);
11070 if (optimize_size)
11071 emit_move_insn (high[0], low[0]);
11072 else
11073 emit_move_insn (high[0], constm1_rtx);
11074 }
11075 else
11076 {
11077 if (!rtx_equal_p (operands[0], operands[1]))
11078 emit_move_insn (operands[0], operands[1]);
11079
11080 split_di (operands, 1, low, high);
11081 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
11082 }
11083
11084 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
11085
11086 if (TARGET_CMOVE && scratch)
11087 {
11088 ix86_expand_clear (scratch);
11089 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
11090 }
11091 else
11092 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
11093 }
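/* A minimal C sketch of the whole splitting scheme used by
   ix86_split_ashldi (illustrative only; the function name is
   hypothetical): shifts of 32 or more move the low word into the high
   word, smaller shifts combine both words the way shld does.  */
#if 0
static void
ashldi_by_parts (unsigned int *lo, unsigned int *hi, unsigned int count)
{
  count &= 63;
  if (count >= 32)
    {
      *hi = *lo << (count - 32);
      *lo = 0;
    }
  else if (count > 0)
    {
      *hi = (*hi << count) | (*lo >> (32 - count));   /* shld */
      *lo <<= count;
    }
}
#endif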
11094
11095 void
11096 ix86_split_ashrdi (rtx *operands, rtx scratch)
11097 {
11098 rtx low[2], high[2];
11099 int count;
11100
11101 if (GET_CODE (operands[2]) == CONST_INT)
11102 {
11103 split_di (operands, 2, low, high);
11104 count = INTVAL (operands[2]) & 63;
11105
11106 if (count == 63)
11107 {
11108 emit_move_insn (high[0], high[1]);
11109 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
11110 emit_move_insn (low[0], high[0]);
11111
11112 }
11113 else if (count >= 32)
11114 {
11115 emit_move_insn (low[0], high[1]);
11116 emit_move_insn (high[0], low[0]);
11117 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
11118 if (count > 32)
11119 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
11120 }
11121 else
11122 {
11123 if (!rtx_equal_p (operands[0], operands[1]))
11124 emit_move_insn (operands[0], operands[1]);
11125 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
11126 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
11127 }
11128 }
11129 else
11130 {
11131 if (!rtx_equal_p (operands[0], operands[1]))
11132 emit_move_insn (operands[0], operands[1]);
11133
11134 split_di (operands, 1, low, high);
11135
11136 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
11137 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
11138
11139 if (TARGET_CMOVE && scratch)
11140 {
11141 emit_move_insn (scratch, high[0]);
11142 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
11143 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
11144 scratch));
11145 }
11146 else
11147 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
11148 }
11149 }
11150
11151 void
11152 ix86_split_lshrdi (rtx *operands, rtx scratch)
11153 {
11154 rtx low[2], high[2];
11155 int count;
11156
11157 if (GET_CODE (operands[2]) == CONST_INT)
11158 {
11159 split_di (operands, 2, low, high);
11160 count = INTVAL (operands[2]) & 63;
11161
11162 if (count >= 32)
11163 {
11164 emit_move_insn (low[0], high[1]);
11165 ix86_expand_clear (high[0]);
11166
11167 if (count > 32)
11168 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
11169 }
11170 else
11171 {
11172 if (!rtx_equal_p (operands[0], operands[1]))
11173 emit_move_insn (operands[0], operands[1]);
11174 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
11175 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
11176 }
11177 }
11178 else
11179 {
11180 if (!rtx_equal_p (operands[0], operands[1]))
11181 emit_move_insn (operands[0], operands[1]);
11182
11183 split_di (operands, 1, low, high);
11184
11185 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
11186 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
11187
11188 /* Heh. By reversing the arguments, we can reuse this pattern. */
11189 if (TARGET_CMOVE && scratch)
11190 {
11191 ix86_expand_clear (scratch);
11192 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
11193 scratch));
11194 }
11195 else
11196 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
11197 }
11198 }
11199
11200 /* Helper function for the string operations below.  Test whether the bits
11201 of VARIABLE selected by VALUE are zero; if so, jump to the returned label.  */
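/* Roughly: emit  tmp = VARIABLE & VALUE;  if (tmp == 0) goto label;  and
   return the label so the caller can place it after the unaligned-case code.  */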
11202 static rtx
11203 ix86_expand_aligntest (rtx variable, int value)
11204 {
11205 rtx label = gen_label_rtx ();
11206 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
11207 if (GET_MODE (variable) == DImode)
11208 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
11209 else
11210 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
11211 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
11212 1, label);
11213 return label;
11214 }
11215
11216 /* Adjust COUNTREG downward by VALUE.  */
11217 static void
11218 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
11219 {
11220 if (GET_MODE (countreg) == DImode)
11221 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
11222 else
11223 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
11224 }
11225
11226 /* Zero extend EXP, which may be in SImode, into a Pmode register.  */
11227 rtx
11228 ix86_zero_extend_to_Pmode (rtx exp)
11229 {
11230 rtx r;
11231 if (GET_MODE (exp) == VOIDmode)
11232 return force_reg (Pmode, exp);
11233 if (GET_MODE (exp) == Pmode)
11234 return copy_to_mode_reg (Pmode, exp);
11235 r = gen_reg_rtx (Pmode);
11236 emit_insn (gen_zero_extendsidi2 (r, exp));
11237 return r;
11238 }
11239
11240 /* Expand string move (memcpy) operation. Use i386 string operations when
11241 profitable. ix86_expand_clrmem contains similar code. */
11242 int
11243 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp)
11244 {
11245 rtx srcreg, destreg, countreg, srcexp, destexp;
11246 enum machine_mode counter_mode;
11247 HOST_WIDE_INT align = 0;
11248 unsigned HOST_WIDE_INT count = 0;
11249
11250 if (GET_CODE (align_exp) == CONST_INT)
11251 align = INTVAL (align_exp);
11252
11253 /* Can't use any of this if the user has appropriated esi or edi. */
11254 if (global_regs[4] || global_regs[5])
11255 return 0;
11256
11257 /* This simple hack avoids all inlining code and simplifies code below. */
11258 if (!TARGET_ALIGN_STRINGOPS)
11259 align = 64;
11260
11261 if (GET_CODE (count_exp) == CONST_INT)
11262 {
11263 count = INTVAL (count_exp);
11264 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
11265 return 0;
11266 }
11267
11268 /* Figure out the proper mode for the counter. For 32-bit targets it is
11269 always SImode; for 64-bit targets use SImode when possible, otherwise DImode.
11270 Set count to the number of bytes copied when known at compile time. */
11271 if (!TARGET_64BIT
11272 || GET_MODE (count_exp) == SImode
11273 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
11274 counter_mode = SImode;
11275 else
11276 counter_mode = DImode;
11277
11278 gcc_assert (counter_mode == SImode || counter_mode == DImode);
11279
11280 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
11281 if (destreg != XEXP (dst, 0))
11282 dst = replace_equiv_address_nv (dst, destreg);
11283 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
11284 if (srcreg != XEXP (src, 0))
11285 src = replace_equiv_address_nv (src, srcreg);
11286
11287 /* When optimizing for size, emit a simple rep ; movsb instruction for
11288 counts not divisible by 4. */
11289
11290 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
11291 {
11292 emit_insn (gen_cld ());
11293 countreg = ix86_zero_extend_to_Pmode (count_exp);
11294 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
11295 srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
11296 emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
11297 destexp, srcexp));
11298 }
11299
11300 /* For constant aligned (or small unaligned) copies use rep movsl
11301 followed by code copying the rest. For PentiumPro ensure 8 byte
11302 alignment to allow rep movsl acceleration. */
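/* For instance, for count == 14 with 4-byte alignment on ia32 this emits a
   3-iteration rep movsl for the first 12 bytes followed by a single word
   move for the remaining 2 bytes.  */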
11303
11304 else if (count != 0
11305 && (align >= 8
11306 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
11307 || optimize_size || count < (unsigned int) 64))
11308 {
11309 unsigned HOST_WIDE_INT offset = 0;
11310 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
11311 rtx srcmem, dstmem;
11312
11313 emit_insn (gen_cld ());
11314 if (count & ~(size - 1))
11315 {
11316 countreg = copy_to_mode_reg (counter_mode,
11317 GEN_INT ((count >> (size == 4 ? 2 : 3))
11318 & (TARGET_64BIT ? -1 : 0x3fffffff)));
11319 countreg = ix86_zero_extend_to_Pmode (countreg);
11320
11321 destexp = gen_rtx_ASHIFT (Pmode, countreg,
11322 GEN_INT (size == 4 ? 2 : 3));
11323 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
11324 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11325
11326 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
11327 countreg, destexp, srcexp));
11328 offset = count & ~(size - 1);
11329 }
11330 if (size == 8 && (count & 0x04))
11331 {
11332 srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
11333 offset);
11334 dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
11335 offset);
11336 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11337 offset += 4;
11338 }
11339 if (count & 0x02)
11340 {
11341 srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
11342 offset);
11343 dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
11344 offset);
11345 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11346 offset += 2;
11347 }
11348 if (count & 0x01)
11349 {
11350 srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
11351 offset);
11352 dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
11353 offset);
11354 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11355 }
11356 }
11357 /* The generic code based on the glibc implementation:
11358 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
11359 allowing accelerated copying there)
11360 - copy the data using rep movsl
11361 - copy the rest. */
11362 else
11363 {
11364 rtx countreg2;
11365 rtx label = NULL;
11366 rtx srcmem, dstmem;
11367 int desired_alignment = (TARGET_PENTIUMPRO
11368 && (count == 0 || count >= (unsigned int) 260)
11369 ? 8 : UNITS_PER_WORD);
11370 /* Get rid of MEM_OFFSETs, they won't be accurate. */
11371 dst = change_address (dst, BLKmode, destreg);
11372 src = change_address (src, BLKmode, srcreg);
11373
11374 /* In case we don't know anything about the alignment, default to the
11375 library version, since it is usually equally fast and results in
11376 shorter code.
11377
11378 Also emit a call when we know that the count is large and call overhead
11379 will not be important. */
11380 if (!TARGET_INLINE_ALL_STRINGOPS
11381 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11382 return 0;
11383
11384 if (TARGET_SINGLE_STRINGOP)
11385 emit_insn (gen_cld ());
11386
11387 countreg2 = gen_reg_rtx (Pmode);
11388 countreg = copy_to_mode_reg (counter_mode, count_exp);
11389
11390 /* We don't use loops to align the destination or to copy parts smaller
11391 than 4 bytes, because gcc is able to optimize such code better (when
11392 the destination or the count really is aligned, gcc is often
11393 able to predict the branches) and also because it is friendlier to the
11394 hardware branch predictors.
11395
11396 Using loops is beneficial for the generic case, because we can
11397 handle small counts using the loops. Many CPUs (such as Athlon)
11398 have large REP prefix setup costs.
11399
11400 This is quite costly. Maybe we can revisit this decision later or
11401 add some customizability to this code. */
11402
11403 if (count == 0 && align < desired_alignment)
11404 {
11405 label = gen_label_rtx ();
11406 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11407 LEU, 0, counter_mode, 1, label);
11408 }
11409 if (align <= 1)
11410 {
11411 rtx label = ix86_expand_aligntest (destreg, 1);
11412 srcmem = change_address (src, QImode, srcreg);
11413 dstmem = change_address (dst, QImode, destreg);
11414 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11415 ix86_adjust_counter (countreg, 1);
11416 emit_label (label);
11417 LABEL_NUSES (label) = 1;
11418 }
11419 if (align <= 2)
11420 {
11421 rtx label = ix86_expand_aligntest (destreg, 2);
11422 srcmem = change_address (src, HImode, srcreg);
11423 dstmem = change_address (dst, HImode, destreg);
11424 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11425 ix86_adjust_counter (countreg, 2);
11426 emit_label (label);
11427 LABEL_NUSES (label) = 1;
11428 }
11429 if (align <= 4 && desired_alignment > 4)
11430 {
11431 rtx label = ix86_expand_aligntest (destreg, 4);
11432 srcmem = change_address (src, SImode, srcreg);
11433 dstmem = change_address (dst, SImode, destreg);
11434 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11435 ix86_adjust_counter (countreg, 4);
11436 emit_label (label);
11437 LABEL_NUSES (label) = 1;
11438 }
11439
11440 if (label && desired_alignment > 4 && !TARGET_64BIT)
11441 {
11442 emit_label (label);
11443 LABEL_NUSES (label) = 1;
11444 label = NULL_RTX;
11445 }
11446 if (!TARGET_SINGLE_STRINGOP)
11447 emit_insn (gen_cld ());
11448 if (TARGET_64BIT)
11449 {
11450 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11451 GEN_INT (3)));
11452 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
11453 }
11454 else
11455 {
11456 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
11457 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
11458 }
11459 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
11460 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11461 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
11462 countreg2, destexp, srcexp));
11463
11464 if (label)
11465 {
11466 emit_label (label);
11467 LABEL_NUSES (label) = 1;
11468 }
11469 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11470 {
11471 srcmem = change_address (src, SImode, srcreg);
11472 dstmem = change_address (dst, SImode, destreg);
11473 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11474 }
11475 if ((align <= 4 || count == 0) && TARGET_64BIT)
11476 {
11477 rtx label = ix86_expand_aligntest (countreg, 4);
11478 srcmem = change_address (src, SImode, srcreg);
11479 dstmem = change_address (dst, SImode, destreg);
11480 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11481 emit_label (label);
11482 LABEL_NUSES (label) = 1;
11483 }
11484 if (align > 2 && count != 0 && (count & 2))
11485 {
11486 srcmem = change_address (src, HImode, srcreg);
11487 dstmem = change_address (dst, HImode, destreg);
11488 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11489 }
11490 if (align <= 2 || count == 0)
11491 {
11492 rtx label = ix86_expand_aligntest (countreg, 2);
11493 srcmem = change_address (src, HImode, srcreg);
11494 dstmem = change_address (dst, HImode, destreg);
11495 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11496 emit_label (label);
11497 LABEL_NUSES (label) = 1;
11498 }
11499 if (align > 1 && count != 0 && (count & 1))
11500 {
11501 srcmem = change_address (src, QImode, srcreg);
11502 dstmem = change_address (dst, QImode, destreg);
11503 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11504 }
11505 if (align <= 1 || count == 0)
11506 {
11507 rtx label = ix86_expand_aligntest (countreg, 1);
11508 srcmem = change_address (src, QImode, srcreg);
11509 dstmem = change_address (dst, QImode, destreg);
11510 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11511 emit_label (label);
11512 LABEL_NUSES (label) = 1;
11513 }
11514 }
11515
11516 return 1;
11517 }
11518
11519 /* Expand string clear operation (bzero). Use i386 string operations when
11520 profitable. ix86_expand_movmem contains similar code. */
11521 int
11522 ix86_expand_clrmem (rtx dst, rtx count_exp, rtx align_exp)
11523 {
11524 rtx destreg, zeroreg, countreg, destexp;
11525 enum machine_mode counter_mode;
11526 HOST_WIDE_INT align = 0;
11527 unsigned HOST_WIDE_INT count = 0;
11528
11529 if (GET_CODE (align_exp) == CONST_INT)
11530 align = INTVAL (align_exp);
11531
11532 /* Can't use any of this if the user has appropriated esi. */
11533 if (global_regs[4])
11534 return 0;
11535
11536 /* This simple hack avoids all inlining code and simplifies code below. */
11537 if (!TARGET_ALIGN_STRINGOPS)
11538 align = 32;
11539
11540 if (GET_CODE (count_exp) == CONST_INT)
11541 {
11542 count = INTVAL (count_exp);
11543 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
11544 return 0;
11545 }
11546 /* Figure out the proper mode for the counter. For 32-bit targets it is
11547 always SImode; for 64-bit targets use SImode when possible, otherwise DImode.
11548 Set count to the number of bytes copied when known at compile time. */
11549 if (!TARGET_64BIT
11550 || GET_MODE (count_exp) == SImode
11551 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
11552 counter_mode = SImode;
11553 else
11554 counter_mode = DImode;
11555
11556 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
11557 if (destreg != XEXP (dst, 0))
11558 dst = replace_equiv_address_nv (dst, destreg);
11559
11560
11561 /* When optimizing for size, emit a simple rep ; stosb instruction for
11562 counts not divisible by 4. The movl $N, %ecx; rep; stosb
11563 sequence is 7 bytes long, so if optimizing for size and the count is
11564 small enough that a few stosl, stosw and stosb instructions without
11565 rep are shorter, fall back into the next if. */
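/* Under -Os, (count & 3) + (count >> 2) estimates how many stos instructions
   the fallback below would need; the rep ; stosb form is used only when that
   estimate exceeds the 7 bytes of the rep sequence.  */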
11566
11567 if ((!optimize || optimize_size)
11568 && (count == 0
11569 || ((count & 0x03)
11570 && (!optimize_size || (count & 0x03) + (count >> 2) > 7))))
11571 {
11572 emit_insn (gen_cld ());
11573
11574 countreg = ix86_zero_extend_to_Pmode (count_exp);
11575 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
11576 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
11577 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
11578 }
11579 else if (count != 0
11580 && (align >= 8
11581 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
11582 || optimize_size || count < (unsigned int) 64))
11583 {
11584 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
11585 unsigned HOST_WIDE_INT offset = 0;
11586
11587 emit_insn (gen_cld ());
11588
11589 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
11590 if (count & ~(size - 1))
11591 {
11592 unsigned HOST_WIDE_INT repcount;
11593 unsigned int max_nonrep;
11594
11595 repcount = count >> (size == 4 ? 2 : 3);
11596 if (!TARGET_64BIT)
11597 repcount &= 0x3fffffff;
11598
11599 /* movl $N, %ecx; rep; stosl is 7 bytes, while N x stosl is N bytes.
11600 movl $N, %ecx; rep; stosq is 8 bytes, while N x stosq is 2xN
11601 bytes. In both cases the latter seems to be faster for small
11602 values of N. */
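/* Hence the break-even points chosen below: up to 7 plain stosl (7 bytes) or
   4 plain stosq (8 bytes) are never longer than the corresponding
   movl $N, %ecx; rep; stos sequence.  */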
11603 max_nonrep = size == 4 ? 7 : 4;
11604 if (!optimize_size)
11605 switch (ix86_tune)
11606 {
11607 case PROCESSOR_PENTIUM4:
11608 case PROCESSOR_NOCONA:
11609 max_nonrep = 3;
11610 break;
11611 default:
11612 break;
11613 }
11614
11615 if (repcount <= max_nonrep)
11616 while (repcount-- > 0)
11617 {
11618 rtx mem = adjust_automodify_address_nv (dst,
11619 GET_MODE (zeroreg),
11620 destreg, offset);
11621 emit_insn (gen_strset (destreg, mem, zeroreg));
11622 offset += size;
11623 }
11624 else
11625 {
11626 countreg = copy_to_mode_reg (counter_mode, GEN_INT (repcount));
11627 countreg = ix86_zero_extend_to_Pmode (countreg);
11628 destexp = gen_rtx_ASHIFT (Pmode, countreg,
11629 GEN_INT (size == 4 ? 2 : 3));
11630 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11631 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg,
11632 destexp));
11633 offset = count & ~(size - 1);
11634 }
11635 }
11636 if (size == 8 && (count & 0x04))
11637 {
11638 rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
11639 offset);
11640 emit_insn (gen_strset (destreg, mem,
11641 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11642 offset += 4;
11643 }
11644 if (count & 0x02)
11645 {
11646 rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
11647 offset);
11648 emit_insn (gen_strset (destreg, mem,
11649 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11650 offset += 2;
11651 }
11652 if (count & 0x01)
11653 {
11654 rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
11655 offset);
11656 emit_insn (gen_strset (destreg, mem,
11657 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11658 }
11659 }
11660 else
11661 {
11662 rtx countreg2;
11663 rtx label = NULL;
11664 /* Compute desired alignment of the string operation. */
11665 int desired_alignment = (TARGET_PENTIUMPRO
11666 && (count == 0 || count >= (unsigned int) 260)
11667 ? 8 : UNITS_PER_WORD);
11668
11669 /* In case we don't know anything about the alignment, default to the
11670 library version, since it is usually equally fast and results in
11671 shorter code.
11672
11673 Also emit a call when we know that the count is large and call overhead
11674 will not be important. */
11675 if (!TARGET_INLINE_ALL_STRINGOPS
11676 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11677 return 0;
11678
11679 if (TARGET_SINGLE_STRINGOP)
11680 emit_insn (gen_cld ());
11681
11682 countreg2 = gen_reg_rtx (Pmode);
11683 countreg = copy_to_mode_reg (counter_mode, count_exp);
11684 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
11685 /* Get rid of MEM_OFFSET, it won't be accurate. */
11686 dst = change_address (dst, BLKmode, destreg);
11687
11688 if (count == 0 && align < desired_alignment)
11689 {
11690 label = gen_label_rtx ();
11691 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11692 LEU, 0, counter_mode, 1, label);
11693 }
11694 if (align <= 1)
11695 {
11696 rtx label = ix86_expand_aligntest (destreg, 1);
11697 emit_insn (gen_strset (destreg, dst,
11698 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11699 ix86_adjust_counter (countreg, 1);
11700 emit_label (label);
11701 LABEL_NUSES (label) = 1;
11702 }
11703 if (align <= 2)
11704 {
11705 rtx label = ix86_expand_aligntest (destreg, 2);
11706 emit_insn (gen_strset (destreg, dst,
11707 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11708 ix86_adjust_counter (countreg, 2);
11709 emit_label (label);
11710 LABEL_NUSES (label) = 1;
11711 }
11712 if (align <= 4 && desired_alignment > 4)
11713 {
11714 rtx label = ix86_expand_aligntest (destreg, 4);
11715 emit_insn (gen_strset (destreg, dst,
11716 (TARGET_64BIT
11717 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
11718 : zeroreg)));
11719 ix86_adjust_counter (countreg, 4);
11720 emit_label (label);
11721 LABEL_NUSES (label) = 1;
11722 }
11723
11724 if (label && desired_alignment > 4 && !TARGET_64BIT)
11725 {
11726 emit_label (label);
11727 LABEL_NUSES (label) = 1;
11728 label = NULL_RTX;
11729 }
11730
11731 if (!TARGET_SINGLE_STRINGOP)
11732 emit_insn (gen_cld ());
11733 if (TARGET_64BIT)
11734 {
11735 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11736 GEN_INT (3)));
11737 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
11738 }
11739 else
11740 {
11741 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
11742 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
11743 }
11744 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11745 emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));
11746
11747 if (label)
11748 {
11749 emit_label (label);
11750 LABEL_NUSES (label) = 1;
11751 }
11752
11753 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11754 emit_insn (gen_strset (destreg, dst,
11755 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11756 if (TARGET_64BIT && (align <= 4 || count == 0))
11757 {
11758 rtx label = ix86_expand_aligntest (countreg, 4);
11759 emit_insn (gen_strset (destreg, dst,
11760 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11761 emit_label (label);
11762 LABEL_NUSES (label) = 1;
11763 }
11764 if (align > 2 && count != 0 && (count & 2))
11765 emit_insn (gen_strset (destreg, dst,
11766 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11767 if (align <= 2 || count == 0)
11768 {
11769 rtx label = ix86_expand_aligntest (countreg, 2);
11770 emit_insn (gen_strset (destreg, dst,
11771 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11772 emit_label (label);
11773 LABEL_NUSES (label) = 1;
11774 }
11775 if (align > 1 && count != 0 && (count & 1))
11776 emit_insn (gen_strset (destreg, dst,
11777 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11778 if (align <= 1 || count == 0)
11779 {
11780 rtx label = ix86_expand_aligntest (countreg, 1);
11781 emit_insn (gen_strset (destreg, dst,
11782 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11783 emit_label (label);
11784 LABEL_NUSES (label) = 1;
11785 }
11786 }
11787 return 1;
11788 }
11789
11790 /* Expand strlen. */
11791 int
11792 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
11793 {
11794 rtx addr, scratch1, scratch2, scratch3, scratch4;
11795
11796 /* The generic case of the strlen expander is long. Avoid expanding
11797 it unless TARGET_INLINE_ALL_STRINGOPS. */
11798
11799 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11800 && !TARGET_INLINE_ALL_STRINGOPS
11801 && !optimize_size
11802 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
11803 return 0;
11804
11805 addr = force_reg (Pmode, XEXP (src, 0));
11806 scratch1 = gen_reg_rtx (Pmode);
11807
11808 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11809 && !optimize_size)
11810 {
11811 /* Well, it seems that some optimizer does not combine a call like
11812 foo(strlen(bar), strlen(bar));
11813 when the move and the subtraction are done here. It does calculate
11814 the length just once when these instructions are done inside of
11815 output_strlen_unroll(). But since &bar[strlen(bar)] is often used
11816 and this uses one fewer register for the lifetime of
11817 output_strlen_unroll(), this is better. */
11818
11819 emit_move_insn (out, addr);
11820
11821 ix86_expand_strlensi_unroll_1 (out, src, align);
11822
11823 /* strlensi_unroll_1 returns the address of the zero at the end of
11824 the string, like memchr(), so compute the length by subtracting
11825 the start address. */
11826 if (TARGET_64BIT)
11827 emit_insn (gen_subdi3 (out, out, addr));
11828 else
11829 emit_insn (gen_subsi3 (out, out, addr));
11830 }
11831 else
11832 {
11833 rtx unspec;
11834 scratch2 = gen_reg_rtx (Pmode);
11835 scratch3 = gen_reg_rtx (Pmode);
11836 scratch4 = force_reg (Pmode, constm1_rtx);
11837
11838 emit_move_insn (scratch3, addr);
11839 eoschar = force_reg (QImode, eoschar);
11840
11841 emit_insn (gen_cld ());
11842 src = replace_equiv_address_nv (src, scratch3);
11843
11844 /* If .md starts supporting :P, this can be done in .md. */
11845 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
11846 scratch4), UNSPEC_SCAS);
11847 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
11848 if (TARGET_64BIT)
11849 {
11850 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
11851 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
11852 }
11853 else
11854 {
11855 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
11856 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
11857 }
11858 }
11859 return 1;
11860 }
11861
11862 /* Expand the appropriate insns for doing strlen if not just doing
11863 repnz; scasb
11864
11865 out = result, initialized with the start address
11866 align_rtx = alignment of the address.
11867 scratch = scratch register, initialized with the start address when
11868 not aligned, otherwise undefined
11869
11870 This is just the body. It needs the initializations mentioned above and
11871 some address computing at the end. These things are done in i386.md. */
11872
11873 static void
11874 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
11875 {
11876 int align;
11877 rtx tmp;
11878 rtx align_2_label = NULL_RTX;
11879 rtx align_3_label = NULL_RTX;
11880 rtx align_4_label = gen_label_rtx ();
11881 rtx end_0_label = gen_label_rtx ();
11882 rtx mem;
11883 rtx tmpreg = gen_reg_rtx (SImode);
11884 rtx scratch = gen_reg_rtx (SImode);
11885 rtx cmp;
11886
11887 align = 0;
11888 if (GET_CODE (align_rtx) == CONST_INT)
11889 align = INTVAL (align_rtx);
11890
11891 /* Check up to 3 leading bytes for null to reach an aligned pointer. */
11892
11893 /* Is there a known alignment and is it less than 4? */
11894 if (align < 4)
11895 {
11896 rtx scratch1 = gen_reg_rtx (Pmode);
11897 emit_move_insn (scratch1, out);
11898 /* Is there a known alignment and is it not 2? */
11899 if (align != 2)
11900 {
11901 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
11902 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
11903
11904 /* Leave just the 3 lower bits. */
11905 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
11906 NULL_RTX, 0, OPTAB_WIDEN);
11907
11908 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11909 Pmode, 1, align_4_label);
11910 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
11911 Pmode, 1, align_2_label);
11912 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
11913 Pmode, 1, align_3_label);
11914 }
11915 else
11916 {
11917 /* Since the alignment is 2, we have to check 2 or 0 bytes;
11918 check whether it is aligned to a 4-byte boundary. */
11919
11920 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
11921 NULL_RTX, 0, OPTAB_WIDEN);
11922
11923 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11924 Pmode, 1, align_4_label);
11925 }
11926
11927 mem = change_address (src, QImode, out);
11928
11929 /* Now compare the bytes. */
11930
11931 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
11932 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
11933 QImode, 1, end_0_label);
11934
11935 /* Increment the address. */
11936 if (TARGET_64BIT)
11937 emit_insn (gen_adddi3 (out, out, const1_rtx));
11938 else
11939 emit_insn (gen_addsi3 (out, out, const1_rtx));
11940
11941 /* Not needed with an alignment of 2 */
11942 if (align != 2)
11943 {
11944 emit_label (align_2_label);
11945
11946 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11947 end_0_label);
11948
11949 if (TARGET_64BIT)
11950 emit_insn (gen_adddi3 (out, out, const1_rtx));
11951 else
11952 emit_insn (gen_addsi3 (out, out, const1_rtx));
11953
11954 emit_label (align_3_label);
11955 }
11956
11957 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11958 end_0_label);
11959
11960 if (TARGET_64BIT)
11961 emit_insn (gen_adddi3 (out, out, const1_rtx));
11962 else
11963 emit_insn (gen_addsi3 (out, out, const1_rtx));
11964 }
11965
11966 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
11967 align this loop; doing so only bloats the code and does not help
11968 speed. */
11969 emit_label (align_4_label);
11970
11971 mem = change_address (src, SImode, out);
11972 emit_move_insn (scratch, mem);
11973 if (TARGET_64BIT)
11974 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
11975 else
11976 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
11977
11978 /* This formula yields a nonzero result iff one of the bytes is zero.
11979 This saves three branches inside the loop and many cycles. */
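/* Concretely, the insns below compute
   tmpreg = (scratch - 0x01010101) & ~scratch & 0x80808080;
   every zero byte of SCRATCH sets the 0x80 bit at its position, and a
   nonzero byte can only do so when a lower byte was already zero, so the
   result is nonzero exactly when the word contains a zero byte.  */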
11980
11981 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
11982 emit_insn (gen_one_cmplsi2 (scratch, scratch));
11983 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
11984 emit_insn (gen_andsi3 (tmpreg, tmpreg,
11985 gen_int_mode (0x80808080, SImode)));
11986 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
11987 align_4_label);
11988
11989 if (TARGET_CMOVE)
11990 {
11991 rtx reg = gen_reg_rtx (SImode);
11992 rtx reg2 = gen_reg_rtx (Pmode);
11993 emit_move_insn (reg, tmpreg);
11994 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
11995
11996 /* If zero is not in the first two bytes, move two bytes forward. */
11997 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11998 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11999 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
12000 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
12001 gen_rtx_IF_THEN_ELSE (SImode, tmp,
12002 reg,
12003 tmpreg)));
12004 /* Emit lea manually to avoid clobbering of flags. */
12005 emit_insn (gen_rtx_SET (SImode, reg2,
12006 gen_rtx_PLUS (Pmode, out, const2_rtx)));
12007
12008 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
12009 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
12010 emit_insn (gen_rtx_SET (VOIDmode, out,
12011 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
12012 reg2,
12013 out)));
12014
12015 }
12016 else
12017 {
12018 rtx end_2_label = gen_label_rtx ();
12019 /* Is zero in the first two bytes? */
12020
12021 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
12022 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
12023 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
12024 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
12025 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
12026 pc_rtx);
12027 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
12028 JUMP_LABEL (tmp) = end_2_label;
12029
12030 /* Not in the first two. Move two bytes forward. */
12031 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
12032 if (TARGET_64BIT)
12033 emit_insn (gen_adddi3 (out, out, const2_rtx));
12034 else
12035 emit_insn (gen_addsi3 (out, out, const2_rtx));
12036
12037 emit_label (end_2_label);
12038
12039 }
12040
12041 /* Avoid branch in fixing the byte. */
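/* At this point OUT is 4 bytes past the byte pair that contains the zero and
   bit 7 of the low byte of TMPREG is set iff the zero is the first of the
   two.  Adding TMPREG to itself copies that bit into the carry flag, so the
   subtract-with-borrow below subtracts 4 or 3 and leaves OUT pointing at the
   terminating zero byte.  */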
12042 tmpreg = gen_lowpart (QImode, tmpreg);
12043 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
12044 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
12045 if (TARGET_64BIT)
12046 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
12047 else
12048 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
12049
12050 emit_label (end_0_label);
12051 }
12052
12053 void
12054 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
12055 rtx callarg2 ATTRIBUTE_UNUSED,
12056 rtx pop, int sibcall)
12057 {
12058 rtx use = NULL, call;
12059
12060 if (pop == const0_rtx)
12061 pop = NULL;
12062 gcc_assert (!TARGET_64BIT || !pop);
12063
12064 #if TARGET_MACHO
12065 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
12066 fnaddr = machopic_indirect_call_target (fnaddr);
12067 #else
12068 /* Static functions and indirect calls don't need the pic register. */
12069 if (! TARGET_64BIT && flag_pic
12070 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
12071 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
12072 use_reg (&use, pic_offset_table_rtx);
12073
12074 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
12075 {
12076 rtx al = gen_rtx_REG (QImode, 0);
12077 emit_move_insn (al, callarg2);
12078 use_reg (&use, al);
12079 }
12080 #endif /* TARGET_MACHO */
12081
12082 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
12083 {
12084 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
12085 fnaddr = gen_rtx_MEM (QImode, fnaddr);
12086 }
12087 if (sibcall && TARGET_64BIT
12088 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
12089 {
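/* %r11 is neither callee-saved nor used for argument passing in the x86-64
   ABI, so it is safe to use for the sibcall target here.  */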
12090 rtx addr;
12091 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
12092 fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
12093 emit_move_insn (fnaddr, addr);
12094 fnaddr = gen_rtx_MEM (QImode, fnaddr);
12095 }
12096
12097 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
12098 if (retval)
12099 call = gen_rtx_SET (VOIDmode, retval, call);
12100 if (pop)
12101 {
12102 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
12103 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
12104 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
12105 }
12106
12107 call = emit_call_insn (call);
12108 if (use)
12109 CALL_INSN_FUNCTION_USAGE (call) = use;
12110 }
12111
12112 \f
12113 /* Clear stack slot assignments remembered from previous functions.
12114 This is called from INIT_EXPANDERS once before RTL is emitted for each
12115 function. */
12116
12117 static struct machine_function *
12118 ix86_init_machine_status (void)
12119 {
12120 struct machine_function *f;
12121
12122 f = ggc_alloc_cleared (sizeof (struct machine_function));
12123 f->use_fast_prologue_epilogue_nregs = -1;
12124
12125 return f;
12126 }
12127
12128 /* Return a MEM corresponding to a stack slot with mode MODE.
12129 Allocate a new slot if necessary.
12130
12131 The RTL for a function can have several slots available: N is
12132 which slot to use. */
12133
12134 rtx
12135 assign_386_stack_local (enum machine_mode mode, int n)
12136 {
12137 struct stack_local_entry *s;
12138
12139 gcc_assert (n >= 0 && n < MAX_386_STACK_LOCALS);
12140
12141 for (s = ix86_stack_locals; s; s = s->next)
12142 if (s->mode == mode && s->n == n)
12143 return s->rtl;
12144
12145 s = (struct stack_local_entry *)
12146 ggc_alloc (sizeof (struct stack_local_entry));
12147 s->n = n;
12148 s->mode = mode;
12149 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
12150
12151 s->next = ix86_stack_locals;
12152 ix86_stack_locals = s;
12153 return s->rtl;
12154 }
12155
12156 /* Construct the SYMBOL_REF for the tls_get_addr function. */
12157
12158 static GTY(()) rtx ix86_tls_symbol;
12159 rtx
12160 ix86_tls_get_addr (void)
12161 {
12162
12163 if (!ix86_tls_symbol)
12164 {
12165 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
12166 (TARGET_GNU_TLS && !TARGET_64BIT)
12167 ? "___tls_get_addr"
12168 : "__tls_get_addr");
12169 }
12170
12171 return ix86_tls_symbol;
12172 }
12173 \f
12174 /* Calculate the length of the memory address in the instruction
12175 encoding. Does not include the one-byte modrm, opcode, or prefix. */
12176
12177 int
12178 memory_address_length (rtx addr)
12179 {
12180 struct ix86_address parts;
12181 rtx base, index, disp;
12182 int len;
12183 int ok;
12184
12185 if (GET_CODE (addr) == PRE_DEC
12186 || GET_CODE (addr) == POST_INC
12187 || GET_CODE (addr) == PRE_MODIFY
12188 || GET_CODE (addr) == POST_MODIFY)
12189 return 0;
12190
12191 ok = ix86_decompose_address (addr, &parts);
12192 gcc_assert (ok);
12193
12194 if (parts.base && GET_CODE (parts.base) == SUBREG)
12195 parts.base = SUBREG_REG (parts.base);
12196 if (parts.index && GET_CODE (parts.index) == SUBREG)
12197 parts.index = SUBREG_REG (parts.index);
12198
12199 base = parts.base;
12200 index = parts.index;
12201 disp = parts.disp;
12202 len = 0;
12203
12204 /* Rule of thumb:
12205 - esp as the base always wants an index,
12206 - ebp as the base always wants a displacement. */
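/* In ModRM terms: r/m = 100 always requires a SIB byte, so an %esp base costs
   one extra byte, and mod = 00 with r/m = 101 means disp32 with no base, so a
   plain (%ebp) must be encoded as 0(%ebp) with a one-byte displacement.  */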
12207
12208 /* Register Indirect. */
12209 if (base && !index && !disp)
12210 {
12211 /* esp (for its index) and ebp (for its displacement) need
12212 the two-byte modrm form. */
12213 if (addr == stack_pointer_rtx
12214 || addr == arg_pointer_rtx
12215 || addr == frame_pointer_rtx
12216 || addr == hard_frame_pointer_rtx)
12217 len = 1;
12218 }
12219
12220 /* Direct Addressing. */
12221 else if (disp && !base && !index)
12222 len = 4;
12223
12224 else
12225 {
12226 /* Find the length of the displacement constant. */
12227 if (disp)
12228 {
12229 if (GET_CODE (disp) == CONST_INT
12230 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K')
12231 && base)
12232 len = 1;
12233 else
12234 len = 4;
12235 }
12236 /* ebp always wants a displacement. */
12237 else if (base == hard_frame_pointer_rtx)
12238 len = 1;
12239
12240 /* An index requires the two-byte modrm form.... */
12241 if (index
12242 /* ...like esp, which always wants an index. */
12243 || base == stack_pointer_rtx
12244 || base == arg_pointer_rtx
12245 || base == frame_pointer_rtx)
12246 len += 1;
12247 }
12248
12249 return len;
12250 }
12251
12252 /* Compute default value for "length_immediate" attribute. When SHORTFORM
12253 is set, expect that the insn has an 8-bit immediate alternative. */
12254 int
12255 ix86_attr_length_immediate_default (rtx insn, int shortform)
12256 {
12257 int len = 0;
12258 int i;
12259 extract_insn_cached (insn);
12260 for (i = recog_data.n_operands - 1; i >= 0; --i)
12261 if (CONSTANT_P (recog_data.operand[i]))
12262 {
12263 gcc_assert (!len);
12264 if (shortform
12265 && GET_CODE (recog_data.operand[i]) == CONST_INT
12266 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
12267 len = 1;
12268 else
12269 {
12270 switch (get_attr_mode (insn))
12271 {
12272 case MODE_QI:
12273 len+=1;
12274 break;
12275 case MODE_HI:
12276 len+=2;
12277 break;
12278 case MODE_SI:
12279 len+=4;
12280 break;
12281 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
12282 case MODE_DI:
12283 len+=4;
12284 break;
12285 default:
12286 fatal_insn ("unknown insn mode", insn);
12287 }
12288 }
12289 }
12290 return len;
12291 }
12292 /* Compute default value for "length_address" attribute. */
12293 int
12294 ix86_attr_length_address_default (rtx insn)
12295 {
12296 int i;
12297
12298 if (get_attr_type (insn) == TYPE_LEA)
12299 {
12300 rtx set = PATTERN (insn);
12301
12302 if (GET_CODE (set) == PARALLEL)
12303 set = XVECEXP (set, 0, 0);
12304
12305 gcc_assert (GET_CODE (set) == SET);
12306
12307 return memory_address_length (SET_SRC (set));
12308 }
12309
12310 extract_insn_cached (insn);
12311 for (i = recog_data.n_operands - 1; i >= 0; --i)
12312 if (GET_CODE (recog_data.operand[i]) == MEM)
12313 {
12314 return memory_address_length (XEXP (recog_data.operand[i], 0));
12316 }
12317 return 0;
12318 }
12319 \f
12320 /* Return the maximum number of instructions a cpu can issue. */
12321
12322 static int
12323 ix86_issue_rate (void)
12324 {
12325 switch (ix86_tune)
12326 {
12327 case PROCESSOR_PENTIUM:
12328 case PROCESSOR_K6:
12329 return 2;
12330
12331 case PROCESSOR_PENTIUMPRO:
12332 case PROCESSOR_PENTIUM4:
12333 case PROCESSOR_ATHLON:
12334 case PROCESSOR_K8:
12335 case PROCESSOR_NOCONA:
12336 return 3;
12337
12338 default:
12339 return 1;
12340 }
12341 }
12342
12343 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads the flags
12344 set by DEP_INSN but nothing else that DEP_INSN sets. */
12345
12346 static int
12347 ix86_flags_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
12348 {
12349 rtx set, set2;
12350
12351 /* Simplify the test for uninteresting insns. */
12352 if (insn_type != TYPE_SETCC
12353 && insn_type != TYPE_ICMOV
12354 && insn_type != TYPE_FCMOV
12355 && insn_type != TYPE_IBR)
12356 return 0;
12357
12358 if ((set = single_set (dep_insn)) != 0)
12359 {
12360 set = SET_DEST (set);
12361 set2 = NULL_RTX;
12362 }
12363 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
12364 && XVECLEN (PATTERN (dep_insn), 0) == 2
12365 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
12366 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
12367 {
12368 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
12369 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
12370 }
12371 else
12372 return 0;
12373
12374 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
12375 return 0;
12376
12377 /* This test is true if the dependent insn reads the flags but
12378 not any other potentially set register. */
12379 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
12380 return 0;
12381
12382 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
12383 return 0;
12384
12385 return 1;
12386 }
12387
12388 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
12389 address with operands set by DEP_INSN. */
12390
12391 static int
12392 ix86_agi_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
12393 {
12394 rtx addr;
12395
12396 if (insn_type == TYPE_LEA
12397 && TARGET_PENTIUM)
12398 {
12399 addr = PATTERN (insn);
12400
12401 if (GET_CODE (addr) == PARALLEL)
12402 addr = XVECEXP (addr, 0, 0);
12403
12404 gcc_assert (GET_CODE (addr) == SET);
12405
12406 addr = SET_SRC (addr);
12407 }
12408 else
12409 {
12410 int i;
12411 extract_insn_cached (insn);
12412 for (i = recog_data.n_operands - 1; i >= 0; --i)
12413 if (GET_CODE (recog_data.operand[i]) == MEM)
12414 {
12415 addr = XEXP (recog_data.operand[i], 0);
12416 goto found;
12417 }
12418 return 0;
12419 found:;
12420 }
12421
12422 return modified_in_p (addr, dep_insn);
12423 }
12424
12425 static int
12426 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
12427 {
12428 enum attr_type insn_type, dep_insn_type;
12429 enum attr_memory memory;
12430 rtx set, set2;
12431 int dep_insn_code_number;
12432
12433 /* Anti and output dependencies have zero cost on all CPUs. */
12434 if (REG_NOTE_KIND (link) != 0)
12435 return 0;
12436
12437 dep_insn_code_number = recog_memoized (dep_insn);
12438
12439 /* If we can't recognize the insns, we can't really do anything. */
12440 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
12441 return cost;
12442
12443 insn_type = get_attr_type (insn);
12444 dep_insn_type = get_attr_type (dep_insn);
12445
12446 switch (ix86_tune)
12447 {
12448 case PROCESSOR_PENTIUM:
12449 /* Address Generation Interlock adds a cycle of latency. */
12450 if (ix86_agi_dependant (insn, dep_insn, insn_type))
12451 cost += 1;
12452
12453 /* ??? Compares pair with jump/setcc. */
12454 if (ix86_flags_dependant (insn, dep_insn, insn_type))
12455 cost = 0;
12456
12457 /* Floating point stores require value to be ready one cycle earlier. */
12458 if (insn_type == TYPE_FMOV
12459 && get_attr_memory (insn) == MEMORY_STORE
12460 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12461 cost += 1;
12462 break;
12463
12464 case PROCESSOR_PENTIUMPRO:
12465 memory = get_attr_memory (insn);
12466
12467 /* INT->FP conversion is expensive. */
12468 if (get_attr_fp_int_src (dep_insn))
12469 cost += 5;
12470
12471 /* There is one cycle extra latency between an FP op and a store. */
12472 if (insn_type == TYPE_FMOV
12473 && (set = single_set (dep_insn)) != NULL_RTX
12474 && (set2 = single_set (insn)) != NULL_RTX
12475 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
12476 && GET_CODE (SET_DEST (set2)) == MEM)
12477 cost += 1;
12478
12479 /* Model the ability of the reorder buffer to hide the latency of a load by
12480 executing it in parallel with the previous instruction when the previous
12481 instruction is not needed to compute the address. */
12482 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12483 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12484 {
12485 /* Claim moves to take one cycle, as the core can issue one load
12486 at a time and the next load can start a cycle later. */
12487 if (dep_insn_type == TYPE_IMOV
12488 || dep_insn_type == TYPE_FMOV)
12489 cost = 1;
12490 else if (cost > 1)
12491 cost--;
12492 }
12493 break;
12494
12495 case PROCESSOR_K6:
12496 memory = get_attr_memory (insn);
12497
12498 /* The esp dependency is resolved before the instruction is really
12499 finished. */
12500 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
12501 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
12502 return 1;
12503
12504 /* INT->FP conversion is expensive. */
12505 if (get_attr_fp_int_src (dep_insn))
12506 cost += 5;
12507
12508 /* Model the ability of the reorder buffer to hide the latency of a load by
12509 executing it in parallel with the previous instruction when the previous
12510 instruction is not needed to compute the address. */
12511 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12512 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12513 {
12514 /* Claim moves to take one cycle, as the core can issue one load
12515 at a time and the next load can start a cycle later. */
12516 if (dep_insn_type == TYPE_IMOV
12517 || dep_insn_type == TYPE_FMOV)
12518 cost = 1;
12519 else if (cost > 2)
12520 cost -= 2;
12521 else
12522 cost = 1;
12523 }
12524 break;
12525
12526 case PROCESSOR_ATHLON:
12527 case PROCESSOR_K8:
12528 memory = get_attr_memory (insn);
12529
12530 /* Model the ability of the reorder buffer to hide the latency of a load by
12531 executing it in parallel with the previous instruction when the previous
12532 instruction is not needed to compute the address. */
12533 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12534 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12535 {
12536 enum attr_unit unit = get_attr_unit (insn);
12537 int loadcost = 3;
12538
12539 /* Because of the difference between the length of integer and
12540 floating unit pipeline preparation stages, the memory operands
12541 for floating point are cheaper.
12542
12543 ??? For Athlon the difference is most probably 2. */
12544 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
12545 loadcost = 3;
12546 else
12547 loadcost = TARGET_ATHLON ? 2 : 0;
12548
12549 if (cost >= loadcost)
12550 cost -= loadcost;
12551 else
12552 cost = 0;
12553 }
12554
12555 default:
12556 break;
12557 }
12558
12559 return cost;
12560 }
12561
12562 /* How many alternative schedules to try. This should be as wide as the
12563 scheduling freedom in the DFA, but no wider. Making this value too
12564 large results in extra work for the scheduler. */
12565
12566 static int
12567 ia32_multipass_dfa_lookahead (void)
12568 {
12569 if (ix86_tune == PROCESSOR_PENTIUM)
12570 return 2;
12571
12572 if (ix86_tune == PROCESSOR_PENTIUMPRO
12573 || ix86_tune == PROCESSOR_K6)
12574 return 1;
12575
12576 else
12577 return 0;
12578 }
12579
12580 \f
12581 /* Compute the alignment given to a constant that is being placed in memory.
12582 EXP is the constant and ALIGN is the alignment that the object would
12583 ordinarily have.
12584 The value of this function is used instead of that alignment to align
12585 the object. */
12586
12587 int
12588 ix86_constant_alignment (tree exp, int align)
12589 {
12590 if (TREE_CODE (exp) == REAL_CST)
12591 {
12592 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
12593 return 64;
12594 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
12595 return 128;
12596 }
12597 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
12598 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
12599 return BITS_PER_WORD;
12600
12601 return align;
12602 }
12603
12604 /* Compute the alignment for a static variable.
12605 TYPE is the data type, and ALIGN is the alignment that
12606 the object would ordinarily have. The value of this function is used
12607 instead of that alignment to align the object. */
12608
12609 int
12610 ix86_data_alignment (tree type, int align)
12611 {
12612 if (AGGREGATE_TYPE_P (type)
12613 && TYPE_SIZE (type)
12614 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12615 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
12616 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
12617 return 256;
12618
12619 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12620 to a 16-byte boundary. */
12621 if (TARGET_64BIT)
12622 {
12623 if (AGGREGATE_TYPE_P (type)
12624 && TYPE_SIZE (type)
12625 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12626 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
12627 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12628 return 128;
12629 }
12630
12631 if (TREE_CODE (type) == ARRAY_TYPE)
12632 {
12633 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12634 return 64;
12635 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12636 return 128;
12637 }
12638 else if (TREE_CODE (type) == COMPLEX_TYPE)
12639 {
12640
12641 if (TYPE_MODE (type) == DCmode && align < 64)
12642 return 64;
12643 if (TYPE_MODE (type) == XCmode && align < 128)
12644 return 128;
12645 }
12646 else if ((TREE_CODE (type) == RECORD_TYPE
12647 || TREE_CODE (type) == UNION_TYPE
12648 || TREE_CODE (type) == QUAL_UNION_TYPE)
12649 && TYPE_FIELDS (type))
12650 {
12651 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12652 return 64;
12653 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12654 return 128;
12655 }
12656 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12657 || TREE_CODE (type) == INTEGER_TYPE)
12658 {
12659 if (TYPE_MODE (type) == DFmode && align < 64)
12660 return 64;
12661 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12662 return 128;
12663 }
12664
12665 return align;
12666 }
12667
12668 /* Compute the alignment for a local variable.
12669 TYPE is the data type, and ALIGN is the alignment that
12670 the object would ordinarily have. The value of this macro is used
12671 instead of that alignment to align the object. */
12672
12673 int
12674 ix86_local_alignment (tree type, int align)
12675 {
12676 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12677 to a 16-byte boundary. */
12678 if (TARGET_64BIT)
12679 {
12680 if (AGGREGATE_TYPE_P (type)
12681 && TYPE_SIZE (type)
12682 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12683 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
12684 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12685 return 128;
12686 }
12687 if (TREE_CODE (type) == ARRAY_TYPE)
12688 {
12689 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12690 return 64;
12691 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12692 return 128;
12693 }
12694 else if (TREE_CODE (type) == COMPLEX_TYPE)
12695 {
12696 if (TYPE_MODE (type) == DCmode && align < 64)
12697 return 64;
12698 if (TYPE_MODE (type) == XCmode && align < 128)
12699 return 128;
12700 }
12701 else if ((TREE_CODE (type) == RECORD_TYPE
12702 || TREE_CODE (type) == UNION_TYPE
12703 || TREE_CODE (type) == QUAL_UNION_TYPE)
12704 && TYPE_FIELDS (type))
12705 {
12706 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12707 return 64;
12708 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12709 return 128;
12710 }
12711 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12712 || TREE_CODE (type) == INTEGER_TYPE)
12713 {
12714
12715 if (TYPE_MODE (type) == DFmode && align < 64)
12716 return 64;
12717 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12718 return 128;
12719 }
12720 return align;
12721 }
12722 \f
12723 /* Emit RTL insns to initialize the variable parts of a trampoline.
12724 FNADDR is an RTX for the address of the function's pure code.
12725 CXT is an RTX for the static chain value for the function. */
12726 void
12727 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
12728 {
12729 if (!TARGET_64BIT)
12730 {
12731 /* Compute offset from the end of the jmp to the target function. */
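/* The resulting 10-byte trampoline is
     b9 <cxt>    movl $CXT, %ecx
     e9 <disp>   jmp  FNADDR
   where the jump displacement is measured from the end of the sequence
   (tramp + 10).  */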
12732 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
12733 plus_constant (tramp, 10),
12734 NULL_RTX, 1, OPTAB_DIRECT);
12735 emit_move_insn (gen_rtx_MEM (QImode, tramp),
12736 gen_int_mode (0xb9, QImode));
12737 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
12738 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
12739 gen_int_mode (0xe9, QImode));
12740 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
12741 }
12742 else
12743 {
12744 int offset = 0;
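/* The 64-bit trampoline loads FNADDR into %r11 (a 6-byte movl $imm32, %r11d
   when the address fits in 32 bits, otherwise a 10-byte movabs), loads CXT
   into %r10 with a movabs, and finishes with an indirect jmp *%r11.  */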
12745 /* Try to load address using shorter movl instead of movabs.
12746 We may want to support movq for kernel mode, but the kernel does not use
12747 trampolines at the moment. */
12748 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
12749 {
12750 fnaddr = copy_to_mode_reg (DImode, fnaddr);
12751 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12752 gen_int_mode (0xbb41, HImode));
12753 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
12754 gen_lowpart (SImode, fnaddr));
12755 offset += 6;
12756 }
12757 else
12758 {
12759 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12760 gen_int_mode (0xbb49, HImode));
12761 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12762 fnaddr);
12763 offset += 10;
12764 }
12765 /* Load static chain using movabs to r10. */
12766 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12767 gen_int_mode (0xba49, HImode));
12768 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12769 cxt);
12770 offset += 10;
12771 /* Jump to %r11. */
12772 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12773 gen_int_mode (0xff49, HImode));
12774 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
12775 gen_int_mode (0xe3, QImode));
12776 offset += 3;
12777 gcc_assert (offset <= TRAMPOLINE_SIZE);
12778 }
12779
12780 #ifdef ENABLE_EXECUTE_STACK
12781 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
12782 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
12783 #endif
12784 }
12785 \f
12786 /* Codes for all the SSE/MMX builtins. */
12787 enum ix86_builtins
12788 {
12789 IX86_BUILTIN_ADDPS,
12790 IX86_BUILTIN_ADDSS,
12791 IX86_BUILTIN_DIVPS,
12792 IX86_BUILTIN_DIVSS,
12793 IX86_BUILTIN_MULPS,
12794 IX86_BUILTIN_MULSS,
12795 IX86_BUILTIN_SUBPS,
12796 IX86_BUILTIN_SUBSS,
12797
12798 IX86_BUILTIN_CMPEQPS,
12799 IX86_BUILTIN_CMPLTPS,
12800 IX86_BUILTIN_CMPLEPS,
12801 IX86_BUILTIN_CMPGTPS,
12802 IX86_BUILTIN_CMPGEPS,
12803 IX86_BUILTIN_CMPNEQPS,
12804 IX86_BUILTIN_CMPNLTPS,
12805 IX86_BUILTIN_CMPNLEPS,
12806 IX86_BUILTIN_CMPNGTPS,
12807 IX86_BUILTIN_CMPNGEPS,
12808 IX86_BUILTIN_CMPORDPS,
12809 IX86_BUILTIN_CMPUNORDPS,
12810 IX86_BUILTIN_CMPNEPS,
12811 IX86_BUILTIN_CMPEQSS,
12812 IX86_BUILTIN_CMPLTSS,
12813 IX86_BUILTIN_CMPLESS,
12814 IX86_BUILTIN_CMPNEQSS,
12815 IX86_BUILTIN_CMPNLTSS,
12816 IX86_BUILTIN_CMPNLESS,
12817 IX86_BUILTIN_CMPNGTSS,
12818 IX86_BUILTIN_CMPNGESS,
12819 IX86_BUILTIN_CMPORDSS,
12820 IX86_BUILTIN_CMPUNORDSS,
12821 IX86_BUILTIN_CMPNESS,
12822
12823 IX86_BUILTIN_COMIEQSS,
12824 IX86_BUILTIN_COMILTSS,
12825 IX86_BUILTIN_COMILESS,
12826 IX86_BUILTIN_COMIGTSS,
12827 IX86_BUILTIN_COMIGESS,
12828 IX86_BUILTIN_COMINEQSS,
12829 IX86_BUILTIN_UCOMIEQSS,
12830 IX86_BUILTIN_UCOMILTSS,
12831 IX86_BUILTIN_UCOMILESS,
12832 IX86_BUILTIN_UCOMIGTSS,
12833 IX86_BUILTIN_UCOMIGESS,
12834 IX86_BUILTIN_UCOMINEQSS,
12835
12836 IX86_BUILTIN_CVTPI2PS,
12837 IX86_BUILTIN_CVTPS2PI,
12838 IX86_BUILTIN_CVTSI2SS,
12839 IX86_BUILTIN_CVTSI642SS,
12840 IX86_BUILTIN_CVTSS2SI,
12841 IX86_BUILTIN_CVTSS2SI64,
12842 IX86_BUILTIN_CVTTPS2PI,
12843 IX86_BUILTIN_CVTTSS2SI,
12844 IX86_BUILTIN_CVTTSS2SI64,
12845
12846 IX86_BUILTIN_MAXPS,
12847 IX86_BUILTIN_MAXSS,
12848 IX86_BUILTIN_MINPS,
12849 IX86_BUILTIN_MINSS,
12850
12851 IX86_BUILTIN_LOADUPS,
12852 IX86_BUILTIN_STOREUPS,
12853 IX86_BUILTIN_MOVSS,
12854
12855 IX86_BUILTIN_MOVHLPS,
12856 IX86_BUILTIN_MOVLHPS,
12857 IX86_BUILTIN_LOADHPS,
12858 IX86_BUILTIN_LOADLPS,
12859 IX86_BUILTIN_STOREHPS,
12860 IX86_BUILTIN_STORELPS,
12861
12862 IX86_BUILTIN_MASKMOVQ,
12863 IX86_BUILTIN_MOVMSKPS,
12864 IX86_BUILTIN_PMOVMSKB,
12865
12866 IX86_BUILTIN_MOVNTPS,
12867 IX86_BUILTIN_MOVNTQ,
12868
12869 IX86_BUILTIN_LOADDQU,
12870 IX86_BUILTIN_STOREDQU,
12871
12872 IX86_BUILTIN_PACKSSWB,
12873 IX86_BUILTIN_PACKSSDW,
12874 IX86_BUILTIN_PACKUSWB,
12875
12876 IX86_BUILTIN_PADDB,
12877 IX86_BUILTIN_PADDW,
12878 IX86_BUILTIN_PADDD,
12879 IX86_BUILTIN_PADDQ,
12880 IX86_BUILTIN_PADDSB,
12881 IX86_BUILTIN_PADDSW,
12882 IX86_BUILTIN_PADDUSB,
12883 IX86_BUILTIN_PADDUSW,
12884 IX86_BUILTIN_PSUBB,
12885 IX86_BUILTIN_PSUBW,
12886 IX86_BUILTIN_PSUBD,
12887 IX86_BUILTIN_PSUBQ,
12888 IX86_BUILTIN_PSUBSB,
12889 IX86_BUILTIN_PSUBSW,
12890 IX86_BUILTIN_PSUBUSB,
12891 IX86_BUILTIN_PSUBUSW,
12892
12893 IX86_BUILTIN_PAND,
12894 IX86_BUILTIN_PANDN,
12895 IX86_BUILTIN_POR,
12896 IX86_BUILTIN_PXOR,
12897
12898 IX86_BUILTIN_PAVGB,
12899 IX86_BUILTIN_PAVGW,
12900
12901 IX86_BUILTIN_PCMPEQB,
12902 IX86_BUILTIN_PCMPEQW,
12903 IX86_BUILTIN_PCMPEQD,
12904 IX86_BUILTIN_PCMPGTB,
12905 IX86_BUILTIN_PCMPGTW,
12906 IX86_BUILTIN_PCMPGTD,
12907
12908 IX86_BUILTIN_PMADDWD,
12909
12910 IX86_BUILTIN_PMAXSW,
12911 IX86_BUILTIN_PMAXUB,
12912 IX86_BUILTIN_PMINSW,
12913 IX86_BUILTIN_PMINUB,
12914
12915 IX86_BUILTIN_PMULHUW,
12916 IX86_BUILTIN_PMULHW,
12917 IX86_BUILTIN_PMULLW,
12918
12919 IX86_BUILTIN_PSADBW,
12920 IX86_BUILTIN_PSHUFW,
12921
12922 IX86_BUILTIN_PSLLW,
12923 IX86_BUILTIN_PSLLD,
12924 IX86_BUILTIN_PSLLQ,
12925 IX86_BUILTIN_PSRAW,
12926 IX86_BUILTIN_PSRAD,
12927 IX86_BUILTIN_PSRLW,
12928 IX86_BUILTIN_PSRLD,
12929 IX86_BUILTIN_PSRLQ,
12930 IX86_BUILTIN_PSLLWI,
12931 IX86_BUILTIN_PSLLDI,
12932 IX86_BUILTIN_PSLLQI,
12933 IX86_BUILTIN_PSRAWI,
12934 IX86_BUILTIN_PSRADI,
12935 IX86_BUILTIN_PSRLWI,
12936 IX86_BUILTIN_PSRLDI,
12937 IX86_BUILTIN_PSRLQI,
12938
12939 IX86_BUILTIN_PUNPCKHBW,
12940 IX86_BUILTIN_PUNPCKHWD,
12941 IX86_BUILTIN_PUNPCKHDQ,
12942 IX86_BUILTIN_PUNPCKLBW,
12943 IX86_BUILTIN_PUNPCKLWD,
12944 IX86_BUILTIN_PUNPCKLDQ,
12945
12946 IX86_BUILTIN_SHUFPS,
12947
12948 IX86_BUILTIN_RCPPS,
12949 IX86_BUILTIN_RCPSS,
12950 IX86_BUILTIN_RSQRTPS,
12951 IX86_BUILTIN_RSQRTSS,
12952 IX86_BUILTIN_SQRTPS,
12953 IX86_BUILTIN_SQRTSS,
12954
12955 IX86_BUILTIN_UNPCKHPS,
12956 IX86_BUILTIN_UNPCKLPS,
12957
12958 IX86_BUILTIN_ANDPS,
12959 IX86_BUILTIN_ANDNPS,
12960 IX86_BUILTIN_ORPS,
12961 IX86_BUILTIN_XORPS,
12962
12963 IX86_BUILTIN_EMMS,
12964 IX86_BUILTIN_LDMXCSR,
12965 IX86_BUILTIN_STMXCSR,
12966 IX86_BUILTIN_SFENCE,
12967
12968 /* 3DNow! Original */
12969 IX86_BUILTIN_FEMMS,
12970 IX86_BUILTIN_PAVGUSB,
12971 IX86_BUILTIN_PF2ID,
12972 IX86_BUILTIN_PFACC,
12973 IX86_BUILTIN_PFADD,
12974 IX86_BUILTIN_PFCMPEQ,
12975 IX86_BUILTIN_PFCMPGE,
12976 IX86_BUILTIN_PFCMPGT,
12977 IX86_BUILTIN_PFMAX,
12978 IX86_BUILTIN_PFMIN,
12979 IX86_BUILTIN_PFMUL,
12980 IX86_BUILTIN_PFRCP,
12981 IX86_BUILTIN_PFRCPIT1,
12982 IX86_BUILTIN_PFRCPIT2,
12983 IX86_BUILTIN_PFRSQIT1,
12984 IX86_BUILTIN_PFRSQRT,
12985 IX86_BUILTIN_PFSUB,
12986 IX86_BUILTIN_PFSUBR,
12987 IX86_BUILTIN_PI2FD,
12988 IX86_BUILTIN_PMULHRW,
12989
12990 /* 3DNow! Athlon Extensions */
12991 IX86_BUILTIN_PF2IW,
12992 IX86_BUILTIN_PFNACC,
12993 IX86_BUILTIN_PFPNACC,
12994 IX86_BUILTIN_PI2FW,
12995 IX86_BUILTIN_PSWAPDSI,
12996 IX86_BUILTIN_PSWAPDSF,
12997
12998 /* SSE2 */
12999 IX86_BUILTIN_ADDPD,
13000 IX86_BUILTIN_ADDSD,
13001 IX86_BUILTIN_DIVPD,
13002 IX86_BUILTIN_DIVSD,
13003 IX86_BUILTIN_MULPD,
13004 IX86_BUILTIN_MULSD,
13005 IX86_BUILTIN_SUBPD,
13006 IX86_BUILTIN_SUBSD,
13007
13008 IX86_BUILTIN_CMPEQPD,
13009 IX86_BUILTIN_CMPLTPD,
13010 IX86_BUILTIN_CMPLEPD,
13011 IX86_BUILTIN_CMPGTPD,
13012 IX86_BUILTIN_CMPGEPD,
13013 IX86_BUILTIN_CMPNEQPD,
13014 IX86_BUILTIN_CMPNLTPD,
13015 IX86_BUILTIN_CMPNLEPD,
13016 IX86_BUILTIN_CMPNGTPD,
13017 IX86_BUILTIN_CMPNGEPD,
13018 IX86_BUILTIN_CMPORDPD,
13019 IX86_BUILTIN_CMPUNORDPD,
13020 IX86_BUILTIN_CMPNEPD,
13021 IX86_BUILTIN_CMPEQSD,
13022 IX86_BUILTIN_CMPLTSD,
13023 IX86_BUILTIN_CMPLESD,
13024 IX86_BUILTIN_CMPNEQSD,
13025 IX86_BUILTIN_CMPNLTSD,
13026 IX86_BUILTIN_CMPNLESD,
13027 IX86_BUILTIN_CMPORDSD,
13028 IX86_BUILTIN_CMPUNORDSD,
13029 IX86_BUILTIN_CMPNESD,
13030
13031 IX86_BUILTIN_COMIEQSD,
13032 IX86_BUILTIN_COMILTSD,
13033 IX86_BUILTIN_COMILESD,
13034 IX86_BUILTIN_COMIGTSD,
13035 IX86_BUILTIN_COMIGESD,
13036 IX86_BUILTIN_COMINEQSD,
13037 IX86_BUILTIN_UCOMIEQSD,
13038 IX86_BUILTIN_UCOMILTSD,
13039 IX86_BUILTIN_UCOMILESD,
13040 IX86_BUILTIN_UCOMIGTSD,
13041 IX86_BUILTIN_UCOMIGESD,
13042 IX86_BUILTIN_UCOMINEQSD,
13043
13044 IX86_BUILTIN_MAXPD,
13045 IX86_BUILTIN_MAXSD,
13046 IX86_BUILTIN_MINPD,
13047 IX86_BUILTIN_MINSD,
13048
13049 IX86_BUILTIN_ANDPD,
13050 IX86_BUILTIN_ANDNPD,
13051 IX86_BUILTIN_ORPD,
13052 IX86_BUILTIN_XORPD,
13053
13054 IX86_BUILTIN_SQRTPD,
13055 IX86_BUILTIN_SQRTSD,
13056
13057 IX86_BUILTIN_UNPCKHPD,
13058 IX86_BUILTIN_UNPCKLPD,
13059
13060 IX86_BUILTIN_SHUFPD,
13061
13062 IX86_BUILTIN_LOADUPD,
13063 IX86_BUILTIN_STOREUPD,
13064 IX86_BUILTIN_MOVSD,
13065
13066 IX86_BUILTIN_LOADHPD,
13067 IX86_BUILTIN_LOADLPD,
13068
13069 IX86_BUILTIN_CVTDQ2PD,
13070 IX86_BUILTIN_CVTDQ2PS,
13071
13072 IX86_BUILTIN_CVTPD2DQ,
13073 IX86_BUILTIN_CVTPD2PI,
13074 IX86_BUILTIN_CVTPD2PS,
13075 IX86_BUILTIN_CVTTPD2DQ,
13076 IX86_BUILTIN_CVTTPD2PI,
13077
13078 IX86_BUILTIN_CVTPI2PD,
13079 IX86_BUILTIN_CVTSI2SD,
13080 IX86_BUILTIN_CVTSI642SD,
13081
13082 IX86_BUILTIN_CVTSD2SI,
13083 IX86_BUILTIN_CVTSD2SI64,
13084 IX86_BUILTIN_CVTSD2SS,
13085 IX86_BUILTIN_CVTSS2SD,
13086 IX86_BUILTIN_CVTTSD2SI,
13087 IX86_BUILTIN_CVTTSD2SI64,
13088
13089 IX86_BUILTIN_CVTPS2DQ,
13090 IX86_BUILTIN_CVTPS2PD,
13091 IX86_BUILTIN_CVTTPS2DQ,
13092
13093 IX86_BUILTIN_MOVNTI,
13094 IX86_BUILTIN_MOVNTPD,
13095 IX86_BUILTIN_MOVNTDQ,
13096
13097 /* SSE2 MMX */
13098 IX86_BUILTIN_MASKMOVDQU,
13099 IX86_BUILTIN_MOVMSKPD,
13100 IX86_BUILTIN_PMOVMSKB128,
13101
13102 IX86_BUILTIN_PACKSSWB128,
13103 IX86_BUILTIN_PACKSSDW128,
13104 IX86_BUILTIN_PACKUSWB128,
13105
13106 IX86_BUILTIN_PADDB128,
13107 IX86_BUILTIN_PADDW128,
13108 IX86_BUILTIN_PADDD128,
13109 IX86_BUILTIN_PADDQ128,
13110 IX86_BUILTIN_PADDSB128,
13111 IX86_BUILTIN_PADDSW128,
13112 IX86_BUILTIN_PADDUSB128,
13113 IX86_BUILTIN_PADDUSW128,
13114 IX86_BUILTIN_PSUBB128,
13115 IX86_BUILTIN_PSUBW128,
13116 IX86_BUILTIN_PSUBD128,
13117 IX86_BUILTIN_PSUBQ128,
13118 IX86_BUILTIN_PSUBSB128,
13119 IX86_BUILTIN_PSUBSW128,
13120 IX86_BUILTIN_PSUBUSB128,
13121 IX86_BUILTIN_PSUBUSW128,
13122
13123 IX86_BUILTIN_PAND128,
13124 IX86_BUILTIN_PANDN128,
13125 IX86_BUILTIN_POR128,
13126 IX86_BUILTIN_PXOR128,
13127
13128 IX86_BUILTIN_PAVGB128,
13129 IX86_BUILTIN_PAVGW128,
13130
13131 IX86_BUILTIN_PCMPEQB128,
13132 IX86_BUILTIN_PCMPEQW128,
13133 IX86_BUILTIN_PCMPEQD128,
13134 IX86_BUILTIN_PCMPGTB128,
13135 IX86_BUILTIN_PCMPGTW128,
13136 IX86_BUILTIN_PCMPGTD128,
13137
13138 IX86_BUILTIN_PMADDWD128,
13139
13140 IX86_BUILTIN_PMAXSW128,
13141 IX86_BUILTIN_PMAXUB128,
13142 IX86_BUILTIN_PMINSW128,
13143 IX86_BUILTIN_PMINUB128,
13144
13145 IX86_BUILTIN_PMULUDQ,
13146 IX86_BUILTIN_PMULUDQ128,
13147 IX86_BUILTIN_PMULHUW128,
13148 IX86_BUILTIN_PMULHW128,
13149 IX86_BUILTIN_PMULLW128,
13150
13151 IX86_BUILTIN_PSADBW128,
13152 IX86_BUILTIN_PSHUFHW,
13153 IX86_BUILTIN_PSHUFLW,
13154 IX86_BUILTIN_PSHUFD,
13155
13156 IX86_BUILTIN_PSLLW128,
13157 IX86_BUILTIN_PSLLD128,
13158 IX86_BUILTIN_PSLLQ128,
13159 IX86_BUILTIN_PSRAW128,
13160 IX86_BUILTIN_PSRAD128,
13161 IX86_BUILTIN_PSRLW128,
13162 IX86_BUILTIN_PSRLD128,
13163 IX86_BUILTIN_PSRLQ128,
13164 IX86_BUILTIN_PSLLDQI128,
13165 IX86_BUILTIN_PSLLWI128,
13166 IX86_BUILTIN_PSLLDI128,
13167 IX86_BUILTIN_PSLLQI128,
13168 IX86_BUILTIN_PSRAWI128,
13169 IX86_BUILTIN_PSRADI128,
13170 IX86_BUILTIN_PSRLDQI128,
13171 IX86_BUILTIN_PSRLWI128,
13172 IX86_BUILTIN_PSRLDI128,
13173 IX86_BUILTIN_PSRLQI128,
13174
13175 IX86_BUILTIN_PUNPCKHBW128,
13176 IX86_BUILTIN_PUNPCKHWD128,
13177 IX86_BUILTIN_PUNPCKHDQ128,
13178 IX86_BUILTIN_PUNPCKHQDQ128,
13179 IX86_BUILTIN_PUNPCKLBW128,
13180 IX86_BUILTIN_PUNPCKLWD128,
13181 IX86_BUILTIN_PUNPCKLDQ128,
13182 IX86_BUILTIN_PUNPCKLQDQ128,
13183
13184 IX86_BUILTIN_CLFLUSH,
13185 IX86_BUILTIN_MFENCE,
13186 IX86_BUILTIN_LFENCE,
13187
13188 /* Prescott New Instructions. */
13189 IX86_BUILTIN_ADDSUBPS,
13190 IX86_BUILTIN_HADDPS,
13191 IX86_BUILTIN_HSUBPS,
13192 IX86_BUILTIN_MOVSHDUP,
13193 IX86_BUILTIN_MOVSLDUP,
13194 IX86_BUILTIN_ADDSUBPD,
13195 IX86_BUILTIN_HADDPD,
13196 IX86_BUILTIN_HSUBPD,
13197 IX86_BUILTIN_LDDQU,
13198
13199 IX86_BUILTIN_MONITOR,
13200 IX86_BUILTIN_MWAIT,
13201
13202 IX86_BUILTIN_VEC_INIT_V2SI,
13203 IX86_BUILTIN_VEC_INIT_V4HI,
13204 IX86_BUILTIN_VEC_INIT_V8QI,
13205 IX86_BUILTIN_VEC_EXT_V2DF,
13206 IX86_BUILTIN_VEC_EXT_V2DI,
13207 IX86_BUILTIN_VEC_EXT_V4SF,
13208 IX86_BUILTIN_VEC_EXT_V4SI,
13209 IX86_BUILTIN_VEC_EXT_V8HI,
13210 IX86_BUILTIN_VEC_EXT_V2SI,
13211 IX86_BUILTIN_VEC_EXT_V4HI,
13212 IX86_BUILTIN_VEC_SET_V8HI,
13213 IX86_BUILTIN_VEC_SET_V4HI,
13214
13215 IX86_BUILTIN_MAX
13216 };
13217
13218 #define def_builtin(MASK, NAME, TYPE, CODE) \
13219 do { \
13220 if ((MASK) & target_flags \
13221 && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
13222 lang_hooks.builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
13223 NULL, NULL_TREE); \
13224 } while (0)
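/* Illustrative only: a sketch of how the macro is used further down
   in this file:

     def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd",
                  v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);

   The builtin is registered whenever MASK has at least one bit in
   common with target_flags (here, when -msse2 is in effect), and
   entries carrying MASK_64BIT are additionally skipped unless
   TARGET_64BIT holds.  */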
13225
13226 /* Bits for builtin_description.flag. */
13227
13228 /* Set when we don't support the comparison natively, and should
13229 swap the comparison operands in order to support it. */
13230 #define BUILTIN_DESC_SWAP_OPERANDS 1
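/* For example, there is no native greater-than mask compare for V4SF,
   so the bdesc_2arg entry for __builtin_ia32_cmpgtps below pairs the
   builtin with LT and sets BUILTIN_DESC_SWAP_OPERANDS: cmpgt (a, b)
   is then expanded as cmplt (b, a).  */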
13231
13232 struct builtin_description
13233 {
13234 const unsigned int mask;
13235 const enum insn_code icode;
13236 const char *const name;
13237 const enum ix86_builtins code;
13238 const enum rtx_code comparison;
13239 const unsigned int flag;
13240 };
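/* Each row in the tables that follow supplies the target_flags mask
   that must be enabled, the insn pattern used to expand the builtin,
   its user-visible name (0 when the builtin is instead registered by
   hand below with a more specific prototype), its IX86_BUILTIN_* code
   and, for the compare patterns, the rtx comparison code together with
   the flag bits above.  */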
13241
13242 static const struct builtin_description bdesc_comi[] =
13243 {
13244 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
13245 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
13246 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
13247 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
13248 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
13249 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
13250 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
13251 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
13252 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
13253 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
13254 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
13255 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
13256 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
13257 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
13258 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
13259 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
13260 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
13261 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
13262 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
13263 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
13264 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
13265 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
13266 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
13267 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
13268 };
13269
13270 static const struct builtin_description bdesc_2arg[] =
13271 {
13272 /* SSE */
13273 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
13274 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
13275 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
13276 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
13277 { MASK_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
13278 { MASK_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
13279 { MASK_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
13280 { MASK_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
13281
13282 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
13283 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
13284 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
13285 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT,
13286 BUILTIN_DESC_SWAP_OPERANDS },
13287 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE,
13288 BUILTIN_DESC_SWAP_OPERANDS },
13289 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
13290 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
13291 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
13292 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
13293 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE,
13294 BUILTIN_DESC_SWAP_OPERANDS },
13295 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT,
13296 BUILTIN_DESC_SWAP_OPERANDS },
13297 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
13298 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
13299 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
13300 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
13301 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
13302 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
13303 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
13304 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
13305 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE,
13306 BUILTIN_DESC_SWAP_OPERANDS },
13307 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT,
13308 BUILTIN_DESC_SWAP_OPERANDS },
13309 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
13310
13311 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
13312 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
13313 { MASK_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
13314 { MASK_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
13315
13316 { MASK_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
13317 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
13318 { MASK_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
13319 { MASK_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
13320
13321 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
13322 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
13323 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
13324 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
13325 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
13326
13327 /* MMX */
13328 { MASK_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
13329 { MASK_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
13330 { MASK_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
13331 { MASK_MMX, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
13332 { MASK_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
13333 { MASK_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
13334 { MASK_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
13335 { MASK_MMX, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
13336
13337 { MASK_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
13338 { MASK_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
13339 { MASK_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
13340 { MASK_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
13341 { MASK_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
13342 { MASK_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
13343 { MASK_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
13344 { MASK_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
13345
13346 { MASK_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
13347 { MASK_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
13348 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
13349
13350 { MASK_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
13351 { MASK_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
13352 { MASK_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
13353 { MASK_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
13354
13355 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
13356 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
13357
13358 { MASK_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
13359 { MASK_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
13360 { MASK_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
13361 { MASK_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
13362 { MASK_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
13363 { MASK_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
13364
13365 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
13366 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
13367 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
13368 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
13369
13370 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
13371 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
13372 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
13373 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
13374 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
13375 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
13376
13377 /* Special. */
13378 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
13379 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
13380 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
13381
13382 { MASK_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
13383 { MASK_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
13384 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
13385
13386 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
13387 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
13388 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
13389 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
13390 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
13391 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
13392
13393 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
13394 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
13395 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
13396 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
13397 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
13398 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
13399
13400 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
13401 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
13402 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
13403 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
13404
13405 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
13406 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
13407
13408 /* SSE2 */
13409 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
13410 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
13411 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
13412 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
13413 { MASK_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
13414 { MASK_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
13415 { MASK_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
13416 { MASK_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
13417
13418 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
13419 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
13420 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
13421 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT,
13422 BUILTIN_DESC_SWAP_OPERANDS },
13423 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE,
13424 BUILTIN_DESC_SWAP_OPERANDS },
13425 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
13426 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
13427 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
13428 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
13429 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE,
13430 BUILTIN_DESC_SWAP_OPERANDS },
13431 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT,
13432 BUILTIN_DESC_SWAP_OPERANDS },
13433 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
13434 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
13435 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
13436 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
13437 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
13438 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
13439 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
13440 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
13441 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },
13442
13443 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
13444 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
13445 { MASK_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
13446 { MASK_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
13447
13448 { MASK_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
13449 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
13450 { MASK_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
13451 { MASK_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
13452
13453 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
13454 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
13455 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
13456
13457 /* SSE2 MMX */
13458 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
13459 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
13460 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
13461 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
13462 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
13463 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
13464 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
13465 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
13466
13467 { MASK_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
13468 { MASK_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
13469 { MASK_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
13470 { MASK_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
13471 { MASK_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
13472 { MASK_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
13473 { MASK_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
13474 { MASK_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
13475
13476 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
13477 { MASK_SSE2, CODE_FOR_sse2_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
13478
13479 { MASK_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
13480 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
13481 { MASK_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
13482 { MASK_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
13483
13484 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
13485 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
13486
13487 { MASK_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
13488 { MASK_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
13489 { MASK_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
13490 { MASK_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
13491 { MASK_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
13492 { MASK_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
13493
13494 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
13495 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
13496 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
13497 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
13498
13499 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
13500 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
13501 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
13502 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
13503 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
13504 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
13505 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
13506 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
13507
13508 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
13509 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
13510 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
13511
13512 { MASK_SSE2, CODE_FOR_sse2_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
13513 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
13514
13515 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
13516 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },
13517
13518 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
13519 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
13520 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
13521
13522 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
13523 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
13524 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
13525
13526 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
13527 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
13528
13529 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
13530
13531 { MASK_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
13532 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
13533 { MASK_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
13534 { MASK_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
13535
13536 /* SSE3 MMX */
13537 { MASK_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
13538 { MASK_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
13539 { MASK_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
13540 { MASK_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
13541 { MASK_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
13542 { MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
13543 };
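/* A usage sketch of how an entry in the table above reaches user code
   (the intrinsic wrappers themselves live in xmmintrin.h and
   emmintrin.h, not here):

     __v4sf a, b;
     __v4sf sum = __builtin_ia32_addps (a, b);

   is what _mm_add_ps expands to, and the builtin name is only
   available when the row's mask is enabled, e.g. -msse for the SSE
   rows.  */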
13544
13545 static const struct builtin_description bdesc_1arg[] =
13546 {
13547 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
13548 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
13549
13550 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
13551 { MASK_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
13552 { MASK_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
13553
13554 { MASK_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
13555 { MASK_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
13556 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
13557 { MASK_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
13558 { MASK_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
13559 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
13560
13561 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
13562 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
13563
13564 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
13565
13566 { MASK_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
13567 { MASK_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
13568
13569 { MASK_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
13570 { MASK_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
13571 { MASK_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
13572 { MASK_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
13573 { MASK_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
13574
13575 { MASK_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
13576
13577 { MASK_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
13578 { MASK_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
13579 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
13580 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
13581
13582 { MASK_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
13583 { MASK_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
13584 { MASK_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
13585
13586 /* SSE3 */
13587 { MASK_SSE3, CODE_FOR_sse3_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
13588 { MASK_SSE3, CODE_FOR_sse3_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
13589 };
13590
13591 static void
13592 ix86_init_builtins (void)
13593 {
13594 if (TARGET_MMX)
13595 ix86_init_mmx_sse_builtins ();
13596 }
13597
13598 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
13599 is zero. Otherwise, if TARGET_SSE is not set, only the MMX
13600 builtins are defined. */
13601 static void
13602 ix86_init_mmx_sse_builtins (void)
13603 {
13604 const struct builtin_description * d;
13605 size_t i;
13606
13607 tree V16QI_type_node = build_vector_type_for_mode (intQI_type_node, V16QImode);
13608 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
13609 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
13610 tree V2DI_type_node
13611 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
13612 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
13613 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
13614 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
13615 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
13616 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
13617 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
13618
13619 tree pchar_type_node = build_pointer_type (char_type_node);
13620 tree pcchar_type_node = build_pointer_type (
13621 build_type_variant (char_type_node, 1, 0));
13622 tree pfloat_type_node = build_pointer_type (float_type_node);
13623 tree pcfloat_type_node = build_pointer_type (
13624 build_type_variant (float_type_node, 1, 0));
13625 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
13626 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
13627 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
13628
13629 /* Comparisons. */
13630 tree int_ftype_v4sf_v4sf
13631 = build_function_type_list (integer_type_node,
13632 V4SF_type_node, V4SF_type_node, NULL_TREE);
13633 tree v4si_ftype_v4sf_v4sf
13634 = build_function_type_list (V4SI_type_node,
13635 V4SF_type_node, V4SF_type_node, NULL_TREE);
13636 /* MMX/SSE/integer conversions. */
13637 tree int_ftype_v4sf
13638 = build_function_type_list (integer_type_node,
13639 V4SF_type_node, NULL_TREE);
13640 tree int64_ftype_v4sf
13641 = build_function_type_list (long_long_integer_type_node,
13642 V4SF_type_node, NULL_TREE);
13643 tree int_ftype_v8qi
13644 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
13645 tree v4sf_ftype_v4sf_int
13646 = build_function_type_list (V4SF_type_node,
13647 V4SF_type_node, integer_type_node, NULL_TREE);
13648 tree v4sf_ftype_v4sf_int64
13649 = build_function_type_list (V4SF_type_node,
13650 V4SF_type_node, long_long_integer_type_node,
13651 NULL_TREE);
13652 tree v4sf_ftype_v4sf_v2si
13653 = build_function_type_list (V4SF_type_node,
13654 V4SF_type_node, V2SI_type_node, NULL_TREE);
13655
13656 /* Miscellaneous. */
13657 tree v8qi_ftype_v4hi_v4hi
13658 = build_function_type_list (V8QI_type_node,
13659 V4HI_type_node, V4HI_type_node, NULL_TREE);
13660 tree v4hi_ftype_v2si_v2si
13661 = build_function_type_list (V4HI_type_node,
13662 V2SI_type_node, V2SI_type_node, NULL_TREE);
13663 tree v4sf_ftype_v4sf_v4sf_int
13664 = build_function_type_list (V4SF_type_node,
13665 V4SF_type_node, V4SF_type_node,
13666 integer_type_node, NULL_TREE);
13667 tree v2si_ftype_v4hi_v4hi
13668 = build_function_type_list (V2SI_type_node,
13669 V4HI_type_node, V4HI_type_node, NULL_TREE);
13670 tree v4hi_ftype_v4hi_int
13671 = build_function_type_list (V4HI_type_node,
13672 V4HI_type_node, integer_type_node, NULL_TREE);
13673 tree v4hi_ftype_v4hi_di
13674 = build_function_type_list (V4HI_type_node,
13675 V4HI_type_node, long_long_unsigned_type_node,
13676 NULL_TREE);
13677 tree v2si_ftype_v2si_di
13678 = build_function_type_list (V2SI_type_node,
13679 V2SI_type_node, long_long_unsigned_type_node,
13680 NULL_TREE);
13681 tree void_ftype_void
13682 = build_function_type (void_type_node, void_list_node);
13683 tree void_ftype_unsigned
13684 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
13685 tree void_ftype_unsigned_unsigned
13686 = build_function_type_list (void_type_node, unsigned_type_node,
13687 unsigned_type_node, NULL_TREE);
13688 tree void_ftype_pcvoid_unsigned_unsigned
13689 = build_function_type_list (void_type_node, const_ptr_type_node,
13690 unsigned_type_node, unsigned_type_node,
13691 NULL_TREE);
13692 tree unsigned_ftype_void
13693 = build_function_type (unsigned_type_node, void_list_node);
13694 tree v2si_ftype_v4sf
13695 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
13696 /* Loads/stores. */
13697 tree void_ftype_v8qi_v8qi_pchar
13698 = build_function_type_list (void_type_node,
13699 V8QI_type_node, V8QI_type_node,
13700 pchar_type_node, NULL_TREE);
13701 tree v4sf_ftype_pcfloat
13702 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
13703 /* @@@ the type is bogus */
13704 tree v4sf_ftype_v4sf_pv2si
13705 = build_function_type_list (V4SF_type_node,
13706 V4SF_type_node, pv2si_type_node, NULL_TREE);
13707 tree void_ftype_pv2si_v4sf
13708 = build_function_type_list (void_type_node,
13709 pv2si_type_node, V4SF_type_node, NULL_TREE);
13710 tree void_ftype_pfloat_v4sf
13711 = build_function_type_list (void_type_node,
13712 pfloat_type_node, V4SF_type_node, NULL_TREE);
13713 tree void_ftype_pdi_di
13714 = build_function_type_list (void_type_node,
13715 pdi_type_node, long_long_unsigned_type_node,
13716 NULL_TREE);
13717 tree void_ftype_pv2di_v2di
13718 = build_function_type_list (void_type_node,
13719 pv2di_type_node, V2DI_type_node, NULL_TREE);
13720 /* Normal vector unops. */
13721 tree v4sf_ftype_v4sf
13722 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
13723
13724 /* Normal vector binops. */
13725 tree v4sf_ftype_v4sf_v4sf
13726 = build_function_type_list (V4SF_type_node,
13727 V4SF_type_node, V4SF_type_node, NULL_TREE);
13728 tree v8qi_ftype_v8qi_v8qi
13729 = build_function_type_list (V8QI_type_node,
13730 V8QI_type_node, V8QI_type_node, NULL_TREE);
13731 tree v4hi_ftype_v4hi_v4hi
13732 = build_function_type_list (V4HI_type_node,
13733 V4HI_type_node, V4HI_type_node, NULL_TREE);
13734 tree v2si_ftype_v2si_v2si
13735 = build_function_type_list (V2SI_type_node,
13736 V2SI_type_node, V2SI_type_node, NULL_TREE);
13737 tree di_ftype_di_di
13738 = build_function_type_list (long_long_unsigned_type_node,
13739 long_long_unsigned_type_node,
13740 long_long_unsigned_type_node, NULL_TREE);
13741
13742 tree v2si_ftype_v2sf
13743 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
13744 tree v2sf_ftype_v2si
13745 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
13746 tree v2si_ftype_v2si
13747 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
13748 tree v2sf_ftype_v2sf
13749 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
13750 tree v2sf_ftype_v2sf_v2sf
13751 = build_function_type_list (V2SF_type_node,
13752 V2SF_type_node, V2SF_type_node, NULL_TREE);
13753 tree v2si_ftype_v2sf_v2sf
13754 = build_function_type_list (V2SI_type_node,
13755 V2SF_type_node, V2SF_type_node, NULL_TREE);
13756 tree pint_type_node = build_pointer_type (integer_type_node);
13757 tree pdouble_type_node = build_pointer_type (double_type_node);
13758 tree pcdouble_type_node = build_pointer_type (
13759 build_type_variant (double_type_node, 1, 0));
13760 tree int_ftype_v2df_v2df
13761 = build_function_type_list (integer_type_node,
13762 V2DF_type_node, V2DF_type_node, NULL_TREE);
13763
13764 tree ti_ftype_ti_ti
13765 = build_function_type_list (intTI_type_node,
13766 intTI_type_node, intTI_type_node, NULL_TREE);
13767 tree void_ftype_pcvoid
13768 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
13769 tree v4sf_ftype_v4si
13770 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
13771 tree v4si_ftype_v4sf
13772 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
13773 tree v2df_ftype_v4si
13774 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
13775 tree v4si_ftype_v2df
13776 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
13777 tree v2si_ftype_v2df
13778 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
13779 tree v4sf_ftype_v2df
13780 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
13781 tree v2df_ftype_v2si
13782 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
13783 tree v2df_ftype_v4sf
13784 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
13785 tree int_ftype_v2df
13786 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
13787 tree int64_ftype_v2df
13788 = build_function_type_list (long_long_integer_type_node,
13789 V2DF_type_node, NULL_TREE);
13790 tree v2df_ftype_v2df_int
13791 = build_function_type_list (V2DF_type_node,
13792 V2DF_type_node, integer_type_node, NULL_TREE);
13793 tree v2df_ftype_v2df_int64
13794 = build_function_type_list (V2DF_type_node,
13795 V2DF_type_node, long_long_integer_type_node,
13796 NULL_TREE);
13797 tree v4sf_ftype_v4sf_v2df
13798 = build_function_type_list (V4SF_type_node,
13799 V4SF_type_node, V2DF_type_node, NULL_TREE);
13800 tree v2df_ftype_v2df_v4sf
13801 = build_function_type_list (V2DF_type_node,
13802 V2DF_type_node, V4SF_type_node, NULL_TREE);
13803 tree v2df_ftype_v2df_v2df_int
13804 = build_function_type_list (V2DF_type_node,
13805 V2DF_type_node, V2DF_type_node,
13806 integer_type_node,
13807 NULL_TREE);
13808 tree v2df_ftype_v2df_pcdouble
13809 = build_function_type_list (V2DF_type_node,
13810 V2DF_type_node, pcdouble_type_node, NULL_TREE);
13811 tree void_ftype_pdouble_v2df
13812 = build_function_type_list (void_type_node,
13813 pdouble_type_node, V2DF_type_node, NULL_TREE);
13814 tree void_ftype_pint_int
13815 = build_function_type_list (void_type_node,
13816 pint_type_node, integer_type_node, NULL_TREE);
13817 tree void_ftype_v16qi_v16qi_pchar
13818 = build_function_type_list (void_type_node,
13819 V16QI_type_node, V16QI_type_node,
13820 pchar_type_node, NULL_TREE);
13821 tree v2df_ftype_pcdouble
13822 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
13823 tree v2df_ftype_v2df_v2df
13824 = build_function_type_list (V2DF_type_node,
13825 V2DF_type_node, V2DF_type_node, NULL_TREE);
13826 tree v16qi_ftype_v16qi_v16qi
13827 = build_function_type_list (V16QI_type_node,
13828 V16QI_type_node, V16QI_type_node, NULL_TREE);
13829 tree v8hi_ftype_v8hi_v8hi
13830 = build_function_type_list (V8HI_type_node,
13831 V8HI_type_node, V8HI_type_node, NULL_TREE);
13832 tree v4si_ftype_v4si_v4si
13833 = build_function_type_list (V4SI_type_node,
13834 V4SI_type_node, V4SI_type_node, NULL_TREE);
13835 tree v2di_ftype_v2di_v2di
13836 = build_function_type_list (V2DI_type_node,
13837 V2DI_type_node, V2DI_type_node, NULL_TREE);
13838 tree v2di_ftype_v2df_v2df
13839 = build_function_type_list (V2DI_type_node,
13840 V2DF_type_node, V2DF_type_node, NULL_TREE);
13841 tree v2df_ftype_v2df
13842 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
13843 tree v2di_ftype_v2di_int
13844 = build_function_type_list (V2DI_type_node,
13845 V2DI_type_node, integer_type_node, NULL_TREE);
13846 tree v4si_ftype_v4si_int
13847 = build_function_type_list (V4SI_type_node,
13848 V4SI_type_node, integer_type_node, NULL_TREE);
13849 tree v8hi_ftype_v8hi_int
13850 = build_function_type_list (V8HI_type_node,
13851 V8HI_type_node, integer_type_node, NULL_TREE);
13852 tree v8hi_ftype_v8hi_v2di
13853 = build_function_type_list (V8HI_type_node,
13854 V8HI_type_node, V2DI_type_node, NULL_TREE);
13855 tree v4si_ftype_v4si_v2di
13856 = build_function_type_list (V4SI_type_node,
13857 V4SI_type_node, V2DI_type_node, NULL_TREE);
13858 tree v4si_ftype_v8hi_v8hi
13859 = build_function_type_list (V4SI_type_node,
13860 V8HI_type_node, V8HI_type_node, NULL_TREE);
13861 tree di_ftype_v8qi_v8qi
13862 = build_function_type_list (long_long_unsigned_type_node,
13863 V8QI_type_node, V8QI_type_node, NULL_TREE);
13864 tree di_ftype_v2si_v2si
13865 = build_function_type_list (long_long_unsigned_type_node,
13866 V2SI_type_node, V2SI_type_node, NULL_TREE);
13867 tree v2di_ftype_v16qi_v16qi
13868 = build_function_type_list (V2DI_type_node,
13869 V16QI_type_node, V16QI_type_node, NULL_TREE);
13870 tree v2di_ftype_v4si_v4si
13871 = build_function_type_list (V2DI_type_node,
13872 V4SI_type_node, V4SI_type_node, NULL_TREE);
13873 tree int_ftype_v16qi
13874 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
13875 tree v16qi_ftype_pcchar
13876 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
13877 tree void_ftype_pchar_v16qi
13878 = build_function_type_list (void_type_node,
13879 pchar_type_node, V16QI_type_node, NULL_TREE);
13880
13881 tree float80_type;
13882 tree float128_type;
13883 tree ftype;
13884
13885 /* The __float80 type. */
13886 if (TYPE_MODE (long_double_type_node) == XFmode)
13887 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
13888 "__float80");
13889 else
13890 {
13891 /* The __float80 type. */
13892 float80_type = make_node (REAL_TYPE);
13893 TYPE_PRECISION (float80_type) = 80;
13894 layout_type (float80_type);
13895 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
13896 }
13897
13898 float128_type = make_node (REAL_TYPE);
13899 TYPE_PRECISION (float128_type) = 128;
13900 layout_type (float128_type);
13901 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
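/* Once these registrations have run, user code for this target can
   name the types directly, e.g.

     __float80 x;
     __float128 y;

   (a usage sketch; only the type names are registered here, the
   operations on them are provided elsewhere in the compiler).  */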
13902
13903 /* Add all builtins that are more or less simple operations on two
13904 operands. */
13905 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
13906 {
13907 /* Use one of the operands; the target can have a different mode for
13908 mask-generating compares. */
13909 enum machine_mode mode;
13910 tree type;
13911
13912 if (d->name == 0)
13913 continue;
13914 mode = insn_data[d->icode].operand[1].mode;
13915
13916 switch (mode)
13917 {
13918 case V16QImode:
13919 type = v16qi_ftype_v16qi_v16qi;
13920 break;
13921 case V8HImode:
13922 type = v8hi_ftype_v8hi_v8hi;
13923 break;
13924 case V4SImode:
13925 type = v4si_ftype_v4si_v4si;
13926 break;
13927 case V2DImode:
13928 type = v2di_ftype_v2di_v2di;
13929 break;
13930 case V2DFmode:
13931 type = v2df_ftype_v2df_v2df;
13932 break;
13933 case TImode:
13934 type = ti_ftype_ti_ti;
13935 break;
13936 case V4SFmode:
13937 type = v4sf_ftype_v4sf_v4sf;
13938 break;
13939 case V8QImode:
13940 type = v8qi_ftype_v8qi_v8qi;
13941 break;
13942 case V4HImode:
13943 type = v4hi_ftype_v4hi_v4hi;
13944 break;
13945 case V2SImode:
13946 type = v2si_ftype_v2si_v2si;
13947 break;
13948 case DImode:
13949 type = di_ftype_di_di;
13950 break;
13951
13952 default:
13953 gcc_unreachable ();
13954 }
13955
13956 /* Override for comparisons. */
13957 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
13958 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
13959 type = v4si_ftype_v4sf_v4sf;
13960
13961 if (d->icode == CODE_FOR_sse2_maskcmpv2df3
13962 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
13963 type = v2di_ftype_v2df_v2df;
13964
13965 def_builtin (d->mask, d->name, type, d->code);
13966 }
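/* As a concrete example of the mapping above: the operands of
   CODE_FOR_addv2df3 are V2DFmode, so __builtin_ia32_addpd receives the
   v2df_ftype_v2df_v2df prototype, while the mask-compare patterns are
   the only ones overridden by hand (they return a vector of integer
   masks rather than floats).  */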
13967
13968 /* Add the remaining MMX insns with somewhat more complicated types. */
13969 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
13970 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
13971 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
13972 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
13973
13974 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
13975 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
13976 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
13977
13978 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
13979 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
13980
13981 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
13982 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
13983
13984 /* comi/ucomi insns. */
13985 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
13986 if (d->mask == MASK_SSE2)
13987 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
13988 else
13989 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
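/* For instance, the loop above gives __builtin_ia32_comieq the
   int (V4SF, V4SF) prototype; it is the primitive that _mm_comieq_ss
   in xmmintrin.h wraps, and it yields 0 or 1 rather than a mask.  */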
13990
13991 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
13992 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
13993 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
13994
13995 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
13996 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
13997 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
13998 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
13999 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
14000 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
14001 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
14002 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
14003 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
14004 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
14005 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
14006
14007 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
14008
14009 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
14010 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
14011
14012 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
14013 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
14014 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
14015 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
14016
14017 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
14018 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
14019 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
14020 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
14021
14022 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
14023
14024 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
14025
14026 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
14027 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
14028 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
14029 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
14030 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
14031 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
14032
14033 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
14034
14035 /* Original 3DNow! */
14036 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
14037 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
14038 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
14039 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
14040 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
14041 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
14042 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
14043 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
14044 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
14045 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
14046 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
14047 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
14048 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
14049 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
14050 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
14051 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
14052 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
14053 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
14054 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
14055 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
14056
14057 /* 3DNow! extension as used in the Athlon CPU. */
14058 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
14059 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
14060 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
14061 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
14062 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
14063 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
14064
14065 /* SSE2 */
14066 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
14067
14068 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
14069 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
14070
14071 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
14072 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
14073
14074 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
14075 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
14076 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
14077 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
14078 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
14079
14080 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
14081 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
14082 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
14083 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
14084
14085 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
14086 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
14087
14088 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
14089
14090 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
14091 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
14092
14093 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
14094 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
14095 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
14096 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
14097 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
14098
14099 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
14100
14101 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
14102 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
14103 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
14104 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
14105
14106 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
14107 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
14108 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
14109
14110 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
14111 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
14112 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
14113 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
14114
14115 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
14116 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
14117 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
14118
14119 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
14120 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
14121
14122 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
14123 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
14124
14125 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
14126 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
14127 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
14128
14129 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
14130 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
14131 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
14132
14133 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
14134 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
14135
14136 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
14137 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
14138 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
14139 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
14140
14141 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
14142 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
14143 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
14144 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
14145
14146 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
14147 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
14148
14149 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
14150
14151 /* Prescott New Instructions. */
14152 def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
14153 void_ftype_pcvoid_unsigned_unsigned,
14154 IX86_BUILTIN_MONITOR);
14155 def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
14156 void_ftype_unsigned_unsigned,
14157 IX86_BUILTIN_MWAIT);
14158 def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
14159 v4sf_ftype_v4sf,
14160 IX86_BUILTIN_MOVSHDUP);
14161 def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
14162 v4sf_ftype_v4sf,
14163 IX86_BUILTIN_MOVSLDUP);
14164 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
14165 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
14166
14167 /* Access to the vec_init patterns. */
14168 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
14169 integer_type_node, NULL_TREE);
14170 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v2si",
14171 ftype, IX86_BUILTIN_VEC_INIT_V2SI);
14172
14173 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
14174 short_integer_type_node,
14175 short_integer_type_node,
14176 short_integer_type_node, NULL_TREE);
14177 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v4hi",
14178 ftype, IX86_BUILTIN_VEC_INIT_V4HI);
14179
14180 ftype = build_function_type_list (V8QI_type_node, char_type_node,
14181 char_type_node, char_type_node,
14182 char_type_node, char_type_node,
14183 char_type_node, char_type_node,
14184 char_type_node, NULL_TREE);
14185 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v8qi",
14186 ftype, IX86_BUILTIN_VEC_INIT_V8QI);
14187
14188 /* Access to the vec_extract patterns. */
14189 ftype = build_function_type_list (double_type_node, V2DF_type_node,
14190 integer_type_node, NULL_TREE);
14191 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2df",
14192 ftype, IX86_BUILTIN_VEC_EXT_V2DF);
14193
14194 ftype = build_function_type_list (long_long_integer_type_node,
14195 V2DI_type_node, integer_type_node,
14196 NULL_TREE);
14197 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2di",
14198 ftype, IX86_BUILTIN_VEC_EXT_V2DI);
14199
14200 ftype = build_function_type_list (float_type_node, V4SF_type_node,
14201 integer_type_node, NULL_TREE);
14202 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4sf",
14203 ftype, IX86_BUILTIN_VEC_EXT_V4SF);
14204
14205 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
14206 integer_type_node, NULL_TREE);
14207 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4si",
14208 ftype, IX86_BUILTIN_VEC_EXT_V4SI);
14209
14210 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
14211 integer_type_node, NULL_TREE);
14212 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v8hi",
14213 ftype, IX86_BUILTIN_VEC_EXT_V8HI);
14214
14215 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
14216 integer_type_node, NULL_TREE);
14217 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_ext_v4hi",
14218 ftype, IX86_BUILTIN_VEC_EXT_V4HI);
14219
14220 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
14221 integer_type_node, NULL_TREE);
14222 def_builtin (MASK_MMX, "__builtin_ia32_vec_ext_v2si",
14223 ftype, IX86_BUILTIN_VEC_EXT_V2SI);
14224
14225 /* Access to the vec_set patterns. */
14226 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
14227 intHI_type_node,
14228 integer_type_node, NULL_TREE);
14229 def_builtin (MASK_SSE, "__builtin_ia32_vec_set_v8hi",
14230 ftype, IX86_BUILTIN_VEC_SET_V8HI);
14231
14232 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
14233 intHI_type_node,
14234 integer_type_node, NULL_TREE);
14235 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_set_v4hi",
14236 ftype, IX86_BUILTIN_VEC_SET_V4HI);
14237 }
14238
14239 /* Errors in the source file can cause expand_expr to return const0_rtx
14240 where we expect a vector. To avoid crashing, use one of the vector
14241 clear instructions. */
14242 static rtx
14243 safe_vector_operand (rtx x, enum machine_mode mode)
14244 {
14245 if (x == const0_rtx)
14246 x = CONST0_RTX (mode);
14247 return x;
14248 }
14249
14250 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
14251
14252 static rtx
14253 ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
14254 {
14255 rtx pat, xops[3];
14256 tree arg0 = TREE_VALUE (arglist);
14257 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14258 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14259 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14260 enum machine_mode tmode = insn_data[icode].operand[0].mode;
14261 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
14262 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
14263
14264 if (VECTOR_MODE_P (mode0))
14265 op0 = safe_vector_operand (op0, mode0);
14266 if (VECTOR_MODE_P (mode1))
14267 op1 = safe_vector_operand (op1, mode1);
14268
14269 if (optimize || !target
14270 || GET_MODE (target) != tmode
14271 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14272 target = gen_reg_rtx (tmode);
14273
14274 if (GET_MODE (op1) == SImode && mode1 == TImode)
14275 {
14276 rtx x = gen_reg_rtx (V4SImode);
14277 emit_insn (gen_sse2_loadd (x, op1));
14278 op1 = gen_lowpart (TImode, x);
14279 }
14280
14281 /* The insn must want input operands in the same modes as the
14282 result. */
14283 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
14284 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
14285
14286 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
14287 op0 = copy_to_mode_reg (mode0, op0);
14288 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
14289 op1 = copy_to_mode_reg (mode1, op1);
14290
14291 /* ??? Using ix86_fixup_binary_operands is problematic when
14292 we've got mismatched modes. Fake it. */
14293
14294 xops[0] = target;
14295 xops[1] = op0;
14296 xops[2] = op1;
14297
14298 if (tmode == mode0 && tmode == mode1)
14299 {
14300 target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
14301 op0 = xops[1];
14302 op1 = xops[2];
14303 }
14304 else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
14305 {
14306 op0 = force_reg (mode0, op0);
14307 op1 = force_reg (mode1, op1);
14308 target = gen_reg_rtx (tmode);
14309 }
14310
14311 pat = GEN_FCN (icode) (target, op0, op1);
14312 if (! pat)
14313 return 0;
14314 emit_insn (pat);
14315 return target;
14316 }
14317
14318 /* Subroutine of ix86_expand_builtin to take care of stores. */
14319
14320 static rtx
14321 ix86_expand_store_builtin (enum insn_code icode, tree arglist)
14322 {
14323 rtx pat;
14324 tree arg0 = TREE_VALUE (arglist);
14325 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14326 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14327 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14328 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
14329 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
14330
14331 if (VECTOR_MODE_P (mode1))
14332 op1 = safe_vector_operand (op1, mode1);
14333
14334 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
14335 op1 = copy_to_mode_reg (mode1, op1);
14336
14337 pat = GEN_FCN (icode) (op0, op1);
14338 if (pat)
14339 emit_insn (pat);
14340 return 0;
14341 }
14342
14343 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
14344
14345 static rtx
14346 ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
14347 rtx target, int do_load)
14348 {
14349 rtx pat;
14350 tree arg0 = TREE_VALUE (arglist);
14351 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14352 enum machine_mode tmode = insn_data[icode].operand[0].mode;
14353 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
14354
14355 if (optimize || !target
14356 || GET_MODE (target) != tmode
14357 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14358 target = gen_reg_rtx (tmode);
14359 if (do_load)
14360 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
14361 else
14362 {
14363 if (VECTOR_MODE_P (mode0))
14364 op0 = safe_vector_operand (op0, mode0);
14365
14366 if ((optimize && !register_operand (op0, mode0))
14367 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14368 op0 = copy_to_mode_reg (mode0, op0);
14369 }
14370
14371 pat = GEN_FCN (icode) (target, op0);
14372 if (! pat)
14373 return 0;
14374 emit_insn (pat);
14375 return target;
14376 }
14377
14378 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
14379 sqrtss, rsqrtss, rcpss. */
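/* Added note (an illustrative sketch, not from the original source): these
   scalar insns use the vec_merge "vm" patterns, which take two vector
   operands: the one the arithmetic is applied to and the one supplying the
   untouched upper elements.  Both come from the single builtin argument,
   which is why op1 is simply set to op0 below.  For example, using the
   __v4sf typedef from xmmintrin.h,

     __v4sf y = __builtin_ia32_sqrtss (x);

   leaves y[0] == sqrtf (x[0]) and y[1..3] copied from x[1..3].  */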
14380
14381 static rtx
14382 ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
14383 {
14384 rtx pat;
14385 tree arg0 = TREE_VALUE (arglist);
14386 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14387 enum machine_mode tmode = insn_data[icode].operand[0].mode;
14388 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
14389
14390 if (optimize || !target
14391 || GET_MODE (target) != tmode
14392 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14393 target = gen_reg_rtx (tmode);
14394
14395 if (VECTOR_MODE_P (mode0))
14396 op0 = safe_vector_operand (op0, mode0);
14397
14398 if ((optimize && !register_operand (op0, mode0))
14399 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14400 op0 = copy_to_mode_reg (mode0, op0);
14401
14402 op1 = op0;
14403 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
14404 op1 = copy_to_mode_reg (mode0, op1);
14405
14406 pat = GEN_FCN (icode) (target, op0, op1);
14407 if (! pat)
14408 return 0;
14409 emit_insn (pat);
14410 return target;
14411 }
14412
14413 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
14414
14415 static rtx
14416 ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
14417 rtx target)
14418 {
14419 rtx pat;
14420 tree arg0 = TREE_VALUE (arglist);
14421 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14422 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14423 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14424 rtx op2;
14425 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
14426 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
14427 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
14428 enum rtx_code comparison = d->comparison;
14429
14430 if (VECTOR_MODE_P (mode0))
14431 op0 = safe_vector_operand (op0, mode0);
14432 if (VECTOR_MODE_P (mode1))
14433 op1 = safe_vector_operand (op1, mode1);
14434
14435 /* Swap operands if we have a comparison that isn't available in
14436 hardware. */
14437 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
14438 {
14439 rtx tmp = gen_reg_rtx (mode1);
14440 emit_move_insn (tmp, op1);
14441 op1 = op0;
14442 op0 = tmp;
14443 }
14444
14445 if (optimize || !target
14446 || GET_MODE (target) != tmode
14447 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
14448 target = gen_reg_rtx (tmode);
14449
14450 if ((optimize && !register_operand (op0, mode0))
14451 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
14452 op0 = copy_to_mode_reg (mode0, op0);
14453 if ((optimize && !register_operand (op1, mode1))
14454 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
14455 op1 = copy_to_mode_reg (mode1, op1);
14456
14457 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
14458 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
14459 if (! pat)
14460 return 0;
14461 emit_insn (pat);
14462 return target;
14463 }
14464
14465 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
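/* Added sketch (explanatory only) of the sequence the expansion below
   roughly corresponds to:

     xorl   %eax, %eax         clear the SImode result
     comiss op1, op0           (or ucomiss / comisd etc., per d->icode)
     setCC  %al                STRICT_LOW_PART set of the QImode subreg

   where CC is derived from d->comparison.  */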
14466
14467 static rtx
14468 ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
14469 rtx target)
14470 {
14471 rtx pat;
14472 tree arg0 = TREE_VALUE (arglist);
14473 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14474 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14475 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14476 rtx op2;
14477 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
14478 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
14479 enum rtx_code comparison = d->comparison;
14480
14481 if (VECTOR_MODE_P (mode0))
14482 op0 = safe_vector_operand (op0, mode0);
14483 if (VECTOR_MODE_P (mode1))
14484 op1 = safe_vector_operand (op1, mode1);
14485
14486 /* Swap operands if we have a comparison that isn't available in
14487 hardware. */
14488 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
14489 {
14490 rtx tmp = op1;
14491 op1 = op0;
14492 op0 = tmp;
14493 }
14494
14495 target = gen_reg_rtx (SImode);
14496 emit_move_insn (target, const0_rtx);
14497 target = gen_rtx_SUBREG (QImode, target, 0);
14498
14499 if ((optimize && !register_operand (op0, mode0))
14500 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
14501 op0 = copy_to_mode_reg (mode0, op0);
14502 if ((optimize && !register_operand (op1, mode1))
14503 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
14504 op1 = copy_to_mode_reg (mode1, op1);
14505
14506 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
14507 pat = GEN_FCN (d->icode) (op0, op1);
14508 if (! pat)
14509 return 0;
14510 emit_insn (pat);
14511 emit_insn (gen_rtx_SET (VOIDmode,
14512 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
14513 gen_rtx_fmt_ee (comparison, QImode,
14514 SET_DEST (pat),
14515 const0_rtx)));
14516
14517 return SUBREG_REG (target);
14518 }
14519
14520 /* Return the integer constant in ARG. Constrain it to be in the range
14521 of the subparts of VEC_TYPE; issue an error if not. */
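/* Added illustration: for a V4SF vector TYPE_VECTOR_SUBPARTS is 4, so only
   selectors 0..3 are accepted here; anything else is diagnosed below and 0
   is returned as a safe fallback.  */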
14522
14523 static int
14524 get_element_number (tree vec_type, tree arg)
14525 {
14526 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
14527
14528 if (!host_integerp (arg, 1)
14529 || (elt = tree_low_cst (arg, 1), elt > max))
14530 {
14531 error ("selector must be an integer constant in the range 0..%i", max);
14532 return 0;
14533 }
14534
14535 return elt;
14536 }
14537
14538 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
14539 ix86_expand_vector_init. We DO have language-level syntax for this, in
14540 the form of (type){ init-list }. Except that since we can't place emms
14541 instructions from inside the compiler, we can't allow the use of MMX
14542 registers unless the user explicitly asks for it. So we do *not* define
14543 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
14544 we have builtins invoked by mmintrin.h that give us license to emit
14545 these sorts of instructions. */
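/* Added sketch of how mmintrin.h is expected to use the vec_init interface
   (illustrative only; the real intrinsic definitions live in that header,
   not here):

     static __inline __m64
     _mm_set_pi32 (int __i1, int __i0)
     {
       return (__m64) __builtin_ia32_vec_init_v2si (__i0, __i1);
     }

   The builtin call is what licenses the compiler to emit MMX register code,
   as described above.  */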
14546
14547 static rtx
14548 ix86_expand_vec_init_builtin (tree type, tree arglist, rtx target)
14549 {
14550 enum machine_mode tmode = TYPE_MODE (type);
14551 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
14552 int i, n_elt = GET_MODE_NUNITS (tmode);
14553 rtvec v = rtvec_alloc (n_elt);
14554
14555 gcc_assert (VECTOR_MODE_P (tmode));
14556
14557 for (i = 0; i < n_elt; ++i, arglist = TREE_CHAIN (arglist))
14558 {
14559 rtx x = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
14560 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
14561 }
14562
14563 gcc_assert (arglist == NULL);
14564
14565 if (!target || !register_operand (target, tmode))
14566 target = gen_reg_rtx (tmode);
14567
14568 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
14569 return target;
14570 }
14571
14572 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
14573 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
14574 had a language-level syntax for referencing vector elements. */
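/* Added sketch of the expected use from the intrinsic headers (assuming the
   usual emmintrin.h wrapper; not code from this file):

     static __inline int
     _mm_extract_epi16 (__m128i __A, int __N)
     {
       return (unsigned short) __builtin_ia32_vec_ext_v8hi ((__v8hi) __A, __N);
     }
*/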
14575
14576 static rtx
14577 ix86_expand_vec_ext_builtin (tree arglist, rtx target)
14578 {
14579 enum machine_mode tmode, mode0;
14580 tree arg0, arg1;
14581 int elt;
14582 rtx op0;
14583
14584 arg0 = TREE_VALUE (arglist);
14585 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14586
14587 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14588 elt = get_element_number (TREE_TYPE (arg0), arg1);
14589
14590 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
14591 mode0 = TYPE_MODE (TREE_TYPE (arg0));
14592 gcc_assert (VECTOR_MODE_P (mode0));
14593
14594 op0 = force_reg (mode0, op0);
14595
14596 if (optimize || !target || !register_operand (target, tmode))
14597 target = gen_reg_rtx (tmode);
14598
14599 ix86_expand_vector_extract (true, target, op0, elt);
14600
14601 return target;
14602 }
14603
14604 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
14605 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
14606 a language-level syntax for referencing vector elements. */
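/* Added sketch of the expected use from the intrinsic headers (assuming the
   usual emmintrin.h wrapper; not code from this file):

     static __inline __m128i
     _mm_insert_epi16 (__m128i __A, int __D, int __N)
     {
       return (__m128i) __builtin_ia32_vec_set_v8hi ((__v8hi) __A, __D, __N);
     }
*/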
14607
14608 static rtx
14609 ix86_expand_vec_set_builtin (tree arglist)
14610 {
14611 enum machine_mode tmode, mode1;
14612 tree arg0, arg1, arg2;
14613 int elt;
14614 rtx op0, op1;
14615
14616 arg0 = TREE_VALUE (arglist);
14617 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14618 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14619
14620 tmode = TYPE_MODE (TREE_TYPE (arg0));
14621 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
14622 gcc_assert (VECTOR_MODE_P (tmode));
14623
14624 op0 = expand_expr (arg0, NULL_RTX, tmode, 0);
14625 op1 = expand_expr (arg1, NULL_RTX, mode1, 0);
14626 elt = get_element_number (TREE_TYPE (arg0), arg2);
14627
14628 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
14629 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
14630
14631 op0 = force_reg (tmode, op0);
14632 op1 = force_reg (mode1, op1);
14633
14634 ix86_expand_vector_set (true, op0, op1, elt);
14635
14636 return op0;
14637 }
14638
14639 /* Expand an expression EXP that calls a built-in function,
14640 with result going to TARGET if that's convenient
14641 (and in mode MODE if that's convenient).
14642 SUBTARGET may be used as the target for computing one of EXP's operands.
14643 IGNORE is nonzero if the value is to be ignored. */
14644
14645 static rtx
14646 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
14647 enum machine_mode mode ATTRIBUTE_UNUSED,
14648 int ignore ATTRIBUTE_UNUSED)
14649 {
14650 const struct builtin_description *d;
14651 size_t i;
14652 enum insn_code icode;
14653 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
14654 tree arglist = TREE_OPERAND (exp, 1);
14655 tree arg0, arg1, arg2;
14656 rtx op0, op1, op2, pat;
14657 enum machine_mode tmode, mode0, mode1, mode2;
14658 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
14659
14660 switch (fcode)
14661 {
14662 case IX86_BUILTIN_EMMS:
14663 emit_insn (gen_mmx_emms ());
14664 return 0;
14665
14666 case IX86_BUILTIN_SFENCE:
14667 emit_insn (gen_sse_sfence ());
14668 return 0;
14669
14670 case IX86_BUILTIN_MASKMOVQ:
14671 case IX86_BUILTIN_MASKMOVDQU:
14672 icode = (fcode == IX86_BUILTIN_MASKMOVQ
14673 ? CODE_FOR_mmx_maskmovq
14674 : CODE_FOR_sse2_maskmovdqu);
14675 /* Note the arg order is different from the operand order. */
14676 arg1 = TREE_VALUE (arglist);
14677 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
14678 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14679 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14680 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14681 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14682 mode0 = insn_data[icode].operand[0].mode;
14683 mode1 = insn_data[icode].operand[1].mode;
14684 mode2 = insn_data[icode].operand[2].mode;
14685
14686 op0 = force_reg (Pmode, op0);
14687 op0 = gen_rtx_MEM (mode1, op0);
14688
14689 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
14690 op0 = copy_to_mode_reg (mode0, op0);
14691 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
14692 op1 = copy_to_mode_reg (mode1, op1);
14693 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
14694 op2 = copy_to_mode_reg (mode2, op2);
14695 pat = GEN_FCN (icode) (op0, op1, op2);
14696 if (! pat)
14697 return 0;
14698 emit_insn (pat);
14699 return 0;
14700
14701 case IX86_BUILTIN_SQRTSS:
14702 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, arglist, target);
14703 case IX86_BUILTIN_RSQRTSS:
14704 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, arglist, target);
14705 case IX86_BUILTIN_RCPSS:
14706 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, arglist, target);
14707
14708 case IX86_BUILTIN_LOADUPS:
14709 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
14710
14711 case IX86_BUILTIN_STOREUPS:
14712 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
14713
14714 case IX86_BUILTIN_LOADHPS:
14715 case IX86_BUILTIN_LOADLPS:
14716 case IX86_BUILTIN_LOADHPD:
14717 case IX86_BUILTIN_LOADLPD:
14718 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
14719 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
14720 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
14721 : CODE_FOR_sse2_loadlpd);
14722 arg0 = TREE_VALUE (arglist);
14723 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14724 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14725 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14726 tmode = insn_data[icode].operand[0].mode;
14727 mode0 = insn_data[icode].operand[1].mode;
14728 mode1 = insn_data[icode].operand[2].mode;
14729
14730 op0 = force_reg (mode0, op0);
14731 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
14732 if (optimize || target == 0
14733 || GET_MODE (target) != tmode
14734 || !register_operand (target, tmode))
14735 target = gen_reg_rtx (tmode);
14736 pat = GEN_FCN (icode) (target, op0, op1);
14737 if (! pat)
14738 return 0;
14739 emit_insn (pat);
14740 return target;
14741
14742 case IX86_BUILTIN_STOREHPS:
14743 case IX86_BUILTIN_STORELPS:
14744 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
14745 : CODE_FOR_sse_storelps);
14746 arg0 = TREE_VALUE (arglist);
14747 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14748 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14749 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14750 mode0 = insn_data[icode].operand[0].mode;
14751 mode1 = insn_data[icode].operand[1].mode;
14752
14753 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
14754 op1 = force_reg (mode1, op1);
14755
14756 pat = GEN_FCN (icode) (op0, op1);
14757 if (! pat)
14758 return 0;
14759 emit_insn (pat);
14760 return const0_rtx;
14761
14762 case IX86_BUILTIN_MOVNTPS:
14763 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
14764 case IX86_BUILTIN_MOVNTQ:
14765 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
14766
14767 case IX86_BUILTIN_LDMXCSR:
14768 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
14769 target = assign_386_stack_local (SImode, 0);
14770 emit_move_insn (target, op0);
14771 emit_insn (gen_sse_ldmxcsr (target));
14772 return 0;
14773
14774 case IX86_BUILTIN_STMXCSR:
14775 target = assign_386_stack_local (SImode, 0);
14776 emit_insn (gen_sse_stmxcsr (target));
14777 return copy_to_mode_reg (SImode, target);
14778
14779 case IX86_BUILTIN_SHUFPS:
14780 case IX86_BUILTIN_SHUFPD:
14781 icode = (fcode == IX86_BUILTIN_SHUFPS
14782 ? CODE_FOR_sse_shufps
14783 : CODE_FOR_sse2_shufpd);
14784 arg0 = TREE_VALUE (arglist);
14785 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14786 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14787 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14788 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14789 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14790 tmode = insn_data[icode].operand[0].mode;
14791 mode0 = insn_data[icode].operand[1].mode;
14792 mode1 = insn_data[icode].operand[2].mode;
14793 mode2 = insn_data[icode].operand[3].mode;
14794
14795 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14796 op0 = copy_to_mode_reg (mode0, op0);
14797 if ((optimize && !register_operand (op1, mode1))
14798 || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
14799 op1 = copy_to_mode_reg (mode1, op1);
14800 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14801 {
14802 /* @@@ better error message */
14803 error ("mask must be an immediate");
14804 return gen_reg_rtx (tmode);
14805 }
14806 if (optimize || target == 0
14807 || GET_MODE (target) != tmode
14808 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14809 target = gen_reg_rtx (tmode);
14810 pat = GEN_FCN (icode) (target, op0, op1, op2);
14811 if (! pat)
14812 return 0;
14813 emit_insn (pat);
14814 return target;
14815
14816 case IX86_BUILTIN_PSHUFW:
14817 case IX86_BUILTIN_PSHUFD:
14818 case IX86_BUILTIN_PSHUFHW:
14819 case IX86_BUILTIN_PSHUFLW:
14820 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
14821 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
14822 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
14823 : CODE_FOR_mmx_pshufw);
14824 arg0 = TREE_VALUE (arglist);
14825 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14826 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14827 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14828 tmode = insn_data[icode].operand[0].mode;
14829 mode1 = insn_data[icode].operand[1].mode;
14830 mode2 = insn_data[icode].operand[2].mode;
14831
14832 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14833 op0 = copy_to_mode_reg (mode1, op0);
14834 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14835 {
14836 /* @@@ better error message */
14837 error ("mask must be an immediate");
14838 return const0_rtx;
14839 }
14840 if (target == 0
14841 || GET_MODE (target) != tmode
14842 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14843 target = gen_reg_rtx (tmode);
14844 pat = GEN_FCN (icode) (target, op0, op1);
14845 if (! pat)
14846 return 0;
14847 emit_insn (pat);
14848 return target;
14849
14850 case IX86_BUILTIN_PSLLDQI128:
14851 case IX86_BUILTIN_PSRLDQI128:
14852 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
14853 : CODE_FOR_sse2_lshrti3);
14854 arg0 = TREE_VALUE (arglist);
14855 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14856 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14857 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14858 tmode = insn_data[icode].operand[0].mode;
14859 mode1 = insn_data[icode].operand[1].mode;
14860 mode2 = insn_data[icode].operand[2].mode;
14861
14862 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14863 {
14864 op0 = copy_to_reg (op0);
14865 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
14866 }
14867 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14868 {
14869 error ("shift must be an immediate");
14870 return const0_rtx;
14871 }
14872 target = gen_reg_rtx (V2DImode);
14873 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
14874 if (! pat)
14875 return 0;
14876 emit_insn (pat);
14877 return target;
14878
14879 case IX86_BUILTIN_FEMMS:
14880 emit_insn (gen_mmx_femms ());
14881 return NULL_RTX;
14882
14883 case IX86_BUILTIN_PAVGUSB:
14884 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, arglist, target);
14885
14886 case IX86_BUILTIN_PF2ID:
14887 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, arglist, target, 0);
14888
14889 case IX86_BUILTIN_PFACC:
14890 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, arglist, target);
14891
14892 case IX86_BUILTIN_PFADD:
14893 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, arglist, target);
14894
14895 case IX86_BUILTIN_PFCMPEQ:
14896 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, arglist, target);
14897
14898 case IX86_BUILTIN_PFCMPGE:
14899 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, arglist, target);
14900
14901 case IX86_BUILTIN_PFCMPGT:
14902 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, arglist, target);
14903
14904 case IX86_BUILTIN_PFMAX:
14905 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, arglist, target);
14906
14907 case IX86_BUILTIN_PFMIN:
14908 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, arglist, target);
14909
14910 case IX86_BUILTIN_PFMUL:
14911 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, arglist, target);
14912
14913 case IX86_BUILTIN_PFRCP:
14914 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, arglist, target, 0);
14915
14916 case IX86_BUILTIN_PFRCPIT1:
14917 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, arglist, target);
14918
14919 case IX86_BUILTIN_PFRCPIT2:
14920 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, arglist, target);
14921
14922 case IX86_BUILTIN_PFRSQIT1:
14923 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, arglist, target);
14924
14925 case IX86_BUILTIN_PFRSQRT:
14926 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, arglist, target, 0);
14927
14928 case IX86_BUILTIN_PFSUB:
14929 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, arglist, target);
14930
14931 case IX86_BUILTIN_PFSUBR:
14932 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, arglist, target);
14933
14934 case IX86_BUILTIN_PI2FD:
14935 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, arglist, target, 0);
14936
14937 case IX86_BUILTIN_PMULHRW:
14938 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, arglist, target);
14939
14940 case IX86_BUILTIN_PF2IW:
14941 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, arglist, target, 0);
14942
14943 case IX86_BUILTIN_PFNACC:
14944 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, arglist, target);
14945
14946 case IX86_BUILTIN_PFPNACC:
14947 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, arglist, target);
14948
14949 case IX86_BUILTIN_PI2FW:
14950 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, arglist, target, 0);
14951
14952 case IX86_BUILTIN_PSWAPDSI:
14953 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, arglist, target, 0);
14954
14955 case IX86_BUILTIN_PSWAPDSF:
14956 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, arglist, target, 0);
14957
14958 case IX86_BUILTIN_SQRTSD:
14959 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, arglist, target);
14960 case IX86_BUILTIN_LOADUPD:
14961 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
14962 case IX86_BUILTIN_STOREUPD:
14963 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
14964
14965 case IX86_BUILTIN_MFENCE:
14966 emit_insn (gen_sse2_mfence ());
14967 return 0;
14968 case IX86_BUILTIN_LFENCE:
14969 emit_insn (gen_sse2_lfence ());
14970 return 0;
14971
14972 case IX86_BUILTIN_CLFLUSH:
14973 arg0 = TREE_VALUE (arglist);
14974 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14975 icode = CODE_FOR_sse2_clflush;
14976 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
14977 op0 = copy_to_mode_reg (Pmode, op0);
14978
14979 emit_insn (gen_sse2_clflush (op0));
14980 return 0;
14981
14982 case IX86_BUILTIN_MOVNTPD:
14983 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
14984 case IX86_BUILTIN_MOVNTDQ:
14985 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
14986 case IX86_BUILTIN_MOVNTI:
14987 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
14988
14989 case IX86_BUILTIN_LOADDQU:
14990 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
14991 case IX86_BUILTIN_STOREDQU:
14992 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
14993
14994 case IX86_BUILTIN_MONITOR:
14995 arg0 = TREE_VALUE (arglist);
14996 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14997 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14998 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14999 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
15000 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
15001 if (!REG_P (op0))
15002 op0 = copy_to_mode_reg (SImode, op0);
15003 if (!REG_P (op1))
15004 op1 = copy_to_mode_reg (SImode, op1);
15005 if (!REG_P (op2))
15006 op2 = copy_to_mode_reg (SImode, op2);
15007 emit_insn (gen_sse3_monitor (op0, op1, op2));
15008 return 0;
15009
15010 case IX86_BUILTIN_MWAIT:
15011 arg0 = TREE_VALUE (arglist);
15012 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15013 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
15014 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
15015 if (!REG_P (op0))
15016 op0 = copy_to_mode_reg (SImode, op0);
15017 if (!REG_P (op1))
15018 op1 = copy_to_mode_reg (SImode, op1);
15019 emit_insn (gen_sse3_mwait (op0, op1));
15020 return 0;
15021
15022 case IX86_BUILTIN_LDDQU:
15023 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, arglist,
15024 target, 1);
15025
15026 case IX86_BUILTIN_VEC_INIT_V2SI:
15027 case IX86_BUILTIN_VEC_INIT_V4HI:
15028 case IX86_BUILTIN_VEC_INIT_V8QI:
15029 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), arglist, target);
15030
15031 case IX86_BUILTIN_VEC_EXT_V2DF:
15032 case IX86_BUILTIN_VEC_EXT_V2DI:
15033 case IX86_BUILTIN_VEC_EXT_V4SF:
15034 case IX86_BUILTIN_VEC_EXT_V4SI:
15035 case IX86_BUILTIN_VEC_EXT_V8HI:
15036 case IX86_BUILTIN_VEC_EXT_V2SI:
15037 case IX86_BUILTIN_VEC_EXT_V4HI:
15038 return ix86_expand_vec_ext_builtin (arglist, target);
15039
15040 case IX86_BUILTIN_VEC_SET_V8HI:
15041 case IX86_BUILTIN_VEC_SET_V4HI:
15042 return ix86_expand_vec_set_builtin (arglist);
15043
15044 default:
15045 break;
15046 }
15047
15048 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
15049 if (d->code == fcode)
15050 {
15051 /* Compares are treated specially. */
15052 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
15053 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
15054 || d->icode == CODE_FOR_sse2_maskcmpv2df3
15055 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
15056 return ix86_expand_sse_compare (d, arglist, target);
15057
15058 return ix86_expand_binop_builtin (d->icode, arglist, target);
15059 }
15060
15061 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
15062 if (d->code == fcode)
15063 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
15064
15065 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
15066 if (d->code == fcode)
15067 return ix86_expand_sse_comi (d, arglist, target);
15068
15069 gcc_unreachable ();
15070 }
15071
15072 /* Store OPERAND to the memory after reload is completed. This means
15073 that we can't easily use assign_stack_local. */
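/* Added explanatory sketch of the three strategies used below (illustrative
   pseudo-assembly; the red-zone offset comes from RED_ZONE_SIZE, which is
   128 bytes in the usual 64-bit ABI):

     red zone available:    mov %reg, -RED_ZONE_SIZE(%rsp)   no stack adjust
     64-bit, no red zone:   push %reg                        (DImode push)
     32-bit:                one or two pushl insns           (DImode is split)

   The slot is released again by ix86_free_from_memory.  */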
15074 rtx
15075 ix86_force_to_memory (enum machine_mode mode, rtx operand)
15076 {
15077 rtx result;
15078
15079 gcc_assert (reload_completed);
15080 if (TARGET_RED_ZONE)
15081 {
15082 result = gen_rtx_MEM (mode,
15083 gen_rtx_PLUS (Pmode,
15084 stack_pointer_rtx,
15085 GEN_INT (-RED_ZONE_SIZE)));
15086 emit_move_insn (result, operand);
15087 }
15088 else if (!TARGET_RED_ZONE && TARGET_64BIT)
15089 {
15090 switch (mode)
15091 {
15092 case HImode:
15093 case SImode:
15094 operand = gen_lowpart (DImode, operand);
15095 /* FALLTHRU */
15096 case DImode:
15097 emit_insn (
15098 gen_rtx_SET (VOIDmode,
15099 gen_rtx_MEM (DImode,
15100 gen_rtx_PRE_DEC (DImode,
15101 stack_pointer_rtx)),
15102 operand));
15103 break;
15104 default:
15105 gcc_unreachable ();
15106 }
15107 result = gen_rtx_MEM (mode, stack_pointer_rtx);
15108 }
15109 else
15110 {
15111 switch (mode)
15112 {
15113 case DImode:
15114 {
15115 rtx operands[2];
15116 split_di (&operand, 1, operands, operands + 1);
15117 emit_insn (
15118 gen_rtx_SET (VOIDmode,
15119 gen_rtx_MEM (SImode,
15120 gen_rtx_PRE_DEC (Pmode,
15121 stack_pointer_rtx)),
15122 operands[1]));
15123 emit_insn (
15124 gen_rtx_SET (VOIDmode,
15125 gen_rtx_MEM (SImode,
15126 gen_rtx_PRE_DEC (Pmode,
15127 stack_pointer_rtx)),
15128 operands[0]));
15129 }
15130 break;
15131 case HImode:
15132 /* It is better to store HImodes as SImodes. */
15133 if (!TARGET_PARTIAL_REG_STALL)
15134 operand = gen_lowpart (SImode, operand);
15135 /* FALLTHRU */
15136 case SImode:
15137 emit_insn (
15138 gen_rtx_SET (VOIDmode,
15139 gen_rtx_MEM (GET_MODE (operand),
15140 gen_rtx_PRE_DEC (SImode,
15141 stack_pointer_rtx)),
15142 operand));
15143 break;
15144 default:
15145 gcc_unreachable ();
15146 }
15147 result = gen_rtx_MEM (mode, stack_pointer_rtx);
15148 }
15149 return result;
15150 }
15151
15152 /* Free operand from the memory. */
15153 void
15154 ix86_free_from_memory (enum machine_mode mode)
15155 {
15156 if (!TARGET_RED_ZONE)
15157 {
15158 int size;
15159
15160 if (mode == DImode || TARGET_64BIT)
15161 size = 8;
15162 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
15163 size = 2;
15164 else
15165 size = 4;
15166 /* Use LEA to deallocate stack space. In peephole2 it will be converted
15167 to a pop or add instruction if registers are available. */
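/* Added note (illustrative): the SET emitted below typically assembles to
   something like "lea 4(%esp), %esp"; unlike an add, lea leaves the flags
   untouched, and peephole2 may still rewrite it into a pop or add as the
   comment above says.  */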
15168 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
15169 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
15170 GEN_INT (size))));
15171 }
15172 }
15173
15174 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
15175 QImode must go into class Q_REGS.
15176 Narrow ALL_REGS to GENERAL_REGS. This allows movsf and
15177 movdf to do mem-to-mem moves through integer regs. */
15178 enum reg_class
15179 ix86_preferred_reload_class (rtx x, enum reg_class class)
15180 {
15181 /* We're only allowed to return a subclass of CLASS. Many of the
15182 following checks fail for NO_REGS, so eliminate that early. */
15183 if (class == NO_REGS)
15184 return NO_REGS;
15185
15186 /* All classes can load zeros. */
15187 if (x == CONST0_RTX (GET_MODE (x)))
15188 return class;
15189
15190 /* Floating-point constants need more complex checks. */
15191 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
15192 {
15193 /* General regs can load everything. */
15194 if (reg_class_subset_p (class, GENERAL_REGS))
15195 return class;
15196
15197 /* Floats can load 0 and 1 plus some others. Note that we eliminated
15198 zero above. We only want to wind up preferring 80387 registers if
15199 we plan on doing computation with them. */
15200 if (TARGET_80387
15201 && (TARGET_MIX_SSE_I387
15202 || !(TARGET_SSE_MATH && SSE_FLOAT_MODE_P (GET_MODE (x))))
15203 && standard_80387_constant_p (x))
15204 {
15205 /* Limit class to non-sse. */
15206 if (class == FLOAT_SSE_REGS)
15207 return FLOAT_REGS;
15208 if (class == FP_TOP_SSE_REGS)
15209 return FP_TOP_REG;
15210 if (class == FP_SECOND_SSE_REGS)
15211 return FP_SECOND_REG;
15212 if (class == FLOAT_INT_REGS || class == FLOAT_REGS)
15213 return class;
15214 }
15215
15216 return NO_REGS;
15217 }
15218 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
15219 return NO_REGS;
15220 if (MAYBE_SSE_CLASS_P (class) && CONSTANT_P (x))
15221 return NO_REGS;
15222
15223 /* Generally when we see PLUS here, it's the function invariant
15224 (plus soft-fp const_int), which can only be computed into general
15225 regs. */
15226 if (GET_CODE (x) == PLUS)
15227 return reg_class_subset_p (class, GENERAL_REGS) ? class : NO_REGS;
15228
15229 /* QImode constants are easy to load, but non-constant QImode data
15230 must go into Q_REGS. */
15231 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
15232 {
15233 if (reg_class_subset_p (class, Q_REGS))
15234 return class;
15235 if (reg_class_subset_p (Q_REGS, class))
15236 return Q_REGS;
15237 return NO_REGS;
15238 }
15239
15240 return class;
15241 }
15242
15243 /* If we are copying between general and FP registers, we need a memory
15244 location. The same is true for SSE and MMX registers.
15245
15246 The macro can't work reliably when one of the CLASSES is a class containing
15247 registers from multiple units (SSE, MMX, integer). We avoid this by never
15248 combining those units in a single alternative in the machine description.
15249 Ensure that this constraint holds to avoid unexpected surprises.
15250
15251 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
15252 enforce these sanity checks. */
15253
15254 int
15255 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
15256 enum machine_mode mode, int strict)
15257 {
15258 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
15259 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
15260 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
15261 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
15262 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
15263 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
15264 {
15265 gcc_assert (!strict);
15266 return true;
15267 }
15268
15269 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
15270 return true;
15271
15272 /* ??? This is a lie. We do have moves between mmx/general, and between
15273 mmx/sse2. But by saying we need secondary memory we discourage the
15274 register allocator from using the mmx registers unless needed. */
15275 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
15276 return true;
15277
15278 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
15279 {
15280 /* SSE1 doesn't have any direct moves from other classes. */
15281 if (!TARGET_SSE2)
15282 return true;
15283
15284 /* If the target says that inter-unit moves are more expensive
15285 than moving through memory, then don't generate them. */
15286 if (!TARGET_INTER_UNIT_MOVES && !optimize_size)
15287 return true;
15288
15289 /* Between SSE and general, we have moves no larger than word size. */
15290 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
15291 return true;
15292
15293 /* ??? For the cost of one register reformat penalty, we could use
15294 the same instructions to move SFmode and DFmode data, but the
15295 relevant move patterns don't support those alternatives. */
15296 if (mode == SFmode || mode == DFmode)
15297 return true;
15298 }
15299
15300 return false;
15301 }
15302
15303 /* Return true if the registers in CLASS cannot represent the change from
15304 modes FROM to TO. */
15305
15306 bool
15307 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
15308 enum reg_class class)
15309 {
15310 if (from == to)
15311 return false;
15312
15313 /* x87 registers can't do subreg at all, as all values are reformatted
15314 to extended precision. */
15315 if (MAYBE_FLOAT_CLASS_P (class))
15316 return true;
15317
15318 if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class))
15319 {
15320 /* Vector registers do not support QI or HImode loads. If we don't
15321 disallow a change to these modes, reload will assume it's ok to
15322 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
15323 the vec_dupv4hi pattern. */
15324 if (GET_MODE_SIZE (from) < 4)
15325 return true;
15326
15327 /* Vector registers do not support subreg with nonzero offsets, which
15328 are otherwise valid for integer registers. Since we can't see
15329 whether we have a nonzero offset from here, prohibit all
15330 nonparadoxical subregs changing size. */
15331 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
15332 return true;
15333 }
15334
15335 return false;
15336 }
15337
15338 /* Return the cost of moving data from a register in class CLASS1 to
15339 one in class CLASS2.
15340
15341 It is not required that the cost always equal 2 when FROM is the same as TO;
15342 on some machines it is expensive to move between registers if they are not
15343 general registers. */
15344
15345 int
15346 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
15347 enum reg_class class2)
15348 {
15349 /* In case we require secondary memory, compute the cost of the store followed
15350 by the load. In order to avoid bad register allocation choices, we need
15351 this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
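/* Added summary (restating the computation below in one line; the class
   names are purely illustrative): for, say, GENERAL_REGS <-> SSE_REGS the
   cost is roughly

     1 + MAX (load, store cost of CLASS1) + MAX (load, store cost of CLASS2)

   plus the two +20 penalties applied further down.  */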
15352
15353 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
15354 {
15355 int cost = 1;
15356
15357 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
15358 MEMORY_MOVE_COST (mode, class1, 1));
15359 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
15360 MEMORY_MOVE_COST (mode, class2, 1));
15361
15362 /* In case of copying from a general purpose register we may emit multiple
15363 stores followed by a single load, causing a memory size mismatch stall.
15364 Count this as an arbitrarily high cost of 20. */
15365 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
15366 cost += 20;
15367
15368 /* In the case of FP/MMX moves, the registers actually overlap, and we
15369 have to switch modes in order to treat them differently. */
15370 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
15371 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
15372 cost += 20;
15373
15374 return cost;
15375 }
15376
15377 /* Moves between SSE/MMX and integer unit are expensive. */
15378 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
15379 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
15380 return ix86_cost->mmxsse_to_integer;
15381 if (MAYBE_FLOAT_CLASS_P (class1))
15382 return ix86_cost->fp_move;
15383 if (MAYBE_SSE_CLASS_P (class1))
15384 return ix86_cost->sse_move;
15385 if (MAYBE_MMX_CLASS_P (class1))
15386 return ix86_cost->mmx_move;
15387 return 2;
15388 }
15389
15390 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
15391
15392 bool
15393 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
15394 {
15395 /* Flags and only flags can hold CCmode values; they can hold nothing else. */
15396 if (CC_REGNO_P (regno))
15397 return GET_MODE_CLASS (mode) == MODE_CC;
15398 if (GET_MODE_CLASS (mode) == MODE_CC
15399 || GET_MODE_CLASS (mode) == MODE_RANDOM
15400 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
15401 return 0;
15402 if (FP_REGNO_P (regno))
15403 return VALID_FP_MODE_P (mode);
15404 if (SSE_REGNO_P (regno))
15405 {
15406 /* We implement the move patterns for all vector modes into and
15407 out of SSE registers, even when no operation instructions
15408 are available. */
15409 return (VALID_SSE_REG_MODE (mode)
15410 || VALID_SSE2_REG_MODE (mode)
15411 || VALID_MMX_REG_MODE (mode)
15412 || VALID_MMX_REG_MODE_3DNOW (mode));
15413 }
15414 if (MMX_REGNO_P (regno))
15415 {
15416 /* We implement the move patterns for 3DNOW modes even in MMX mode,
15417 so if the register is available at all, then we can move data of
15418 the given mode into or out of it. */
15419 return (VALID_MMX_REG_MODE (mode)
15420 || VALID_MMX_REG_MODE_3DNOW (mode));
15421 }
15422
15423 if (mode == QImode)
15424 {
15425 /* Take care with QImode values - they can be in non-QI regs,
15426 but then they do cause partial register stalls. */
15427 if (regno < 4 || TARGET_64BIT)
15428 return 1;
15429 if (!TARGET_PARTIAL_REG_STALL)
15430 return 1;
15431 return reload_in_progress || reload_completed;
15432 }
15433 /* We handle both integers and floats in the general purpose registers. */
15434 else if (VALID_INT_MODE_P (mode))
15435 return 1;
15436 else if (VALID_FP_MODE_P (mode))
15437 return 1;
15438 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
15439 on to use that value in smaller contexts, this can easily force a
15440 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
15441 supporting DImode, allow it. */
15442 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
15443 return 1;
15444
15445 return 0;
15446 }
15447
15448 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
15449 tieable integer mode. */
15450
15451 static bool
15452 ix86_tieable_integer_mode_p (enum machine_mode mode)
15453 {
15454 switch (mode)
15455 {
15456 case HImode:
15457 case SImode:
15458 return true;
15459
15460 case QImode:
15461 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
15462
15463 case DImode:
15464 return TARGET_64BIT;
15465
15466 default:
15467 return false;
15468 }
15469 }
15470
15471 /* Return true if MODE1 is accessible in a register that can hold MODE2
15472 without copying. That is, all register classes that can hold MODE2
15473 can also hold MODE1. */
15474
15475 bool
15476 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
15477 {
15478 if (mode1 == mode2)
15479 return true;
15480
15481 if (ix86_tieable_integer_mode_p (mode1)
15482 && ix86_tieable_integer_mode_p (mode2))
15483 return true;
15484
15485 /* MODE2 being XFmode implies fp stack or general regs, which means we
15486 can tie any smaller floating point modes to it. Note that we do not
15487 tie this with TFmode. */
15488 if (mode2 == XFmode)
15489 return mode1 == SFmode || mode1 == DFmode;
15490
15491 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
15492 that we can tie it with SFmode. */
15493 if (mode2 == DFmode)
15494 return mode1 == SFmode;
15495
15496 /* If MODE2 is only appropriate for an SSE register, then tie with
15497 any other mode acceptable to SSE registers. */
15498 if (GET_MODE_SIZE (mode2) >= 8
15499 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
15500 return ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1);
15501
15502 /* If MODE2 is appropriate for an MMX (or SSE) register, then tie
15503 with any other mode acceptable to MMX registers. */
15504 if (GET_MODE_SIZE (mode2) == 8
15505 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
15506 return ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1);
15507
15508 return false;
15509 }
15510
15511 /* Return the cost of moving data of mode M between a
15512 register and memory. A value of 2 is the default; this cost is
15513 relative to those in `REGISTER_MOVE_COST'.
15514
15515 If moving between registers and memory is more expensive than
15516 between two registers, you should define this macro to express the
15517 relative cost.
15518
15519 Also model the increased cost of moving QImode registers in non-Q_REGS
15520 classes.
15521 */
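/* Added example of the QImode modeling mentioned above: a QImode store into
   a class that is not a subset of Q_REGS is charged int_store[0] + 4 below,
   presumably because the value may first have to be copied into one of the
   byte-addressable registers (%eax..%edx) on IA-32.  */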
15522 int
15523 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
15524 {
15525 if (FLOAT_CLASS_P (class))
15526 {
15527 int index;
15528 switch (mode)
15529 {
15530 case SFmode:
15531 index = 0;
15532 break;
15533 case DFmode:
15534 index = 1;
15535 break;
15536 case XFmode:
15537 index = 2;
15538 break;
15539 default:
15540 return 100;
15541 }
15542 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
15543 }
15544 if (SSE_CLASS_P (class))
15545 {
15546 int index;
15547 switch (GET_MODE_SIZE (mode))
15548 {
15549 case 4:
15550 index = 0;
15551 break;
15552 case 8:
15553 index = 1;
15554 break;
15555 case 16:
15556 index = 2;
15557 break;
15558 default:
15559 return 100;
15560 }
15561 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
15562 }
15563 if (MMX_CLASS_P (class))
15564 {
15565 int index;
15566 switch (GET_MODE_SIZE (mode))
15567 {
15568 case 4:
15569 index = 0;
15570 break;
15571 case 8:
15572 index = 1;
15573 break;
15574 default:
15575 return 100;
15576 }
15577 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
15578 }
15579 switch (GET_MODE_SIZE (mode))
15580 {
15581 case 1:
15582 if (in)
15583 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
15584 : ix86_cost->movzbl_load);
15585 else
15586 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
15587 : ix86_cost->int_store[0] + 4);
15588 break;
15589 case 2:
15590 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
15591 default:
15592 /* Compute the number of 32-bit moves needed. TFmode is moved as XFmode. */
15593 if (mode == TFmode)
15594 mode = XFmode;
15595 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
15596 * (((int) GET_MODE_SIZE (mode)
15597 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
15598 }
15599 }
15600
15601 /* Compute a (partial) cost for rtx X. Return true if the complete
15602 cost has been computed, and false if subexpressions should be
15603 scanned. In either case, *TOTAL contains the cost result. */
15604
15605 static bool
15606 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
15607 {
15608 enum machine_mode mode = GET_MODE (x);
15609
15610 switch (code)
15611 {
15612 case CONST_INT:
15613 case CONST:
15614 case LABEL_REF:
15615 case SYMBOL_REF:
15616 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
15617 *total = 3;
15618 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
15619 *total = 2;
15620 else if (flag_pic && SYMBOLIC_CONST (x)
15621 && (!TARGET_64BIT
15622 || (GET_CODE (x) != LABEL_REF
15623 && (GET_CODE (x) != SYMBOL_REF
15624 || !SYMBOL_REF_LOCAL_P (x)))))
15625 *total = 1;
15626 else
15627 *total = 0;
15628 return true;
15629
15630 case CONST_DOUBLE:
15631 if (mode == VOIDmode)
15632 *total = 0;
15633 else
15634 switch (standard_80387_constant_p (x))
15635 {
15636 case 1: /* 0.0 */
15637 *total = 1;
15638 break;
15639 default: /* Other constants */
15640 *total = 2;
15641 break;
15642 case 0:
15643 case -1:
15644 /* Start with (MEM (SYMBOL_REF)), since that's where
15645 it'll probably end up. Add a penalty for size. */
15646 *total = (COSTS_N_INSNS (1)
15647 + (flag_pic != 0 && !TARGET_64BIT)
15648 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
15649 break;
15650 }
15651 return true;
15652
15653 case ZERO_EXTEND:
15654 /* The zero extension is often completely free on x86_64, so make
15655 it as cheap as possible. */
15656 if (TARGET_64BIT && mode == DImode
15657 && GET_MODE (XEXP (x, 0)) == SImode)
15658 *total = 1;
15659 else if (TARGET_ZERO_EXTEND_WITH_AND)
15660 *total = COSTS_N_INSNS (ix86_cost->add);
15661 else
15662 *total = COSTS_N_INSNS (ix86_cost->movzx);
15663 return false;
15664
15665 case SIGN_EXTEND:
15666 *total = COSTS_N_INSNS (ix86_cost->movsx);
15667 return false;
15668
15669 case ASHIFT:
15670 if (GET_CODE (XEXP (x, 1)) == CONST_INT
15671 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
15672 {
15673 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
15674 if (value == 1)
15675 {
15676 *total = COSTS_N_INSNS (ix86_cost->add);
15677 return false;
15678 }
15679 if ((value == 2 || value == 3)
15680 && ix86_cost->lea <= ix86_cost->shift_const)
15681 {
15682 *total = COSTS_N_INSNS (ix86_cost->lea);
15683 return false;
15684 }
15685 }
15686 /* FALLTHRU */
15687
15688 case ROTATE:
15689 case ASHIFTRT:
15690 case LSHIFTRT:
15691 case ROTATERT:
15692 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
15693 {
15694 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15695 {
15696 if (INTVAL (XEXP (x, 1)) > 32)
15697 *total = COSTS_N_INSNS(ix86_cost->shift_const + 2);
15698 else
15699 *total = COSTS_N_INSNS(ix86_cost->shift_const * 2);
15700 }
15701 else
15702 {
15703 if (GET_CODE (XEXP (x, 1)) == AND)
15704 *total = COSTS_N_INSNS(ix86_cost->shift_var * 2);
15705 else
15706 *total = COSTS_N_INSNS(ix86_cost->shift_var * 6 + 2);
15707 }
15708 }
15709 else
15710 {
15711 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15712 *total = COSTS_N_INSNS (ix86_cost->shift_const);
15713 else
15714 *total = COSTS_N_INSNS (ix86_cost->shift_var);
15715 }
15716 return false;
15717
15718 case MULT:
15719 if (FLOAT_MODE_P (mode))
15720 {
15721 *total = COSTS_N_INSNS (ix86_cost->fmul);
15722 return false;
15723 }
15724 else
15725 {
15726 rtx op0 = XEXP (x, 0);
15727 rtx op1 = XEXP (x, 1);
15728 int nbits;
15729 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15730 {
15731 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
15732 for (nbits = 0; value != 0; value &= value - 1)
15733 nbits++;
15734 }
15735 else
15736 /* This is arbitrary. */
15737 nbits = 7;
15738
15739 /* Compute costs correctly for widening multiplication. */
15740 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
15741 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
15742 == GET_MODE_SIZE (mode))
15743 {
15744 int is_mulwiden = 0;
15745 enum machine_mode inner_mode = GET_MODE (op0);
15746
15747 if (GET_CODE (op0) == GET_CODE (op1))
15748 is_mulwiden = 1, op1 = XEXP (op1, 0);
15749 else if (GET_CODE (op1) == CONST_INT)
15750 {
15751 if (GET_CODE (op0) == SIGN_EXTEND)
15752 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
15753 == INTVAL (op1);
15754 else
15755 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
15756 }
15757
15758 if (is_mulwiden)
15759 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
15760 }
15761
15762 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
15763 + nbits * ix86_cost->mult_bit)
15764 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code);
15765
15766 return true;
15767 }
15768
15769 case DIV:
15770 case UDIV:
15771 case MOD:
15772 case UMOD:
15773 if (FLOAT_MODE_P (mode))
15774 *total = COSTS_N_INSNS (ix86_cost->fdiv);
15775 else
15776 *total = COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)]);
15777 return false;
15778
15779 case PLUS:
15780 if (FLOAT_MODE_P (mode))
15781 *total = COSTS_N_INSNS (ix86_cost->fadd);
15782 else if (GET_MODE_CLASS (mode) == MODE_INT
15783 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
15784 {
15785 if (GET_CODE (XEXP (x, 0)) == PLUS
15786 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
15787 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
15788 && CONSTANT_P (XEXP (x, 1)))
15789 {
15790 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
15791 if (val == 2 || val == 4 || val == 8)
15792 {
15793 *total = COSTS_N_INSNS (ix86_cost->lea);
15794 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
15795 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
15796 outer_code);
15797 *total += rtx_cost (XEXP (x, 1), outer_code);
15798 return true;
15799 }
15800 }
15801 else if (GET_CODE (XEXP (x, 0)) == MULT
15802 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
15803 {
15804 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
15805 if (val == 2 || val == 4 || val == 8)
15806 {
15807 *total = COSTS_N_INSNS (ix86_cost->lea);
15808 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
15809 *total += rtx_cost (XEXP (x, 1), outer_code);
15810 return true;
15811 }
15812 }
15813 else if (GET_CODE (XEXP (x, 0)) == PLUS)
15814 {
15815 *total = COSTS_N_INSNS (ix86_cost->lea);
15816 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
15817 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
15818 *total += rtx_cost (XEXP (x, 1), outer_code);
15819 return true;
15820 }
15821 }
15822 /* FALLTHRU */
15823
15824 case MINUS:
15825 if (FLOAT_MODE_P (mode))
15826 {
15827 *total = COSTS_N_INSNS (ix86_cost->fadd);
15828 return false;
15829 }
15830 /* FALLTHRU */
15831
15832 case AND:
15833 case IOR:
15834 case XOR:
15835 if (!TARGET_64BIT && mode == DImode)
15836 {
15837 *total = (COSTS_N_INSNS (ix86_cost->add) * 2
15838 + (rtx_cost (XEXP (x, 0), outer_code)
15839 << (GET_MODE (XEXP (x, 0)) != DImode))
15840 + (rtx_cost (XEXP (x, 1), outer_code)
15841 << (GET_MODE (XEXP (x, 1)) != DImode)));
15842 return true;
15843 }
15844 /* FALLTHRU */
15845
15846 case NEG:
15847 if (FLOAT_MODE_P (mode))
15848 {
15849 *total = COSTS_N_INSNS (ix86_cost->fchs);
15850 return false;
15851 }
15852 /* FALLTHRU */
15853
15854 case NOT:
15855 if (!TARGET_64BIT && mode == DImode)
15856 *total = COSTS_N_INSNS (ix86_cost->add * 2);
15857 else
15858 *total = COSTS_N_INSNS (ix86_cost->add);
15859 return false;
15860
15861 case COMPARE:
15862 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
15863 && XEXP (XEXP (x, 0), 1) == const1_rtx
15864 && GET_CODE (XEXP (XEXP (x, 0), 2)) == CONST_INT
15865 && XEXP (x, 1) == const0_rtx)
15866 {
15867 /* This kind of construct is implemented using test[bwl].
15868 Treat it as if we had an AND. */
15869 *total = (COSTS_N_INSNS (ix86_cost->add)
15870 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
15871 + rtx_cost (const1_rtx, outer_code));
15872 return true;
15873 }
15874 return false;
15875
15876 case FLOAT_EXTEND:
15877 if (!TARGET_SSE_MATH
15878 || mode == XFmode
15879 || (mode == DFmode && !TARGET_SSE2))
15880 *total = 0;
15881 return false;
15882
15883 case ABS:
15884 if (FLOAT_MODE_P (mode))
15885 *total = COSTS_N_INSNS (ix86_cost->fabs);
15886 return false;
15887
15888 case SQRT:
15889 if (FLOAT_MODE_P (mode))
15890 *total = COSTS_N_INSNS (ix86_cost->fsqrt);
15891 return false;
15892
15893 case UNSPEC:
15894 if (XINT (x, 1) == UNSPEC_TP)
15895 *total = 0;
15896 return false;
15897
15898 default:
15899 return false;
15900 }
15901 }
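/* Editorial sketch (not part of the original file): the MULT case above
   charges mult_init plus mult_bit per set bit of a constant multiplier,
   counting the bits with the "value &= value - 1" idiom.  A standalone
   version of that bit count, for illustration only:  */
#if 0
static int
example_count_set_bits (unsigned HOST_WIDE_INT value)
{
  int nbits = 0;
  while (value != 0)
    {
      value &= value - 1;	/* Clear the lowest set bit.  */
      nbits++;
    }
  return nbits;
}
/* For example, multiplying by 10 (binary 1010, two set bits) is costed as
   mult_init[MODE_INDEX (mode)] + 2 * mult_bit.  */
#endif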
15902
15903 #if TARGET_MACHO
15904
15905 static int current_machopic_label_num;
15906
15907 /* Given a symbol name and its associated stub, write out the
15908 definition of the stub. */
15909
15910 void
15911 machopic_output_stub (FILE *file, const char *symb, const char *stub)
15912 {
15913 unsigned int length;
15914 char *binder_name, *symbol_name, lazy_ptr_name[32];
15915 int label = ++current_machopic_label_num;
15916
15917 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
15918 symb = (*targetm.strip_name_encoding) (symb);
15919
15920 length = strlen (stub);
15921 binder_name = alloca (length + 32);
15922 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
15923
15924 length = strlen (symb);
15925 symbol_name = alloca (length + 32);
15926 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
15927
15928 sprintf (lazy_ptr_name, "L%d$lz", label);
15929
15930 if (MACHOPIC_PURE)
15931 machopic_picsymbol_stub_section ();
15932 else
15933 machopic_symbol_stub_section ();
15934
15935 fprintf (file, "%s:\n", stub);
15936 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
15937
15938 if (MACHOPIC_PURE)
15939 {
15940 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
15941 fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
15942 fprintf (file, "\tjmp %%edx\n");
15943 }
15944 else
15945 fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
15946
15947 fprintf (file, "%s:\n", binder_name);
15948
15949 if (MACHOPIC_PURE)
15950 {
15951 fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
15952 fprintf (file, "\tpushl %%eax\n");
15953 }
15954 else
15955 fprintf (file, "\t pushl $%s\n", lazy_ptr_name);
15956
15957 fprintf (file, "\tjmp dyld_stub_binding_helper\n");
15958
15959 machopic_lazy_symbol_ptr_section ();
15960 fprintf (file, "%s:\n", lazy_ptr_name);
15961 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
15962 fprintf (file, "\t.long %s\n", binder_name);
15963 }
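/* Editorial illustration (not part of the original file): for a hypothetical
   symbol _foo with label number 1, and with MACHOPIC_PURE set, the fprintf
   calls above produce a stub of roughly the following shape (the stub,
   binder and symbol names come from GEN_BINDER_NAME_FOR_STUB and
   GEN_SYMBOL_NAME_FOR_SYMBOL and are only sketched here):

	L_foo$stub:
		.indirect_symbol _foo
		call LPC$1
	LPC$1:	popl %eax
		movl L1$lz-LPC$1(%eax),%edx
		jmp %edx
	L_foo$stub_binder:
		lea L1$lz-LPC$1(%eax),%eax
		pushl %eax
		jmp dyld_stub_binding_helper
	L1$lz:
		.indirect_symbol _foo
		.long L_foo$stub_binder
   */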
15964 #endif /* TARGET_MACHO */
15965
15966 /* Order the registers for register allocator. */
15967
15968 void
15969 x86_order_regs_for_local_alloc (void)
15970 {
15971 int pos = 0;
15972 int i;
15973
15974 /* First allocate the local general purpose registers. */
15975 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15976 if (GENERAL_REGNO_P (i) && call_used_regs[i])
15977 reg_alloc_order [pos++] = i;
15978
15979 /* Global general purpose registers. */
15980 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15981 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
15982 reg_alloc_order [pos++] = i;
15983
15984 /* x87 registers come first in case we are doing FP math
15985 using them. */
15986 if (!TARGET_SSE_MATH)
15987 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15988 reg_alloc_order [pos++] = i;
15989
15990 /* SSE registers. */
15991 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
15992 reg_alloc_order [pos++] = i;
15993 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
15994 reg_alloc_order [pos++] = i;
15995
15996 /* x87 registers. */
15997 if (TARGET_SSE_MATH)
15998 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15999 reg_alloc_order [pos++] = i;
16000
16001 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
16002 reg_alloc_order [pos++] = i;
16003
16004 /* Initialize the rest of the array; some registers are never
16005 allocated at all. */
16006 while (pos < FIRST_PSEUDO_REGISTER)
16007 reg_alloc_order [pos++] = 0;
16008 }
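/* Editorial illustration (not part of the original file): on a 32-bit
   target this yields roughly the order %eax, %edx, %ecx (caller-saved
   general registers) first, then %ebx, %esi, %edi, %ebp (callee-saved),
   then either the x87 stack registers or the SSE registers depending on
   -mfpmath (whichever is used for FP math comes earlier), with the MMX
   registers last.  */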
16009
16010 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
16011 struct attribute_spec.handler. */
16012 static tree
16013 ix86_handle_struct_attribute (tree *node, tree name,
16014 tree args ATTRIBUTE_UNUSED,
16015 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
16016 {
16017 tree *type = NULL;
16018 if (DECL_P (*node))
16019 {
16020 if (TREE_CODE (*node) == TYPE_DECL)
16021 type = &TREE_TYPE (*node);
16022 }
16023 else
16024 type = node;
16025
16026 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
16027 || TREE_CODE (*type) == UNION_TYPE)))
16028 {
16029 warning (0, "%qs attribute ignored", IDENTIFIER_POINTER (name));
16030 *no_add_attrs = true;
16031 }
16032
16033 else if ((is_attribute_p ("ms_struct", name)
16034 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
16035 || ((is_attribute_p ("gcc_struct", name)
16036 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
16037 {
16038 warning (0, "%qs incompatible attribute ignored",
16039 IDENTIFIER_POINTER (name));
16040 *no_add_attrs = true;
16041 }
16042
16043 return NULL_TREE;
16044 }
16045
16046 static bool
16047 ix86_ms_bitfield_layout_p (tree record_type)
16048 {
16049 return (TARGET_MS_BITFIELD_LAYOUT &&
16050 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
16051 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
16052 }
16053
16054 /* Returns an expression indicating where the this parameter is
16055 located on entry to the FUNCTION. */
16056
16057 static rtx
16058 x86_this_parameter (tree function)
16059 {
16060 tree type = TREE_TYPE (function);
16061
16062 if (TARGET_64BIT)
16063 {
16064 int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
16065 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
16066 }
16067
16068 if (ix86_function_regparm (type, function) > 0)
16069 {
16070 tree parm;
16071
16072 parm = TYPE_ARG_TYPES (type);
16073 /* Figure out whether or not the function has a variable number of
16074 arguments. */
16075 for (; parm; parm = TREE_CHAIN (parm))
16076 if (TREE_VALUE (parm) == void_type_node)
16077 break;
16078 /* If not, the this parameter is in the first argument. */
16079 if (parm)
16080 {
16081 int regno = 0;
16082 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
16083 regno = 2;
16084 return gen_rtx_REG (SImode, regno);
16085 }
16086 }
16087
16088 if (aggregate_value_p (TREE_TYPE (type), type))
16089 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
16090 else
16091 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
16092 }
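/* Editorial illustration (not part of the original file): for a plain
   32-bit cdecl method the `this' pointer is found at 4(%esp) on entry, or
   at 8(%esp) when a hidden aggregate-return pointer occupies the first
   slot; with regparm it arrives in %eax, with fastcall in %ecx; and on
   64-bit targets it is in %rdi, or %rsi when the aggregate-return pointer
   takes the first parameter register.  */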
16093
16094 /* Determine whether x86_output_mi_thunk can succeed. */
16095
16096 static bool
16097 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
16098 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
16099 HOST_WIDE_INT vcall_offset, tree function)
16100 {
16101 /* 64-bit can handle anything. */
16102 if (TARGET_64BIT)
16103 return true;
16104
16105 /* For 32-bit, everything's fine if we have one free register. */
16106 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
16107 return true;
16108
16109 /* Need a free register for vcall_offset. */
16110 if (vcall_offset)
16111 return false;
16112
16113 /* Need a free register for GOT references. */
16114 if (flag_pic && !(*targetm.binds_local_p) (function))
16115 return false;
16116
16117 /* Otherwise ok. */
16118 return true;
16119 }
16120
16121 /* Output the assembler code for a thunk function. THUNK_DECL is the
16122 declaration for the thunk function itself, FUNCTION is the decl for
16123 the target function. DELTA is an immediate constant offset to be
16124 added to THIS. If VCALL_OFFSET is nonzero, the word at
16125 *(*this + vcall_offset) should be added to THIS. */
16126
16127 static void
16128 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
16129 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
16130 HOST_WIDE_INT vcall_offset, tree function)
16131 {
16132 rtx xops[3];
16133 rtx this = x86_this_parameter (function);
16134 rtx this_reg, tmp;
16135
16136 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
16137 pull it in now and let DELTA benefit. */
16138 if (REG_P (this))
16139 this_reg = this;
16140 else if (vcall_offset)
16141 {
16142 /* Put the this parameter into %eax. */
16143 xops[0] = this;
16144 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
16145 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
16146 }
16147 else
16148 this_reg = NULL_RTX;
16149
16150 /* Adjust the this parameter by a fixed constant. */
16151 if (delta)
16152 {
16153 xops[0] = GEN_INT (delta);
16154 xops[1] = this_reg ? this_reg : this;
16155 if (TARGET_64BIT)
16156 {
16157 if (!x86_64_general_operand (xops[0], DImode))
16158 {
16159 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
16160 xops[1] = tmp;
16161 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
16162 xops[0] = tmp;
16163 xops[1] = this;
16164 }
16165 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
16166 }
16167 else
16168 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
16169 }
16170
16171 /* Adjust the this parameter by a value stored in the vtable. */
16172 if (vcall_offset)
16173 {
16174 if (TARGET_64BIT)
16175 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
16176 else
16177 {
16178 int tmp_regno = 2 /* ECX */;
16179 if (lookup_attribute ("fastcall",
16180 TYPE_ATTRIBUTES (TREE_TYPE (function))))
16181 tmp_regno = 0 /* EAX */;
16182 tmp = gen_rtx_REG (SImode, tmp_regno);
16183 }
16184
16185 xops[0] = gen_rtx_MEM (Pmode, this_reg);
16186 xops[1] = tmp;
16187 if (TARGET_64BIT)
16188 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
16189 else
16190 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
16191
16192 /* Adjust the this parameter. */
16193 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
16194 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
16195 {
16196 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
16197 xops[0] = GEN_INT (vcall_offset);
16198 xops[1] = tmp2;
16199 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
16200 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
16201 }
16202 xops[1] = this_reg;
16203 if (TARGET_64BIT)
16204 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
16205 else
16206 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
16207 }
16208
16209 /* If necessary, drop THIS back to its stack slot. */
16210 if (this_reg && this_reg != this)
16211 {
16212 xops[0] = this_reg;
16213 xops[1] = this;
16214 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
16215 }
16216
16217 xops[0] = XEXP (DECL_RTL (function), 0);
16218 if (TARGET_64BIT)
16219 {
16220 if (!flag_pic || (*targetm.binds_local_p) (function))
16221 output_asm_insn ("jmp\t%P0", xops);
16222 else
16223 {
16224 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
16225 tmp = gen_rtx_CONST (Pmode, tmp);
16226 tmp = gen_rtx_MEM (QImode, tmp);
16227 xops[0] = tmp;
16228 output_asm_insn ("jmp\t%A0", xops);
16229 }
16230 }
16231 else
16232 {
16233 if (!flag_pic || (*targetm.binds_local_p) (function))
16234 output_asm_insn ("jmp\t%P0", xops);
16235 else
16236 #if TARGET_MACHO
16237 if (TARGET_MACHO)
16238 {
16239 rtx sym_ref = XEXP (DECL_RTL (function), 0);
16240 tmp = (gen_rtx_SYMBOL_REF
16241 (Pmode,
16242 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
16243 tmp = gen_rtx_MEM (QImode, tmp);
16244 xops[0] = tmp;
16245 output_asm_insn ("jmp\t%0", xops);
16246 }
16247 else
16248 #endif /* TARGET_MACHO */
16249 {
16250 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
16251 output_set_got (tmp);
16252
16253 xops[1] = tmp;
16254 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
16255 output_asm_insn ("jmp\t{*}%1", xops);
16256 }
16257 }
16258 }
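/* Editorial illustration (not part of the original file): for a
   hypothetical 32-bit, non-PIC thunk with DELTA = 8, no VCALL_OFFSET and a
   cdecl target function `bar', the code above emits roughly:

	addl	$8, 4(%esp)
	jmp	bar

   With a nonzero VCALL_OFFSET, `this' is first pulled into %eax, the
   vtable pointer *this is loaded into %ecx, the word at
   VCALL_OFFSET(%ecx) is added to %eax, and %eax is written back to its
   stack slot before the tail jump.  */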
16259
16260 static void
16261 x86_file_start (void)
16262 {
16263 default_file_start ();
16264 if (X86_FILE_START_VERSION_DIRECTIVE)
16265 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
16266 if (X86_FILE_START_FLTUSED)
16267 fputs ("\t.global\t__fltused\n", asm_out_file);
16268 if (ix86_asm_dialect == ASM_INTEL)
16269 fputs ("\t.intel_syntax\n", asm_out_file);
16270 }
16271
16272 int
16273 x86_field_alignment (tree field, int computed)
16274 {
16275 enum machine_mode mode;
16276 tree type = TREE_TYPE (field);
16277
16278 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
16279 return computed;
16280 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
16281 ? get_inner_array_type (type) : type);
16282 if (mode == DFmode || mode == DCmode
16283 || GET_MODE_CLASS (mode) == MODE_INT
16284 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
16285 return MIN (32, computed);
16286 return computed;
16287 }
16288
16289 /* Output assembler code to FILE to increment profiler label # LABELNO
16290 for profiling a function entry. */
16291 void
16292 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
16293 {
16294 if (TARGET_64BIT)
16295 if (flag_pic)
16296 {
16297 #ifndef NO_PROFILE_COUNTERS
16298 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
16299 #endif
16300 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
16301 }
16302 else
16303 {
16304 #ifndef NO_PROFILE_COUNTERS
16305 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
16306 #endif
16307 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
16308 }
16309 else if (flag_pic)
16310 {
16311 #ifndef NO_PROFILE_COUNTERS
16312 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
16313 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
16314 #endif
16315 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
16316 }
16317 else
16318 {
16319 #ifndef NO_PROFILE_COUNTERS
16320 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
16321 PROFILE_COUNT_REGISTER);
16322 #endif
16323 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
16324 }
16325 }
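/* Editorial illustration (not part of the original file): on a 32-bit
   non-PIC target the code above emits roughly

	movl	$LP0,%edx
	call	mcount

   where the actual label prefix, counter register and mcount symbol come
   from LPREFIX, PROFILE_COUNT_REGISTER and MCOUNT_NAME and vary per OS;
   the PIC variants reach mcount through the GOT instead of referencing the
   symbols directly.  */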
16326
16327 /* We don't have exact information about the insn sizes, but we may safely
16328 assume that we know about all 1 byte insns and memory
16329 address sizes. This is enough to eliminate unnecessary padding in
16330 99% of cases. */
16331
16332 static int
16333 min_insn_size (rtx insn)
16334 {
16335 int l = 0;
16336
16337 if (!INSN_P (insn) || !active_insn_p (insn))
16338 return 0;
16339
16340 /* Discard alignment insns we have emitted, and jump instructions. */
16341 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
16342 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
16343 return 0;
16344 if (GET_CODE (insn) == JUMP_INSN
16345 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
16346 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
16347 return 0;
16348
16349 /* Important case - calls are always 5 bytes.
16350 It is common to have many calls in a row. */
16351 if (GET_CODE (insn) == CALL_INSN
16352 && symbolic_reference_mentioned_p (PATTERN (insn))
16353 && !SIBLING_CALL_P (insn))
16354 return 5;
16355 if (get_attr_length (insn) <= 1)
16356 return 1;
16357
16358 /* For normal instructions we may rely on the sizes of addresses
16359 and the presence of a symbol to require 4 bytes of encoding.
16360 This is not the case for jumps, where references are PC relative. */
16361 if (GET_CODE (insn) != JUMP_INSN)
16362 {
16363 l = get_attr_length_address (insn);
16364 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
16365 l = 4;
16366 }
16367 if (l)
16368 return 1+l;
16369 else
16370 return 2;
16371 }
16372
16373 /* The AMD K8 core mispredicts jumps when there are more than 3 jumps in a
16374 16 byte window. */
16375
16376 static void
16377 ix86_avoid_jump_misspredicts (void)
16378 {
16379 rtx insn, start = get_insns ();
16380 int nbytes = 0, njumps = 0;
16381 int isjump = 0;
16382
16383 /* Look for all minimal intervals of instructions containing 4 jumps.
16384 The intervals are bounded by START and INSN. NBYTES is the total
16385 size of the instructions in the interval, including INSN but not
16386 including START. When NBYTES is smaller than 16, it is possible
16387 that the ends of START and INSN land in the same 16-byte window.
16388
16389 The smallest offset in the window at which INSN can start is the case
16390 where START ends at offset 0; the offset of INSN is then NBYTES - sizeof (INSN).
16391 We add a p2align to the 16-byte window with maxskip 17 - NBYTES + sizeof (INSN).
16392 */
16393 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
16394 {
16395
16396 nbytes += min_insn_size (insn);
16397 if (dump_file)
16398 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
16399 INSN_UID (insn), min_insn_size (insn));
16400 if ((GET_CODE (insn) == JUMP_INSN
16401 && GET_CODE (PATTERN (insn)) != ADDR_VEC
16402 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
16403 || GET_CODE (insn) == CALL_INSN)
16404 njumps++;
16405 else
16406 continue;
16407
16408 while (njumps > 3)
16409 {
16410 start = NEXT_INSN (start);
16411 if ((GET_CODE (start) == JUMP_INSN
16412 && GET_CODE (PATTERN (start)) != ADDR_VEC
16413 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
16414 || GET_CODE (start) == CALL_INSN)
16415 njumps--, isjump = 1;
16416 else
16417 isjump = 0;
16418 nbytes -= min_insn_size (start);
16419 }
16420 gcc_assert (njumps >= 0);
16421 if (dump_file)
16422 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
16423 INSN_UID (start), INSN_UID (insn), nbytes);
16424
16425 if (njumps == 3 && isjump && nbytes < 16)
16426 {
16427 int padsize = 15 - nbytes + min_insn_size (insn);
16428
16429 if (dump_file)
16430 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
16431 INSN_UID (insn), padsize);
16432 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
16433 }
16434 }
16435 }
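/* Editorial worked example (not part of the original file): suppose the
   sliding window already holds three jumps, the incoming INSN is a fourth
   jump with an estimated size of 2 bytes, and NBYTES is 12 after adding
   it.  The window is shorter than 16 bytes, so all four jumps could share
   one 16-byte fetch block; padsize = 15 - 12 + 2 = 5, and a 5-byte
   alignment insn is emitted in front of INSN to push it into the next
   block.  */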
16436
16437 /* AMD Athlon works faster
16438 when RET is not the destination of a conditional jump or directly preceded
16439 by another jump instruction. We avoid the penalty by inserting a NOP just
16440 before the RET instruction in such cases. */
16441 static void
16442 ix86_pad_returns (void)
16443 {
16444 edge e;
16445 edge_iterator ei;
16446
16447 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
16448 {
16449 basic_block bb = e->src;
16450 rtx ret = BB_END (bb);
16451 rtx prev;
16452 bool replace = false;
16453
16454 if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
16455 || !maybe_hot_bb_p (bb))
16456 continue;
16457 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
16458 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
16459 break;
16460 if (prev && GET_CODE (prev) == CODE_LABEL)
16461 {
16462 edge e;
16463 edge_iterator ei;
16464
16465 FOR_EACH_EDGE (e, ei, bb->preds)
16466 if (EDGE_FREQUENCY (e) && e->src->index >= 0
16467 && !(e->flags & EDGE_FALLTHRU))
16468 replace = true;
16469 }
16470 if (!replace)
16471 {
16472 prev = prev_active_insn (ret);
16473 if (prev
16474 && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
16475 || GET_CODE (prev) == CALL_INSN))
16476 replace = true;
16477 /* Empty functions get a branch mispredict even when the jump destination
16478 is not visible to us. */
16479 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
16480 replace = true;
16481 }
16482 if (replace)
16483 {
16484 emit_insn_before (gen_return_internal_long (), ret);
16485 delete_insn (ret);
16486 }
16487 }
16488 }
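/* Editorial illustration (not part of the original file): for a return
   that directly follows a conditional jump, the transformation above
   replaces a sequence such as

	jne	.L2
	ret

   with the return_internal_long pattern (a longer encoding of the return,
   defined in i386.md) in place of the plain ret, so the return no longer
   shares a predictor slot with the preceding jump.  */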
16489
16490 /* Implement machine specific optimizations. We implement padding of returns
16491 for K8 CPUs and a pass to avoid 4 jumps in a single 16 byte window. */
16492 static void
16493 ix86_reorg (void)
16494 {
16495 if (TARGET_ATHLON_K8 && optimize && !optimize_size)
16496 ix86_pad_returns ();
16497 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
16498 ix86_avoid_jump_misspredicts ();
16499 }
16500
16501 /* Return nonzero when a QImode register that must be represented via a REX
16502 prefix is used. */
16503 bool
16504 x86_extended_QIreg_mentioned_p (rtx insn)
16505 {
16506 int i;
16507 extract_insn_cached (insn);
16508 for (i = 0; i < recog_data.n_operands; i++)
16509 if (REG_P (recog_data.operand[i])
16510 && REGNO (recog_data.operand[i]) >= 4)
16511 return true;
16512 return false;
16513 }
16514
16515 /* Return nonzero when P points to a register encoded via a REX prefix.
16516 Called via for_each_rtx. */
16517 static int
16518 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
16519 {
16520 unsigned int regno;
16521 if (!REG_P (*p))
16522 return 0;
16523 regno = REGNO (*p);
16524 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
16525 }
16526
16527 /* Return true when INSN mentions a register that must be encoded using a REX
16528 prefix. */
16529 bool
16530 x86_extended_reg_mentioned_p (rtx insn)
16531 {
16532 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
16533 }
16534
16535 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
16536 optabs would emit if we didn't have TFmode patterns. */
16537
16538 void
16539 x86_emit_floatuns (rtx operands[2])
16540 {
16541 rtx neglab, donelab, i0, i1, f0, in, out;
16542 enum machine_mode mode, inmode;
16543
16544 inmode = GET_MODE (operands[1]);
16545 gcc_assert (inmode == SImode || inmode == DImode);
16546
16547 out = operands[0];
16548 in = force_reg (inmode, operands[1]);
16549 mode = GET_MODE (out);
16550 neglab = gen_label_rtx ();
16551 donelab = gen_label_rtx ();
16552 i1 = gen_reg_rtx (Pmode);
16553 f0 = gen_reg_rtx (mode);
16554
16555 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
16556
16557 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
16558 emit_jump_insn (gen_jump (donelab));
16559 emit_barrier ();
16560
16561 emit_label (neglab);
16562
16563 i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
16564 i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
16565 i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
16566 expand_float (f0, i0, 0);
16567 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
16568
16569 emit_label (donelab);
16570 }
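/* Editorial sketch (not part of the original file): the same algorithm in
   plain C, assuming a 64-bit unsigned input converted to double.  When the
   top bit is set, the value is halved with its low bit folded back in (so
   the final doubling rounds the same way), converted as signed, and then
   doubled.  */
#if 0
static double
example_floatuns (unsigned long long x)
{
  unsigned long long half;
  double f;

  if ((long long) x >= 0)
    return (double) (long long) x;	/* Fits in a signed conversion.  */

  half = (x >> 1) | (x & 1);		/* Halve, folding in the low bit.  */
  f = (double) (long long) half;
  return f + f;				/* Undo the halving.  */
}
#endif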
16571 \f
16572 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
16573 with all elements equal to VAR. Return true if successful. */
16574
16575 static bool
16576 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
16577 rtx target, rtx val)
16578 {
16579 enum machine_mode smode, wsmode, wvmode;
16580 rtx x;
16581
16582 switch (mode)
16583 {
16584 case V2SImode:
16585 case V2SFmode:
16586 if (!mmx_ok && !TARGET_SSE)
16587 return false;
16588 /* FALLTHRU */
16589
16590 case V2DFmode:
16591 case V2DImode:
16592 case V4SFmode:
16593 case V4SImode:
16594 val = force_reg (GET_MODE_INNER (mode), val);
16595 x = gen_rtx_VEC_DUPLICATE (mode, val);
16596 emit_insn (gen_rtx_SET (VOIDmode, target, x));
16597 return true;
16598
16599 case V4HImode:
16600 if (!mmx_ok)
16601 return false;
16602 if (TARGET_SSE || TARGET_3DNOW_A)
16603 {
16604 val = gen_lowpart (SImode, val);
16605 x = gen_rtx_TRUNCATE (HImode, val);
16606 x = gen_rtx_VEC_DUPLICATE (mode, x);
16607 emit_insn (gen_rtx_SET (VOIDmode, target, x));
16608 return true;
16609 }
16610 else
16611 {
16612 smode = HImode;
16613 wsmode = SImode;
16614 wvmode = V2SImode;
16615 goto widen;
16616 }
16617
16618 case V8QImode:
16619 if (!mmx_ok)
16620 return false;
16621 smode = QImode;
16622 wsmode = HImode;
16623 wvmode = V4HImode;
16624 goto widen;
16625 case V8HImode:
16626 smode = HImode;
16627 wsmode = SImode;
16628 wvmode = V4SImode;
16629 goto widen;
16630 case V16QImode:
16631 smode = QImode;
16632 wsmode = HImode;
16633 wvmode = V8HImode;
16634 goto widen;
16635 widen:
16636 /* Replicate the value once into the next wider mode and recurse. */
16637 val = convert_modes (wsmode, smode, val, true);
16638 x = expand_simple_binop (wsmode, ASHIFT, val,
16639 GEN_INT (GET_MODE_BITSIZE (smode)),
16640 NULL_RTX, 1, OPTAB_LIB_WIDEN);
16641 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
16642
16643 x = gen_reg_rtx (wvmode);
16644 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
16645 gcc_unreachable ();
16646 emit_move_insn (target, gen_lowpart (mode, x));
16647 return true;
16648
16649 default:
16650 return false;
16651 }
16652 }
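/* Editorial sketch (not part of the original file): the "widen" strategy
   above, shown on plain integers.  A QImode value is first replicated into
   the next wider scalar mode by a shift and an IOR, and the recursive call
   then broadcasts that wider value across the vector; repeating the step
   builds a 32-bit broadcast of a byte.  */
#if 0
static unsigned int
example_broadcast_byte (unsigned char b)
{
  unsigned short h = (unsigned short) ((b << 8) | b);	/* QI -> HI */
  return ((unsigned int) h << 16) | h;			/* HI -> SI */
}
#endif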
16653
16654 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
16655 whose low element is VAR, and other elements are zero. Return true
16656 if successful. */
16657
16658 static bool
16659 ix86_expand_vector_init_low_nonzero (bool mmx_ok, enum machine_mode mode,
16660 rtx target, rtx var)
16661 {
16662 enum machine_mode vsimode;
16663 rtx x;
16664
16665 switch (mode)
16666 {
16667 case V2SFmode:
16668 case V2SImode:
16669 if (!mmx_ok && !TARGET_SSE)
16670 return false;
16671 /* FALLTHRU */
16672
16673 case V2DFmode:
16674 case V2DImode:
16675 var = force_reg (GET_MODE_INNER (mode), var);
16676 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
16677 emit_insn (gen_rtx_SET (VOIDmode, target, x));
16678 return true;
16679
16680 case V4SFmode:
16681 case V4SImode:
16682 var = force_reg (GET_MODE_INNER (mode), var);
16683 x = gen_rtx_VEC_DUPLICATE (mode, var);
16684 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
16685 emit_insn (gen_rtx_SET (VOIDmode, target, x));
16686 return true;
16687
16688 case V8HImode:
16689 case V16QImode:
16690 vsimode = V4SImode;
16691 goto widen;
16692 case V4HImode:
16693 case V8QImode:
16694 if (!mmx_ok)
16695 return false;
16696 vsimode = V2SImode;
16697 goto widen;
16698 widen:
16699 /* Zero extend the variable element to SImode and recurse. */
16700 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
16701
16702 x = gen_reg_rtx (vsimode);
16703 if (!ix86_expand_vector_init_low_nonzero (mmx_ok, vsimode, x, var))
16704 gcc_unreachable ();
16705
16706 emit_move_insn (target, gen_lowpart (mode, x));
16707 return true;
16708
16709 default:
16710 return false;
16711 }
16712 }
16713
16714 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
16715 consisting of the values in VALS. It is known that all elements
16716 except ONE_VAR are constants. Return true if successful. */
16717
16718 static bool
16719 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
16720 rtx target, rtx vals, int one_var)
16721 {
16722 rtx var = XVECEXP (vals, 0, one_var);
16723 enum machine_mode wmode;
16724 rtx const_vec, x;
16725
16726 XVECEXP (vals, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
16727 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
16728
16729 switch (mode)
16730 {
16731 case V2DFmode:
16732 case V2DImode:
16733 case V2SFmode:
16734 case V2SImode:
16735 /* For the two element vectors, it's just as easy to use
16736 the general case. */
16737 return false;
16738
16739 case V4SFmode:
16740 case V4SImode:
16741 case V8HImode:
16742 case V4HImode:
16743 break;
16744
16745 case V16QImode:
16746 wmode = V8HImode;
16747 goto widen;
16748 case V8QImode:
16749 wmode = V4HImode;
16750 goto widen;
16751 widen:
16752 /* There's no way to set one QImode entry easily. Combine
16753 the variable value with its adjacent constant value, and
16754 promote to an HImode set. */
16755 x = XVECEXP (vals, 0, one_var ^ 1);
16756 if (one_var & 1)
16757 {
16758 var = convert_modes (HImode, QImode, var, true);
16759 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
16760 NULL_RTX, 1, OPTAB_LIB_WIDEN);
16761 x = GEN_INT (INTVAL (x) & 0xff);
16762 }
16763 else
16764 {
16765 var = convert_modes (HImode, QImode, var, true);
16766 x = gen_int_mode (INTVAL (x) << 8, HImode);
16767 }
16768 if (x != const0_rtx)
16769 var = expand_simple_binop (HImode, IOR, var, x, var,
16770 1, OPTAB_LIB_WIDEN);
16771
16772 x = gen_reg_rtx (wmode);
16773 emit_move_insn (x, gen_lowpart (wmode, const_vec));
16774 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
16775
16776 emit_move_insn (target, gen_lowpart (mode, x));
16777 return true;
16778
16779 default:
16780 return false;
16781 }
16782
16783 emit_move_insn (target, const_vec);
16784 ix86_expand_vector_set (mmx_ok, target, var, one_var);
16785 return true;
16786 }
16787
16788 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
16789 all values variable, and none identical. */
16790
16791 static void
16792 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
16793 rtx target, rtx vals)
16794 {
16795 enum machine_mode half_mode = GET_MODE_INNER (mode);
16796 rtx op0 = NULL, op1 = NULL;
16797 bool use_vec_concat = false;
16798
16799 switch (mode)
16800 {
16801 case V2SFmode:
16802 case V2SImode:
16803 if (!mmx_ok && !TARGET_SSE)
16804 break;
16805 /* FALLTHRU */
16806
16807 case V2DFmode:
16808 case V2DImode:
16809 /* For the two element vectors, we always implement VEC_CONCAT. */
16810 op0 = XVECEXP (vals, 0, 0);
16811 op1 = XVECEXP (vals, 0, 1);
16812 use_vec_concat = true;
16813 break;
16814
16815 case V4SFmode:
16816 half_mode = V2SFmode;
16817 goto half;
16818 case V4SImode:
16819 half_mode = V2SImode;
16820 goto half;
16821 half:
16822 {
16823 rtvec v;
16824
16825 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
16826 Recurse to load the two halves. */
16827
16828 op0 = gen_reg_rtx (half_mode);
16829 v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
16830 ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
16831
16832 op1 = gen_reg_rtx (half_mode);
16833 v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
16834 ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
16835
16836 use_vec_concat = true;
16837 }
16838 break;
16839
16840 case V8HImode:
16841 case V16QImode:
16842 case V4HImode:
16843 case V8QImode:
16844 break;
16845
16846 default:
16847 gcc_unreachable ();
16848 }
16849
16850 if (use_vec_concat)
16851 {
16852 if (!register_operand (op0, half_mode))
16853 op0 = force_reg (half_mode, op0);
16854 if (!register_operand (op1, half_mode))
16855 op1 = force_reg (half_mode, op1);
16856
16857 emit_insn (gen_rtx_SET (VOIDmode, target,
16858 gen_rtx_VEC_CONCAT (mode, op0, op1)));
16859 }
16860 else
16861 {
16862 int i, j, n_elts, n_words, n_elt_per_word;
16863 enum machine_mode inner_mode;
16864 rtx words[4], shift;
16865
16866 inner_mode = GET_MODE_INNER (mode);
16867 n_elts = GET_MODE_NUNITS (mode);
16868 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
16869 n_elt_per_word = n_elts / n_words;
16870 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
16871
16872 for (i = 0; i < n_words; ++i)
16873 {
16874 rtx word = NULL_RTX;
16875
16876 for (j = 0; j < n_elt_per_word; ++j)
16877 {
16878 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
16879 elt = convert_modes (word_mode, inner_mode, elt, true);
16880
16881 if (j == 0)
16882 word = elt;
16883 else
16884 {
16885 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
16886 word, 1, OPTAB_LIB_WIDEN);
16887 word = expand_simple_binop (word_mode, IOR, word, elt,
16888 word, 1, OPTAB_LIB_WIDEN);
16889 }
16890 }
16891
16892 words[i] = word;
16893 }
16894
16895 if (n_words == 1)
16896 emit_move_insn (target, gen_lowpart (mode, words[0]));
16897 else if (n_words == 2)
16898 {
16899 rtx tmp = gen_reg_rtx (mode);
16900 emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
16901 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
16902 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
16903 emit_move_insn (target, tmp);
16904 }
16905 else if (n_words == 4)
16906 {
16907 rtx tmp = gen_reg_rtx (V4SImode);
16908 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
16909 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
16910 emit_move_insn (target, gen_lowpart (mode, tmp));
16911 }
16912 else
16913 gcc_unreachable ();
16914 }
16915 }
16916
16917 /* Initialize vector TARGET via VALS. Suppress the use of MMX
16918 instructions unless MMX_OK is true. */
16919
16920 void
16921 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
16922 {
16923 enum machine_mode mode = GET_MODE (target);
16924 enum machine_mode inner_mode = GET_MODE_INNER (mode);
16925 int n_elts = GET_MODE_NUNITS (mode);
16926 int n_var = 0, one_var = -1;
16927 bool all_same = true, all_const_zero = true;
16928 int i;
16929 rtx x;
16930
16931 for (i = 0; i < n_elts; ++i)
16932 {
16933 x = XVECEXP (vals, 0, i);
16934 if (!CONSTANT_P (x))
16935 n_var++, one_var = i;
16936 else if (x != CONST0_RTX (inner_mode))
16937 all_const_zero = false;
16938 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
16939 all_same = false;
16940 }
16941
16942 /* Constants are best loaded from the constant pool. */
16943 if (n_var == 0)
16944 {
16945 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
16946 return;
16947 }
16948
16949 /* If all values are identical, broadcast the value. */
16950 if (all_same
16951 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
16952 XVECEXP (vals, 0, 0)))
16953 return;
16954
16955 /* Values where only one field is non-constant are best loaded from
16956 the pool and overwritten via move later. */
16957 if (n_var == 1)
16958 {
16959 if (all_const_zero && one_var == 0
16960 && ix86_expand_vector_init_low_nonzero (mmx_ok, mode, target,
16961 XVECEXP (vals, 0, 0)))
16962 return;
16963
16964 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
16965 return;
16966 }
16967
16968 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
16969 }
16970
16971 void
16972 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
16973 {
16974 enum machine_mode mode = GET_MODE (target);
16975 enum machine_mode inner_mode = GET_MODE_INNER (mode);
16976 bool use_vec_merge = false;
16977 rtx tmp;
16978
16979 switch (mode)
16980 {
16981 case V2SFmode:
16982 case V2SImode:
16983 if (mmx_ok)
16984 {
16985 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
16986 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
16987 if (elt == 0)
16988 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
16989 else
16990 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
16991 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
16992 return;
16993 }
16994 break;
16995
16996 case V2DFmode:
16997 case V2DImode:
16998 {
16999 rtx op0, op1;
17000
17001 /* For the two element vectors, we implement a VEC_CONCAT with
17002 the extraction of the other element. */
17003
17004 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
17005 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
17006
17007 if (elt == 0)
17008 op0 = val, op1 = tmp;
17009 else
17010 op0 = tmp, op1 = val;
17011
17012 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
17013 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
17014 }
17015 return;
17016
17017 case V4SFmode:
17018 switch (elt)
17019 {
17020 case 0:
17021 use_vec_merge = true;
17022 break;
17023
17024 case 1:
17025 /* tmp = op0 = A B C D */
17026 tmp = copy_to_reg (target);
17027
17028 /* op0 = C C D D */
17029 emit_insn (gen_sse_unpcklps (target, target, target));
17030
17031 /* op0 = C C D X */
17032 ix86_expand_vector_set (false, target, val, 0);
17033
17034 /* op0 = A B X D */
17035 emit_insn (gen_sse_shufps_1 (target, target, tmp,
17036 GEN_INT (1), GEN_INT (0),
17037 GEN_INT (2+4), GEN_INT (3+4)));
17038 return;
17039
17040 case 2:
17041 tmp = copy_to_reg (target);
17042 ix86_expand_vector_set (false, target, val, 0);
17043 emit_insn (gen_sse_shufps_1 (target, target, tmp,
17044 GEN_INT (0), GEN_INT (1),
17045 GEN_INT (0+4), GEN_INT (3+4)));
17046 return;
17047
17048 case 3:
17049 tmp = copy_to_reg (target);
17050 ix86_expand_vector_set (false, target, val, 0);
17051 emit_insn (gen_sse_shufps_1 (target, target, tmp,
17052 GEN_INT (0), GEN_INT (1),
17053 GEN_INT (2+4), GEN_INT (0+4)));
17054 return;
17055
17056 default:
17057 gcc_unreachable ();
17058 }
17059 break;
17060
17061 case V4SImode:
17062 /* Element 0 handled by vec_merge below. */
17063 if (elt == 0)
17064 {
17065 use_vec_merge = true;
17066 break;
17067 }
17068
17069 if (TARGET_SSE2)
17070 {
17071 /* With SSE2, use integer shuffles to swap element 0 and ELT,
17072 store into element 0, then shuffle them back. */
17073
17074 rtx order[4];
17075
17076 order[0] = GEN_INT (elt);
17077 order[1] = const1_rtx;
17078 order[2] = const2_rtx;
17079 order[3] = GEN_INT (3);
17080 order[elt] = const0_rtx;
17081
17082 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
17083 order[1], order[2], order[3]));
17084
17085 ix86_expand_vector_set (false, target, val, 0);
17086
17087 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
17088 order[1], order[2], order[3]));
17089 }
17090 else
17091 {
17092 /* For SSE1, we have to reuse the V4SF code. */
17093 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
17094 gen_lowpart (SFmode, val), elt);
17095 }
17096 return;
17097
17098 case V8HImode:
17099 use_vec_merge = TARGET_SSE2;
17100 break;
17101 case V4HImode:
17102 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
17103 break;
17104
17105 case V16QImode:
17106 case V8QImode:
17107 default:
17108 break;
17109 }
17110
17111 if (use_vec_merge)
17112 {
17113 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
17114 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
17115 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
17116 }
17117 else
17118 {
17119 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
17120
17121 emit_move_insn (mem, target);
17122
17123 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
17124 emit_move_insn (tmp, val);
17125
17126 emit_move_insn (target, mem);
17127 }
17128 }
17129
17130 void
17131 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
17132 {
17133 enum machine_mode mode = GET_MODE (vec);
17134 enum machine_mode inner_mode = GET_MODE_INNER (mode);
17135 bool use_vec_extr = false;
17136 rtx tmp;
17137
17138 switch (mode)
17139 {
17140 case V2SImode:
17141 case V2SFmode:
17142 if (!mmx_ok)
17143 break;
17144 /* FALLTHRU */
17145
17146 case V2DFmode:
17147 case V2DImode:
17148 use_vec_extr = true;
17149 break;
17150
17151 case V4SFmode:
17152 switch (elt)
17153 {
17154 case 0:
17155 tmp = vec;
17156 break;
17157
17158 case 1:
17159 case 3:
17160 tmp = gen_reg_rtx (mode);
17161 emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
17162 GEN_INT (elt), GEN_INT (elt),
17163 GEN_INT (elt+4), GEN_INT (elt+4)));
17164 break;
17165
17166 case 2:
17167 tmp = gen_reg_rtx (mode);
17168 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
17169 break;
17170
17171 default:
17172 gcc_unreachable ();
17173 }
17174 vec = tmp;
17175 use_vec_extr = true;
17176 elt = 0;
17177 break;
17178
17179 case V4SImode:
17180 if (TARGET_SSE2)
17181 {
17182 switch (elt)
17183 {
17184 case 0:
17185 tmp = vec;
17186 break;
17187
17188 case 1:
17189 case 3:
17190 tmp = gen_reg_rtx (mode);
17191 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
17192 GEN_INT (elt), GEN_INT (elt),
17193 GEN_INT (elt), GEN_INT (elt)));
17194 break;
17195
17196 case 2:
17197 tmp = gen_reg_rtx (mode);
17198 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
17199 break;
17200
17201 default:
17202 gcc_unreachable ();
17203 }
17204 vec = tmp;
17205 use_vec_extr = true;
17206 elt = 0;
17207 }
17208 else
17209 {
17210 /* For SSE1, we have to reuse the V4SF code. */
17211 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
17212 gen_lowpart (V4SFmode, vec), elt);
17213 return;
17214 }
17215 break;
17216
17217 case V8HImode:
17218 use_vec_extr = TARGET_SSE2;
17219 break;
17220 case V4HImode:
17221 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
17222 break;
17223
17224 case V16QImode:
17225 case V8QImode:
17226 /* ??? Could extract the appropriate HImode element and shift. */
17227 default:
17228 break;
17229 }
17230
17231 if (use_vec_extr)
17232 {
17233 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
17234 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
17235
17236 /* Let the rtl optimizers know about the zero extension performed. */
17237 if (inner_mode == HImode)
17238 {
17239 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
17240 target = gen_lowpart (SImode, target);
17241 }
17242
17243 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
17244 }
17245 else
17246 {
17247 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
17248
17249 emit_move_insn (mem, vec);
17250
17251 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
17252 emit_move_insn (target, tmp);
17253 }
17254 }
17255 \f
17256 /* Implements target hook vector_mode_supported_p. */
17257 static bool
17258 ix86_vector_mode_supported_p (enum machine_mode mode)
17259 {
17260 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
17261 return true;
17262 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
17263 return true;
17264 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
17265 return true;
17266 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
17267 return true;
17268 return false;
17269 }
17270
17271 /* Worker function for TARGET_MD_ASM_CLOBBERS.
17272
17273 We do this in the new i386 backend to maintain source compatibility
17274 with the old cc0-based compiler. */
17275
17276 static tree
17277 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
17278 tree inputs ATTRIBUTE_UNUSED,
17279 tree clobbers)
17280 {
17281 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
17282 clobbers);
17283 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
17284 clobbers);
17285 clobbers = tree_cons (NULL_TREE, build_string (7, "dirflag"),
17286 clobbers);
17287 return clobbers;
17288 }
17289
17290 /* Worker function for REVERSE_CONDITION. */
17291
17292 enum rtx_code
17293 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
17294 {
17295 return (mode != CCFPmode && mode != CCFPUmode
17296 ? reverse_condition (code)
17297 : reverse_condition_maybe_unordered (code));
17298 }
17299
17300 /* Output code to perform an x87 FP register move, from OPERANDS[1]
17301 to OPERANDS[0]. */
17302
17303 const char *
17304 output_387_reg_move (rtx insn, rtx *operands)
17305 {
17306 if (REG_P (operands[1])
17307 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
17308 {
17309 if (REGNO (operands[0]) == FIRST_STACK_REG
17310 && TARGET_USE_FFREEP)
17311 return "ffreep\t%y0";
17312 return "fstp\t%y0";
17313 }
17314 if (STACK_TOP_P (operands[0]))
17315 return "fld%z1\t%y1";
17316 return "fst\t%y0";
17317 }
17318
17319 /* Output code to perform a conditional jump to LABEL, if the C2 flag in
17320 the FP status register is set. */
17321
17322 void
17323 ix86_emit_fp_unordered_jump (rtx label)
17324 {
17325 rtx reg = gen_reg_rtx (HImode);
17326 rtx temp;
17327
17328 emit_insn (gen_x86_fnstsw_1 (reg));
17329
17330 if (TARGET_USE_SAHF)
17331 {
17332 emit_insn (gen_x86_sahf_1 (reg));
17333
17334 temp = gen_rtx_REG (CCmode, FLAGS_REG);
17335 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
17336 }
17337 else
17338 {
17339 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
17340
17341 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
17342 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
17343 }
17344
17345 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
17346 gen_rtx_LABEL_REF (VOIDmode, label),
17347 pc_rtx);
17348 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
17349 emit_jump_insn (temp);
17350 }
17351
17352 /* Output code to perform a log1p XFmode calculation. */
17353
17354 void ix86_emit_i387_log1p (rtx op0, rtx op1)
17355 {
17356 rtx label1 = gen_label_rtx ();
17357 rtx label2 = gen_label_rtx ();
17358
17359 rtx tmp = gen_reg_rtx (XFmode);
17360 rtx tmp2 = gen_reg_rtx (XFmode);
17361
17362 emit_insn (gen_absxf2 (tmp, op1));
17363 emit_insn (gen_cmpxf (tmp,
17364 CONST_DOUBLE_FROM_REAL_VALUE (
17365 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
17366 XFmode)));
17367 emit_jump_insn (gen_bge (label1));
17368
17369 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
17370 emit_insn (gen_fyl2xp1_xf3 (op0, tmp2, op1));
17371 emit_jump (label2);
17372
17373 emit_label (label1);
17374 emit_move_insn (tmp, CONST1_RTX (XFmode));
17375 emit_insn (gen_addxf3 (tmp, op1, tmp));
17376 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
17377 emit_insn (gen_fyl2x_xf3 (op0, tmp2, tmp));
17378
17379 emit_label (label2);
17380 }
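/* Editorial note (not part of the original file): the sequence above
   computes log1p(x) = ln(1 + x) as ln(2) * log2(1 + x).  The fyl2xp1
   instruction is only valid for |x| < 1 - sqrt(2)/2 ~= 0.2928932, which is
   exactly the threshold constant compared against; outside that range
   1 + x is formed explicitly and fyl2x is used instead.  */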
17381
17382 /* Solaris named-section hook. Parameters are as for
17383 named_section_real. */
17384
17385 static void
17386 i386_solaris_elf_named_section (const char *name, unsigned int flags,
17387 tree decl)
17388 {
17389 /* With Binutils 2.15, the "@unwind" marker must be specified on
17390 every occurrence of the ".eh_frame" section, not just the first
17391 one. */
17392 if (TARGET_64BIT
17393 && strcmp (name, ".eh_frame") == 0)
17394 {
17395 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
17396 flags & SECTION_WRITE ? "aw" : "a");
17397 return;
17398 }
17399 default_elf_asm_named_section (name, flags, decl);
17400 }
17401
17402 #include "gt-i386.h"